{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.07083093541104077, "eval_steps": 500, "global_step": 16000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.426933463190049e-06, "grad_norm": 4.568702968807073, "learning_rate": 4.4269334631900484e-10, "loss": 1.1398, "step": 1 }, { "epoch": 8.853866926380097e-06, "grad_norm": 3.7294871375983942, "learning_rate": 8.853866926380097e-10, "loss": 0.8818, "step": 2 }, { "epoch": 1.3280800389570144e-05, "grad_norm": 4.642524069544302, "learning_rate": 1.3280800389570144e-09, "loss": 1.0674, "step": 3 }, { "epoch": 1.7707733852760195e-05, "grad_norm": 3.7006467767725257, "learning_rate": 1.7707733852760194e-09, "loss": 0.9619, "step": 4 }, { "epoch": 2.213466731595024e-05, "grad_norm": 4.386276949704148, "learning_rate": 2.213466731595024e-09, "loss": 1.2438, "step": 5 }, { "epoch": 2.656160077914029e-05, "grad_norm": 4.6693752595776425, "learning_rate": 2.6561600779140288e-09, "loss": 0.9265, "step": 6 }, { "epoch": 3.0988534242330335e-05, "grad_norm": 4.837049303539033, "learning_rate": 3.098853424233034e-09, "loss": 1.4739, "step": 7 }, { "epoch": 3.541546770552039e-05, "grad_norm": 5.618615698138189, "learning_rate": 3.5415467705520388e-09, "loss": 1.6068, "step": 8 }, { "epoch": 3.9842401168710436e-05, "grad_norm": 5.3143820997637565, "learning_rate": 3.984240116871043e-09, "loss": 1.2447, "step": 9 }, { "epoch": 4.426933463190048e-05, "grad_norm": 4.778989375380422, "learning_rate": 4.426933463190048e-09, "loss": 1.118, "step": 10 }, { "epoch": 4.869626809509053e-05, "grad_norm": 4.489792361591213, "learning_rate": 4.869626809509053e-09, "loss": 0.8456, "step": 11 }, { "epoch": 5.312320155828058e-05, "grad_norm": 5.110816670570661, "learning_rate": 5.3123201558280575e-09, "loss": 1.3348, "step": 12 }, { "epoch": 5.755013502147063e-05, "grad_norm": 5.029703298265698, "learning_rate": 5.755013502147063e-09, "loss": 1.0168, "step": 13 }, { "epoch": 6.197706848466067e-05, "grad_norm": 4.13526111258034, "learning_rate": 6.197706848466068e-09, "loss": 0.8901, "step": 14 }, { "epoch": 6.640400194785072e-05, "grad_norm": 5.498977604308183, "learning_rate": 6.6404001947850725e-09, "loss": 0.8996, "step": 15 }, { "epoch": 7.083093541104078e-05, "grad_norm": 4.862229499665775, "learning_rate": 7.0830935411040775e-09, "loss": 1.0637, "step": 16 }, { "epoch": 7.525786887423083e-05, "grad_norm": 3.7835461917601756, "learning_rate": 7.525786887423083e-09, "loss": 0.8935, "step": 17 }, { "epoch": 7.968480233742087e-05, "grad_norm": 4.61573790356622, "learning_rate": 7.968480233742087e-09, "loss": 1.233, "step": 18 }, { "epoch": 8.411173580061092e-05, "grad_norm": 3.8427737571373934, "learning_rate": 8.411173580061093e-09, "loss": 0.9837, "step": 19 }, { "epoch": 8.853866926380097e-05, "grad_norm": 4.778507648939246, "learning_rate": 8.853866926380097e-09, "loss": 1.2734, "step": 20 }, { "epoch": 9.296560272699101e-05, "grad_norm": 3.9330176933452163, "learning_rate": 9.296560272699102e-09, "loss": 0.9954, "step": 21 }, { "epoch": 9.739253619018106e-05, "grad_norm": 4.354099184405387, "learning_rate": 9.739253619018107e-09, "loss": 1.0284, "step": 22 }, { "epoch": 0.00010181946965337111, "grad_norm": 4.445846385798183, "learning_rate": 1.0181946965337113e-08, "loss": 1.0125, "step": 23 }, { "epoch": 0.00010624640311656115, "grad_norm": 6.245672549731536, "learning_rate": 1.0624640311656115e-08, "loss": 1.3064, "step": 24 }, { "epoch": 0.0001106733365797512, "grad_norm": 4.3709437322503915, "learning_rate": 1.1067333657975122e-08, "loss": 1.2149, "step": 25 }, { "epoch": 0.00011510027004294126, "grad_norm": 4.413429632146103, "learning_rate": 1.1510027004294127e-08, "loss": 1.2005, "step": 26 }, { "epoch": 0.00011952720350613131, "grad_norm": 5.777558581013135, "learning_rate": 1.1952720350613132e-08, "loss": 1.6213, "step": 27 }, { "epoch": 0.00012395413696932134, "grad_norm": 4.982394270140917, "learning_rate": 1.2395413696932137e-08, "loss": 1.5178, "step": 28 }, { "epoch": 0.0001283810704325114, "grad_norm": 4.399587585926034, "learning_rate": 1.283810704325114e-08, "loss": 1.1018, "step": 29 }, { "epoch": 0.00013280800389570144, "grad_norm": 4.690269941240886, "learning_rate": 1.3280800389570145e-08, "loss": 1.0858, "step": 30 }, { "epoch": 0.00013723493735889148, "grad_norm": 5.498359217986138, "learning_rate": 1.372349373588915e-08, "loss": 1.1507, "step": 31 }, { "epoch": 0.00014166187082208156, "grad_norm": 4.171499389115206, "learning_rate": 1.4166187082208155e-08, "loss": 0.8319, "step": 32 }, { "epoch": 0.0001460888042852716, "grad_norm": 4.1234452917830735, "learning_rate": 1.4608880428527162e-08, "loss": 0.7972, "step": 33 }, { "epoch": 0.00015051573774846165, "grad_norm": 4.829194502832397, "learning_rate": 1.5051573774846167e-08, "loss": 1.0763, "step": 34 }, { "epoch": 0.0001549426712116517, "grad_norm": 5.605983694937127, "learning_rate": 1.549426712116517e-08, "loss": 1.6158, "step": 35 }, { "epoch": 0.00015936960467484174, "grad_norm": 5.198436767331326, "learning_rate": 1.5936960467484173e-08, "loss": 1.6403, "step": 36 }, { "epoch": 0.0001637965381380318, "grad_norm": 5.4380363593428385, "learning_rate": 1.637965381380318e-08, "loss": 1.5662, "step": 37 }, { "epoch": 0.00016822347160122184, "grad_norm": 5.755791871602957, "learning_rate": 1.6822347160122187e-08, "loss": 0.9704, "step": 38 }, { "epoch": 0.00017265040506441189, "grad_norm": 5.45962152654478, "learning_rate": 1.726504050644119e-08, "loss": 1.1963, "step": 39 }, { "epoch": 0.00017707733852760193, "grad_norm": 4.618242893470786, "learning_rate": 1.7707733852760193e-08, "loss": 1.1306, "step": 40 }, { "epoch": 0.00018150427199079198, "grad_norm": 5.577124021534659, "learning_rate": 1.81504271990792e-08, "loss": 1.6485, "step": 41 }, { "epoch": 0.00018593120545398203, "grad_norm": 4.834341467958839, "learning_rate": 1.8593120545398203e-08, "loss": 0.9461, "step": 42 }, { "epoch": 0.00019035813891717207, "grad_norm": 5.43293050265643, "learning_rate": 1.903581389171721e-08, "loss": 1.3608, "step": 43 }, { "epoch": 0.00019478507238036212, "grad_norm": 4.462652907978979, "learning_rate": 1.9478507238036213e-08, "loss": 1.1215, "step": 44 }, { "epoch": 0.00019921200584355217, "grad_norm": 4.034770030369863, "learning_rate": 1.9921200584355217e-08, "loss": 0.8609, "step": 45 }, { "epoch": 0.00020363893930674221, "grad_norm": 4.575954764931869, "learning_rate": 2.0363893930674227e-08, "loss": 0.8429, "step": 46 }, { "epoch": 0.00020806587276993226, "grad_norm": 4.995933516294001, "learning_rate": 2.0806587276993227e-08, "loss": 1.3311, "step": 47 }, { "epoch": 0.0002124928062331223, "grad_norm": 5.10732615200497, "learning_rate": 2.124928062331223e-08, "loss": 1.1393, "step": 48 }, { "epoch": 0.00021691973969631235, "grad_norm": 4.684677468533909, "learning_rate": 2.169197396963124e-08, "loss": 1.1119, "step": 49 }, { "epoch": 0.0002213466731595024, "grad_norm": 4.646340849585853, "learning_rate": 2.2134667315950243e-08, "loss": 1.1116, "step": 50 }, { "epoch": 0.00022577360662269245, "grad_norm": 4.477868479837972, "learning_rate": 2.257736066226925e-08, "loss": 1.1217, "step": 51 }, { "epoch": 0.00023020054008588252, "grad_norm": 4.086507579908557, "learning_rate": 2.3020054008588253e-08, "loss": 1.2107, "step": 52 }, { "epoch": 0.00023462747354907257, "grad_norm": 4.493374708299464, "learning_rate": 2.3462747354907257e-08, "loss": 0.996, "step": 53 }, { "epoch": 0.00023905440701226262, "grad_norm": 4.972987811780186, "learning_rate": 2.3905440701226263e-08, "loss": 0.9677, "step": 54 }, { "epoch": 0.00024348134047545266, "grad_norm": 4.3691863123935475, "learning_rate": 2.4348134047545267e-08, "loss": 1.2503, "step": 55 }, { "epoch": 0.0002479082739386427, "grad_norm": 4.883460527559483, "learning_rate": 2.4790827393864273e-08, "loss": 1.3355, "step": 56 }, { "epoch": 0.00025233520740183276, "grad_norm": 5.225242338248372, "learning_rate": 2.5233520740183277e-08, "loss": 1.1093, "step": 57 }, { "epoch": 0.0002567621408650228, "grad_norm": 5.224315595360627, "learning_rate": 2.567621408650228e-08, "loss": 1.3745, "step": 58 }, { "epoch": 0.00026118907432821285, "grad_norm": 5.298876198868148, "learning_rate": 2.6118907432821287e-08, "loss": 1.1131, "step": 59 }, { "epoch": 0.00026561600779140287, "grad_norm": 4.36150602383614, "learning_rate": 2.656160077914029e-08, "loss": 1.1792, "step": 60 }, { "epoch": 0.00027004294125459295, "grad_norm": 5.1459832312026546, "learning_rate": 2.7004294125459297e-08, "loss": 0.9608, "step": 61 }, { "epoch": 0.00027446987471778296, "grad_norm": 3.807775515433495, "learning_rate": 2.74469874717783e-08, "loss": 0.8162, "step": 62 }, { "epoch": 0.00027889680818097304, "grad_norm": 5.813427435838518, "learning_rate": 2.788968081809731e-08, "loss": 1.42, "step": 63 }, { "epoch": 0.0002833237416441631, "grad_norm": 4.217920405772174, "learning_rate": 2.833237416441631e-08, "loss": 1.0739, "step": 64 }, { "epoch": 0.00028775067510735313, "grad_norm": 4.821369190893957, "learning_rate": 2.8775067510735313e-08, "loss": 1.2603, "step": 65 }, { "epoch": 0.0002921776085705432, "grad_norm": 3.8431523960275125, "learning_rate": 2.9217760857054323e-08, "loss": 0.8148, "step": 66 }, { "epoch": 0.0002966045420337332, "grad_norm": 4.745998491333527, "learning_rate": 2.9660454203373327e-08, "loss": 1.1044, "step": 67 }, { "epoch": 0.0003010314754969233, "grad_norm": 4.374433224777739, "learning_rate": 3.0103147549692333e-08, "loss": 1.4398, "step": 68 }, { "epoch": 0.0003054584089601133, "grad_norm": 4.572618566724691, "learning_rate": 3.0545840896011333e-08, "loss": 1.0175, "step": 69 }, { "epoch": 0.0003098853424233034, "grad_norm": 4.823853990805637, "learning_rate": 3.098853424233034e-08, "loss": 1.2751, "step": 70 }, { "epoch": 0.0003143122758864934, "grad_norm": 4.497963692630109, "learning_rate": 3.1431227588649347e-08, "loss": 1.175, "step": 71 }, { "epoch": 0.0003187392093496835, "grad_norm": 4.19919271276711, "learning_rate": 3.187392093496835e-08, "loss": 1.0603, "step": 72 }, { "epoch": 0.0003231661428128735, "grad_norm": 4.958889926969993, "learning_rate": 3.231661428128736e-08, "loss": 1.5199, "step": 73 }, { "epoch": 0.0003275930762760636, "grad_norm": 4.040863655943419, "learning_rate": 3.275930762760636e-08, "loss": 1.0281, "step": 74 }, { "epoch": 0.0003320200097392536, "grad_norm": 4.737780926756771, "learning_rate": 3.320200097392536e-08, "loss": 1.3037, "step": 75 }, { "epoch": 0.0003364469432024437, "grad_norm": 4.656295708322059, "learning_rate": 3.3644694320244373e-08, "loss": 1.3052, "step": 76 }, { "epoch": 0.0003408738766656337, "grad_norm": 4.228141902793646, "learning_rate": 3.4087387666563373e-08, "loss": 1.227, "step": 77 }, { "epoch": 0.00034530081012882377, "grad_norm": 4.115966404946292, "learning_rate": 3.453008101288238e-08, "loss": 0.9749, "step": 78 }, { "epoch": 0.0003497277435920138, "grad_norm": 5.0355739555769885, "learning_rate": 3.4972774359201387e-08, "loss": 1.197, "step": 79 }, { "epoch": 0.00035415467705520386, "grad_norm": 4.972584276919269, "learning_rate": 3.541546770552039e-08, "loss": 1.1463, "step": 80 }, { "epoch": 0.0003585816105183939, "grad_norm": 4.636484004402351, "learning_rate": 3.5858161051839393e-08, "loss": 1.3303, "step": 81 }, { "epoch": 0.00036300854398158396, "grad_norm": 4.622750245084774, "learning_rate": 3.63008543981584e-08, "loss": 1.176, "step": 82 }, { "epoch": 0.000367435477444774, "grad_norm": 4.6377469814641055, "learning_rate": 3.6743547744477407e-08, "loss": 1.3359, "step": 83 }, { "epoch": 0.00037186241090796405, "grad_norm": 4.65464088549103, "learning_rate": 3.718624109079641e-08, "loss": 1.5007, "step": 84 }, { "epoch": 0.0003762893443711541, "grad_norm": 5.201921256415853, "learning_rate": 3.7628934437115413e-08, "loss": 1.1502, "step": 85 }, { "epoch": 0.00038071627783434415, "grad_norm": 4.6336660758131325, "learning_rate": 3.807162778343442e-08, "loss": 1.2446, "step": 86 }, { "epoch": 0.0003851432112975342, "grad_norm": 4.759159356173069, "learning_rate": 3.851432112975342e-08, "loss": 0.981, "step": 87 }, { "epoch": 0.00038957014476072424, "grad_norm": 6.275463380675997, "learning_rate": 3.895701447607243e-08, "loss": 1.7191, "step": 88 }, { "epoch": 0.0003939970782239143, "grad_norm": 3.9482637266016094, "learning_rate": 3.939970782239144e-08, "loss": 0.9159, "step": 89 }, { "epoch": 0.00039842401168710433, "grad_norm": 4.977334350271726, "learning_rate": 3.9842401168710433e-08, "loss": 1.4051, "step": 90 }, { "epoch": 0.0004028509451502944, "grad_norm": 4.898783546410448, "learning_rate": 4.028509451502944e-08, "loss": 1.0623, "step": 91 }, { "epoch": 0.00040727787861348443, "grad_norm": 4.598986133823195, "learning_rate": 4.072778786134845e-08, "loss": 1.1048, "step": 92 }, { "epoch": 0.0004117048120766745, "grad_norm": 5.260090102850642, "learning_rate": 4.117048120766745e-08, "loss": 1.2279, "step": 93 }, { "epoch": 0.0004161317455398645, "grad_norm": 4.430701199943617, "learning_rate": 4.1613174553986453e-08, "loss": 1.028, "step": 94 }, { "epoch": 0.0004205586790030546, "grad_norm": 4.888050072705513, "learning_rate": 4.2055867900305467e-08, "loss": 1.1475, "step": 95 }, { "epoch": 0.0004249856124662446, "grad_norm": 4.3498455087071655, "learning_rate": 4.249856124662446e-08, "loss": 0.988, "step": 96 }, { "epoch": 0.0004294125459294347, "grad_norm": 5.507315919543983, "learning_rate": 4.2941254592943473e-08, "loss": 1.4328, "step": 97 }, { "epoch": 0.0004338394793926247, "grad_norm": 5.589163677922502, "learning_rate": 4.338394793926248e-08, "loss": 1.1136, "step": 98 }, { "epoch": 0.0004382664128558148, "grad_norm": 3.7764594269641334, "learning_rate": 4.3826641285581487e-08, "loss": 0.8686, "step": 99 }, { "epoch": 0.0004426933463190048, "grad_norm": 5.081955516916703, "learning_rate": 4.4269334631900487e-08, "loss": 1.1136, "step": 100 }, { "epoch": 0.0004471202797821949, "grad_norm": 4.835813233649325, "learning_rate": 4.4712027978219493e-08, "loss": 1.0645, "step": 101 }, { "epoch": 0.0004515472132453849, "grad_norm": 5.698631575563449, "learning_rate": 4.51547213245385e-08, "loss": 1.3325, "step": 102 }, { "epoch": 0.00045597414670857497, "grad_norm": 4.970317986527668, "learning_rate": 4.55974146708575e-08, "loss": 1.2097, "step": 103 }, { "epoch": 0.00046040108017176505, "grad_norm": 6.790911841136925, "learning_rate": 4.6040108017176507e-08, "loss": 1.7479, "step": 104 }, { "epoch": 0.00046482801363495507, "grad_norm": 4.3213723863522855, "learning_rate": 4.6482801363495513e-08, "loss": 1.0375, "step": 105 }, { "epoch": 0.00046925494709814514, "grad_norm": 4.350491644046595, "learning_rate": 4.6925494709814513e-08, "loss": 1.2551, "step": 106 }, { "epoch": 0.00047368188056133516, "grad_norm": 5.3389243732976945, "learning_rate": 4.736818805613352e-08, "loss": 1.2644, "step": 107 }, { "epoch": 0.00047810881402452523, "grad_norm": 5.035798988020984, "learning_rate": 4.7810881402452527e-08, "loss": 1.294, "step": 108 }, { "epoch": 0.00048253574748771525, "grad_norm": 4.918936189253843, "learning_rate": 4.8253574748771533e-08, "loss": 1.052, "step": 109 }, { "epoch": 0.0004869626809509053, "grad_norm": 5.224552063485891, "learning_rate": 4.8696268095090533e-08, "loss": 1.1444, "step": 110 }, { "epoch": 0.0004913896144140954, "grad_norm": 5.493454674358817, "learning_rate": 4.913896144140954e-08, "loss": 1.3776, "step": 111 }, { "epoch": 0.0004958165478772854, "grad_norm": 5.369000420481522, "learning_rate": 4.9581654787728547e-08, "loss": 1.1963, "step": 112 }, { "epoch": 0.0005002434813404754, "grad_norm": 3.954119801591849, "learning_rate": 5.002434813404755e-08, "loss": 0.9734, "step": 113 }, { "epoch": 0.0005046704148036655, "grad_norm": 5.1661897179746665, "learning_rate": 5.0467041480366553e-08, "loss": 1.2763, "step": 114 }, { "epoch": 0.0005090973482668556, "grad_norm": 4.920170668967374, "learning_rate": 5.090973482668556e-08, "loss": 1.1264, "step": 115 }, { "epoch": 0.0005135242817300456, "grad_norm": 4.355801461593713, "learning_rate": 5.135242817300456e-08, "loss": 1.2083, "step": 116 }, { "epoch": 0.0005179512151932356, "grad_norm": 5.347541642149389, "learning_rate": 5.179512151932357e-08, "loss": 0.9495, "step": 117 }, { "epoch": 0.0005223781486564257, "grad_norm": 5.09801666951443, "learning_rate": 5.2237814865642573e-08, "loss": 1.2247, "step": 118 }, { "epoch": 0.0005268050821196158, "grad_norm": 5.270727093307756, "learning_rate": 5.268050821196158e-08, "loss": 1.6045, "step": 119 }, { "epoch": 0.0005312320155828057, "grad_norm": 5.411291236995923, "learning_rate": 5.312320155828058e-08, "loss": 1.7092, "step": 120 }, { "epoch": 0.0005356589490459958, "grad_norm": 4.839060638045308, "learning_rate": 5.356589490459959e-08, "loss": 1.1422, "step": 121 }, { "epoch": 0.0005400858825091859, "grad_norm": 4.473134262912232, "learning_rate": 5.4008588250918593e-08, "loss": 1.0342, "step": 122 }, { "epoch": 0.000544512815972376, "grad_norm": 4.498567450317491, "learning_rate": 5.4451281597237593e-08, "loss": 0.9691, "step": 123 }, { "epoch": 0.0005489397494355659, "grad_norm": 4.283916773799669, "learning_rate": 5.48939749435566e-08, "loss": 1.0838, "step": 124 }, { "epoch": 0.000553366682898756, "grad_norm": 4.721368027452473, "learning_rate": 5.533666828987561e-08, "loss": 1.1775, "step": 125 }, { "epoch": 0.0005577936163619461, "grad_norm": 5.093557148676, "learning_rate": 5.577936163619462e-08, "loss": 1.5534, "step": 126 }, { "epoch": 0.0005622205498251362, "grad_norm": 4.46204845409757, "learning_rate": 5.6222054982513613e-08, "loss": 0.9736, "step": 127 }, { "epoch": 0.0005666474832883262, "grad_norm": 3.9222277388481968, "learning_rate": 5.666474832883262e-08, "loss": 0.8662, "step": 128 }, { "epoch": 0.0005710744167515162, "grad_norm": 5.218387098970612, "learning_rate": 5.7107441675151633e-08, "loss": 1.3838, "step": 129 }, { "epoch": 0.0005755013502147063, "grad_norm": 4.217670483874216, "learning_rate": 5.755013502147063e-08, "loss": 0.9843, "step": 130 }, { "epoch": 0.0005799282836778963, "grad_norm": 4.029272179322034, "learning_rate": 5.7992828367789633e-08, "loss": 0.8301, "step": 131 }, { "epoch": 0.0005843552171410864, "grad_norm": 4.559229458880547, "learning_rate": 5.8435521714108647e-08, "loss": 1.1142, "step": 132 }, { "epoch": 0.0005887821506042764, "grad_norm": 4.04455640291217, "learning_rate": 5.887821506042764e-08, "loss": 0.8327, "step": 133 }, { "epoch": 0.0005932090840674665, "grad_norm": 4.044698145709796, "learning_rate": 5.9320908406746653e-08, "loss": 0.9875, "step": 134 }, { "epoch": 0.0005976360175306565, "grad_norm": 6.1279787934454975, "learning_rate": 5.976360175306565e-08, "loss": 1.6771, "step": 135 }, { "epoch": 0.0006020629509938466, "grad_norm": 4.010785964378155, "learning_rate": 6.020629509938467e-08, "loss": 0.9865, "step": 136 }, { "epoch": 0.0006064898844570366, "grad_norm": 3.9557392243746605, "learning_rate": 6.064898844570367e-08, "loss": 0.8891, "step": 137 }, { "epoch": 0.0006109168179202266, "grad_norm": 4.307114416359334, "learning_rate": 6.109168179202267e-08, "loss": 0.9015, "step": 138 }, { "epoch": 0.0006153437513834167, "grad_norm": 4.898858332365149, "learning_rate": 6.153437513834168e-08, "loss": 1.2676, "step": 139 }, { "epoch": 0.0006197706848466068, "grad_norm": 4.139852860458081, "learning_rate": 6.197706848466068e-08, "loss": 0.9914, "step": 140 }, { "epoch": 0.0006241976183097968, "grad_norm": 4.649142650223816, "learning_rate": 6.241976183097968e-08, "loss": 1.2467, "step": 141 }, { "epoch": 0.0006286245517729868, "grad_norm": 5.062917974228601, "learning_rate": 6.286245517729869e-08, "loss": 1.4635, "step": 142 }, { "epoch": 0.0006330514852361769, "grad_norm": 4.918545904966794, "learning_rate": 6.330514852361769e-08, "loss": 1.5805, "step": 143 }, { "epoch": 0.000637478418699367, "grad_norm": 4.798849796240021, "learning_rate": 6.37478418699367e-08, "loss": 1.1026, "step": 144 }, { "epoch": 0.0006419053521625569, "grad_norm": 5.138433431008228, "learning_rate": 6.419053521625571e-08, "loss": 1.1618, "step": 145 }, { "epoch": 0.000646332285625747, "grad_norm": 4.7965135922295, "learning_rate": 6.463322856257472e-08, "loss": 1.0079, "step": 146 }, { "epoch": 0.0006507592190889371, "grad_norm": 4.2805668238863435, "learning_rate": 6.507592190889371e-08, "loss": 1.0434, "step": 147 }, { "epoch": 0.0006551861525521272, "grad_norm": 4.181804593703566, "learning_rate": 6.551861525521272e-08, "loss": 1.129, "step": 148 }, { "epoch": 0.0006596130860153172, "grad_norm": 4.108967486634209, "learning_rate": 6.596130860153173e-08, "loss": 0.9538, "step": 149 }, { "epoch": 0.0006640400194785072, "grad_norm": 4.323249224222449, "learning_rate": 6.640400194785072e-08, "loss": 0.7547, "step": 150 }, { "epoch": 0.0006684669529416973, "grad_norm": 4.924983231983813, "learning_rate": 6.684669529416973e-08, "loss": 1.3341, "step": 151 }, { "epoch": 0.0006728938864048874, "grad_norm": 4.279177456133424, "learning_rate": 6.728938864048875e-08, "loss": 0.9385, "step": 152 }, { "epoch": 0.0006773208198680774, "grad_norm": 4.671765582762439, "learning_rate": 6.773208198680773e-08, "loss": 1.4024, "step": 153 }, { "epoch": 0.0006817477533312674, "grad_norm": 4.513398791247246, "learning_rate": 6.817477533312675e-08, "loss": 1.2157, "step": 154 }, { "epoch": 0.0006861746867944575, "grad_norm": 4.4630868952886305, "learning_rate": 6.861746867944576e-08, "loss": 1.0837, "step": 155 }, { "epoch": 0.0006906016202576475, "grad_norm": 3.800908511595419, "learning_rate": 6.906016202576476e-08, "loss": 0.9194, "step": 156 }, { "epoch": 0.0006950285537208376, "grad_norm": 4.279598121040726, "learning_rate": 6.950285537208376e-08, "loss": 1.0339, "step": 157 }, { "epoch": 0.0006994554871840276, "grad_norm": 5.12982098133369, "learning_rate": 6.994554871840277e-08, "loss": 1.4498, "step": 158 }, { "epoch": 0.0007038824206472177, "grad_norm": 4.583597852618188, "learning_rate": 7.038824206472177e-08, "loss": 1.5977, "step": 159 }, { "epoch": 0.0007083093541104077, "grad_norm": 6.235323490570926, "learning_rate": 7.083093541104077e-08, "loss": 1.5718, "step": 160 }, { "epoch": 0.0007127362875735978, "grad_norm": 4.826068111498516, "learning_rate": 7.127362875735979e-08, "loss": 0.9475, "step": 161 }, { "epoch": 0.0007171632210367878, "grad_norm": 4.626566365288617, "learning_rate": 7.171632210367879e-08, "loss": 1.0237, "step": 162 }, { "epoch": 0.0007215901544999778, "grad_norm": 4.216088681816484, "learning_rate": 7.21590154499978e-08, "loss": 0.9028, "step": 163 }, { "epoch": 0.0007260170879631679, "grad_norm": 4.237381764352824, "learning_rate": 7.26017087963168e-08, "loss": 1.0332, "step": 164 }, { "epoch": 0.000730444021426358, "grad_norm": 4.361335633981979, "learning_rate": 7.30444021426358e-08, "loss": 1.2205, "step": 165 }, { "epoch": 0.000734870954889548, "grad_norm": 4.747243991002619, "learning_rate": 7.348709548895481e-08, "loss": 1.5051, "step": 166 }, { "epoch": 0.000739297888352738, "grad_norm": 3.4144671486207603, "learning_rate": 7.392978883527381e-08, "loss": 0.9065, "step": 167 }, { "epoch": 0.0007437248218159281, "grad_norm": 5.025935938916525, "learning_rate": 7.437248218159281e-08, "loss": 1.3668, "step": 168 }, { "epoch": 0.0007481517552791182, "grad_norm": 3.9223642308837614, "learning_rate": 7.481517552791183e-08, "loss": 1.0441, "step": 169 }, { "epoch": 0.0007525786887423083, "grad_norm": 5.595465123415339, "learning_rate": 7.525786887423083e-08, "loss": 1.6108, "step": 170 }, { "epoch": 0.0007570056222054982, "grad_norm": 4.716352634299212, "learning_rate": 7.570056222054983e-08, "loss": 1.1724, "step": 171 }, { "epoch": 0.0007614325556686883, "grad_norm": 4.091717215303721, "learning_rate": 7.614325556686884e-08, "loss": 0.9549, "step": 172 }, { "epoch": 0.0007658594891318784, "grad_norm": 5.115482514482669, "learning_rate": 7.658594891318784e-08, "loss": 1.32, "step": 173 }, { "epoch": 0.0007702864225950684, "grad_norm": 4.117933101149398, "learning_rate": 7.702864225950684e-08, "loss": 1.0717, "step": 174 }, { "epoch": 0.0007747133560582584, "grad_norm": 4.317472139685962, "learning_rate": 7.747133560582585e-08, "loss": 1.2542, "step": 175 }, { "epoch": 0.0007791402895214485, "grad_norm": 4.105230287981827, "learning_rate": 7.791402895214485e-08, "loss": 1.15, "step": 176 }, { "epoch": 0.0007835672229846386, "grad_norm": 4.485303105208456, "learning_rate": 7.835672229846385e-08, "loss": 1.0215, "step": 177 }, { "epoch": 0.0007879941564478286, "grad_norm": 4.433761629952411, "learning_rate": 7.879941564478288e-08, "loss": 1.0434, "step": 178 }, { "epoch": 0.0007924210899110186, "grad_norm": 5.317478489374702, "learning_rate": 7.924210899110187e-08, "loss": 1.5251, "step": 179 }, { "epoch": 0.0007968480233742087, "grad_norm": 5.4572510488602, "learning_rate": 7.968480233742087e-08, "loss": 1.5021, "step": 180 }, { "epoch": 0.0008012749568373987, "grad_norm": 4.413968808788758, "learning_rate": 8.012749568373989e-08, "loss": 1.2095, "step": 181 }, { "epoch": 0.0008057018903005888, "grad_norm": 3.986067322738888, "learning_rate": 8.057018903005888e-08, "loss": 1.0441, "step": 182 }, { "epoch": 0.0008101288237637788, "grad_norm": 4.49566540725697, "learning_rate": 8.101288237637788e-08, "loss": 1.2946, "step": 183 }, { "epoch": 0.0008145557572269689, "grad_norm": 4.00070055689363, "learning_rate": 8.14555757226969e-08, "loss": 1.074, "step": 184 }, { "epoch": 0.0008189826906901589, "grad_norm": 4.7574975461008275, "learning_rate": 8.18982690690159e-08, "loss": 1.536, "step": 185 }, { "epoch": 0.000823409624153349, "grad_norm": 4.886862770362778, "learning_rate": 8.23409624153349e-08, "loss": 1.3852, "step": 186 }, { "epoch": 0.0008278365576165391, "grad_norm": 4.196351419623537, "learning_rate": 8.278365576165392e-08, "loss": 1.1254, "step": 187 }, { "epoch": 0.000832263491079729, "grad_norm": 5.601003956585197, "learning_rate": 8.322634910797291e-08, "loss": 1.019, "step": 188 }, { "epoch": 0.0008366904245429191, "grad_norm": 5.487524906375581, "learning_rate": 8.366904245429191e-08, "loss": 1.6836, "step": 189 }, { "epoch": 0.0008411173580061092, "grad_norm": 4.05461279178461, "learning_rate": 8.411173580061093e-08, "loss": 1.1077, "step": 190 }, { "epoch": 0.0008455442914692993, "grad_norm": 4.415565982960613, "learning_rate": 8.455442914692993e-08, "loss": 1.0572, "step": 191 }, { "epoch": 0.0008499712249324892, "grad_norm": 3.751585883013805, "learning_rate": 8.499712249324892e-08, "loss": 0.8347, "step": 192 }, { "epoch": 0.0008543981583956793, "grad_norm": 4.234760923428289, "learning_rate": 8.543981583956795e-08, "loss": 1.2779, "step": 193 }, { "epoch": 0.0008588250918588694, "grad_norm": 4.741131344350867, "learning_rate": 8.588250918588695e-08, "loss": 1.3811, "step": 194 }, { "epoch": 0.0008632520253220595, "grad_norm": 4.026914515643028, "learning_rate": 8.632520253220593e-08, "loss": 1.0667, "step": 195 }, { "epoch": 0.0008676789587852494, "grad_norm": 4.48697513509747, "learning_rate": 8.676789587852496e-08, "loss": 1.4192, "step": 196 }, { "epoch": 0.0008721058922484395, "grad_norm": 4.442550101277237, "learning_rate": 8.721058922484396e-08, "loss": 1.203, "step": 197 }, { "epoch": 0.0008765328257116296, "grad_norm": 4.595990899857154, "learning_rate": 8.765328257116297e-08, "loss": 1.3667, "step": 198 }, { "epoch": 0.0008809597591748196, "grad_norm": 3.7438863102185027, "learning_rate": 8.809597591748197e-08, "loss": 0.8973, "step": 199 }, { "epoch": 0.0008853866926380096, "grad_norm": 3.7723025053053183, "learning_rate": 8.853866926380097e-08, "loss": 0.9245, "step": 200 }, { "epoch": 0.0008898136261011997, "grad_norm": 3.4756568348829675, "learning_rate": 8.898136261011999e-08, "loss": 0.7244, "step": 201 }, { "epoch": 0.0008942405595643898, "grad_norm": 4.2590466164621805, "learning_rate": 8.942405595643899e-08, "loss": 1.1038, "step": 202 }, { "epoch": 0.0008986674930275798, "grad_norm": 3.6834401554417595, "learning_rate": 8.986674930275799e-08, "loss": 0.7655, "step": 203 }, { "epoch": 0.0009030944264907698, "grad_norm": 4.5124923033918245, "learning_rate": 9.0309442649077e-08, "loss": 1.2212, "step": 204 }, { "epoch": 0.0009075213599539599, "grad_norm": 4.803372129912394, "learning_rate": 9.0752135995396e-08, "loss": 1.2342, "step": 205 }, { "epoch": 0.0009119482934171499, "grad_norm": 3.3898193463021027, "learning_rate": 9.1194829341715e-08, "loss": 0.7638, "step": 206 }, { "epoch": 0.00091637522688034, "grad_norm": 3.695446116703826, "learning_rate": 9.163752268803401e-08, "loss": 0.7999, "step": 207 }, { "epoch": 0.0009208021603435301, "grad_norm": 3.6371369878986655, "learning_rate": 9.208021603435301e-08, "loss": 1.0147, "step": 208 }, { "epoch": 0.0009252290938067201, "grad_norm": 4.139172699749968, "learning_rate": 9.252290938067201e-08, "loss": 1.0178, "step": 209 }, { "epoch": 0.0009296560272699101, "grad_norm": 4.330746866901724, "learning_rate": 9.296560272699103e-08, "loss": 1.0393, "step": 210 }, { "epoch": 0.0009340829607331002, "grad_norm": 3.967867469824891, "learning_rate": 9.340829607331003e-08, "loss": 1.1892, "step": 211 }, { "epoch": 0.0009385098941962903, "grad_norm": 5.528493706190115, "learning_rate": 9.385098941962903e-08, "loss": 1.6594, "step": 212 }, { "epoch": 0.0009429368276594802, "grad_norm": 4.399363674889973, "learning_rate": 9.429368276594804e-08, "loss": 1.233, "step": 213 }, { "epoch": 0.0009473637611226703, "grad_norm": 4.93846629927231, "learning_rate": 9.473637611226704e-08, "loss": 1.3704, "step": 214 }, { "epoch": 0.0009517906945858604, "grad_norm": 3.9972982487514153, "learning_rate": 9.517906945858605e-08, "loss": 1.3671, "step": 215 }, { "epoch": 0.0009562176280490505, "grad_norm": 3.913321182988454, "learning_rate": 9.562176280490505e-08, "loss": 1.1647, "step": 216 }, { "epoch": 0.0009606445615122404, "grad_norm": 5.042738487786732, "learning_rate": 9.606445615122405e-08, "loss": 1.1731, "step": 217 }, { "epoch": 0.0009650714949754305, "grad_norm": 3.2766603472142806, "learning_rate": 9.650714949754307e-08, "loss": 0.9281, "step": 218 }, { "epoch": 0.0009694984284386206, "grad_norm": 3.648111754879671, "learning_rate": 9.694984284386207e-08, "loss": 1.1372, "step": 219 }, { "epoch": 0.0009739253619018107, "grad_norm": 4.289040168698384, "learning_rate": 9.739253619018107e-08, "loss": 1.306, "step": 220 }, { "epoch": 0.0009783522953650007, "grad_norm": 4.746414894708866, "learning_rate": 9.783522953650008e-08, "loss": 1.4214, "step": 221 }, { "epoch": 0.0009827792288281908, "grad_norm": 3.423817981441968, "learning_rate": 9.827792288281908e-08, "loss": 0.9443, "step": 222 }, { "epoch": 0.0009872061622913807, "grad_norm": 3.678110563248427, "learning_rate": 9.872061622913808e-08, "loss": 0.8544, "step": 223 }, { "epoch": 0.0009916330957545707, "grad_norm": 3.7147682989364, "learning_rate": 9.916330957545709e-08, "loss": 1.1756, "step": 224 }, { "epoch": 0.0009960600292177608, "grad_norm": 3.77227046953526, "learning_rate": 9.96060029217761e-08, "loss": 1.3002, "step": 225 }, { "epoch": 0.0010004869626809509, "grad_norm": 4.255318903581046, "learning_rate": 1.000486962680951e-07, "loss": 1.0834, "step": 226 }, { "epoch": 0.001004913896144141, "grad_norm": 4.834977081058987, "learning_rate": 1.0049138961441411e-07, "loss": 1.4871, "step": 227 }, { "epoch": 0.001009340829607331, "grad_norm": 3.474407398448683, "learning_rate": 1.0093408296073311e-07, "loss": 0.8056, "step": 228 }, { "epoch": 0.001013767763070521, "grad_norm": 3.6341466031524963, "learning_rate": 1.0137677630705211e-07, "loss": 0.9547, "step": 229 }, { "epoch": 0.0010181946965337112, "grad_norm": 4.385875580753956, "learning_rate": 1.0181946965337112e-07, "loss": 1.3187, "step": 230 }, { "epoch": 0.0010226216299969013, "grad_norm": 3.701333014879043, "learning_rate": 1.0226216299969012e-07, "loss": 0.8437, "step": 231 }, { "epoch": 0.0010270485634600911, "grad_norm": 3.6715014057008393, "learning_rate": 1.0270485634600912e-07, "loss": 1.0759, "step": 232 }, { "epoch": 0.0010314754969232812, "grad_norm": 4.149089494576073, "learning_rate": 1.0314754969232813e-07, "loss": 1.194, "step": 233 }, { "epoch": 0.0010359024303864713, "grad_norm": 3.575035050375034, "learning_rate": 1.0359024303864713e-07, "loss": 0.952, "step": 234 }, { "epoch": 0.0010403293638496613, "grad_norm": 4.635027684386291, "learning_rate": 1.0403293638496615e-07, "loss": 1.298, "step": 235 }, { "epoch": 0.0010447562973128514, "grad_norm": 3.69019700940798, "learning_rate": 1.0447562973128515e-07, "loss": 1.1107, "step": 236 }, { "epoch": 0.0010491832307760415, "grad_norm": 5.4918710616783235, "learning_rate": 1.0491832307760415e-07, "loss": 1.8577, "step": 237 }, { "epoch": 0.0010536101642392316, "grad_norm": 3.714404745636483, "learning_rate": 1.0536101642392316e-07, "loss": 0.9446, "step": 238 }, { "epoch": 0.0010580370977024216, "grad_norm": 3.506848367423284, "learning_rate": 1.0580370977024216e-07, "loss": 0.8376, "step": 239 }, { "epoch": 0.0010624640311656115, "grad_norm": 4.535406309185503, "learning_rate": 1.0624640311656116e-07, "loss": 1.143, "step": 240 }, { "epoch": 0.0010668909646288016, "grad_norm": 3.905482530721711, "learning_rate": 1.0668909646288017e-07, "loss": 1.0047, "step": 241 }, { "epoch": 0.0010713178980919916, "grad_norm": 4.033643782359584, "learning_rate": 1.0713178980919917e-07, "loss": 1.3369, "step": 242 }, { "epoch": 0.0010757448315551817, "grad_norm": 3.2940676847600585, "learning_rate": 1.0757448315551817e-07, "loss": 0.8897, "step": 243 }, { "epoch": 0.0010801717650183718, "grad_norm": 3.4401696910547406, "learning_rate": 1.0801717650183719e-07, "loss": 1.0943, "step": 244 }, { "epoch": 0.0010845986984815619, "grad_norm": 4.813877817305067, "learning_rate": 1.0845986984815619e-07, "loss": 1.7411, "step": 245 }, { "epoch": 0.001089025631944752, "grad_norm": 3.3099593020733002, "learning_rate": 1.0890256319447519e-07, "loss": 0.8372, "step": 246 }, { "epoch": 0.001093452565407942, "grad_norm": 3.0743674430742742, "learning_rate": 1.093452565407942e-07, "loss": 0.828, "step": 247 }, { "epoch": 0.0010978794988711319, "grad_norm": 3.6459393185508655, "learning_rate": 1.097879498871132e-07, "loss": 1.0359, "step": 248 }, { "epoch": 0.001102306432334322, "grad_norm": 4.2025843477052955, "learning_rate": 1.102306432334322e-07, "loss": 1.2985, "step": 249 }, { "epoch": 0.001106733365797512, "grad_norm": 4.237121417112237, "learning_rate": 1.1067333657975121e-07, "loss": 1.0858, "step": 250 }, { "epoch": 0.001111160299260702, "grad_norm": 4.854087233486504, "learning_rate": 1.1111602992607021e-07, "loss": 1.5361, "step": 251 }, { "epoch": 0.0011155872327238922, "grad_norm": 3.7824610317138183, "learning_rate": 1.1155872327238924e-07, "loss": 1.0251, "step": 252 }, { "epoch": 0.0011200141661870822, "grad_norm": 4.249071611515854, "learning_rate": 1.1200141661870823e-07, "loss": 1.1876, "step": 253 }, { "epoch": 0.0011244410996502723, "grad_norm": 3.8509402861650988, "learning_rate": 1.1244410996502723e-07, "loss": 1.2338, "step": 254 }, { "epoch": 0.0011288680331134624, "grad_norm": 4.347101303179302, "learning_rate": 1.1288680331134625e-07, "loss": 1.1966, "step": 255 }, { "epoch": 0.0011332949665766525, "grad_norm": 3.7877823048060955, "learning_rate": 1.1332949665766524e-07, "loss": 1.093, "step": 256 }, { "epoch": 0.0011377219000398423, "grad_norm": 3.933704170569825, "learning_rate": 1.1377219000398424e-07, "loss": 1.0412, "step": 257 }, { "epoch": 0.0011421488335030324, "grad_norm": 3.4226282061568067, "learning_rate": 1.1421488335030327e-07, "loss": 1.1096, "step": 258 }, { "epoch": 0.0011465757669662225, "grad_norm": 3.7269791086973916, "learning_rate": 1.1465757669662225e-07, "loss": 0.968, "step": 259 }, { "epoch": 0.0011510027004294125, "grad_norm": 3.8562135662218595, "learning_rate": 1.1510027004294125e-07, "loss": 1.0338, "step": 260 }, { "epoch": 0.0011554296338926026, "grad_norm": 3.63862171813411, "learning_rate": 1.1554296338926028e-07, "loss": 0.8264, "step": 261 }, { "epoch": 0.0011598565673557927, "grad_norm": 4.240792131946323, "learning_rate": 1.1598565673557927e-07, "loss": 1.0625, "step": 262 }, { "epoch": 0.0011642835008189828, "grad_norm": 3.2319869775251755, "learning_rate": 1.1642835008189827e-07, "loss": 0.9288, "step": 263 }, { "epoch": 0.0011687104342821728, "grad_norm": 3.192886384520068, "learning_rate": 1.1687104342821729e-07, "loss": 0.7892, "step": 264 }, { "epoch": 0.0011731373677453627, "grad_norm": 3.7346192159965645, "learning_rate": 1.1731373677453629e-07, "loss": 1.0587, "step": 265 }, { "epoch": 0.0011775643012085528, "grad_norm": 4.079412021428963, "learning_rate": 1.1775643012085528e-07, "loss": 0.9738, "step": 266 }, { "epoch": 0.0011819912346717428, "grad_norm": 4.3704136938950295, "learning_rate": 1.1819912346717431e-07, "loss": 1.4164, "step": 267 }, { "epoch": 0.001186418168134933, "grad_norm": 4.047025057546277, "learning_rate": 1.1864181681349331e-07, "loss": 1.1281, "step": 268 }, { "epoch": 0.001190845101598123, "grad_norm": 3.988562116352726, "learning_rate": 1.190845101598123e-07, "loss": 1.1225, "step": 269 }, { "epoch": 0.001195272035061313, "grad_norm": 4.055332340620369, "learning_rate": 1.195272035061313e-07, "loss": 1.0863, "step": 270 }, { "epoch": 0.0011996989685245031, "grad_norm": 4.005097829773642, "learning_rate": 1.199698968524503e-07, "loss": 1.1754, "step": 271 }, { "epoch": 0.0012041259019876932, "grad_norm": 4.213375970592685, "learning_rate": 1.2041259019876933e-07, "loss": 1.1994, "step": 272 }, { "epoch": 0.0012085528354508833, "grad_norm": 3.9235888860864043, "learning_rate": 1.2085528354508833e-07, "loss": 1.1137, "step": 273 }, { "epoch": 0.0012129797689140731, "grad_norm": 3.934911785713089, "learning_rate": 1.2129797689140733e-07, "loss": 1.0395, "step": 274 }, { "epoch": 0.0012174067023772632, "grad_norm": 4.128789229767847, "learning_rate": 1.2174067023772633e-07, "loss": 1.4952, "step": 275 }, { "epoch": 0.0012218336358404533, "grad_norm": 4.183758313613147, "learning_rate": 1.2218336358404533e-07, "loss": 1.5614, "step": 276 }, { "epoch": 0.0012262605693036434, "grad_norm": 3.6661778688468183, "learning_rate": 1.2262605693036433e-07, "loss": 0.9421, "step": 277 }, { "epoch": 0.0012306875027668334, "grad_norm": 3.654189223920407, "learning_rate": 1.2306875027668336e-07, "loss": 0.9881, "step": 278 }, { "epoch": 0.0012351144362300235, "grad_norm": 3.843607675555305, "learning_rate": 1.2351144362300236e-07, "loss": 0.9594, "step": 279 }, { "epoch": 0.0012395413696932136, "grad_norm": 4.12818672834407, "learning_rate": 1.2395413696932136e-07, "loss": 1.4129, "step": 280 }, { "epoch": 0.0012439683031564037, "grad_norm": 4.098154660848775, "learning_rate": 1.2439683031564036e-07, "loss": 1.2284, "step": 281 }, { "epoch": 0.0012483952366195935, "grad_norm": 3.567129159862303, "learning_rate": 1.2483952366195936e-07, "loss": 1.1789, "step": 282 }, { "epoch": 0.0012528221700827836, "grad_norm": 3.429464893748552, "learning_rate": 1.2528221700827836e-07, "loss": 0.9905, "step": 283 }, { "epoch": 0.0012572491035459737, "grad_norm": 3.9800180154211717, "learning_rate": 1.2572491035459739e-07, "loss": 0.8986, "step": 284 }, { "epoch": 0.0012616760370091637, "grad_norm": 4.174688707187563, "learning_rate": 1.2616760370091639e-07, "loss": 1.1584, "step": 285 }, { "epoch": 0.0012661029704723538, "grad_norm": 3.367308396656056, "learning_rate": 1.2661029704723539e-07, "loss": 0.9339, "step": 286 }, { "epoch": 0.0012705299039355439, "grad_norm": 3.871254673685015, "learning_rate": 1.270529903935544e-07, "loss": 1.1917, "step": 287 }, { "epoch": 0.001274956837398734, "grad_norm": 3.995326460109125, "learning_rate": 1.274956837398734e-07, "loss": 1.3279, "step": 288 }, { "epoch": 0.001279383770861924, "grad_norm": 3.195226117783994, "learning_rate": 1.2793837708619241e-07, "loss": 0.8889, "step": 289 }, { "epoch": 0.0012838107043251139, "grad_norm": 3.516967540424851, "learning_rate": 1.2838107043251141e-07, "loss": 1.0617, "step": 290 }, { "epoch": 0.001288237637788304, "grad_norm": 3.7703530899045963, "learning_rate": 1.2882376377883041e-07, "loss": 1.1034, "step": 291 }, { "epoch": 0.001292664571251494, "grad_norm": 4.035069723360983, "learning_rate": 1.2926645712514944e-07, "loss": 1.5571, "step": 292 }, { "epoch": 0.001297091504714684, "grad_norm": 4.592540108073298, "learning_rate": 1.2970915047146841e-07, "loss": 1.3525, "step": 293 }, { "epoch": 0.0013015184381778742, "grad_norm": 4.123940631200102, "learning_rate": 1.3015184381778741e-07, "loss": 1.199, "step": 294 }, { "epoch": 0.0013059453716410643, "grad_norm": 4.213202001335259, "learning_rate": 1.3059453716410644e-07, "loss": 1.1376, "step": 295 }, { "epoch": 0.0013103723051042543, "grad_norm": 4.604353135567301, "learning_rate": 1.3103723051042544e-07, "loss": 1.3266, "step": 296 }, { "epoch": 0.0013147992385674444, "grad_norm": 3.8245220232543513, "learning_rate": 1.3147992385674444e-07, "loss": 1.1138, "step": 297 }, { "epoch": 0.0013192261720306345, "grad_norm": 4.829771876194608, "learning_rate": 1.3192261720306347e-07, "loss": 1.6497, "step": 298 }, { "epoch": 0.0013236531054938243, "grad_norm": 4.335645430269126, "learning_rate": 1.3236531054938244e-07, "loss": 1.0315, "step": 299 }, { "epoch": 0.0013280800389570144, "grad_norm": 3.678500015228411, "learning_rate": 1.3280800389570144e-07, "loss": 1.0875, "step": 300 }, { "epoch": 0.0013325069724202045, "grad_norm": 3.3602031428747337, "learning_rate": 1.3325069724202047e-07, "loss": 0.872, "step": 301 }, { "epoch": 0.0013369339058833946, "grad_norm": 3.9289623188069136, "learning_rate": 1.3369339058833947e-07, "loss": 1.3096, "step": 302 }, { "epoch": 0.0013413608393465846, "grad_norm": 3.4532849402534396, "learning_rate": 1.3413608393465847e-07, "loss": 1.0959, "step": 303 }, { "epoch": 0.0013457877728097747, "grad_norm": 3.5286126906248403, "learning_rate": 1.345787772809775e-07, "loss": 0.8438, "step": 304 }, { "epoch": 0.0013502147062729648, "grad_norm": 3.0422785272420767, "learning_rate": 1.350214706272965e-07, "loss": 0.753, "step": 305 }, { "epoch": 0.0013546416397361549, "grad_norm": 2.989477425471815, "learning_rate": 1.3546416397361547e-07, "loss": 0.6838, "step": 306 }, { "epoch": 0.0013590685731993447, "grad_norm": 4.4202552197202865, "learning_rate": 1.359068573199345e-07, "loss": 1.4284, "step": 307 }, { "epoch": 0.0013634955066625348, "grad_norm": 3.5797681643196353, "learning_rate": 1.363495506662535e-07, "loss": 1.1242, "step": 308 }, { "epoch": 0.0013679224401257249, "grad_norm": 4.257099569261037, "learning_rate": 1.3679224401257252e-07, "loss": 1.2902, "step": 309 }, { "epoch": 0.001372349373588915, "grad_norm": 4.109767598332436, "learning_rate": 1.3723493735889152e-07, "loss": 1.0669, "step": 310 }, { "epoch": 0.001376776307052105, "grad_norm": 4.061535939889984, "learning_rate": 1.3767763070521052e-07, "loss": 1.5242, "step": 311 }, { "epoch": 0.001381203240515295, "grad_norm": 5.109320795885322, "learning_rate": 1.3812032405152952e-07, "loss": 1.6323, "step": 312 }, { "epoch": 0.0013856301739784852, "grad_norm": 4.963449147040941, "learning_rate": 1.3856301739784852e-07, "loss": 1.5461, "step": 313 }, { "epoch": 0.0013900571074416752, "grad_norm": 3.7478564386546642, "learning_rate": 1.3900571074416752e-07, "loss": 1.1299, "step": 314 }, { "epoch": 0.0013944840409048653, "grad_norm": 3.5584679299749826, "learning_rate": 1.3944840409048655e-07, "loss": 1.2658, "step": 315 }, { "epoch": 0.0013989109743680552, "grad_norm": 3.6043395246773127, "learning_rate": 1.3989109743680555e-07, "loss": 1.1368, "step": 316 }, { "epoch": 0.0014033379078312452, "grad_norm": 3.2630982303703098, "learning_rate": 1.4033379078312455e-07, "loss": 0.8201, "step": 317 }, { "epoch": 0.0014077648412944353, "grad_norm": 3.8039785830290023, "learning_rate": 1.4077648412944355e-07, "loss": 0.9182, "step": 318 }, { "epoch": 0.0014121917747576254, "grad_norm": 3.085150509269989, "learning_rate": 1.4121917747576255e-07, "loss": 0.9878, "step": 319 }, { "epoch": 0.0014166187082208155, "grad_norm": 3.295199405353629, "learning_rate": 1.4166187082208155e-07, "loss": 0.7504, "step": 320 }, { "epoch": 0.0014210456416840055, "grad_norm": 3.3682755562472497, "learning_rate": 1.4210456416840057e-07, "loss": 1.1055, "step": 321 }, { "epoch": 0.0014254725751471956, "grad_norm": 3.5474335117545275, "learning_rate": 1.4254725751471957e-07, "loss": 1.0322, "step": 322 }, { "epoch": 0.0014298995086103857, "grad_norm": 3.1155135263336384, "learning_rate": 1.4298995086103857e-07, "loss": 0.9962, "step": 323 }, { "epoch": 0.0014343264420735755, "grad_norm": 3.9292095058617993, "learning_rate": 1.4343264420735757e-07, "loss": 1.3675, "step": 324 }, { "epoch": 0.0014387533755367656, "grad_norm": 3.6938179362721346, "learning_rate": 1.4387533755367657e-07, "loss": 0.9701, "step": 325 }, { "epoch": 0.0014431803089999557, "grad_norm": 3.472315509196415, "learning_rate": 1.443180308999956e-07, "loss": 0.9685, "step": 326 }, { "epoch": 0.0014476072424631458, "grad_norm": 3.3190952980396076, "learning_rate": 1.447607242463146e-07, "loss": 0.7738, "step": 327 }, { "epoch": 0.0014520341759263358, "grad_norm": 4.253214910982907, "learning_rate": 1.452034175926336e-07, "loss": 1.3862, "step": 328 }, { "epoch": 0.001456461109389526, "grad_norm": 4.180993474972666, "learning_rate": 1.456461109389526e-07, "loss": 1.3491, "step": 329 }, { "epoch": 0.001460888042852716, "grad_norm": 2.958867140128297, "learning_rate": 1.460888042852716e-07, "loss": 0.9123, "step": 330 }, { "epoch": 0.001465314976315906, "grad_norm": 3.2373423777933676, "learning_rate": 1.465314976315906e-07, "loss": 0.7485, "step": 331 }, { "epoch": 0.001469741909779096, "grad_norm": 3.2861238194847817, "learning_rate": 1.4697419097790963e-07, "loss": 1.0298, "step": 332 }, { "epoch": 0.001474168843242286, "grad_norm": 3.3019322591548845, "learning_rate": 1.4741688432422863e-07, "loss": 0.9935, "step": 333 }, { "epoch": 0.001478595776705476, "grad_norm": 3.5561537807799604, "learning_rate": 1.4785957767054763e-07, "loss": 0.9396, "step": 334 }, { "epoch": 0.0014830227101686661, "grad_norm": 3.630305613049663, "learning_rate": 1.4830227101686663e-07, "loss": 0.9803, "step": 335 }, { "epoch": 0.0014874496436318562, "grad_norm": 3.2156490371666804, "learning_rate": 1.4874496436318563e-07, "loss": 1.0004, "step": 336 }, { "epoch": 0.0014918765770950463, "grad_norm": 3.3733910120721236, "learning_rate": 1.4918765770950463e-07, "loss": 1.2844, "step": 337 }, { "epoch": 0.0014963035105582364, "grad_norm": 3.8396023839254383, "learning_rate": 1.4963035105582365e-07, "loss": 0.9258, "step": 338 }, { "epoch": 0.0015007304440214264, "grad_norm": 4.388087639878303, "learning_rate": 1.5007304440214265e-07, "loss": 1.2164, "step": 339 }, { "epoch": 0.0015051573774846165, "grad_norm": 3.7902318587158477, "learning_rate": 1.5051573774846165e-07, "loss": 1.166, "step": 340 }, { "epoch": 0.0015095843109478064, "grad_norm": 3.4344299527841353, "learning_rate": 1.5095843109478065e-07, "loss": 0.9911, "step": 341 }, { "epoch": 0.0015140112444109964, "grad_norm": 3.3827317226159046, "learning_rate": 1.5140112444109965e-07, "loss": 1.1769, "step": 342 }, { "epoch": 0.0015184381778741865, "grad_norm": 4.084327098229965, "learning_rate": 1.5184381778741865e-07, "loss": 1.2644, "step": 343 }, { "epoch": 0.0015228651113373766, "grad_norm": 3.617976258306383, "learning_rate": 1.5228651113373768e-07, "loss": 0.8933, "step": 344 }, { "epoch": 0.0015272920448005667, "grad_norm": 3.2413994804573565, "learning_rate": 1.5272920448005668e-07, "loss": 1.187, "step": 345 }, { "epoch": 0.0015317189782637567, "grad_norm": 3.610523478634205, "learning_rate": 1.5317189782637568e-07, "loss": 1.3195, "step": 346 }, { "epoch": 0.0015361459117269468, "grad_norm": 3.794700652808914, "learning_rate": 1.5361459117269468e-07, "loss": 1.2012, "step": 347 }, { "epoch": 0.0015405728451901369, "grad_norm": 3.6485982697003094, "learning_rate": 1.5405728451901368e-07, "loss": 1.0472, "step": 348 }, { "epoch": 0.0015449997786533267, "grad_norm": 3.029179417428395, "learning_rate": 1.544999778653327e-07, "loss": 0.8201, "step": 349 }, { "epoch": 0.0015494267121165168, "grad_norm": 3.9983270994077413, "learning_rate": 1.549426712116517e-07, "loss": 0.7112, "step": 350 }, { "epoch": 0.0015538536455797069, "grad_norm": 3.58043266523187, "learning_rate": 1.553853645579707e-07, "loss": 1.1357, "step": 351 }, { "epoch": 0.001558280579042897, "grad_norm": 3.454826468791413, "learning_rate": 1.558280579042897e-07, "loss": 0.8859, "step": 352 }, { "epoch": 0.001562707512506087, "grad_norm": 3.986256806650854, "learning_rate": 1.5627075125060873e-07, "loss": 1.5062, "step": 353 }, { "epoch": 0.001567134445969277, "grad_norm": 4.910233995668074, "learning_rate": 1.567134445969277e-07, "loss": 1.5855, "step": 354 }, { "epoch": 0.0015715613794324672, "grad_norm": 3.8561921719886394, "learning_rate": 1.5715613794324673e-07, "loss": 1.1073, "step": 355 }, { "epoch": 0.0015759883128956573, "grad_norm": 3.6925685298698574, "learning_rate": 1.5759883128956576e-07, "loss": 1.2798, "step": 356 }, { "epoch": 0.0015804152463588473, "grad_norm": 2.7364799457740485, "learning_rate": 1.5804152463588473e-07, "loss": 0.9601, "step": 357 }, { "epoch": 0.0015848421798220372, "grad_norm": 2.88528232395208, "learning_rate": 1.5848421798220373e-07, "loss": 0.8704, "step": 358 }, { "epoch": 0.0015892691132852273, "grad_norm": 3.4733773232758254, "learning_rate": 1.5892691132852276e-07, "loss": 1.2772, "step": 359 }, { "epoch": 0.0015936960467484173, "grad_norm": 3.168792596446061, "learning_rate": 1.5936960467484173e-07, "loss": 1.0262, "step": 360 }, { "epoch": 0.0015981229802116074, "grad_norm": 4.030471939420363, "learning_rate": 1.5981229802116076e-07, "loss": 0.857, "step": 361 }, { "epoch": 0.0016025499136747975, "grad_norm": 3.505626810700477, "learning_rate": 1.6025499136747979e-07, "loss": 1.1291, "step": 362 }, { "epoch": 0.0016069768471379876, "grad_norm": 3.36233525418631, "learning_rate": 1.6069768471379876e-07, "loss": 1.1876, "step": 363 }, { "epoch": 0.0016114037806011776, "grad_norm": 4.532919654516488, "learning_rate": 1.6114037806011776e-07, "loss": 1.3316, "step": 364 }, { "epoch": 0.0016158307140643677, "grad_norm": 3.0837285552328138, "learning_rate": 1.6158307140643679e-07, "loss": 0.8768, "step": 365 }, { "epoch": 0.0016202576475275576, "grad_norm": 2.8514808666170954, "learning_rate": 1.6202576475275576e-07, "loss": 0.6882, "step": 366 }, { "epoch": 0.0016246845809907476, "grad_norm": 3.5351647043773244, "learning_rate": 1.6246845809907479e-07, "loss": 1.1515, "step": 367 }, { "epoch": 0.0016291115144539377, "grad_norm": 3.520580508662075, "learning_rate": 1.629111514453938e-07, "loss": 0.9559, "step": 368 }, { "epoch": 0.0016335384479171278, "grad_norm": 3.002287647814083, "learning_rate": 1.633538447917128e-07, "loss": 1.0069, "step": 369 }, { "epoch": 0.0016379653813803179, "grad_norm": 3.2567618795202495, "learning_rate": 1.637965381380318e-07, "loss": 1.0593, "step": 370 }, { "epoch": 0.001642392314843508, "grad_norm": 3.3990281906878885, "learning_rate": 1.6423923148435081e-07, "loss": 1.1082, "step": 371 }, { "epoch": 0.001646819248306698, "grad_norm": 3.0316129934542637, "learning_rate": 1.646819248306698e-07, "loss": 0.9024, "step": 372 }, { "epoch": 0.001651246181769888, "grad_norm": 3.1947841172283886, "learning_rate": 1.6512461817698881e-07, "loss": 0.5818, "step": 373 }, { "epoch": 0.0016556731152330782, "grad_norm": 3.997613334862201, "learning_rate": 1.6556731152330784e-07, "loss": 1.4954, "step": 374 }, { "epoch": 0.001660100048696268, "grad_norm": 3.2791438144296152, "learning_rate": 1.6601000486962681e-07, "loss": 0.8568, "step": 375 }, { "epoch": 0.001664526982159458, "grad_norm": 3.3594598575696204, "learning_rate": 1.6645269821594581e-07, "loss": 1.2304, "step": 376 }, { "epoch": 0.0016689539156226482, "grad_norm": 3.7838500301833653, "learning_rate": 1.6689539156226484e-07, "loss": 1.3588, "step": 377 }, { "epoch": 0.0016733808490858382, "grad_norm": 3.3680188502823816, "learning_rate": 1.6733808490858381e-07, "loss": 0.8434, "step": 378 }, { "epoch": 0.0016778077825490283, "grad_norm": 4.512155166195103, "learning_rate": 1.6778077825490284e-07, "loss": 1.5315, "step": 379 }, { "epoch": 0.0016822347160122184, "grad_norm": 3.7079654201152503, "learning_rate": 1.6822347160122187e-07, "loss": 1.3108, "step": 380 }, { "epoch": 0.0016866616494754085, "grad_norm": 2.965843399991668, "learning_rate": 1.6866616494754084e-07, "loss": 0.8285, "step": 381 }, { "epoch": 0.0016910885829385985, "grad_norm": 3.6802311149932594, "learning_rate": 1.6910885829385987e-07, "loss": 0.9305, "step": 382 }, { "epoch": 0.0016955155164017884, "grad_norm": 3.463697899299419, "learning_rate": 1.6955155164017887e-07, "loss": 1.181, "step": 383 }, { "epoch": 0.0016999424498649785, "grad_norm": 3.0988961732344626, "learning_rate": 1.6999424498649784e-07, "loss": 0.937, "step": 384 }, { "epoch": 0.0017043693833281685, "grad_norm": 3.222705106337289, "learning_rate": 1.7043693833281687e-07, "loss": 1.1975, "step": 385 }, { "epoch": 0.0017087963167913586, "grad_norm": 3.9863796047029862, "learning_rate": 1.708796316791359e-07, "loss": 1.2744, "step": 386 }, { "epoch": 0.0017132232502545487, "grad_norm": 3.002793585909969, "learning_rate": 1.7132232502545487e-07, "loss": 1.1571, "step": 387 }, { "epoch": 0.0017176501837177388, "grad_norm": 4.197410559629243, "learning_rate": 1.717650183717739e-07, "loss": 1.4168, "step": 388 }, { "epoch": 0.0017220771171809288, "grad_norm": 3.249326007383332, "learning_rate": 1.722077117180929e-07, "loss": 1.1885, "step": 389 }, { "epoch": 0.001726504050644119, "grad_norm": 3.717932889524058, "learning_rate": 1.7265040506441187e-07, "loss": 1.1415, "step": 390 }, { "epoch": 0.0017309309841073088, "grad_norm": 3.5126018681871116, "learning_rate": 1.730930984107309e-07, "loss": 1.0056, "step": 391 }, { "epoch": 0.0017353579175704988, "grad_norm": 3.3410865446840603, "learning_rate": 1.7353579175704992e-07, "loss": 1.1957, "step": 392 }, { "epoch": 0.001739784851033689, "grad_norm": 3.061120191894057, "learning_rate": 1.7397848510336892e-07, "loss": 1.1285, "step": 393 }, { "epoch": 0.001744211784496879, "grad_norm": 3.6157925349357103, "learning_rate": 1.7442117844968792e-07, "loss": 0.9914, "step": 394 }, { "epoch": 0.001748638717960069, "grad_norm": 2.8986638797618633, "learning_rate": 1.7486387179600692e-07, "loss": 0.8281, "step": 395 }, { "epoch": 0.0017530656514232591, "grad_norm": 3.1161057980947904, "learning_rate": 1.7530656514232595e-07, "loss": 0.8546, "step": 396 }, { "epoch": 0.0017574925848864492, "grad_norm": 3.6716643024159925, "learning_rate": 1.7574925848864492e-07, "loss": 0.7828, "step": 397 }, { "epoch": 0.0017619195183496393, "grad_norm": 3.2508235338650526, "learning_rate": 1.7619195183496395e-07, "loss": 1.1226, "step": 398 }, { "epoch": 0.0017663464518128294, "grad_norm": 2.996618815826796, "learning_rate": 1.7663464518128295e-07, "loss": 0.9606, "step": 399 }, { "epoch": 0.0017707733852760192, "grad_norm": 3.6956480262334006, "learning_rate": 1.7707733852760195e-07, "loss": 1.2592, "step": 400 }, { "epoch": 0.0017752003187392093, "grad_norm": 3.8162047978526936, "learning_rate": 1.7752003187392095e-07, "loss": 1.2579, "step": 401 }, { "epoch": 0.0017796272522023994, "grad_norm": 2.8865728626002887, "learning_rate": 1.7796272522023997e-07, "loss": 0.9204, "step": 402 }, { "epoch": 0.0017840541856655894, "grad_norm": 3.65392940390051, "learning_rate": 1.7840541856655895e-07, "loss": 1.2976, "step": 403 }, { "epoch": 0.0017884811191287795, "grad_norm": 3.6057812892974153, "learning_rate": 1.7884811191287797e-07, "loss": 1.0742, "step": 404 }, { "epoch": 0.0017929080525919696, "grad_norm": 3.772641401383665, "learning_rate": 1.7929080525919697e-07, "loss": 1.4825, "step": 405 }, { "epoch": 0.0017973349860551597, "grad_norm": 3.2293829906799405, "learning_rate": 1.7973349860551597e-07, "loss": 0.9925, "step": 406 }, { "epoch": 0.0018017619195183497, "grad_norm": 2.9945851995440056, "learning_rate": 1.8017619195183497e-07, "loss": 1.0364, "step": 407 }, { "epoch": 0.0018061888529815396, "grad_norm": 3.0475677257438814, "learning_rate": 1.80618885298154e-07, "loss": 0.8933, "step": 408 }, { "epoch": 0.0018106157864447297, "grad_norm": 3.0203064486629647, "learning_rate": 1.8106157864447297e-07, "loss": 0.7252, "step": 409 }, { "epoch": 0.0018150427199079197, "grad_norm": 2.8715092436543146, "learning_rate": 1.81504271990792e-07, "loss": 0.9711, "step": 410 }, { "epoch": 0.0018194696533711098, "grad_norm": 3.74586339372135, "learning_rate": 1.81946965337111e-07, "loss": 0.9282, "step": 411 }, { "epoch": 0.0018238965868342999, "grad_norm": 3.9777700405485508, "learning_rate": 1.8238965868343e-07, "loss": 0.7408, "step": 412 }, { "epoch": 0.00182832352029749, "grad_norm": 2.800993492532208, "learning_rate": 1.82832352029749e-07, "loss": 0.9191, "step": 413 }, { "epoch": 0.00183275045376068, "grad_norm": 3.294786929989752, "learning_rate": 1.8327504537606803e-07, "loss": 1.0503, "step": 414 }, { "epoch": 0.00183717738722387, "grad_norm": 3.1433578845474774, "learning_rate": 1.83717738722387e-07, "loss": 1.1593, "step": 415 }, { "epoch": 0.0018416043206870602, "grad_norm": 4.845045043807783, "learning_rate": 1.8416043206870603e-07, "loss": 1.3756, "step": 416 }, { "epoch": 0.00184603125415025, "grad_norm": 2.6956404197311934, "learning_rate": 1.8460312541502503e-07, "loss": 0.714, "step": 417 }, { "epoch": 0.0018504581876134401, "grad_norm": 3.3131539167381625, "learning_rate": 1.8504581876134403e-07, "loss": 1.0455, "step": 418 }, { "epoch": 0.0018548851210766302, "grad_norm": 3.6213907401738017, "learning_rate": 1.8548851210766303e-07, "loss": 1.277, "step": 419 }, { "epoch": 0.0018593120545398203, "grad_norm": 4.934412048140545, "learning_rate": 1.8593120545398205e-07, "loss": 1.2217, "step": 420 }, { "epoch": 0.0018637389880030103, "grad_norm": 3.900291758851159, "learning_rate": 1.8637389880030103e-07, "loss": 1.4719, "step": 421 }, { "epoch": 0.0018681659214662004, "grad_norm": 3.1296824158963235, "learning_rate": 1.8681659214662005e-07, "loss": 1.0959, "step": 422 }, { "epoch": 0.0018725928549293905, "grad_norm": 3.1691608965296383, "learning_rate": 1.8725928549293905e-07, "loss": 0.9498, "step": 423 }, { "epoch": 0.0018770197883925806, "grad_norm": 3.3919087634324097, "learning_rate": 1.8770197883925805e-07, "loss": 1.0239, "step": 424 }, { "epoch": 0.0018814467218557704, "grad_norm": 3.1422469174964953, "learning_rate": 1.8814467218557705e-07, "loss": 0.977, "step": 425 }, { "epoch": 0.0018858736553189605, "grad_norm": 3.0380379700460955, "learning_rate": 1.8858736553189608e-07, "loss": 1.0755, "step": 426 }, { "epoch": 0.0018903005887821506, "grad_norm": 3.6746652436788088, "learning_rate": 1.8903005887821505e-07, "loss": 0.8716, "step": 427 }, { "epoch": 0.0018947275222453406, "grad_norm": 3.3968158030857856, "learning_rate": 1.8947275222453408e-07, "loss": 0.9074, "step": 428 }, { "epoch": 0.0018991544557085307, "grad_norm": 2.841238834976417, "learning_rate": 1.8991544557085308e-07, "loss": 0.7939, "step": 429 }, { "epoch": 0.0019035813891717208, "grad_norm": 3.661116213862427, "learning_rate": 1.903581389171721e-07, "loss": 1.2139, "step": 430 }, { "epoch": 0.0019080083226349109, "grad_norm": 3.4399978097625294, "learning_rate": 1.9080083226349108e-07, "loss": 1.1731, "step": 431 }, { "epoch": 0.001912435256098101, "grad_norm": 3.684398826527696, "learning_rate": 1.912435256098101e-07, "loss": 1.2573, "step": 432 }, { "epoch": 0.0019168621895612908, "grad_norm": 3.120140114521254, "learning_rate": 1.9168621895612913e-07, "loss": 0.6649, "step": 433 }, { "epoch": 0.0019212891230244809, "grad_norm": 4.020088870672955, "learning_rate": 1.921289123024481e-07, "loss": 1.4417, "step": 434 }, { "epoch": 0.001925716056487671, "grad_norm": 3.258912997471804, "learning_rate": 1.925716056487671e-07, "loss": 1.2134, "step": 435 }, { "epoch": 0.001930142989950861, "grad_norm": 3.2460276788169757, "learning_rate": 1.9301429899508613e-07, "loss": 0.9611, "step": 436 }, { "epoch": 0.001934569923414051, "grad_norm": 3.0842982576687636, "learning_rate": 1.934569923414051e-07, "loss": 0.8548, "step": 437 }, { "epoch": 0.0019389968568772412, "grad_norm": 3.052461310547902, "learning_rate": 1.9389968568772413e-07, "loss": 0.8808, "step": 438 }, { "epoch": 0.0019434237903404312, "grad_norm": 3.9006481437862717, "learning_rate": 1.9434237903404316e-07, "loss": 0.8362, "step": 439 }, { "epoch": 0.0019478507238036213, "grad_norm": 3.275956391167818, "learning_rate": 1.9478507238036213e-07, "loss": 0.5879, "step": 440 }, { "epoch": 0.0019522776572668114, "grad_norm": 2.9173878698407165, "learning_rate": 1.9522776572668113e-07, "loss": 0.9669, "step": 441 }, { "epoch": 0.0019567045907300015, "grad_norm": 3.8290460396318915, "learning_rate": 1.9567045907300016e-07, "loss": 1.0805, "step": 442 }, { "epoch": 0.0019611315241931915, "grad_norm": 3.8081121230064165, "learning_rate": 1.9611315241931913e-07, "loss": 1.216, "step": 443 }, { "epoch": 0.0019655584576563816, "grad_norm": 3.2680473038078732, "learning_rate": 1.9655584576563816e-07, "loss": 1.0171, "step": 444 }, { "epoch": 0.0019699853911195717, "grad_norm": 3.257654690203894, "learning_rate": 1.9699853911195719e-07, "loss": 0.6871, "step": 445 }, { "epoch": 0.0019744123245827613, "grad_norm": 3.1121216446516797, "learning_rate": 1.9744123245827616e-07, "loss": 1.026, "step": 446 }, { "epoch": 0.0019788392580459514, "grad_norm": 3.649135092442161, "learning_rate": 1.9788392580459516e-07, "loss": 1.0415, "step": 447 }, { "epoch": 0.0019832661915091415, "grad_norm": 4.352116652784347, "learning_rate": 1.9832661915091419e-07, "loss": 1.3148, "step": 448 }, { "epoch": 0.0019876931249723315, "grad_norm": 3.2217874350703357, "learning_rate": 1.9876931249723316e-07, "loss": 0.9035, "step": 449 }, { "epoch": 0.0019921200584355216, "grad_norm": 3.00445276372367, "learning_rate": 1.992120058435522e-07, "loss": 0.7218, "step": 450 }, { "epoch": 0.0019965469918987117, "grad_norm": 4.288524876627568, "learning_rate": 1.9965469918987121e-07, "loss": 1.0995, "step": 451 }, { "epoch": 0.0020009739253619018, "grad_norm": 2.69809037385749, "learning_rate": 2.000973925361902e-07, "loss": 0.8125, "step": 452 }, { "epoch": 0.002005400858825092, "grad_norm": 3.5664985310160784, "learning_rate": 2.005400858825092e-07, "loss": 1.0764, "step": 453 }, { "epoch": 0.002009827792288282, "grad_norm": 2.7377442421844527, "learning_rate": 2.0098277922882821e-07, "loss": 0.8891, "step": 454 }, { "epoch": 0.002014254725751472, "grad_norm": 3.04216553969827, "learning_rate": 2.014254725751472e-07, "loss": 0.9656, "step": 455 }, { "epoch": 0.002018681659214662, "grad_norm": 3.975291986599011, "learning_rate": 2.0186816592146621e-07, "loss": 0.9539, "step": 456 }, { "epoch": 0.002023108592677852, "grad_norm": 2.7811833474659884, "learning_rate": 2.0231085926778524e-07, "loss": 0.8497, "step": 457 }, { "epoch": 0.002027535526141042, "grad_norm": 3.9681943466471017, "learning_rate": 2.0275355261410421e-07, "loss": 1.2466, "step": 458 }, { "epoch": 0.0020319624596042323, "grad_norm": 3.516071821815389, "learning_rate": 2.0319624596042324e-07, "loss": 1.0069, "step": 459 }, { "epoch": 0.0020363893930674224, "grad_norm": 2.9226189586448625, "learning_rate": 2.0363893930674224e-07, "loss": 0.8658, "step": 460 }, { "epoch": 0.0020408163265306124, "grad_norm": 4.83952747736377, "learning_rate": 2.0408163265306121e-07, "loss": 0.9618, "step": 461 }, { "epoch": 0.0020452432599938025, "grad_norm": 3.248620358294469, "learning_rate": 2.0452432599938024e-07, "loss": 1.1817, "step": 462 }, { "epoch": 0.002049670193456992, "grad_norm": 3.417963022473675, "learning_rate": 2.0496701934569927e-07, "loss": 0.6403, "step": 463 }, { "epoch": 0.0020540971269201822, "grad_norm": 3.2609349910165153, "learning_rate": 2.0540971269201824e-07, "loss": 1.3112, "step": 464 }, { "epoch": 0.0020585240603833723, "grad_norm": 5.05272709783331, "learning_rate": 2.0585240603833727e-07, "loss": 1.7223, "step": 465 }, { "epoch": 0.0020629509938465624, "grad_norm": 3.201652516879931, "learning_rate": 2.0629509938465627e-07, "loss": 1.0589, "step": 466 }, { "epoch": 0.0020673779273097524, "grad_norm": 3.3979765770834263, "learning_rate": 2.067377927309753e-07, "loss": 0.8076, "step": 467 }, { "epoch": 0.0020718048607729425, "grad_norm": 2.874683451431448, "learning_rate": 2.0718048607729427e-07, "loss": 0.8268, "step": 468 }, { "epoch": 0.0020762317942361326, "grad_norm": 3.0925238418435512, "learning_rate": 2.076231794236133e-07, "loss": 0.9983, "step": 469 }, { "epoch": 0.0020806587276993227, "grad_norm": 4.904317664280653, "learning_rate": 2.080658727699323e-07, "loss": 1.402, "step": 470 }, { "epoch": 0.0020850856611625127, "grad_norm": 4.1250256370861305, "learning_rate": 2.085085661162513e-07, "loss": 1.5926, "step": 471 }, { "epoch": 0.002089512594625703, "grad_norm": 3.2635453799523533, "learning_rate": 2.089512594625703e-07, "loss": 1.2824, "step": 472 }, { "epoch": 0.002093939528088893, "grad_norm": 3.123232118494566, "learning_rate": 2.0939395280888932e-07, "loss": 1.1574, "step": 473 }, { "epoch": 0.002098366461552083, "grad_norm": 2.9914621053663004, "learning_rate": 2.098366461552083e-07, "loss": 0.6947, "step": 474 }, { "epoch": 0.002102793395015273, "grad_norm": 3.3707057054872753, "learning_rate": 2.1027933950152732e-07, "loss": 1.1394, "step": 475 }, { "epoch": 0.002107220328478463, "grad_norm": 2.9393044720730823, "learning_rate": 2.1072203284784632e-07, "loss": 1.1386, "step": 476 }, { "epoch": 0.002111647261941653, "grad_norm": 3.686893408841008, "learning_rate": 2.1116472619416532e-07, "loss": 1.3939, "step": 477 }, { "epoch": 0.0021160741954048433, "grad_norm": 3.42178840601771, "learning_rate": 2.1160741954048432e-07, "loss": 1.0838, "step": 478 }, { "epoch": 0.0021205011288680333, "grad_norm": 3.7722350199927135, "learning_rate": 2.1205011288680335e-07, "loss": 1.4292, "step": 479 }, { "epoch": 0.002124928062331223, "grad_norm": 2.9731649357923096, "learning_rate": 2.1249280623312232e-07, "loss": 0.9876, "step": 480 }, { "epoch": 0.002129354995794413, "grad_norm": 3.958495989298523, "learning_rate": 2.1293549957944135e-07, "loss": 1.3529, "step": 481 }, { "epoch": 0.002133781929257603, "grad_norm": 2.7858107698443595, "learning_rate": 2.1337819292576035e-07, "loss": 0.9043, "step": 482 }, { "epoch": 0.002138208862720793, "grad_norm": 2.9152194986853455, "learning_rate": 2.1382088627207935e-07, "loss": 0.9165, "step": 483 }, { "epoch": 0.0021426357961839833, "grad_norm": 3.3751005401480607, "learning_rate": 2.1426357961839835e-07, "loss": 1.2225, "step": 484 }, { "epoch": 0.0021470627296471733, "grad_norm": 3.1839339211261906, "learning_rate": 2.1470627296471737e-07, "loss": 0.8951, "step": 485 }, { "epoch": 0.0021514896631103634, "grad_norm": 2.73069711490751, "learning_rate": 2.1514896631103635e-07, "loss": 0.8345, "step": 486 }, { "epoch": 0.0021559165965735535, "grad_norm": 3.819489458156413, "learning_rate": 2.1559165965735537e-07, "loss": 0.9893, "step": 487 }, { "epoch": 0.0021603435300367436, "grad_norm": 3.458182592469833, "learning_rate": 2.1603435300367437e-07, "loss": 1.2016, "step": 488 }, { "epoch": 0.0021647704634999336, "grad_norm": 2.949870654029187, "learning_rate": 2.1647704634999337e-07, "loss": 0.9806, "step": 489 }, { "epoch": 0.0021691973969631237, "grad_norm": 2.902923771880944, "learning_rate": 2.1691973969631237e-07, "loss": 0.9123, "step": 490 }, { "epoch": 0.002173624330426314, "grad_norm": 3.6360176999003015, "learning_rate": 2.173624330426314e-07, "loss": 1.1406, "step": 491 }, { "epoch": 0.002178051263889504, "grad_norm": 3.381730524643612, "learning_rate": 2.1780512638895037e-07, "loss": 1.1548, "step": 492 }, { "epoch": 0.002182478197352694, "grad_norm": 2.9583430940053344, "learning_rate": 2.182478197352694e-07, "loss": 0.9625, "step": 493 }, { "epoch": 0.002186905130815884, "grad_norm": 4.25142759655888, "learning_rate": 2.186905130815884e-07, "loss": 1.2045, "step": 494 }, { "epoch": 0.002191332064279074, "grad_norm": 3.1157349669314915, "learning_rate": 2.191332064279074e-07, "loss": 1.0268, "step": 495 }, { "epoch": 0.0021957589977422637, "grad_norm": 2.7283778253698197, "learning_rate": 2.195758997742264e-07, "loss": 0.9626, "step": 496 }, { "epoch": 0.002200185931205454, "grad_norm": 4.49921054685704, "learning_rate": 2.2001859312054543e-07, "loss": 0.9422, "step": 497 }, { "epoch": 0.002204612864668644, "grad_norm": 2.7450828157899503, "learning_rate": 2.204612864668644e-07, "loss": 1.0836, "step": 498 }, { "epoch": 0.002209039798131834, "grad_norm": 3.3925595819387424, "learning_rate": 2.2090397981318343e-07, "loss": 1.1476, "step": 499 }, { "epoch": 0.002213466731595024, "grad_norm": 2.9039744924266473, "learning_rate": 2.2134667315950243e-07, "loss": 0.9443, "step": 500 }, { "epoch": 0.002217893665058214, "grad_norm": 2.8988314782643867, "learning_rate": 2.2178936650582143e-07, "loss": 0.747, "step": 501 }, { "epoch": 0.002222320598521404, "grad_norm": 3.2872948080280753, "learning_rate": 2.2223205985214043e-07, "loss": 1.0686, "step": 502 }, { "epoch": 0.0022267475319845942, "grad_norm": 2.8214468485661817, "learning_rate": 2.2267475319845945e-07, "loss": 0.7527, "step": 503 }, { "epoch": 0.0022311744654477843, "grad_norm": 3.388686604107823, "learning_rate": 2.2311744654477848e-07, "loss": 0.9771, "step": 504 }, { "epoch": 0.0022356013989109744, "grad_norm": 3.2554279425455066, "learning_rate": 2.2356013989109745e-07, "loss": 0.8933, "step": 505 }, { "epoch": 0.0022400283323741645, "grad_norm": 3.7055740115338756, "learning_rate": 2.2400283323741645e-07, "loss": 1.059, "step": 506 }, { "epoch": 0.0022444552658373545, "grad_norm": 3.733734940027744, "learning_rate": 2.2444552658373548e-07, "loss": 1.2234, "step": 507 }, { "epoch": 0.0022488821993005446, "grad_norm": 2.702113998192559, "learning_rate": 2.2488821993005445e-07, "loss": 0.7373, "step": 508 }, { "epoch": 0.0022533091327637347, "grad_norm": 2.7688251209379637, "learning_rate": 2.2533091327637348e-07, "loss": 0.8844, "step": 509 }, { "epoch": 0.0022577360662269248, "grad_norm": 3.356313395040932, "learning_rate": 2.257736066226925e-07, "loss": 0.9129, "step": 510 }, { "epoch": 0.002262162999690115, "grad_norm": 3.0486926563234507, "learning_rate": 2.2621629996901148e-07, "loss": 0.8585, "step": 511 }, { "epoch": 0.002266589933153305, "grad_norm": 3.798511035758492, "learning_rate": 2.2665899331533048e-07, "loss": 1.0819, "step": 512 }, { "epoch": 0.0022710168666164945, "grad_norm": 2.7432192120109344, "learning_rate": 2.271016866616495e-07, "loss": 0.9625, "step": 513 }, { "epoch": 0.0022754438000796846, "grad_norm": 3.8056179165155073, "learning_rate": 2.2754438000796848e-07, "loss": 1.1211, "step": 514 }, { "epoch": 0.0022798707335428747, "grad_norm": 2.8654448857224266, "learning_rate": 2.279870733542875e-07, "loss": 0.7291, "step": 515 }, { "epoch": 0.0022842976670060648, "grad_norm": 3.4162844625944886, "learning_rate": 2.2842976670060653e-07, "loss": 1.1932, "step": 516 }, { "epoch": 0.002288724600469255, "grad_norm": 3.31473381793216, "learning_rate": 2.288724600469255e-07, "loss": 1.204, "step": 517 }, { "epoch": 0.002293151533932445, "grad_norm": 3.954761814020973, "learning_rate": 2.293151533932445e-07, "loss": 1.6789, "step": 518 }, { "epoch": 0.002297578467395635, "grad_norm": 2.68676146031017, "learning_rate": 2.2975784673956353e-07, "loss": 0.7703, "step": 519 }, { "epoch": 0.002302005400858825, "grad_norm": 3.3047524228247447, "learning_rate": 2.302005400858825e-07, "loss": 0.9515, "step": 520 }, { "epoch": 0.002306432334322015, "grad_norm": 4.872401581793098, "learning_rate": 2.3064323343220153e-07, "loss": 1.5365, "step": 521 }, { "epoch": 0.002310859267785205, "grad_norm": 3.373551047174518, "learning_rate": 2.3108592677852056e-07, "loss": 1.1447, "step": 522 }, { "epoch": 0.0023152862012483953, "grad_norm": 2.5766352726265778, "learning_rate": 2.3152862012483953e-07, "loss": 0.7008, "step": 523 }, { "epoch": 0.0023197131347115854, "grad_norm": 2.9535724218513035, "learning_rate": 2.3197131347115853e-07, "loss": 0.8134, "step": 524 }, { "epoch": 0.0023241400681747754, "grad_norm": 4.06128761481402, "learning_rate": 2.3241400681747756e-07, "loss": 1.1471, "step": 525 }, { "epoch": 0.0023285670016379655, "grad_norm": 3.5913879091711465, "learning_rate": 2.3285670016379653e-07, "loss": 1.0449, "step": 526 }, { "epoch": 0.0023329939351011556, "grad_norm": 3.415533660387329, "learning_rate": 2.3329939351011556e-07, "loss": 1.2049, "step": 527 }, { "epoch": 0.0023374208685643457, "grad_norm": 2.794552953173663, "learning_rate": 2.3374208685643459e-07, "loss": 1.0817, "step": 528 }, { "epoch": 0.0023418478020275357, "grad_norm": 3.192504562828491, "learning_rate": 2.3418478020275356e-07, "loss": 0.7554, "step": 529 }, { "epoch": 0.0023462747354907254, "grad_norm": 3.695439741249568, "learning_rate": 2.3462747354907259e-07, "loss": 0.8577, "step": 530 }, { "epoch": 0.0023507016689539154, "grad_norm": 2.707178252440429, "learning_rate": 2.3507016689539159e-07, "loss": 0.8973, "step": 531 }, { "epoch": 0.0023551286024171055, "grad_norm": 3.32655895507442, "learning_rate": 2.3551286024171056e-07, "loss": 1.0061, "step": 532 }, { "epoch": 0.0023595555358802956, "grad_norm": 3.5288705655699557, "learning_rate": 2.359555535880296e-07, "loss": 1.1865, "step": 533 }, { "epoch": 0.0023639824693434857, "grad_norm": 3.8162086465279677, "learning_rate": 2.3639824693434861e-07, "loss": 1.113, "step": 534 }, { "epoch": 0.0023684094028066757, "grad_norm": 3.9044599194253315, "learning_rate": 2.368409402806676e-07, "loss": 0.8465, "step": 535 }, { "epoch": 0.002372836336269866, "grad_norm": 3.4565924534737964, "learning_rate": 2.3728363362698661e-07, "loss": 0.9395, "step": 536 }, { "epoch": 0.002377263269733056, "grad_norm": 2.778853318160378, "learning_rate": 2.3772632697330561e-07, "loss": 0.8467, "step": 537 }, { "epoch": 0.002381690203196246, "grad_norm": 2.5943809276319505, "learning_rate": 2.381690203196246e-07, "loss": 0.8316, "step": 538 }, { "epoch": 0.002386117136659436, "grad_norm": 4.575017609045065, "learning_rate": 2.386117136659436e-07, "loss": 1.406, "step": 539 }, { "epoch": 0.002390544070122626, "grad_norm": 3.878814943467103, "learning_rate": 2.390544070122626e-07, "loss": 1.2016, "step": 540 }, { "epoch": 0.002394971003585816, "grad_norm": 3.438440736528742, "learning_rate": 2.3949710035858167e-07, "loss": 1.0994, "step": 541 }, { "epoch": 0.0023993979370490063, "grad_norm": 3.115182782828646, "learning_rate": 2.399397937049006e-07, "loss": 0.9284, "step": 542 }, { "epoch": 0.0024038248705121963, "grad_norm": 2.8019376627701384, "learning_rate": 2.4038248705121967e-07, "loss": 0.7897, "step": 543 }, { "epoch": 0.0024082518039753864, "grad_norm": 3.3459891878810684, "learning_rate": 2.4082518039753867e-07, "loss": 1.0832, "step": 544 }, { "epoch": 0.0024126787374385765, "grad_norm": 3.473469108547587, "learning_rate": 2.4126787374385767e-07, "loss": 0.8545, "step": 545 }, { "epoch": 0.0024171056709017666, "grad_norm": 3.0383911126221657, "learning_rate": 2.4171056709017667e-07, "loss": 0.9783, "step": 546 }, { "epoch": 0.002421532604364956, "grad_norm": 3.339248678193753, "learning_rate": 2.4215326043649567e-07, "loss": 1.1364, "step": 547 }, { "epoch": 0.0024259595378281463, "grad_norm": 3.0090608367666154, "learning_rate": 2.4259595378281467e-07, "loss": 0.9273, "step": 548 }, { "epoch": 0.0024303864712913363, "grad_norm": 4.400893064564058, "learning_rate": 2.4303864712913367e-07, "loss": 1.4552, "step": 549 }, { "epoch": 0.0024348134047545264, "grad_norm": 3.311382318512759, "learning_rate": 2.4348134047545267e-07, "loss": 1.1969, "step": 550 }, { "epoch": 0.0024392403382177165, "grad_norm": 3.6496147387484066, "learning_rate": 2.4392403382177167e-07, "loss": 0.8683, "step": 551 }, { "epoch": 0.0024436672716809066, "grad_norm": 3.062648086078505, "learning_rate": 2.4436672716809067e-07, "loss": 0.9817, "step": 552 }, { "epoch": 0.0024480942051440966, "grad_norm": 2.6504658993486694, "learning_rate": 2.448094205144097e-07, "loss": 0.9175, "step": 553 }, { "epoch": 0.0024525211386072867, "grad_norm": 2.8577948535864572, "learning_rate": 2.4525211386072867e-07, "loss": 0.807, "step": 554 }, { "epoch": 0.002456948072070477, "grad_norm": 3.937915656841944, "learning_rate": 2.456948072070477e-07, "loss": 1.1906, "step": 555 }, { "epoch": 0.002461375005533667, "grad_norm": 3.3624040617696673, "learning_rate": 2.461375005533667e-07, "loss": 1.3465, "step": 556 }, { "epoch": 0.002465801938996857, "grad_norm": 2.9426886468238496, "learning_rate": 2.465801938996857e-07, "loss": 0.8395, "step": 557 }, { "epoch": 0.002470228872460047, "grad_norm": 3.2604967378182854, "learning_rate": 2.470228872460047e-07, "loss": 0.7659, "step": 558 }, { "epoch": 0.002474655805923237, "grad_norm": 3.3106441278729495, "learning_rate": 2.474655805923237e-07, "loss": 1.2177, "step": 559 }, { "epoch": 0.002479082739386427, "grad_norm": 3.0570611820464464, "learning_rate": 2.479082739386427e-07, "loss": 0.9019, "step": 560 }, { "epoch": 0.0024835096728496172, "grad_norm": 2.985941128668285, "learning_rate": 2.483509672849617e-07, "loss": 0.979, "step": 561 }, { "epoch": 0.0024879366063128073, "grad_norm": 2.931251955465496, "learning_rate": 2.487936606312807e-07, "loss": 0.9274, "step": 562 }, { "epoch": 0.0024923635397759974, "grad_norm": 3.2040881168729807, "learning_rate": 2.492363539775997e-07, "loss": 1.1593, "step": 563 }, { "epoch": 0.002496790473239187, "grad_norm": 3.6489202764486617, "learning_rate": 2.496790473239187e-07, "loss": 1.1923, "step": 564 }, { "epoch": 0.002501217406702377, "grad_norm": 3.113433217592853, "learning_rate": 2.5012174067023777e-07, "loss": 0.9768, "step": 565 }, { "epoch": 0.002505644340165567, "grad_norm": 3.245719177890006, "learning_rate": 2.505644340165567e-07, "loss": 0.8725, "step": 566 }, { "epoch": 0.0025100712736287572, "grad_norm": 3.0889350694992674, "learning_rate": 2.5100712736287577e-07, "loss": 1.2512, "step": 567 }, { "epoch": 0.0025144982070919473, "grad_norm": 2.7300450157708243, "learning_rate": 2.5144982070919477e-07, "loss": 0.8762, "step": 568 }, { "epoch": 0.0025189251405551374, "grad_norm": 4.8903393702726525, "learning_rate": 2.5189251405551377e-07, "loss": 1.2211, "step": 569 }, { "epoch": 0.0025233520740183275, "grad_norm": 3.879785756533116, "learning_rate": 2.5233520740183277e-07, "loss": 0.8027, "step": 570 }, { "epoch": 0.0025277790074815175, "grad_norm": 3.245066741094981, "learning_rate": 2.5277790074815177e-07, "loss": 1.014, "step": 571 }, { "epoch": 0.0025322059409447076, "grad_norm": 3.6909217288268152, "learning_rate": 2.5322059409447077e-07, "loss": 1.1546, "step": 572 }, { "epoch": 0.0025366328744078977, "grad_norm": 2.9619955242233704, "learning_rate": 2.536632874407898e-07, "loss": 0.9233, "step": 573 }, { "epoch": 0.0025410598078710878, "grad_norm": 2.800563144813267, "learning_rate": 2.541059807871088e-07, "loss": 1.0472, "step": 574 }, { "epoch": 0.002545486741334278, "grad_norm": 2.930391493852517, "learning_rate": 2.545486741334278e-07, "loss": 0.7766, "step": 575 }, { "epoch": 0.002549913674797468, "grad_norm": 3.041765981765623, "learning_rate": 2.549913674797468e-07, "loss": 1.0426, "step": 576 }, { "epoch": 0.002554340608260658, "grad_norm": 3.7242595739343276, "learning_rate": 2.5543406082606583e-07, "loss": 1.3344, "step": 577 }, { "epoch": 0.002558767541723848, "grad_norm": 2.7257926215785417, "learning_rate": 2.5587675417238483e-07, "loss": 0.6333, "step": 578 }, { "epoch": 0.002563194475187038, "grad_norm": 3.475126593957818, "learning_rate": 2.5631944751870383e-07, "loss": 1.063, "step": 579 }, { "epoch": 0.0025676214086502278, "grad_norm": 4.013094364003714, "learning_rate": 2.5676214086502283e-07, "loss": 1.4793, "step": 580 }, { "epoch": 0.002572048342113418, "grad_norm": 3.063574981931129, "learning_rate": 2.5720483421134183e-07, "loss": 0.9698, "step": 581 }, { "epoch": 0.002576475275576608, "grad_norm": 3.3554321856801503, "learning_rate": 2.5764752755766083e-07, "loss": 1.1518, "step": 582 }, { "epoch": 0.002580902209039798, "grad_norm": 3.302364662164952, "learning_rate": 2.5809022090397983e-07, "loss": 0.9917, "step": 583 }, { "epoch": 0.002585329142502988, "grad_norm": 3.608733462311985, "learning_rate": 2.585329142502989e-07, "loss": 0.9677, "step": 584 }, { "epoch": 0.002589756075966178, "grad_norm": 3.29226736752343, "learning_rate": 2.5897560759661783e-07, "loss": 1.1747, "step": 585 }, { "epoch": 0.002594183009429368, "grad_norm": 3.5212270124667855, "learning_rate": 2.5941830094293683e-07, "loss": 0.6597, "step": 586 }, { "epoch": 0.0025986099428925583, "grad_norm": 2.9822704329568492, "learning_rate": 2.598609942892559e-07, "loss": 0.5775, "step": 587 }, { "epoch": 0.0026030368763557484, "grad_norm": 2.8410825335655536, "learning_rate": 2.6030368763557483e-07, "loss": 1.0491, "step": 588 }, { "epoch": 0.0026074638098189384, "grad_norm": 3.566015581199372, "learning_rate": 2.607463809818939e-07, "loss": 1.3334, "step": 589 }, { "epoch": 0.0026118907432821285, "grad_norm": 2.810611166752309, "learning_rate": 2.611890743282129e-07, "loss": 0.8929, "step": 590 }, { "epoch": 0.0026163176767453186, "grad_norm": 4.604288707708145, "learning_rate": 2.616317676745319e-07, "loss": 1.2583, "step": 591 }, { "epoch": 0.0026207446102085087, "grad_norm": 3.1519654864924456, "learning_rate": 2.620744610208509e-07, "loss": 0.9726, "step": 592 }, { "epoch": 0.0026251715436716987, "grad_norm": 4.038924406544129, "learning_rate": 2.625171543671699e-07, "loss": 1.3064, "step": 593 }, { "epoch": 0.002629598477134889, "grad_norm": 3.89130514122108, "learning_rate": 2.629598477134889e-07, "loss": 1.5647, "step": 594 }, { "epoch": 0.002634025410598079, "grad_norm": 3.6730544748459577, "learning_rate": 2.634025410598079e-07, "loss": 1.1533, "step": 595 }, { "epoch": 0.002638452344061269, "grad_norm": 2.9257453120862116, "learning_rate": 2.6384523440612693e-07, "loss": 0.9345, "step": 596 }, { "epoch": 0.0026428792775244586, "grad_norm": 2.919949784518536, "learning_rate": 2.642879277524459e-07, "loss": 0.7431, "step": 597 }, { "epoch": 0.0026473062109876487, "grad_norm": 3.3220564234648386, "learning_rate": 2.647306210987649e-07, "loss": 1.0142, "step": 598 }, { "epoch": 0.0026517331444508387, "grad_norm": 3.0136892837848226, "learning_rate": 2.6517331444508393e-07, "loss": 1.0603, "step": 599 }, { "epoch": 0.002656160077914029, "grad_norm": 3.9703078398472793, "learning_rate": 2.656160077914029e-07, "loss": 1.5032, "step": 600 }, { "epoch": 0.002660587011377219, "grad_norm": 3.036627764156118, "learning_rate": 2.6605870113772193e-07, "loss": 0.9803, "step": 601 }, { "epoch": 0.002665013944840409, "grad_norm": 3.9619124736868954, "learning_rate": 2.6650139448404093e-07, "loss": 1.6326, "step": 602 }, { "epoch": 0.002669440878303599, "grad_norm": 3.371555596185436, "learning_rate": 2.6694408783035993e-07, "loss": 1.197, "step": 603 }, { "epoch": 0.002673867811766789, "grad_norm": 2.8075403765131726, "learning_rate": 2.6738678117667893e-07, "loss": 0.9552, "step": 604 }, { "epoch": 0.002678294745229979, "grad_norm": 2.7163116358420463, "learning_rate": 2.6782947452299793e-07, "loss": 0.676, "step": 605 }, { "epoch": 0.0026827216786931693, "grad_norm": 4.286061837716553, "learning_rate": 2.6827216786931693e-07, "loss": 1.4615, "step": 606 }, { "epoch": 0.0026871486121563593, "grad_norm": 3.457206259300456, "learning_rate": 2.6871486121563593e-07, "loss": 0.8665, "step": 607 }, { "epoch": 0.0026915755456195494, "grad_norm": 3.5752312327018516, "learning_rate": 2.69157554561955e-07, "loss": 1.0644, "step": 608 }, { "epoch": 0.0026960024790827395, "grad_norm": 3.208103346787282, "learning_rate": 2.6960024790827393e-07, "loss": 1.3852, "step": 609 }, { "epoch": 0.0027004294125459296, "grad_norm": 3.3835368884765176, "learning_rate": 2.70042941254593e-07, "loss": 0.9977, "step": 610 }, { "epoch": 0.0027048563460091196, "grad_norm": 2.884549791469885, "learning_rate": 2.70485634600912e-07, "loss": 0.6413, "step": 611 }, { "epoch": 0.0027092832794723097, "grad_norm": 3.083872611621591, "learning_rate": 2.7092832794723093e-07, "loss": 0.9126, "step": 612 }, { "epoch": 0.0027137102129354998, "grad_norm": 3.0001572917596646, "learning_rate": 2.7137102129355e-07, "loss": 1.1208, "step": 613 }, { "epoch": 0.0027181371463986894, "grad_norm": 2.620511662898322, "learning_rate": 2.71813714639869e-07, "loss": 0.6977, "step": 614 }, { "epoch": 0.0027225640798618795, "grad_norm": 3.1110798174947476, "learning_rate": 2.72256407986188e-07, "loss": 0.745, "step": 615 }, { "epoch": 0.0027269910133250696, "grad_norm": 4.442179809297845, "learning_rate": 2.72699101332507e-07, "loss": 1.1128, "step": 616 }, { "epoch": 0.0027314179467882596, "grad_norm": 3.942855141814183, "learning_rate": 2.73141794678826e-07, "loss": 1.5638, "step": 617 }, { "epoch": 0.0027358448802514497, "grad_norm": 3.0255347319907337, "learning_rate": 2.7358448802514504e-07, "loss": 0.8647, "step": 618 }, { "epoch": 0.00274027181371464, "grad_norm": 4.016992176190617, "learning_rate": 2.74027181371464e-07, "loss": 1.2421, "step": 619 }, { "epoch": 0.00274469874717783, "grad_norm": 5.095056711263963, "learning_rate": 2.7446987471778304e-07, "loss": 1.5568, "step": 620 }, { "epoch": 0.00274912568064102, "grad_norm": 3.0569893776351034, "learning_rate": 2.7491256806410204e-07, "loss": 1.0491, "step": 621 }, { "epoch": 0.00275355261410421, "grad_norm": 2.7567999687678317, "learning_rate": 2.7535526141042104e-07, "loss": 0.8429, "step": 622 }, { "epoch": 0.0027579795475674, "grad_norm": 3.221166217256834, "learning_rate": 2.7579795475674004e-07, "loss": 0.7007, "step": 623 }, { "epoch": 0.00276240648103059, "grad_norm": 3.7932686429830063, "learning_rate": 2.7624064810305904e-07, "loss": 1.0898, "step": 624 }, { "epoch": 0.0027668334144937802, "grad_norm": 2.849019198386321, "learning_rate": 2.7668334144937804e-07, "loss": 0.8276, "step": 625 }, { "epoch": 0.0027712603479569703, "grad_norm": 3.7416463258821846, "learning_rate": 2.7712603479569704e-07, "loss": 1.0473, "step": 626 }, { "epoch": 0.0027756872814201604, "grad_norm": 2.8114805895571173, "learning_rate": 2.7756872814201604e-07, "loss": 1.0182, "step": 627 }, { "epoch": 0.0027801142148833505, "grad_norm": 3.427839524872786, "learning_rate": 2.7801142148833504e-07, "loss": 0.9083, "step": 628 }, { "epoch": 0.0027845411483465405, "grad_norm": 2.8883772685196227, "learning_rate": 2.7845411483465404e-07, "loss": 0.9107, "step": 629 }, { "epoch": 0.0027889680818097306, "grad_norm": 3.0984450111454516, "learning_rate": 2.788968081809731e-07, "loss": 1.0133, "step": 630 }, { "epoch": 0.0027933950152729202, "grad_norm": 2.8144990206475193, "learning_rate": 2.7933950152729204e-07, "loss": 0.9667, "step": 631 }, { "epoch": 0.0027978219487361103, "grad_norm": 2.581198689839872, "learning_rate": 2.797821948736111e-07, "loss": 0.7076, "step": 632 }, { "epoch": 0.0028022488821993004, "grad_norm": 3.808646006897351, "learning_rate": 2.802248882199301e-07, "loss": 1.4764, "step": 633 }, { "epoch": 0.0028066758156624905, "grad_norm": 3.229108204975667, "learning_rate": 2.806675815662491e-07, "loss": 0.8304, "step": 634 }, { "epoch": 0.0028111027491256805, "grad_norm": 3.852900984143134, "learning_rate": 2.811102749125681e-07, "loss": 0.708, "step": 635 }, { "epoch": 0.0028155296825888706, "grad_norm": 2.82878299014224, "learning_rate": 2.815529682588871e-07, "loss": 0.7299, "step": 636 }, { "epoch": 0.0028199566160520607, "grad_norm": 3.6986821956232143, "learning_rate": 2.819956616052061e-07, "loss": 1.3758, "step": 637 }, { "epoch": 0.0028243835495152508, "grad_norm": 2.6284915670202924, "learning_rate": 2.824383549515251e-07, "loss": 0.6591, "step": 638 }, { "epoch": 0.002828810482978441, "grad_norm": 3.30816077341024, "learning_rate": 2.828810482978441e-07, "loss": 0.9302, "step": 639 }, { "epoch": 0.002833237416441631, "grad_norm": 2.817034966218743, "learning_rate": 2.833237416441631e-07, "loss": 0.9349, "step": 640 }, { "epoch": 0.002837664349904821, "grad_norm": 2.979800495099342, "learning_rate": 2.837664349904821e-07, "loss": 1.294, "step": 641 }, { "epoch": 0.002842091283368011, "grad_norm": 2.659175705790355, "learning_rate": 2.8420912833680115e-07, "loss": 0.9679, "step": 642 }, { "epoch": 0.002846518216831201, "grad_norm": 3.299145500213209, "learning_rate": 2.846518216831201e-07, "loss": 0.9312, "step": 643 }, { "epoch": 0.002850945150294391, "grad_norm": 3.5755795276094973, "learning_rate": 2.8509451502943915e-07, "loss": 1.2733, "step": 644 }, { "epoch": 0.0028553720837575813, "grad_norm": 3.478053019250775, "learning_rate": 2.8553720837575815e-07, "loss": 0.9683, "step": 645 }, { "epoch": 0.0028597990172207714, "grad_norm": 2.828335957162245, "learning_rate": 2.8597990172207715e-07, "loss": 0.7081, "step": 646 }, { "epoch": 0.0028642259506839614, "grad_norm": 3.298988264570957, "learning_rate": 2.8642259506839615e-07, "loss": 0.8281, "step": 647 }, { "epoch": 0.002868652884147151, "grad_norm": 3.3438697788036875, "learning_rate": 2.8686528841471515e-07, "loss": 1.039, "step": 648 }, { "epoch": 0.002873079817610341, "grad_norm": 4.347250739991861, "learning_rate": 2.8730798176103415e-07, "loss": 0.9298, "step": 649 }, { "epoch": 0.0028775067510735312, "grad_norm": 2.993072435004014, "learning_rate": 2.8775067510735315e-07, "loss": 1.1226, "step": 650 }, { "epoch": 0.0028819336845367213, "grad_norm": 3.0472412222865173, "learning_rate": 2.8819336845367215e-07, "loss": 0.8512, "step": 651 }, { "epoch": 0.0028863606179999114, "grad_norm": 2.7578976856945148, "learning_rate": 2.886360617999912e-07, "loss": 1.1674, "step": 652 }, { "epoch": 0.0028907875514631014, "grad_norm": 3.1053650735811957, "learning_rate": 2.8907875514631015e-07, "loss": 1.0118, "step": 653 }, { "epoch": 0.0028952144849262915, "grad_norm": 3.3755843876980145, "learning_rate": 2.895214484926292e-07, "loss": 0.7464, "step": 654 }, { "epoch": 0.0028996414183894816, "grad_norm": 3.0902341373602136, "learning_rate": 2.899641418389482e-07, "loss": 0.6839, "step": 655 }, { "epoch": 0.0029040683518526717, "grad_norm": 3.236668684990986, "learning_rate": 2.904068351852672e-07, "loss": 1.191, "step": 656 }, { "epoch": 0.0029084952853158617, "grad_norm": 2.7423784174846233, "learning_rate": 2.908495285315862e-07, "loss": 1.023, "step": 657 }, { "epoch": 0.002912922218779052, "grad_norm": 3.6181091103358782, "learning_rate": 2.912922218779052e-07, "loss": 0.7973, "step": 658 }, { "epoch": 0.002917349152242242, "grad_norm": 3.3877146717526134, "learning_rate": 2.917349152242242e-07, "loss": 1.0085, "step": 659 }, { "epoch": 0.002921776085705432, "grad_norm": 3.609143313602899, "learning_rate": 2.921776085705432e-07, "loss": 1.1893, "step": 660 }, { "epoch": 0.002926203019168622, "grad_norm": 2.9367911685366113, "learning_rate": 2.9262030191686225e-07, "loss": 0.7024, "step": 661 }, { "epoch": 0.002930629952631812, "grad_norm": 3.039150871198396, "learning_rate": 2.930629952631812e-07, "loss": 1.0772, "step": 662 }, { "epoch": 0.002935056886095002, "grad_norm": 2.844757814284368, "learning_rate": 2.935056886095002e-07, "loss": 0.8155, "step": 663 }, { "epoch": 0.002939483819558192, "grad_norm": 3.415727885010377, "learning_rate": 2.9394838195581925e-07, "loss": 1.1292, "step": 664 }, { "epoch": 0.002943910753021382, "grad_norm": 3.7104211186947, "learning_rate": 2.943910753021382e-07, "loss": 1.1129, "step": 665 }, { "epoch": 0.002948337686484572, "grad_norm": 3.564073780228962, "learning_rate": 2.9483376864845725e-07, "loss": 1.2187, "step": 666 }, { "epoch": 0.002952764619947762, "grad_norm": 3.178936108323416, "learning_rate": 2.9527646199477625e-07, "loss": 0.8058, "step": 667 }, { "epoch": 0.002957191553410952, "grad_norm": 3.621207622382315, "learning_rate": 2.9571915534109525e-07, "loss": 1.34, "step": 668 }, { "epoch": 0.002961618486874142, "grad_norm": 4.321840947833336, "learning_rate": 2.9616184868741425e-07, "loss": 1.0325, "step": 669 }, { "epoch": 0.0029660454203373323, "grad_norm": 2.6656659912082676, "learning_rate": 2.9660454203373325e-07, "loss": 0.8111, "step": 670 }, { "epoch": 0.0029704723538005223, "grad_norm": 2.8002985944634093, "learning_rate": 2.9704723538005225e-07, "loss": 0.869, "step": 671 }, { "epoch": 0.0029748992872637124, "grad_norm": 3.0689814331465595, "learning_rate": 2.9748992872637125e-07, "loss": 0.9107, "step": 672 }, { "epoch": 0.0029793262207269025, "grad_norm": 2.932451123243531, "learning_rate": 2.979326220726903e-07, "loss": 1.0178, "step": 673 }, { "epoch": 0.0029837531541900926, "grad_norm": 2.928566960298534, "learning_rate": 2.9837531541900925e-07, "loss": 1.0583, "step": 674 }, { "epoch": 0.0029881800876532826, "grad_norm": 2.681910733885465, "learning_rate": 2.9881800876532825e-07, "loss": 0.9797, "step": 675 }, { "epoch": 0.0029926070211164727, "grad_norm": 3.0252053810389175, "learning_rate": 2.992607021116473e-07, "loss": 0.7575, "step": 676 }, { "epoch": 0.002997033954579663, "grad_norm": 2.7513036138521865, "learning_rate": 2.9970339545796625e-07, "loss": 0.646, "step": 677 }, { "epoch": 0.003001460888042853, "grad_norm": 2.606338787439231, "learning_rate": 3.001460888042853e-07, "loss": 0.7875, "step": 678 }, { "epoch": 0.003005887821506043, "grad_norm": 3.213510792786291, "learning_rate": 3.005887821506043e-07, "loss": 0.7552, "step": 679 }, { "epoch": 0.003010314754969233, "grad_norm": 3.6935659751454315, "learning_rate": 3.010314754969233e-07, "loss": 1.0534, "step": 680 }, { "epoch": 0.0030147416884324226, "grad_norm": 5.756449929400356, "learning_rate": 3.014741688432423e-07, "loss": 1.3655, "step": 681 }, { "epoch": 0.0030191686218956127, "grad_norm": 2.855005144359647, "learning_rate": 3.019168621895613e-07, "loss": 0.5833, "step": 682 }, { "epoch": 0.003023595555358803, "grad_norm": 3.3761169918156058, "learning_rate": 3.023595555358803e-07, "loss": 1.0928, "step": 683 }, { "epoch": 0.003028022488821993, "grad_norm": 2.766478480308648, "learning_rate": 3.028022488821993e-07, "loss": 0.8409, "step": 684 }, { "epoch": 0.003032449422285183, "grad_norm": 3.006828341956631, "learning_rate": 3.0324494222851836e-07, "loss": 0.8839, "step": 685 }, { "epoch": 0.003036876355748373, "grad_norm": 2.931607667214736, "learning_rate": 3.036876355748373e-07, "loss": 1.0052, "step": 686 }, { "epoch": 0.003041303289211563, "grad_norm": 3.6571351348961194, "learning_rate": 3.0413032892115636e-07, "loss": 0.8455, "step": 687 }, { "epoch": 0.003045730222674753, "grad_norm": 3.8357950668799936, "learning_rate": 3.0457302226747536e-07, "loss": 0.8908, "step": 688 }, { "epoch": 0.0030501571561379432, "grad_norm": 2.8709403596720984, "learning_rate": 3.0501571561379436e-07, "loss": 1.1559, "step": 689 }, { "epoch": 0.0030545840896011333, "grad_norm": 3.124304822400021, "learning_rate": 3.0545840896011336e-07, "loss": 0.9301, "step": 690 }, { "epoch": 0.0030590110230643234, "grad_norm": 4.397439360450209, "learning_rate": 3.0590110230643236e-07, "loss": 1.5286, "step": 691 }, { "epoch": 0.0030634379565275135, "grad_norm": 3.2099490869484004, "learning_rate": 3.0634379565275136e-07, "loss": 0.9984, "step": 692 }, { "epoch": 0.0030678648899907035, "grad_norm": 3.5358203889890762, "learning_rate": 3.0678648899907036e-07, "loss": 0.7967, "step": 693 }, { "epoch": 0.0030722918234538936, "grad_norm": 2.4990053341357688, "learning_rate": 3.0722918234538936e-07, "loss": 0.6309, "step": 694 }, { "epoch": 0.0030767187569170837, "grad_norm": 3.0449250402786765, "learning_rate": 3.076718756917084e-07, "loss": 0.6636, "step": 695 }, { "epoch": 0.0030811456903802738, "grad_norm": 3.3528705044484757, "learning_rate": 3.0811456903802736e-07, "loss": 0.7155, "step": 696 }, { "epoch": 0.003085572623843464, "grad_norm": 4.075163061039038, "learning_rate": 3.085572623843464e-07, "loss": 1.3771, "step": 697 }, { "epoch": 0.0030899995573066535, "grad_norm": 3.574627073538259, "learning_rate": 3.089999557306654e-07, "loss": 1.3817, "step": 698 }, { "epoch": 0.0030944264907698435, "grad_norm": 3.127423810091686, "learning_rate": 3.094426490769844e-07, "loss": 0.9006, "step": 699 }, { "epoch": 0.0030988534242330336, "grad_norm": 2.895709071104761, "learning_rate": 3.098853424233034e-07, "loss": 0.9175, "step": 700 }, { "epoch": 0.0031032803576962237, "grad_norm": 2.7577833928890843, "learning_rate": 3.103280357696224e-07, "loss": 1.0092, "step": 701 }, { "epoch": 0.0031077072911594138, "grad_norm": 3.932416724846708, "learning_rate": 3.107707291159414e-07, "loss": 1.4215, "step": 702 }, { "epoch": 0.003112134224622604, "grad_norm": 3.757512807795834, "learning_rate": 3.112134224622604e-07, "loss": 0.863, "step": 703 }, { "epoch": 0.003116561158085794, "grad_norm": 2.983913668718403, "learning_rate": 3.116561158085794e-07, "loss": 0.8744, "step": 704 }, { "epoch": 0.003120988091548984, "grad_norm": 2.8976630196774704, "learning_rate": 3.120988091548984e-07, "loss": 0.847, "step": 705 }, { "epoch": 0.003125415025012174, "grad_norm": 2.781100474666274, "learning_rate": 3.1254150250121747e-07, "loss": 0.6458, "step": 706 }, { "epoch": 0.003129841958475364, "grad_norm": 2.9887974282345615, "learning_rate": 3.1298419584753647e-07, "loss": 0.7917, "step": 707 }, { "epoch": 0.003134268891938554, "grad_norm": 2.9611660505171304, "learning_rate": 3.134268891938554e-07, "loss": 0.8079, "step": 708 }, { "epoch": 0.0031386958254017443, "grad_norm": 2.8461865539487223, "learning_rate": 3.1386958254017447e-07, "loss": 0.9017, "step": 709 }, { "epoch": 0.0031431227588649344, "grad_norm": 3.0849639537456706, "learning_rate": 3.1431227588649347e-07, "loss": 1.0023, "step": 710 }, { "epoch": 0.0031475496923281244, "grad_norm": 3.277164845711432, "learning_rate": 3.1475496923281247e-07, "loss": 0.9473, "step": 711 }, { "epoch": 0.0031519766257913145, "grad_norm": 3.53881316934012, "learning_rate": 3.151976625791315e-07, "loss": 1.2696, "step": 712 }, { "epoch": 0.0031564035592545046, "grad_norm": 3.208884141561958, "learning_rate": 3.1564035592545047e-07, "loss": 0.995, "step": 713 }, { "epoch": 0.0031608304927176947, "grad_norm": 3.196212828078556, "learning_rate": 3.1608304927176947e-07, "loss": 0.879, "step": 714 }, { "epoch": 0.0031652574261808843, "grad_norm": 2.771270170805451, "learning_rate": 3.165257426180885e-07, "loss": 0.8304, "step": 715 }, { "epoch": 0.0031696843596440744, "grad_norm": 3.489763155747131, "learning_rate": 3.1696843596440747e-07, "loss": 0.9885, "step": 716 }, { "epoch": 0.0031741112931072644, "grad_norm": 3.23554983017382, "learning_rate": 3.1741112931072647e-07, "loss": 0.5076, "step": 717 }, { "epoch": 0.0031785382265704545, "grad_norm": 2.91469027572294, "learning_rate": 3.178538226570455e-07, "loss": 0.8425, "step": 718 }, { "epoch": 0.0031829651600336446, "grad_norm": 3.501117632831376, "learning_rate": 3.182965160033645e-07, "loss": 0.9924, "step": 719 }, { "epoch": 0.0031873920934968347, "grad_norm": 4.788277021012496, "learning_rate": 3.1873920934968347e-07, "loss": 0.9119, "step": 720 }, { "epoch": 0.0031918190269600247, "grad_norm": 3.124356938072981, "learning_rate": 3.191819026960025e-07, "loss": 0.7611, "step": 721 }, { "epoch": 0.003196245960423215, "grad_norm": 3.7273581022549434, "learning_rate": 3.196245960423215e-07, "loss": 1.1453, "step": 722 }, { "epoch": 0.003200672893886405, "grad_norm": 2.6541043095848007, "learning_rate": 3.200672893886405e-07, "loss": 1.166, "step": 723 }, { "epoch": 0.003205099827349595, "grad_norm": 3.4571697618190416, "learning_rate": 3.2050998273495957e-07, "loss": 0.8996, "step": 724 }, { "epoch": 0.003209526760812785, "grad_norm": 3.3436821290845447, "learning_rate": 3.209526760812785e-07, "loss": 1.1097, "step": 725 }, { "epoch": 0.003213953694275975, "grad_norm": 2.974048848044139, "learning_rate": 3.213953694275975e-07, "loss": 0.9318, "step": 726 }, { "epoch": 0.003218380627739165, "grad_norm": 3.586665160560469, "learning_rate": 3.2183806277391657e-07, "loss": 0.9521, "step": 727 }, { "epoch": 0.0032228075612023553, "grad_norm": 3.429149062458439, "learning_rate": 3.222807561202355e-07, "loss": 1.0159, "step": 728 }, { "epoch": 0.0032272344946655453, "grad_norm": 2.554571681498805, "learning_rate": 3.227234494665545e-07, "loss": 0.7743, "step": 729 }, { "epoch": 0.0032316614281287354, "grad_norm": 2.9054162549391394, "learning_rate": 3.2316614281287357e-07, "loss": 0.8505, "step": 730 }, { "epoch": 0.0032360883615919255, "grad_norm": 2.89145491146463, "learning_rate": 3.2360883615919257e-07, "loss": 0.9907, "step": 731 }, { "epoch": 0.003240515295055115, "grad_norm": 2.854933181949555, "learning_rate": 3.240515295055115e-07, "loss": 0.7966, "step": 732 }, { "epoch": 0.003244942228518305, "grad_norm": 2.933900061275116, "learning_rate": 3.2449422285183057e-07, "loss": 0.6904, "step": 733 }, { "epoch": 0.0032493691619814953, "grad_norm": 2.8480091307137387, "learning_rate": 3.2493691619814957e-07, "loss": 0.9998, "step": 734 }, { "epoch": 0.0032537960954446853, "grad_norm": 2.9598636232473607, "learning_rate": 3.2537960954446857e-07, "loss": 1.1325, "step": 735 }, { "epoch": 0.0032582230289078754, "grad_norm": 3.749037116577011, "learning_rate": 3.258223028907876e-07, "loss": 1.0396, "step": 736 }, { "epoch": 0.0032626499623710655, "grad_norm": 3.4426494764172575, "learning_rate": 3.262649962371066e-07, "loss": 1.1046, "step": 737 }, { "epoch": 0.0032670768958342556, "grad_norm": 3.384936885615769, "learning_rate": 3.267076895834256e-07, "loss": 0.7364, "step": 738 }, { "epoch": 0.0032715038292974456, "grad_norm": 3.090676574496274, "learning_rate": 3.2715038292974463e-07, "loss": 1.0675, "step": 739 }, { "epoch": 0.0032759307627606357, "grad_norm": 3.4549865194472895, "learning_rate": 3.275930762760636e-07, "loss": 1.2175, "step": 740 }, { "epoch": 0.003280357696223826, "grad_norm": 2.891175179097339, "learning_rate": 3.280357696223826e-07, "loss": 0.8628, "step": 741 }, { "epoch": 0.003284784629687016, "grad_norm": 3.146703711911746, "learning_rate": 3.2847846296870163e-07, "loss": 0.9562, "step": 742 }, { "epoch": 0.003289211563150206, "grad_norm": 3.088890676800684, "learning_rate": 3.2892115631502063e-07, "loss": 1.0886, "step": 743 }, { "epoch": 0.003293638496613396, "grad_norm": 3.3217360079322775, "learning_rate": 3.293638496613396e-07, "loss": 1.0435, "step": 744 }, { "epoch": 0.003298065430076586, "grad_norm": 3.419669881017844, "learning_rate": 3.2980654300765863e-07, "loss": 0.9751, "step": 745 }, { "epoch": 0.003302492363539776, "grad_norm": 3.7562661558402413, "learning_rate": 3.3024923635397763e-07, "loss": 0.9099, "step": 746 }, { "epoch": 0.0033069192970029662, "grad_norm": 2.808660710975175, "learning_rate": 3.3069192970029663e-07, "loss": 0.8692, "step": 747 }, { "epoch": 0.0033113462304661563, "grad_norm": 3.3461286614036445, "learning_rate": 3.311346230466157e-07, "loss": 1.0091, "step": 748 }, { "epoch": 0.003315773163929346, "grad_norm": 2.9195159559633397, "learning_rate": 3.3157731639293463e-07, "loss": 1.1814, "step": 749 }, { "epoch": 0.003320200097392536, "grad_norm": 3.045136260854985, "learning_rate": 3.3202000973925363e-07, "loss": 1.005, "step": 750 }, { "epoch": 0.003324627030855726, "grad_norm": 3.585657753781474, "learning_rate": 3.324627030855727e-07, "loss": 1.4072, "step": 751 }, { "epoch": 0.003329053964318916, "grad_norm": 3.2693015894874615, "learning_rate": 3.3290539643189163e-07, "loss": 0.9155, "step": 752 }, { "epoch": 0.0033334808977821062, "grad_norm": 2.9160656486972876, "learning_rate": 3.3334808977821063e-07, "loss": 0.7727, "step": 753 }, { "epoch": 0.0033379078312452963, "grad_norm": 3.5094839221558947, "learning_rate": 3.337907831245297e-07, "loss": 0.8094, "step": 754 }, { "epoch": 0.0033423347647084864, "grad_norm": 3.2193453444968654, "learning_rate": 3.342334764708487e-07, "loss": 0.9459, "step": 755 }, { "epoch": 0.0033467616981716765, "grad_norm": 2.972376798767833, "learning_rate": 3.3467616981716763e-07, "loss": 0.961, "step": 756 }, { "epoch": 0.0033511886316348665, "grad_norm": 3.478909907618945, "learning_rate": 3.351188631634867e-07, "loss": 1.1063, "step": 757 }, { "epoch": 0.0033556155650980566, "grad_norm": 2.877013107907005, "learning_rate": 3.355615565098057e-07, "loss": 0.9108, "step": 758 }, { "epoch": 0.0033600424985612467, "grad_norm": 3.0929813393276335, "learning_rate": 3.360042498561247e-07, "loss": 0.8688, "step": 759 }, { "epoch": 0.0033644694320244368, "grad_norm": 2.6432247065020724, "learning_rate": 3.3644694320244373e-07, "loss": 0.987, "step": 760 }, { "epoch": 0.003368896365487627, "grad_norm": 2.9231932318628657, "learning_rate": 3.368896365487627e-07, "loss": 0.9336, "step": 761 }, { "epoch": 0.003373323298950817, "grad_norm": 3.12141422185942, "learning_rate": 3.373323298950817e-07, "loss": 1.0994, "step": 762 }, { "epoch": 0.003377750232414007, "grad_norm": 2.974367194846739, "learning_rate": 3.3777502324140073e-07, "loss": 0.9441, "step": 763 }, { "epoch": 0.003382177165877197, "grad_norm": 3.2464341917591435, "learning_rate": 3.3821771658771973e-07, "loss": 0.9029, "step": 764 }, { "epoch": 0.0033866040993403867, "grad_norm": 2.645046714110841, "learning_rate": 3.386604099340387e-07, "loss": 0.9235, "step": 765 }, { "epoch": 0.0033910310328035768, "grad_norm": 3.088878500566462, "learning_rate": 3.3910310328035773e-07, "loss": 0.7149, "step": 766 }, { "epoch": 0.003395457966266767, "grad_norm": 5.128383887375556, "learning_rate": 3.3954579662667673e-07, "loss": 0.8491, "step": 767 }, { "epoch": 0.003399884899729957, "grad_norm": 3.3069767832428885, "learning_rate": 3.399884899729957e-07, "loss": 0.805, "step": 768 }, { "epoch": 0.003404311833193147, "grad_norm": 2.6120742286074514, "learning_rate": 3.4043118331931473e-07, "loss": 0.7406, "step": 769 }, { "epoch": 0.003408738766656337, "grad_norm": 3.488468803889521, "learning_rate": 3.4087387666563373e-07, "loss": 1.2845, "step": 770 }, { "epoch": 0.003413165700119527, "grad_norm": 3.6987845459955895, "learning_rate": 3.4131657001195273e-07, "loss": 1.024, "step": 771 }, { "epoch": 0.0034175926335827172, "grad_norm": 3.3604948550989353, "learning_rate": 3.417592633582718e-07, "loss": 1.3553, "step": 772 }, { "epoch": 0.0034220195670459073, "grad_norm": 3.6672302692024163, "learning_rate": 3.4220195670459073e-07, "loss": 1.1709, "step": 773 }, { "epoch": 0.0034264465005090974, "grad_norm": 3.439702921269269, "learning_rate": 3.4264465005090973e-07, "loss": 1.1933, "step": 774 }, { "epoch": 0.0034308734339722874, "grad_norm": 2.818486563601605, "learning_rate": 3.430873433972288e-07, "loss": 0.8984, "step": 775 }, { "epoch": 0.0034353003674354775, "grad_norm": 3.0322848042873356, "learning_rate": 3.435300367435478e-07, "loss": 0.9008, "step": 776 }, { "epoch": 0.0034397273008986676, "grad_norm": 3.157995497051434, "learning_rate": 3.4397273008986673e-07, "loss": 0.9523, "step": 777 }, { "epoch": 0.0034441542343618577, "grad_norm": 2.939105586114453, "learning_rate": 3.444154234361858e-07, "loss": 0.9333, "step": 778 }, { "epoch": 0.0034485811678250477, "grad_norm": 3.228054877024826, "learning_rate": 3.448581167825048e-07, "loss": 1.1297, "step": 779 }, { "epoch": 0.003453008101288238, "grad_norm": 3.2264612718538452, "learning_rate": 3.4530081012882373e-07, "loss": 0.8405, "step": 780 }, { "epoch": 0.003457435034751428, "grad_norm": 3.3530222972917416, "learning_rate": 3.457435034751428e-07, "loss": 1.1706, "step": 781 }, { "epoch": 0.0034618619682146175, "grad_norm": 4.663172644823181, "learning_rate": 3.461861968214618e-07, "loss": 1.1472, "step": 782 }, { "epoch": 0.0034662889016778076, "grad_norm": 2.7960532320856264, "learning_rate": 3.4662889016778084e-07, "loss": 1.039, "step": 783 }, { "epoch": 0.0034707158351409977, "grad_norm": 3.9716670848969504, "learning_rate": 3.4707158351409984e-07, "loss": 0.8654, "step": 784 }, { "epoch": 0.0034751427686041877, "grad_norm": 2.710490296526512, "learning_rate": 3.475142768604188e-07, "loss": 0.7992, "step": 785 }, { "epoch": 0.003479569702067378, "grad_norm": 2.7524051355415513, "learning_rate": 3.4795697020673784e-07, "loss": 0.8483, "step": 786 }, { "epoch": 0.003483996635530568, "grad_norm": 3.7226112509299933, "learning_rate": 3.4839966355305684e-07, "loss": 1.125, "step": 787 }, { "epoch": 0.003488423568993758, "grad_norm": 2.622508562190559, "learning_rate": 3.4884235689937584e-07, "loss": 0.8256, "step": 788 }, { "epoch": 0.003492850502456948, "grad_norm": 2.5251970517753928, "learning_rate": 3.492850502456949e-07, "loss": 0.9366, "step": 789 }, { "epoch": 0.003497277435920138, "grad_norm": 3.427914119815316, "learning_rate": 3.4972774359201384e-07, "loss": 1.0166, "step": 790 }, { "epoch": 0.003501704369383328, "grad_norm": 4.054517181055827, "learning_rate": 3.5017043693833284e-07, "loss": 1.3698, "step": 791 }, { "epoch": 0.0035061313028465183, "grad_norm": 2.7667130897691363, "learning_rate": 3.506131302846519e-07, "loss": 1.0275, "step": 792 }, { "epoch": 0.0035105582363097083, "grad_norm": 3.131874299404981, "learning_rate": 3.5105582363097084e-07, "loss": 1.1826, "step": 793 }, { "epoch": 0.0035149851697728984, "grad_norm": 3.0370567617162987, "learning_rate": 3.5149851697728984e-07, "loss": 0.9734, "step": 794 }, { "epoch": 0.0035194121032360885, "grad_norm": 2.932928259848433, "learning_rate": 3.519412103236089e-07, "loss": 0.8072, "step": 795 }, { "epoch": 0.0035238390366992786, "grad_norm": 3.2920106286978488, "learning_rate": 3.523839036699279e-07, "loss": 0.8017, "step": 796 }, { "epoch": 0.0035282659701624686, "grad_norm": 3.393650185604498, "learning_rate": 3.5282659701624684e-07, "loss": 1.0935, "step": 797 }, { "epoch": 0.0035326929036256587, "grad_norm": 2.5269644053305216, "learning_rate": 3.532692903625659e-07, "loss": 0.7318, "step": 798 }, { "epoch": 0.0035371198370888484, "grad_norm": 3.0737035800230132, "learning_rate": 3.537119837088849e-07, "loss": 1.0087, "step": 799 }, { "epoch": 0.0035415467705520384, "grad_norm": 2.747756014087919, "learning_rate": 3.541546770552039e-07, "loss": 1.0528, "step": 800 }, { "epoch": 0.0035459737040152285, "grad_norm": 3.0638643740793077, "learning_rate": 3.5459737040152295e-07, "loss": 0.979, "step": 801 }, { "epoch": 0.0035504006374784186, "grad_norm": 2.5993810879278745, "learning_rate": 3.550400637478419e-07, "loss": 0.7357, "step": 802 }, { "epoch": 0.0035548275709416086, "grad_norm": 2.815325820450687, "learning_rate": 3.554827570941609e-07, "loss": 0.7352, "step": 803 }, { "epoch": 0.0035592545044047987, "grad_norm": 3.2850854881223444, "learning_rate": 3.5592545044047995e-07, "loss": 0.9551, "step": 804 }, { "epoch": 0.003563681437867989, "grad_norm": 3.0897613614993023, "learning_rate": 3.563681437867989e-07, "loss": 0.9989, "step": 805 }, { "epoch": 0.003568108371331179, "grad_norm": 5.307309133785024, "learning_rate": 3.568108371331179e-07, "loss": 1.5329, "step": 806 }, { "epoch": 0.003572535304794369, "grad_norm": 3.6769803736314235, "learning_rate": 3.5725353047943695e-07, "loss": 1.0983, "step": 807 }, { "epoch": 0.003576962238257559, "grad_norm": 3.3201372206793525, "learning_rate": 3.5769622382575595e-07, "loss": 1.0759, "step": 808 }, { "epoch": 0.003581389171720749, "grad_norm": 3.39995371998384, "learning_rate": 3.581389171720749e-07, "loss": 1.2078, "step": 809 }, { "epoch": 0.003585816105183939, "grad_norm": 3.6524290163165523, "learning_rate": 3.5858161051839395e-07, "loss": 1.3654, "step": 810 }, { "epoch": 0.0035902430386471292, "grad_norm": 3.075735756463682, "learning_rate": 3.5902430386471295e-07, "loss": 0.6702, "step": 811 }, { "epoch": 0.0035946699721103193, "grad_norm": 3.40990707165126, "learning_rate": 3.5946699721103195e-07, "loss": 0.9469, "step": 812 }, { "epoch": 0.0035990969055735094, "grad_norm": 2.7679450675391037, "learning_rate": 3.59909690557351e-07, "loss": 0.8077, "step": 813 }, { "epoch": 0.0036035238390366995, "grad_norm": 2.657329124226957, "learning_rate": 3.6035238390366995e-07, "loss": 0.9927, "step": 814 }, { "epoch": 0.0036079507724998895, "grad_norm": 2.743345156524812, "learning_rate": 3.6079507724998895e-07, "loss": 0.9397, "step": 815 }, { "epoch": 0.003612377705963079, "grad_norm": 3.2604655461321848, "learning_rate": 3.61237770596308e-07, "loss": 1.3969, "step": 816 }, { "epoch": 0.0036168046394262693, "grad_norm": 3.2255814275998738, "learning_rate": 3.6168046394262695e-07, "loss": 0.7048, "step": 817 }, { "epoch": 0.0036212315728894593, "grad_norm": 3.4581223925931757, "learning_rate": 3.6212315728894595e-07, "loss": 1.0148, "step": 818 }, { "epoch": 0.0036256585063526494, "grad_norm": 3.0659041551652413, "learning_rate": 3.62565850635265e-07, "loss": 0.8928, "step": 819 }, { "epoch": 0.0036300854398158395, "grad_norm": 2.7384154827049008, "learning_rate": 3.63008543981584e-07, "loss": 0.95, "step": 820 }, { "epoch": 0.0036345123732790295, "grad_norm": 3.397898744400896, "learning_rate": 3.6345123732790295e-07, "loss": 0.965, "step": 821 }, { "epoch": 0.0036389393067422196, "grad_norm": 3.624320135972553, "learning_rate": 3.63893930674222e-07, "loss": 1.2834, "step": 822 }, { "epoch": 0.0036433662402054097, "grad_norm": 3.010104550774989, "learning_rate": 3.64336624020541e-07, "loss": 0.99, "step": 823 }, { "epoch": 0.0036477931736685998, "grad_norm": 3.0005665673233293, "learning_rate": 3.6477931736686e-07, "loss": 0.6942, "step": 824 }, { "epoch": 0.00365222010713179, "grad_norm": 2.8193698377521557, "learning_rate": 3.6522201071317905e-07, "loss": 0.7085, "step": 825 }, { "epoch": 0.00365664704059498, "grad_norm": 2.9095720209644194, "learning_rate": 3.65664704059498e-07, "loss": 0.8423, "step": 826 }, { "epoch": 0.00366107397405817, "grad_norm": 2.835284713115594, "learning_rate": 3.66107397405817e-07, "loss": 0.7726, "step": 827 }, { "epoch": 0.00366550090752136, "grad_norm": 2.8786302323482826, "learning_rate": 3.6655009075213605e-07, "loss": 0.8587, "step": 828 }, { "epoch": 0.00366992784098455, "grad_norm": 3.157937590092658, "learning_rate": 3.66992784098455e-07, "loss": 1.1319, "step": 829 }, { "epoch": 0.00367435477444774, "grad_norm": 3.0396051173953227, "learning_rate": 3.67435477444774e-07, "loss": 0.6572, "step": 830 }, { "epoch": 0.0036787817079109303, "grad_norm": 2.902523527416478, "learning_rate": 3.6787817079109305e-07, "loss": 1.0152, "step": 831 }, { "epoch": 0.0036832086413741204, "grad_norm": 3.657165642499141, "learning_rate": 3.6832086413741205e-07, "loss": 1.4101, "step": 832 }, { "epoch": 0.00368763557483731, "grad_norm": 3.170698088453728, "learning_rate": 3.68763557483731e-07, "loss": 0.6148, "step": 833 }, { "epoch": 0.0036920625083005, "grad_norm": 4.353809656083729, "learning_rate": 3.6920625083005005e-07, "loss": 0.7336, "step": 834 }, { "epoch": 0.00369648944176369, "grad_norm": 3.493393059164183, "learning_rate": 3.6964894417636905e-07, "loss": 0.7896, "step": 835 }, { "epoch": 0.0037009163752268802, "grad_norm": 2.8558325523205665, "learning_rate": 3.7009163752268805e-07, "loss": 1.0681, "step": 836 }, { "epoch": 0.0037053433086900703, "grad_norm": 3.727773117918903, "learning_rate": 3.705343308690071e-07, "loss": 0.8333, "step": 837 }, { "epoch": 0.0037097702421532604, "grad_norm": 4.146744787802097, "learning_rate": 3.7097702421532605e-07, "loss": 0.97, "step": 838 }, { "epoch": 0.0037141971756164504, "grad_norm": 2.5737360481507223, "learning_rate": 3.7141971756164505e-07, "loss": 0.8253, "step": 839 }, { "epoch": 0.0037186241090796405, "grad_norm": 3.185684649345917, "learning_rate": 3.718624109079641e-07, "loss": 0.8873, "step": 840 }, { "epoch": 0.0037230510425428306, "grad_norm": 3.2508062728449976, "learning_rate": 3.723051042542831e-07, "loss": 0.6534, "step": 841 }, { "epoch": 0.0037274779760060207, "grad_norm": 2.687132530075719, "learning_rate": 3.7274779760060205e-07, "loss": 0.7907, "step": 842 }, { "epoch": 0.0037319049094692107, "grad_norm": 3.577162606344451, "learning_rate": 3.731904909469211e-07, "loss": 0.8869, "step": 843 }, { "epoch": 0.003736331842932401, "grad_norm": 2.438930653037092, "learning_rate": 3.736331842932401e-07, "loss": 0.6336, "step": 844 }, { "epoch": 0.003740758776395591, "grad_norm": 3.2000911120007753, "learning_rate": 3.7407587763955905e-07, "loss": 0.9368, "step": 845 }, { "epoch": 0.003745185709858781, "grad_norm": 2.7632338613036587, "learning_rate": 3.745185709858781e-07, "loss": 0.6332, "step": 846 }, { "epoch": 0.003749612643321971, "grad_norm": 3.3371768722684965, "learning_rate": 3.749612643321971e-07, "loss": 1.0283, "step": 847 }, { "epoch": 0.003754039576785161, "grad_norm": 2.763788021769551, "learning_rate": 3.754039576785161e-07, "loss": 0.8962, "step": 848 }, { "epoch": 0.0037584665102483508, "grad_norm": 3.4052827324323616, "learning_rate": 3.7584665102483516e-07, "loss": 1.101, "step": 849 }, { "epoch": 0.003762893443711541, "grad_norm": 3.6465019705254673, "learning_rate": 3.762893443711541e-07, "loss": 1.1534, "step": 850 }, { "epoch": 0.003767320377174731, "grad_norm": 3.063835046075328, "learning_rate": 3.767320377174731e-07, "loss": 0.8229, "step": 851 }, { "epoch": 0.003771747310637921, "grad_norm": 2.569794324396091, "learning_rate": 3.7717473106379216e-07, "loss": 0.7262, "step": 852 }, { "epoch": 0.003776174244101111, "grad_norm": 3.2364596810489132, "learning_rate": 3.7761742441011116e-07, "loss": 0.7052, "step": 853 }, { "epoch": 0.003780601177564301, "grad_norm": 3.5339451427992046, "learning_rate": 3.780601177564301e-07, "loss": 1.1854, "step": 854 }, { "epoch": 0.003785028111027491, "grad_norm": 3.4708535288936937, "learning_rate": 3.7850281110274916e-07, "loss": 1.1806, "step": 855 }, { "epoch": 0.0037894550444906813, "grad_norm": 3.298080625220319, "learning_rate": 3.7894550444906816e-07, "loss": 0.7638, "step": 856 }, { "epoch": 0.0037938819779538713, "grad_norm": 3.7364439423014657, "learning_rate": 3.793881977953872e-07, "loss": 1.2851, "step": 857 }, { "epoch": 0.0037983089114170614, "grad_norm": 2.6714057270268667, "learning_rate": 3.7983089114170616e-07, "loss": 0.7005, "step": 858 }, { "epoch": 0.0038027358448802515, "grad_norm": 3.610377357716092, "learning_rate": 3.8027358448802516e-07, "loss": 1.4233, "step": 859 }, { "epoch": 0.0038071627783434416, "grad_norm": 4.202215050492347, "learning_rate": 3.807162778343442e-07, "loss": 1.565, "step": 860 }, { "epoch": 0.0038115897118066316, "grad_norm": 2.8234409800311098, "learning_rate": 3.811589711806632e-07, "loss": 0.9779, "step": 861 }, { "epoch": 0.0038160166452698217, "grad_norm": 3.7059453184946327, "learning_rate": 3.8160166452698216e-07, "loss": 0.8386, "step": 862 }, { "epoch": 0.003820443578733012, "grad_norm": 3.6162147465679286, "learning_rate": 3.820443578733012e-07, "loss": 0.9143, "step": 863 }, { "epoch": 0.003824870512196202, "grad_norm": 3.150223228119962, "learning_rate": 3.824870512196202e-07, "loss": 1.1951, "step": 864 }, { "epoch": 0.003829297445659392, "grad_norm": 2.6191294313315763, "learning_rate": 3.829297445659392e-07, "loss": 0.8251, "step": 865 }, { "epoch": 0.0038337243791225816, "grad_norm": 3.7859376407879455, "learning_rate": 3.8337243791225827e-07, "loss": 0.7401, "step": 866 }, { "epoch": 0.0038381513125857717, "grad_norm": 3.4514382228302645, "learning_rate": 3.838151312585772e-07, "loss": 0.8497, "step": 867 }, { "epoch": 0.0038425782460489617, "grad_norm": 3.215601870419071, "learning_rate": 3.842578246048962e-07, "loss": 0.9711, "step": 868 }, { "epoch": 0.003847005179512152, "grad_norm": 2.9237022096365077, "learning_rate": 3.8470051795121527e-07, "loss": 0.9139, "step": 869 }, { "epoch": 0.003851432112975342, "grad_norm": 2.7197263915910908, "learning_rate": 3.851432112975342e-07, "loss": 0.8143, "step": 870 }, { "epoch": 0.003855859046438532, "grad_norm": 2.682329535660819, "learning_rate": 3.855859046438532e-07, "loss": 0.709, "step": 871 }, { "epoch": 0.003860285979901722, "grad_norm": 3.7811205875208658, "learning_rate": 3.8602859799017227e-07, "loss": 0.7927, "step": 872 }, { "epoch": 0.003864712913364912, "grad_norm": 3.6916699114406963, "learning_rate": 3.8647129133649127e-07, "loss": 0.8906, "step": 873 }, { "epoch": 0.003869139846828102, "grad_norm": 3.5747210241695693, "learning_rate": 3.869139846828102e-07, "loss": 1.0733, "step": 874 }, { "epoch": 0.0038735667802912922, "grad_norm": 2.9186456409820156, "learning_rate": 3.8735667802912927e-07, "loss": 0.808, "step": 875 }, { "epoch": 0.0038779937137544823, "grad_norm": 2.7584862767291742, "learning_rate": 3.8779937137544827e-07, "loss": 0.79, "step": 876 }, { "epoch": 0.0038824206472176724, "grad_norm": 2.876641913451347, "learning_rate": 3.8824206472176727e-07, "loss": 0.784, "step": 877 }, { "epoch": 0.0038868475806808625, "grad_norm": 3.4306696981367053, "learning_rate": 3.886847580680863e-07, "loss": 1.0958, "step": 878 }, { "epoch": 0.0038912745141440525, "grad_norm": 4.160640188963592, "learning_rate": 3.8912745141440527e-07, "loss": 1.099, "step": 879 }, { "epoch": 0.0038957014476072426, "grad_norm": 3.0752471338092655, "learning_rate": 3.8957014476072427e-07, "loss": 1.0026, "step": 880 }, { "epoch": 0.0039001283810704327, "grad_norm": 2.855426791940973, "learning_rate": 3.900128381070433e-07, "loss": 0.9382, "step": 881 }, { "epoch": 0.0039045553145336228, "grad_norm": 2.969658464404825, "learning_rate": 3.9045553145336227e-07, "loss": 0.4746, "step": 882 }, { "epoch": 0.003908982247996812, "grad_norm": 3.753363857191982, "learning_rate": 3.9089822479968127e-07, "loss": 1.2768, "step": 883 }, { "epoch": 0.003913409181460003, "grad_norm": 3.3888307005112246, "learning_rate": 3.913409181460003e-07, "loss": 0.8518, "step": 884 }, { "epoch": 0.0039178361149231926, "grad_norm": 3.382852425312058, "learning_rate": 3.917836114923193e-07, "loss": 0.9163, "step": 885 }, { "epoch": 0.003922263048386383, "grad_norm": 2.4327306284093844, "learning_rate": 3.9222630483863827e-07, "loss": 0.6386, "step": 886 }, { "epoch": 0.003926689981849573, "grad_norm": 3.3641069088664235, "learning_rate": 3.926689981849573e-07, "loss": 0.8993, "step": 887 }, { "epoch": 0.003931116915312763, "grad_norm": 3.5081935102198725, "learning_rate": 3.931116915312763e-07, "loss": 1.1228, "step": 888 }, { "epoch": 0.003935543848775953, "grad_norm": 4.175505729390596, "learning_rate": 3.935543848775953e-07, "loss": 1.2462, "step": 889 }, { "epoch": 0.003939970782239143, "grad_norm": 4.160549611569053, "learning_rate": 3.9399707822391437e-07, "loss": 1.1316, "step": 890 }, { "epoch": 0.003944397715702333, "grad_norm": 3.5464996573013603, "learning_rate": 3.944397715702333e-07, "loss": 1.4063, "step": 891 }, { "epoch": 0.003948824649165523, "grad_norm": 3.1139226502108643, "learning_rate": 3.948824649165523e-07, "loss": 0.9639, "step": 892 }, { "epoch": 0.003953251582628713, "grad_norm": 2.825034341432033, "learning_rate": 3.9532515826287137e-07, "loss": 0.7699, "step": 893 }, { "epoch": 0.003957678516091903, "grad_norm": 4.008089917357613, "learning_rate": 3.957678516091903e-07, "loss": 1.4496, "step": 894 }, { "epoch": 0.003962105449555093, "grad_norm": 4.2382152322134115, "learning_rate": 3.962105449555093e-07, "loss": 1.3158, "step": 895 }, { "epoch": 0.003966532383018283, "grad_norm": 3.446234060401884, "learning_rate": 3.9665323830182837e-07, "loss": 1.3995, "step": 896 }, { "epoch": 0.0039709593164814734, "grad_norm": 2.9852989705754593, "learning_rate": 3.9709593164814737e-07, "loss": 0.9505, "step": 897 }, { "epoch": 0.003975386249944663, "grad_norm": 2.9319917962736137, "learning_rate": 3.975386249944663e-07, "loss": 0.7887, "step": 898 }, { "epoch": 0.003979813183407854, "grad_norm": 2.842732156312256, "learning_rate": 3.9798131834078537e-07, "loss": 0.6321, "step": 899 }, { "epoch": 0.003984240116871043, "grad_norm": 2.8613473745786564, "learning_rate": 3.984240116871044e-07, "loss": 0.9539, "step": 900 }, { "epoch": 0.003988667050334234, "grad_norm": 2.876136317592518, "learning_rate": 3.988667050334234e-07, "loss": 0.8792, "step": 901 }, { "epoch": 0.003993093983797423, "grad_norm": 3.859114637250689, "learning_rate": 3.9930939837974243e-07, "loss": 1.4338, "step": 902 }, { "epoch": 0.003997520917260614, "grad_norm": 3.718913734299672, "learning_rate": 3.997520917260614e-07, "loss": 0.9271, "step": 903 }, { "epoch": 0.0040019478507238035, "grad_norm": 3.4558269923842055, "learning_rate": 4.001947850723804e-07, "loss": 1.1256, "step": 904 }, { "epoch": 0.004006374784186994, "grad_norm": 2.8069767707940008, "learning_rate": 4.0063747841869943e-07, "loss": 0.8427, "step": 905 }, { "epoch": 0.004010801717650184, "grad_norm": 3.4859395314753616, "learning_rate": 4.010801717650184e-07, "loss": 1.2051, "step": 906 }, { "epoch": 0.004015228651113374, "grad_norm": 2.95612329644737, "learning_rate": 4.015228651113374e-07, "loss": 0.8534, "step": 907 }, { "epoch": 0.004019655584576564, "grad_norm": 2.841291332424238, "learning_rate": 4.0196555845765643e-07, "loss": 0.8062, "step": 908 }, { "epoch": 0.0040240825180397535, "grad_norm": 4.4995012063581274, "learning_rate": 4.0240825180397543e-07, "loss": 0.7961, "step": 909 }, { "epoch": 0.004028509451502944, "grad_norm": 3.1591702866064115, "learning_rate": 4.028509451502944e-07, "loss": 0.6507, "step": 910 }, { "epoch": 0.004032936384966134, "grad_norm": 3.3752926955627434, "learning_rate": 4.0329363849661343e-07, "loss": 1.0571, "step": 911 }, { "epoch": 0.004037363318429324, "grad_norm": 2.833832275568232, "learning_rate": 4.0373633184293243e-07, "loss": 0.6461, "step": 912 }, { "epoch": 0.004041790251892514, "grad_norm": 2.9774753811868355, "learning_rate": 4.0417902518925143e-07, "loss": 0.9013, "step": 913 }, { "epoch": 0.004046217185355704, "grad_norm": 2.933161083959412, "learning_rate": 4.046217185355705e-07, "loss": 0.8596, "step": 914 }, { "epoch": 0.004050644118818894, "grad_norm": 3.017493626153042, "learning_rate": 4.0506441188188943e-07, "loss": 0.7219, "step": 915 }, { "epoch": 0.004055071052282084, "grad_norm": 3.219064890235681, "learning_rate": 4.0550710522820843e-07, "loss": 0.8822, "step": 916 }, { "epoch": 0.004059497985745274, "grad_norm": 4.068926555823489, "learning_rate": 4.059497985745275e-07, "loss": 1.3861, "step": 917 }, { "epoch": 0.0040639249192084646, "grad_norm": 3.4560990027087395, "learning_rate": 4.063924919208465e-07, "loss": 1.5072, "step": 918 }, { "epoch": 0.004068351852671654, "grad_norm": 3.399677958123884, "learning_rate": 4.0683518526716543e-07, "loss": 1.2116, "step": 919 }, { "epoch": 0.004072778786134845, "grad_norm": 2.712865655238298, "learning_rate": 4.072778786134845e-07, "loss": 0.7904, "step": 920 }, { "epoch": 0.004077205719598034, "grad_norm": 3.220898795176963, "learning_rate": 4.077205719598035e-07, "loss": 0.9877, "step": 921 }, { "epoch": 0.004081632653061225, "grad_norm": 2.773096152685965, "learning_rate": 4.0816326530612243e-07, "loss": 1.0415, "step": 922 }, { "epoch": 0.0040860595865244145, "grad_norm": 2.960003276355302, "learning_rate": 4.086059586524415e-07, "loss": 1.0421, "step": 923 }, { "epoch": 0.004090486519987605, "grad_norm": 3.118920236653291, "learning_rate": 4.090486519987605e-07, "loss": 1.2156, "step": 924 }, { "epoch": 0.004094913453450795, "grad_norm": 2.8557150289574382, "learning_rate": 4.094913453450795e-07, "loss": 0.8907, "step": 925 }, { "epoch": 0.004099340386913984, "grad_norm": 2.6592396022745066, "learning_rate": 4.0993403869139853e-07, "loss": 0.7508, "step": 926 }, { "epoch": 0.004103767320377175, "grad_norm": 3.7887176428477303, "learning_rate": 4.103767320377175e-07, "loss": 1.2788, "step": 927 }, { "epoch": 0.0041081942538403644, "grad_norm": 3.568267635836816, "learning_rate": 4.108194253840365e-07, "loss": 0.8847, "step": 928 }, { "epoch": 0.004112621187303555, "grad_norm": 3.261435185504325, "learning_rate": 4.1126211873035553e-07, "loss": 0.751, "step": 929 }, { "epoch": 0.004117048120766745, "grad_norm": 2.7289680937195127, "learning_rate": 4.1170481207667453e-07, "loss": 0.6528, "step": 930 }, { "epoch": 0.004121475054229935, "grad_norm": 3.1190157507985097, "learning_rate": 4.121475054229936e-07, "loss": 0.7349, "step": 931 }, { "epoch": 0.004125901987693125, "grad_norm": 3.086678759483285, "learning_rate": 4.1259019876931253e-07, "loss": 1.0268, "step": 932 }, { "epoch": 0.004130328921156315, "grad_norm": 3.4054066857884573, "learning_rate": 4.1303289211563153e-07, "loss": 1.3207, "step": 933 }, { "epoch": 0.004134755854619505, "grad_norm": 2.9742021993037433, "learning_rate": 4.134755854619506e-07, "loss": 1.0745, "step": 934 }, { "epoch": 0.004139182788082695, "grad_norm": 2.8066052005627924, "learning_rate": 4.1391827880826953e-07, "loss": 0.9394, "step": 935 }, { "epoch": 0.004143609721545885, "grad_norm": 3.5686862419366348, "learning_rate": 4.1436097215458853e-07, "loss": 1.2207, "step": 936 }, { "epoch": 0.0041480366550090755, "grad_norm": 3.6454145186641282, "learning_rate": 4.148036655009076e-07, "loss": 0.8734, "step": 937 }, { "epoch": 0.004152463588472265, "grad_norm": 3.1036995116139976, "learning_rate": 4.152463588472266e-07, "loss": 1.1067, "step": 938 }, { "epoch": 0.004156890521935456, "grad_norm": 3.606483901949344, "learning_rate": 4.1568905219354553e-07, "loss": 0.8722, "step": 939 }, { "epoch": 0.004161317455398645, "grad_norm": 2.9164004141534634, "learning_rate": 4.161317455398646e-07, "loss": 0.9624, "step": 940 }, { "epoch": 0.004165744388861836, "grad_norm": 2.2959074131136954, "learning_rate": 4.165744388861836e-07, "loss": 0.789, "step": 941 }, { "epoch": 0.0041701713223250255, "grad_norm": 2.8183586364436177, "learning_rate": 4.170171322325026e-07, "loss": 1.0924, "step": 942 }, { "epoch": 0.004174598255788215, "grad_norm": 2.5650902133049827, "learning_rate": 4.1745982557882164e-07, "loss": 0.7, "step": 943 }, { "epoch": 0.004179025189251406, "grad_norm": 3.1252230644854078, "learning_rate": 4.179025189251406e-07, "loss": 1.0408, "step": 944 }, { "epoch": 0.004183452122714595, "grad_norm": 2.593347804058168, "learning_rate": 4.183452122714596e-07, "loss": 0.8695, "step": 945 }, { "epoch": 0.004187879056177786, "grad_norm": 3.6363038472336635, "learning_rate": 4.1878790561777864e-07, "loss": 1.3908, "step": 946 }, { "epoch": 0.004192305989640975, "grad_norm": 3.0351983751922536, "learning_rate": 4.192305989640976e-07, "loss": 0.9148, "step": 947 }, { "epoch": 0.004196732923104166, "grad_norm": 3.509715516426323, "learning_rate": 4.196732923104166e-07, "loss": 1.0104, "step": 948 }, { "epoch": 0.0042011598565673556, "grad_norm": 3.2902207882766814, "learning_rate": 4.2011598565673564e-07, "loss": 1.2129, "step": 949 }, { "epoch": 0.004205586790030546, "grad_norm": 3.87068107456396, "learning_rate": 4.2055867900305464e-07, "loss": 0.9402, "step": 950 }, { "epoch": 0.004210013723493736, "grad_norm": 2.9788628666959744, "learning_rate": 4.210013723493736e-07, "loss": 0.7905, "step": 951 }, { "epoch": 0.004214440656956926, "grad_norm": 3.1242710201119537, "learning_rate": 4.2144406569569264e-07, "loss": 1.0795, "step": 952 }, { "epoch": 0.004218867590420116, "grad_norm": 2.4930503512126228, "learning_rate": 4.2188675904201164e-07, "loss": 0.7121, "step": 953 }, { "epoch": 0.004223294523883306, "grad_norm": 3.931872098537103, "learning_rate": 4.2232945238833064e-07, "loss": 1.4721, "step": 954 }, { "epoch": 0.004227721457346496, "grad_norm": 2.498619503504055, "learning_rate": 4.227721457346497e-07, "loss": 0.7818, "step": 955 }, { "epoch": 0.0042321483908096865, "grad_norm": 3.2742453237792843, "learning_rate": 4.2321483908096864e-07, "loss": 0.9421, "step": 956 }, { "epoch": 0.004236575324272876, "grad_norm": 5.282840505389755, "learning_rate": 4.2365753242728764e-07, "loss": 1.1779, "step": 957 }, { "epoch": 0.004241002257736067, "grad_norm": 3.2713560513246747, "learning_rate": 4.241002257736067e-07, "loss": 0.8554, "step": 958 }, { "epoch": 0.004245429191199256, "grad_norm": 3.8553377223376595, "learning_rate": 4.2454291911992564e-07, "loss": 1.6052, "step": 959 }, { "epoch": 0.004249856124662446, "grad_norm": 2.615788422666165, "learning_rate": 4.2498561246624464e-07, "loss": 0.5208, "step": 960 }, { "epoch": 0.0042542830581256364, "grad_norm": 3.5775874178245957, "learning_rate": 4.254283058125637e-07, "loss": 1.0323, "step": 961 }, { "epoch": 0.004258709991588826, "grad_norm": 2.96884449726743, "learning_rate": 4.258709991588827e-07, "loss": 0.7757, "step": 962 }, { "epoch": 0.004263136925052017, "grad_norm": 2.8547512759171383, "learning_rate": 4.2631369250520164e-07, "loss": 0.9235, "step": 963 }, { "epoch": 0.004267563858515206, "grad_norm": 3.074495311780966, "learning_rate": 4.267563858515207e-07, "loss": 0.9423, "step": 964 }, { "epoch": 0.004271990791978397, "grad_norm": 4.492815555957163, "learning_rate": 4.271990791978397e-07, "loss": 1.3717, "step": 965 }, { "epoch": 0.004276417725441586, "grad_norm": 3.0639928821223363, "learning_rate": 4.276417725441587e-07, "loss": 0.8388, "step": 966 }, { "epoch": 0.004280844658904777, "grad_norm": 2.5217742373062824, "learning_rate": 4.2808446589047775e-07, "loss": 0.7802, "step": 967 }, { "epoch": 0.0042852715923679665, "grad_norm": 2.90410461320445, "learning_rate": 4.285271592367967e-07, "loss": 0.988, "step": 968 }, { "epoch": 0.004289698525831157, "grad_norm": 2.6203974203965807, "learning_rate": 4.289698525831157e-07, "loss": 0.8995, "step": 969 }, { "epoch": 0.004294125459294347, "grad_norm": 3.787234198995615, "learning_rate": 4.2941254592943475e-07, "loss": 0.9803, "step": 970 }, { "epoch": 0.004298552392757537, "grad_norm": 2.9185551186320473, "learning_rate": 4.298552392757537e-07, "loss": 0.9726, "step": 971 }, { "epoch": 0.004302979326220727, "grad_norm": 3.656897027675081, "learning_rate": 4.302979326220727e-07, "loss": 1.343, "step": 972 }, { "epoch": 0.004307406259683917, "grad_norm": 5.129960266697864, "learning_rate": 4.3074062596839175e-07, "loss": 1.1258, "step": 973 }, { "epoch": 0.004311833193147107, "grad_norm": 2.4772686918504636, "learning_rate": 4.3118331931471075e-07, "loss": 0.7476, "step": 974 }, { "epoch": 0.004316260126610297, "grad_norm": 2.7652838324711855, "learning_rate": 4.316260126610297e-07, "loss": 0.6799, "step": 975 }, { "epoch": 0.004320687060073487, "grad_norm": 3.0452490075800096, "learning_rate": 4.3206870600734875e-07, "loss": 0.985, "step": 976 }, { "epoch": 0.004325113993536677, "grad_norm": 2.5307142255319657, "learning_rate": 4.3251139935366775e-07, "loss": 0.7438, "step": 977 }, { "epoch": 0.004329540926999867, "grad_norm": 3.0636893380980164, "learning_rate": 4.3295409269998675e-07, "loss": 0.8106, "step": 978 }, { "epoch": 0.004333967860463057, "grad_norm": 2.5702161722111274, "learning_rate": 4.333967860463058e-07, "loss": 0.8396, "step": 979 }, { "epoch": 0.004338394793926247, "grad_norm": 2.679682601564171, "learning_rate": 4.3383947939262475e-07, "loss": 0.7161, "step": 980 }, { "epoch": 0.004342821727389437, "grad_norm": 2.819649333534439, "learning_rate": 4.3428217273894375e-07, "loss": 0.7884, "step": 981 }, { "epoch": 0.004347248660852628, "grad_norm": 5.387675580911667, "learning_rate": 4.347248660852628e-07, "loss": 1.9727, "step": 982 }, { "epoch": 0.004351675594315817, "grad_norm": 3.15224347294634, "learning_rate": 4.351675594315818e-07, "loss": 1.0385, "step": 983 }, { "epoch": 0.004356102527779008, "grad_norm": 4.525256203698042, "learning_rate": 4.3561025277790075e-07, "loss": 0.9096, "step": 984 }, { "epoch": 0.004360529461242197, "grad_norm": 2.852720394656246, "learning_rate": 4.360529461242198e-07, "loss": 0.6731, "step": 985 }, { "epoch": 0.004364956394705388, "grad_norm": 3.0986171523408377, "learning_rate": 4.364956394705388e-07, "loss": 0.8826, "step": 986 }, { "epoch": 0.0043693833281685775, "grad_norm": 3.0374580257472013, "learning_rate": 4.3693833281685775e-07, "loss": 0.9369, "step": 987 }, { "epoch": 0.004373810261631768, "grad_norm": 3.6650264465951636, "learning_rate": 4.373810261631768e-07, "loss": 0.8207, "step": 988 }, { "epoch": 0.004378237195094958, "grad_norm": 3.11567890744868, "learning_rate": 4.378237195094958e-07, "loss": 0.874, "step": 989 }, { "epoch": 0.004382664128558148, "grad_norm": 3.7623343909153513, "learning_rate": 4.382664128558148e-07, "loss": 0.915, "step": 990 }, { "epoch": 0.004387091062021338, "grad_norm": 2.659141681748097, "learning_rate": 4.3870910620213385e-07, "loss": 0.7311, "step": 991 }, { "epoch": 0.0043915179954845274, "grad_norm": 2.846864631935523, "learning_rate": 4.391517995484528e-07, "loss": 0.8903, "step": 992 }, { "epoch": 0.004395944928947718, "grad_norm": 2.7158873785569897, "learning_rate": 4.395944928947718e-07, "loss": 0.8652, "step": 993 }, { "epoch": 0.004400371862410908, "grad_norm": 3.1158689624044897, "learning_rate": 4.4003718624109085e-07, "loss": 1.4411, "step": 994 }, { "epoch": 0.004404798795874098, "grad_norm": 2.976382806004632, "learning_rate": 4.4047987958740985e-07, "loss": 0.9742, "step": 995 }, { "epoch": 0.004409225729337288, "grad_norm": 3.8983069579839347, "learning_rate": 4.409225729337288e-07, "loss": 1.3615, "step": 996 }, { "epoch": 0.004413652662800478, "grad_norm": 2.9852689844289526, "learning_rate": 4.4136526628004785e-07, "loss": 0.7832, "step": 997 }, { "epoch": 0.004418079596263668, "grad_norm": 3.3825593084264347, "learning_rate": 4.4180795962636685e-07, "loss": 0.8836, "step": 998 }, { "epoch": 0.004422506529726858, "grad_norm": 3.7060198590430784, "learning_rate": 4.422506529726858e-07, "loss": 1.1505, "step": 999 }, { "epoch": 0.004426933463190048, "grad_norm": 3.7851946200637907, "learning_rate": 4.4269334631900485e-07, "loss": 1.3349, "step": 1000 }, { "epoch": 0.0044313603966532385, "grad_norm": 2.937028140437669, "learning_rate": 4.4313603966532385e-07, "loss": 0.9703, "step": 1001 }, { "epoch": 0.004435787330116428, "grad_norm": 2.7592394609627187, "learning_rate": 4.4357873301164285e-07, "loss": 0.6598, "step": 1002 }, { "epoch": 0.004440214263579619, "grad_norm": 3.037621626054055, "learning_rate": 4.440214263579619e-07, "loss": 0.8415, "step": 1003 }, { "epoch": 0.004444641197042808, "grad_norm": 3.713585942961489, "learning_rate": 4.4446411970428085e-07, "loss": 1.128, "step": 1004 }, { "epoch": 0.004449068130505999, "grad_norm": 2.5871840217240836, "learning_rate": 4.449068130505999e-07, "loss": 0.9422, "step": 1005 }, { "epoch": 0.0044534950639691885, "grad_norm": 2.8685763407770373, "learning_rate": 4.453495063969189e-07, "loss": 0.9366, "step": 1006 }, { "epoch": 0.004457921997432379, "grad_norm": 3.0819325976699714, "learning_rate": 4.457921997432379e-07, "loss": 1.0368, "step": 1007 }, { "epoch": 0.004462348930895569, "grad_norm": 3.5032151317161753, "learning_rate": 4.4623489308955696e-07, "loss": 0.8862, "step": 1008 }, { "epoch": 0.004466775864358758, "grad_norm": 2.827294573566274, "learning_rate": 4.466775864358759e-07, "loss": 1.1537, "step": 1009 }, { "epoch": 0.004471202797821949, "grad_norm": 2.933964688255319, "learning_rate": 4.471202797821949e-07, "loss": 0.8259, "step": 1010 }, { "epoch": 0.004475629731285138, "grad_norm": 2.9428448951195803, "learning_rate": 4.4756297312851396e-07, "loss": 0.9057, "step": 1011 }, { "epoch": 0.004480056664748329, "grad_norm": 3.339998606520496, "learning_rate": 4.480056664748329e-07, "loss": 0.8707, "step": 1012 }, { "epoch": 0.0044844835982115186, "grad_norm": 2.6660539911985612, "learning_rate": 4.484483598211519e-07, "loss": 0.9105, "step": 1013 }, { "epoch": 0.004488910531674709, "grad_norm": 2.6677418441342895, "learning_rate": 4.4889105316747096e-07, "loss": 0.7397, "step": 1014 }, { "epoch": 0.004493337465137899, "grad_norm": 2.8417029930523054, "learning_rate": 4.4933374651378996e-07, "loss": 0.8176, "step": 1015 }, { "epoch": 0.004497764398601089, "grad_norm": 4.727767838848219, "learning_rate": 4.497764398601089e-07, "loss": 1.2844, "step": 1016 }, { "epoch": 0.004502191332064279, "grad_norm": 2.7911194094125795, "learning_rate": 4.5021913320642796e-07, "loss": 0.9389, "step": 1017 }, { "epoch": 0.004506618265527469, "grad_norm": 3.817029936596686, "learning_rate": 4.5066182655274696e-07, "loss": 0.976, "step": 1018 }, { "epoch": 0.004511045198990659, "grad_norm": 2.837092171464182, "learning_rate": 4.5110451989906596e-07, "loss": 0.8788, "step": 1019 }, { "epoch": 0.0045154721324538495, "grad_norm": 3.5293654177238776, "learning_rate": 4.51547213245385e-07, "loss": 0.9634, "step": 1020 }, { "epoch": 0.004519899065917039, "grad_norm": 2.9187287657644028, "learning_rate": 4.5198990659170396e-07, "loss": 0.5871, "step": 1021 }, { "epoch": 0.00452432599938023, "grad_norm": 3.90650979463481, "learning_rate": 4.5243259993802296e-07, "loss": 0.7966, "step": 1022 }, { "epoch": 0.004528752932843419, "grad_norm": 2.8201843943300604, "learning_rate": 4.52875293284342e-07, "loss": 0.7251, "step": 1023 }, { "epoch": 0.00453317986630661, "grad_norm": 3.1292430813444003, "learning_rate": 4.5331798663066096e-07, "loss": 1.2268, "step": 1024 }, { "epoch": 0.0045376067997697994, "grad_norm": 3.108185937308085, "learning_rate": 4.5376067997697996e-07, "loss": 1.065, "step": 1025 }, { "epoch": 0.004542033733232989, "grad_norm": 3.427556294569741, "learning_rate": 4.54203373323299e-07, "loss": 1.1929, "step": 1026 }, { "epoch": 0.00454646066669618, "grad_norm": 2.532652686032827, "learning_rate": 4.54646066669618e-07, "loss": 0.7565, "step": 1027 }, { "epoch": 0.004550887600159369, "grad_norm": 3.210639225077091, "learning_rate": 4.5508876001593696e-07, "loss": 0.8987, "step": 1028 }, { "epoch": 0.00455531453362256, "grad_norm": 3.082431883425314, "learning_rate": 4.55531453362256e-07, "loss": 0.9972, "step": 1029 }, { "epoch": 0.004559741467085749, "grad_norm": 3.2232316214404735, "learning_rate": 4.55974146708575e-07, "loss": 0.9576, "step": 1030 }, { "epoch": 0.00456416840054894, "grad_norm": 4.299629089409958, "learning_rate": 4.56416840054894e-07, "loss": 1.2945, "step": 1031 }, { "epoch": 0.0045685953340121295, "grad_norm": 2.70940144232904, "learning_rate": 4.5685953340121307e-07, "loss": 1.0829, "step": 1032 }, { "epoch": 0.00457302226747532, "grad_norm": 2.8563250657720713, "learning_rate": 4.57302226747532e-07, "loss": 0.8018, "step": 1033 }, { "epoch": 0.00457744920093851, "grad_norm": 2.6636142506760776, "learning_rate": 4.57744920093851e-07, "loss": 1.1017, "step": 1034 }, { "epoch": 0.0045818761344017, "grad_norm": 3.0879112610509716, "learning_rate": 4.5818761344017007e-07, "loss": 1.1032, "step": 1035 }, { "epoch": 0.00458630306786489, "grad_norm": 2.4966665714497323, "learning_rate": 4.58630306786489e-07, "loss": 0.749, "step": 1036 }, { "epoch": 0.00459073000132808, "grad_norm": 3.3798245619086265, "learning_rate": 4.59073000132808e-07, "loss": 0.8782, "step": 1037 }, { "epoch": 0.00459515693479127, "grad_norm": 3.3703241561846093, "learning_rate": 4.5951569347912707e-07, "loss": 0.84, "step": 1038 }, { "epoch": 0.0045995838682544605, "grad_norm": 3.584416287690235, "learning_rate": 4.5995838682544607e-07, "loss": 1.2047, "step": 1039 }, { "epoch": 0.00460401080171765, "grad_norm": 2.970384913031086, "learning_rate": 4.60401080171765e-07, "loss": 0.9781, "step": 1040 }, { "epoch": 0.004608437735180841, "grad_norm": 3.2175300498973587, "learning_rate": 4.6084377351808407e-07, "loss": 1.2342, "step": 1041 }, { "epoch": 0.00461286466864403, "grad_norm": 3.5581774298817903, "learning_rate": 4.6128646686440307e-07, "loss": 0.7758, "step": 1042 }, { "epoch": 0.00461729160210722, "grad_norm": 2.8871732592671697, "learning_rate": 4.6172916021072207e-07, "loss": 1.0546, "step": 1043 }, { "epoch": 0.00462171853557041, "grad_norm": 3.6863462987120315, "learning_rate": 4.621718535570411e-07, "loss": 0.7653, "step": 1044 }, { "epoch": 0.0046261454690336, "grad_norm": 2.7068759583197908, "learning_rate": 4.6261454690336007e-07, "loss": 0.7666, "step": 1045 }, { "epoch": 0.004630572402496791, "grad_norm": 2.811906719583798, "learning_rate": 4.6305724024967907e-07, "loss": 0.9447, "step": 1046 }, { "epoch": 0.00463499933595998, "grad_norm": 2.861258741090206, "learning_rate": 4.634999335959981e-07, "loss": 0.6194, "step": 1047 }, { "epoch": 0.004639426269423171, "grad_norm": 4.4110984311634365, "learning_rate": 4.6394262694231707e-07, "loss": 1.5424, "step": 1048 }, { "epoch": 0.00464385320288636, "grad_norm": 3.5565739933470955, "learning_rate": 4.6438532028863607e-07, "loss": 1.1104, "step": 1049 }, { "epoch": 0.004648280136349551, "grad_norm": 3.1100504373172377, "learning_rate": 4.648280136349551e-07, "loss": 1.0755, "step": 1050 }, { "epoch": 0.0046527070698127405, "grad_norm": 3.4281188725146863, "learning_rate": 4.652707069812741e-07, "loss": 0.7778, "step": 1051 }, { "epoch": 0.004657134003275931, "grad_norm": 3.3405097491379157, "learning_rate": 4.6571340032759307e-07, "loss": 1.1299, "step": 1052 }, { "epoch": 0.004661560936739121, "grad_norm": 3.490179575826404, "learning_rate": 4.661560936739121e-07, "loss": 1.0857, "step": 1053 }, { "epoch": 0.004665987870202311, "grad_norm": 2.558505435436546, "learning_rate": 4.665987870202311e-07, "loss": 0.8071, "step": 1054 }, { "epoch": 0.004670414803665501, "grad_norm": 4.003323559212114, "learning_rate": 4.670414803665501e-07, "loss": 1.1943, "step": 1055 }, { "epoch": 0.004674841737128691, "grad_norm": 3.8520537503558208, "learning_rate": 4.6748417371286917e-07, "loss": 0.8564, "step": 1056 }, { "epoch": 0.004679268670591881, "grad_norm": 3.452075634432907, "learning_rate": 4.679268670591881e-07, "loss": 1.0427, "step": 1057 }, { "epoch": 0.0046836956040550715, "grad_norm": 3.5118435495486398, "learning_rate": 4.683695604055071e-07, "loss": 1.2115, "step": 1058 }, { "epoch": 0.004688122537518261, "grad_norm": 3.5912706738116924, "learning_rate": 4.6881225375182617e-07, "loss": 1.2091, "step": 1059 }, { "epoch": 0.004692549470981451, "grad_norm": 2.744714843646194, "learning_rate": 4.6925494709814517e-07, "loss": 0.769, "step": 1060 }, { "epoch": 0.004696976404444641, "grad_norm": 3.1563631003006445, "learning_rate": 4.696976404444641e-07, "loss": 0.824, "step": 1061 }, { "epoch": 0.004701403337907831, "grad_norm": 3.2143858071397378, "learning_rate": 4.7014033379078317e-07, "loss": 0.8112, "step": 1062 }, { "epoch": 0.004705830271371021, "grad_norm": 3.1733863524696706, "learning_rate": 4.7058302713710217e-07, "loss": 0.9694, "step": 1063 }, { "epoch": 0.004710257204834211, "grad_norm": 2.75476343870828, "learning_rate": 4.710257204834211e-07, "loss": 0.7482, "step": 1064 }, { "epoch": 0.0047146841382974015, "grad_norm": 3.083444235934464, "learning_rate": 4.714684138297402e-07, "loss": 1.0303, "step": 1065 }, { "epoch": 0.004719111071760591, "grad_norm": 3.7552957561385756, "learning_rate": 4.719111071760592e-07, "loss": 1.0868, "step": 1066 }, { "epoch": 0.004723538005223782, "grad_norm": 3.643124450231473, "learning_rate": 4.723538005223782e-07, "loss": 1.1756, "step": 1067 }, { "epoch": 0.004727964938686971, "grad_norm": 2.911230414296508, "learning_rate": 4.7279649386869723e-07, "loss": 1.0186, "step": 1068 }, { "epoch": 0.004732391872150162, "grad_norm": 3.2679618556693404, "learning_rate": 4.732391872150162e-07, "loss": 0.914, "step": 1069 }, { "epoch": 0.0047368188056133515, "grad_norm": 3.324809862129143, "learning_rate": 4.736818805613352e-07, "loss": 0.717, "step": 1070 }, { "epoch": 0.004741245739076542, "grad_norm": 3.062469681325644, "learning_rate": 4.7412457390765423e-07, "loss": 0.9124, "step": 1071 }, { "epoch": 0.004745672672539732, "grad_norm": 4.232036063713837, "learning_rate": 4.7456726725397323e-07, "loss": 1.1735, "step": 1072 }, { "epoch": 0.004750099606002922, "grad_norm": 3.2300449590804488, "learning_rate": 4.750099606002922e-07, "loss": 1.035, "step": 1073 }, { "epoch": 0.004754526539466112, "grad_norm": 3.1489974714950626, "learning_rate": 4.7545265394661123e-07, "loss": 0.9282, "step": 1074 }, { "epoch": 0.004758953472929302, "grad_norm": 3.417283196493904, "learning_rate": 4.7589534729293023e-07, "loss": 0.9535, "step": 1075 }, { "epoch": 0.004763380406392492, "grad_norm": 3.6134304896035463, "learning_rate": 4.763380406392492e-07, "loss": 0.9435, "step": 1076 }, { "epoch": 0.0047678073398556816, "grad_norm": 4.190424710132398, "learning_rate": 4.7678073398556823e-07, "loss": 1.2766, "step": 1077 }, { "epoch": 0.004772234273318872, "grad_norm": 2.7878684168076076, "learning_rate": 4.772234273318872e-07, "loss": 0.9051, "step": 1078 }, { "epoch": 0.004776661206782062, "grad_norm": 3.027576666046329, "learning_rate": 4.776661206782063e-07, "loss": 0.8213, "step": 1079 }, { "epoch": 0.004781088140245252, "grad_norm": 3.038275356347558, "learning_rate": 4.781088140245252e-07, "loss": 1.0184, "step": 1080 }, { "epoch": 0.004785515073708442, "grad_norm": 3.204924983905961, "learning_rate": 4.785515073708443e-07, "loss": 0.676, "step": 1081 }, { "epoch": 0.004789942007171632, "grad_norm": 2.4380452700346416, "learning_rate": 4.789942007171633e-07, "loss": 0.7025, "step": 1082 }, { "epoch": 0.004794368940634822, "grad_norm": 3.9399900176308194, "learning_rate": 4.794368940634823e-07, "loss": 0.855, "step": 1083 }, { "epoch": 0.0047987958740980125, "grad_norm": 2.6021912332114083, "learning_rate": 4.798795874098012e-07, "loss": 0.6493, "step": 1084 }, { "epoch": 0.004803222807561202, "grad_norm": 3.096118510917079, "learning_rate": 4.803222807561203e-07, "loss": 0.7921, "step": 1085 }, { "epoch": 0.004807649741024393, "grad_norm": 3.107298771869077, "learning_rate": 4.807649741024393e-07, "loss": 0.7736, "step": 1086 }, { "epoch": 0.004812076674487582, "grad_norm": 4.092213631896101, "learning_rate": 4.812076674487583e-07, "loss": 1.3117, "step": 1087 }, { "epoch": 0.004816503607950773, "grad_norm": 2.571291316248094, "learning_rate": 4.816503607950773e-07, "loss": 0.5891, "step": 1088 }, { "epoch": 0.0048209305414139625, "grad_norm": 3.341004763714011, "learning_rate": 4.820930541413963e-07, "loss": 0.8619, "step": 1089 }, { "epoch": 0.004825357474877153, "grad_norm": 2.8984939728111763, "learning_rate": 4.825357474877153e-07, "loss": 0.9267, "step": 1090 }, { "epoch": 0.004829784408340343, "grad_norm": 3.4831391796300113, "learning_rate": 4.829784408340344e-07, "loss": 1.1476, "step": 1091 }, { "epoch": 0.004834211341803533, "grad_norm": 2.9408004371009104, "learning_rate": 4.834211341803533e-07, "loss": 1.013, "step": 1092 }, { "epoch": 0.004838638275266723, "grad_norm": 2.8852978906606146, "learning_rate": 4.838638275266723e-07, "loss": 0.97, "step": 1093 }, { "epoch": 0.004843065208729912, "grad_norm": 3.041162369568146, "learning_rate": 4.843065208729913e-07, "loss": 0.923, "step": 1094 }, { "epoch": 0.004847492142193103, "grad_norm": 3.0238211745397896, "learning_rate": 4.847492142193103e-07, "loss": 0.8127, "step": 1095 }, { "epoch": 0.0048519190756562925, "grad_norm": 2.733546517965465, "learning_rate": 4.851919075656293e-07, "loss": 1.0374, "step": 1096 }, { "epoch": 0.004856346009119483, "grad_norm": 2.962955471712187, "learning_rate": 4.856346009119484e-07, "loss": 1.1607, "step": 1097 }, { "epoch": 0.004860772942582673, "grad_norm": 3.6258741616451053, "learning_rate": 4.860772942582673e-07, "loss": 0.8942, "step": 1098 }, { "epoch": 0.004865199876045863, "grad_norm": 3.075211757585918, "learning_rate": 4.865199876045863e-07, "loss": 0.7454, "step": 1099 }, { "epoch": 0.004869626809509053, "grad_norm": 2.675148844814695, "learning_rate": 4.869626809509053e-07, "loss": 0.8488, "step": 1100 }, { "epoch": 0.004874053742972243, "grad_norm": 3.4541131098425133, "learning_rate": 4.874053742972244e-07, "loss": 1.1916, "step": 1101 }, { "epoch": 0.004878480676435433, "grad_norm": 3.0935822965845365, "learning_rate": 4.878480676435433e-07, "loss": 1.1465, "step": 1102 }, { "epoch": 0.0048829076098986235, "grad_norm": 4.203120858152073, "learning_rate": 4.882907609898624e-07, "loss": 1.3628, "step": 1103 }, { "epoch": 0.004887334543361813, "grad_norm": 3.5382508898464926, "learning_rate": 4.887334543361813e-07, "loss": 0.5484, "step": 1104 }, { "epoch": 0.004891761476825004, "grad_norm": 2.940355226623967, "learning_rate": 4.891761476825004e-07, "loss": 0.864, "step": 1105 }, { "epoch": 0.004896188410288193, "grad_norm": 2.8202719960962463, "learning_rate": 4.896188410288194e-07, "loss": 1.0029, "step": 1106 }, { "epoch": 0.004900615343751384, "grad_norm": 3.2231844304504964, "learning_rate": 4.900615343751384e-07, "loss": 0.4817, "step": 1107 }, { "epoch": 0.004905042277214573, "grad_norm": 4.083328438311959, "learning_rate": 4.905042277214573e-07, "loss": 0.7568, "step": 1108 }, { "epoch": 0.004909469210677764, "grad_norm": 2.9201787556763574, "learning_rate": 4.909469210677764e-07, "loss": 1.1418, "step": 1109 }, { "epoch": 0.004913896144140954, "grad_norm": 2.943275646117475, "learning_rate": 4.913896144140954e-07, "loss": 1.1794, "step": 1110 }, { "epoch": 0.004918323077604143, "grad_norm": 3.911160416717694, "learning_rate": 4.918323077604144e-07, "loss": 1.4972, "step": 1111 }, { "epoch": 0.004922750011067334, "grad_norm": 3.497446323001504, "learning_rate": 4.922750011067334e-07, "loss": 0.9416, "step": 1112 }, { "epoch": 0.004927176944530523, "grad_norm": 2.910547532381917, "learning_rate": 4.927176944530524e-07, "loss": 0.8824, "step": 1113 }, { "epoch": 0.004931603877993714, "grad_norm": 3.746415775074426, "learning_rate": 4.931603877993714e-07, "loss": 1.2555, "step": 1114 }, { "epoch": 0.0049360308114569035, "grad_norm": 3.7759530777366783, "learning_rate": 4.936030811456905e-07, "loss": 1.195, "step": 1115 }, { "epoch": 0.004940457744920094, "grad_norm": 2.65420436879788, "learning_rate": 4.940457744920094e-07, "loss": 0.6952, "step": 1116 }, { "epoch": 0.004944884678383284, "grad_norm": 2.765962861093201, "learning_rate": 4.944884678383284e-07, "loss": 1.0709, "step": 1117 }, { "epoch": 0.004949311611846474, "grad_norm": 3.4589813321876814, "learning_rate": 4.949311611846474e-07, "loss": 1.0008, "step": 1118 }, { "epoch": 0.004953738545309664, "grad_norm": 3.0540015575218216, "learning_rate": 4.953738545309664e-07, "loss": 1.1571, "step": 1119 }, { "epoch": 0.004958165478772854, "grad_norm": 2.7742003742511936, "learning_rate": 4.958165478772854e-07, "loss": 0.8892, "step": 1120 }, { "epoch": 0.004962592412236044, "grad_norm": 3.2012429633679256, "learning_rate": 4.962592412236045e-07, "loss": 0.9724, "step": 1121 }, { "epoch": 0.0049670193456992345, "grad_norm": 3.8637767404666366, "learning_rate": 4.967019345699234e-07, "loss": 1.0356, "step": 1122 }, { "epoch": 0.004971446279162424, "grad_norm": 3.2254862426754745, "learning_rate": 4.971446279162424e-07, "loss": 0.9737, "step": 1123 }, { "epoch": 0.004975873212625615, "grad_norm": 3.5302490267003974, "learning_rate": 4.975873212625614e-07, "loss": 0.8632, "step": 1124 }, { "epoch": 0.004980300146088804, "grad_norm": 3.009970018464252, "learning_rate": 4.980300146088805e-07, "loss": 0.8876, "step": 1125 }, { "epoch": 0.004984727079551995, "grad_norm": 3.2597004294204397, "learning_rate": 4.984727079551994e-07, "loss": 0.817, "step": 1126 }, { "epoch": 0.004989154013015184, "grad_norm": 3.105895209292835, "learning_rate": 4.989154013015185e-07, "loss": 0.6266, "step": 1127 }, { "epoch": 0.004993580946478374, "grad_norm": 2.512720851819566, "learning_rate": 4.993580946478374e-07, "loss": 0.6212, "step": 1128 }, { "epoch": 0.0049980078799415645, "grad_norm": 2.723742419586831, "learning_rate": 4.998007879941565e-07, "loss": 0.8012, "step": 1129 }, { "epoch": 0.005002434813404754, "grad_norm": 2.5138419050587917, "learning_rate": 5.002434813404755e-07, "loss": 0.7519, "step": 1130 }, { "epoch": 0.005006861746867945, "grad_norm": 2.7660492390745537, "learning_rate": 5.006861746867945e-07, "loss": 0.7481, "step": 1131 }, { "epoch": 0.005011288680331134, "grad_norm": 2.9768598170936778, "learning_rate": 5.011288680331134e-07, "loss": 0.7693, "step": 1132 }, { "epoch": 0.005015715613794325, "grad_norm": 2.6968698321513465, "learning_rate": 5.015715613794325e-07, "loss": 0.7713, "step": 1133 }, { "epoch": 0.0050201425472575145, "grad_norm": 3.893219122887571, "learning_rate": 5.020142547257515e-07, "loss": 1.0013, "step": 1134 }, { "epoch": 0.005024569480720705, "grad_norm": 3.620922456896492, "learning_rate": 5.024569480720705e-07, "loss": 0.942, "step": 1135 }, { "epoch": 0.005028996414183895, "grad_norm": 2.866558923898977, "learning_rate": 5.028996414183895e-07, "loss": 1.143, "step": 1136 }, { "epoch": 0.005033423347647085, "grad_norm": 2.5716426618412407, "learning_rate": 5.033423347647085e-07, "loss": 0.7607, "step": 1137 }, { "epoch": 0.005037850281110275, "grad_norm": 2.9382487821791137, "learning_rate": 5.037850281110275e-07, "loss": 0.937, "step": 1138 }, { "epoch": 0.005042277214573465, "grad_norm": 2.817305263249217, "learning_rate": 5.042277214573466e-07, "loss": 0.8237, "step": 1139 }, { "epoch": 0.005046704148036655, "grad_norm": 3.6264462458040345, "learning_rate": 5.046704148036655e-07, "loss": 0.8288, "step": 1140 }, { "epoch": 0.0050511310814998454, "grad_norm": 3.2518202401311807, "learning_rate": 5.051131081499845e-07, "loss": 1.1832, "step": 1141 }, { "epoch": 0.005055558014963035, "grad_norm": 3.363738508835309, "learning_rate": 5.055558014963035e-07, "loss": 0.9648, "step": 1142 }, { "epoch": 0.005059984948426226, "grad_norm": 2.676777913917982, "learning_rate": 5.059984948426226e-07, "loss": 1.045, "step": 1143 }, { "epoch": 0.005064411881889415, "grad_norm": 3.2635091747105998, "learning_rate": 5.064411881889415e-07, "loss": 0.6041, "step": 1144 }, { "epoch": 0.005068838815352605, "grad_norm": 3.082273656873582, "learning_rate": 5.068838815352606e-07, "loss": 0.7339, "step": 1145 }, { "epoch": 0.005073265748815795, "grad_norm": 2.855724307569371, "learning_rate": 5.073265748815795e-07, "loss": 0.8191, "step": 1146 }, { "epoch": 0.005077692682278985, "grad_norm": 3.4352359520172318, "learning_rate": 5.077692682278985e-07, "loss": 0.9614, "step": 1147 }, { "epoch": 0.0050821196157421755, "grad_norm": 3.2857219141899288, "learning_rate": 5.082119615742175e-07, "loss": 1.2198, "step": 1148 }, { "epoch": 0.005086546549205365, "grad_norm": 2.9162142473109296, "learning_rate": 5.086546549205366e-07, "loss": 0.8187, "step": 1149 }, { "epoch": 0.005090973482668556, "grad_norm": 3.0668668495769, "learning_rate": 5.090973482668555e-07, "loss": 0.9019, "step": 1150 }, { "epoch": 0.005095400416131745, "grad_norm": 2.8391952344786056, "learning_rate": 5.095400416131746e-07, "loss": 0.7162, "step": 1151 }, { "epoch": 0.005099827349594936, "grad_norm": 3.8316477000338436, "learning_rate": 5.099827349594935e-07, "loss": 0.9779, "step": 1152 }, { "epoch": 0.0051042542830581255, "grad_norm": 3.0440189619760343, "learning_rate": 5.104254283058126e-07, "loss": 0.9206, "step": 1153 }, { "epoch": 0.005108681216521316, "grad_norm": 3.084345411437815, "learning_rate": 5.108681216521317e-07, "loss": 0.9109, "step": 1154 }, { "epoch": 0.005113108149984506, "grad_norm": 3.752862608585949, "learning_rate": 5.113108149984506e-07, "loss": 1.1508, "step": 1155 }, { "epoch": 0.005117535083447696, "grad_norm": 4.001531021779913, "learning_rate": 5.117535083447697e-07, "loss": 1.2635, "step": 1156 }, { "epoch": 0.005121962016910886, "grad_norm": 3.8227000924064427, "learning_rate": 5.121962016910886e-07, "loss": 0.9682, "step": 1157 }, { "epoch": 0.005126388950374076, "grad_norm": 3.1981436141293056, "learning_rate": 5.126388950374077e-07, "loss": 0.9638, "step": 1158 }, { "epoch": 0.005130815883837266, "grad_norm": 3.331978987028245, "learning_rate": 5.130815883837267e-07, "loss": 0.9808, "step": 1159 }, { "epoch": 0.0051352428173004555, "grad_norm": 3.24057155204021, "learning_rate": 5.135242817300457e-07, "loss": 0.8541, "step": 1160 }, { "epoch": 0.005139669750763646, "grad_norm": 2.8591797880901235, "learning_rate": 5.139669750763646e-07, "loss": 0.8612, "step": 1161 }, { "epoch": 0.005144096684226836, "grad_norm": 2.6149715833340985, "learning_rate": 5.144096684226837e-07, "loss": 0.7448, "step": 1162 }, { "epoch": 0.005148523617690026, "grad_norm": 2.867142472145841, "learning_rate": 5.148523617690027e-07, "loss": 0.709, "step": 1163 }, { "epoch": 0.005152950551153216, "grad_norm": 2.9085314502367505, "learning_rate": 5.152950551153217e-07, "loss": 1.1252, "step": 1164 }, { "epoch": 0.005157377484616406, "grad_norm": 3.9681924239549327, "learning_rate": 5.157377484616407e-07, "loss": 1.1362, "step": 1165 }, { "epoch": 0.005161804418079596, "grad_norm": 3.080107436134735, "learning_rate": 5.161804418079597e-07, "loss": 1.1184, "step": 1166 }, { "epoch": 0.0051662313515427865, "grad_norm": 2.9301554264053293, "learning_rate": 5.166231351542787e-07, "loss": 0.7502, "step": 1167 }, { "epoch": 0.005170658285005976, "grad_norm": 2.836024283984258, "learning_rate": 5.170658285005978e-07, "loss": 0.4395, "step": 1168 }, { "epoch": 0.005175085218469167, "grad_norm": 3.191167805097309, "learning_rate": 5.175085218469167e-07, "loss": 0.8222, "step": 1169 }, { "epoch": 0.005179512151932356, "grad_norm": 3.096266130177262, "learning_rate": 5.179512151932357e-07, "loss": 1.0908, "step": 1170 }, { "epoch": 0.005183939085395547, "grad_norm": 3.063678004678825, "learning_rate": 5.183939085395547e-07, "loss": 0.9679, "step": 1171 }, { "epoch": 0.005188366018858736, "grad_norm": 3.5715542813767653, "learning_rate": 5.188366018858737e-07, "loss": 0.5772, "step": 1172 }, { "epoch": 0.005192792952321927, "grad_norm": 3.141945206731455, "learning_rate": 5.192792952321927e-07, "loss": 0.8131, "step": 1173 }, { "epoch": 0.005197219885785117, "grad_norm": 3.8289945334870517, "learning_rate": 5.197219885785118e-07, "loss": 1.1803, "step": 1174 }, { "epoch": 0.005201646819248307, "grad_norm": 3.666708776696213, "learning_rate": 5.201646819248307e-07, "loss": 1.1419, "step": 1175 }, { "epoch": 0.005206073752711497, "grad_norm": 2.4348966573289865, "learning_rate": 5.206073752711497e-07, "loss": 0.7399, "step": 1176 }, { "epoch": 0.005210500686174686, "grad_norm": 3.810371176385697, "learning_rate": 5.210500686174687e-07, "loss": 1.1426, "step": 1177 }, { "epoch": 0.005214927619637877, "grad_norm": 2.8889605497619586, "learning_rate": 5.214927619637878e-07, "loss": 0.9755, "step": 1178 }, { "epoch": 0.0052193545531010665, "grad_norm": 3.1292860023450038, "learning_rate": 5.219354553101067e-07, "loss": 1.2299, "step": 1179 }, { "epoch": 0.005223781486564257, "grad_norm": 5.0236946698869, "learning_rate": 5.223781486564258e-07, "loss": 1.2769, "step": 1180 }, { "epoch": 0.005228208420027447, "grad_norm": 3.523329185144008, "learning_rate": 5.228208420027447e-07, "loss": 1.1739, "step": 1181 }, { "epoch": 0.005232635353490637, "grad_norm": 3.886527540824922, "learning_rate": 5.232635353490638e-07, "loss": 0.9649, "step": 1182 }, { "epoch": 0.005237062286953827, "grad_norm": 3.344891923628684, "learning_rate": 5.237062286953828e-07, "loss": 0.6864, "step": 1183 }, { "epoch": 0.005241489220417017, "grad_norm": 3.9024904635394453, "learning_rate": 5.241489220417018e-07, "loss": 1.1673, "step": 1184 }, { "epoch": 0.005245916153880207, "grad_norm": 3.215596461632348, "learning_rate": 5.245916153880207e-07, "loss": 0.9543, "step": 1185 }, { "epoch": 0.0052503430873433975, "grad_norm": 4.6900103018109585, "learning_rate": 5.250343087343398e-07, "loss": 1.0713, "step": 1186 }, { "epoch": 0.005254770020806587, "grad_norm": 2.8074072185486214, "learning_rate": 5.254770020806588e-07, "loss": 0.5737, "step": 1187 }, { "epoch": 0.005259196954269778, "grad_norm": 3.4132735771768785, "learning_rate": 5.259196954269778e-07, "loss": 0.8681, "step": 1188 }, { "epoch": 0.005263623887732967, "grad_norm": 2.4841957322168002, "learning_rate": 5.263623887732968e-07, "loss": 0.6308, "step": 1189 }, { "epoch": 0.005268050821196158, "grad_norm": 2.7159903376086243, "learning_rate": 5.268050821196158e-07, "loss": 0.6408, "step": 1190 }, { "epoch": 0.005272477754659347, "grad_norm": 3.646774852049493, "learning_rate": 5.272477754659348e-07, "loss": 0.8901, "step": 1191 }, { "epoch": 0.005276904688122538, "grad_norm": 2.755664817634019, "learning_rate": 5.276904688122539e-07, "loss": 0.693, "step": 1192 }, { "epoch": 0.0052813316215857276, "grad_norm": 2.972669834549896, "learning_rate": 5.281331621585728e-07, "loss": 1.0899, "step": 1193 }, { "epoch": 0.005285758555048917, "grad_norm": 4.671887598373455, "learning_rate": 5.285758555048918e-07, "loss": 1.0401, "step": 1194 }, { "epoch": 0.005290185488512108, "grad_norm": 2.7964936926138773, "learning_rate": 5.290185488512108e-07, "loss": 0.897, "step": 1195 }, { "epoch": 0.005294612421975297, "grad_norm": 3.33140944377848, "learning_rate": 5.294612421975298e-07, "loss": 0.9681, "step": 1196 }, { "epoch": 0.005299039355438488, "grad_norm": 4.121323145474122, "learning_rate": 5.299039355438488e-07, "loss": 1.3297, "step": 1197 }, { "epoch": 0.0053034662889016775, "grad_norm": 2.9944927131372427, "learning_rate": 5.303466288901679e-07, "loss": 1.0259, "step": 1198 }, { "epoch": 0.005307893222364868, "grad_norm": 3.121509883584909, "learning_rate": 5.307893222364868e-07, "loss": 0.7924, "step": 1199 }, { "epoch": 0.005312320155828058, "grad_norm": 2.5185941799600062, "learning_rate": 5.312320155828058e-07, "loss": 0.9292, "step": 1200 }, { "epoch": 0.005316747089291248, "grad_norm": 2.651538370602665, "learning_rate": 5.316747089291248e-07, "loss": 0.9539, "step": 1201 }, { "epoch": 0.005321174022754438, "grad_norm": 3.3629071870480662, "learning_rate": 5.321174022754439e-07, "loss": 0.9458, "step": 1202 }, { "epoch": 0.005325600956217628, "grad_norm": 3.8009971247336796, "learning_rate": 5.325600956217628e-07, "loss": 1.2017, "step": 1203 }, { "epoch": 0.005330027889680818, "grad_norm": 3.537082980647394, "learning_rate": 5.330027889680819e-07, "loss": 1.2264, "step": 1204 }, { "epoch": 0.0053344548231440084, "grad_norm": 2.824346181319655, "learning_rate": 5.334454823144008e-07, "loss": 0.9442, "step": 1205 }, { "epoch": 0.005338881756607198, "grad_norm": 3.0630128400970604, "learning_rate": 5.338881756607199e-07, "loss": 0.7808, "step": 1206 }, { "epoch": 0.005343308690070389, "grad_norm": 3.4895210069710076, "learning_rate": 5.343308690070389e-07, "loss": 0.8197, "step": 1207 }, { "epoch": 0.005347735623533578, "grad_norm": 2.898771805314197, "learning_rate": 5.347735623533579e-07, "loss": 0.8569, "step": 1208 }, { "epoch": 0.005352162556996769, "grad_norm": 2.9842343675644094, "learning_rate": 5.352162556996768e-07, "loss": 0.9236, "step": 1209 }, { "epoch": 0.005356589490459958, "grad_norm": 2.6312551593081297, "learning_rate": 5.356589490459959e-07, "loss": 0.6923, "step": 1210 }, { "epoch": 0.005361016423923148, "grad_norm": 3.723006987577352, "learning_rate": 5.361016423923149e-07, "loss": 0.7587, "step": 1211 }, { "epoch": 0.0053654433573863385, "grad_norm": 3.0278993607738185, "learning_rate": 5.365443357386339e-07, "loss": 0.9695, "step": 1212 }, { "epoch": 0.005369870290849528, "grad_norm": 2.8323534121985805, "learning_rate": 5.369870290849529e-07, "loss": 0.9594, "step": 1213 }, { "epoch": 0.005374297224312719, "grad_norm": 3.526314786373807, "learning_rate": 5.374297224312719e-07, "loss": 1.1336, "step": 1214 }, { "epoch": 0.005378724157775908, "grad_norm": 2.861974125474566, "learning_rate": 5.378724157775909e-07, "loss": 0.8355, "step": 1215 }, { "epoch": 0.005383151091239099, "grad_norm": 3.005169451785124, "learning_rate": 5.3831510912391e-07, "loss": 0.9266, "step": 1216 }, { "epoch": 0.0053875780247022885, "grad_norm": 3.9821188134240155, "learning_rate": 5.387578024702289e-07, "loss": 1.2714, "step": 1217 }, { "epoch": 0.005392004958165479, "grad_norm": 4.112957118885218, "learning_rate": 5.392004958165479e-07, "loss": 1.0168, "step": 1218 }, { "epoch": 0.005396431891628669, "grad_norm": 4.127135772642141, "learning_rate": 5.396431891628669e-07, "loss": 1.2844, "step": 1219 }, { "epoch": 0.005400858825091859, "grad_norm": 3.0437662066454583, "learning_rate": 5.40085882509186e-07, "loss": 1.0204, "step": 1220 }, { "epoch": 0.005405285758555049, "grad_norm": 3.0005750215009317, "learning_rate": 5.405285758555049e-07, "loss": 0.9543, "step": 1221 }, { "epoch": 0.005409712692018239, "grad_norm": 3.2290614074033557, "learning_rate": 5.40971269201824e-07, "loss": 0.9941, "step": 1222 }, { "epoch": 0.005414139625481429, "grad_norm": 2.8269284298966277, "learning_rate": 5.414139625481429e-07, "loss": 0.7914, "step": 1223 }, { "epoch": 0.005418566558944619, "grad_norm": 2.8383978008566957, "learning_rate": 5.418566558944619e-07, "loss": 0.7923, "step": 1224 }, { "epoch": 0.005422993492407809, "grad_norm": 2.990289912686335, "learning_rate": 5.422993492407809e-07, "loss": 0.87, "step": 1225 }, { "epoch": 0.0054274204258709996, "grad_norm": 3.6051597918535037, "learning_rate": 5.427420425871e-07, "loss": 1.3108, "step": 1226 }, { "epoch": 0.005431847359334189, "grad_norm": 3.275900677772709, "learning_rate": 5.43184735933419e-07, "loss": 0.9418, "step": 1227 }, { "epoch": 0.005436274292797379, "grad_norm": 3.0033922139567535, "learning_rate": 5.43627429279738e-07, "loss": 0.6777, "step": 1228 }, { "epoch": 0.005440701226260569, "grad_norm": 3.1978471239285318, "learning_rate": 5.440701226260569e-07, "loss": 1.0096, "step": 1229 }, { "epoch": 0.005445128159723759, "grad_norm": 3.1280972913452376, "learning_rate": 5.44512815972376e-07, "loss": 1.1128, "step": 1230 }, { "epoch": 0.0054495550931869495, "grad_norm": 3.365487888471362, "learning_rate": 5.44955509318695e-07, "loss": 1.0067, "step": 1231 }, { "epoch": 0.005453982026650139, "grad_norm": 3.0693251933789116, "learning_rate": 5.45398202665014e-07, "loss": 0.9055, "step": 1232 }, { "epoch": 0.00545840896011333, "grad_norm": 2.5758648403548627, "learning_rate": 5.45840896011333e-07, "loss": 0.7547, "step": 1233 }, { "epoch": 0.005462835893576519, "grad_norm": 3.186610891592706, "learning_rate": 5.46283589357652e-07, "loss": 1.1235, "step": 1234 }, { "epoch": 0.00546726282703971, "grad_norm": 3.55170837015472, "learning_rate": 5.46726282703971e-07, "loss": 1.0863, "step": 1235 }, { "epoch": 0.0054716897605028994, "grad_norm": 3.2573524663951, "learning_rate": 5.471689760502901e-07, "loss": 1.0508, "step": 1236 }, { "epoch": 0.00547611669396609, "grad_norm": 3.3586522115941784, "learning_rate": 5.47611669396609e-07, "loss": 0.9015, "step": 1237 }, { "epoch": 0.00548054362742928, "grad_norm": 3.2321254279543337, "learning_rate": 5.48054362742928e-07, "loss": 0.9975, "step": 1238 }, { "epoch": 0.00548497056089247, "grad_norm": 3.471605276250946, "learning_rate": 5.48497056089247e-07, "loss": 0.9542, "step": 1239 }, { "epoch": 0.00548939749435566, "grad_norm": 2.6629037151084063, "learning_rate": 5.489397494355661e-07, "loss": 0.7299, "step": 1240 }, { "epoch": 0.00549382442781885, "grad_norm": 2.862617562897706, "learning_rate": 5.49382442781885e-07, "loss": 0.5321, "step": 1241 }, { "epoch": 0.00549825136128204, "grad_norm": 2.988714003271417, "learning_rate": 5.498251361282041e-07, "loss": 1.0756, "step": 1242 }, { "epoch": 0.00550267829474523, "grad_norm": 2.6724702533694944, "learning_rate": 5.50267829474523e-07, "loss": 0.9462, "step": 1243 }, { "epoch": 0.00550710522820842, "grad_norm": 2.9774511998925344, "learning_rate": 5.507105228208421e-07, "loss": 0.9855, "step": 1244 }, { "epoch": 0.00551153216167161, "grad_norm": 3.439049769997075, "learning_rate": 5.511532161671611e-07, "loss": 1.2097, "step": 1245 }, { "epoch": 0.0055159590951348, "grad_norm": 3.1741867812572346, "learning_rate": 5.515959095134801e-07, "loss": 1.1688, "step": 1246 }, { "epoch": 0.00552038602859799, "grad_norm": 3.3390910208401445, "learning_rate": 5.52038602859799e-07, "loss": 0.9871, "step": 1247 }, { "epoch": 0.00552481296206118, "grad_norm": 2.8221262546590853, "learning_rate": 5.524812962061181e-07, "loss": 0.9312, "step": 1248 }, { "epoch": 0.00552923989552437, "grad_norm": 3.595040884414574, "learning_rate": 5.52923989552437e-07, "loss": 0.8777, "step": 1249 }, { "epoch": 0.0055336668289875605, "grad_norm": 2.8574437204203695, "learning_rate": 5.533666828987561e-07, "loss": 0.8414, "step": 1250 }, { "epoch": 0.00553809376245075, "grad_norm": 4.122446062468255, "learning_rate": 5.538093762450751e-07, "loss": 1.3236, "step": 1251 }, { "epoch": 0.005542520695913941, "grad_norm": 3.856555572121472, "learning_rate": 5.542520695913941e-07, "loss": 1.338, "step": 1252 }, { "epoch": 0.00554694762937713, "grad_norm": 2.8648469586481298, "learning_rate": 5.54694762937713e-07, "loss": 0.7625, "step": 1253 }, { "epoch": 0.005551374562840321, "grad_norm": 3.2922757170193027, "learning_rate": 5.551374562840321e-07, "loss": 0.5653, "step": 1254 }, { "epoch": 0.00555580149630351, "grad_norm": 3.111612007933348, "learning_rate": 5.555801496303511e-07, "loss": 0.8516, "step": 1255 }, { "epoch": 0.005560228429766701, "grad_norm": 2.8264855657266805, "learning_rate": 5.560228429766701e-07, "loss": 1.0009, "step": 1256 }, { "epoch": 0.0055646553632298906, "grad_norm": 3.6884720287704313, "learning_rate": 5.564655363229891e-07, "loss": 1.0109, "step": 1257 }, { "epoch": 0.005569082296693081, "grad_norm": 3.884521291172319, "learning_rate": 5.569082296693081e-07, "loss": 0.5887, "step": 1258 }, { "epoch": 0.005573509230156271, "grad_norm": 4.495239000078489, "learning_rate": 5.573509230156271e-07, "loss": 1.3213, "step": 1259 }, { "epoch": 0.005577936163619461, "grad_norm": 2.8641881291676734, "learning_rate": 5.577936163619462e-07, "loss": 1.03, "step": 1260 }, { "epoch": 0.005582363097082651, "grad_norm": 2.8429016639544424, "learning_rate": 5.582363097082651e-07, "loss": 0.9403, "step": 1261 }, { "epoch": 0.0055867900305458405, "grad_norm": 2.970128641436965, "learning_rate": 5.586790030545841e-07, "loss": 0.9299, "step": 1262 }, { "epoch": 0.005591216964009031, "grad_norm": 2.998917565644391, "learning_rate": 5.591216964009031e-07, "loss": 0.7818, "step": 1263 }, { "epoch": 0.005595643897472221, "grad_norm": 3.1894936323776655, "learning_rate": 5.595643897472222e-07, "loss": 0.7795, "step": 1264 }, { "epoch": 0.005600070830935411, "grad_norm": 3.008852162190257, "learning_rate": 5.600070830935411e-07, "loss": 0.9114, "step": 1265 }, { "epoch": 0.005604497764398601, "grad_norm": 3.598634197148823, "learning_rate": 5.604497764398602e-07, "loss": 1.3854, "step": 1266 }, { "epoch": 0.005608924697861791, "grad_norm": 3.2497257719088464, "learning_rate": 5.608924697861791e-07, "loss": 0.9058, "step": 1267 }, { "epoch": 0.005613351631324981, "grad_norm": 3.796881053127154, "learning_rate": 5.613351631324982e-07, "loss": 0.9431, "step": 1268 }, { "epoch": 0.0056177785647881714, "grad_norm": 2.510095144316084, "learning_rate": 5.617778564788172e-07, "loss": 0.7253, "step": 1269 }, { "epoch": 0.005622205498251361, "grad_norm": 3.214638934634937, "learning_rate": 5.622205498251362e-07, "loss": 0.8705, "step": 1270 }, { "epoch": 0.005626632431714552, "grad_norm": 2.694402070646404, "learning_rate": 5.626632431714551e-07, "loss": 0.8251, "step": 1271 }, { "epoch": 0.005631059365177741, "grad_norm": 3.2489035501610015, "learning_rate": 5.631059365177742e-07, "loss": 0.9085, "step": 1272 }, { "epoch": 0.005635486298640932, "grad_norm": 2.871932753441504, "learning_rate": 5.635486298640931e-07, "loss": 0.8949, "step": 1273 }, { "epoch": 0.005639913232104121, "grad_norm": 2.583720030056591, "learning_rate": 5.639913232104122e-07, "loss": 0.6415, "step": 1274 }, { "epoch": 0.005644340165567312, "grad_norm": 2.7308047091388885, "learning_rate": 5.644340165567312e-07, "loss": 0.8312, "step": 1275 }, { "epoch": 0.0056487670990305015, "grad_norm": 2.474321246902381, "learning_rate": 5.648767099030502e-07, "loss": 0.7059, "step": 1276 }, { "epoch": 0.005653194032493692, "grad_norm": 3.6512808247107835, "learning_rate": 5.653194032493691e-07, "loss": 1.1602, "step": 1277 }, { "epoch": 0.005657620965956882, "grad_norm": 3.2193053375100673, "learning_rate": 5.657620965956882e-07, "loss": 0.7015, "step": 1278 }, { "epoch": 0.005662047899420071, "grad_norm": 3.2981446769884832, "learning_rate": 5.662047899420072e-07, "loss": 0.6152, "step": 1279 }, { "epoch": 0.005666474832883262, "grad_norm": 2.9035591420733717, "learning_rate": 5.666474832883262e-07, "loss": 1.0615, "step": 1280 }, { "epoch": 0.0056709017663464515, "grad_norm": 3.4094747161399845, "learning_rate": 5.670901766346452e-07, "loss": 0.7444, "step": 1281 }, { "epoch": 0.005675328699809642, "grad_norm": 2.8848421747577055, "learning_rate": 5.675328699809642e-07, "loss": 0.8334, "step": 1282 }, { "epoch": 0.005679755633272832, "grad_norm": 2.5352795029998196, "learning_rate": 5.679755633272832e-07, "loss": 0.8322, "step": 1283 }, { "epoch": 0.005684182566736022, "grad_norm": 3.5443648527339082, "learning_rate": 5.684182566736023e-07, "loss": 0.7699, "step": 1284 }, { "epoch": 0.005688609500199212, "grad_norm": 2.9965057000463573, "learning_rate": 5.688609500199212e-07, "loss": 1.0071, "step": 1285 }, { "epoch": 0.005693036433662402, "grad_norm": 3.2321394073091954, "learning_rate": 5.693036433662402e-07, "loss": 0.8099, "step": 1286 }, { "epoch": 0.005697463367125592, "grad_norm": 4.081764264869503, "learning_rate": 5.697463367125592e-07, "loss": 1.2841, "step": 1287 }, { "epoch": 0.005701890300588782, "grad_norm": 3.0080483797009907, "learning_rate": 5.701890300588783e-07, "loss": 0.7636, "step": 1288 }, { "epoch": 0.005706317234051972, "grad_norm": 2.8956508185936847, "learning_rate": 5.706317234051972e-07, "loss": 0.6876, "step": 1289 }, { "epoch": 0.005710744167515163, "grad_norm": 3.161573812650825, "learning_rate": 5.710744167515163e-07, "loss": 1.2168, "step": 1290 }, { "epoch": 0.005715171100978352, "grad_norm": 3.1092975577209447, "learning_rate": 5.715171100978352e-07, "loss": 0.7325, "step": 1291 }, { "epoch": 0.005719598034441543, "grad_norm": 2.55515962447549, "learning_rate": 5.719598034441543e-07, "loss": 0.8952, "step": 1292 }, { "epoch": 0.005724024967904732, "grad_norm": 3.1274658297723152, "learning_rate": 5.724024967904733e-07, "loss": 0.8239, "step": 1293 }, { "epoch": 0.005728451901367923, "grad_norm": 2.4736994881622016, "learning_rate": 5.728451901367923e-07, "loss": 0.5614, "step": 1294 }, { "epoch": 0.0057328788348311125, "grad_norm": 3.0826976402190294, "learning_rate": 5.732878834831112e-07, "loss": 1.0993, "step": 1295 }, { "epoch": 0.005737305768294302, "grad_norm": 4.140768761319629, "learning_rate": 5.737305768294303e-07, "loss": 1.2002, "step": 1296 }, { "epoch": 0.005741732701757493, "grad_norm": 3.2094078163173134, "learning_rate": 5.741732701757493e-07, "loss": 1.0416, "step": 1297 }, { "epoch": 0.005746159635220682, "grad_norm": 2.974390833633511, "learning_rate": 5.746159635220683e-07, "loss": 0.784, "step": 1298 }, { "epoch": 0.005750586568683873, "grad_norm": 3.4687510086123554, "learning_rate": 5.750586568683873e-07, "loss": 1.1056, "step": 1299 }, { "epoch": 0.0057550135021470624, "grad_norm": 2.9291422095439423, "learning_rate": 5.755013502147063e-07, "loss": 0.8502, "step": 1300 }, { "epoch": 0.005759440435610253, "grad_norm": 2.8204502384895433, "learning_rate": 5.759440435610253e-07, "loss": 0.8063, "step": 1301 }, { "epoch": 0.005763867369073443, "grad_norm": 3.459042976580426, "learning_rate": 5.763867369073443e-07, "loss": 1.0033, "step": 1302 }, { "epoch": 0.005768294302536633, "grad_norm": 3.0875442660420216, "learning_rate": 5.768294302536633e-07, "loss": 0.8991, "step": 1303 }, { "epoch": 0.005772721235999823, "grad_norm": 3.129780760302496, "learning_rate": 5.772721235999824e-07, "loss": 1.158, "step": 1304 }, { "epoch": 0.005777148169463013, "grad_norm": 2.986798516181502, "learning_rate": 5.777148169463013e-07, "loss": 0.6232, "step": 1305 }, { "epoch": 0.005781575102926203, "grad_norm": 3.848500152944883, "learning_rate": 5.781575102926203e-07, "loss": 0.8703, "step": 1306 }, { "epoch": 0.005786002036389393, "grad_norm": 3.5843680412920964, "learning_rate": 5.786002036389393e-07, "loss": 1.1382, "step": 1307 }, { "epoch": 0.005790428969852583, "grad_norm": 3.166200916931839, "learning_rate": 5.790428969852584e-07, "loss": 0.9791, "step": 1308 }, { "epoch": 0.0057948559033157735, "grad_norm": 3.383197314740902, "learning_rate": 5.794855903315773e-07, "loss": 0.9532, "step": 1309 }, { "epoch": 0.005799282836778963, "grad_norm": 3.3851220594379914, "learning_rate": 5.799282836778964e-07, "loss": 0.6242, "step": 1310 }, { "epoch": 0.005803709770242154, "grad_norm": 3.265654175349635, "learning_rate": 5.803709770242153e-07, "loss": 0.6882, "step": 1311 }, { "epoch": 0.005808136703705343, "grad_norm": 3.3955407923513174, "learning_rate": 5.808136703705344e-07, "loss": 0.6659, "step": 1312 }, { "epoch": 0.005812563637168533, "grad_norm": 2.653769904315235, "learning_rate": 5.812563637168535e-07, "loss": 0.6508, "step": 1313 }, { "epoch": 0.0058169905706317235, "grad_norm": 2.7558119755064814, "learning_rate": 5.816990570631724e-07, "loss": 0.6851, "step": 1314 }, { "epoch": 0.005821417504094913, "grad_norm": 3.485007781099308, "learning_rate": 5.821417504094913e-07, "loss": 1.1576, "step": 1315 }, { "epoch": 0.005825844437558104, "grad_norm": 3.6936093575958293, "learning_rate": 5.825844437558104e-07, "loss": 1.2485, "step": 1316 }, { "epoch": 0.005830271371021293, "grad_norm": 2.9238673683988887, "learning_rate": 5.830271371021295e-07, "loss": 0.7515, "step": 1317 }, { "epoch": 0.005834698304484484, "grad_norm": 2.9659783488167903, "learning_rate": 5.834698304484484e-07, "loss": 0.7956, "step": 1318 }, { "epoch": 0.005839125237947673, "grad_norm": 3.3432214605086177, "learning_rate": 5.839125237947675e-07, "loss": 1.1745, "step": 1319 }, { "epoch": 0.005843552171410864, "grad_norm": 3.3950570087075587, "learning_rate": 5.843552171410864e-07, "loss": 0.8686, "step": 1320 }, { "epoch": 0.0058479791048740536, "grad_norm": 3.0203539131128228, "learning_rate": 5.847979104874055e-07, "loss": 0.8142, "step": 1321 }, { "epoch": 0.005852406038337244, "grad_norm": 3.6622701370664106, "learning_rate": 5.852406038337245e-07, "loss": 0.6793, "step": 1322 }, { "epoch": 0.005856832971800434, "grad_norm": 2.872227971623828, "learning_rate": 5.856832971800435e-07, "loss": 0.6505, "step": 1323 }, { "epoch": 0.005861259905263624, "grad_norm": 3.1724773170516483, "learning_rate": 5.861259905263624e-07, "loss": 1.2655, "step": 1324 }, { "epoch": 0.005865686838726814, "grad_norm": 2.873684940954204, "learning_rate": 5.865686838726815e-07, "loss": 0.9249, "step": 1325 }, { "epoch": 0.005870113772190004, "grad_norm": 3.230733417172174, "learning_rate": 5.870113772190004e-07, "loss": 1.0036, "step": 1326 }, { "epoch": 0.005874540705653194, "grad_norm": 3.5009015942223582, "learning_rate": 5.874540705653195e-07, "loss": 0.9311, "step": 1327 }, { "epoch": 0.005878967639116384, "grad_norm": 3.036155984316553, "learning_rate": 5.878967639116385e-07, "loss": 0.7417, "step": 1328 }, { "epoch": 0.005883394572579574, "grad_norm": 3.569714721787205, "learning_rate": 5.883394572579575e-07, "loss": 0.7877, "step": 1329 }, { "epoch": 0.005887821506042764, "grad_norm": 2.6807836614874674, "learning_rate": 5.887821506042764e-07, "loss": 0.728, "step": 1330 }, { "epoch": 0.005892248439505954, "grad_norm": 2.976467885506168, "learning_rate": 5.892248439505955e-07, "loss": 0.6239, "step": 1331 }, { "epoch": 0.005896675372969144, "grad_norm": 2.808921398943963, "learning_rate": 5.896675372969145e-07, "loss": 0.8614, "step": 1332 }, { "epoch": 0.0059011023064323345, "grad_norm": 2.8149127920051007, "learning_rate": 5.901102306432335e-07, "loss": 0.6447, "step": 1333 }, { "epoch": 0.005905529239895524, "grad_norm": 2.9782362156917945, "learning_rate": 5.905529239895525e-07, "loss": 0.768, "step": 1334 }, { "epoch": 0.005909956173358715, "grad_norm": 2.6811712723845558, "learning_rate": 5.909956173358715e-07, "loss": 0.7171, "step": 1335 }, { "epoch": 0.005914383106821904, "grad_norm": 3.995412802618899, "learning_rate": 5.914383106821905e-07, "loss": 1.0915, "step": 1336 }, { "epoch": 0.005918810040285095, "grad_norm": 3.1684450730850857, "learning_rate": 5.918810040285096e-07, "loss": 0.8892, "step": 1337 }, { "epoch": 0.005923236973748284, "grad_norm": 3.2737957377042686, "learning_rate": 5.923236973748285e-07, "loss": 1.086, "step": 1338 }, { "epoch": 0.005927663907211475, "grad_norm": 2.9095659816654322, "learning_rate": 5.927663907211475e-07, "loss": 1.0665, "step": 1339 }, { "epoch": 0.0059320908406746645, "grad_norm": 3.3568281848528017, "learning_rate": 5.932090840674665e-07, "loss": 1.1463, "step": 1340 }, { "epoch": 0.005936517774137855, "grad_norm": 2.9048785208321934, "learning_rate": 5.936517774137856e-07, "loss": 0.6209, "step": 1341 }, { "epoch": 0.005940944707601045, "grad_norm": 3.156895589932803, "learning_rate": 5.940944707601045e-07, "loss": 0.7402, "step": 1342 }, { "epoch": 0.005945371641064235, "grad_norm": 2.734824798711056, "learning_rate": 5.945371641064236e-07, "loss": 0.7411, "step": 1343 }, { "epoch": 0.005949798574527425, "grad_norm": 2.551855145816929, "learning_rate": 5.949798574527425e-07, "loss": 0.7439, "step": 1344 }, { "epoch": 0.0059542255079906145, "grad_norm": 3.110906079360838, "learning_rate": 5.954225507990616e-07, "loss": 1.0189, "step": 1345 }, { "epoch": 0.005958652441453805, "grad_norm": 2.99449120067991, "learning_rate": 5.958652441453806e-07, "loss": 0.8346, "step": 1346 }, { "epoch": 0.005963079374916995, "grad_norm": 2.750305564719132, "learning_rate": 5.963079374916996e-07, "loss": 0.775, "step": 1347 }, { "epoch": 0.005967506308380185, "grad_norm": 3.073197945934144, "learning_rate": 5.967506308380185e-07, "loss": 0.8963, "step": 1348 }, { "epoch": 0.005971933241843375, "grad_norm": 4.192089034667243, "learning_rate": 5.971933241843376e-07, "loss": 1.4699, "step": 1349 }, { "epoch": 0.005976360175306565, "grad_norm": 2.582431725908126, "learning_rate": 5.976360175306565e-07, "loss": 0.7089, "step": 1350 }, { "epoch": 0.005980787108769755, "grad_norm": 3.0554901245604604, "learning_rate": 5.980787108769756e-07, "loss": 0.6237, "step": 1351 }, { "epoch": 0.005985214042232945, "grad_norm": 3.291111827367943, "learning_rate": 5.985214042232946e-07, "loss": 0.8797, "step": 1352 }, { "epoch": 0.005989640975696135, "grad_norm": 3.126617846521076, "learning_rate": 5.989640975696136e-07, "loss": 0.7769, "step": 1353 }, { "epoch": 0.005994067909159326, "grad_norm": 2.912542780721538, "learning_rate": 5.994067909159325e-07, "loss": 0.6888, "step": 1354 }, { "epoch": 0.005998494842622515, "grad_norm": 3.142079780328701, "learning_rate": 5.998494842622516e-07, "loss": 0.99, "step": 1355 }, { "epoch": 0.006002921776085706, "grad_norm": 2.7045573983139426, "learning_rate": 6.002921776085706e-07, "loss": 0.901, "step": 1356 }, { "epoch": 0.006007348709548895, "grad_norm": 3.034940117186635, "learning_rate": 6.007348709548896e-07, "loss": 0.8071, "step": 1357 }, { "epoch": 0.006011775643012086, "grad_norm": 2.7337710535297046, "learning_rate": 6.011775643012086e-07, "loss": 1.1731, "step": 1358 }, { "epoch": 0.0060162025764752755, "grad_norm": 2.750107407271773, "learning_rate": 6.016202576475276e-07, "loss": 0.8819, "step": 1359 }, { "epoch": 0.006020629509938466, "grad_norm": 2.7351081099803127, "learning_rate": 6.020629509938466e-07, "loss": 0.9567, "step": 1360 }, { "epoch": 0.006025056443401656, "grad_norm": 3.139532645542961, "learning_rate": 6.025056443401657e-07, "loss": 0.9366, "step": 1361 }, { "epoch": 0.006029483376864845, "grad_norm": 3.560154209393561, "learning_rate": 6.029483376864846e-07, "loss": 1.2769, "step": 1362 }, { "epoch": 0.006033910310328036, "grad_norm": 3.693692532352268, "learning_rate": 6.033910310328036e-07, "loss": 1.3414, "step": 1363 }, { "epoch": 0.0060383372437912254, "grad_norm": 4.257315500861119, "learning_rate": 6.038337243791226e-07, "loss": 0.6234, "step": 1364 }, { "epoch": 0.006042764177254416, "grad_norm": 3.5073412632160785, "learning_rate": 6.042764177254417e-07, "loss": 0.98, "step": 1365 }, { "epoch": 0.006047191110717606, "grad_norm": 2.574808857830615, "learning_rate": 6.047191110717606e-07, "loss": 0.719, "step": 1366 }, { "epoch": 0.006051618044180796, "grad_norm": 3.145419130975098, "learning_rate": 6.051618044180797e-07, "loss": 1.1524, "step": 1367 }, { "epoch": 0.006056044977643986, "grad_norm": 2.5080100862324937, "learning_rate": 6.056044977643986e-07, "loss": 0.5955, "step": 1368 }, { "epoch": 0.006060471911107176, "grad_norm": 2.918409852664582, "learning_rate": 6.060471911107177e-07, "loss": 0.6204, "step": 1369 }, { "epoch": 0.006064898844570366, "grad_norm": 3.6082087583251417, "learning_rate": 6.064898844570367e-07, "loss": 1.2388, "step": 1370 }, { "epoch": 0.006069325778033556, "grad_norm": 3.085006499483201, "learning_rate": 6.069325778033557e-07, "loss": 1.2803, "step": 1371 }, { "epoch": 0.006073752711496746, "grad_norm": 3.245917227074307, "learning_rate": 6.073752711496746e-07, "loss": 1.2302, "step": 1372 }, { "epoch": 0.0060781796449599365, "grad_norm": 3.067307163187933, "learning_rate": 6.078179644959937e-07, "loss": 1.026, "step": 1373 }, { "epoch": 0.006082606578423126, "grad_norm": 2.6776106225308722, "learning_rate": 6.082606578423127e-07, "loss": 0.8714, "step": 1374 }, { "epoch": 0.006087033511886317, "grad_norm": 2.905737345976523, "learning_rate": 6.087033511886317e-07, "loss": 0.8397, "step": 1375 }, { "epoch": 0.006091460445349506, "grad_norm": 3.82038378607219, "learning_rate": 6.091460445349507e-07, "loss": 0.9455, "step": 1376 }, { "epoch": 0.006095887378812697, "grad_norm": 3.432169924830013, "learning_rate": 6.095887378812697e-07, "loss": 0.8578, "step": 1377 }, { "epoch": 0.0061003143122758865, "grad_norm": 4.338972343215784, "learning_rate": 6.100314312275887e-07, "loss": 1.2791, "step": 1378 }, { "epoch": 0.006104741245739076, "grad_norm": 3.023310410790029, "learning_rate": 6.104741245739077e-07, "loss": 0.8355, "step": 1379 }, { "epoch": 0.006109168179202267, "grad_norm": 2.8274788596960327, "learning_rate": 6.109168179202267e-07, "loss": 0.9524, "step": 1380 }, { "epoch": 0.006113595112665456, "grad_norm": 2.733144032040274, "learning_rate": 6.113595112665458e-07, "loss": 1.018, "step": 1381 }, { "epoch": 0.006118022046128647, "grad_norm": 3.032902309822225, "learning_rate": 6.118022046128647e-07, "loss": 0.7834, "step": 1382 }, { "epoch": 0.006122448979591836, "grad_norm": 2.9279824484675103, "learning_rate": 6.122448979591837e-07, "loss": 0.9305, "step": 1383 }, { "epoch": 0.006126875913055027, "grad_norm": 4.139648360501357, "learning_rate": 6.126875913055027e-07, "loss": 1.4249, "step": 1384 }, { "epoch": 0.006131302846518217, "grad_norm": 3.190253141482772, "learning_rate": 6.131302846518218e-07, "loss": 0.7035, "step": 1385 }, { "epoch": 0.006135729779981407, "grad_norm": 2.6949258133462037, "learning_rate": 6.135729779981407e-07, "loss": 0.9394, "step": 1386 }, { "epoch": 0.006140156713444597, "grad_norm": 2.620715577763178, "learning_rate": 6.140156713444598e-07, "loss": 0.9941, "step": 1387 }, { "epoch": 0.006144583646907787, "grad_norm": 3.603392025469344, "learning_rate": 6.144583646907787e-07, "loss": 1.1494, "step": 1388 }, { "epoch": 0.006149010580370977, "grad_norm": 2.894653502518626, "learning_rate": 6.149010580370978e-07, "loss": 0.8001, "step": 1389 }, { "epoch": 0.006153437513834167, "grad_norm": 3.7140049374351958, "learning_rate": 6.153437513834168e-07, "loss": 0.9778, "step": 1390 }, { "epoch": 0.006157864447297357, "grad_norm": 3.2332021459689724, "learning_rate": 6.157864447297358e-07, "loss": 0.9437, "step": 1391 }, { "epoch": 0.0061622913807605475, "grad_norm": 2.760502308447251, "learning_rate": 6.162291380760547e-07, "loss": 1.062, "step": 1392 }, { "epoch": 0.006166718314223737, "grad_norm": 2.8408342513214566, "learning_rate": 6.166718314223738e-07, "loss": 1.0388, "step": 1393 }, { "epoch": 0.006171145247686928, "grad_norm": 3.7510890032161464, "learning_rate": 6.171145247686928e-07, "loss": 0.8821, "step": 1394 }, { "epoch": 0.006175572181150117, "grad_norm": 3.138428988722518, "learning_rate": 6.175572181150118e-07, "loss": 0.5098, "step": 1395 }, { "epoch": 0.006179999114613307, "grad_norm": 2.790779061791533, "learning_rate": 6.179999114613308e-07, "loss": 0.7545, "step": 1396 }, { "epoch": 0.0061844260480764975, "grad_norm": 2.8046047857716414, "learning_rate": 6.184426048076498e-07, "loss": 0.7473, "step": 1397 }, { "epoch": 0.006188852981539687, "grad_norm": 3.3858634782397554, "learning_rate": 6.188852981539688e-07, "loss": 1.1269, "step": 1398 }, { "epoch": 0.006193279915002878, "grad_norm": 3.600016632804397, "learning_rate": 6.193279915002879e-07, "loss": 1.4389, "step": 1399 }, { "epoch": 0.006197706848466067, "grad_norm": 4.23312445584923, "learning_rate": 6.197706848466068e-07, "loss": 0.6015, "step": 1400 }, { "epoch": 0.006202133781929258, "grad_norm": 3.6975430820189965, "learning_rate": 6.202133781929258e-07, "loss": 0.8121, "step": 1401 }, { "epoch": 0.006206560715392447, "grad_norm": 3.6048694569905573, "learning_rate": 6.206560715392448e-07, "loss": 1.1765, "step": 1402 }, { "epoch": 0.006210987648855638, "grad_norm": 2.876209256696108, "learning_rate": 6.210987648855638e-07, "loss": 0.8005, "step": 1403 }, { "epoch": 0.0062154145823188275, "grad_norm": 3.8534279798942133, "learning_rate": 6.215414582318828e-07, "loss": 0.8228, "step": 1404 }, { "epoch": 0.006219841515782018, "grad_norm": 2.9478676937714017, "learning_rate": 6.219841515782019e-07, "loss": 0.5976, "step": 1405 }, { "epoch": 0.006224268449245208, "grad_norm": 3.1830709097434196, "learning_rate": 6.224268449245208e-07, "loss": 0.7282, "step": 1406 }, { "epoch": 0.006228695382708398, "grad_norm": 3.096967929578963, "learning_rate": 6.228695382708398e-07, "loss": 1.1348, "step": 1407 }, { "epoch": 0.006233122316171588, "grad_norm": 3.294833741447254, "learning_rate": 6.233122316171588e-07, "loss": 1.1867, "step": 1408 }, { "epoch": 0.006237549249634778, "grad_norm": 3.850858147888065, "learning_rate": 6.237549249634779e-07, "loss": 1.1302, "step": 1409 }, { "epoch": 0.006241976183097968, "grad_norm": 2.599495659504654, "learning_rate": 6.241976183097968e-07, "loss": 0.7873, "step": 1410 }, { "epoch": 0.0062464031165611585, "grad_norm": 4.164322441279376, "learning_rate": 6.246403116561159e-07, "loss": 1.4375, "step": 1411 }, { "epoch": 0.006250830050024348, "grad_norm": 3.151988294911533, "learning_rate": 6.250830050024349e-07, "loss": 0.8765, "step": 1412 }, { "epoch": 0.006255256983487538, "grad_norm": 3.356556700872066, "learning_rate": 6.255256983487539e-07, "loss": 1.1557, "step": 1413 }, { "epoch": 0.006259683916950728, "grad_norm": 4.684513247065635, "learning_rate": 6.259683916950729e-07, "loss": 1.1563, "step": 1414 }, { "epoch": 0.006264110850413918, "grad_norm": 2.68571528402944, "learning_rate": 6.26411085041392e-07, "loss": 0.847, "step": 1415 }, { "epoch": 0.006268537783877108, "grad_norm": 2.89453342601421, "learning_rate": 6.268537783877108e-07, "loss": 0.8826, "step": 1416 }, { "epoch": 0.006272964717340298, "grad_norm": 2.7949749337517855, "learning_rate": 6.272964717340299e-07, "loss": 0.9105, "step": 1417 }, { "epoch": 0.006277391650803489, "grad_norm": 2.4884651008162475, "learning_rate": 6.277391650803489e-07, "loss": 0.7853, "step": 1418 }, { "epoch": 0.006281818584266678, "grad_norm": 2.441901941884719, "learning_rate": 6.281818584266679e-07, "loss": 0.8169, "step": 1419 }, { "epoch": 0.006286245517729869, "grad_norm": 2.9143641898066286, "learning_rate": 6.286245517729869e-07, "loss": 0.804, "step": 1420 }, { "epoch": 0.006290672451193058, "grad_norm": 3.0014973186414573, "learning_rate": 6.29067245119306e-07, "loss": 0.8245, "step": 1421 }, { "epoch": 0.006295099384656249, "grad_norm": 2.386338043473839, "learning_rate": 6.295099384656249e-07, "loss": 0.7543, "step": 1422 }, { "epoch": 0.0062995263181194385, "grad_norm": 2.8988500477634305, "learning_rate": 6.29952631811944e-07, "loss": 0.9478, "step": 1423 }, { "epoch": 0.006303953251582629, "grad_norm": 3.6315279133182083, "learning_rate": 6.30395325158263e-07, "loss": 1.2833, "step": 1424 }, { "epoch": 0.006308380185045819, "grad_norm": 2.670516088239834, "learning_rate": 6.308380185045819e-07, "loss": 0.7098, "step": 1425 }, { "epoch": 0.006312807118509009, "grad_norm": 3.0594198340341126, "learning_rate": 6.312807118509009e-07, "loss": 0.951, "step": 1426 }, { "epoch": 0.006317234051972199, "grad_norm": 3.068076175331497, "learning_rate": 6.3172340519722e-07, "loss": 0.7578, "step": 1427 }, { "epoch": 0.006321660985435389, "grad_norm": 3.7841509611121262, "learning_rate": 6.321660985435389e-07, "loss": 1.11, "step": 1428 }, { "epoch": 0.006326087918898579, "grad_norm": 2.9334975289909244, "learning_rate": 6.32608791889858e-07, "loss": 0.8214, "step": 1429 }, { "epoch": 0.006330514852361769, "grad_norm": 3.2409931115833004, "learning_rate": 6.33051485236177e-07, "loss": 1.2418, "step": 1430 }, { "epoch": 0.006334941785824959, "grad_norm": 2.971225511573049, "learning_rate": 6.334941785824959e-07, "loss": 0.9006, "step": 1431 }, { "epoch": 0.006339368719288149, "grad_norm": 2.852808115503903, "learning_rate": 6.339368719288149e-07, "loss": 0.9873, "step": 1432 }, { "epoch": 0.006343795652751339, "grad_norm": 2.806176642056817, "learning_rate": 6.34379565275134e-07, "loss": 0.7748, "step": 1433 }, { "epoch": 0.006348222586214529, "grad_norm": 3.467142796960236, "learning_rate": 6.348222586214529e-07, "loss": 0.9293, "step": 1434 }, { "epoch": 0.006352649519677719, "grad_norm": 3.743710972074694, "learning_rate": 6.35264951967772e-07, "loss": 1.2643, "step": 1435 }, { "epoch": 0.006357076453140909, "grad_norm": 4.139715851292483, "learning_rate": 6.35707645314091e-07, "loss": 1.4237, "step": 1436 }, { "epoch": 0.0063615033866040996, "grad_norm": 2.7443211810530648, "learning_rate": 6.3615033866041e-07, "loss": 0.8798, "step": 1437 }, { "epoch": 0.006365930320067289, "grad_norm": 3.1350549201133022, "learning_rate": 6.36593032006729e-07, "loss": 0.9589, "step": 1438 }, { "epoch": 0.00637035725353048, "grad_norm": 3.394311643902474, "learning_rate": 6.370357253530481e-07, "loss": 0.8489, "step": 1439 }, { "epoch": 0.006374784186993669, "grad_norm": 3.3694191506476665, "learning_rate": 6.374784186993669e-07, "loss": 1.1189, "step": 1440 }, { "epoch": 0.00637921112045686, "grad_norm": 4.054965311553716, "learning_rate": 6.37921112045686e-07, "loss": 1.1752, "step": 1441 }, { "epoch": 0.0063836380539200495, "grad_norm": 3.3235349518404056, "learning_rate": 6.38363805392005e-07, "loss": 1.19, "step": 1442 }, { "epoch": 0.00638806498738324, "grad_norm": 2.8999976848921287, "learning_rate": 6.38806498738324e-07, "loss": 0.7331, "step": 1443 }, { "epoch": 0.00639249192084643, "grad_norm": 2.9360913902290893, "learning_rate": 6.39249192084643e-07, "loss": 0.7003, "step": 1444 }, { "epoch": 0.00639691885430962, "grad_norm": 2.795700982135531, "learning_rate": 6.396918854309621e-07, "loss": 0.9358, "step": 1445 }, { "epoch": 0.00640134578777281, "grad_norm": 3.0813609042851304, "learning_rate": 6.40134578777281e-07, "loss": 1.1019, "step": 1446 }, { "epoch": 0.006405772721235999, "grad_norm": 2.752847798862404, "learning_rate": 6.405772721236001e-07, "loss": 0.6382, "step": 1447 }, { "epoch": 0.00641019965469919, "grad_norm": 2.4586706122378987, "learning_rate": 6.410199654699191e-07, "loss": 0.7583, "step": 1448 }, { "epoch": 0.00641462658816238, "grad_norm": 2.945340445278483, "learning_rate": 6.41462658816238e-07, "loss": 1.0139, "step": 1449 }, { "epoch": 0.00641905352162557, "grad_norm": 3.6813487457053666, "learning_rate": 6.41905352162557e-07, "loss": 1.084, "step": 1450 }, { "epoch": 0.00642348045508876, "grad_norm": 3.205666137270463, "learning_rate": 6.423480455088761e-07, "loss": 0.8725, "step": 1451 }, { "epoch": 0.00642790738855195, "grad_norm": 3.4874472150845404, "learning_rate": 6.42790738855195e-07, "loss": 1.0607, "step": 1452 }, { "epoch": 0.00643233432201514, "grad_norm": 3.327821487580372, "learning_rate": 6.432334322015141e-07, "loss": 1.0729, "step": 1453 }, { "epoch": 0.00643676125547833, "grad_norm": 3.3864718847667934, "learning_rate": 6.436761255478331e-07, "loss": 1.326, "step": 1454 }, { "epoch": 0.00644118818894152, "grad_norm": 3.1661662768012957, "learning_rate": 6.44118818894152e-07, "loss": 0.7743, "step": 1455 }, { "epoch": 0.0064456151224047105, "grad_norm": 3.148290376091654, "learning_rate": 6.44561512240471e-07, "loss": 1.1391, "step": 1456 }, { "epoch": 0.0064500420558679, "grad_norm": 3.6844718659648867, "learning_rate": 6.450042055867901e-07, "loss": 1.429, "step": 1457 }, { "epoch": 0.006454468989331091, "grad_norm": 2.9572133327885695, "learning_rate": 6.45446898933109e-07, "loss": 0.7382, "step": 1458 }, { "epoch": 0.00645889592279428, "grad_norm": 3.564270054426463, "learning_rate": 6.458895922794281e-07, "loss": 1.2203, "step": 1459 }, { "epoch": 0.006463322856257471, "grad_norm": 3.505826416221496, "learning_rate": 6.463322856257471e-07, "loss": 1.0618, "step": 1460 }, { "epoch": 0.0064677497897206605, "grad_norm": 3.691211545497424, "learning_rate": 6.467749789720661e-07, "loss": 1.3159, "step": 1461 }, { "epoch": 0.006472176723183851, "grad_norm": 3.2732375150444586, "learning_rate": 6.472176723183851e-07, "loss": 0.9721, "step": 1462 }, { "epoch": 0.006476603656647041, "grad_norm": 2.6134752851024063, "learning_rate": 6.476603656647042e-07, "loss": 0.6038, "step": 1463 }, { "epoch": 0.00648103059011023, "grad_norm": 3.496653928444513, "learning_rate": 6.48103059011023e-07, "loss": 0.7781, "step": 1464 }, { "epoch": 0.006485457523573421, "grad_norm": 3.411492180365541, "learning_rate": 6.485457523573421e-07, "loss": 1.4098, "step": 1465 }, { "epoch": 0.00648988445703661, "grad_norm": 3.769779509703752, "learning_rate": 6.489884457036611e-07, "loss": 1.2069, "step": 1466 }, { "epoch": 0.006494311390499801, "grad_norm": 3.1354739293441702, "learning_rate": 6.494311390499801e-07, "loss": 0.8049, "step": 1467 }, { "epoch": 0.0064987383239629905, "grad_norm": 2.821946365161451, "learning_rate": 6.498738323962991e-07, "loss": 0.5855, "step": 1468 }, { "epoch": 0.006503165257426181, "grad_norm": 3.4743198051873962, "learning_rate": 6.503165257426182e-07, "loss": 0.8152, "step": 1469 }, { "epoch": 0.006507592190889371, "grad_norm": 2.850220538373483, "learning_rate": 6.507592190889371e-07, "loss": 0.752, "step": 1470 }, { "epoch": 0.006512019124352561, "grad_norm": 2.886809459342104, "learning_rate": 6.512019124352562e-07, "loss": 0.8163, "step": 1471 }, { "epoch": 0.006516446057815751, "grad_norm": 2.815571927040584, "learning_rate": 6.516446057815753e-07, "loss": 1.0123, "step": 1472 }, { "epoch": 0.006520872991278941, "grad_norm": 3.0117697478386565, "learning_rate": 6.520872991278941e-07, "loss": 0.8736, "step": 1473 }, { "epoch": 0.006525299924742131, "grad_norm": 3.058364221730066, "learning_rate": 6.525299924742131e-07, "loss": 1.0629, "step": 1474 }, { "epoch": 0.0065297268582053215, "grad_norm": 3.144797317516404, "learning_rate": 6.529726858205322e-07, "loss": 1.1432, "step": 1475 }, { "epoch": 0.006534153791668511, "grad_norm": 3.448202813456812, "learning_rate": 6.534153791668511e-07, "loss": 0.6266, "step": 1476 }, { "epoch": 0.006538580725131702, "grad_norm": 2.8308980784904674, "learning_rate": 6.538580725131702e-07, "loss": 0.5694, "step": 1477 }, { "epoch": 0.006543007658594891, "grad_norm": 2.9573898622352055, "learning_rate": 6.543007658594893e-07, "loss": 0.6597, "step": 1478 }, { "epoch": 0.006547434592058082, "grad_norm": 2.9273151630300855, "learning_rate": 6.547434592058081e-07, "loss": 0.9715, "step": 1479 }, { "epoch": 0.0065518615255212714, "grad_norm": 3.5207082627386894, "learning_rate": 6.551861525521271e-07, "loss": 1.219, "step": 1480 }, { "epoch": 0.006556288458984461, "grad_norm": 3.242248469326586, "learning_rate": 6.556288458984462e-07, "loss": 0.8408, "step": 1481 }, { "epoch": 0.006560715392447652, "grad_norm": 3.5349924106156303, "learning_rate": 6.560715392447651e-07, "loss": 0.8725, "step": 1482 }, { "epoch": 0.006565142325910841, "grad_norm": 3.319076725928388, "learning_rate": 6.565142325910842e-07, "loss": 0.8381, "step": 1483 }, { "epoch": 0.006569569259374032, "grad_norm": 3.9525888547579875, "learning_rate": 6.569569259374033e-07, "loss": 1.5551, "step": 1484 }, { "epoch": 0.006573996192837221, "grad_norm": 3.3285075490536364, "learning_rate": 6.573996192837222e-07, "loss": 1.0462, "step": 1485 }, { "epoch": 0.006578423126300412, "grad_norm": 2.878573635482556, "learning_rate": 6.578423126300413e-07, "loss": 0.7831, "step": 1486 }, { "epoch": 0.0065828500597636015, "grad_norm": 3.268993344978605, "learning_rate": 6.582850059763603e-07, "loss": 0.8892, "step": 1487 }, { "epoch": 0.006587276993226792, "grad_norm": 3.452219224530138, "learning_rate": 6.587276993226791e-07, "loss": 0.8577, "step": 1488 }, { "epoch": 0.006591703926689982, "grad_norm": 2.692425094861446, "learning_rate": 6.591703926689982e-07, "loss": 0.8134, "step": 1489 }, { "epoch": 0.006596130860153172, "grad_norm": 2.652820647267013, "learning_rate": 6.596130860153173e-07, "loss": 0.6176, "step": 1490 }, { "epoch": 0.006600557793616362, "grad_norm": 3.2470017083767164, "learning_rate": 6.600557793616362e-07, "loss": 0.9156, "step": 1491 }, { "epoch": 0.006604984727079552, "grad_norm": 3.3442602386611195, "learning_rate": 6.604984727079553e-07, "loss": 0.7271, "step": 1492 }, { "epoch": 0.006609411660542742, "grad_norm": 2.6380793155914635, "learning_rate": 6.609411660542743e-07, "loss": 0.9436, "step": 1493 }, { "epoch": 0.0066138385940059325, "grad_norm": 2.751898132395238, "learning_rate": 6.613838594005933e-07, "loss": 1.0139, "step": 1494 }, { "epoch": 0.006618265527469122, "grad_norm": 3.457532172265569, "learning_rate": 6.618265527469123e-07, "loss": 0.9931, "step": 1495 }, { "epoch": 0.006622692460932313, "grad_norm": 2.823595744374179, "learning_rate": 6.622692460932314e-07, "loss": 0.5361, "step": 1496 }, { "epoch": 0.006627119394395502, "grad_norm": 3.2432907644536106, "learning_rate": 6.627119394395502e-07, "loss": 1.0811, "step": 1497 }, { "epoch": 0.006631546327858692, "grad_norm": 2.869162915997667, "learning_rate": 6.631546327858693e-07, "loss": 0.8825, "step": 1498 }, { "epoch": 0.006635973261321882, "grad_norm": 3.4168069487307067, "learning_rate": 6.635973261321883e-07, "loss": 0.7818, "step": 1499 }, { "epoch": 0.006640400194785072, "grad_norm": 3.1499645512947376, "learning_rate": 6.640400194785073e-07, "loss": 0.9842, "step": 1500 }, { "epoch": 0.0066448271282482626, "grad_norm": 2.8140562060565895, "learning_rate": 6.644827128248263e-07, "loss": 0.8965, "step": 1501 }, { "epoch": 0.006649254061711452, "grad_norm": 3.4465521635894345, "learning_rate": 6.649254061711454e-07, "loss": 0.65, "step": 1502 }, { "epoch": 0.006653680995174643, "grad_norm": 2.456844407119742, "learning_rate": 6.653680995174643e-07, "loss": 0.8518, "step": 1503 }, { "epoch": 0.006658107928637832, "grad_norm": 3.0800976310941457, "learning_rate": 6.658107928637833e-07, "loss": 0.6708, "step": 1504 }, { "epoch": 0.006662534862101023, "grad_norm": 4.250665991405102, "learning_rate": 6.662534862101023e-07, "loss": 1.0002, "step": 1505 }, { "epoch": 0.0066669617955642125, "grad_norm": 3.326291700489124, "learning_rate": 6.666961795564213e-07, "loss": 0.8487, "step": 1506 }, { "epoch": 0.006671388729027403, "grad_norm": 3.086725399798804, "learning_rate": 6.671388729027403e-07, "loss": 0.4833, "step": 1507 }, { "epoch": 0.006675815662490593, "grad_norm": 2.532547799824143, "learning_rate": 6.675815662490594e-07, "loss": 0.6316, "step": 1508 }, { "epoch": 0.006680242595953783, "grad_norm": 2.8535572463588386, "learning_rate": 6.680242595953783e-07, "loss": 1.0827, "step": 1509 }, { "epoch": 0.006684669529416973, "grad_norm": 2.662418020747091, "learning_rate": 6.684669529416974e-07, "loss": 0.5693, "step": 1510 }, { "epoch": 0.006689096462880163, "grad_norm": 2.6997452194199307, "learning_rate": 6.689096462880164e-07, "loss": 0.8701, "step": 1511 }, { "epoch": 0.006693523396343353, "grad_norm": 4.719200190055898, "learning_rate": 6.693523396343353e-07, "loss": 1.1034, "step": 1512 }, { "epoch": 0.006697950329806543, "grad_norm": 3.3921013356293312, "learning_rate": 6.697950329806543e-07, "loss": 1.159, "step": 1513 }, { "epoch": 0.006702377263269733, "grad_norm": 2.835598748766663, "learning_rate": 6.702377263269734e-07, "loss": 0.8772, "step": 1514 }, { "epoch": 0.006706804196732923, "grad_norm": 3.345009514165654, "learning_rate": 6.706804196732923e-07, "loss": 0.7044, "step": 1515 }, { "epoch": 0.006711231130196113, "grad_norm": 3.0056129514618917, "learning_rate": 6.711231130196114e-07, "loss": 0.8728, "step": 1516 }, { "epoch": 0.006715658063659303, "grad_norm": 3.104936255464612, "learning_rate": 6.715658063659304e-07, "loss": 0.845, "step": 1517 }, { "epoch": 0.006720084997122493, "grad_norm": 3.5512285110317414, "learning_rate": 6.720084997122494e-07, "loss": 1.0917, "step": 1518 }, { "epoch": 0.006724511930585683, "grad_norm": 3.020318965300296, "learning_rate": 6.724511930585684e-07, "loss": 0.7048, "step": 1519 }, { "epoch": 0.0067289388640488735, "grad_norm": 2.7119353000040096, "learning_rate": 6.728938864048875e-07, "loss": 0.6254, "step": 1520 }, { "epoch": 0.006733365797512063, "grad_norm": 3.2554341534218807, "learning_rate": 6.733365797512063e-07, "loss": 0.8697, "step": 1521 }, { "epoch": 0.006737792730975254, "grad_norm": 3.0443177210344254, "learning_rate": 6.737792730975254e-07, "loss": 1.049, "step": 1522 }, { "epoch": 0.006742219664438443, "grad_norm": 2.86092882316975, "learning_rate": 6.742219664438444e-07, "loss": 0.883, "step": 1523 }, { "epoch": 0.006746646597901634, "grad_norm": 3.8862507572738845, "learning_rate": 6.746646597901634e-07, "loss": 1.055, "step": 1524 }, { "epoch": 0.0067510735313648235, "grad_norm": 3.594336403261911, "learning_rate": 6.751073531364824e-07, "loss": 0.9031, "step": 1525 }, { "epoch": 0.006755500464828014, "grad_norm": 2.6665991725560483, "learning_rate": 6.755500464828015e-07, "loss": 0.8015, "step": 1526 }, { "epoch": 0.006759927398291204, "grad_norm": 3.031280143167187, "learning_rate": 6.759927398291204e-07, "loss": 0.8481, "step": 1527 }, { "epoch": 0.006764354331754394, "grad_norm": 2.923308048047501, "learning_rate": 6.764354331754395e-07, "loss": 0.4927, "step": 1528 }, { "epoch": 0.006768781265217584, "grad_norm": 3.737977806592082, "learning_rate": 6.768781265217584e-07, "loss": 1.1706, "step": 1529 }, { "epoch": 0.006773208198680773, "grad_norm": 4.066550136362476, "learning_rate": 6.773208198680774e-07, "loss": 0.8808, "step": 1530 }, { "epoch": 0.006777635132143964, "grad_norm": 2.828816401441551, "learning_rate": 6.777635132143964e-07, "loss": 0.8594, "step": 1531 }, { "epoch": 0.0067820620656071536, "grad_norm": 2.875536514938937, "learning_rate": 6.782062065607155e-07, "loss": 0.9175, "step": 1532 }, { "epoch": 0.006786488999070344, "grad_norm": 3.203057211897045, "learning_rate": 6.786488999070344e-07, "loss": 1.1769, "step": 1533 }, { "epoch": 0.006790915932533534, "grad_norm": 3.341747190275047, "learning_rate": 6.790915932533535e-07, "loss": 1.1268, "step": 1534 }, { "epoch": 0.006795342865996724, "grad_norm": 3.4176452150534544, "learning_rate": 6.795342865996725e-07, "loss": 1.2678, "step": 1535 }, { "epoch": 0.006799769799459914, "grad_norm": 3.5574769241864987, "learning_rate": 6.799769799459914e-07, "loss": 0.8595, "step": 1536 }, { "epoch": 0.006804196732923104, "grad_norm": 2.65678972559503, "learning_rate": 6.804196732923104e-07, "loss": 0.77, "step": 1537 }, { "epoch": 0.006808623666386294, "grad_norm": 2.5634051543765795, "learning_rate": 6.808623666386295e-07, "loss": 0.6761, "step": 1538 }, { "epoch": 0.0068130505998494845, "grad_norm": 3.538938069712124, "learning_rate": 6.813050599849484e-07, "loss": 1.1505, "step": 1539 }, { "epoch": 0.006817477533312674, "grad_norm": 2.3437022887343137, "learning_rate": 6.817477533312675e-07, "loss": 0.6206, "step": 1540 }, { "epoch": 0.006821904466775865, "grad_norm": 3.649429081711648, "learning_rate": 6.821904466775865e-07, "loss": 0.9573, "step": 1541 }, { "epoch": 0.006826331400239054, "grad_norm": 2.4222064806393706, "learning_rate": 6.826331400239055e-07, "loss": 0.5943, "step": 1542 }, { "epoch": 0.006830758333702245, "grad_norm": 2.6039447060637944, "learning_rate": 6.830758333702245e-07, "loss": 1.0361, "step": 1543 }, { "epoch": 0.0068351852671654344, "grad_norm": 3.0228379346726504, "learning_rate": 6.835185267165436e-07, "loss": 0.7832, "step": 1544 }, { "epoch": 0.006839612200628625, "grad_norm": 3.819556295602929, "learning_rate": 6.839612200628624e-07, "loss": 0.9937, "step": 1545 }, { "epoch": 0.006844039134091815, "grad_norm": 3.5329668954919575, "learning_rate": 6.844039134091815e-07, "loss": 1.2217, "step": 1546 }, { "epoch": 0.006848466067555004, "grad_norm": 2.4086161329411335, "learning_rate": 6.848466067555005e-07, "loss": 0.8173, "step": 1547 }, { "epoch": 0.006852893001018195, "grad_norm": 4.3836786745362, "learning_rate": 6.852893001018195e-07, "loss": 1.2354, "step": 1548 }, { "epoch": 0.006857319934481384, "grad_norm": 2.7146348129753983, "learning_rate": 6.857319934481385e-07, "loss": 0.8025, "step": 1549 }, { "epoch": 0.006861746867944575, "grad_norm": 4.233341394312943, "learning_rate": 6.861746867944576e-07, "loss": 1.4004, "step": 1550 }, { "epoch": 0.0068661738014077645, "grad_norm": 3.7096991714977334, "learning_rate": 6.866173801407765e-07, "loss": 0.9367, "step": 1551 }, { "epoch": 0.006870600734870955, "grad_norm": 3.372692707828797, "learning_rate": 6.870600734870956e-07, "loss": 1.3619, "step": 1552 }, { "epoch": 0.006875027668334145, "grad_norm": 2.4335888814861804, "learning_rate": 6.875027668334146e-07, "loss": 0.6216, "step": 1553 }, { "epoch": 0.006879454601797335, "grad_norm": 3.0498538627794605, "learning_rate": 6.879454601797335e-07, "loss": 0.816, "step": 1554 }, { "epoch": 0.006883881535260525, "grad_norm": 4.506926836786762, "learning_rate": 6.883881535260525e-07, "loss": 1.4033, "step": 1555 }, { "epoch": 0.006888308468723715, "grad_norm": 3.111349007638839, "learning_rate": 6.888308468723716e-07, "loss": 1.1462, "step": 1556 }, { "epoch": 0.006892735402186905, "grad_norm": 3.392504890265954, "learning_rate": 6.892735402186905e-07, "loss": 0.8472, "step": 1557 }, { "epoch": 0.0068971623356500955, "grad_norm": 2.9805801993284633, "learning_rate": 6.897162335650096e-07, "loss": 0.9628, "step": 1558 }, { "epoch": 0.006901589269113285, "grad_norm": 2.678576227048205, "learning_rate": 6.901589269113286e-07, "loss": 0.8769, "step": 1559 }, { "epoch": 0.006906016202576476, "grad_norm": 2.9014698547474356, "learning_rate": 6.906016202576475e-07, "loss": 0.8114, "step": 1560 }, { "epoch": 0.006910443136039665, "grad_norm": 2.7025413920153896, "learning_rate": 6.910443136039665e-07, "loss": 0.9853, "step": 1561 }, { "epoch": 0.006914870069502856, "grad_norm": 2.3666805395290615, "learning_rate": 6.914870069502856e-07, "loss": 0.7286, "step": 1562 }, { "epoch": 0.006919297002966045, "grad_norm": 2.930940650217099, "learning_rate": 6.919297002966046e-07, "loss": 0.8516, "step": 1563 }, { "epoch": 0.006923723936429235, "grad_norm": 2.989796830920254, "learning_rate": 6.923723936429236e-07, "loss": 0.7515, "step": 1564 }, { "epoch": 0.0069281508698924256, "grad_norm": 3.1310293649672487, "learning_rate": 6.928150869892426e-07, "loss": 0.8131, "step": 1565 }, { "epoch": 0.006932577803355615, "grad_norm": 2.8210434193945866, "learning_rate": 6.932577803355617e-07, "loss": 0.8192, "step": 1566 }, { "epoch": 0.006937004736818806, "grad_norm": 2.551255244883538, "learning_rate": 6.937004736818806e-07, "loss": 0.6695, "step": 1567 }, { "epoch": 0.006941431670281995, "grad_norm": 3.22541182825044, "learning_rate": 6.941431670281997e-07, "loss": 0.8773, "step": 1568 }, { "epoch": 0.006945858603745186, "grad_norm": 3.1292955880954088, "learning_rate": 6.945858603745187e-07, "loss": 1.0311, "step": 1569 }, { "epoch": 0.0069502855372083755, "grad_norm": 2.671764521490788, "learning_rate": 6.950285537208376e-07, "loss": 1.0843, "step": 1570 }, { "epoch": 0.006954712470671566, "grad_norm": 4.659099102102501, "learning_rate": 6.954712470671566e-07, "loss": 1.6459, "step": 1571 }, { "epoch": 0.006959139404134756, "grad_norm": 3.06370932376208, "learning_rate": 6.959139404134757e-07, "loss": 1.1221, "step": 1572 }, { "epoch": 0.006963566337597946, "grad_norm": 3.064946970936045, "learning_rate": 6.963566337597946e-07, "loss": 0.7217, "step": 1573 }, { "epoch": 0.006967993271061136, "grad_norm": 3.0244826685178006, "learning_rate": 6.967993271061137e-07, "loss": 0.6342, "step": 1574 }, { "epoch": 0.006972420204524326, "grad_norm": 2.983249719767706, "learning_rate": 6.972420204524327e-07, "loss": 0.7098, "step": 1575 }, { "epoch": 0.006976847137987516, "grad_norm": 3.2106405755861327, "learning_rate": 6.976847137987517e-07, "loss": 0.8751, "step": 1576 }, { "epoch": 0.0069812740714507064, "grad_norm": 2.700835825385018, "learning_rate": 6.981274071450707e-07, "loss": 0.7239, "step": 1577 }, { "epoch": 0.006985701004913896, "grad_norm": 3.356410718626009, "learning_rate": 6.985701004913898e-07, "loss": 0.9276, "step": 1578 }, { "epoch": 0.006990127938377087, "grad_norm": 3.1671170096058936, "learning_rate": 6.990127938377086e-07, "loss": 1.0533, "step": 1579 }, { "epoch": 0.006994554871840276, "grad_norm": 3.3728168422300904, "learning_rate": 6.994554871840277e-07, "loss": 0.9727, "step": 1580 }, { "epoch": 0.006998981805303466, "grad_norm": 3.2508099460509623, "learning_rate": 6.998981805303467e-07, "loss": 0.8155, "step": 1581 }, { "epoch": 0.007003408738766656, "grad_norm": 3.8168800188690093, "learning_rate": 7.003408738766657e-07, "loss": 1.0064, "step": 1582 }, { "epoch": 0.007007835672229846, "grad_norm": 2.9633815569314734, "learning_rate": 7.007835672229847e-07, "loss": 0.9062, "step": 1583 }, { "epoch": 0.0070122626056930365, "grad_norm": 3.2615092957392746, "learning_rate": 7.012262605693038e-07, "loss": 1.0999, "step": 1584 }, { "epoch": 0.007016689539156226, "grad_norm": 4.042981004784027, "learning_rate": 7.016689539156226e-07, "loss": 0.9449, "step": 1585 }, { "epoch": 0.007021116472619417, "grad_norm": 3.422726749512924, "learning_rate": 7.021116472619417e-07, "loss": 1.074, "step": 1586 }, { "epoch": 0.007025543406082606, "grad_norm": 2.696121440558784, "learning_rate": 7.025543406082607e-07, "loss": 0.6178, "step": 1587 }, { "epoch": 0.007029970339545797, "grad_norm": 2.797998948150866, "learning_rate": 7.029970339545797e-07, "loss": 0.6975, "step": 1588 }, { "epoch": 0.0070343972730089865, "grad_norm": 3.3374922000550447, "learning_rate": 7.034397273008987e-07, "loss": 1.1257, "step": 1589 }, { "epoch": 0.007038824206472177, "grad_norm": 3.0445953190966404, "learning_rate": 7.038824206472178e-07, "loss": 0.809, "step": 1590 }, { "epoch": 0.007043251139935367, "grad_norm": 4.812234696553347, "learning_rate": 7.043251139935367e-07, "loss": 1.0645, "step": 1591 }, { "epoch": 0.007047678073398557, "grad_norm": 3.1142187098656735, "learning_rate": 7.047678073398558e-07, "loss": 0.8379, "step": 1592 }, { "epoch": 0.007052105006861747, "grad_norm": 3.456704053219509, "learning_rate": 7.052105006861748e-07, "loss": 0.8375, "step": 1593 }, { "epoch": 0.007056531940324937, "grad_norm": 2.9962101082540356, "learning_rate": 7.056531940324937e-07, "loss": 1.0867, "step": 1594 }, { "epoch": 0.007060958873788127, "grad_norm": 3.1673533519488095, "learning_rate": 7.060958873788127e-07, "loss": 0.9122, "step": 1595 }, { "epoch": 0.007065385807251317, "grad_norm": 3.2017020106196843, "learning_rate": 7.065385807251318e-07, "loss": 1.1662, "step": 1596 }, { "epoch": 0.007069812740714507, "grad_norm": 3.380671059854764, "learning_rate": 7.069812740714507e-07, "loss": 0.967, "step": 1597 }, { "epoch": 0.007074239674177697, "grad_norm": 3.9332388037835764, "learning_rate": 7.074239674177698e-07, "loss": 0.5854, "step": 1598 }, { "epoch": 0.007078666607640887, "grad_norm": 2.9506379249368773, "learning_rate": 7.078666607640888e-07, "loss": 0.8984, "step": 1599 }, { "epoch": 0.007083093541104077, "grad_norm": 2.6947240270026156, "learning_rate": 7.083093541104078e-07, "loss": 0.7541, "step": 1600 }, { "epoch": 0.007087520474567267, "grad_norm": 2.62514627152181, "learning_rate": 7.087520474567268e-07, "loss": 0.7805, "step": 1601 }, { "epoch": 0.007091947408030457, "grad_norm": 3.3303844835704024, "learning_rate": 7.091947408030459e-07, "loss": 0.6271, "step": 1602 }, { "epoch": 0.0070963743414936475, "grad_norm": 5.142749709129944, "learning_rate": 7.096374341493647e-07, "loss": 1.3292, "step": 1603 }, { "epoch": 0.007100801274956837, "grad_norm": 2.9184798412699826, "learning_rate": 7.100801274956838e-07, "loss": 0.6051, "step": 1604 }, { "epoch": 0.007105228208420028, "grad_norm": 3.354059894159883, "learning_rate": 7.105228208420028e-07, "loss": 0.7571, "step": 1605 }, { "epoch": 0.007109655141883217, "grad_norm": 4.8159104044483065, "learning_rate": 7.109655141883218e-07, "loss": 1.1547, "step": 1606 }, { "epoch": 0.007114082075346408, "grad_norm": 2.8123756592941325, "learning_rate": 7.114082075346408e-07, "loss": 0.8702, "step": 1607 }, { "epoch": 0.0071185090088095974, "grad_norm": 3.061842313836786, "learning_rate": 7.118509008809599e-07, "loss": 0.9142, "step": 1608 }, { "epoch": 0.007122935942272788, "grad_norm": 2.951332336133362, "learning_rate": 7.122935942272787e-07, "loss": 1.0815, "step": 1609 }, { "epoch": 0.007127362875735978, "grad_norm": 4.086219100380267, "learning_rate": 7.127362875735978e-07, "loss": 1.2011, "step": 1610 }, { "epoch": 0.007131789809199168, "grad_norm": 2.8259408679340545, "learning_rate": 7.131789809199168e-07, "loss": 0.8679, "step": 1611 }, { "epoch": 0.007136216742662358, "grad_norm": 2.760672336125489, "learning_rate": 7.136216742662358e-07, "loss": 1.0272, "step": 1612 }, { "epoch": 0.007140643676125548, "grad_norm": 2.793718912929479, "learning_rate": 7.140643676125548e-07, "loss": 0.7259, "step": 1613 }, { "epoch": 0.007145070609588738, "grad_norm": 4.156500928820756, "learning_rate": 7.145070609588739e-07, "loss": 1.2432, "step": 1614 }, { "epoch": 0.0071494975430519275, "grad_norm": 3.556520026990184, "learning_rate": 7.149497543051928e-07, "loss": 0.92, "step": 1615 }, { "epoch": 0.007153924476515118, "grad_norm": 3.019219219400913, "learning_rate": 7.153924476515119e-07, "loss": 0.9013, "step": 1616 }, { "epoch": 0.007158351409978308, "grad_norm": 3.5024855298755955, "learning_rate": 7.158351409978309e-07, "loss": 1.0918, "step": 1617 }, { "epoch": 0.007162778343441498, "grad_norm": 3.0402319986925694, "learning_rate": 7.162778343441498e-07, "loss": 0.5488, "step": 1618 }, { "epoch": 0.007167205276904688, "grad_norm": 3.281389963234099, "learning_rate": 7.167205276904688e-07, "loss": 0.6395, "step": 1619 }, { "epoch": 0.007171632210367878, "grad_norm": 3.347414526223031, "learning_rate": 7.171632210367879e-07, "loss": 0.7488, "step": 1620 }, { "epoch": 0.007176059143831068, "grad_norm": 3.250822491190481, "learning_rate": 7.176059143831068e-07, "loss": 1.2411, "step": 1621 }, { "epoch": 0.0071804860772942585, "grad_norm": 3.791558590027601, "learning_rate": 7.180486077294259e-07, "loss": 1.3939, "step": 1622 }, { "epoch": 0.007184913010757448, "grad_norm": 2.6050960442470514, "learning_rate": 7.184913010757449e-07, "loss": 0.4904, "step": 1623 }, { "epoch": 0.007189339944220639, "grad_norm": 3.268414318843509, "learning_rate": 7.189339944220639e-07, "loss": 0.8541, "step": 1624 }, { "epoch": 0.007193766877683828, "grad_norm": 2.5385620579869386, "learning_rate": 7.193766877683829e-07, "loss": 0.4898, "step": 1625 }, { "epoch": 0.007198193811147019, "grad_norm": 4.080228397576643, "learning_rate": 7.19819381114702e-07, "loss": 1.2274, "step": 1626 }, { "epoch": 0.007202620744610208, "grad_norm": 2.834552964338971, "learning_rate": 7.202620744610208e-07, "loss": 1.0939, "step": 1627 }, { "epoch": 0.007207047678073399, "grad_norm": 3.2820250118200636, "learning_rate": 7.207047678073399e-07, "loss": 0.9531, "step": 1628 }, { "epoch": 0.0072114746115365886, "grad_norm": 2.6142971935999384, "learning_rate": 7.211474611536589e-07, "loss": 0.7721, "step": 1629 }, { "epoch": 0.007215901544999779, "grad_norm": 2.710329528221174, "learning_rate": 7.215901544999779e-07, "loss": 0.9838, "step": 1630 }, { "epoch": 0.007220328478462969, "grad_norm": 2.9180314997261343, "learning_rate": 7.220328478462969e-07, "loss": 1.0322, "step": 1631 }, { "epoch": 0.007224755411926158, "grad_norm": 3.1397523241020826, "learning_rate": 7.22475541192616e-07, "loss": 0.9621, "step": 1632 }, { "epoch": 0.007229182345389349, "grad_norm": 2.9036484673678986, "learning_rate": 7.229182345389348e-07, "loss": 0.9135, "step": 1633 }, { "epoch": 0.0072336092788525385, "grad_norm": 3.652674999643568, "learning_rate": 7.233609278852539e-07, "loss": 1.1086, "step": 1634 }, { "epoch": 0.007238036212315729, "grad_norm": 2.862018083267352, "learning_rate": 7.238036212315729e-07, "loss": 0.8725, "step": 1635 }, { "epoch": 0.007242463145778919, "grad_norm": 2.5481770243322472, "learning_rate": 7.242463145778919e-07, "loss": 0.853, "step": 1636 }, { "epoch": 0.007246890079242109, "grad_norm": 3.206597071076612, "learning_rate": 7.24689007924211e-07, "loss": 1.1071, "step": 1637 }, { "epoch": 0.007251317012705299, "grad_norm": 3.7534972709548504, "learning_rate": 7.2513170127053e-07, "loss": 0.9088, "step": 1638 }, { "epoch": 0.007255743946168489, "grad_norm": 3.392560904416914, "learning_rate": 7.25574394616849e-07, "loss": 0.6669, "step": 1639 }, { "epoch": 0.007260170879631679, "grad_norm": 2.9553405930461794, "learning_rate": 7.26017087963168e-07, "loss": 0.7304, "step": 1640 }, { "epoch": 0.0072645978130948695, "grad_norm": 3.084831022046123, "learning_rate": 7.264597813094871e-07, "loss": 0.6815, "step": 1641 }, { "epoch": 0.007269024746558059, "grad_norm": 3.209964949399632, "learning_rate": 7.269024746558059e-07, "loss": 0.7063, "step": 1642 }, { "epoch": 0.00727345168002125, "grad_norm": 3.374105032873327, "learning_rate": 7.27345168002125e-07, "loss": 0.7891, "step": 1643 }, { "epoch": 0.007277878613484439, "grad_norm": 3.2388690402783658, "learning_rate": 7.27787861348444e-07, "loss": 0.8733, "step": 1644 }, { "epoch": 0.00728230554694763, "grad_norm": 3.7562960081166827, "learning_rate": 7.28230554694763e-07, "loss": 0.8555, "step": 1645 }, { "epoch": 0.007286732480410819, "grad_norm": 3.011735499631367, "learning_rate": 7.28673248041082e-07, "loss": 0.8665, "step": 1646 }, { "epoch": 0.00729115941387401, "grad_norm": 2.7661374519756765, "learning_rate": 7.291159413874011e-07, "loss": 0.7171, "step": 1647 }, { "epoch": 0.0072955863473371995, "grad_norm": 2.947917739498885, "learning_rate": 7.2955863473372e-07, "loss": 0.8978, "step": 1648 }, { "epoch": 0.007300013280800389, "grad_norm": 3.684162302053232, "learning_rate": 7.300013280800391e-07, "loss": 0.9659, "step": 1649 }, { "epoch": 0.00730444021426358, "grad_norm": 2.850356127081348, "learning_rate": 7.304440214263581e-07, "loss": 0.9, "step": 1650 }, { "epoch": 0.007308867147726769, "grad_norm": 2.8428256619247225, "learning_rate": 7.30886714772677e-07, "loss": 1.1083, "step": 1651 }, { "epoch": 0.00731329408118996, "grad_norm": 3.5222356857167565, "learning_rate": 7.31329408118996e-07, "loss": 0.8939, "step": 1652 }, { "epoch": 0.0073177210146531495, "grad_norm": 2.671392635783064, "learning_rate": 7.317721014653151e-07, "loss": 0.7533, "step": 1653 }, { "epoch": 0.00732214794811634, "grad_norm": 3.6272345825963446, "learning_rate": 7.32214794811634e-07, "loss": 0.9405, "step": 1654 }, { "epoch": 0.00732657488157953, "grad_norm": 2.9197514486614193, "learning_rate": 7.326574881579531e-07, "loss": 0.954, "step": 1655 }, { "epoch": 0.00733100181504272, "grad_norm": 2.6708581999050995, "learning_rate": 7.331001815042721e-07, "loss": 0.667, "step": 1656 }, { "epoch": 0.00733542874850591, "grad_norm": 3.9040720358399468, "learning_rate": 7.335428748505911e-07, "loss": 0.8599, "step": 1657 }, { "epoch": 0.0073398556819691, "grad_norm": 3.180792153136222, "learning_rate": 7.3398556819691e-07, "loss": 0.9629, "step": 1658 }, { "epoch": 0.00734428261543229, "grad_norm": 3.261519745070417, "learning_rate": 7.344282615432291e-07, "loss": 1.0628, "step": 1659 }, { "epoch": 0.00734870954889548, "grad_norm": 3.229255001432841, "learning_rate": 7.34870954889548e-07, "loss": 0.9391, "step": 1660 }, { "epoch": 0.00735313648235867, "grad_norm": 2.873343118809401, "learning_rate": 7.353136482358671e-07, "loss": 0.6767, "step": 1661 }, { "epoch": 0.007357563415821861, "grad_norm": 3.656992717604281, "learning_rate": 7.357563415821861e-07, "loss": 0.9985, "step": 1662 }, { "epoch": 0.00736199034928505, "grad_norm": 2.764079019023833, "learning_rate": 7.361990349285051e-07, "loss": 0.8288, "step": 1663 }, { "epoch": 0.007366417282748241, "grad_norm": 2.9241410833656922, "learning_rate": 7.366417282748241e-07, "loss": 0.8382, "step": 1664 }, { "epoch": 0.00737084421621143, "grad_norm": 2.8147155878596894, "learning_rate": 7.370844216211432e-07, "loss": 0.7014, "step": 1665 }, { "epoch": 0.00737527114967462, "grad_norm": 3.4620135364114133, "learning_rate": 7.37527114967462e-07, "loss": 0.8883, "step": 1666 }, { "epoch": 0.0073796980831378105, "grad_norm": 2.934205784021284, "learning_rate": 7.379698083137811e-07, "loss": 1.0001, "step": 1667 }, { "epoch": 0.007384125016601, "grad_norm": 3.0158097539228423, "learning_rate": 7.384125016601001e-07, "loss": 0.6758, "step": 1668 }, { "epoch": 0.007388551950064191, "grad_norm": 2.994939136883447, "learning_rate": 7.388551950064191e-07, "loss": 1.0914, "step": 1669 }, { "epoch": 0.00739297888352738, "grad_norm": 3.92583972478644, "learning_rate": 7.392978883527381e-07, "loss": 1.2836, "step": 1670 }, { "epoch": 0.007397405816990571, "grad_norm": 2.9998870308723733, "learning_rate": 7.397405816990572e-07, "loss": 1.0898, "step": 1671 }, { "epoch": 0.0074018327504537604, "grad_norm": 3.491315777908663, "learning_rate": 7.401832750453761e-07, "loss": 1.4174, "step": 1672 }, { "epoch": 0.007406259683916951, "grad_norm": 3.3348550256705827, "learning_rate": 7.406259683916952e-07, "loss": 1.0951, "step": 1673 }, { "epoch": 0.007410686617380141, "grad_norm": 2.527366390680121, "learning_rate": 7.410686617380142e-07, "loss": 0.9004, "step": 1674 }, { "epoch": 0.007415113550843331, "grad_norm": 2.736994373022722, "learning_rate": 7.415113550843331e-07, "loss": 0.7928, "step": 1675 }, { "epoch": 0.007419540484306521, "grad_norm": 2.6667344844915633, "learning_rate": 7.419540484306521e-07, "loss": 0.985, "step": 1676 }, { "epoch": 0.007423967417769711, "grad_norm": 2.9479038040026104, "learning_rate": 7.423967417769712e-07, "loss": 0.8209, "step": 1677 }, { "epoch": 0.007428394351232901, "grad_norm": 3.1934214145586632, "learning_rate": 7.428394351232901e-07, "loss": 1.0525, "step": 1678 }, { "epoch": 0.007432821284696091, "grad_norm": 2.9706145397431674, "learning_rate": 7.432821284696092e-07, "loss": 0.4632, "step": 1679 }, { "epoch": 0.007437248218159281, "grad_norm": 2.675853136618783, "learning_rate": 7.437248218159282e-07, "loss": 0.8491, "step": 1680 }, { "epoch": 0.007441675151622471, "grad_norm": 3.196007435797718, "learning_rate": 7.441675151622472e-07, "loss": 0.8893, "step": 1681 }, { "epoch": 0.007446102085085661, "grad_norm": 2.781882345139446, "learning_rate": 7.446102085085662e-07, "loss": 0.836, "step": 1682 }, { "epoch": 0.007450529018548851, "grad_norm": 3.184615589137735, "learning_rate": 7.450529018548852e-07, "loss": 1.1882, "step": 1683 }, { "epoch": 0.007454955952012041, "grad_norm": 3.2631808269448075, "learning_rate": 7.454955952012041e-07, "loss": 1.0188, "step": 1684 }, { "epoch": 0.007459382885475231, "grad_norm": 3.919921852797759, "learning_rate": 7.459382885475232e-07, "loss": 1.3377, "step": 1685 }, { "epoch": 0.0074638098189384215, "grad_norm": 2.583625711599065, "learning_rate": 7.463809818938422e-07, "loss": 0.7082, "step": 1686 }, { "epoch": 0.007468236752401611, "grad_norm": 3.2321853317285574, "learning_rate": 7.468236752401612e-07, "loss": 1.0387, "step": 1687 }, { "epoch": 0.007472663685864802, "grad_norm": 3.464582460943697, "learning_rate": 7.472663685864802e-07, "loss": 0.8269, "step": 1688 }, { "epoch": 0.007477090619327991, "grad_norm": 2.674216916569153, "learning_rate": 7.477090619327993e-07, "loss": 0.915, "step": 1689 }, { "epoch": 0.007481517552791182, "grad_norm": 4.286646122098737, "learning_rate": 7.481517552791181e-07, "loss": 1.0229, "step": 1690 }, { "epoch": 0.007485944486254371, "grad_norm": 4.356808266471555, "learning_rate": 7.485944486254372e-07, "loss": 0.9758, "step": 1691 }, { "epoch": 0.007490371419717562, "grad_norm": 3.4046600177513664, "learning_rate": 7.490371419717562e-07, "loss": 0.9894, "step": 1692 }, { "epoch": 0.007494798353180752, "grad_norm": 2.9118741819163203, "learning_rate": 7.494798353180752e-07, "loss": 0.7622, "step": 1693 }, { "epoch": 0.007499225286643942, "grad_norm": 2.796500435659111, "learning_rate": 7.499225286643942e-07, "loss": 0.6779, "step": 1694 }, { "epoch": 0.007503652220107132, "grad_norm": 2.8761955964158803, "learning_rate": 7.503652220107133e-07, "loss": 0.8497, "step": 1695 }, { "epoch": 0.007508079153570322, "grad_norm": 2.7032694536222643, "learning_rate": 7.508079153570322e-07, "loss": 0.8645, "step": 1696 }, { "epoch": 0.007512506087033512, "grad_norm": 3.3795088659816486, "learning_rate": 7.512506087033513e-07, "loss": 0.8972, "step": 1697 }, { "epoch": 0.0075169330204967015, "grad_norm": 2.637436594918249, "learning_rate": 7.516933020496703e-07, "loss": 0.7719, "step": 1698 }, { "epoch": 0.007521359953959892, "grad_norm": 2.9212490890897853, "learning_rate": 7.521359953959892e-07, "loss": 0.9287, "step": 1699 }, { "epoch": 0.007525786887423082, "grad_norm": 4.000778315144951, "learning_rate": 7.525786887423082e-07, "loss": 0.8907, "step": 1700 }, { "epoch": 0.007530213820886272, "grad_norm": 2.3402581564107385, "learning_rate": 7.530213820886273e-07, "loss": 0.7958, "step": 1701 }, { "epoch": 0.007534640754349462, "grad_norm": 3.503145607787093, "learning_rate": 7.534640754349462e-07, "loss": 1.1079, "step": 1702 }, { "epoch": 0.007539067687812652, "grad_norm": 3.2622293110764367, "learning_rate": 7.539067687812653e-07, "loss": 0.9705, "step": 1703 }, { "epoch": 0.007543494621275842, "grad_norm": 3.1753013124961984, "learning_rate": 7.543494621275843e-07, "loss": 0.9281, "step": 1704 }, { "epoch": 0.0075479215547390325, "grad_norm": 2.720382274694128, "learning_rate": 7.547921554739033e-07, "loss": 0.7428, "step": 1705 }, { "epoch": 0.007552348488202222, "grad_norm": 2.6417950843452465, "learning_rate": 7.552348488202223e-07, "loss": 0.8721, "step": 1706 }, { "epoch": 0.007556775421665413, "grad_norm": 3.652811385195005, "learning_rate": 7.556775421665414e-07, "loss": 1.0415, "step": 1707 }, { "epoch": 0.007561202355128602, "grad_norm": 3.002314335580968, "learning_rate": 7.561202355128602e-07, "loss": 0.7766, "step": 1708 }, { "epoch": 0.007565629288591793, "grad_norm": 3.4774310917407103, "learning_rate": 7.565629288591793e-07, "loss": 0.9174, "step": 1709 }, { "epoch": 0.007570056222054982, "grad_norm": 3.3046787623098415, "learning_rate": 7.570056222054983e-07, "loss": 0.8835, "step": 1710 }, { "epoch": 0.007574483155518173, "grad_norm": 2.6748754621175777, "learning_rate": 7.574483155518174e-07, "loss": 0.9458, "step": 1711 }, { "epoch": 0.0075789100889813625, "grad_norm": 3.0180826253838466, "learning_rate": 7.578910088981363e-07, "loss": 1.0945, "step": 1712 }, { "epoch": 0.007583337022444553, "grad_norm": 3.6052782660340985, "learning_rate": 7.583337022444554e-07, "loss": 0.8986, "step": 1713 }, { "epoch": 0.007587763955907743, "grad_norm": 3.172479330643992, "learning_rate": 7.587763955907744e-07, "loss": 1.0298, "step": 1714 }, { "epoch": 0.007592190889370932, "grad_norm": 3.5918307301710644, "learning_rate": 7.592190889370933e-07, "loss": 1.0292, "step": 1715 }, { "epoch": 0.007596617822834123, "grad_norm": 2.5528003785876745, "learning_rate": 7.596617822834123e-07, "loss": 0.4837, "step": 1716 }, { "epoch": 0.0076010447562973125, "grad_norm": 3.1349572187003605, "learning_rate": 7.601044756297314e-07, "loss": 1.0605, "step": 1717 }, { "epoch": 0.007605471689760503, "grad_norm": 2.921345739584861, "learning_rate": 7.605471689760503e-07, "loss": 0.8687, "step": 1718 }, { "epoch": 0.007609898623223693, "grad_norm": 2.7181612873425447, "learning_rate": 7.609898623223694e-07, "loss": 1.0587, "step": 1719 }, { "epoch": 0.007614325556686883, "grad_norm": 3.30548136968632, "learning_rate": 7.614325556686884e-07, "loss": 0.9417, "step": 1720 }, { "epoch": 0.007618752490150073, "grad_norm": 2.7286019287086956, "learning_rate": 7.618752490150074e-07, "loss": 1.047, "step": 1721 }, { "epoch": 0.007623179423613263, "grad_norm": 2.7733042472329816, "learning_rate": 7.623179423613264e-07, "loss": 0.651, "step": 1722 }, { "epoch": 0.007627606357076453, "grad_norm": 3.8439919990077405, "learning_rate": 7.627606357076455e-07, "loss": 0.9165, "step": 1723 }, { "epoch": 0.007632033290539643, "grad_norm": 2.596117529936186, "learning_rate": 7.632033290539643e-07, "loss": 0.63, "step": 1724 }, { "epoch": 0.007636460224002833, "grad_norm": 2.7523439324599717, "learning_rate": 7.636460224002834e-07, "loss": 0.8211, "step": 1725 }, { "epoch": 0.007640887157466024, "grad_norm": 3.5506987214209476, "learning_rate": 7.640887157466024e-07, "loss": 0.8478, "step": 1726 }, { "epoch": 0.007645314090929213, "grad_norm": 2.96365604839545, "learning_rate": 7.645314090929214e-07, "loss": 0.8493, "step": 1727 }, { "epoch": 0.007649741024392404, "grad_norm": 3.3049392076732627, "learning_rate": 7.649741024392404e-07, "loss": 0.8594, "step": 1728 }, { "epoch": 0.007654167957855593, "grad_norm": 3.437659602775867, "learning_rate": 7.654167957855595e-07, "loss": 0.8999, "step": 1729 }, { "epoch": 0.007658594891318784, "grad_norm": 3.4466958428587215, "learning_rate": 7.658594891318784e-07, "loss": 1.1562, "step": 1730 }, { "epoch": 0.0076630218247819735, "grad_norm": 3.0978985898976434, "learning_rate": 7.663021824781975e-07, "loss": 0.6827, "step": 1731 }, { "epoch": 0.007667448758245163, "grad_norm": 2.3921840211927616, "learning_rate": 7.667448758245165e-07, "loss": 0.6485, "step": 1732 }, { "epoch": 0.007671875691708354, "grad_norm": 2.880325476757837, "learning_rate": 7.671875691708354e-07, "loss": 0.9475, "step": 1733 }, { "epoch": 0.007676302625171543, "grad_norm": 3.69096756160886, "learning_rate": 7.676302625171544e-07, "loss": 1.5352, "step": 1734 }, { "epoch": 0.007680729558634734, "grad_norm": 3.1858389995345013, "learning_rate": 7.680729558634735e-07, "loss": 1.0207, "step": 1735 }, { "epoch": 0.0076851564920979235, "grad_norm": 2.6431584613774404, "learning_rate": 7.685156492097924e-07, "loss": 0.6604, "step": 1736 }, { "epoch": 0.007689583425561114, "grad_norm": 2.5441088431254393, "learning_rate": 7.689583425561115e-07, "loss": 0.8091, "step": 1737 }, { "epoch": 0.007694010359024304, "grad_norm": 4.0279991842533835, "learning_rate": 7.694010359024305e-07, "loss": 0.9056, "step": 1738 }, { "epoch": 0.007698437292487494, "grad_norm": 2.704030473755065, "learning_rate": 7.698437292487494e-07, "loss": 0.569, "step": 1739 }, { "epoch": 0.007702864225950684, "grad_norm": 2.814041641025208, "learning_rate": 7.702864225950684e-07, "loss": 0.6997, "step": 1740 }, { "epoch": 0.007707291159413874, "grad_norm": 3.823264506794007, "learning_rate": 7.707291159413875e-07, "loss": 0.6438, "step": 1741 }, { "epoch": 0.007711718092877064, "grad_norm": 3.757603925431785, "learning_rate": 7.711718092877064e-07, "loss": 0.9993, "step": 1742 }, { "epoch": 0.007716145026340254, "grad_norm": 2.635840490962735, "learning_rate": 7.716145026340255e-07, "loss": 0.7954, "step": 1743 }, { "epoch": 0.007720571959803444, "grad_norm": 3.434577277301463, "learning_rate": 7.720571959803445e-07, "loss": 1.0577, "step": 1744 }, { "epoch": 0.0077249988932666346, "grad_norm": 3.4321951318965196, "learning_rate": 7.724998893266635e-07, "loss": 0.7795, "step": 1745 }, { "epoch": 0.007729425826729824, "grad_norm": 3.728964832464447, "learning_rate": 7.729425826729825e-07, "loss": 0.8443, "step": 1746 }, { "epoch": 0.007733852760193015, "grad_norm": 2.5687691099485934, "learning_rate": 7.733852760193016e-07, "loss": 0.9279, "step": 1747 }, { "epoch": 0.007738279693656204, "grad_norm": 2.8924224628462594, "learning_rate": 7.738279693656204e-07, "loss": 0.6845, "step": 1748 }, { "epoch": 0.007742706627119394, "grad_norm": 4.377118360443089, "learning_rate": 7.742706627119395e-07, "loss": 1.3265, "step": 1749 }, { "epoch": 0.0077471335605825845, "grad_norm": 3.114764931733603, "learning_rate": 7.747133560582585e-07, "loss": 0.7244, "step": 1750 }, { "epoch": 0.007751560494045774, "grad_norm": 3.0561973062491203, "learning_rate": 7.751560494045775e-07, "loss": 0.9203, "step": 1751 }, { "epoch": 0.007755987427508965, "grad_norm": 3.06002173635343, "learning_rate": 7.755987427508965e-07, "loss": 1.1756, "step": 1752 }, { "epoch": 0.007760414360972154, "grad_norm": 3.9556730052682543, "learning_rate": 7.760414360972156e-07, "loss": 1.1003, "step": 1753 }, { "epoch": 0.007764841294435345, "grad_norm": 2.7063859809282174, "learning_rate": 7.764841294435345e-07, "loss": 0.8548, "step": 1754 }, { "epoch": 0.007769268227898534, "grad_norm": 3.18554889971358, "learning_rate": 7.769268227898536e-07, "loss": 0.8104, "step": 1755 }, { "epoch": 0.007773695161361725, "grad_norm": 2.804013050479873, "learning_rate": 7.773695161361726e-07, "loss": 0.7394, "step": 1756 }, { "epoch": 0.007778122094824915, "grad_norm": 3.022318152508772, "learning_rate": 7.778122094824915e-07, "loss": 1.0607, "step": 1757 }, { "epoch": 0.007782549028288105, "grad_norm": 2.9129990913673827, "learning_rate": 7.782549028288105e-07, "loss": 1.072, "step": 1758 }, { "epoch": 0.007786975961751295, "grad_norm": 3.1617787698152853, "learning_rate": 7.786975961751296e-07, "loss": 1.0117, "step": 1759 }, { "epoch": 0.007791402895214485, "grad_norm": 3.547146590246444, "learning_rate": 7.791402895214485e-07, "loss": 1.1015, "step": 1760 }, { "epoch": 0.007795829828677675, "grad_norm": 3.7058345429954977, "learning_rate": 7.795829828677676e-07, "loss": 1.1378, "step": 1761 }, { "epoch": 0.007800256762140865, "grad_norm": 3.014689458933094, "learning_rate": 7.800256762140866e-07, "loss": 0.9204, "step": 1762 }, { "epoch": 0.007804683695604055, "grad_norm": 3.322095271253999, "learning_rate": 7.804683695604055e-07, "loss": 0.9196, "step": 1763 }, { "epoch": 0.0078091106290672455, "grad_norm": 3.482732944274231, "learning_rate": 7.809110629067245e-07, "loss": 0.9428, "step": 1764 }, { "epoch": 0.007813537562530436, "grad_norm": 2.742511784528909, "learning_rate": 7.813537562530436e-07, "loss": 1.033, "step": 1765 }, { "epoch": 0.007817964495993625, "grad_norm": 3.353532913196837, "learning_rate": 7.817964495993625e-07, "loss": 1.1008, "step": 1766 }, { "epoch": 0.007822391429456815, "grad_norm": 2.7728602649029455, "learning_rate": 7.822391429456816e-07, "loss": 0.5453, "step": 1767 }, { "epoch": 0.007826818362920006, "grad_norm": 3.477062337074208, "learning_rate": 7.826818362920006e-07, "loss": 0.9489, "step": 1768 }, { "epoch": 0.007831245296383195, "grad_norm": 3.140547679751551, "learning_rate": 7.831245296383196e-07, "loss": 1.1017, "step": 1769 }, { "epoch": 0.007835672229846385, "grad_norm": 3.4565952754203435, "learning_rate": 7.835672229846386e-07, "loss": 0.541, "step": 1770 }, { "epoch": 0.007840099163309576, "grad_norm": 3.899013673553325, "learning_rate": 7.840099163309577e-07, "loss": 1.2085, "step": 1771 }, { "epoch": 0.007844526096772766, "grad_norm": 3.9293429214688866, "learning_rate": 7.844526096772765e-07, "loss": 1.2508, "step": 1772 }, { "epoch": 0.007848953030235955, "grad_norm": 3.868421298061306, "learning_rate": 7.848953030235956e-07, "loss": 1.1775, "step": 1773 }, { "epoch": 0.007853379963699145, "grad_norm": 4.267134188806948, "learning_rate": 7.853379963699146e-07, "loss": 1.3218, "step": 1774 }, { "epoch": 0.007857806897162336, "grad_norm": 3.696294621303878, "learning_rate": 7.857806897162336e-07, "loss": 0.9946, "step": 1775 }, { "epoch": 0.007862233830625526, "grad_norm": 3.872703839166246, "learning_rate": 7.862233830625526e-07, "loss": 0.8307, "step": 1776 }, { "epoch": 0.007866660764088715, "grad_norm": 2.759062668850026, "learning_rate": 7.866660764088717e-07, "loss": 0.9322, "step": 1777 }, { "epoch": 0.007871087697551906, "grad_norm": 3.5844251314797893, "learning_rate": 7.871087697551906e-07, "loss": 0.9222, "step": 1778 }, { "epoch": 0.007875514631015096, "grad_norm": 3.0129991317322116, "learning_rate": 7.875514631015097e-07, "loss": 0.6374, "step": 1779 }, { "epoch": 0.007879941564478287, "grad_norm": 4.203184054846212, "learning_rate": 7.879941564478287e-07, "loss": 0.8656, "step": 1780 }, { "epoch": 0.007884368497941475, "grad_norm": 3.541651245940705, "learning_rate": 7.884368497941476e-07, "loss": 1.1077, "step": 1781 }, { "epoch": 0.007888795431404666, "grad_norm": 3.3815670640896744, "learning_rate": 7.888795431404666e-07, "loss": 0.9248, "step": 1782 }, { "epoch": 0.007893222364867856, "grad_norm": 2.423281342342377, "learning_rate": 7.893222364867857e-07, "loss": 0.905, "step": 1783 }, { "epoch": 0.007897649298331045, "grad_norm": 2.888073488454762, "learning_rate": 7.897649298331046e-07, "loss": 1.1007, "step": 1784 }, { "epoch": 0.007902076231794236, "grad_norm": 3.3738264939078593, "learning_rate": 7.902076231794237e-07, "loss": 1.0066, "step": 1785 }, { "epoch": 0.007906503165257426, "grad_norm": 3.364459235757458, "learning_rate": 7.906503165257427e-07, "loss": 0.8974, "step": 1786 }, { "epoch": 0.007910930098720617, "grad_norm": 2.7670868377275744, "learning_rate": 7.910930098720616e-07, "loss": 0.8784, "step": 1787 }, { "epoch": 0.007915357032183806, "grad_norm": 2.9423651073284622, "learning_rate": 7.915357032183806e-07, "loss": 1.0083, "step": 1788 }, { "epoch": 0.007919783965646996, "grad_norm": 2.901439803886506, "learning_rate": 7.919783965646997e-07, "loss": 0.527, "step": 1789 }, { "epoch": 0.007924210899110187, "grad_norm": 2.9598897206214656, "learning_rate": 7.924210899110186e-07, "loss": 0.8801, "step": 1790 }, { "epoch": 0.007928637832573377, "grad_norm": 2.5067009172736556, "learning_rate": 7.928637832573377e-07, "loss": 0.6704, "step": 1791 }, { "epoch": 0.007933064766036566, "grad_norm": 2.980628240567023, "learning_rate": 7.933064766036567e-07, "loss": 0.937, "step": 1792 }, { "epoch": 0.007937491699499756, "grad_norm": 3.283915296460554, "learning_rate": 7.937491699499757e-07, "loss": 0.9061, "step": 1793 }, { "epoch": 0.007941918632962947, "grad_norm": 2.5844413963076875, "learning_rate": 7.941918632962947e-07, "loss": 0.7075, "step": 1794 }, { "epoch": 0.007946345566426137, "grad_norm": 2.5528427188274536, "learning_rate": 7.946345566426138e-07, "loss": 0.7065, "step": 1795 }, { "epoch": 0.007950772499889326, "grad_norm": 3.3304605909821587, "learning_rate": 7.950772499889326e-07, "loss": 1.0374, "step": 1796 }, { "epoch": 0.007955199433352517, "grad_norm": 2.9299770792329665, "learning_rate": 7.955199433352517e-07, "loss": 0.8323, "step": 1797 }, { "epoch": 0.007959626366815707, "grad_norm": 3.01804527464965, "learning_rate": 7.959626366815707e-07, "loss": 0.7635, "step": 1798 }, { "epoch": 0.007964053300278896, "grad_norm": 2.6646753358277087, "learning_rate": 7.964053300278897e-07, "loss": 0.7094, "step": 1799 }, { "epoch": 0.007968480233742086, "grad_norm": 3.378318846509159, "learning_rate": 7.968480233742087e-07, "loss": 1.0316, "step": 1800 }, { "epoch": 0.007972907167205277, "grad_norm": 3.493540409432346, "learning_rate": 7.972907167205278e-07, "loss": 1.0357, "step": 1801 }, { "epoch": 0.007977334100668467, "grad_norm": 2.586348859998215, "learning_rate": 7.977334100668467e-07, "loss": 0.6899, "step": 1802 }, { "epoch": 0.007981761034131656, "grad_norm": 2.8734033400268464, "learning_rate": 7.981761034131658e-07, "loss": 0.6473, "step": 1803 }, { "epoch": 0.007986187967594847, "grad_norm": 2.597501519372502, "learning_rate": 7.986187967594849e-07, "loss": 0.5547, "step": 1804 }, { "epoch": 0.007990614901058037, "grad_norm": 2.747485252222714, "learning_rate": 7.990614901058037e-07, "loss": 0.6714, "step": 1805 }, { "epoch": 0.007995041834521228, "grad_norm": 3.0583666941449588, "learning_rate": 7.995041834521227e-07, "loss": 1.0202, "step": 1806 }, { "epoch": 0.007999468767984417, "grad_norm": 2.985107386800567, "learning_rate": 7.999468767984418e-07, "loss": 0.731, "step": 1807 }, { "epoch": 0.008003895701447607, "grad_norm": 3.1100319732357367, "learning_rate": 8.003895701447607e-07, "loss": 0.7284, "step": 1808 }, { "epoch": 0.008008322634910798, "grad_norm": 3.068893067193423, "learning_rate": 8.008322634910798e-07, "loss": 0.8312, "step": 1809 }, { "epoch": 0.008012749568373988, "grad_norm": 2.7342366233617845, "learning_rate": 8.012749568373989e-07, "loss": 0.7113, "step": 1810 }, { "epoch": 0.008017176501837177, "grad_norm": 2.878917114185175, "learning_rate": 8.017176501837178e-07, "loss": 0.8207, "step": 1811 }, { "epoch": 0.008021603435300367, "grad_norm": 3.275931596476763, "learning_rate": 8.021603435300367e-07, "loss": 0.6721, "step": 1812 }, { "epoch": 0.008026030368763558, "grad_norm": 4.163725119399636, "learning_rate": 8.026030368763558e-07, "loss": 1.4099, "step": 1813 }, { "epoch": 0.008030457302226748, "grad_norm": 3.1055109869757067, "learning_rate": 8.030457302226747e-07, "loss": 0.6787, "step": 1814 }, { "epoch": 0.008034884235689937, "grad_norm": 3.493200409248802, "learning_rate": 8.034884235689938e-07, "loss": 0.8226, "step": 1815 }, { "epoch": 0.008039311169153128, "grad_norm": 2.5004093526802986, "learning_rate": 8.039311169153129e-07, "loss": 0.7769, "step": 1816 }, { "epoch": 0.008043738102616318, "grad_norm": 3.28418948835758, "learning_rate": 8.043738102616318e-07, "loss": 0.8795, "step": 1817 }, { "epoch": 0.008048165036079507, "grad_norm": 3.1057996517669304, "learning_rate": 8.048165036079509e-07, "loss": 0.8111, "step": 1818 }, { "epoch": 0.008052591969542697, "grad_norm": 3.046816893509643, "learning_rate": 8.052591969542699e-07, "loss": 0.838, "step": 1819 }, { "epoch": 0.008057018903005888, "grad_norm": 3.47687884221007, "learning_rate": 8.057018903005887e-07, "loss": 0.8881, "step": 1820 }, { "epoch": 0.008061445836469078, "grad_norm": 2.5129146893105654, "learning_rate": 8.061445836469078e-07, "loss": 0.7241, "step": 1821 }, { "epoch": 0.008065872769932267, "grad_norm": 3.0444191593402343, "learning_rate": 8.065872769932269e-07, "loss": 0.865, "step": 1822 }, { "epoch": 0.008070299703395458, "grad_norm": 4.153542495992499, "learning_rate": 8.070299703395458e-07, "loss": 1.5337, "step": 1823 }, { "epoch": 0.008074726636858648, "grad_norm": 2.6033919398829557, "learning_rate": 8.074726636858649e-07, "loss": 0.5063, "step": 1824 }, { "epoch": 0.008079153570321839, "grad_norm": 2.6687043767251355, "learning_rate": 8.079153570321839e-07, "loss": 0.5773, "step": 1825 }, { "epoch": 0.008083580503785028, "grad_norm": 3.583812623006863, "learning_rate": 8.083580503785029e-07, "loss": 0.9953, "step": 1826 }, { "epoch": 0.008088007437248218, "grad_norm": 2.787443009915901, "learning_rate": 8.088007437248219e-07, "loss": 0.8358, "step": 1827 }, { "epoch": 0.008092434370711409, "grad_norm": 3.4485553185432725, "learning_rate": 8.09243437071141e-07, "loss": 1.4433, "step": 1828 }, { "epoch": 0.008096861304174599, "grad_norm": 3.1617914144003603, "learning_rate": 8.096861304174598e-07, "loss": 1.1509, "step": 1829 }, { "epoch": 0.008101288237637788, "grad_norm": 3.1680293576275513, "learning_rate": 8.101288237637789e-07, "loss": 0.961, "step": 1830 }, { "epoch": 0.008105715171100978, "grad_norm": 3.372171203483678, "learning_rate": 8.105715171100979e-07, "loss": 1.1845, "step": 1831 }, { "epoch": 0.008110142104564169, "grad_norm": 3.2530597558941, "learning_rate": 8.110142104564169e-07, "loss": 0.7969, "step": 1832 }, { "epoch": 0.008114569038027358, "grad_norm": 4.844154301516813, "learning_rate": 8.114569038027359e-07, "loss": 1.1808, "step": 1833 }, { "epoch": 0.008118995971490548, "grad_norm": 2.8911404675787913, "learning_rate": 8.11899597149055e-07, "loss": 0.7065, "step": 1834 }, { "epoch": 0.008123422904953739, "grad_norm": 2.8464055964275277, "learning_rate": 8.123422904953739e-07, "loss": 0.6986, "step": 1835 }, { "epoch": 0.008127849838416929, "grad_norm": 3.0519714314291853, "learning_rate": 8.12784983841693e-07, "loss": 0.8954, "step": 1836 }, { "epoch": 0.008132276771880118, "grad_norm": 3.7515143505834376, "learning_rate": 8.13227677188012e-07, "loss": 0.955, "step": 1837 }, { "epoch": 0.008136703705343308, "grad_norm": 5.044814800074217, "learning_rate": 8.136703705343309e-07, "loss": 0.8341, "step": 1838 }, { "epoch": 0.008141130638806499, "grad_norm": 2.7895118966514247, "learning_rate": 8.141130638806499e-07, "loss": 0.7435, "step": 1839 }, { "epoch": 0.00814555757226969, "grad_norm": 2.3818119530574657, "learning_rate": 8.14555757226969e-07, "loss": 0.696, "step": 1840 }, { "epoch": 0.008149984505732878, "grad_norm": 5.047356702056676, "learning_rate": 8.149984505732879e-07, "loss": 1.3325, "step": 1841 }, { "epoch": 0.008154411439196069, "grad_norm": 2.8559888263105218, "learning_rate": 8.15441143919607e-07, "loss": 0.7302, "step": 1842 }, { "epoch": 0.00815883837265926, "grad_norm": 3.1487886855711484, "learning_rate": 8.15883837265926e-07, "loss": 1.0593, "step": 1843 }, { "epoch": 0.00816326530612245, "grad_norm": 3.39567107268577, "learning_rate": 8.163265306122449e-07, "loss": 1.1882, "step": 1844 }, { "epoch": 0.008167692239585638, "grad_norm": 2.4910483265187464, "learning_rate": 8.167692239585639e-07, "loss": 0.6911, "step": 1845 }, { "epoch": 0.008172119173048829, "grad_norm": 2.6779538455606744, "learning_rate": 8.17211917304883e-07, "loss": 0.8328, "step": 1846 }, { "epoch": 0.00817654610651202, "grad_norm": 3.73167687656161, "learning_rate": 8.176546106512019e-07, "loss": 1.1776, "step": 1847 }, { "epoch": 0.00818097303997521, "grad_norm": 3.3495799153879613, "learning_rate": 8.18097303997521e-07, "loss": 0.9732, "step": 1848 }, { "epoch": 0.008185399973438399, "grad_norm": 3.0604199803359275, "learning_rate": 8.1853999734384e-07, "loss": 0.9671, "step": 1849 }, { "epoch": 0.00818982690690159, "grad_norm": 3.296153944270268, "learning_rate": 8.18982690690159e-07, "loss": 1.3283, "step": 1850 }, { "epoch": 0.00819425384036478, "grad_norm": 2.70381207741663, "learning_rate": 8.19425384036478e-07, "loss": 0.8275, "step": 1851 }, { "epoch": 0.008198680773827969, "grad_norm": 2.686835547798736, "learning_rate": 8.198680773827971e-07, "loss": 1.0279, "step": 1852 }, { "epoch": 0.008203107707291159, "grad_norm": 2.5533072403926815, "learning_rate": 8.203107707291159e-07, "loss": 0.8079, "step": 1853 }, { "epoch": 0.00820753464075435, "grad_norm": 2.735653594098871, "learning_rate": 8.20753464075435e-07, "loss": 0.7144, "step": 1854 }, { "epoch": 0.00821196157421754, "grad_norm": 2.832210432818971, "learning_rate": 8.21196157421754e-07, "loss": 0.7897, "step": 1855 }, { "epoch": 0.008216388507680729, "grad_norm": 2.5565760507445523, "learning_rate": 8.21638850768073e-07, "loss": 0.8551, "step": 1856 }, { "epoch": 0.00822081544114392, "grad_norm": 4.437725820117307, "learning_rate": 8.22081544114392e-07, "loss": 1.2938, "step": 1857 }, { "epoch": 0.00822524237460711, "grad_norm": 3.0480394049195443, "learning_rate": 8.225242374607111e-07, "loss": 1.0151, "step": 1858 }, { "epoch": 0.0082296693080703, "grad_norm": 4.00094852557762, "learning_rate": 8.229669308070301e-07, "loss": 1.2512, "step": 1859 }, { "epoch": 0.00823409624153349, "grad_norm": 2.8286864918808736, "learning_rate": 8.234096241533491e-07, "loss": 0.6213, "step": 1860 }, { "epoch": 0.00823852317499668, "grad_norm": 3.142519742683193, "learning_rate": 8.238523174996681e-07, "loss": 1.0765, "step": 1861 }, { "epoch": 0.00824295010845987, "grad_norm": 2.8636197094240754, "learning_rate": 8.242950108459872e-07, "loss": 0.9706, "step": 1862 }, { "epoch": 0.00824737704192306, "grad_norm": 4.113627985183104, "learning_rate": 8.24737704192306e-07, "loss": 1.0481, "step": 1863 }, { "epoch": 0.00825180397538625, "grad_norm": 2.5633310192335745, "learning_rate": 8.251803975386251e-07, "loss": 0.7824, "step": 1864 }, { "epoch": 0.00825623090884944, "grad_norm": 2.7830372773309837, "learning_rate": 8.256230908849441e-07, "loss": 1.0221, "step": 1865 }, { "epoch": 0.00826065784231263, "grad_norm": 3.050615574772644, "learning_rate": 8.260657842312631e-07, "loss": 0.8153, "step": 1866 }, { "epoch": 0.00826508477577582, "grad_norm": 2.9552713084157287, "learning_rate": 8.265084775775821e-07, "loss": 0.7926, "step": 1867 }, { "epoch": 0.00826951170923901, "grad_norm": 3.2551478082971346, "learning_rate": 8.269511709239012e-07, "loss": 0.8812, "step": 1868 }, { "epoch": 0.0082739386427022, "grad_norm": 3.224217303577321, "learning_rate": 8.2739386427022e-07, "loss": 0.811, "step": 1869 }, { "epoch": 0.00827836557616539, "grad_norm": 4.024340554380324, "learning_rate": 8.278365576165391e-07, "loss": 1.5412, "step": 1870 }, { "epoch": 0.00828279250962858, "grad_norm": 2.9560478572785756, "learning_rate": 8.282792509628581e-07, "loss": 0.8462, "step": 1871 }, { "epoch": 0.00828721944309177, "grad_norm": 3.0760276184836743, "learning_rate": 8.287219443091771e-07, "loss": 0.8687, "step": 1872 }, { "epoch": 0.00829164637655496, "grad_norm": 3.1004716203497527, "learning_rate": 8.291646376554961e-07, "loss": 0.9869, "step": 1873 }, { "epoch": 0.008296073310018151, "grad_norm": 3.5457689373785826, "learning_rate": 8.296073310018152e-07, "loss": 1.0131, "step": 1874 }, { "epoch": 0.00830050024348134, "grad_norm": 2.6130683137489132, "learning_rate": 8.300500243481341e-07, "loss": 0.8969, "step": 1875 }, { "epoch": 0.00830492717694453, "grad_norm": 3.2354644344295607, "learning_rate": 8.304927176944532e-07, "loss": 0.9618, "step": 1876 }, { "epoch": 0.00830935411040772, "grad_norm": 3.0850098037030684, "learning_rate": 8.309354110407722e-07, "loss": 1.0608, "step": 1877 }, { "epoch": 0.008313781043870911, "grad_norm": 3.5532840290261327, "learning_rate": 8.313781043870911e-07, "loss": 1.2046, "step": 1878 }, { "epoch": 0.0083182079773341, "grad_norm": 2.5085370073013493, "learning_rate": 8.318207977334101e-07, "loss": 0.5346, "step": 1879 }, { "epoch": 0.00832263491079729, "grad_norm": 3.212130072932136, "learning_rate": 8.322634910797292e-07, "loss": 0.9452, "step": 1880 }, { "epoch": 0.008327061844260481, "grad_norm": 3.464709072711436, "learning_rate": 8.327061844260481e-07, "loss": 1.0183, "step": 1881 }, { "epoch": 0.008331488777723672, "grad_norm": 2.9529918244702156, "learning_rate": 8.331488777723672e-07, "loss": 0.787, "step": 1882 }, { "epoch": 0.00833591571118686, "grad_norm": 2.6892853488075086, "learning_rate": 8.335915711186862e-07, "loss": 0.8493, "step": 1883 }, { "epoch": 0.008340342644650051, "grad_norm": 2.5898987658846493, "learning_rate": 8.340342644650052e-07, "loss": 1.0833, "step": 1884 }, { "epoch": 0.008344769578113241, "grad_norm": 2.9064181971532284, "learning_rate": 8.344769578113242e-07, "loss": 0.7836, "step": 1885 }, { "epoch": 0.00834919651157643, "grad_norm": 4.234879962804699, "learning_rate": 8.349196511576433e-07, "loss": 1.0565, "step": 1886 }, { "epoch": 0.00835362344503962, "grad_norm": 2.731051703425381, "learning_rate": 8.353623445039621e-07, "loss": 1.0636, "step": 1887 }, { "epoch": 0.008358050378502811, "grad_norm": 3.2556210766108125, "learning_rate": 8.358050378502812e-07, "loss": 1.1978, "step": 1888 }, { "epoch": 0.008362477311966002, "grad_norm": 3.04775861861569, "learning_rate": 8.362477311966002e-07, "loss": 0.9462, "step": 1889 }, { "epoch": 0.00836690424542919, "grad_norm": 3.2970362822894645, "learning_rate": 8.366904245429192e-07, "loss": 1.2917, "step": 1890 }, { "epoch": 0.008371331178892381, "grad_norm": 2.9041089829813718, "learning_rate": 8.371331178892382e-07, "loss": 0.4521, "step": 1891 }, { "epoch": 0.008375758112355572, "grad_norm": 2.6273592822070015, "learning_rate": 8.375758112355573e-07, "loss": 0.7865, "step": 1892 }, { "epoch": 0.008380185045818762, "grad_norm": 3.5720654909604175, "learning_rate": 8.380185045818761e-07, "loss": 1.2382, "step": 1893 }, { "epoch": 0.00838461197928195, "grad_norm": 2.35414571695614, "learning_rate": 8.384611979281952e-07, "loss": 0.6774, "step": 1894 }, { "epoch": 0.008389038912745141, "grad_norm": 2.6949166810021348, "learning_rate": 8.389038912745142e-07, "loss": 0.9259, "step": 1895 }, { "epoch": 0.008393465846208332, "grad_norm": 3.8412493597807083, "learning_rate": 8.393465846208332e-07, "loss": 1.1107, "step": 1896 }, { "epoch": 0.008397892779671522, "grad_norm": 2.9566922045030064, "learning_rate": 8.397892779671522e-07, "loss": 0.86, "step": 1897 }, { "epoch": 0.008402319713134711, "grad_norm": 2.920214552419788, "learning_rate": 8.402319713134713e-07, "loss": 0.7931, "step": 1898 }, { "epoch": 0.008406746646597902, "grad_norm": 3.080334869329619, "learning_rate": 8.406746646597902e-07, "loss": 0.8963, "step": 1899 }, { "epoch": 0.008411173580061092, "grad_norm": 2.97991651712044, "learning_rate": 8.411173580061093e-07, "loss": 0.589, "step": 1900 }, { "epoch": 0.008415600513524281, "grad_norm": 2.6384080305627884, "learning_rate": 8.415600513524283e-07, "loss": 0.7661, "step": 1901 }, { "epoch": 0.008420027446987471, "grad_norm": 2.9990037133709073, "learning_rate": 8.420027446987472e-07, "loss": 0.7517, "step": 1902 }, { "epoch": 0.008424454380450662, "grad_norm": 2.7698435850229, "learning_rate": 8.424454380450662e-07, "loss": 0.8438, "step": 1903 }, { "epoch": 0.008428881313913852, "grad_norm": 3.0542110915640195, "learning_rate": 8.428881313913853e-07, "loss": 1.0738, "step": 1904 }, { "epoch": 0.008433308247377041, "grad_norm": 3.3180301930655323, "learning_rate": 8.433308247377042e-07, "loss": 0.967, "step": 1905 }, { "epoch": 0.008437735180840232, "grad_norm": 3.2064516090183828, "learning_rate": 8.437735180840233e-07, "loss": 0.9148, "step": 1906 }, { "epoch": 0.008442162114303422, "grad_norm": 2.655409824690159, "learning_rate": 8.442162114303423e-07, "loss": 0.7228, "step": 1907 }, { "epoch": 0.008446589047766613, "grad_norm": 3.502976250051058, "learning_rate": 8.446589047766613e-07, "loss": 0.5383, "step": 1908 }, { "epoch": 0.008451015981229801, "grad_norm": 3.1737768234979855, "learning_rate": 8.451015981229803e-07, "loss": 0.8229, "step": 1909 }, { "epoch": 0.008455442914692992, "grad_norm": 3.510529346483792, "learning_rate": 8.455442914692994e-07, "loss": 1.4926, "step": 1910 }, { "epoch": 0.008459869848156183, "grad_norm": 2.8824112613917587, "learning_rate": 8.459869848156182e-07, "loss": 0.7172, "step": 1911 }, { "epoch": 0.008464296781619373, "grad_norm": 2.9859992432691844, "learning_rate": 8.464296781619373e-07, "loss": 0.6158, "step": 1912 }, { "epoch": 0.008468723715082562, "grad_norm": 3.143850345868074, "learning_rate": 8.468723715082563e-07, "loss": 0.8632, "step": 1913 }, { "epoch": 0.008473150648545752, "grad_norm": 4.256155430412167, "learning_rate": 8.473150648545753e-07, "loss": 1.3924, "step": 1914 }, { "epoch": 0.008477577582008943, "grad_norm": 3.493212584180437, "learning_rate": 8.477577582008943e-07, "loss": 1.1688, "step": 1915 }, { "epoch": 0.008482004515472133, "grad_norm": 2.680701491232683, "learning_rate": 8.482004515472134e-07, "loss": 0.9507, "step": 1916 }, { "epoch": 0.008486431448935322, "grad_norm": 2.6064192965496025, "learning_rate": 8.486431448935322e-07, "loss": 0.8497, "step": 1917 }, { "epoch": 0.008490858382398513, "grad_norm": 2.6074889043505523, "learning_rate": 8.490858382398513e-07, "loss": 0.4985, "step": 1918 }, { "epoch": 0.008495285315861703, "grad_norm": 3.084004607364423, "learning_rate": 8.495285315861703e-07, "loss": 1.0617, "step": 1919 }, { "epoch": 0.008499712249324892, "grad_norm": 3.0778275541546796, "learning_rate": 8.499712249324893e-07, "loss": 0.9114, "step": 1920 }, { "epoch": 0.008504139182788082, "grad_norm": 2.83810688209872, "learning_rate": 8.504139182788083e-07, "loss": 0.7618, "step": 1921 }, { "epoch": 0.008508566116251273, "grad_norm": 2.869064703738274, "learning_rate": 8.508566116251274e-07, "loss": 0.8388, "step": 1922 }, { "epoch": 0.008512993049714463, "grad_norm": 2.9549146556095014, "learning_rate": 8.512993049714463e-07, "loss": 0.5003, "step": 1923 }, { "epoch": 0.008517419983177652, "grad_norm": 2.9186693907522483, "learning_rate": 8.517419983177654e-07, "loss": 0.6544, "step": 1924 }, { "epoch": 0.008521846916640843, "grad_norm": 2.7087869795943984, "learning_rate": 8.521846916640844e-07, "loss": 0.9174, "step": 1925 }, { "epoch": 0.008526273850104033, "grad_norm": 3.6677493971070163, "learning_rate": 8.526273850104033e-07, "loss": 1.0295, "step": 1926 }, { "epoch": 0.008530700783567224, "grad_norm": 3.5663050600435175, "learning_rate": 8.530700783567223e-07, "loss": 1.0615, "step": 1927 }, { "epoch": 0.008535127717030412, "grad_norm": 2.759743952919974, "learning_rate": 8.535127717030414e-07, "loss": 0.9147, "step": 1928 }, { "epoch": 0.008539554650493603, "grad_norm": 2.929396749871279, "learning_rate": 8.539554650493603e-07, "loss": 1.0428, "step": 1929 }, { "epoch": 0.008543981583956793, "grad_norm": 3.0639768637102494, "learning_rate": 8.543981583956794e-07, "loss": 0.8444, "step": 1930 }, { "epoch": 0.008548408517419984, "grad_norm": 3.3783805861113887, "learning_rate": 8.548408517419984e-07, "loss": 1.2734, "step": 1931 }, { "epoch": 0.008552835450883173, "grad_norm": 3.605855635808594, "learning_rate": 8.552835450883174e-07, "loss": 1.2379, "step": 1932 }, { "epoch": 0.008557262384346363, "grad_norm": 3.6827924331681383, "learning_rate": 8.557262384346364e-07, "loss": 1.1106, "step": 1933 }, { "epoch": 0.008561689317809554, "grad_norm": 3.2949239027466937, "learning_rate": 8.561689317809555e-07, "loss": 0.7617, "step": 1934 }, { "epoch": 0.008566116251272743, "grad_norm": 2.8850277811324228, "learning_rate": 8.566116251272743e-07, "loss": 0.7429, "step": 1935 }, { "epoch": 0.008570543184735933, "grad_norm": 3.298783157773112, "learning_rate": 8.570543184735934e-07, "loss": 0.9235, "step": 1936 }, { "epoch": 0.008574970118199124, "grad_norm": 2.737350769028345, "learning_rate": 8.574970118199124e-07, "loss": 1.0177, "step": 1937 }, { "epoch": 0.008579397051662314, "grad_norm": 3.0788428731527797, "learning_rate": 8.579397051662314e-07, "loss": 1.0373, "step": 1938 }, { "epoch": 0.008583823985125503, "grad_norm": 3.092244308099947, "learning_rate": 8.583823985125504e-07, "loss": 0.9381, "step": 1939 }, { "epoch": 0.008588250918588693, "grad_norm": 2.875328282456437, "learning_rate": 8.588250918588695e-07, "loss": 0.874, "step": 1940 }, { "epoch": 0.008592677852051884, "grad_norm": 3.26101026039975, "learning_rate": 8.592677852051883e-07, "loss": 1.0897, "step": 1941 }, { "epoch": 0.008597104785515074, "grad_norm": 2.844175102733914, "learning_rate": 8.597104785515074e-07, "loss": 0.872, "step": 1942 }, { "epoch": 0.008601531718978263, "grad_norm": 3.0705914434003243, "learning_rate": 8.601531718978264e-07, "loss": 0.9213, "step": 1943 }, { "epoch": 0.008605958652441454, "grad_norm": 3.463870859187011, "learning_rate": 8.605958652441454e-07, "loss": 0.7181, "step": 1944 }, { "epoch": 0.008610385585904644, "grad_norm": 2.7766549891516568, "learning_rate": 8.610385585904644e-07, "loss": 0.6971, "step": 1945 }, { "epoch": 0.008614812519367835, "grad_norm": 3.942433810620836, "learning_rate": 8.614812519367835e-07, "loss": 1.0615, "step": 1946 }, { "epoch": 0.008619239452831023, "grad_norm": 4.127340169592094, "learning_rate": 8.619239452831024e-07, "loss": 0.9643, "step": 1947 }, { "epoch": 0.008623666386294214, "grad_norm": 4.565941710186617, "learning_rate": 8.623666386294215e-07, "loss": 1.2053, "step": 1948 }, { "epoch": 0.008628093319757404, "grad_norm": 2.775237612316672, "learning_rate": 8.628093319757405e-07, "loss": 0.8126, "step": 1949 }, { "epoch": 0.008632520253220593, "grad_norm": 3.5787509439890237, "learning_rate": 8.632520253220594e-07, "loss": 0.921, "step": 1950 }, { "epoch": 0.008636947186683784, "grad_norm": 2.8778191214638693, "learning_rate": 8.636947186683784e-07, "loss": 1.0323, "step": 1951 }, { "epoch": 0.008641374120146974, "grad_norm": 3.442275514609883, "learning_rate": 8.641374120146975e-07, "loss": 1.0497, "step": 1952 }, { "epoch": 0.008645801053610165, "grad_norm": 3.0552627048461285, "learning_rate": 8.645801053610164e-07, "loss": 0.7774, "step": 1953 }, { "epoch": 0.008650227987073354, "grad_norm": 3.0453657167020296, "learning_rate": 8.650227987073355e-07, "loss": 0.8732, "step": 1954 }, { "epoch": 0.008654654920536544, "grad_norm": 4.0592828688259806, "learning_rate": 8.654654920536545e-07, "loss": 1.5721, "step": 1955 }, { "epoch": 0.008659081853999735, "grad_norm": 3.3626024337979854, "learning_rate": 8.659081853999735e-07, "loss": 0.8339, "step": 1956 }, { "epoch": 0.008663508787462925, "grad_norm": 3.5180385981739937, "learning_rate": 8.663508787462925e-07, "loss": 0.6696, "step": 1957 }, { "epoch": 0.008667935720926114, "grad_norm": 2.938872303486695, "learning_rate": 8.667935720926116e-07, "loss": 0.754, "step": 1958 }, { "epoch": 0.008672362654389304, "grad_norm": 2.3813869825652656, "learning_rate": 8.672362654389304e-07, "loss": 0.6481, "step": 1959 }, { "epoch": 0.008676789587852495, "grad_norm": 2.863186641882317, "learning_rate": 8.676789587852495e-07, "loss": 0.9955, "step": 1960 }, { "epoch": 0.008681216521315685, "grad_norm": 3.1848849908769874, "learning_rate": 8.681216521315685e-07, "loss": 0.6804, "step": 1961 }, { "epoch": 0.008685643454778874, "grad_norm": 3.1539360915866435, "learning_rate": 8.685643454778875e-07, "loss": 0.7694, "step": 1962 }, { "epoch": 0.008690070388242065, "grad_norm": 2.675028834988208, "learning_rate": 8.690070388242065e-07, "loss": 0.6569, "step": 1963 }, { "epoch": 0.008694497321705255, "grad_norm": 3.612822727857058, "learning_rate": 8.694497321705256e-07, "loss": 0.7917, "step": 1964 }, { "epoch": 0.008698924255168446, "grad_norm": 2.7521055111689194, "learning_rate": 8.698924255168445e-07, "loss": 0.5741, "step": 1965 }, { "epoch": 0.008703351188631634, "grad_norm": 4.351181697114101, "learning_rate": 8.703351188631636e-07, "loss": 1.0133, "step": 1966 }, { "epoch": 0.008707778122094825, "grad_norm": 2.4982576933768397, "learning_rate": 8.707778122094825e-07, "loss": 0.7973, "step": 1967 }, { "epoch": 0.008712205055558015, "grad_norm": 3.753745104220334, "learning_rate": 8.712205055558015e-07, "loss": 1.0515, "step": 1968 }, { "epoch": 0.008716631989021204, "grad_norm": 2.9898674839648622, "learning_rate": 8.716631989021205e-07, "loss": 1.1903, "step": 1969 }, { "epoch": 0.008721058922484395, "grad_norm": 2.9181795165121542, "learning_rate": 8.721058922484396e-07, "loss": 0.7756, "step": 1970 }, { "epoch": 0.008725485855947585, "grad_norm": 3.0264464492564724, "learning_rate": 8.725485855947585e-07, "loss": 0.7684, "step": 1971 }, { "epoch": 0.008729912789410776, "grad_norm": 3.9922255213788302, "learning_rate": 8.729912789410776e-07, "loss": 0.9018, "step": 1972 }, { "epoch": 0.008734339722873964, "grad_norm": 3.4849685998677997, "learning_rate": 8.734339722873967e-07, "loss": 0.9265, "step": 1973 }, { "epoch": 0.008738766656337155, "grad_norm": 3.7339199401739305, "learning_rate": 8.738766656337155e-07, "loss": 0.8701, "step": 1974 }, { "epoch": 0.008743193589800346, "grad_norm": 3.4227722235235154, "learning_rate": 8.743193589800345e-07, "loss": 0.9538, "step": 1975 }, { "epoch": 0.008747620523263536, "grad_norm": 2.607739452153745, "learning_rate": 8.747620523263536e-07, "loss": 0.7969, "step": 1976 }, { "epoch": 0.008752047456726725, "grad_norm": 2.819092838543546, "learning_rate": 8.752047456726725e-07, "loss": 1.0061, "step": 1977 }, { "epoch": 0.008756474390189915, "grad_norm": 3.3112289581993757, "learning_rate": 8.756474390189916e-07, "loss": 0.9354, "step": 1978 }, { "epoch": 0.008760901323653106, "grad_norm": 3.450970883463057, "learning_rate": 8.760901323653107e-07, "loss": 1.044, "step": 1979 }, { "epoch": 0.008765328257116296, "grad_norm": 3.6776113054859243, "learning_rate": 8.765328257116296e-07, "loss": 0.7926, "step": 1980 }, { "epoch": 0.008769755190579485, "grad_norm": 2.8962397923920893, "learning_rate": 8.769755190579487e-07, "loss": 0.7496, "step": 1981 }, { "epoch": 0.008774182124042676, "grad_norm": 2.974802728951602, "learning_rate": 8.774182124042677e-07, "loss": 1.0464, "step": 1982 }, { "epoch": 0.008778609057505866, "grad_norm": 3.0805718032906215, "learning_rate": 8.778609057505865e-07, "loss": 0.9407, "step": 1983 }, { "epoch": 0.008783035990969055, "grad_norm": 2.833444230005629, "learning_rate": 8.783035990969056e-07, "loss": 0.9415, "step": 1984 }, { "epoch": 0.008787462924432245, "grad_norm": 3.1943248086884783, "learning_rate": 8.787462924432247e-07, "loss": 0.6373, "step": 1985 }, { "epoch": 0.008791889857895436, "grad_norm": 3.003319187235004, "learning_rate": 8.791889857895436e-07, "loss": 0.8408, "step": 1986 }, { "epoch": 0.008796316791358626, "grad_norm": 3.496780433918409, "learning_rate": 8.796316791358627e-07, "loss": 0.9404, "step": 1987 }, { "epoch": 0.008800743724821815, "grad_norm": 2.9549241595081175, "learning_rate": 8.800743724821817e-07, "loss": 0.9993, "step": 1988 }, { "epoch": 0.008805170658285006, "grad_norm": 3.336058095341366, "learning_rate": 8.805170658285007e-07, "loss": 0.92, "step": 1989 }, { "epoch": 0.008809597591748196, "grad_norm": 2.7429515681412404, "learning_rate": 8.809597591748197e-07, "loss": 0.69, "step": 1990 }, { "epoch": 0.008814024525211387, "grad_norm": 3.9062478280740818, "learning_rate": 8.814024525211388e-07, "loss": 1.1474, "step": 1991 }, { "epoch": 0.008818451458674575, "grad_norm": 3.064336185051802, "learning_rate": 8.818451458674576e-07, "loss": 0.7333, "step": 1992 }, { "epoch": 0.008822878392137766, "grad_norm": 3.215037881564547, "learning_rate": 8.822878392137767e-07, "loss": 0.9184, "step": 1993 }, { "epoch": 0.008827305325600956, "grad_norm": 4.031565560799782, "learning_rate": 8.827305325600957e-07, "loss": 1.0853, "step": 1994 }, { "epoch": 0.008831732259064147, "grad_norm": 4.055248507566793, "learning_rate": 8.831732259064147e-07, "loss": 1.3216, "step": 1995 }, { "epoch": 0.008836159192527336, "grad_norm": 2.694966172238666, "learning_rate": 8.836159192527337e-07, "loss": 1.0449, "step": 1996 }, { "epoch": 0.008840586125990526, "grad_norm": 3.605009610424858, "learning_rate": 8.840586125990528e-07, "loss": 1.14, "step": 1997 }, { "epoch": 0.008845013059453717, "grad_norm": 2.5726576563655748, "learning_rate": 8.845013059453716e-07, "loss": 0.4099, "step": 1998 }, { "epoch": 0.008849439992916907, "grad_norm": 3.2884305760255184, "learning_rate": 8.849439992916907e-07, "loss": 1.2325, "step": 1999 }, { "epoch": 0.008853866926380096, "grad_norm": 3.6818181680895834, "learning_rate": 8.853866926380097e-07, "loss": 0.9437, "step": 2000 }, { "epoch": 0.008858293859843287, "grad_norm": 3.220277766883052, "learning_rate": 8.858293859843287e-07, "loss": 1.1009, "step": 2001 }, { "epoch": 0.008862720793306477, "grad_norm": 3.4048524033248575, "learning_rate": 8.862720793306477e-07, "loss": 0.9801, "step": 2002 }, { "epoch": 0.008867147726769666, "grad_norm": 2.598089707340574, "learning_rate": 8.867147726769668e-07, "loss": 0.5651, "step": 2003 }, { "epoch": 0.008871574660232856, "grad_norm": 2.918395332732907, "learning_rate": 8.871574660232857e-07, "loss": 1.0275, "step": 2004 }, { "epoch": 0.008876001593696047, "grad_norm": 2.8353081011050847, "learning_rate": 8.876001593696048e-07, "loss": 0.8116, "step": 2005 }, { "epoch": 0.008880428527159237, "grad_norm": 3.8338566489627937, "learning_rate": 8.880428527159238e-07, "loss": 0.797, "step": 2006 }, { "epoch": 0.008884855460622426, "grad_norm": 4.011220823046523, "learning_rate": 8.884855460622429e-07, "loss": 1.1103, "step": 2007 }, { "epoch": 0.008889282394085617, "grad_norm": 2.4520450366843387, "learning_rate": 8.889282394085617e-07, "loss": 0.589, "step": 2008 }, { "epoch": 0.008893709327548807, "grad_norm": 2.9100716627679866, "learning_rate": 8.893709327548808e-07, "loss": 0.9127, "step": 2009 }, { "epoch": 0.008898136261011998, "grad_norm": 2.6764829890707342, "learning_rate": 8.898136261011998e-07, "loss": 0.5372, "step": 2010 }, { "epoch": 0.008902563194475186, "grad_norm": 3.0056707602485657, "learning_rate": 8.902563194475188e-07, "loss": 1.066, "step": 2011 }, { "epoch": 0.008906990127938377, "grad_norm": 4.3217264485854665, "learning_rate": 8.906990127938378e-07, "loss": 1.2212, "step": 2012 }, { "epoch": 0.008911417061401567, "grad_norm": 2.686194796812086, "learning_rate": 8.911417061401569e-07, "loss": 0.7981, "step": 2013 }, { "epoch": 0.008915843994864758, "grad_norm": 2.905448022236443, "learning_rate": 8.915843994864758e-07, "loss": 0.8401, "step": 2014 }, { "epoch": 0.008920270928327947, "grad_norm": 3.929506943051783, "learning_rate": 8.920270928327949e-07, "loss": 1.0516, "step": 2015 }, { "epoch": 0.008924697861791137, "grad_norm": 3.0211747924772023, "learning_rate": 8.924697861791139e-07, "loss": 0.8122, "step": 2016 }, { "epoch": 0.008929124795254328, "grad_norm": 2.378365945365694, "learning_rate": 8.929124795254328e-07, "loss": 0.7451, "step": 2017 }, { "epoch": 0.008933551728717517, "grad_norm": 2.9549496816974616, "learning_rate": 8.933551728717518e-07, "loss": 0.6554, "step": 2018 }, { "epoch": 0.008937978662180707, "grad_norm": 3.17225403303715, "learning_rate": 8.937978662180709e-07, "loss": 0.7796, "step": 2019 }, { "epoch": 0.008942405595643898, "grad_norm": 2.914071288627282, "learning_rate": 8.942405595643898e-07, "loss": 0.7885, "step": 2020 }, { "epoch": 0.008946832529107088, "grad_norm": 2.8455144568486728, "learning_rate": 8.946832529107089e-07, "loss": 0.828, "step": 2021 }, { "epoch": 0.008951259462570277, "grad_norm": 4.141697080333425, "learning_rate": 8.951259462570279e-07, "loss": 0.8277, "step": 2022 }, { "epoch": 0.008955686396033467, "grad_norm": 3.1712460268076414, "learning_rate": 8.955686396033468e-07, "loss": 0.8619, "step": 2023 }, { "epoch": 0.008960113329496658, "grad_norm": 3.3748842604225984, "learning_rate": 8.960113329496658e-07, "loss": 0.8772, "step": 2024 }, { "epoch": 0.008964540262959848, "grad_norm": 3.676837877209716, "learning_rate": 8.964540262959849e-07, "loss": 0.8698, "step": 2025 }, { "epoch": 0.008968967196423037, "grad_norm": 3.7813447460250895, "learning_rate": 8.968967196423038e-07, "loss": 0.5826, "step": 2026 }, { "epoch": 0.008973394129886228, "grad_norm": 2.72569842707075, "learning_rate": 8.973394129886229e-07, "loss": 0.9009, "step": 2027 }, { "epoch": 0.008977821063349418, "grad_norm": 3.651374059809126, "learning_rate": 8.977821063349419e-07, "loss": 0.9557, "step": 2028 }, { "epoch": 0.008982247996812609, "grad_norm": 3.906219467113075, "learning_rate": 8.982247996812609e-07, "loss": 1.1456, "step": 2029 }, { "epoch": 0.008986674930275797, "grad_norm": 3.7501453344267013, "learning_rate": 8.986674930275799e-07, "loss": 1.2021, "step": 2030 }, { "epoch": 0.008991101863738988, "grad_norm": 2.6930878944598877, "learning_rate": 8.99110186373899e-07, "loss": 0.9272, "step": 2031 }, { "epoch": 0.008995528797202178, "grad_norm": 2.817391492799896, "learning_rate": 8.995528797202178e-07, "loss": 0.6711, "step": 2032 }, { "epoch": 0.008999955730665369, "grad_norm": 3.5937744628287533, "learning_rate": 8.999955730665369e-07, "loss": 0.9145, "step": 2033 }, { "epoch": 0.009004382664128558, "grad_norm": 3.1614297845894095, "learning_rate": 9.004382664128559e-07, "loss": 0.9323, "step": 2034 }, { "epoch": 0.009008809597591748, "grad_norm": 2.671277788825994, "learning_rate": 9.008809597591749e-07, "loss": 0.5654, "step": 2035 }, { "epoch": 0.009013236531054939, "grad_norm": 2.9666159000839913, "learning_rate": 9.013236531054939e-07, "loss": 0.7388, "step": 2036 }, { "epoch": 0.009017663464518128, "grad_norm": 3.261136430322326, "learning_rate": 9.01766346451813e-07, "loss": 0.898, "step": 2037 }, { "epoch": 0.009022090397981318, "grad_norm": 2.9656473062189694, "learning_rate": 9.022090397981319e-07, "loss": 1.0686, "step": 2038 }, { "epoch": 0.009026517331444509, "grad_norm": 2.790627360064313, "learning_rate": 9.02651733144451e-07, "loss": 0.6269, "step": 2039 }, { "epoch": 0.009030944264907699, "grad_norm": 3.9688719508931998, "learning_rate": 9.0309442649077e-07, "loss": 1.0025, "step": 2040 }, { "epoch": 0.009035371198370888, "grad_norm": 2.469567039844065, "learning_rate": 9.035371198370889e-07, "loss": 0.667, "step": 2041 }, { "epoch": 0.009039798131834078, "grad_norm": 3.3580006097270445, "learning_rate": 9.039798131834079e-07, "loss": 1.0116, "step": 2042 }, { "epoch": 0.009044225065297269, "grad_norm": 3.4025110626038653, "learning_rate": 9.04422506529727e-07, "loss": 1.0841, "step": 2043 }, { "epoch": 0.00904865199876046, "grad_norm": 2.70359727206953, "learning_rate": 9.048651998760459e-07, "loss": 0.7915, "step": 2044 }, { "epoch": 0.009053078932223648, "grad_norm": 3.666309475334056, "learning_rate": 9.05307893222365e-07, "loss": 0.9855, "step": 2045 }, { "epoch": 0.009057505865686839, "grad_norm": 3.0254553916191878, "learning_rate": 9.05750586568684e-07, "loss": 0.9896, "step": 2046 }, { "epoch": 0.009061932799150029, "grad_norm": 3.2977031221792568, "learning_rate": 9.061932799150029e-07, "loss": 1.1563, "step": 2047 }, { "epoch": 0.00906635973261322, "grad_norm": 2.7190218025048813, "learning_rate": 9.066359732613219e-07, "loss": 0.8863, "step": 2048 }, { "epoch": 0.009070786666076408, "grad_norm": 2.900815597219237, "learning_rate": 9.07078666607641e-07, "loss": 0.6305, "step": 2049 }, { "epoch": 0.009075213599539599, "grad_norm": 2.913809906846437, "learning_rate": 9.075213599539599e-07, "loss": 0.847, "step": 2050 }, { "epoch": 0.00907964053300279, "grad_norm": 3.821744941972548, "learning_rate": 9.07964053300279e-07, "loss": 1.3138, "step": 2051 }, { "epoch": 0.009084067466465978, "grad_norm": 2.7392896852887203, "learning_rate": 9.08406746646598e-07, "loss": 0.9024, "step": 2052 }, { "epoch": 0.009088494399929169, "grad_norm": 3.2645426136658626, "learning_rate": 9.08849439992917e-07, "loss": 1.0088, "step": 2053 }, { "epoch": 0.00909292133339236, "grad_norm": 2.8492601344907302, "learning_rate": 9.09292133339236e-07, "loss": 0.6762, "step": 2054 }, { "epoch": 0.00909734826685555, "grad_norm": 3.5007020556721664, "learning_rate": 9.097348266855551e-07, "loss": 1.0003, "step": 2055 }, { "epoch": 0.009101775200318738, "grad_norm": 5.230800576444354, "learning_rate": 9.101775200318739e-07, "loss": 1.3362, "step": 2056 }, { "epoch": 0.009106202133781929, "grad_norm": 2.735214563300016, "learning_rate": 9.10620213378193e-07, "loss": 0.7083, "step": 2057 }, { "epoch": 0.00911062906724512, "grad_norm": 2.4644902768866292, "learning_rate": 9.11062906724512e-07, "loss": 1.0227, "step": 2058 }, { "epoch": 0.00911505600070831, "grad_norm": 3.515347966617324, "learning_rate": 9.11505600070831e-07, "loss": 0.9112, "step": 2059 }, { "epoch": 0.009119482934171499, "grad_norm": 2.7769904130641603, "learning_rate": 9.1194829341715e-07, "loss": 1.0072, "step": 2060 }, { "epoch": 0.00912390986763469, "grad_norm": 2.7483002707957755, "learning_rate": 9.123909867634691e-07, "loss": 1.0915, "step": 2061 }, { "epoch": 0.00912833680109788, "grad_norm": 3.697818028905094, "learning_rate": 9.12833680109788e-07, "loss": 0.734, "step": 2062 }, { "epoch": 0.00913276373456107, "grad_norm": 3.689914512542722, "learning_rate": 9.132763734561071e-07, "loss": 0.8842, "step": 2063 }, { "epoch": 0.009137190668024259, "grad_norm": 3.1308794224897407, "learning_rate": 9.137190668024261e-07, "loss": 0.8748, "step": 2064 }, { "epoch": 0.00914161760148745, "grad_norm": 3.5989351568825265, "learning_rate": 9.14161760148745e-07, "loss": 1.1359, "step": 2065 }, { "epoch": 0.00914604453495064, "grad_norm": 2.9139115870251686, "learning_rate": 9.14604453495064e-07, "loss": 0.8893, "step": 2066 }, { "epoch": 0.00915047146841383, "grad_norm": 3.1744279354233007, "learning_rate": 9.150471468413831e-07, "loss": 0.6513, "step": 2067 }, { "epoch": 0.00915489840187702, "grad_norm": 2.925289144328851, "learning_rate": 9.15489840187702e-07, "loss": 0.863, "step": 2068 }, { "epoch": 0.00915932533534021, "grad_norm": 3.4028200230934957, "learning_rate": 9.159325335340211e-07, "loss": 0.776, "step": 2069 }, { "epoch": 0.0091637522688034, "grad_norm": 3.0878435187955016, "learning_rate": 9.163752268803401e-07, "loss": 0.6234, "step": 2070 }, { "epoch": 0.00916817920226659, "grad_norm": 3.1281364533549385, "learning_rate": 9.16817920226659e-07, "loss": 1.1769, "step": 2071 }, { "epoch": 0.00917260613572978, "grad_norm": 2.56722720379689, "learning_rate": 9.17260613572978e-07, "loss": 0.4608, "step": 2072 }, { "epoch": 0.00917703306919297, "grad_norm": 3.2695338093749298, "learning_rate": 9.177033069192971e-07, "loss": 0.7216, "step": 2073 }, { "epoch": 0.00918146000265616, "grad_norm": 2.863868447600463, "learning_rate": 9.18146000265616e-07, "loss": 0.9338, "step": 2074 }, { "epoch": 0.00918588693611935, "grad_norm": 3.714210336287793, "learning_rate": 9.185886936119351e-07, "loss": 0.9509, "step": 2075 }, { "epoch": 0.00919031386958254, "grad_norm": 2.894889925248796, "learning_rate": 9.190313869582541e-07, "loss": 0.8077, "step": 2076 }, { "epoch": 0.00919474080304573, "grad_norm": 3.0063699342938324, "learning_rate": 9.194740803045731e-07, "loss": 0.9518, "step": 2077 }, { "epoch": 0.009199167736508921, "grad_norm": 3.703367072558871, "learning_rate": 9.199167736508921e-07, "loss": 0.9118, "step": 2078 }, { "epoch": 0.00920359466997211, "grad_norm": 3.2534353087681116, "learning_rate": 9.203594669972112e-07, "loss": 0.9649, "step": 2079 }, { "epoch": 0.0092080216034353, "grad_norm": 3.5304535547158906, "learning_rate": 9.2080216034353e-07, "loss": 1.1935, "step": 2080 }, { "epoch": 0.00921244853689849, "grad_norm": 3.070627286641121, "learning_rate": 9.212448536898491e-07, "loss": 0.8225, "step": 2081 }, { "epoch": 0.009216875470361681, "grad_norm": 2.622327342954354, "learning_rate": 9.216875470361681e-07, "loss": 0.689, "step": 2082 }, { "epoch": 0.00922130240382487, "grad_norm": 2.8990959337963527, "learning_rate": 9.221302403824871e-07, "loss": 0.7457, "step": 2083 }, { "epoch": 0.00922572933728806, "grad_norm": 3.514653499295582, "learning_rate": 9.225729337288061e-07, "loss": 0.8696, "step": 2084 }, { "epoch": 0.009230156270751251, "grad_norm": 2.668909910569488, "learning_rate": 9.230156270751252e-07, "loss": 0.6821, "step": 2085 }, { "epoch": 0.00923458320421444, "grad_norm": 2.8072702045725166, "learning_rate": 9.234583204214441e-07, "loss": 0.6006, "step": 2086 }, { "epoch": 0.00923901013767763, "grad_norm": 2.6567304147250654, "learning_rate": 9.239010137677632e-07, "loss": 1.0219, "step": 2087 }, { "epoch": 0.00924343707114082, "grad_norm": 4.70378301960309, "learning_rate": 9.243437071140822e-07, "loss": 1.3064, "step": 2088 }, { "epoch": 0.009247864004604011, "grad_norm": 2.7419682756167125, "learning_rate": 9.247864004604011e-07, "loss": 0.8868, "step": 2089 }, { "epoch": 0.0092522909380672, "grad_norm": 3.5350881530328753, "learning_rate": 9.252290938067201e-07, "loss": 0.7045, "step": 2090 }, { "epoch": 0.00925671787153039, "grad_norm": 3.051772669842008, "learning_rate": 9.256717871530392e-07, "loss": 0.8163, "step": 2091 }, { "epoch": 0.009261144804993581, "grad_norm": 2.6258937708991206, "learning_rate": 9.261144804993581e-07, "loss": 0.8498, "step": 2092 }, { "epoch": 0.009265571738456772, "grad_norm": 3.0951774031599317, "learning_rate": 9.265571738456772e-07, "loss": 0.8001, "step": 2093 }, { "epoch": 0.00926999867191996, "grad_norm": 3.2661546088723084, "learning_rate": 9.269998671919962e-07, "loss": 1.0028, "step": 2094 }, { "epoch": 0.009274425605383151, "grad_norm": 3.5859011706804997, "learning_rate": 9.274425605383152e-07, "loss": 0.6498, "step": 2095 }, { "epoch": 0.009278852538846341, "grad_norm": 2.8437061204499963, "learning_rate": 9.278852538846341e-07, "loss": 0.7869, "step": 2096 }, { "epoch": 0.009283279472309532, "grad_norm": 3.216674125790708, "learning_rate": 9.283279472309532e-07, "loss": 1.0723, "step": 2097 }, { "epoch": 0.00928770640577272, "grad_norm": 3.273664549969423, "learning_rate": 9.287706405772721e-07, "loss": 1.0909, "step": 2098 }, { "epoch": 0.009292133339235911, "grad_norm": 2.582490937991697, "learning_rate": 9.292133339235912e-07, "loss": 0.7473, "step": 2099 }, { "epoch": 0.009296560272699102, "grad_norm": 3.964399968188928, "learning_rate": 9.296560272699102e-07, "loss": 0.6673, "step": 2100 }, { "epoch": 0.009300987206162292, "grad_norm": 2.711341697278202, "learning_rate": 9.300987206162292e-07, "loss": 0.7039, "step": 2101 }, { "epoch": 0.009305414139625481, "grad_norm": 3.06255689836713, "learning_rate": 9.305414139625482e-07, "loss": 1.1205, "step": 2102 }, { "epoch": 0.009309841073088672, "grad_norm": 3.117462705807063, "learning_rate": 9.309841073088673e-07, "loss": 1.0295, "step": 2103 }, { "epoch": 0.009314268006551862, "grad_norm": 3.2328711718414014, "learning_rate": 9.314268006551861e-07, "loss": 0.7456, "step": 2104 }, { "epoch": 0.00931869494001505, "grad_norm": 3.095814361989625, "learning_rate": 9.318694940015052e-07, "loss": 0.8268, "step": 2105 }, { "epoch": 0.009323121873478241, "grad_norm": 2.7773612579600533, "learning_rate": 9.323121873478242e-07, "loss": 0.6461, "step": 2106 }, { "epoch": 0.009327548806941432, "grad_norm": 3.187312278614201, "learning_rate": 9.327548806941432e-07, "loss": 0.9559, "step": 2107 }, { "epoch": 0.009331975740404622, "grad_norm": 2.63370783338845, "learning_rate": 9.331975740404622e-07, "loss": 0.9244, "step": 2108 }, { "epoch": 0.009336402673867811, "grad_norm": 2.891482102226985, "learning_rate": 9.336402673867813e-07, "loss": 0.6056, "step": 2109 }, { "epoch": 0.009340829607331002, "grad_norm": 2.783356597392846, "learning_rate": 9.340829607331002e-07, "loss": 0.951, "step": 2110 }, { "epoch": 0.009345256540794192, "grad_norm": 3.205416203507934, "learning_rate": 9.345256540794193e-07, "loss": 1.0525, "step": 2111 }, { "epoch": 0.009349683474257383, "grad_norm": 2.9270348127066574, "learning_rate": 9.349683474257383e-07, "loss": 0.8644, "step": 2112 }, { "epoch": 0.009354110407720571, "grad_norm": 2.878110604793606, "learning_rate": 9.354110407720572e-07, "loss": 0.6801, "step": 2113 }, { "epoch": 0.009358537341183762, "grad_norm": 2.6309559036740766, "learning_rate": 9.358537341183762e-07, "loss": 0.7682, "step": 2114 }, { "epoch": 0.009362964274646952, "grad_norm": 4.653866430024475, "learning_rate": 9.362964274646953e-07, "loss": 1.2364, "step": 2115 }, { "epoch": 0.009367391208110143, "grad_norm": 2.6490422326127527, "learning_rate": 9.367391208110142e-07, "loss": 0.6771, "step": 2116 }, { "epoch": 0.009371818141573332, "grad_norm": 3.1229883086046923, "learning_rate": 9.371818141573333e-07, "loss": 0.8235, "step": 2117 }, { "epoch": 0.009376245075036522, "grad_norm": 2.6544696145579834, "learning_rate": 9.376245075036523e-07, "loss": 0.6782, "step": 2118 }, { "epoch": 0.009380672008499713, "grad_norm": 4.572566846921016, "learning_rate": 9.380672008499713e-07, "loss": 1.0982, "step": 2119 }, { "epoch": 0.009385098941962901, "grad_norm": 3.9791464019271703, "learning_rate": 9.385098941962903e-07, "loss": 0.7224, "step": 2120 }, { "epoch": 0.009389525875426092, "grad_norm": 2.8456711315565193, "learning_rate": 9.389525875426093e-07, "loss": 0.4603, "step": 2121 }, { "epoch": 0.009393952808889282, "grad_norm": 2.9317180274354495, "learning_rate": 9.393952808889282e-07, "loss": 1.0307, "step": 2122 }, { "epoch": 0.009398379742352473, "grad_norm": 2.9934361160350433, "learning_rate": 9.398379742352473e-07, "loss": 1.0095, "step": 2123 }, { "epoch": 0.009402806675815662, "grad_norm": 4.573537807693457, "learning_rate": 9.402806675815663e-07, "loss": 1.2792, "step": 2124 }, { "epoch": 0.009407233609278852, "grad_norm": 3.343686102937342, "learning_rate": 9.407233609278853e-07, "loss": 0.8826, "step": 2125 }, { "epoch": 0.009411660542742043, "grad_norm": 3.810095823584374, "learning_rate": 9.411660542742043e-07, "loss": 0.8259, "step": 2126 }, { "epoch": 0.009416087476205233, "grad_norm": 2.6847509677366586, "learning_rate": 9.416087476205234e-07, "loss": 0.5465, "step": 2127 }, { "epoch": 0.009420514409668422, "grad_norm": 3.117596942384329, "learning_rate": 9.420514409668422e-07, "loss": 0.7166, "step": 2128 }, { "epoch": 0.009424941343131613, "grad_norm": 2.949612858766774, "learning_rate": 9.424941343131613e-07, "loss": 0.915, "step": 2129 }, { "epoch": 0.009429368276594803, "grad_norm": 2.6688233156139782, "learning_rate": 9.429368276594803e-07, "loss": 0.7922, "step": 2130 }, { "epoch": 0.009433795210057994, "grad_norm": 3.025900433834544, "learning_rate": 9.433795210057993e-07, "loss": 1.1799, "step": 2131 }, { "epoch": 0.009438222143521182, "grad_norm": 3.40071788961425, "learning_rate": 9.438222143521183e-07, "loss": 1.0876, "step": 2132 }, { "epoch": 0.009442649076984373, "grad_norm": 2.813881120609785, "learning_rate": 9.442649076984374e-07, "loss": 0.7289, "step": 2133 }, { "epoch": 0.009447076010447563, "grad_norm": 2.8441512400981352, "learning_rate": 9.447076010447563e-07, "loss": 0.9622, "step": 2134 }, { "epoch": 0.009451502943910752, "grad_norm": 2.6040581310172204, "learning_rate": 9.451502943910754e-07, "loss": 0.9837, "step": 2135 }, { "epoch": 0.009455929877373943, "grad_norm": 2.5789600498883978, "learning_rate": 9.455929877373945e-07, "loss": 0.9055, "step": 2136 }, { "epoch": 0.009460356810837133, "grad_norm": 2.8457706595288386, "learning_rate": 9.460356810837133e-07, "loss": 0.8774, "step": 2137 }, { "epoch": 0.009464783744300324, "grad_norm": 3.026833888889827, "learning_rate": 9.464783744300323e-07, "loss": 0.9687, "step": 2138 }, { "epoch": 0.009469210677763512, "grad_norm": 3.4057617382382577, "learning_rate": 9.469210677763514e-07, "loss": 0.9824, "step": 2139 }, { "epoch": 0.009473637611226703, "grad_norm": 3.4960213271029845, "learning_rate": 9.473637611226703e-07, "loss": 0.9336, "step": 2140 }, { "epoch": 0.009478064544689893, "grad_norm": 3.2524450649194208, "learning_rate": 9.478064544689894e-07, "loss": 0.7014, "step": 2141 }, { "epoch": 0.009482491478153084, "grad_norm": 3.239749683688126, "learning_rate": 9.482491478153085e-07, "loss": 0.6769, "step": 2142 }, { "epoch": 0.009486918411616273, "grad_norm": 3.142598545865047, "learning_rate": 9.486918411616274e-07, "loss": 1.0238, "step": 2143 }, { "epoch": 0.009491345345079463, "grad_norm": 2.8560570795026847, "learning_rate": 9.491345345079465e-07, "loss": 0.7762, "step": 2144 }, { "epoch": 0.009495772278542654, "grad_norm": 2.663485953115793, "learning_rate": 9.495772278542655e-07, "loss": 0.6367, "step": 2145 }, { "epoch": 0.009500199212005844, "grad_norm": 3.5888217214722364, "learning_rate": 9.500199212005843e-07, "loss": 0.8305, "step": 2146 }, { "epoch": 0.009504626145469033, "grad_norm": 2.937287624542422, "learning_rate": 9.504626145469034e-07, "loss": 0.9386, "step": 2147 }, { "epoch": 0.009509053078932224, "grad_norm": 2.6288194840793078, "learning_rate": 9.509053078932225e-07, "loss": 0.5755, "step": 2148 }, { "epoch": 0.009513480012395414, "grad_norm": 2.9722455772696943, "learning_rate": 9.513480012395414e-07, "loss": 0.8824, "step": 2149 }, { "epoch": 0.009517906945858605, "grad_norm": 2.583067484319655, "learning_rate": 9.517906945858605e-07, "loss": 0.892, "step": 2150 }, { "epoch": 0.009522333879321793, "grad_norm": 3.0202022970515356, "learning_rate": 9.522333879321795e-07, "loss": 0.9592, "step": 2151 }, { "epoch": 0.009526760812784984, "grad_norm": 3.3252164434614078, "learning_rate": 9.526760812784983e-07, "loss": 1.0416, "step": 2152 }, { "epoch": 0.009531187746248174, "grad_norm": 2.7129517627158792, "learning_rate": 9.531187746248174e-07, "loss": 0.8842, "step": 2153 }, { "epoch": 0.009535614679711363, "grad_norm": 3.216887317777979, "learning_rate": 9.535614679711365e-07, "loss": 0.7401, "step": 2154 }, { "epoch": 0.009540041613174554, "grad_norm": 3.3181309792171954, "learning_rate": 9.540041613174555e-07, "loss": 0.9701, "step": 2155 }, { "epoch": 0.009544468546637744, "grad_norm": 2.5706824599746954, "learning_rate": 9.544468546637745e-07, "loss": 0.7914, "step": 2156 }, { "epoch": 0.009548895480100935, "grad_norm": 3.027252386903522, "learning_rate": 9.548895480100936e-07, "loss": 0.9037, "step": 2157 }, { "epoch": 0.009553322413564123, "grad_norm": 3.1145402708545666, "learning_rate": 9.553322413564126e-07, "loss": 0.9079, "step": 2158 }, { "epoch": 0.009557749347027314, "grad_norm": 3.5286213027224047, "learning_rate": 9.557749347027315e-07, "loss": 0.937, "step": 2159 }, { "epoch": 0.009562176280490504, "grad_norm": 3.2642062839513533, "learning_rate": 9.562176280490505e-07, "loss": 0.8561, "step": 2160 }, { "epoch": 0.009566603213953695, "grad_norm": 3.87977343310474, "learning_rate": 9.566603213953696e-07, "loss": 1.3271, "step": 2161 }, { "epoch": 0.009571030147416884, "grad_norm": 3.2730780902890997, "learning_rate": 9.571030147416886e-07, "loss": 1.3566, "step": 2162 }, { "epoch": 0.009575457080880074, "grad_norm": 2.8944894159803014, "learning_rate": 9.575457080880075e-07, "loss": 0.8023, "step": 2163 }, { "epoch": 0.009579884014343265, "grad_norm": 3.6438191487300604, "learning_rate": 9.579884014343267e-07, "loss": 1.0358, "step": 2164 }, { "epoch": 0.009584310947806455, "grad_norm": 3.270580659462693, "learning_rate": 9.584310947806454e-07, "loss": 0.8052, "step": 2165 }, { "epoch": 0.009588737881269644, "grad_norm": 2.9321718264442245, "learning_rate": 9.588737881269646e-07, "loss": 1.0088, "step": 2166 }, { "epoch": 0.009593164814732835, "grad_norm": 2.6884602325618108, "learning_rate": 9.593164814732835e-07, "loss": 0.7111, "step": 2167 }, { "epoch": 0.009597591748196025, "grad_norm": 3.6201775916523977, "learning_rate": 9.597591748196025e-07, "loss": 1.0548, "step": 2168 }, { "epoch": 0.009602018681659214, "grad_norm": 2.6274348643610224, "learning_rate": 9.602018681659216e-07, "loss": 0.6911, "step": 2169 }, { "epoch": 0.009606445615122404, "grad_norm": 2.9291047394197993, "learning_rate": 9.606445615122406e-07, "loss": 0.6536, "step": 2170 }, { "epoch": 0.009610872548585595, "grad_norm": 3.1304322775275146, "learning_rate": 9.610872548585595e-07, "loss": 0.7173, "step": 2171 }, { "epoch": 0.009615299482048785, "grad_norm": 3.408320760925546, "learning_rate": 9.615299482048787e-07, "loss": 0.9582, "step": 2172 }, { "epoch": 0.009619726415511974, "grad_norm": 2.7242193735432334, "learning_rate": 9.619726415511976e-07, "loss": 0.7232, "step": 2173 }, { "epoch": 0.009624153348975165, "grad_norm": 3.155283790633187, "learning_rate": 9.624153348975166e-07, "loss": 1.0028, "step": 2174 }, { "epoch": 0.009628580282438355, "grad_norm": 3.0062153507511327, "learning_rate": 9.628580282438355e-07, "loss": 0.9002, "step": 2175 }, { "epoch": 0.009633007215901546, "grad_norm": 2.6433682943027983, "learning_rate": 9.633007215901547e-07, "loss": 0.723, "step": 2176 }, { "epoch": 0.009637434149364734, "grad_norm": 2.806956089439596, "learning_rate": 9.637434149364736e-07, "loss": 1.0892, "step": 2177 }, { "epoch": 0.009641861082827925, "grad_norm": 2.7970494789381606, "learning_rate": 9.641861082827926e-07, "loss": 0.6357, "step": 2178 }, { "epoch": 0.009646288016291115, "grad_norm": 2.463278651497976, "learning_rate": 9.646288016291117e-07, "loss": 0.7117, "step": 2179 }, { "epoch": 0.009650714949754306, "grad_norm": 2.8057901732872574, "learning_rate": 9.650714949754307e-07, "loss": 0.6832, "step": 2180 }, { "epoch": 0.009655141883217495, "grad_norm": 3.411496574426862, "learning_rate": 9.655141883217496e-07, "loss": 1.0379, "step": 2181 }, { "epoch": 0.009659568816680685, "grad_norm": 2.5917037798250524, "learning_rate": 9.659568816680688e-07, "loss": 0.618, "step": 2182 }, { "epoch": 0.009663995750143876, "grad_norm": 2.8651173684343587, "learning_rate": 9.663995750143875e-07, "loss": 0.9789, "step": 2183 }, { "epoch": 0.009668422683607066, "grad_norm": 3.4081840887189467, "learning_rate": 9.668422683607067e-07, "loss": 0.6718, "step": 2184 }, { "epoch": 0.009672849617070255, "grad_norm": 4.122855508641523, "learning_rate": 9.672849617070256e-07, "loss": 1.1457, "step": 2185 }, { "epoch": 0.009677276550533446, "grad_norm": 3.713516734351099, "learning_rate": 9.677276550533446e-07, "loss": 1.2601, "step": 2186 }, { "epoch": 0.009681703483996636, "grad_norm": 3.3322409515934535, "learning_rate": 9.681703483996637e-07, "loss": 1.2146, "step": 2187 }, { "epoch": 0.009686130417459825, "grad_norm": 3.314239217434122, "learning_rate": 9.686130417459827e-07, "loss": 0.6316, "step": 2188 }, { "epoch": 0.009690557350923015, "grad_norm": 3.1141540692083325, "learning_rate": 9.690557350923016e-07, "loss": 0.9993, "step": 2189 }, { "epoch": 0.009694984284386206, "grad_norm": 2.4942826466390446, "learning_rate": 9.694984284386206e-07, "loss": 0.7784, "step": 2190 }, { "epoch": 0.009699411217849396, "grad_norm": 3.0787538477819107, "learning_rate": 9.699411217849397e-07, "loss": 0.769, "step": 2191 }, { "epoch": 0.009703838151312585, "grad_norm": 2.9998093223187188, "learning_rate": 9.703838151312587e-07, "loss": 1.0383, "step": 2192 }, { "epoch": 0.009708265084775776, "grad_norm": 2.807229154370178, "learning_rate": 9.708265084775776e-07, "loss": 0.8611, "step": 2193 }, { "epoch": 0.009712692018238966, "grad_norm": 3.305080170439609, "learning_rate": 9.712692018238968e-07, "loss": 1.1156, "step": 2194 }, { "epoch": 0.009717118951702157, "grad_norm": 2.7785467088567053, "learning_rate": 9.717118951702157e-07, "loss": 0.6799, "step": 2195 }, { "epoch": 0.009721545885165345, "grad_norm": 3.6655587346081133, "learning_rate": 9.721545885165347e-07, "loss": 0.9268, "step": 2196 }, { "epoch": 0.009725972818628536, "grad_norm": 2.720528306542493, "learning_rate": 9.725972818628538e-07, "loss": 1.0914, "step": 2197 }, { "epoch": 0.009730399752091726, "grad_norm": 3.0267958076466397, "learning_rate": 9.730399752091726e-07, "loss": 0.8198, "step": 2198 }, { "epoch": 0.009734826685554917, "grad_norm": 3.1969432711832506, "learning_rate": 9.734826685554917e-07, "loss": 1.1188, "step": 2199 }, { "epoch": 0.009739253619018106, "grad_norm": 3.187009097119546, "learning_rate": 9.739253619018107e-07, "loss": 1.2654, "step": 2200 }, { "epoch": 0.009743680552481296, "grad_norm": 3.2006853842408125, "learning_rate": 9.743680552481296e-07, "loss": 0.5733, "step": 2201 }, { "epoch": 0.009748107485944487, "grad_norm": 2.8691175869972896, "learning_rate": 9.748107485944488e-07, "loss": 0.6394, "step": 2202 }, { "epoch": 0.009752534419407675, "grad_norm": 3.6462461721019093, "learning_rate": 9.752534419407677e-07, "loss": 0.9735, "step": 2203 }, { "epoch": 0.009756961352870866, "grad_norm": 3.434079856677388, "learning_rate": 9.756961352870867e-07, "loss": 1.0121, "step": 2204 }, { "epoch": 0.009761388286334056, "grad_norm": 3.083740456810993, "learning_rate": 9.761388286334058e-07, "loss": 1.0143, "step": 2205 }, { "epoch": 0.009765815219797247, "grad_norm": 2.889714645626384, "learning_rate": 9.765815219797248e-07, "loss": 1.0594, "step": 2206 }, { "epoch": 0.009770242153260436, "grad_norm": 3.0066203177603996, "learning_rate": 9.770242153260437e-07, "loss": 0.667, "step": 2207 }, { "epoch": 0.009774669086723626, "grad_norm": 3.6854842105218952, "learning_rate": 9.774669086723627e-07, "loss": 0.8611, "step": 2208 }, { "epoch": 0.009779096020186817, "grad_norm": 3.3050509709757834, "learning_rate": 9.779096020186818e-07, "loss": 1.0283, "step": 2209 }, { "epoch": 0.009783522953650007, "grad_norm": 5.407787921834411, "learning_rate": 9.783522953650008e-07, "loss": 1.3427, "step": 2210 }, { "epoch": 0.009787949887113196, "grad_norm": 3.7050386544314025, "learning_rate": 9.787949887113197e-07, "loss": 0.9368, "step": 2211 }, { "epoch": 0.009792376820576387, "grad_norm": 3.174225156180518, "learning_rate": 9.792376820576389e-07, "loss": 0.8882, "step": 2212 }, { "epoch": 0.009796803754039577, "grad_norm": 3.280644697477675, "learning_rate": 9.796803754039576e-07, "loss": 0.7637, "step": 2213 }, { "epoch": 0.009801230687502768, "grad_norm": 3.4684883893492806, "learning_rate": 9.801230687502768e-07, "loss": 0.8515, "step": 2214 }, { "epoch": 0.009805657620965956, "grad_norm": 2.909984737516109, "learning_rate": 9.805657620965957e-07, "loss": 0.7949, "step": 2215 }, { "epoch": 0.009810084554429147, "grad_norm": 3.218205114398336, "learning_rate": 9.810084554429147e-07, "loss": 1.1032, "step": 2216 }, { "epoch": 0.009814511487892337, "grad_norm": 2.9240377813118106, "learning_rate": 9.814511487892338e-07, "loss": 0.6809, "step": 2217 }, { "epoch": 0.009818938421355528, "grad_norm": 3.5208440555468976, "learning_rate": 9.818938421355528e-07, "loss": 0.759, "step": 2218 }, { "epoch": 0.009823365354818717, "grad_norm": 3.2322812206863403, "learning_rate": 9.823365354818717e-07, "loss": 0.8042, "step": 2219 }, { "epoch": 0.009827792288281907, "grad_norm": 4.51404253826938, "learning_rate": 9.827792288281909e-07, "loss": 1.4274, "step": 2220 }, { "epoch": 0.009832219221745098, "grad_norm": 3.676577716553298, "learning_rate": 9.832219221745098e-07, "loss": 0.747, "step": 2221 }, { "epoch": 0.009836646155208286, "grad_norm": 2.7927960650474146, "learning_rate": 9.836646155208288e-07, "loss": 0.7092, "step": 2222 }, { "epoch": 0.009841073088671477, "grad_norm": 3.123002069159057, "learning_rate": 9.841073088671477e-07, "loss": 0.8758, "step": 2223 }, { "epoch": 0.009845500022134667, "grad_norm": 3.2747581764556717, "learning_rate": 9.845500022134669e-07, "loss": 0.7613, "step": 2224 }, { "epoch": 0.009849926955597858, "grad_norm": 4.399444192906443, "learning_rate": 9.849926955597858e-07, "loss": 1.1198, "step": 2225 }, { "epoch": 0.009854353889061047, "grad_norm": 3.895446096597802, "learning_rate": 9.854353889061048e-07, "loss": 0.942, "step": 2226 }, { "epoch": 0.009858780822524237, "grad_norm": 4.131382575076808, "learning_rate": 9.85878082252424e-07, "loss": 1.0602, "step": 2227 }, { "epoch": 0.009863207755987428, "grad_norm": 2.768202635502689, "learning_rate": 9.863207755987429e-07, "loss": 0.8698, "step": 2228 }, { "epoch": 0.009867634689450618, "grad_norm": 5.130844240901322, "learning_rate": 9.867634689450618e-07, "loss": 1.3143, "step": 2229 }, { "epoch": 0.009872061622913807, "grad_norm": 3.033546751582821, "learning_rate": 9.87206162291381e-07, "loss": 0.9258, "step": 2230 }, { "epoch": 0.009876488556376998, "grad_norm": 3.321595110467069, "learning_rate": 9.876488556376997e-07, "loss": 0.7591, "step": 2231 }, { "epoch": 0.009880915489840188, "grad_norm": 2.710765983148247, "learning_rate": 9.880915489840189e-07, "loss": 0.44, "step": 2232 }, { "epoch": 0.009885342423303379, "grad_norm": 2.8465389396118344, "learning_rate": 9.885342423303378e-07, "loss": 0.9968, "step": 2233 }, { "epoch": 0.009889769356766567, "grad_norm": 2.6128360184247175, "learning_rate": 9.889769356766568e-07, "loss": 0.7548, "step": 2234 }, { "epoch": 0.009894196290229758, "grad_norm": 3.117535468592984, "learning_rate": 9.89419629022976e-07, "loss": 1.0563, "step": 2235 }, { "epoch": 0.009898623223692948, "grad_norm": 3.2401591894383253, "learning_rate": 9.898623223692949e-07, "loss": 0.9665, "step": 2236 }, { "epoch": 0.009903050157156137, "grad_norm": 3.297509599571498, "learning_rate": 9.903050157156138e-07, "loss": 0.8224, "step": 2237 }, { "epoch": 0.009907477090619328, "grad_norm": 3.5266850355090367, "learning_rate": 9.907477090619328e-07, "loss": 0.8076, "step": 2238 }, { "epoch": 0.009911904024082518, "grad_norm": 2.879626411743864, "learning_rate": 9.91190402408252e-07, "loss": 0.9782, "step": 2239 }, { "epoch": 0.009916330957545709, "grad_norm": 2.8534615918969033, "learning_rate": 9.916330957545709e-07, "loss": 0.7512, "step": 2240 }, { "epoch": 0.009920757891008897, "grad_norm": 3.198489527527555, "learning_rate": 9.920757891008898e-07, "loss": 0.6403, "step": 2241 }, { "epoch": 0.009925184824472088, "grad_norm": 3.047602459931093, "learning_rate": 9.92518482447209e-07, "loss": 1.1342, "step": 2242 }, { "epoch": 0.009929611757935278, "grad_norm": 3.6088279923077113, "learning_rate": 9.92961175793528e-07, "loss": 0.8426, "step": 2243 }, { "epoch": 0.009934038691398469, "grad_norm": 3.181801161818615, "learning_rate": 9.934038691398469e-07, "loss": 0.8215, "step": 2244 }, { "epoch": 0.009938465624861658, "grad_norm": 3.8235611881474165, "learning_rate": 9.93846562486166e-07, "loss": 1.0275, "step": 2245 }, { "epoch": 0.009942892558324848, "grad_norm": 2.882130982720657, "learning_rate": 9.942892558324848e-07, "loss": 0.7358, "step": 2246 }, { "epoch": 0.009947319491788039, "grad_norm": 3.7720729776950845, "learning_rate": 9.94731949178804e-07, "loss": 0.669, "step": 2247 }, { "epoch": 0.00995174642525123, "grad_norm": 2.7996196370894877, "learning_rate": 9.951746425251229e-07, "loss": 0.9346, "step": 2248 }, { "epoch": 0.009956173358714418, "grad_norm": 2.518252559293474, "learning_rate": 9.956173358714418e-07, "loss": 0.799, "step": 2249 }, { "epoch": 0.009960600292177609, "grad_norm": 3.245449384505372, "learning_rate": 9.96060029217761e-07, "loss": 0.8564, "step": 2250 }, { "epoch": 0.009965027225640799, "grad_norm": 3.239185650699724, "learning_rate": 9.9650272256408e-07, "loss": 1.0556, "step": 2251 }, { "epoch": 0.00996945415910399, "grad_norm": 2.6940952081257623, "learning_rate": 9.969454159103989e-07, "loss": 0.7329, "step": 2252 }, { "epoch": 0.009973881092567178, "grad_norm": 2.824853914164558, "learning_rate": 9.97388109256718e-07, "loss": 1.0552, "step": 2253 }, { "epoch": 0.009978308026030369, "grad_norm": 4.330655063766949, "learning_rate": 9.97830802603037e-07, "loss": 0.8473, "step": 2254 }, { "epoch": 0.00998273495949356, "grad_norm": 3.618523749249107, "learning_rate": 9.98273495949356e-07, "loss": 1.0965, "step": 2255 }, { "epoch": 0.009987161892956748, "grad_norm": 3.7710751783469845, "learning_rate": 9.987161892956749e-07, "loss": 1.0756, "step": 2256 }, { "epoch": 0.009991588826419939, "grad_norm": 3.2668505943466104, "learning_rate": 9.99158882641994e-07, "loss": 0.5139, "step": 2257 }, { "epoch": 0.009996015759883129, "grad_norm": 4.02152029399599, "learning_rate": 9.99601575988313e-07, "loss": 1.2812, "step": 2258 }, { "epoch": 0.01000044269334632, "grad_norm": 3.3763774080998967, "learning_rate": 1.000044269334632e-06, "loss": 1.0358, "step": 2259 }, { "epoch": 0.010004869626809508, "grad_norm": 3.5404824433663196, "learning_rate": 1.000486962680951e-06, "loss": 1.3284, "step": 2260 }, { "epoch": 0.010009296560272699, "grad_norm": 3.4024977328185173, "learning_rate": 1.00092965602727e-06, "loss": 1.2831, "step": 2261 }, { "epoch": 0.01001372349373589, "grad_norm": 3.447625055832009, "learning_rate": 1.001372349373589e-06, "loss": 1.1009, "step": 2262 }, { "epoch": 0.01001815042719908, "grad_norm": 2.927392191602169, "learning_rate": 1.001815042719908e-06, "loss": 0.9297, "step": 2263 }, { "epoch": 0.010022577360662269, "grad_norm": 2.669565771345296, "learning_rate": 1.0022577360662269e-06, "loss": 0.6696, "step": 2264 }, { "epoch": 0.01002700429412546, "grad_norm": 3.4591690300432285, "learning_rate": 1.002700429412546e-06, "loss": 0.6622, "step": 2265 }, { "epoch": 0.01003143122758865, "grad_norm": 3.084053383910095, "learning_rate": 1.003143122758865e-06, "loss": 0.6922, "step": 2266 }, { "epoch": 0.01003585816105184, "grad_norm": 4.0025484778336855, "learning_rate": 1.003585816105184e-06, "loss": 1.3437, "step": 2267 }, { "epoch": 0.010040285094515029, "grad_norm": 3.4524503478974946, "learning_rate": 1.004028509451503e-06, "loss": 0.8632, "step": 2268 }, { "epoch": 0.01004471202797822, "grad_norm": 2.9284049741970324, "learning_rate": 1.004471202797822e-06, "loss": 0.7278, "step": 2269 }, { "epoch": 0.01004913896144141, "grad_norm": 2.7340473290240896, "learning_rate": 1.004913896144141e-06, "loss": 0.5915, "step": 2270 }, { "epoch": 0.010053565894904599, "grad_norm": 2.4261359131688804, "learning_rate": 1.00535658949046e-06, "loss": 0.6527, "step": 2271 }, { "epoch": 0.01005799282836779, "grad_norm": 2.647087695968297, "learning_rate": 1.005799282836779e-06, "loss": 0.6582, "step": 2272 }, { "epoch": 0.01006241976183098, "grad_norm": 3.6937093218199584, "learning_rate": 1.006241976183098e-06, "loss": 1.3135, "step": 2273 }, { "epoch": 0.01006684669529417, "grad_norm": 2.8621421792530986, "learning_rate": 1.006684669529417e-06, "loss": 0.8008, "step": 2274 }, { "epoch": 0.010071273628757359, "grad_norm": 3.2963081089228528, "learning_rate": 1.0071273628757361e-06, "loss": 0.8214, "step": 2275 }, { "epoch": 0.01007570056222055, "grad_norm": 2.466060863139718, "learning_rate": 1.007570056222055e-06, "loss": 0.7252, "step": 2276 }, { "epoch": 0.01008012749568374, "grad_norm": 2.802699652696631, "learning_rate": 1.008012749568374e-06, "loss": 0.8902, "step": 2277 }, { "epoch": 0.01008455442914693, "grad_norm": 2.7043169871794093, "learning_rate": 1.0084554429146932e-06, "loss": 0.7522, "step": 2278 }, { "epoch": 0.01008898136261012, "grad_norm": 3.032673081267721, "learning_rate": 1.008898136261012e-06, "loss": 0.5412, "step": 2279 }, { "epoch": 0.01009340829607331, "grad_norm": 2.780275853882908, "learning_rate": 1.009340829607331e-06, "loss": 0.6596, "step": 2280 }, { "epoch": 0.0100978352295365, "grad_norm": 3.274164699587252, "learning_rate": 1.00978352295365e-06, "loss": 0.9422, "step": 2281 }, { "epoch": 0.010102262162999691, "grad_norm": 4.302321411746555, "learning_rate": 1.010226216299969e-06, "loss": 1.1698, "step": 2282 }, { "epoch": 0.01010668909646288, "grad_norm": 2.9159143583910416, "learning_rate": 1.0106689096462881e-06, "loss": 0.8857, "step": 2283 }, { "epoch": 0.01011111602992607, "grad_norm": 2.750515437804951, "learning_rate": 1.011111602992607e-06, "loss": 0.7614, "step": 2284 }, { "epoch": 0.01011554296338926, "grad_norm": 3.0035234766106758, "learning_rate": 1.011554296338926e-06, "loss": 0.7501, "step": 2285 }, { "epoch": 0.010119969896852451, "grad_norm": 2.6248550712963628, "learning_rate": 1.0119969896852452e-06, "loss": 0.725, "step": 2286 }, { "epoch": 0.01012439683031564, "grad_norm": 3.549898072332694, "learning_rate": 1.0124396830315641e-06, "loss": 0.6659, "step": 2287 }, { "epoch": 0.01012882376377883, "grad_norm": 2.867070540569954, "learning_rate": 1.012882376377883e-06, "loss": 0.6762, "step": 2288 }, { "epoch": 0.010133250697242021, "grad_norm": 3.3335045404498036, "learning_rate": 1.013325069724202e-06, "loss": 0.5118, "step": 2289 }, { "epoch": 0.01013767763070521, "grad_norm": 2.81078364329917, "learning_rate": 1.0137677630705212e-06, "loss": 0.8975, "step": 2290 }, { "epoch": 0.0101421045641684, "grad_norm": 3.2082928146228427, "learning_rate": 1.0142104564168401e-06, "loss": 1.2283, "step": 2291 }, { "epoch": 0.01014653149763159, "grad_norm": 2.911987797329153, "learning_rate": 1.014653149763159e-06, "loss": 0.9832, "step": 2292 }, { "epoch": 0.010150958431094781, "grad_norm": 3.6409385196799553, "learning_rate": 1.0150958431094783e-06, "loss": 1.0911, "step": 2293 }, { "epoch": 0.01015538536455797, "grad_norm": 2.5735499532609136, "learning_rate": 1.015538536455797e-06, "loss": 0.9309, "step": 2294 }, { "epoch": 0.01015981229802116, "grad_norm": 2.7904582860520986, "learning_rate": 1.0159812298021161e-06, "loss": 0.8762, "step": 2295 }, { "epoch": 0.010164239231484351, "grad_norm": 2.6961619884718275, "learning_rate": 1.016423923148435e-06, "loss": 0.6225, "step": 2296 }, { "epoch": 0.010168666164947542, "grad_norm": 2.454434836510755, "learning_rate": 1.016866616494754e-06, "loss": 0.693, "step": 2297 }, { "epoch": 0.01017309309841073, "grad_norm": 3.2177217494924206, "learning_rate": 1.0173093098410732e-06, "loss": 0.902, "step": 2298 }, { "epoch": 0.01017752003187392, "grad_norm": 2.850202071965705, "learning_rate": 1.0177520031873921e-06, "loss": 0.9283, "step": 2299 }, { "epoch": 0.010181946965337111, "grad_norm": 2.879132490179294, "learning_rate": 1.018194696533711e-06, "loss": 1.141, "step": 2300 }, { "epoch": 0.010186373898800302, "grad_norm": 3.855296411499067, "learning_rate": 1.0186373898800303e-06, "loss": 1.2382, "step": 2301 }, { "epoch": 0.01019080083226349, "grad_norm": 2.804441422603533, "learning_rate": 1.0190800832263492e-06, "loss": 0.6068, "step": 2302 }, { "epoch": 0.010195227765726681, "grad_norm": 3.195058114976874, "learning_rate": 1.0195227765726681e-06, "loss": 1.1722, "step": 2303 }, { "epoch": 0.010199654699189872, "grad_norm": 3.380675235020069, "learning_rate": 1.019965469918987e-06, "loss": 1.2322, "step": 2304 }, { "epoch": 0.01020408163265306, "grad_norm": 4.077337067827092, "learning_rate": 1.0204081632653063e-06, "loss": 0.7966, "step": 2305 }, { "epoch": 0.010208508566116251, "grad_norm": 2.9660188283561535, "learning_rate": 1.0208508566116252e-06, "loss": 0.6004, "step": 2306 }, { "epoch": 0.010212935499579441, "grad_norm": 2.7215324147754867, "learning_rate": 1.0212935499579441e-06, "loss": 0.9955, "step": 2307 }, { "epoch": 0.010217362433042632, "grad_norm": 2.7445810379871856, "learning_rate": 1.0217362433042633e-06, "loss": 0.9804, "step": 2308 }, { "epoch": 0.01022178936650582, "grad_norm": 3.484406866058578, "learning_rate": 1.0221789366505823e-06, "loss": 1.2633, "step": 2309 }, { "epoch": 0.010226216299969011, "grad_norm": 2.66366947343852, "learning_rate": 1.0226216299969012e-06, "loss": 0.7712, "step": 2310 }, { "epoch": 0.010230643233432202, "grad_norm": 3.170428123900448, "learning_rate": 1.0230643233432204e-06, "loss": 0.8759, "step": 2311 }, { "epoch": 0.010235070166895392, "grad_norm": 3.3443348662680794, "learning_rate": 1.0235070166895393e-06, "loss": 0.7462, "step": 2312 }, { "epoch": 0.010239497100358581, "grad_norm": 2.9442854464839128, "learning_rate": 1.0239497100358583e-06, "loss": 0.6011, "step": 2313 }, { "epoch": 0.010243924033821772, "grad_norm": 2.5168387416682907, "learning_rate": 1.0243924033821772e-06, "loss": 0.7942, "step": 2314 }, { "epoch": 0.010248350967284962, "grad_norm": 2.8897586584514734, "learning_rate": 1.0248350967284964e-06, "loss": 0.784, "step": 2315 }, { "epoch": 0.010252777900748153, "grad_norm": 2.7133745596618453, "learning_rate": 1.0252777900748153e-06, "loss": 0.8368, "step": 2316 }, { "epoch": 0.010257204834211341, "grad_norm": 2.724860052602245, "learning_rate": 1.0257204834211343e-06, "loss": 0.8743, "step": 2317 }, { "epoch": 0.010261631767674532, "grad_norm": 3.228439040574102, "learning_rate": 1.0261631767674534e-06, "loss": 0.5854, "step": 2318 }, { "epoch": 0.010266058701137722, "grad_norm": 3.247796717963464, "learning_rate": 1.0266058701137721e-06, "loss": 0.967, "step": 2319 }, { "epoch": 0.010270485634600911, "grad_norm": 2.681515888269831, "learning_rate": 1.0270485634600913e-06, "loss": 0.8263, "step": 2320 }, { "epoch": 0.010274912568064102, "grad_norm": 3.130311831975324, "learning_rate": 1.0274912568064103e-06, "loss": 0.7234, "step": 2321 }, { "epoch": 0.010279339501527292, "grad_norm": 3.281494399878898, "learning_rate": 1.0279339501527292e-06, "loss": 0.8951, "step": 2322 }, { "epoch": 0.010283766434990483, "grad_norm": 3.245372355179137, "learning_rate": 1.0283766434990484e-06, "loss": 0.6451, "step": 2323 }, { "epoch": 0.010288193368453671, "grad_norm": 2.88441300611629, "learning_rate": 1.0288193368453673e-06, "loss": 0.7382, "step": 2324 }, { "epoch": 0.010292620301916862, "grad_norm": 3.1480032480189504, "learning_rate": 1.0292620301916863e-06, "loss": 0.4031, "step": 2325 }, { "epoch": 0.010297047235380052, "grad_norm": 3.053115101826522, "learning_rate": 1.0297047235380054e-06, "loss": 0.7855, "step": 2326 }, { "epoch": 0.010301474168843243, "grad_norm": 3.236251750358594, "learning_rate": 1.0301474168843244e-06, "loss": 0.938, "step": 2327 }, { "epoch": 0.010305901102306432, "grad_norm": 3.5244404361239754, "learning_rate": 1.0305901102306433e-06, "loss": 1.1909, "step": 2328 }, { "epoch": 0.010310328035769622, "grad_norm": 3.857254091298965, "learning_rate": 1.0310328035769623e-06, "loss": 1.0061, "step": 2329 }, { "epoch": 0.010314754969232813, "grad_norm": 2.6996015163296705, "learning_rate": 1.0314754969232814e-06, "loss": 0.7989, "step": 2330 }, { "epoch": 0.010319181902696003, "grad_norm": 2.9124537083530493, "learning_rate": 1.0319181902696004e-06, "loss": 0.9817, "step": 2331 }, { "epoch": 0.010323608836159192, "grad_norm": 2.884324787382357, "learning_rate": 1.0323608836159193e-06, "loss": 0.7098, "step": 2332 }, { "epoch": 0.010328035769622382, "grad_norm": 3.119152608298829, "learning_rate": 1.0328035769622385e-06, "loss": 0.9041, "step": 2333 }, { "epoch": 0.010332462703085573, "grad_norm": 3.2239364420490695, "learning_rate": 1.0332462703085574e-06, "loss": 0.7044, "step": 2334 }, { "epoch": 0.010336889636548763, "grad_norm": 4.153176320690272, "learning_rate": 1.0336889636548764e-06, "loss": 1.3293, "step": 2335 }, { "epoch": 0.010341316570011952, "grad_norm": 2.9643433240988104, "learning_rate": 1.0341316570011955e-06, "loss": 0.7732, "step": 2336 }, { "epoch": 0.010345743503475143, "grad_norm": 2.5374860948259745, "learning_rate": 1.0345743503475143e-06, "loss": 0.6798, "step": 2337 }, { "epoch": 0.010350170436938333, "grad_norm": 3.2665876263785463, "learning_rate": 1.0350170436938334e-06, "loss": 1.0154, "step": 2338 }, { "epoch": 0.010354597370401522, "grad_norm": 2.4092651328789776, "learning_rate": 1.0354597370401524e-06, "loss": 0.6495, "step": 2339 }, { "epoch": 0.010359024303864713, "grad_norm": 2.7075425118368073, "learning_rate": 1.0359024303864713e-06, "loss": 0.6846, "step": 2340 }, { "epoch": 0.010363451237327903, "grad_norm": 3.4086160680684685, "learning_rate": 1.0363451237327905e-06, "loss": 0.8084, "step": 2341 }, { "epoch": 0.010367878170791094, "grad_norm": 5.288991629052723, "learning_rate": 1.0367878170791094e-06, "loss": 1.3142, "step": 2342 }, { "epoch": 0.010372305104254282, "grad_norm": 2.728732800575979, "learning_rate": 1.0372305104254284e-06, "loss": 0.8382, "step": 2343 }, { "epoch": 0.010376732037717473, "grad_norm": 2.8203404819888376, "learning_rate": 1.0376732037717473e-06, "loss": 0.8768, "step": 2344 }, { "epoch": 0.010381158971180663, "grad_norm": 2.6937317107288203, "learning_rate": 1.0381158971180665e-06, "loss": 0.6123, "step": 2345 }, { "epoch": 0.010385585904643854, "grad_norm": 2.7727353673343518, "learning_rate": 1.0385585904643854e-06, "loss": 0.6964, "step": 2346 }, { "epoch": 0.010390012838107043, "grad_norm": 3.129945649238249, "learning_rate": 1.0390012838107044e-06, "loss": 1.3381, "step": 2347 }, { "epoch": 0.010394439771570233, "grad_norm": 2.591530787150406, "learning_rate": 1.0394439771570235e-06, "loss": 0.7687, "step": 2348 }, { "epoch": 0.010398866705033424, "grad_norm": 2.665986596110576, "learning_rate": 1.0398866705033425e-06, "loss": 0.6407, "step": 2349 }, { "epoch": 0.010403293638496614, "grad_norm": 2.519281732593193, "learning_rate": 1.0403293638496614e-06, "loss": 0.7175, "step": 2350 }, { "epoch": 0.010407720571959803, "grad_norm": 3.649325642653874, "learning_rate": 1.0407720571959806e-06, "loss": 1.1428, "step": 2351 }, { "epoch": 0.010412147505422993, "grad_norm": 2.6395977340740955, "learning_rate": 1.0412147505422993e-06, "loss": 0.8235, "step": 2352 }, { "epoch": 0.010416574438886184, "grad_norm": 3.0400864331604796, "learning_rate": 1.0416574438886185e-06, "loss": 1.0321, "step": 2353 }, { "epoch": 0.010421001372349373, "grad_norm": 3.9366526638930144, "learning_rate": 1.0421001372349374e-06, "loss": 0.9497, "step": 2354 }, { "epoch": 0.010425428305812563, "grad_norm": 3.3160084414923015, "learning_rate": 1.0425428305812564e-06, "loss": 1.0133, "step": 2355 }, { "epoch": 0.010429855239275754, "grad_norm": 2.5411899229592905, "learning_rate": 1.0429855239275755e-06, "loss": 0.7959, "step": 2356 }, { "epoch": 0.010434282172738944, "grad_norm": 2.466765700926997, "learning_rate": 1.0434282172738945e-06, "loss": 0.8654, "step": 2357 }, { "epoch": 0.010438709106202133, "grad_norm": 3.0001242157091395, "learning_rate": 1.0438709106202134e-06, "loss": 0.9635, "step": 2358 }, { "epoch": 0.010443136039665324, "grad_norm": 3.0308775271519326, "learning_rate": 1.0443136039665326e-06, "loss": 0.7357, "step": 2359 }, { "epoch": 0.010447562973128514, "grad_norm": 2.82009650590733, "learning_rate": 1.0447562973128515e-06, "loss": 0.8936, "step": 2360 }, { "epoch": 0.010451989906591705, "grad_norm": 3.9414958295381917, "learning_rate": 1.0451989906591705e-06, "loss": 1.2154, "step": 2361 }, { "epoch": 0.010456416840054893, "grad_norm": 2.4898807620960453, "learning_rate": 1.0456416840054894e-06, "loss": 0.705, "step": 2362 }, { "epoch": 0.010460843773518084, "grad_norm": 3.1773490155127426, "learning_rate": 1.0460843773518086e-06, "loss": 0.9933, "step": 2363 }, { "epoch": 0.010465270706981274, "grad_norm": 3.499203925790307, "learning_rate": 1.0465270706981275e-06, "loss": 0.9345, "step": 2364 }, { "epoch": 0.010469697640444465, "grad_norm": 2.551323887877941, "learning_rate": 1.0469697640444465e-06, "loss": 0.6268, "step": 2365 }, { "epoch": 0.010474124573907654, "grad_norm": 2.770206783806148, "learning_rate": 1.0474124573907656e-06, "loss": 0.8361, "step": 2366 }, { "epoch": 0.010478551507370844, "grad_norm": 2.946846695062478, "learning_rate": 1.0478551507370844e-06, "loss": 0.6338, "step": 2367 }, { "epoch": 0.010482978440834035, "grad_norm": 3.98017486823725, "learning_rate": 1.0482978440834035e-06, "loss": 0.9707, "step": 2368 }, { "epoch": 0.010487405374297225, "grad_norm": 3.91738808568071, "learning_rate": 1.0487405374297225e-06, "loss": 1.0352, "step": 2369 }, { "epoch": 0.010491832307760414, "grad_norm": 2.7307270093167415, "learning_rate": 1.0491832307760414e-06, "loss": 0.5655, "step": 2370 }, { "epoch": 0.010496259241223604, "grad_norm": 3.285809680427539, "learning_rate": 1.0496259241223606e-06, "loss": 1.0861, "step": 2371 }, { "epoch": 0.010500686174686795, "grad_norm": 3.79291488392452, "learning_rate": 1.0500686174686795e-06, "loss": 1.0027, "step": 2372 }, { "epoch": 0.010505113108149984, "grad_norm": 2.356809862330214, "learning_rate": 1.0505113108149985e-06, "loss": 0.7337, "step": 2373 }, { "epoch": 0.010509540041613174, "grad_norm": 2.6010502941042635, "learning_rate": 1.0509540041613176e-06, "loss": 0.7089, "step": 2374 }, { "epoch": 0.010513966975076365, "grad_norm": 3.3394647948384097, "learning_rate": 1.0513966975076366e-06, "loss": 0.9586, "step": 2375 }, { "epoch": 0.010518393908539555, "grad_norm": 3.509131600744723, "learning_rate": 1.0518393908539555e-06, "loss": 1.5097, "step": 2376 }, { "epoch": 0.010522820842002744, "grad_norm": 2.6058837052817347, "learning_rate": 1.0522820842002745e-06, "loss": 0.6648, "step": 2377 }, { "epoch": 0.010527247775465935, "grad_norm": 3.141985537488502, "learning_rate": 1.0527247775465936e-06, "loss": 0.9241, "step": 2378 }, { "epoch": 0.010531674708929125, "grad_norm": 3.4896287632791707, "learning_rate": 1.0531674708929126e-06, "loss": 0.975, "step": 2379 }, { "epoch": 0.010536101642392316, "grad_norm": 3.2671764688382696, "learning_rate": 1.0536101642392315e-06, "loss": 1.0209, "step": 2380 }, { "epoch": 0.010540528575855504, "grad_norm": 3.0775993228850504, "learning_rate": 1.0540528575855507e-06, "loss": 0.8927, "step": 2381 }, { "epoch": 0.010544955509318695, "grad_norm": 2.8953496999253323, "learning_rate": 1.0544955509318696e-06, "loss": 0.8917, "step": 2382 }, { "epoch": 0.010549382442781885, "grad_norm": 2.444123872055019, "learning_rate": 1.0549382442781886e-06, "loss": 0.7264, "step": 2383 }, { "epoch": 0.010553809376245076, "grad_norm": 2.173197006205113, "learning_rate": 1.0553809376245077e-06, "loss": 0.5999, "step": 2384 }, { "epoch": 0.010558236309708265, "grad_norm": 2.878160043448158, "learning_rate": 1.0558236309708265e-06, "loss": 0.8399, "step": 2385 }, { "epoch": 0.010562663243171455, "grad_norm": 3.14450507144596, "learning_rate": 1.0562663243171456e-06, "loss": 0.9821, "step": 2386 }, { "epoch": 0.010567090176634646, "grad_norm": 3.714093631058968, "learning_rate": 1.0567090176634646e-06, "loss": 0.6674, "step": 2387 }, { "epoch": 0.010571517110097834, "grad_norm": 2.7204306133953673, "learning_rate": 1.0571517110097835e-06, "loss": 0.7839, "step": 2388 }, { "epoch": 0.010575944043561025, "grad_norm": 2.877682406631308, "learning_rate": 1.0575944043561027e-06, "loss": 1.1072, "step": 2389 }, { "epoch": 0.010580370977024215, "grad_norm": 2.892737162807604, "learning_rate": 1.0580370977024216e-06, "loss": 0.7812, "step": 2390 }, { "epoch": 0.010584797910487406, "grad_norm": 2.7873972571608276, "learning_rate": 1.0584797910487406e-06, "loss": 0.8744, "step": 2391 }, { "epoch": 0.010589224843950595, "grad_norm": 3.070150944839219, "learning_rate": 1.0589224843950595e-06, "loss": 0.7245, "step": 2392 }, { "epoch": 0.010593651777413785, "grad_norm": 3.135081207042765, "learning_rate": 1.0593651777413787e-06, "loss": 0.836, "step": 2393 }, { "epoch": 0.010598078710876976, "grad_norm": 3.2957509250848425, "learning_rate": 1.0598078710876976e-06, "loss": 0.9514, "step": 2394 }, { "epoch": 0.010602505644340166, "grad_norm": 3.2211224251512456, "learning_rate": 1.0602505644340166e-06, "loss": 0.9035, "step": 2395 }, { "epoch": 0.010606932577803355, "grad_norm": 2.289569834724089, "learning_rate": 1.0606932577803357e-06, "loss": 0.6006, "step": 2396 }, { "epoch": 0.010611359511266545, "grad_norm": 3.730386899751128, "learning_rate": 1.0611359511266547e-06, "loss": 1.137, "step": 2397 }, { "epoch": 0.010615786444729736, "grad_norm": 2.8381219333493015, "learning_rate": 1.0615786444729736e-06, "loss": 0.7348, "step": 2398 }, { "epoch": 0.010620213378192927, "grad_norm": 3.671885952908909, "learning_rate": 1.0620213378192928e-06, "loss": 1.0619, "step": 2399 }, { "epoch": 0.010624640311656115, "grad_norm": 3.184322187748044, "learning_rate": 1.0624640311656115e-06, "loss": 1.0403, "step": 2400 }, { "epoch": 0.010629067245119306, "grad_norm": 2.940588342347485, "learning_rate": 1.0629067245119307e-06, "loss": 0.9839, "step": 2401 }, { "epoch": 0.010633494178582496, "grad_norm": 3.43690240989107, "learning_rate": 1.0633494178582496e-06, "loss": 0.9448, "step": 2402 }, { "epoch": 0.010637921112045687, "grad_norm": 3.099783385946069, "learning_rate": 1.0637921112045686e-06, "loss": 0.8152, "step": 2403 }, { "epoch": 0.010642348045508876, "grad_norm": 2.734811821323957, "learning_rate": 1.0642348045508877e-06, "loss": 0.6322, "step": 2404 }, { "epoch": 0.010646774978972066, "grad_norm": 2.400203439123118, "learning_rate": 1.0646774978972067e-06, "loss": 0.7599, "step": 2405 }, { "epoch": 0.010651201912435257, "grad_norm": 3.666376298877907, "learning_rate": 1.0651201912435256e-06, "loss": 0.6323, "step": 2406 }, { "epoch": 0.010655628845898445, "grad_norm": 3.264034848448634, "learning_rate": 1.0655628845898448e-06, "loss": 0.8126, "step": 2407 }, { "epoch": 0.010660055779361636, "grad_norm": 3.3881080961810914, "learning_rate": 1.0660055779361637e-06, "loss": 0.9409, "step": 2408 }, { "epoch": 0.010664482712824826, "grad_norm": 3.1749461416576055, "learning_rate": 1.0664482712824827e-06, "loss": 0.9601, "step": 2409 }, { "epoch": 0.010668909646288017, "grad_norm": 2.9061574395917087, "learning_rate": 1.0668909646288016e-06, "loss": 0.8405, "step": 2410 }, { "epoch": 0.010673336579751206, "grad_norm": 3.4923725233598524, "learning_rate": 1.0673336579751208e-06, "loss": 1.0722, "step": 2411 }, { "epoch": 0.010677763513214396, "grad_norm": 3.298226666144393, "learning_rate": 1.0677763513214397e-06, "loss": 0.9252, "step": 2412 }, { "epoch": 0.010682190446677587, "grad_norm": 3.0146791379631965, "learning_rate": 1.0682190446677587e-06, "loss": 1.0895, "step": 2413 }, { "epoch": 0.010686617380140777, "grad_norm": 2.5877135172802563, "learning_rate": 1.0686617380140778e-06, "loss": 0.7888, "step": 2414 }, { "epoch": 0.010691044313603966, "grad_norm": 3.2979747523416454, "learning_rate": 1.0691044313603968e-06, "loss": 0.6462, "step": 2415 }, { "epoch": 0.010695471247067156, "grad_norm": 3.1158450773884327, "learning_rate": 1.0695471247067157e-06, "loss": 0.7688, "step": 2416 }, { "epoch": 0.010699898180530347, "grad_norm": 3.3090157732694303, "learning_rate": 1.0699898180530347e-06, "loss": 0.8924, "step": 2417 }, { "epoch": 0.010704325113993537, "grad_norm": 2.9270957005821403, "learning_rate": 1.0704325113993536e-06, "loss": 0.6337, "step": 2418 }, { "epoch": 0.010708752047456726, "grad_norm": 2.8798577173188358, "learning_rate": 1.0708752047456728e-06, "loss": 0.6063, "step": 2419 }, { "epoch": 0.010713178980919917, "grad_norm": 3.5508535052176806, "learning_rate": 1.0713178980919917e-06, "loss": 0.883, "step": 2420 }, { "epoch": 0.010717605914383107, "grad_norm": 3.152769080052473, "learning_rate": 1.0717605914383107e-06, "loss": 0.665, "step": 2421 }, { "epoch": 0.010722032847846296, "grad_norm": 2.4254707487995173, "learning_rate": 1.0722032847846298e-06, "loss": 0.7142, "step": 2422 }, { "epoch": 0.010726459781309487, "grad_norm": 2.8578758382366134, "learning_rate": 1.0726459781309488e-06, "loss": 0.9773, "step": 2423 }, { "epoch": 0.010730886714772677, "grad_norm": 2.788448473293716, "learning_rate": 1.0730886714772677e-06, "loss": 0.606, "step": 2424 }, { "epoch": 0.010735313648235868, "grad_norm": 2.750303009611163, "learning_rate": 1.0735313648235867e-06, "loss": 0.7211, "step": 2425 }, { "epoch": 0.010739740581699056, "grad_norm": 3.9938285112085294, "learning_rate": 1.0739740581699058e-06, "loss": 1.0108, "step": 2426 }, { "epoch": 0.010744167515162247, "grad_norm": 3.0726975154321448, "learning_rate": 1.0744167515162248e-06, "loss": 0.642, "step": 2427 }, { "epoch": 0.010748594448625437, "grad_norm": 2.833848632686747, "learning_rate": 1.0748594448625437e-06, "loss": 1.0593, "step": 2428 }, { "epoch": 0.010753021382088628, "grad_norm": 3.2354949520265004, "learning_rate": 1.0753021382088629e-06, "loss": 0.764, "step": 2429 }, { "epoch": 0.010757448315551817, "grad_norm": 2.9715634887497786, "learning_rate": 1.0757448315551818e-06, "loss": 1.0945, "step": 2430 }, { "epoch": 0.010761875249015007, "grad_norm": 2.3128432426874594, "learning_rate": 1.0761875249015008e-06, "loss": 0.6858, "step": 2431 }, { "epoch": 0.010766302182478198, "grad_norm": 2.838383927481297, "learning_rate": 1.07663021824782e-06, "loss": 1.0205, "step": 2432 }, { "epoch": 0.010770729115941388, "grad_norm": 3.802236511625961, "learning_rate": 1.0770729115941387e-06, "loss": 1.1907, "step": 2433 }, { "epoch": 0.010775156049404577, "grad_norm": 3.27458335664751, "learning_rate": 1.0775156049404578e-06, "loss": 1.2585, "step": 2434 }, { "epoch": 0.010779582982867767, "grad_norm": 2.691146163441799, "learning_rate": 1.0779582982867768e-06, "loss": 0.5265, "step": 2435 }, { "epoch": 0.010784009916330958, "grad_norm": 2.9622305980844934, "learning_rate": 1.0784009916330957e-06, "loss": 0.759, "step": 2436 }, { "epoch": 0.010788436849794148, "grad_norm": 2.783796176601744, "learning_rate": 1.0788436849794149e-06, "loss": 0.6661, "step": 2437 }, { "epoch": 0.010792863783257337, "grad_norm": 2.7958829658612876, "learning_rate": 1.0792863783257338e-06, "loss": 0.6553, "step": 2438 }, { "epoch": 0.010797290716720528, "grad_norm": 3.058130729131919, "learning_rate": 1.0797290716720528e-06, "loss": 0.9784, "step": 2439 }, { "epoch": 0.010801717650183718, "grad_norm": 3.548055048082815, "learning_rate": 1.080171765018372e-06, "loss": 0.7881, "step": 2440 }, { "epoch": 0.010806144583646907, "grad_norm": 3.1950931463730603, "learning_rate": 1.0806144583646909e-06, "loss": 1.0532, "step": 2441 }, { "epoch": 0.010810571517110098, "grad_norm": 3.0452757126591865, "learning_rate": 1.0810571517110098e-06, "loss": 0.7293, "step": 2442 }, { "epoch": 0.010814998450573288, "grad_norm": 2.631664470512548, "learning_rate": 1.0814998450573288e-06, "loss": 0.9564, "step": 2443 }, { "epoch": 0.010819425384036479, "grad_norm": 3.0014160014889977, "learning_rate": 1.081942538403648e-06, "loss": 0.9977, "step": 2444 }, { "epoch": 0.010823852317499667, "grad_norm": 3.5698390504458795, "learning_rate": 1.0823852317499669e-06, "loss": 1.0145, "step": 2445 }, { "epoch": 0.010828279250962858, "grad_norm": 2.8849866271847326, "learning_rate": 1.0828279250962858e-06, "loss": 0.6491, "step": 2446 }, { "epoch": 0.010832706184426048, "grad_norm": 3.975404582039323, "learning_rate": 1.083270618442605e-06, "loss": 1.2564, "step": 2447 }, { "epoch": 0.010837133117889239, "grad_norm": 2.903334855624367, "learning_rate": 1.0837133117889237e-06, "loss": 0.9327, "step": 2448 }, { "epoch": 0.010841560051352428, "grad_norm": 3.068130199991701, "learning_rate": 1.0841560051352429e-06, "loss": 0.3959, "step": 2449 }, { "epoch": 0.010845986984815618, "grad_norm": 4.088946213812644, "learning_rate": 1.0845986984815618e-06, "loss": 1.4469, "step": 2450 }, { "epoch": 0.010850413918278809, "grad_norm": 3.379854729921501, "learning_rate": 1.0850413918278808e-06, "loss": 0.7933, "step": 2451 }, { "epoch": 0.010854840851741999, "grad_norm": 2.7699584615003543, "learning_rate": 1.0854840851742e-06, "loss": 0.7983, "step": 2452 }, { "epoch": 0.010859267785205188, "grad_norm": 2.7771171640991104, "learning_rate": 1.0859267785205189e-06, "loss": 0.6319, "step": 2453 }, { "epoch": 0.010863694718668378, "grad_norm": 3.4264313909082795, "learning_rate": 1.086369471866838e-06, "loss": 1.1761, "step": 2454 }, { "epoch": 0.010868121652131569, "grad_norm": 3.2507191261004, "learning_rate": 1.086812165213157e-06, "loss": 1.0301, "step": 2455 }, { "epoch": 0.010872548585594758, "grad_norm": 3.830186660686721, "learning_rate": 1.087254858559476e-06, "loss": 0.849, "step": 2456 }, { "epoch": 0.010876975519057948, "grad_norm": 3.3995189642508405, "learning_rate": 1.0876975519057951e-06, "loss": 1.3033, "step": 2457 }, { "epoch": 0.010881402452521139, "grad_norm": 3.409842230731174, "learning_rate": 1.0881402452521138e-06, "loss": 0.9778, "step": 2458 }, { "epoch": 0.01088582938598433, "grad_norm": 2.615180677014017, "learning_rate": 1.088582938598433e-06, "loss": 0.9952, "step": 2459 }, { "epoch": 0.010890256319447518, "grad_norm": 3.236061457063261, "learning_rate": 1.089025631944752e-06, "loss": 0.818, "step": 2460 }, { "epoch": 0.010894683252910708, "grad_norm": 4.230898638092313, "learning_rate": 1.089468325291071e-06, "loss": 1.1936, "step": 2461 }, { "epoch": 0.010899110186373899, "grad_norm": 4.238939976916404, "learning_rate": 1.08991101863739e-06, "loss": 1.4348, "step": 2462 }, { "epoch": 0.01090353711983709, "grad_norm": 2.6525626779759715, "learning_rate": 1.090353711983709e-06, "loss": 0.6633, "step": 2463 }, { "epoch": 0.010907964053300278, "grad_norm": 3.070581335520692, "learning_rate": 1.090796405330028e-06, "loss": 0.6392, "step": 2464 }, { "epoch": 0.010912390986763469, "grad_norm": 3.274647238991075, "learning_rate": 1.0912390986763471e-06, "loss": 0.7933, "step": 2465 }, { "epoch": 0.01091681792022666, "grad_norm": 2.836668253040769, "learning_rate": 1.091681792022666e-06, "loss": 0.9487, "step": 2466 }, { "epoch": 0.01092124485368985, "grad_norm": 3.755724702296166, "learning_rate": 1.092124485368985e-06, "loss": 0.6249, "step": 2467 }, { "epoch": 0.010925671787153039, "grad_norm": 3.2124269505867806, "learning_rate": 1.092567178715304e-06, "loss": 1.0938, "step": 2468 }, { "epoch": 0.010930098720616229, "grad_norm": 4.068558717339183, "learning_rate": 1.0930098720616231e-06, "loss": 0.937, "step": 2469 }, { "epoch": 0.01093452565407942, "grad_norm": 3.576711559837061, "learning_rate": 1.093452565407942e-06, "loss": 1.212, "step": 2470 }, { "epoch": 0.01093895258754261, "grad_norm": 4.517706830732908, "learning_rate": 1.093895258754261e-06, "loss": 1.496, "step": 2471 }, { "epoch": 0.010943379521005799, "grad_norm": 3.124293590382895, "learning_rate": 1.0943379521005802e-06, "loss": 0.7204, "step": 2472 }, { "epoch": 0.01094780645446899, "grad_norm": 3.49274439373566, "learning_rate": 1.094780645446899e-06, "loss": 1.153, "step": 2473 }, { "epoch": 0.01095223338793218, "grad_norm": 3.98649059372852, "learning_rate": 1.095223338793218e-06, "loss": 0.9513, "step": 2474 }, { "epoch": 0.010956660321395369, "grad_norm": 4.230589561116278, "learning_rate": 1.095666032139537e-06, "loss": 1.7181, "step": 2475 }, { "epoch": 0.01096108725485856, "grad_norm": 3.1982255311467447, "learning_rate": 1.096108725485856e-06, "loss": 1.0747, "step": 2476 }, { "epoch": 0.01096551418832175, "grad_norm": 2.9854982946839845, "learning_rate": 1.0965514188321751e-06, "loss": 1.008, "step": 2477 }, { "epoch": 0.01096994112178494, "grad_norm": 2.66011954110885, "learning_rate": 1.096994112178494e-06, "loss": 0.6337, "step": 2478 }, { "epoch": 0.010974368055248129, "grad_norm": 4.2596802233751285, "learning_rate": 1.097436805524813e-06, "loss": 1.0243, "step": 2479 }, { "epoch": 0.01097879498871132, "grad_norm": 2.554042370065565, "learning_rate": 1.0978794988711322e-06, "loss": 0.7419, "step": 2480 }, { "epoch": 0.01098322192217451, "grad_norm": 3.086405970294785, "learning_rate": 1.0983221922174511e-06, "loss": 1.0512, "step": 2481 }, { "epoch": 0.0109876488556377, "grad_norm": 2.755380458765054, "learning_rate": 1.09876488556377e-06, "loss": 0.9227, "step": 2482 }, { "epoch": 0.01099207578910089, "grad_norm": 2.809434434499035, "learning_rate": 1.099207578910089e-06, "loss": 0.6683, "step": 2483 }, { "epoch": 0.01099650272256408, "grad_norm": 2.68897389084783, "learning_rate": 1.0996502722564082e-06, "loss": 0.6181, "step": 2484 }, { "epoch": 0.01100092965602727, "grad_norm": 2.559400572782516, "learning_rate": 1.1000929656027271e-06, "loss": 0.749, "step": 2485 }, { "epoch": 0.01100535658949046, "grad_norm": 3.209217616740189, "learning_rate": 1.100535658949046e-06, "loss": 0.9555, "step": 2486 }, { "epoch": 0.01100978352295365, "grad_norm": 3.1796100653810058, "learning_rate": 1.1009783522953652e-06, "loss": 1.0983, "step": 2487 }, { "epoch": 0.01101421045641684, "grad_norm": 3.2336251446664015, "learning_rate": 1.1014210456416842e-06, "loss": 0.7238, "step": 2488 }, { "epoch": 0.01101863738988003, "grad_norm": 4.016637918375035, "learning_rate": 1.1018637389880031e-06, "loss": 1.1702, "step": 2489 }, { "epoch": 0.01102306432334322, "grad_norm": 2.8337664033776755, "learning_rate": 1.1023064323343223e-06, "loss": 0.781, "step": 2490 }, { "epoch": 0.01102749125680641, "grad_norm": 2.5703797077834136, "learning_rate": 1.102749125680641e-06, "loss": 0.7906, "step": 2491 }, { "epoch": 0.0110319181902696, "grad_norm": 3.3049785621709917, "learning_rate": 1.1031918190269602e-06, "loss": 0.6139, "step": 2492 }, { "epoch": 0.01103634512373279, "grad_norm": 3.6199132333367157, "learning_rate": 1.1036345123732791e-06, "loss": 0.8075, "step": 2493 }, { "epoch": 0.01104077205719598, "grad_norm": 3.36299850896258, "learning_rate": 1.104077205719598e-06, "loss": 0.8511, "step": 2494 }, { "epoch": 0.01104519899065917, "grad_norm": 2.5568359609528235, "learning_rate": 1.1045198990659172e-06, "loss": 1.0148, "step": 2495 }, { "epoch": 0.01104962592412236, "grad_norm": 3.195376219774744, "learning_rate": 1.1049625924122362e-06, "loss": 0.9812, "step": 2496 }, { "epoch": 0.011054052857585551, "grad_norm": 3.6853983086865387, "learning_rate": 1.1054052857585551e-06, "loss": 1.1574, "step": 2497 }, { "epoch": 0.01105847979104874, "grad_norm": 3.200702927696722, "learning_rate": 1.105847979104874e-06, "loss": 0.9771, "step": 2498 }, { "epoch": 0.01106290672451193, "grad_norm": 2.8704571027798087, "learning_rate": 1.1062906724511932e-06, "loss": 0.804, "step": 2499 }, { "epoch": 0.011067333657975121, "grad_norm": 3.550569115573594, "learning_rate": 1.1067333657975122e-06, "loss": 0.4779, "step": 2500 }, { "epoch": 0.011071760591438311, "grad_norm": 3.0210165128039908, "learning_rate": 1.1071760591438311e-06, "loss": 0.5502, "step": 2501 }, { "epoch": 0.0110761875249015, "grad_norm": 3.12912646137703, "learning_rate": 1.1076187524901503e-06, "loss": 0.834, "step": 2502 }, { "epoch": 0.01108061445836469, "grad_norm": 2.8986026166334544, "learning_rate": 1.1080614458364692e-06, "loss": 0.7263, "step": 2503 }, { "epoch": 0.011085041391827881, "grad_norm": 3.6462929008507143, "learning_rate": 1.1085041391827882e-06, "loss": 1.1661, "step": 2504 }, { "epoch": 0.01108946832529107, "grad_norm": 3.7291679524763772, "learning_rate": 1.1089468325291073e-06, "loss": 1.4055, "step": 2505 }, { "epoch": 0.01109389525875426, "grad_norm": 3.1941663043774566, "learning_rate": 1.109389525875426e-06, "loss": 1.3019, "step": 2506 }, { "epoch": 0.011098322192217451, "grad_norm": 2.5607904795726504, "learning_rate": 1.1098322192217452e-06, "loss": 0.7805, "step": 2507 }, { "epoch": 0.011102749125680642, "grad_norm": 2.5498201925054995, "learning_rate": 1.1102749125680642e-06, "loss": 0.6601, "step": 2508 }, { "epoch": 0.01110717605914383, "grad_norm": 2.7429610125122177, "learning_rate": 1.1107176059143831e-06, "loss": 0.7507, "step": 2509 }, { "epoch": 0.01111160299260702, "grad_norm": 3.4200450082971865, "learning_rate": 1.1111602992607023e-06, "loss": 0.9873, "step": 2510 }, { "epoch": 0.011116029926070211, "grad_norm": 3.5661791802023273, "learning_rate": 1.1116029926070212e-06, "loss": 0.6006, "step": 2511 }, { "epoch": 0.011120456859533402, "grad_norm": 2.806478914598661, "learning_rate": 1.1120456859533402e-06, "loss": 0.9292, "step": 2512 }, { "epoch": 0.01112488379299659, "grad_norm": 2.806588900069604, "learning_rate": 1.1124883792996593e-06, "loss": 0.7126, "step": 2513 }, { "epoch": 0.011129310726459781, "grad_norm": 3.2806554014160847, "learning_rate": 1.1129310726459783e-06, "loss": 0.9383, "step": 2514 }, { "epoch": 0.011133737659922972, "grad_norm": 3.3820599927260213, "learning_rate": 1.1133737659922972e-06, "loss": 1.053, "step": 2515 }, { "epoch": 0.011138164593386162, "grad_norm": 3.0959608654773336, "learning_rate": 1.1138164593386162e-06, "loss": 0.8486, "step": 2516 }, { "epoch": 0.011142591526849351, "grad_norm": 2.6419758634793897, "learning_rate": 1.1142591526849353e-06, "loss": 0.8312, "step": 2517 }, { "epoch": 0.011147018460312541, "grad_norm": 2.8022327170508996, "learning_rate": 1.1147018460312543e-06, "loss": 0.9327, "step": 2518 }, { "epoch": 0.011151445393775732, "grad_norm": 3.1562493740449415, "learning_rate": 1.1151445393775732e-06, "loss": 0.8449, "step": 2519 }, { "epoch": 0.011155872327238922, "grad_norm": 3.279423146936862, "learning_rate": 1.1155872327238924e-06, "loss": 0.7762, "step": 2520 }, { "epoch": 0.011160299260702111, "grad_norm": 3.9348627887080183, "learning_rate": 1.1160299260702111e-06, "loss": 0.9789, "step": 2521 }, { "epoch": 0.011164726194165302, "grad_norm": 2.8534755506443075, "learning_rate": 1.1164726194165303e-06, "loss": 0.7535, "step": 2522 }, { "epoch": 0.011169153127628492, "grad_norm": 2.4362720219190317, "learning_rate": 1.1169153127628492e-06, "loss": 0.4765, "step": 2523 }, { "epoch": 0.011173580061091681, "grad_norm": 2.9640499419245483, "learning_rate": 1.1173580061091682e-06, "loss": 0.9434, "step": 2524 }, { "epoch": 0.011178006994554871, "grad_norm": 2.661979133500243, "learning_rate": 1.1178006994554873e-06, "loss": 0.9472, "step": 2525 }, { "epoch": 0.011182433928018062, "grad_norm": 2.7641217422653335, "learning_rate": 1.1182433928018063e-06, "loss": 1.0668, "step": 2526 }, { "epoch": 0.011186860861481253, "grad_norm": 2.7686310199451767, "learning_rate": 1.1186860861481252e-06, "loss": 0.7488, "step": 2527 }, { "epoch": 0.011191287794944441, "grad_norm": 2.8622812607597723, "learning_rate": 1.1191287794944444e-06, "loss": 0.8428, "step": 2528 }, { "epoch": 0.011195714728407632, "grad_norm": 3.0995877358781376, "learning_rate": 1.1195714728407633e-06, "loss": 0.739, "step": 2529 }, { "epoch": 0.011200141661870822, "grad_norm": 2.799053374421053, "learning_rate": 1.1200141661870823e-06, "loss": 0.5376, "step": 2530 }, { "epoch": 0.011204568595334013, "grad_norm": 2.9216238647650106, "learning_rate": 1.1204568595334012e-06, "loss": 0.9079, "step": 2531 }, { "epoch": 0.011208995528797202, "grad_norm": 2.696979281436951, "learning_rate": 1.1208995528797204e-06, "loss": 0.8058, "step": 2532 }, { "epoch": 0.011213422462260392, "grad_norm": 2.470646593919911, "learning_rate": 1.1213422462260393e-06, "loss": 0.7914, "step": 2533 }, { "epoch": 0.011217849395723583, "grad_norm": 3.525466256734503, "learning_rate": 1.1217849395723583e-06, "loss": 0.8878, "step": 2534 }, { "epoch": 0.011222276329186773, "grad_norm": 3.239318622549618, "learning_rate": 1.1222276329186774e-06, "loss": 0.8518, "step": 2535 }, { "epoch": 0.011226703262649962, "grad_norm": 3.5039274926236637, "learning_rate": 1.1226703262649964e-06, "loss": 1.0207, "step": 2536 }, { "epoch": 0.011231130196113152, "grad_norm": 2.753604571645729, "learning_rate": 1.1231130196113153e-06, "loss": 0.9429, "step": 2537 }, { "epoch": 0.011235557129576343, "grad_norm": 3.8275751847023365, "learning_rate": 1.1235557129576345e-06, "loss": 1.2983, "step": 2538 }, { "epoch": 0.011239984063039532, "grad_norm": 2.664053143530025, "learning_rate": 1.1239984063039532e-06, "loss": 0.9703, "step": 2539 }, { "epoch": 0.011244410996502722, "grad_norm": 3.9434597395728526, "learning_rate": 1.1244410996502724e-06, "loss": 1.0854, "step": 2540 }, { "epoch": 0.011248837929965913, "grad_norm": 4.252397972941446, "learning_rate": 1.1248837929965913e-06, "loss": 1.3229, "step": 2541 }, { "epoch": 0.011253264863429103, "grad_norm": 2.3324444332392535, "learning_rate": 1.1253264863429103e-06, "loss": 0.4834, "step": 2542 }, { "epoch": 0.011257691796892292, "grad_norm": 3.092341877234809, "learning_rate": 1.1257691796892294e-06, "loss": 0.9089, "step": 2543 }, { "epoch": 0.011262118730355482, "grad_norm": 3.0715329552929527, "learning_rate": 1.1262118730355484e-06, "loss": 0.8058, "step": 2544 }, { "epoch": 0.011266545663818673, "grad_norm": 2.777314858935063, "learning_rate": 1.1266545663818673e-06, "loss": 0.7287, "step": 2545 }, { "epoch": 0.011270972597281863, "grad_norm": 3.118241016979928, "learning_rate": 1.1270972597281863e-06, "loss": 0.6308, "step": 2546 }, { "epoch": 0.011275399530745052, "grad_norm": 2.9307549105833246, "learning_rate": 1.1275399530745054e-06, "loss": 0.7353, "step": 2547 }, { "epoch": 0.011279826464208243, "grad_norm": 2.963029943917832, "learning_rate": 1.1279826464208244e-06, "loss": 0.8593, "step": 2548 }, { "epoch": 0.011284253397671433, "grad_norm": 2.614473496180673, "learning_rate": 1.1284253397671433e-06, "loss": 0.7631, "step": 2549 }, { "epoch": 0.011288680331134624, "grad_norm": 3.349030129137406, "learning_rate": 1.1288680331134625e-06, "loss": 0.8207, "step": 2550 }, { "epoch": 0.011293107264597813, "grad_norm": 3.553653121678765, "learning_rate": 1.1293107264597814e-06, "loss": 1.3072, "step": 2551 }, { "epoch": 0.011297534198061003, "grad_norm": 2.767539128373942, "learning_rate": 1.1297534198061004e-06, "loss": 0.6245, "step": 2552 }, { "epoch": 0.011301961131524194, "grad_norm": 3.1447006341827644, "learning_rate": 1.1301961131524195e-06, "loss": 0.9105, "step": 2553 }, { "epoch": 0.011306388064987384, "grad_norm": 3.8395896326579493, "learning_rate": 1.1306388064987383e-06, "loss": 1.1371, "step": 2554 }, { "epoch": 0.011310814998450573, "grad_norm": 3.111706191495869, "learning_rate": 1.1310814998450574e-06, "loss": 1.1477, "step": 2555 }, { "epoch": 0.011315241931913763, "grad_norm": 3.164272381185462, "learning_rate": 1.1315241931913764e-06, "loss": 1.0469, "step": 2556 }, { "epoch": 0.011319668865376954, "grad_norm": 4.094897084155687, "learning_rate": 1.1319668865376953e-06, "loss": 1.0676, "step": 2557 }, { "epoch": 0.011324095798840143, "grad_norm": 2.903167575868744, "learning_rate": 1.1324095798840145e-06, "loss": 0.8143, "step": 2558 }, { "epoch": 0.011328522732303333, "grad_norm": 2.79243942567771, "learning_rate": 1.1328522732303334e-06, "loss": 0.9427, "step": 2559 }, { "epoch": 0.011332949665766524, "grad_norm": 2.314637710803924, "learning_rate": 1.1332949665766524e-06, "loss": 0.6681, "step": 2560 }, { "epoch": 0.011337376599229714, "grad_norm": 3.098311399144544, "learning_rate": 1.1337376599229715e-06, "loss": 0.4718, "step": 2561 }, { "epoch": 0.011341803532692903, "grad_norm": 3.647998710134452, "learning_rate": 1.1341803532692905e-06, "loss": 1.0271, "step": 2562 }, { "epoch": 0.011346230466156093, "grad_norm": 3.01677146500326, "learning_rate": 1.1346230466156094e-06, "loss": 1.1437, "step": 2563 }, { "epoch": 0.011350657399619284, "grad_norm": 3.2352517241943075, "learning_rate": 1.1350657399619284e-06, "loss": 0.8028, "step": 2564 }, { "epoch": 0.011355084333082474, "grad_norm": 3.3630545765869893, "learning_rate": 1.1355084333082475e-06, "loss": 0.845, "step": 2565 }, { "epoch": 0.011359511266545663, "grad_norm": 3.388771999068445, "learning_rate": 1.1359511266545665e-06, "loss": 0.9907, "step": 2566 }, { "epoch": 0.011363938200008854, "grad_norm": 2.4115336423588354, "learning_rate": 1.1363938200008854e-06, "loss": 0.6869, "step": 2567 }, { "epoch": 0.011368365133472044, "grad_norm": 3.0070533912237805, "learning_rate": 1.1368365133472046e-06, "loss": 0.8149, "step": 2568 }, { "epoch": 0.011372792066935235, "grad_norm": 2.806702855588543, "learning_rate": 1.1372792066935235e-06, "loss": 0.7287, "step": 2569 }, { "epoch": 0.011377219000398424, "grad_norm": 2.7126150711688664, "learning_rate": 1.1377219000398425e-06, "loss": 0.8574, "step": 2570 }, { "epoch": 0.011381645933861614, "grad_norm": 3.800929079969482, "learning_rate": 1.1381645933861614e-06, "loss": 1.4239, "step": 2571 }, { "epoch": 0.011386072867324805, "grad_norm": 3.135329832028598, "learning_rate": 1.1386072867324804e-06, "loss": 0.9835, "step": 2572 }, { "epoch": 0.011390499800787993, "grad_norm": 4.076069974286361, "learning_rate": 1.1390499800787995e-06, "loss": 1.1501, "step": 2573 }, { "epoch": 0.011394926734251184, "grad_norm": 2.833130459301326, "learning_rate": 1.1394926734251185e-06, "loss": 0.8495, "step": 2574 }, { "epoch": 0.011399353667714374, "grad_norm": 2.667065004176415, "learning_rate": 1.1399353667714374e-06, "loss": 0.7806, "step": 2575 }, { "epoch": 0.011403780601177565, "grad_norm": 2.782157435329478, "learning_rate": 1.1403780601177566e-06, "loss": 0.9249, "step": 2576 }, { "epoch": 0.011408207534640754, "grad_norm": 3.120602569900275, "learning_rate": 1.1408207534640755e-06, "loss": 0.9348, "step": 2577 }, { "epoch": 0.011412634468103944, "grad_norm": 3.2914012274652076, "learning_rate": 1.1412634468103945e-06, "loss": 0.9737, "step": 2578 }, { "epoch": 0.011417061401567135, "grad_norm": 3.1994495755835035, "learning_rate": 1.1417061401567134e-06, "loss": 0.9234, "step": 2579 }, { "epoch": 0.011421488335030325, "grad_norm": 3.224529870018916, "learning_rate": 1.1421488335030326e-06, "loss": 0.6249, "step": 2580 }, { "epoch": 0.011425915268493514, "grad_norm": 3.206562004070733, "learning_rate": 1.1425915268493515e-06, "loss": 0.5831, "step": 2581 }, { "epoch": 0.011430342201956704, "grad_norm": 3.1097696404611885, "learning_rate": 1.1430342201956705e-06, "loss": 1.0526, "step": 2582 }, { "epoch": 0.011434769135419895, "grad_norm": 2.4686380696103445, "learning_rate": 1.1434769135419896e-06, "loss": 0.609, "step": 2583 }, { "epoch": 0.011439196068883085, "grad_norm": 2.7890463842154958, "learning_rate": 1.1439196068883086e-06, "loss": 0.7551, "step": 2584 }, { "epoch": 0.011443623002346274, "grad_norm": 2.7842903885371806, "learning_rate": 1.1443623002346275e-06, "loss": 0.722, "step": 2585 }, { "epoch": 0.011448049935809465, "grad_norm": 2.955545785650399, "learning_rate": 1.1448049935809467e-06, "loss": 0.8841, "step": 2586 }, { "epoch": 0.011452476869272655, "grad_norm": 2.722399254542458, "learning_rate": 1.1452476869272654e-06, "loss": 0.8222, "step": 2587 }, { "epoch": 0.011456903802735846, "grad_norm": 3.070717156308178, "learning_rate": 1.1456903802735846e-06, "loss": 0.9037, "step": 2588 }, { "epoch": 0.011461330736199035, "grad_norm": 2.812505585763221, "learning_rate": 1.1461330736199035e-06, "loss": 0.8513, "step": 2589 }, { "epoch": 0.011465757669662225, "grad_norm": 2.871027040461364, "learning_rate": 1.1465757669662225e-06, "loss": 0.9737, "step": 2590 }, { "epoch": 0.011470184603125416, "grad_norm": 3.2555809413827097, "learning_rate": 1.1470184603125416e-06, "loss": 0.936, "step": 2591 }, { "epoch": 0.011474611536588604, "grad_norm": 3.330059213398681, "learning_rate": 1.1474611536588606e-06, "loss": 0.9728, "step": 2592 }, { "epoch": 0.011479038470051795, "grad_norm": 2.6437460764186707, "learning_rate": 1.1479038470051795e-06, "loss": 0.5806, "step": 2593 }, { "epoch": 0.011483465403514985, "grad_norm": 2.8752496381734582, "learning_rate": 1.1483465403514987e-06, "loss": 1.0192, "step": 2594 }, { "epoch": 0.011487892336978176, "grad_norm": 2.9795236888438397, "learning_rate": 1.1487892336978176e-06, "loss": 0.9654, "step": 2595 }, { "epoch": 0.011492319270441365, "grad_norm": 3.678364365024046, "learning_rate": 1.1492319270441366e-06, "loss": 0.8358, "step": 2596 }, { "epoch": 0.011496746203904555, "grad_norm": 3.5942546317343913, "learning_rate": 1.1496746203904555e-06, "loss": 0.5581, "step": 2597 }, { "epoch": 0.011501173137367746, "grad_norm": 4.073608523504294, "learning_rate": 1.1501173137367747e-06, "loss": 1.0557, "step": 2598 }, { "epoch": 0.011505600070830936, "grad_norm": 3.1136726059834965, "learning_rate": 1.1505600070830936e-06, "loss": 0.8916, "step": 2599 }, { "epoch": 0.011510027004294125, "grad_norm": 3.3218478844879886, "learning_rate": 1.1510027004294126e-06, "loss": 1.0234, "step": 2600 }, { "epoch": 0.011514453937757315, "grad_norm": 3.0808800627587902, "learning_rate": 1.1514453937757317e-06, "loss": 0.826, "step": 2601 }, { "epoch": 0.011518880871220506, "grad_norm": 2.68409157420508, "learning_rate": 1.1518880871220507e-06, "loss": 0.6613, "step": 2602 }, { "epoch": 0.011523307804683696, "grad_norm": 3.4984182221983358, "learning_rate": 1.1523307804683696e-06, "loss": 1.1333, "step": 2603 }, { "epoch": 0.011527734738146885, "grad_norm": 3.397294079222816, "learning_rate": 1.1527734738146886e-06, "loss": 0.6986, "step": 2604 }, { "epoch": 0.011532161671610076, "grad_norm": 2.7067603480658646, "learning_rate": 1.1532161671610077e-06, "loss": 0.6767, "step": 2605 }, { "epoch": 0.011536588605073266, "grad_norm": 2.8848089106492303, "learning_rate": 1.1536588605073267e-06, "loss": 0.6275, "step": 2606 }, { "epoch": 0.011541015538536455, "grad_norm": 3.6096178873501783, "learning_rate": 1.1541015538536456e-06, "loss": 1.2036, "step": 2607 }, { "epoch": 0.011545442471999645, "grad_norm": 2.6146797048114836, "learning_rate": 1.1545442471999648e-06, "loss": 1.0225, "step": 2608 }, { "epoch": 0.011549869405462836, "grad_norm": 3.0457369728091903, "learning_rate": 1.1549869405462837e-06, "loss": 0.926, "step": 2609 }, { "epoch": 0.011554296338926026, "grad_norm": 3.104051069435863, "learning_rate": 1.1554296338926027e-06, "loss": 0.8873, "step": 2610 }, { "epoch": 0.011558723272389215, "grad_norm": 3.1416395096876113, "learning_rate": 1.1558723272389219e-06, "loss": 1.0695, "step": 2611 }, { "epoch": 0.011563150205852406, "grad_norm": 2.7887040112194774, "learning_rate": 1.1563150205852406e-06, "loss": 0.704, "step": 2612 }, { "epoch": 0.011567577139315596, "grad_norm": 2.8502203329293967, "learning_rate": 1.1567577139315597e-06, "loss": 1.0597, "step": 2613 }, { "epoch": 0.011572004072778787, "grad_norm": 2.7185747913345413, "learning_rate": 1.1572004072778787e-06, "loss": 0.7432, "step": 2614 }, { "epoch": 0.011576431006241976, "grad_norm": 2.7497198200911943, "learning_rate": 1.1576431006241976e-06, "loss": 0.855, "step": 2615 }, { "epoch": 0.011580857939705166, "grad_norm": 3.0228818565827447, "learning_rate": 1.1580857939705168e-06, "loss": 0.9016, "step": 2616 }, { "epoch": 0.011585284873168357, "grad_norm": 2.495510350984324, "learning_rate": 1.1585284873168357e-06, "loss": 0.8193, "step": 2617 }, { "epoch": 0.011589711806631547, "grad_norm": 3.0592842004773266, "learning_rate": 1.1589711806631547e-06, "loss": 1.0299, "step": 2618 }, { "epoch": 0.011594138740094736, "grad_norm": 2.637526085154585, "learning_rate": 1.1594138740094739e-06, "loss": 0.9329, "step": 2619 }, { "epoch": 0.011598565673557926, "grad_norm": 2.8527715831589964, "learning_rate": 1.1598565673557928e-06, "loss": 0.8431, "step": 2620 }, { "epoch": 0.011602992607021117, "grad_norm": 4.36248976183118, "learning_rate": 1.1602992607021117e-06, "loss": 1.1657, "step": 2621 }, { "epoch": 0.011607419540484307, "grad_norm": 3.6964952422832202, "learning_rate": 1.1607419540484307e-06, "loss": 0.9369, "step": 2622 }, { "epoch": 0.011611846473947496, "grad_norm": 3.057574114537713, "learning_rate": 1.1611846473947499e-06, "loss": 1.0643, "step": 2623 }, { "epoch": 0.011616273407410687, "grad_norm": 3.084607419765643, "learning_rate": 1.1616273407410688e-06, "loss": 0.7271, "step": 2624 }, { "epoch": 0.011620700340873877, "grad_norm": 3.5054496459822575, "learning_rate": 1.1620700340873877e-06, "loss": 1.0661, "step": 2625 }, { "epoch": 0.011625127274337066, "grad_norm": 3.731390170685083, "learning_rate": 1.162512727433707e-06, "loss": 0.7198, "step": 2626 }, { "epoch": 0.011629554207800256, "grad_norm": 3.0626584985309626, "learning_rate": 1.1629554207800256e-06, "loss": 1.0595, "step": 2627 }, { "epoch": 0.011633981141263447, "grad_norm": 3.121872218327993, "learning_rate": 1.1633981141263448e-06, "loss": 0.959, "step": 2628 }, { "epoch": 0.011638408074726637, "grad_norm": 2.9509054651023154, "learning_rate": 1.1638408074726637e-06, "loss": 0.9983, "step": 2629 }, { "epoch": 0.011642835008189826, "grad_norm": 3.343119864884684, "learning_rate": 1.1642835008189827e-06, "loss": 1.151, "step": 2630 }, { "epoch": 0.011647261941653017, "grad_norm": 2.6050927145274643, "learning_rate": 1.1647261941653019e-06, "loss": 0.6532, "step": 2631 }, { "epoch": 0.011651688875116207, "grad_norm": 3.346437523072107, "learning_rate": 1.1651688875116208e-06, "loss": 0.7093, "step": 2632 }, { "epoch": 0.011656115808579398, "grad_norm": 2.9672250125996023, "learning_rate": 1.1656115808579397e-06, "loss": 0.8893, "step": 2633 }, { "epoch": 0.011660542742042587, "grad_norm": 3.0581933867642324, "learning_rate": 1.166054274204259e-06, "loss": 1.1624, "step": 2634 }, { "epoch": 0.011664969675505777, "grad_norm": 2.9747969494526627, "learning_rate": 1.1664969675505779e-06, "loss": 0.8228, "step": 2635 }, { "epoch": 0.011669396608968968, "grad_norm": 2.7553656663519037, "learning_rate": 1.1669396608968968e-06, "loss": 0.7355, "step": 2636 }, { "epoch": 0.011673823542432158, "grad_norm": 2.775983350257866, "learning_rate": 1.1673823542432157e-06, "loss": 0.7308, "step": 2637 }, { "epoch": 0.011678250475895347, "grad_norm": 2.7733688629230366, "learning_rate": 1.167825047589535e-06, "loss": 0.6621, "step": 2638 }, { "epoch": 0.011682677409358537, "grad_norm": 3.1562283084151925, "learning_rate": 1.1682677409358539e-06, "loss": 0.6129, "step": 2639 }, { "epoch": 0.011687104342821728, "grad_norm": 2.943711635426941, "learning_rate": 1.1687104342821728e-06, "loss": 0.8099, "step": 2640 }, { "epoch": 0.011691531276284917, "grad_norm": 2.852712520772589, "learning_rate": 1.169153127628492e-06, "loss": 0.778, "step": 2641 }, { "epoch": 0.011695958209748107, "grad_norm": 2.4061680930159115, "learning_rate": 1.169595820974811e-06, "loss": 0.7107, "step": 2642 }, { "epoch": 0.011700385143211298, "grad_norm": 2.5070852780892414, "learning_rate": 1.1700385143211299e-06, "loss": 0.6959, "step": 2643 }, { "epoch": 0.011704812076674488, "grad_norm": 3.4334792263609133, "learning_rate": 1.170481207667449e-06, "loss": 0.5503, "step": 2644 }, { "epoch": 0.011709239010137677, "grad_norm": 2.8544026121697335, "learning_rate": 1.1709239010137677e-06, "loss": 0.6552, "step": 2645 }, { "epoch": 0.011713665943600867, "grad_norm": 2.8795302238540033, "learning_rate": 1.171366594360087e-06, "loss": 0.8713, "step": 2646 }, { "epoch": 0.011718092877064058, "grad_norm": 3.24203609285132, "learning_rate": 1.1718092877064059e-06, "loss": 1.0361, "step": 2647 }, { "epoch": 0.011722519810527248, "grad_norm": 2.5758048695152347, "learning_rate": 1.1722519810527248e-06, "loss": 0.5063, "step": 2648 }, { "epoch": 0.011726946743990437, "grad_norm": 2.643977647442471, "learning_rate": 1.172694674399044e-06, "loss": 0.7345, "step": 2649 }, { "epoch": 0.011731373677453628, "grad_norm": 5.543453894965821, "learning_rate": 1.173137367745363e-06, "loss": 1.5134, "step": 2650 }, { "epoch": 0.011735800610916818, "grad_norm": 3.1948856960985905, "learning_rate": 1.1735800610916819e-06, "loss": 1.0136, "step": 2651 }, { "epoch": 0.011740227544380009, "grad_norm": 2.602984752877093, "learning_rate": 1.1740227544380008e-06, "loss": 0.5704, "step": 2652 }, { "epoch": 0.011744654477843198, "grad_norm": 3.300890407239163, "learning_rate": 1.17446544778432e-06, "loss": 0.8189, "step": 2653 }, { "epoch": 0.011749081411306388, "grad_norm": 3.429949416906391, "learning_rate": 1.174908141130639e-06, "loss": 0.9562, "step": 2654 }, { "epoch": 0.011753508344769579, "grad_norm": 3.1637973172398612, "learning_rate": 1.1753508344769579e-06, "loss": 0.9048, "step": 2655 }, { "epoch": 0.011757935278232767, "grad_norm": 2.9375999036926594, "learning_rate": 1.175793527823277e-06, "loss": 0.9928, "step": 2656 }, { "epoch": 0.011762362211695958, "grad_norm": 4.831789494490342, "learning_rate": 1.176236221169596e-06, "loss": 1.7076, "step": 2657 }, { "epoch": 0.011766789145159148, "grad_norm": 2.6488181050087176, "learning_rate": 1.176678914515915e-06, "loss": 0.6166, "step": 2658 }, { "epoch": 0.011771216078622339, "grad_norm": 3.3880698773490328, "learning_rate": 1.177121607862234e-06, "loss": 0.5193, "step": 2659 }, { "epoch": 0.011775643012085528, "grad_norm": 2.4980154707545146, "learning_rate": 1.1775643012085528e-06, "loss": 0.5958, "step": 2660 }, { "epoch": 0.011780069945548718, "grad_norm": 2.8803601002093853, "learning_rate": 1.178006994554872e-06, "loss": 0.6713, "step": 2661 }, { "epoch": 0.011784496879011909, "grad_norm": 3.3093647352534643, "learning_rate": 1.178449687901191e-06, "loss": 0.9513, "step": 2662 }, { "epoch": 0.011788923812475099, "grad_norm": 3.7217447693246735, "learning_rate": 1.1788923812475099e-06, "loss": 0.998, "step": 2663 }, { "epoch": 0.011793350745938288, "grad_norm": 3.1448244609154754, "learning_rate": 1.179335074593829e-06, "loss": 1.0225, "step": 2664 }, { "epoch": 0.011797777679401478, "grad_norm": 4.005037238463537, "learning_rate": 1.179777767940148e-06, "loss": 1.3435, "step": 2665 }, { "epoch": 0.011802204612864669, "grad_norm": 3.902488300972262, "learning_rate": 1.180220461286467e-06, "loss": 0.7697, "step": 2666 }, { "epoch": 0.01180663154632786, "grad_norm": 3.871367168826878, "learning_rate": 1.180663154632786e-06, "loss": 1.029, "step": 2667 }, { "epoch": 0.011811058479791048, "grad_norm": 3.3889734279368984, "learning_rate": 1.181105847979105e-06, "loss": 1.2619, "step": 2668 }, { "epoch": 0.011815485413254239, "grad_norm": 3.437262132012878, "learning_rate": 1.181548541325424e-06, "loss": 1.0231, "step": 2669 }, { "epoch": 0.01181991234671743, "grad_norm": 3.1119161871556287, "learning_rate": 1.181991234671743e-06, "loss": 0.8077, "step": 2670 }, { "epoch": 0.01182433928018062, "grad_norm": 3.4351756162561227, "learning_rate": 1.182433928018062e-06, "loss": 0.8974, "step": 2671 }, { "epoch": 0.011828766213643808, "grad_norm": 3.398305860894807, "learning_rate": 1.182876621364381e-06, "loss": 1.0069, "step": 2672 }, { "epoch": 0.011833193147106999, "grad_norm": 3.122743948569803, "learning_rate": 1.1833193147107e-06, "loss": 0.7203, "step": 2673 }, { "epoch": 0.01183762008057019, "grad_norm": 4.462829634325777, "learning_rate": 1.1837620080570191e-06, "loss": 1.748, "step": 2674 }, { "epoch": 0.011842047014033378, "grad_norm": 2.542561414774735, "learning_rate": 1.1842047014033379e-06, "loss": 0.7075, "step": 2675 }, { "epoch": 0.011846473947496569, "grad_norm": 3.5638517297678023, "learning_rate": 1.184647394749657e-06, "loss": 1.0789, "step": 2676 }, { "epoch": 0.01185090088095976, "grad_norm": 2.663827536656535, "learning_rate": 1.185090088095976e-06, "loss": 0.7204, "step": 2677 }, { "epoch": 0.01185532781442295, "grad_norm": 3.507256798501448, "learning_rate": 1.185532781442295e-06, "loss": 1.077, "step": 2678 }, { "epoch": 0.011859754747886139, "grad_norm": 2.937981118951475, "learning_rate": 1.185975474788614e-06, "loss": 1.0092, "step": 2679 }, { "epoch": 0.011864181681349329, "grad_norm": 2.847924234285785, "learning_rate": 1.186418168134933e-06, "loss": 0.905, "step": 2680 }, { "epoch": 0.01186860861481252, "grad_norm": 3.462350152514598, "learning_rate": 1.186860861481252e-06, "loss": 0.9535, "step": 2681 }, { "epoch": 0.01187303554827571, "grad_norm": 2.8865541596925732, "learning_rate": 1.1873035548275711e-06, "loss": 0.6396, "step": 2682 }, { "epoch": 0.011877462481738899, "grad_norm": 3.127177053297926, "learning_rate": 1.18774624817389e-06, "loss": 1.0961, "step": 2683 }, { "epoch": 0.01188188941520209, "grad_norm": 3.7918170555676167, "learning_rate": 1.188188941520209e-06, "loss": 1.0132, "step": 2684 }, { "epoch": 0.01188631634866528, "grad_norm": 2.749749940548089, "learning_rate": 1.188631634866528e-06, "loss": 0.5959, "step": 2685 }, { "epoch": 0.01189074328212847, "grad_norm": 2.7318801607138137, "learning_rate": 1.1890743282128471e-06, "loss": 0.8111, "step": 2686 }, { "epoch": 0.01189517021559166, "grad_norm": 3.0217176669946806, "learning_rate": 1.189517021559166e-06, "loss": 0.9418, "step": 2687 }, { "epoch": 0.01189959714905485, "grad_norm": 2.3634428154788925, "learning_rate": 1.189959714905485e-06, "loss": 0.5817, "step": 2688 }, { "epoch": 0.01190402408251804, "grad_norm": 3.201838787246529, "learning_rate": 1.1904024082518042e-06, "loss": 0.964, "step": 2689 }, { "epoch": 0.011908451015981229, "grad_norm": 3.1319847886588876, "learning_rate": 1.1908451015981231e-06, "loss": 1.0714, "step": 2690 }, { "epoch": 0.01191287794944442, "grad_norm": 3.1477241576421964, "learning_rate": 1.191287794944442e-06, "loss": 0.7836, "step": 2691 }, { "epoch": 0.01191730488290761, "grad_norm": 3.102221219258308, "learning_rate": 1.1917304882907612e-06, "loss": 0.7257, "step": 2692 }, { "epoch": 0.0119217318163708, "grad_norm": 2.8042010468049368, "learning_rate": 1.19217318163708e-06, "loss": 0.6547, "step": 2693 }, { "epoch": 0.01192615874983399, "grad_norm": 2.7751529108516078, "learning_rate": 1.1926158749833991e-06, "loss": 0.9374, "step": 2694 }, { "epoch": 0.01193058568329718, "grad_norm": 3.852573591029863, "learning_rate": 1.193058568329718e-06, "loss": 0.7573, "step": 2695 }, { "epoch": 0.01193501261676037, "grad_norm": 3.4227104736792024, "learning_rate": 1.193501261676037e-06, "loss": 1.1261, "step": 2696 }, { "epoch": 0.01193943955022356, "grad_norm": 2.9345874384424193, "learning_rate": 1.1939439550223562e-06, "loss": 0.9729, "step": 2697 }, { "epoch": 0.01194386648368675, "grad_norm": 2.9045091111926316, "learning_rate": 1.1943866483686751e-06, "loss": 0.8132, "step": 2698 }, { "epoch": 0.01194829341714994, "grad_norm": 2.989244269819064, "learning_rate": 1.194829341714994e-06, "loss": 0.8214, "step": 2699 }, { "epoch": 0.01195272035061313, "grad_norm": 3.314027399955375, "learning_rate": 1.195272035061313e-06, "loss": 0.7842, "step": 2700 }, { "epoch": 0.011957147284076321, "grad_norm": 2.969129887755465, "learning_rate": 1.1957147284076322e-06, "loss": 0.8961, "step": 2701 }, { "epoch": 0.01196157421753951, "grad_norm": 2.880762005002469, "learning_rate": 1.1961574217539511e-06, "loss": 0.8564, "step": 2702 }, { "epoch": 0.0119660011510027, "grad_norm": 2.966517996543321, "learning_rate": 1.19660011510027e-06, "loss": 0.9471, "step": 2703 }, { "epoch": 0.01197042808446589, "grad_norm": 5.2586460559189145, "learning_rate": 1.1970428084465892e-06, "loss": 1.6841, "step": 2704 }, { "epoch": 0.011974855017929081, "grad_norm": 3.614829415607939, "learning_rate": 1.1974855017929082e-06, "loss": 0.9935, "step": 2705 }, { "epoch": 0.01197928195139227, "grad_norm": 2.7493752531062805, "learning_rate": 1.1979281951392271e-06, "loss": 0.871, "step": 2706 }, { "epoch": 0.01198370888485546, "grad_norm": 3.0157690924071985, "learning_rate": 1.1983708884855463e-06, "loss": 0.8252, "step": 2707 }, { "epoch": 0.011988135818318651, "grad_norm": 3.0510059154515323, "learning_rate": 1.198813581831865e-06, "loss": 0.6375, "step": 2708 }, { "epoch": 0.01199256275178184, "grad_norm": 3.098966310087747, "learning_rate": 1.1992562751781842e-06, "loss": 0.9268, "step": 2709 }, { "epoch": 0.01199698968524503, "grad_norm": 2.6791270154470235, "learning_rate": 1.1996989685245031e-06, "loss": 0.6239, "step": 2710 }, { "epoch": 0.012001416618708221, "grad_norm": 2.8425272517073275, "learning_rate": 1.200141661870822e-06, "loss": 0.6082, "step": 2711 }, { "epoch": 0.012005843552171411, "grad_norm": 3.8116535337259876, "learning_rate": 1.2005843552171412e-06, "loss": 0.8517, "step": 2712 }, { "epoch": 0.0120102704856346, "grad_norm": 2.9882436190087702, "learning_rate": 1.2010270485634602e-06, "loss": 0.6981, "step": 2713 }, { "epoch": 0.01201469741909779, "grad_norm": 4.072075070181512, "learning_rate": 1.2014697419097791e-06, "loss": 0.9763, "step": 2714 }, { "epoch": 0.012019124352560981, "grad_norm": 2.8614547836447595, "learning_rate": 1.2019124352560983e-06, "loss": 0.8564, "step": 2715 }, { "epoch": 0.012023551286024172, "grad_norm": 2.83455803474234, "learning_rate": 1.2023551286024172e-06, "loss": 0.8832, "step": 2716 }, { "epoch": 0.01202797821948736, "grad_norm": 3.673689629962747, "learning_rate": 1.2027978219487362e-06, "loss": 1.3007, "step": 2717 }, { "epoch": 0.012032405152950551, "grad_norm": 2.7768996181118695, "learning_rate": 1.2032405152950551e-06, "loss": 0.8482, "step": 2718 }, { "epoch": 0.012036832086413742, "grad_norm": 2.807636668295737, "learning_rate": 1.2036832086413743e-06, "loss": 1.1103, "step": 2719 }, { "epoch": 0.012041259019876932, "grad_norm": 2.5436023354735036, "learning_rate": 1.2041259019876932e-06, "loss": 0.7212, "step": 2720 }, { "epoch": 0.01204568595334012, "grad_norm": 2.814872391141641, "learning_rate": 1.2045685953340122e-06, "loss": 0.8428, "step": 2721 }, { "epoch": 0.012050112886803311, "grad_norm": 2.7619299386389518, "learning_rate": 1.2050112886803313e-06, "loss": 1.0269, "step": 2722 }, { "epoch": 0.012054539820266502, "grad_norm": 2.863683483916331, "learning_rate": 1.2054539820266503e-06, "loss": 0.8136, "step": 2723 }, { "epoch": 0.01205896675372969, "grad_norm": 2.563122796812797, "learning_rate": 1.2058966753729692e-06, "loss": 0.8213, "step": 2724 }, { "epoch": 0.012063393687192881, "grad_norm": 3.1706248208148677, "learning_rate": 1.2063393687192882e-06, "loss": 1.0099, "step": 2725 }, { "epoch": 0.012067820620656072, "grad_norm": 2.9084328930297336, "learning_rate": 1.2067820620656071e-06, "loss": 0.8983, "step": 2726 }, { "epoch": 0.012072247554119262, "grad_norm": 3.6793302505738965, "learning_rate": 1.2072247554119263e-06, "loss": 0.8757, "step": 2727 }, { "epoch": 0.012076674487582451, "grad_norm": 2.947239663094348, "learning_rate": 1.2076674487582452e-06, "loss": 0.9581, "step": 2728 }, { "epoch": 0.012081101421045641, "grad_norm": 3.349563361025814, "learning_rate": 1.2081101421045642e-06, "loss": 1.0964, "step": 2729 }, { "epoch": 0.012085528354508832, "grad_norm": 3.4456835293847754, "learning_rate": 1.2085528354508833e-06, "loss": 0.7956, "step": 2730 }, { "epoch": 0.012089955287972022, "grad_norm": 2.5876715584408907, "learning_rate": 1.2089955287972023e-06, "loss": 0.7486, "step": 2731 }, { "epoch": 0.012094382221435211, "grad_norm": 3.2260574852370802, "learning_rate": 1.2094382221435212e-06, "loss": 1.0473, "step": 2732 }, { "epoch": 0.012098809154898402, "grad_norm": 3.1229649941330955, "learning_rate": 1.2098809154898402e-06, "loss": 1.0429, "step": 2733 }, { "epoch": 0.012103236088361592, "grad_norm": 3.1735292773055748, "learning_rate": 1.2103236088361593e-06, "loss": 0.8283, "step": 2734 }, { "epoch": 0.012107663021824783, "grad_norm": 2.5532700079744433, "learning_rate": 1.2107663021824783e-06, "loss": 0.7526, "step": 2735 }, { "epoch": 0.012112089955287971, "grad_norm": 2.742419151338962, "learning_rate": 1.2112089955287972e-06, "loss": 0.6598, "step": 2736 }, { "epoch": 0.012116516888751162, "grad_norm": 3.024482864160096, "learning_rate": 1.2116516888751164e-06, "loss": 0.6012, "step": 2737 }, { "epoch": 0.012120943822214353, "grad_norm": 3.846918704594824, "learning_rate": 1.2120943822214353e-06, "loss": 0.7344, "step": 2738 }, { "epoch": 0.012125370755677543, "grad_norm": 3.38674726505222, "learning_rate": 1.2125370755677543e-06, "loss": 0.8184, "step": 2739 }, { "epoch": 0.012129797689140732, "grad_norm": 3.09116457876787, "learning_rate": 1.2129797689140734e-06, "loss": 0.9207, "step": 2740 }, { "epoch": 0.012134224622603922, "grad_norm": 3.581945705424779, "learning_rate": 1.2134224622603922e-06, "loss": 1.2009, "step": 2741 }, { "epoch": 0.012138651556067113, "grad_norm": 3.799023456247418, "learning_rate": 1.2138651556067113e-06, "loss": 1.2022, "step": 2742 }, { "epoch": 0.012143078489530302, "grad_norm": 2.3120519476859074, "learning_rate": 1.2143078489530303e-06, "loss": 0.615, "step": 2743 }, { "epoch": 0.012147505422993492, "grad_norm": 2.5428410130338515, "learning_rate": 1.2147505422993492e-06, "loss": 0.7987, "step": 2744 }, { "epoch": 0.012151932356456683, "grad_norm": 3.04656309257177, "learning_rate": 1.2151932356456684e-06, "loss": 0.8404, "step": 2745 }, { "epoch": 0.012156359289919873, "grad_norm": 3.8758955028239366, "learning_rate": 1.2156359289919873e-06, "loss": 1.232, "step": 2746 }, { "epoch": 0.012160786223383062, "grad_norm": 3.4130666373271787, "learning_rate": 1.2160786223383063e-06, "loss": 1.0733, "step": 2747 }, { "epoch": 0.012165213156846252, "grad_norm": 2.9432074739687786, "learning_rate": 1.2165213156846254e-06, "loss": 0.8534, "step": 2748 }, { "epoch": 0.012169640090309443, "grad_norm": 3.189738783941263, "learning_rate": 1.2169640090309444e-06, "loss": 0.9942, "step": 2749 }, { "epoch": 0.012174067023772633, "grad_norm": 2.853696257958732, "learning_rate": 1.2174067023772633e-06, "loss": 0.9249, "step": 2750 }, { "epoch": 0.012178493957235822, "grad_norm": 2.956116103275009, "learning_rate": 1.2178493957235823e-06, "loss": 1.0426, "step": 2751 }, { "epoch": 0.012182920890699013, "grad_norm": 2.6680820746154215, "learning_rate": 1.2182920890699014e-06, "loss": 0.9739, "step": 2752 }, { "epoch": 0.012187347824162203, "grad_norm": 3.292383850852675, "learning_rate": 1.2187347824162204e-06, "loss": 0.9262, "step": 2753 }, { "epoch": 0.012191774757625394, "grad_norm": 3.8237176851278893, "learning_rate": 1.2191774757625393e-06, "loss": 1.0604, "step": 2754 }, { "epoch": 0.012196201691088582, "grad_norm": 2.930313885835253, "learning_rate": 1.2196201691088585e-06, "loss": 0.501, "step": 2755 }, { "epoch": 0.012200628624551773, "grad_norm": 4.1953466116769285, "learning_rate": 1.2200628624551774e-06, "loss": 1.4665, "step": 2756 }, { "epoch": 0.012205055558014963, "grad_norm": 2.3327392181877946, "learning_rate": 1.2205055558014964e-06, "loss": 0.4171, "step": 2757 }, { "epoch": 0.012209482491478152, "grad_norm": 3.0298332543585835, "learning_rate": 1.2209482491478153e-06, "loss": 1.0289, "step": 2758 }, { "epoch": 0.012213909424941343, "grad_norm": 3.065041448995755, "learning_rate": 1.2213909424941345e-06, "loss": 0.7437, "step": 2759 }, { "epoch": 0.012218336358404533, "grad_norm": 2.986504167308181, "learning_rate": 1.2218336358404534e-06, "loss": 0.867, "step": 2760 }, { "epoch": 0.012222763291867724, "grad_norm": 2.859655430130135, "learning_rate": 1.2222763291867724e-06, "loss": 0.9139, "step": 2761 }, { "epoch": 0.012227190225330913, "grad_norm": 2.912838170630353, "learning_rate": 1.2227190225330915e-06, "loss": 0.7883, "step": 2762 }, { "epoch": 0.012231617158794103, "grad_norm": 3.037128676340066, "learning_rate": 1.2231617158794105e-06, "loss": 0.5476, "step": 2763 }, { "epoch": 0.012236044092257294, "grad_norm": 3.5736028332352925, "learning_rate": 1.2236044092257294e-06, "loss": 1.3198, "step": 2764 }, { "epoch": 0.012240471025720484, "grad_norm": 3.97750449220738, "learning_rate": 1.2240471025720486e-06, "loss": 1.1672, "step": 2765 }, { "epoch": 0.012244897959183673, "grad_norm": 2.4406956120732004, "learning_rate": 1.2244897959183673e-06, "loss": 0.7175, "step": 2766 }, { "epoch": 0.012249324892646863, "grad_norm": 3.3261965652368093, "learning_rate": 1.2249324892646865e-06, "loss": 0.7937, "step": 2767 }, { "epoch": 0.012253751826110054, "grad_norm": 4.0544023678751255, "learning_rate": 1.2253751826110054e-06, "loss": 0.7345, "step": 2768 }, { "epoch": 0.012258178759573244, "grad_norm": 3.0298824539954503, "learning_rate": 1.2258178759573244e-06, "loss": 0.7392, "step": 2769 }, { "epoch": 0.012262605693036433, "grad_norm": 2.860982862191016, "learning_rate": 1.2262605693036435e-06, "loss": 0.9342, "step": 2770 }, { "epoch": 0.012267032626499624, "grad_norm": 2.9556423551210793, "learning_rate": 1.2267032626499625e-06, "loss": 0.9982, "step": 2771 }, { "epoch": 0.012271459559962814, "grad_norm": 2.8319634330626564, "learning_rate": 1.2271459559962814e-06, "loss": 0.7864, "step": 2772 }, { "epoch": 0.012275886493426005, "grad_norm": 2.5415586343689864, "learning_rate": 1.2275886493426006e-06, "loss": 0.8404, "step": 2773 }, { "epoch": 0.012280313426889193, "grad_norm": 3.4451417136314846, "learning_rate": 1.2280313426889195e-06, "loss": 0.7953, "step": 2774 }, { "epoch": 0.012284740360352384, "grad_norm": 2.922213741777942, "learning_rate": 1.2284740360352385e-06, "loss": 0.892, "step": 2775 }, { "epoch": 0.012289167293815574, "grad_norm": 3.019233455572953, "learning_rate": 1.2289167293815574e-06, "loss": 0.7481, "step": 2776 }, { "epoch": 0.012293594227278763, "grad_norm": 3.6280407311761027, "learning_rate": 1.2293594227278766e-06, "loss": 0.84, "step": 2777 }, { "epoch": 0.012298021160741954, "grad_norm": 2.814123328985727, "learning_rate": 1.2298021160741955e-06, "loss": 0.9482, "step": 2778 }, { "epoch": 0.012302448094205144, "grad_norm": 2.9339436915068124, "learning_rate": 1.2302448094205145e-06, "loss": 0.7074, "step": 2779 }, { "epoch": 0.012306875027668335, "grad_norm": 2.4930256581695285, "learning_rate": 1.2306875027668337e-06, "loss": 0.5054, "step": 2780 }, { "epoch": 0.012311301961131524, "grad_norm": 2.8425044353702353, "learning_rate": 1.2311301961131524e-06, "loss": 0.5287, "step": 2781 }, { "epoch": 0.012315728894594714, "grad_norm": 3.9179386756137524, "learning_rate": 1.2315728894594715e-06, "loss": 1.4387, "step": 2782 }, { "epoch": 0.012320155828057905, "grad_norm": 3.165908846984915, "learning_rate": 1.2320155828057905e-06, "loss": 0.7802, "step": 2783 }, { "epoch": 0.012324582761521095, "grad_norm": 2.8157256243822797, "learning_rate": 1.2324582761521094e-06, "loss": 0.7477, "step": 2784 }, { "epoch": 0.012329009694984284, "grad_norm": 2.884061723652575, "learning_rate": 1.2329009694984286e-06, "loss": 0.5258, "step": 2785 }, { "epoch": 0.012333436628447474, "grad_norm": 3.249873475374686, "learning_rate": 1.2333436628447475e-06, "loss": 0.8104, "step": 2786 }, { "epoch": 0.012337863561910665, "grad_norm": 3.1927180935782475, "learning_rate": 1.2337863561910665e-06, "loss": 1.238, "step": 2787 }, { "epoch": 0.012342290495373855, "grad_norm": 2.6041199912350788, "learning_rate": 1.2342290495373857e-06, "loss": 0.5764, "step": 2788 }, { "epoch": 0.012346717428837044, "grad_norm": 2.5782565210215758, "learning_rate": 1.2346717428837046e-06, "loss": 0.735, "step": 2789 }, { "epoch": 0.012351144362300235, "grad_norm": 2.96014764865179, "learning_rate": 1.2351144362300235e-06, "loss": 0.8189, "step": 2790 }, { "epoch": 0.012355571295763425, "grad_norm": 3.2340699601592586, "learning_rate": 1.2355571295763425e-06, "loss": 0.9328, "step": 2791 }, { "epoch": 0.012359998229226614, "grad_norm": 3.340782957906907, "learning_rate": 1.2359998229226617e-06, "loss": 0.9887, "step": 2792 }, { "epoch": 0.012364425162689804, "grad_norm": 2.603897549929219, "learning_rate": 1.2364425162689806e-06, "loss": 0.9336, "step": 2793 }, { "epoch": 0.012368852096152995, "grad_norm": 3.6096497848130693, "learning_rate": 1.2368852096152995e-06, "loss": 1.0536, "step": 2794 }, { "epoch": 0.012373279029616185, "grad_norm": 3.144534965755562, "learning_rate": 1.2373279029616187e-06, "loss": 0.9739, "step": 2795 }, { "epoch": 0.012377705963079374, "grad_norm": 3.4429820527839388, "learning_rate": 1.2377705963079377e-06, "loss": 0.9941, "step": 2796 }, { "epoch": 0.012382132896542565, "grad_norm": 3.168659877378099, "learning_rate": 1.2382132896542566e-06, "loss": 0.7463, "step": 2797 }, { "epoch": 0.012386559830005755, "grad_norm": 3.1958590041217643, "learning_rate": 1.2386559830005758e-06, "loss": 0.8973, "step": 2798 }, { "epoch": 0.012390986763468946, "grad_norm": 3.4696379402489947, "learning_rate": 1.2390986763468945e-06, "loss": 0.8089, "step": 2799 }, { "epoch": 0.012395413696932134, "grad_norm": 4.281106992454647, "learning_rate": 1.2395413696932137e-06, "loss": 1.3253, "step": 2800 }, { "epoch": 0.012399840630395325, "grad_norm": 3.163943943413251, "learning_rate": 1.2399840630395326e-06, "loss": 1.0164, "step": 2801 }, { "epoch": 0.012404267563858516, "grad_norm": 2.7896704812359703, "learning_rate": 1.2404267563858515e-06, "loss": 0.5169, "step": 2802 }, { "epoch": 0.012408694497321706, "grad_norm": 3.4859823225542237, "learning_rate": 1.2408694497321707e-06, "loss": 1.1507, "step": 2803 }, { "epoch": 0.012413121430784895, "grad_norm": 3.2368764606186966, "learning_rate": 1.2413121430784897e-06, "loss": 0.6517, "step": 2804 }, { "epoch": 0.012417548364248085, "grad_norm": 2.4825318381220236, "learning_rate": 1.2417548364248086e-06, "loss": 0.7005, "step": 2805 }, { "epoch": 0.012421975297711276, "grad_norm": 3.7434625299110604, "learning_rate": 1.2421975297711275e-06, "loss": 1.0523, "step": 2806 }, { "epoch": 0.012426402231174466, "grad_norm": 2.8770207025522603, "learning_rate": 1.2426402231174467e-06, "loss": 1.0033, "step": 2807 }, { "epoch": 0.012430829164637655, "grad_norm": 3.092671013272792, "learning_rate": 1.2430829164637657e-06, "loss": 0.6991, "step": 2808 }, { "epoch": 0.012435256098100846, "grad_norm": 2.5629502058902087, "learning_rate": 1.2435256098100846e-06, "loss": 0.8253, "step": 2809 }, { "epoch": 0.012439683031564036, "grad_norm": 3.3895106758712172, "learning_rate": 1.2439683031564038e-06, "loss": 0.7583, "step": 2810 }, { "epoch": 0.012444109965027225, "grad_norm": 3.3513825856950006, "learning_rate": 1.2444109965027227e-06, "loss": 1.1731, "step": 2811 }, { "epoch": 0.012448536898490415, "grad_norm": 3.41198445771558, "learning_rate": 1.2448536898490417e-06, "loss": 0.8466, "step": 2812 }, { "epoch": 0.012452963831953606, "grad_norm": 2.9318768368770414, "learning_rate": 1.2452963831953608e-06, "loss": 0.4744, "step": 2813 }, { "epoch": 0.012457390765416796, "grad_norm": 2.7279676373145114, "learning_rate": 1.2457390765416795e-06, "loss": 0.614, "step": 2814 }, { "epoch": 0.012461817698879985, "grad_norm": 4.761240611872312, "learning_rate": 1.2461817698879987e-06, "loss": 1.2688, "step": 2815 }, { "epoch": 0.012466244632343176, "grad_norm": 3.3062484713276326, "learning_rate": 1.2466244632343177e-06, "loss": 0.8573, "step": 2816 }, { "epoch": 0.012470671565806366, "grad_norm": 2.703831266544141, "learning_rate": 1.2470671565806366e-06, "loss": 0.7896, "step": 2817 }, { "epoch": 0.012475098499269557, "grad_norm": 3.6959103750669944, "learning_rate": 1.2475098499269558e-06, "loss": 0.92, "step": 2818 }, { "epoch": 0.012479525432732745, "grad_norm": 3.676848021666315, "learning_rate": 1.2479525432732747e-06, "loss": 1.2712, "step": 2819 }, { "epoch": 0.012483952366195936, "grad_norm": 2.4703884943777017, "learning_rate": 1.2483952366195937e-06, "loss": 0.7106, "step": 2820 }, { "epoch": 0.012488379299659126, "grad_norm": 3.290525547903074, "learning_rate": 1.2488379299659128e-06, "loss": 0.9536, "step": 2821 }, { "epoch": 0.012492806233122317, "grad_norm": 3.040017838278183, "learning_rate": 1.2492806233122318e-06, "loss": 0.934, "step": 2822 }, { "epoch": 0.012497233166585506, "grad_norm": 3.4654062117037894, "learning_rate": 1.2497233166585507e-06, "loss": 0.9645, "step": 2823 }, { "epoch": 0.012501660100048696, "grad_norm": 3.063391226258586, "learning_rate": 1.2501660100048699e-06, "loss": 0.7135, "step": 2824 }, { "epoch": 0.012506087033511887, "grad_norm": 2.630497936371056, "learning_rate": 1.2506087033511888e-06, "loss": 0.8609, "step": 2825 }, { "epoch": 0.012510513966975076, "grad_norm": 3.1732713046821615, "learning_rate": 1.2510513966975078e-06, "loss": 0.8636, "step": 2826 }, { "epoch": 0.012514940900438266, "grad_norm": 2.916713305882774, "learning_rate": 1.251494090043827e-06, "loss": 0.7338, "step": 2827 }, { "epoch": 0.012519367833901457, "grad_norm": 2.896366621643141, "learning_rate": 1.2519367833901459e-06, "loss": 0.5443, "step": 2828 }, { "epoch": 0.012523794767364647, "grad_norm": 2.3308821035855933, "learning_rate": 1.2523794767364646e-06, "loss": 0.5872, "step": 2829 }, { "epoch": 0.012528221700827836, "grad_norm": 3.252685984275778, "learning_rate": 1.252822170082784e-06, "loss": 0.6094, "step": 2830 }, { "epoch": 0.012532648634291026, "grad_norm": 3.1913015319561535, "learning_rate": 1.2532648634291027e-06, "loss": 0.7331, "step": 2831 }, { "epoch": 0.012537075567754217, "grad_norm": 2.553338209642904, "learning_rate": 1.2537075567754217e-06, "loss": 0.6135, "step": 2832 }, { "epoch": 0.012541502501217407, "grad_norm": 2.7992940120679224, "learning_rate": 1.2541502501217408e-06, "loss": 0.6797, "step": 2833 }, { "epoch": 0.012545929434680596, "grad_norm": 4.171964398844946, "learning_rate": 1.2545929434680598e-06, "loss": 1.0656, "step": 2834 }, { "epoch": 0.012550356368143787, "grad_norm": 3.4616385144490356, "learning_rate": 1.2550356368143787e-06, "loss": 1.2539, "step": 2835 }, { "epoch": 0.012554783301606977, "grad_norm": 3.5261541932126423, "learning_rate": 1.2554783301606979e-06, "loss": 0.77, "step": 2836 }, { "epoch": 0.012559210235070168, "grad_norm": 3.1717189822750758, "learning_rate": 1.2559210235070168e-06, "loss": 1.1163, "step": 2837 }, { "epoch": 0.012563637168533356, "grad_norm": 2.912066145366755, "learning_rate": 1.2563637168533358e-06, "loss": 0.8856, "step": 2838 }, { "epoch": 0.012568064101996547, "grad_norm": 2.7848761868411187, "learning_rate": 1.256806410199655e-06, "loss": 0.9014, "step": 2839 }, { "epoch": 0.012572491035459737, "grad_norm": 2.5347583251753685, "learning_rate": 1.2572491035459739e-06, "loss": 0.584, "step": 2840 }, { "epoch": 0.012576917968922926, "grad_norm": 3.005215104996989, "learning_rate": 1.2576917968922928e-06, "loss": 0.7965, "step": 2841 }, { "epoch": 0.012581344902386117, "grad_norm": 2.8719075948698407, "learning_rate": 1.258134490238612e-06, "loss": 0.7758, "step": 2842 }, { "epoch": 0.012585771835849307, "grad_norm": 4.133346383555084, "learning_rate": 1.258577183584931e-06, "loss": 0.9965, "step": 2843 }, { "epoch": 0.012590198769312498, "grad_norm": 4.414895877781622, "learning_rate": 1.2590198769312499e-06, "loss": 1.2209, "step": 2844 }, { "epoch": 0.012594625702775687, "grad_norm": 2.665156870293847, "learning_rate": 1.259462570277569e-06, "loss": 0.869, "step": 2845 }, { "epoch": 0.012599052636238877, "grad_norm": 2.5988471353374396, "learning_rate": 1.259905263623888e-06, "loss": 0.5261, "step": 2846 }, { "epoch": 0.012603479569702068, "grad_norm": 2.9689863998256167, "learning_rate": 1.2603479569702067e-06, "loss": 0.8425, "step": 2847 }, { "epoch": 0.012607906503165258, "grad_norm": 3.0928933276095343, "learning_rate": 1.260790650316526e-06, "loss": 0.9734, "step": 2848 }, { "epoch": 0.012612333436628447, "grad_norm": 3.0181539102350303, "learning_rate": 1.2612333436628448e-06, "loss": 0.7519, "step": 2849 }, { "epoch": 0.012616760370091637, "grad_norm": 2.7775644848905654, "learning_rate": 1.2616760370091638e-06, "loss": 0.8503, "step": 2850 }, { "epoch": 0.012621187303554828, "grad_norm": 3.0635262728513806, "learning_rate": 1.262118730355483e-06, "loss": 0.8018, "step": 2851 }, { "epoch": 0.012625614237018018, "grad_norm": 3.6928736919735097, "learning_rate": 1.2625614237018019e-06, "loss": 0.8833, "step": 2852 }, { "epoch": 0.012630041170481207, "grad_norm": 3.061356599508181, "learning_rate": 1.2630041170481208e-06, "loss": 0.8873, "step": 2853 }, { "epoch": 0.012634468103944398, "grad_norm": 3.2270590812936963, "learning_rate": 1.26344681039444e-06, "loss": 0.7202, "step": 2854 }, { "epoch": 0.012638895037407588, "grad_norm": 2.801791253663632, "learning_rate": 1.263889503740759e-06, "loss": 0.8613, "step": 2855 }, { "epoch": 0.012643321970870779, "grad_norm": 3.9187864436877278, "learning_rate": 1.2643321970870779e-06, "loss": 1.2621, "step": 2856 }, { "epoch": 0.012647748904333967, "grad_norm": 3.3150298992731475, "learning_rate": 1.264774890433397e-06, "loss": 0.9615, "step": 2857 }, { "epoch": 0.012652175837797158, "grad_norm": 3.212612428283926, "learning_rate": 1.265217583779716e-06, "loss": 1.128, "step": 2858 }, { "epoch": 0.012656602771260348, "grad_norm": 3.0221411065182644, "learning_rate": 1.265660277126035e-06, "loss": 0.5779, "step": 2859 }, { "epoch": 0.012661029704723537, "grad_norm": 2.5697175558972036, "learning_rate": 1.266102970472354e-06, "loss": 0.7478, "step": 2860 }, { "epoch": 0.012665456638186728, "grad_norm": 2.595816800747911, "learning_rate": 1.266545663818673e-06, "loss": 0.7262, "step": 2861 }, { "epoch": 0.012669883571649918, "grad_norm": 2.7813014722827676, "learning_rate": 1.2669883571649918e-06, "loss": 1.0124, "step": 2862 }, { "epoch": 0.012674310505113109, "grad_norm": 3.7006949014690096, "learning_rate": 1.2674310505113111e-06, "loss": 0.5301, "step": 2863 }, { "epoch": 0.012678737438576297, "grad_norm": 3.3158962078978984, "learning_rate": 1.2678737438576299e-06, "loss": 1.1379, "step": 2864 }, { "epoch": 0.012683164372039488, "grad_norm": 3.4049108355425437, "learning_rate": 1.2683164372039488e-06, "loss": 0.9129, "step": 2865 }, { "epoch": 0.012687591305502679, "grad_norm": 3.0733492981450445, "learning_rate": 1.268759130550268e-06, "loss": 0.7571, "step": 2866 }, { "epoch": 0.012692018238965869, "grad_norm": 3.188708944256784, "learning_rate": 1.269201823896587e-06, "loss": 1.0914, "step": 2867 }, { "epoch": 0.012696445172429058, "grad_norm": 3.042721772568518, "learning_rate": 1.2696445172429059e-06, "loss": 0.8436, "step": 2868 }, { "epoch": 0.012700872105892248, "grad_norm": 2.8068549979965782, "learning_rate": 1.270087210589225e-06, "loss": 0.737, "step": 2869 }, { "epoch": 0.012705299039355439, "grad_norm": 3.1385618823688457, "learning_rate": 1.270529903935544e-06, "loss": 0.6475, "step": 2870 }, { "epoch": 0.01270972597281863, "grad_norm": 3.9802780541258196, "learning_rate": 1.270972597281863e-06, "loss": 0.8394, "step": 2871 }, { "epoch": 0.012714152906281818, "grad_norm": 2.9761697018343187, "learning_rate": 1.271415290628182e-06, "loss": 0.8459, "step": 2872 }, { "epoch": 0.012718579839745009, "grad_norm": 3.280306215713769, "learning_rate": 1.271857983974501e-06, "loss": 0.8215, "step": 2873 }, { "epoch": 0.012723006773208199, "grad_norm": 2.514653140092519, "learning_rate": 1.27230067732082e-06, "loss": 0.6679, "step": 2874 }, { "epoch": 0.012727433706671388, "grad_norm": 2.784534292900158, "learning_rate": 1.2727433706671391e-06, "loss": 0.6965, "step": 2875 }, { "epoch": 0.012731860640134578, "grad_norm": 2.6940109334568647, "learning_rate": 1.273186064013458e-06, "loss": 0.7791, "step": 2876 }, { "epoch": 0.012736287573597769, "grad_norm": 3.294409864568097, "learning_rate": 1.273628757359777e-06, "loss": 0.9581, "step": 2877 }, { "epoch": 0.01274071450706096, "grad_norm": 3.5874530908610684, "learning_rate": 1.2740714507060962e-06, "loss": 0.8105, "step": 2878 }, { "epoch": 0.012745141440524148, "grad_norm": 3.3206752706103266, "learning_rate": 1.274514144052415e-06, "loss": 0.9058, "step": 2879 }, { "epoch": 0.012749568373987339, "grad_norm": 3.3136115347622708, "learning_rate": 1.2749568373987339e-06, "loss": 0.9557, "step": 2880 }, { "epoch": 0.01275399530745053, "grad_norm": 2.462153242150125, "learning_rate": 1.275399530745053e-06, "loss": 0.7814, "step": 2881 }, { "epoch": 0.01275842224091372, "grad_norm": 3.031829362963184, "learning_rate": 1.275842224091372e-06, "loss": 0.9433, "step": 2882 }, { "epoch": 0.012762849174376908, "grad_norm": 4.026655550664166, "learning_rate": 1.276284917437691e-06, "loss": 0.8259, "step": 2883 }, { "epoch": 0.012767276107840099, "grad_norm": 3.9054226646716734, "learning_rate": 1.27672761078401e-06, "loss": 1.2621, "step": 2884 }, { "epoch": 0.01277170304130329, "grad_norm": 3.6402462492933325, "learning_rate": 1.277170304130329e-06, "loss": 1.0942, "step": 2885 }, { "epoch": 0.01277612997476648, "grad_norm": 3.2725299499280887, "learning_rate": 1.277612997476648e-06, "loss": 0.9982, "step": 2886 }, { "epoch": 0.012780556908229669, "grad_norm": 2.720444573346781, "learning_rate": 1.2780556908229671e-06, "loss": 0.7309, "step": 2887 }, { "epoch": 0.01278498384169286, "grad_norm": 3.497358561914108, "learning_rate": 1.278498384169286e-06, "loss": 1.105, "step": 2888 }, { "epoch": 0.01278941077515605, "grad_norm": 3.3968616565340386, "learning_rate": 1.278941077515605e-06, "loss": 0.5387, "step": 2889 }, { "epoch": 0.01279383770861924, "grad_norm": 3.0358936053922596, "learning_rate": 1.2793837708619242e-06, "loss": 0.9462, "step": 2890 }, { "epoch": 0.012798264642082429, "grad_norm": 2.276956876020348, "learning_rate": 1.2798264642082431e-06, "loss": 0.7328, "step": 2891 }, { "epoch": 0.01280269157554562, "grad_norm": 3.926528004450514, "learning_rate": 1.280269157554562e-06, "loss": 1.1316, "step": 2892 }, { "epoch": 0.01280711850900881, "grad_norm": 2.409299389922969, "learning_rate": 1.2807118509008812e-06, "loss": 0.9097, "step": 2893 }, { "epoch": 0.012811545442471999, "grad_norm": 3.0710392287196506, "learning_rate": 1.2811545442472002e-06, "loss": 0.9718, "step": 2894 }, { "epoch": 0.01281597237593519, "grad_norm": 2.8218608188085987, "learning_rate": 1.281597237593519e-06, "loss": 0.7785, "step": 2895 }, { "epoch": 0.01282039930939838, "grad_norm": 2.7194056536600923, "learning_rate": 1.2820399309398383e-06, "loss": 0.8868, "step": 2896 }, { "epoch": 0.01282482624286157, "grad_norm": 3.402502376068132, "learning_rate": 1.282482624286157e-06, "loss": 0.9751, "step": 2897 }, { "epoch": 0.01282925317632476, "grad_norm": 2.9901460423161774, "learning_rate": 1.282925317632476e-06, "loss": 1.0624, "step": 2898 }, { "epoch": 0.01283368010978795, "grad_norm": 3.0859935787106774, "learning_rate": 1.2833680109787951e-06, "loss": 0.9902, "step": 2899 }, { "epoch": 0.01283810704325114, "grad_norm": 3.8448895456142647, "learning_rate": 1.283810704325114e-06, "loss": 1.088, "step": 2900 }, { "epoch": 0.01284253397671433, "grad_norm": 2.816381349699482, "learning_rate": 1.284253397671433e-06, "loss": 0.8604, "step": 2901 }, { "epoch": 0.01284696091017752, "grad_norm": 3.3850050316459668, "learning_rate": 1.2846960910177522e-06, "loss": 0.8298, "step": 2902 }, { "epoch": 0.01285138784364071, "grad_norm": 2.772467872869057, "learning_rate": 1.2851387843640711e-06, "loss": 0.9098, "step": 2903 }, { "epoch": 0.0128558147771039, "grad_norm": 2.9948222130298183, "learning_rate": 1.28558147771039e-06, "loss": 0.8846, "step": 2904 }, { "epoch": 0.012860241710567091, "grad_norm": 3.4068777133096355, "learning_rate": 1.2860241710567092e-06, "loss": 0.968, "step": 2905 }, { "epoch": 0.01286466864403028, "grad_norm": 2.7252855639544893, "learning_rate": 1.2864668644030282e-06, "loss": 0.7588, "step": 2906 }, { "epoch": 0.01286909557749347, "grad_norm": 3.2763064742056422, "learning_rate": 1.2869095577493471e-06, "loss": 0.8848, "step": 2907 }, { "epoch": 0.01287352251095666, "grad_norm": 3.166353228742069, "learning_rate": 1.2873522510956663e-06, "loss": 0.8144, "step": 2908 }, { "epoch": 0.01287794944441985, "grad_norm": 3.4194868139459724, "learning_rate": 1.2877949444419852e-06, "loss": 1.0709, "step": 2909 }, { "epoch": 0.01288237637788304, "grad_norm": 3.657797407714756, "learning_rate": 1.288237637788304e-06, "loss": 1.0195, "step": 2910 }, { "epoch": 0.01288680331134623, "grad_norm": 2.427901158913934, "learning_rate": 1.2886803311346233e-06, "loss": 0.6754, "step": 2911 }, { "epoch": 0.012891230244809421, "grad_norm": 2.730883153411859, "learning_rate": 1.289123024480942e-06, "loss": 0.7577, "step": 2912 }, { "epoch": 0.01289565717827261, "grad_norm": 3.071056995893074, "learning_rate": 1.289565717827261e-06, "loss": 0.7588, "step": 2913 }, { "epoch": 0.0129000841117358, "grad_norm": 2.8893684896624006, "learning_rate": 1.2900084111735802e-06, "loss": 0.7429, "step": 2914 }, { "epoch": 0.01290451104519899, "grad_norm": 2.9409358525864864, "learning_rate": 1.2904511045198991e-06, "loss": 1.0104, "step": 2915 }, { "epoch": 0.012908937978662181, "grad_norm": 3.5021688964695805, "learning_rate": 1.290893797866218e-06, "loss": 0.6588, "step": 2916 }, { "epoch": 0.01291336491212537, "grad_norm": 2.458599490508384, "learning_rate": 1.2913364912125372e-06, "loss": 0.8494, "step": 2917 }, { "epoch": 0.01291779184558856, "grad_norm": 2.626190828128322, "learning_rate": 1.2917791845588562e-06, "loss": 0.7539, "step": 2918 }, { "epoch": 0.012922218779051751, "grad_norm": 3.697810098830915, "learning_rate": 1.2922218779051751e-06, "loss": 0.8256, "step": 2919 }, { "epoch": 0.012926645712514942, "grad_norm": 2.6166719535552634, "learning_rate": 1.2926645712514943e-06, "loss": 0.6274, "step": 2920 }, { "epoch": 0.01293107264597813, "grad_norm": 3.0903711752204033, "learning_rate": 1.2931072645978132e-06, "loss": 1.1061, "step": 2921 }, { "epoch": 0.012935499579441321, "grad_norm": 2.9903934100784193, "learning_rate": 1.2935499579441322e-06, "loss": 0.7476, "step": 2922 }, { "epoch": 0.012939926512904511, "grad_norm": 3.280872842845911, "learning_rate": 1.2939926512904513e-06, "loss": 0.7543, "step": 2923 }, { "epoch": 0.012944353446367702, "grad_norm": 3.1071571192130425, "learning_rate": 1.2944353446367703e-06, "loss": 0.9026, "step": 2924 }, { "epoch": 0.01294878037983089, "grad_norm": 3.8962251547834095, "learning_rate": 1.2948780379830892e-06, "loss": 1.3591, "step": 2925 }, { "epoch": 0.012953207313294081, "grad_norm": 3.2838308985046116, "learning_rate": 1.2953207313294084e-06, "loss": 1.1837, "step": 2926 }, { "epoch": 0.012957634246757272, "grad_norm": 3.00966118298288, "learning_rate": 1.2957634246757273e-06, "loss": 0.8903, "step": 2927 }, { "epoch": 0.01296206118022046, "grad_norm": 4.907231155615042, "learning_rate": 1.296206118022046e-06, "loss": 1.23, "step": 2928 }, { "epoch": 0.012966488113683651, "grad_norm": 4.561701965632818, "learning_rate": 1.2966488113683652e-06, "loss": 1.2427, "step": 2929 }, { "epoch": 0.012970915047146842, "grad_norm": 3.6455372932070675, "learning_rate": 1.2970915047146842e-06, "loss": 1.1086, "step": 2930 }, { "epoch": 0.012975341980610032, "grad_norm": 3.0135370269566093, "learning_rate": 1.2975341980610031e-06, "loss": 0.6178, "step": 2931 }, { "epoch": 0.01297976891407322, "grad_norm": 3.45752044344941, "learning_rate": 1.2979768914073223e-06, "loss": 0.7883, "step": 2932 }, { "epoch": 0.012984195847536411, "grad_norm": 2.731289900849955, "learning_rate": 1.2984195847536412e-06, "loss": 0.6929, "step": 2933 }, { "epoch": 0.012988622780999602, "grad_norm": 3.952783779732979, "learning_rate": 1.2988622780999602e-06, "loss": 1.0196, "step": 2934 }, { "epoch": 0.012993049714462792, "grad_norm": 3.422349690313853, "learning_rate": 1.2993049714462793e-06, "loss": 0.8611, "step": 2935 }, { "epoch": 0.012997476647925981, "grad_norm": 3.57496247026528, "learning_rate": 1.2997476647925983e-06, "loss": 0.5116, "step": 2936 }, { "epoch": 0.013001903581389172, "grad_norm": 2.9194532313532475, "learning_rate": 1.3001903581389172e-06, "loss": 1.251, "step": 2937 }, { "epoch": 0.013006330514852362, "grad_norm": 2.724794772045661, "learning_rate": 1.3006330514852364e-06, "loss": 0.5756, "step": 2938 }, { "epoch": 0.013010757448315553, "grad_norm": 2.75478471075176, "learning_rate": 1.3010757448315553e-06, "loss": 0.7699, "step": 2939 }, { "epoch": 0.013015184381778741, "grad_norm": 2.755893254020136, "learning_rate": 1.3015184381778743e-06, "loss": 0.7224, "step": 2940 }, { "epoch": 0.013019611315241932, "grad_norm": 2.692989502451498, "learning_rate": 1.3019611315241935e-06, "loss": 0.8008, "step": 2941 }, { "epoch": 0.013024038248705122, "grad_norm": 3.0778029525443067, "learning_rate": 1.3024038248705124e-06, "loss": 0.8896, "step": 2942 }, { "epoch": 0.013028465182168311, "grad_norm": 3.1926589531052283, "learning_rate": 1.3028465182168311e-06, "loss": 0.9111, "step": 2943 }, { "epoch": 0.013032892115631502, "grad_norm": 2.726944936381685, "learning_rate": 1.3032892115631505e-06, "loss": 0.8274, "step": 2944 }, { "epoch": 0.013037319049094692, "grad_norm": 3.7047119628060505, "learning_rate": 1.3037319049094692e-06, "loss": 0.9742, "step": 2945 }, { "epoch": 0.013041745982557883, "grad_norm": 3.4512364722686257, "learning_rate": 1.3041745982557882e-06, "loss": 1.0287, "step": 2946 }, { "epoch": 0.013046172916021071, "grad_norm": 3.4190133809276064, "learning_rate": 1.3046172916021073e-06, "loss": 1.1885, "step": 2947 }, { "epoch": 0.013050599849484262, "grad_norm": 2.7270973329803296, "learning_rate": 1.3050599849484263e-06, "loss": 0.9501, "step": 2948 }, { "epoch": 0.013055026782947452, "grad_norm": 3.711490705617974, "learning_rate": 1.3055026782947452e-06, "loss": 1.2743, "step": 2949 }, { "epoch": 0.013059453716410643, "grad_norm": 3.4730363745729758, "learning_rate": 1.3059453716410644e-06, "loss": 1.2984, "step": 2950 }, { "epoch": 0.013063880649873832, "grad_norm": 2.8940911125196864, "learning_rate": 1.3063880649873833e-06, "loss": 0.8338, "step": 2951 }, { "epoch": 0.013068307583337022, "grad_norm": 2.9388031722495027, "learning_rate": 1.3068307583337023e-06, "loss": 0.7172, "step": 2952 }, { "epoch": 0.013072734516800213, "grad_norm": 4.121090718528824, "learning_rate": 1.3072734516800215e-06, "loss": 1.1567, "step": 2953 }, { "epoch": 0.013077161450263403, "grad_norm": 2.8851940182499383, "learning_rate": 1.3077161450263404e-06, "loss": 0.8146, "step": 2954 }, { "epoch": 0.013081588383726592, "grad_norm": 3.0010174464131842, "learning_rate": 1.3081588383726593e-06, "loss": 0.8624, "step": 2955 }, { "epoch": 0.013086015317189783, "grad_norm": 2.637031147541098, "learning_rate": 1.3086015317189785e-06, "loss": 0.8355, "step": 2956 }, { "epoch": 0.013090442250652973, "grad_norm": 2.648190753564002, "learning_rate": 1.3090442250652975e-06, "loss": 0.9655, "step": 2957 }, { "epoch": 0.013094869184116164, "grad_norm": 3.3415295837505, "learning_rate": 1.3094869184116162e-06, "loss": 1.0331, "step": 2958 }, { "epoch": 0.013099296117579352, "grad_norm": 3.7894837981366383, "learning_rate": 1.3099296117579356e-06, "loss": 0.9791, "step": 2959 }, { "epoch": 0.013103723051042543, "grad_norm": 2.9365892753669884, "learning_rate": 1.3103723051042543e-06, "loss": 1.0943, "step": 2960 }, { "epoch": 0.013108149984505733, "grad_norm": 3.4617502524313277, "learning_rate": 1.3108149984505732e-06, "loss": 1.3342, "step": 2961 }, { "epoch": 0.013112576917968922, "grad_norm": 2.7672227256437765, "learning_rate": 1.3112576917968924e-06, "loss": 0.7887, "step": 2962 }, { "epoch": 0.013117003851432113, "grad_norm": 2.5127583421628112, "learning_rate": 1.3117003851432113e-06, "loss": 0.7134, "step": 2963 }, { "epoch": 0.013121430784895303, "grad_norm": 3.014077674523336, "learning_rate": 1.3121430784895303e-06, "loss": 0.8402, "step": 2964 }, { "epoch": 0.013125857718358494, "grad_norm": 2.499920876174938, "learning_rate": 1.3125857718358495e-06, "loss": 0.6983, "step": 2965 }, { "epoch": 0.013130284651821682, "grad_norm": 3.5251204765880986, "learning_rate": 1.3130284651821684e-06, "loss": 0.8462, "step": 2966 }, { "epoch": 0.013134711585284873, "grad_norm": 3.8798786674748023, "learning_rate": 1.3134711585284873e-06, "loss": 1.0398, "step": 2967 }, { "epoch": 0.013139138518748063, "grad_norm": 3.4897008702953443, "learning_rate": 1.3139138518748065e-06, "loss": 1.0764, "step": 2968 }, { "epoch": 0.013143565452211254, "grad_norm": 3.160826832363537, "learning_rate": 1.3143565452211255e-06, "loss": 0.7554, "step": 2969 }, { "epoch": 0.013147992385674443, "grad_norm": 2.6274292083994375, "learning_rate": 1.3147992385674444e-06, "loss": 0.7968, "step": 2970 }, { "epoch": 0.013152419319137633, "grad_norm": 3.5931487015294246, "learning_rate": 1.3152419319137636e-06, "loss": 0.6323, "step": 2971 }, { "epoch": 0.013156846252600824, "grad_norm": 2.7828168194237155, "learning_rate": 1.3156846252600825e-06, "loss": 0.7944, "step": 2972 }, { "epoch": 0.013161273186064014, "grad_norm": 2.657077354268313, "learning_rate": 1.3161273186064015e-06, "loss": 0.6841, "step": 2973 }, { "epoch": 0.013165700119527203, "grad_norm": 2.8283542064780574, "learning_rate": 1.3165700119527206e-06, "loss": 0.5261, "step": 2974 }, { "epoch": 0.013170127052990394, "grad_norm": 3.294258451216889, "learning_rate": 1.3170127052990396e-06, "loss": 0.6114, "step": 2975 }, { "epoch": 0.013174553986453584, "grad_norm": 2.503540520911287, "learning_rate": 1.3174553986453583e-06, "loss": 0.7355, "step": 2976 }, { "epoch": 0.013178980919916773, "grad_norm": 2.7455822010618514, "learning_rate": 1.3178980919916777e-06, "loss": 0.8814, "step": 2977 }, { "epoch": 0.013183407853379963, "grad_norm": 2.8307611008682305, "learning_rate": 1.3183407853379964e-06, "loss": 0.7601, "step": 2978 }, { "epoch": 0.013187834786843154, "grad_norm": 2.755495820298696, "learning_rate": 1.3187834786843153e-06, "loss": 0.6207, "step": 2979 }, { "epoch": 0.013192261720306344, "grad_norm": 2.673197341084712, "learning_rate": 1.3192261720306345e-06, "loss": 0.7989, "step": 2980 }, { "epoch": 0.013196688653769533, "grad_norm": 3.4098705752658263, "learning_rate": 1.3196688653769535e-06, "loss": 0.9152, "step": 2981 }, { "epoch": 0.013201115587232724, "grad_norm": 4.479862996767601, "learning_rate": 1.3201115587232724e-06, "loss": 1.1288, "step": 2982 }, { "epoch": 0.013205542520695914, "grad_norm": 2.979133335771632, "learning_rate": 1.3205542520695916e-06, "loss": 0.7503, "step": 2983 }, { "epoch": 0.013209969454159105, "grad_norm": 2.8375715158469323, "learning_rate": 1.3209969454159105e-06, "loss": 0.8886, "step": 2984 }, { "epoch": 0.013214396387622293, "grad_norm": 3.21281926171568, "learning_rate": 1.3214396387622295e-06, "loss": 0.669, "step": 2985 }, { "epoch": 0.013218823321085484, "grad_norm": 3.2418313239770913, "learning_rate": 1.3218823321085486e-06, "loss": 0.9287, "step": 2986 }, { "epoch": 0.013223250254548674, "grad_norm": 3.01214007679443, "learning_rate": 1.3223250254548676e-06, "loss": 0.9005, "step": 2987 }, { "epoch": 0.013227677188011865, "grad_norm": 3.3555363725931797, "learning_rate": 1.3227677188011865e-06, "loss": 1.0879, "step": 2988 }, { "epoch": 0.013232104121475054, "grad_norm": 2.6436165728907945, "learning_rate": 1.3232104121475057e-06, "loss": 0.8329, "step": 2989 }, { "epoch": 0.013236531054938244, "grad_norm": 2.4209252077418086, "learning_rate": 1.3236531054938246e-06, "loss": 0.8349, "step": 2990 }, { "epoch": 0.013240957988401435, "grad_norm": 2.61243645417703, "learning_rate": 1.3240957988401433e-06, "loss": 0.8589, "step": 2991 }, { "epoch": 0.013245384921864625, "grad_norm": 2.7658845207546356, "learning_rate": 1.3245384921864627e-06, "loss": 0.9543, "step": 2992 }, { "epoch": 0.013249811855327814, "grad_norm": 3.217770338253749, "learning_rate": 1.3249811855327815e-06, "loss": 0.691, "step": 2993 }, { "epoch": 0.013254238788791005, "grad_norm": 3.1893325114352553, "learning_rate": 1.3254238788791004e-06, "loss": 0.7474, "step": 2994 }, { "epoch": 0.013258665722254195, "grad_norm": 2.8094696166557718, "learning_rate": 1.3258665722254196e-06, "loss": 0.7454, "step": 2995 }, { "epoch": 0.013263092655717384, "grad_norm": 3.0584695929628536, "learning_rate": 1.3263092655717385e-06, "loss": 0.882, "step": 2996 }, { "epoch": 0.013267519589180574, "grad_norm": 2.87519184301938, "learning_rate": 1.3267519589180575e-06, "loss": 0.7484, "step": 2997 }, { "epoch": 0.013271946522643765, "grad_norm": 2.9156647151959927, "learning_rate": 1.3271946522643766e-06, "loss": 0.7435, "step": 2998 }, { "epoch": 0.013276373456106955, "grad_norm": 2.855190391934188, "learning_rate": 1.3276373456106956e-06, "loss": 0.5344, "step": 2999 }, { "epoch": 0.013280800389570144, "grad_norm": 2.780738387597829, "learning_rate": 1.3280800389570145e-06, "loss": 0.8131, "step": 3000 }, { "epoch": 0.013285227323033335, "grad_norm": 2.9386966592238735, "learning_rate": 1.3285227323033337e-06, "loss": 0.8177, "step": 3001 }, { "epoch": 0.013289654256496525, "grad_norm": 3.568060629542472, "learning_rate": 1.3289654256496526e-06, "loss": 0.8765, "step": 3002 }, { "epoch": 0.013294081189959716, "grad_norm": 3.441733496359784, "learning_rate": 1.3294081189959716e-06, "loss": 1.2182, "step": 3003 }, { "epoch": 0.013298508123422904, "grad_norm": 2.996659844877356, "learning_rate": 1.3298508123422907e-06, "loss": 0.9249, "step": 3004 }, { "epoch": 0.013302935056886095, "grad_norm": 2.721622530493524, "learning_rate": 1.3302935056886097e-06, "loss": 0.6975, "step": 3005 }, { "epoch": 0.013307361990349285, "grad_norm": 2.8117197745304283, "learning_rate": 1.3307361990349286e-06, "loss": 0.7943, "step": 3006 }, { "epoch": 0.013311788923812476, "grad_norm": 3.232117868072421, "learning_rate": 1.3311788923812478e-06, "loss": 0.6882, "step": 3007 }, { "epoch": 0.013316215857275665, "grad_norm": 2.862140112115877, "learning_rate": 1.3316215857275665e-06, "loss": 0.4451, "step": 3008 }, { "epoch": 0.013320642790738855, "grad_norm": 3.0795604410277817, "learning_rate": 1.3320642790738855e-06, "loss": 1.2638, "step": 3009 }, { "epoch": 0.013325069724202046, "grad_norm": 3.3187707750359716, "learning_rate": 1.3325069724202046e-06, "loss": 0.9546, "step": 3010 }, { "epoch": 0.013329496657665234, "grad_norm": 3.5032150429602695, "learning_rate": 1.3329496657665236e-06, "loss": 1.0771, "step": 3011 }, { "epoch": 0.013333923591128425, "grad_norm": 3.7158523299976136, "learning_rate": 1.3333923591128425e-06, "loss": 1.0306, "step": 3012 }, { "epoch": 0.013338350524591615, "grad_norm": 3.0706581261785812, "learning_rate": 1.3338350524591617e-06, "loss": 0.9932, "step": 3013 }, { "epoch": 0.013342777458054806, "grad_norm": 4.070164043651729, "learning_rate": 1.3342777458054806e-06, "loss": 0.8957, "step": 3014 }, { "epoch": 0.013347204391517995, "grad_norm": 3.007496358257708, "learning_rate": 1.3347204391517996e-06, "loss": 0.5656, "step": 3015 }, { "epoch": 0.013351631324981185, "grad_norm": 3.344258230528983, "learning_rate": 1.3351631324981187e-06, "loss": 0.8678, "step": 3016 }, { "epoch": 0.013356058258444376, "grad_norm": 2.6861111944931673, "learning_rate": 1.3356058258444377e-06, "loss": 0.8268, "step": 3017 }, { "epoch": 0.013360485191907566, "grad_norm": 2.671484646778165, "learning_rate": 1.3360485191907566e-06, "loss": 0.928, "step": 3018 }, { "epoch": 0.013364912125370755, "grad_norm": 2.698422391292124, "learning_rate": 1.3364912125370758e-06, "loss": 0.6665, "step": 3019 }, { "epoch": 0.013369339058833946, "grad_norm": 3.362684637703308, "learning_rate": 1.3369339058833947e-06, "loss": 0.9451, "step": 3020 }, { "epoch": 0.013373765992297136, "grad_norm": 3.661496869395799, "learning_rate": 1.3373765992297137e-06, "loss": 1.0195, "step": 3021 }, { "epoch": 0.013378192925760327, "grad_norm": 2.904015952533618, "learning_rate": 1.3378192925760328e-06, "loss": 0.7558, "step": 3022 }, { "epoch": 0.013382619859223515, "grad_norm": 2.8514141106224327, "learning_rate": 1.3382619859223518e-06, "loss": 0.7329, "step": 3023 }, { "epoch": 0.013387046792686706, "grad_norm": 2.709575687037838, "learning_rate": 1.3387046792686705e-06, "loss": 0.6475, "step": 3024 }, { "epoch": 0.013391473726149896, "grad_norm": 3.4402458365247597, "learning_rate": 1.3391473726149899e-06, "loss": 1.3304, "step": 3025 }, { "epoch": 0.013395900659613085, "grad_norm": 4.1929559013188475, "learning_rate": 1.3395900659613086e-06, "loss": 1.3687, "step": 3026 }, { "epoch": 0.013400327593076276, "grad_norm": 2.6297598677517975, "learning_rate": 1.3400327593076276e-06, "loss": 0.7627, "step": 3027 }, { "epoch": 0.013404754526539466, "grad_norm": 2.886585433641857, "learning_rate": 1.3404754526539467e-06, "loss": 0.8466, "step": 3028 }, { "epoch": 0.013409181460002657, "grad_norm": 3.2483578096889647, "learning_rate": 1.3409181460002657e-06, "loss": 0.8568, "step": 3029 }, { "epoch": 0.013413608393465845, "grad_norm": 2.864613104623942, "learning_rate": 1.3413608393465846e-06, "loss": 0.603, "step": 3030 }, { "epoch": 0.013418035326929036, "grad_norm": 2.6890198879279668, "learning_rate": 1.3418035326929038e-06, "loss": 0.8732, "step": 3031 }, { "epoch": 0.013422462260392226, "grad_norm": 2.903020907881806, "learning_rate": 1.3422462260392227e-06, "loss": 0.4435, "step": 3032 }, { "epoch": 0.013426889193855417, "grad_norm": 4.661626069323498, "learning_rate": 1.3426889193855417e-06, "loss": 1.3841, "step": 3033 }, { "epoch": 0.013431316127318606, "grad_norm": 2.7932680807131125, "learning_rate": 1.3431316127318608e-06, "loss": 0.911, "step": 3034 }, { "epoch": 0.013435743060781796, "grad_norm": 2.7673993088777737, "learning_rate": 1.3435743060781798e-06, "loss": 0.6675, "step": 3035 }, { "epoch": 0.013440169994244987, "grad_norm": 2.7988640479920397, "learning_rate": 1.3440169994244987e-06, "loss": 0.5713, "step": 3036 }, { "epoch": 0.013444596927708177, "grad_norm": 3.065032075967547, "learning_rate": 1.3444596927708179e-06, "loss": 0.988, "step": 3037 }, { "epoch": 0.013449023861171366, "grad_norm": 3.1186414735308117, "learning_rate": 1.3449023861171368e-06, "loss": 0.7469, "step": 3038 }, { "epoch": 0.013453450794634557, "grad_norm": 2.38640724933061, "learning_rate": 1.3453450794634556e-06, "loss": 0.6823, "step": 3039 }, { "epoch": 0.013457877728097747, "grad_norm": 3.051494219147414, "learning_rate": 1.345787772809775e-06, "loss": 0.8929, "step": 3040 }, { "epoch": 0.013462304661560938, "grad_norm": 2.793531659352331, "learning_rate": 1.3462304661560937e-06, "loss": 0.6174, "step": 3041 }, { "epoch": 0.013466731595024126, "grad_norm": 2.999621150970409, "learning_rate": 1.3466731595024126e-06, "loss": 0.7206, "step": 3042 }, { "epoch": 0.013471158528487317, "grad_norm": 3.082022917471427, "learning_rate": 1.3471158528487318e-06, "loss": 0.7068, "step": 3043 }, { "epoch": 0.013475585461950507, "grad_norm": 2.3269838148188886, "learning_rate": 1.3475585461950507e-06, "loss": 0.7802, "step": 3044 }, { "epoch": 0.013480012395413696, "grad_norm": 2.895364162275908, "learning_rate": 1.3480012395413697e-06, "loss": 0.8504, "step": 3045 }, { "epoch": 0.013484439328876887, "grad_norm": 3.6938710828897245, "learning_rate": 1.3484439328876888e-06, "loss": 1.0682, "step": 3046 }, { "epoch": 0.013488866262340077, "grad_norm": 3.872280296326971, "learning_rate": 1.3488866262340078e-06, "loss": 0.8099, "step": 3047 }, { "epoch": 0.013493293195803268, "grad_norm": 2.6380869806254514, "learning_rate": 1.3493293195803267e-06, "loss": 0.9284, "step": 3048 }, { "epoch": 0.013497720129266456, "grad_norm": 3.7709825722641144, "learning_rate": 1.3497720129266459e-06, "loss": 0.8218, "step": 3049 }, { "epoch": 0.013502147062729647, "grad_norm": 3.0229802109177224, "learning_rate": 1.3502147062729648e-06, "loss": 0.8965, "step": 3050 }, { "epoch": 0.013506573996192837, "grad_norm": 3.0137906833493804, "learning_rate": 1.3506573996192838e-06, "loss": 0.9002, "step": 3051 }, { "epoch": 0.013511000929656028, "grad_norm": 3.0658724037879894, "learning_rate": 1.351100092965603e-06, "loss": 0.6759, "step": 3052 }, { "epoch": 0.013515427863119217, "grad_norm": 3.144239860439094, "learning_rate": 1.3515427863119219e-06, "loss": 0.9325, "step": 3053 }, { "epoch": 0.013519854796582407, "grad_norm": 4.1389646139135685, "learning_rate": 1.3519854796582408e-06, "loss": 1.1913, "step": 3054 }, { "epoch": 0.013524281730045598, "grad_norm": 3.311572265441675, "learning_rate": 1.35242817300456e-06, "loss": 1.0369, "step": 3055 }, { "epoch": 0.013528708663508788, "grad_norm": 3.1451549177150087, "learning_rate": 1.352870866350879e-06, "loss": 0.8193, "step": 3056 }, { "epoch": 0.013533135596971977, "grad_norm": 2.9996961717010397, "learning_rate": 1.3533135596971977e-06, "loss": 0.587, "step": 3057 }, { "epoch": 0.013537562530435168, "grad_norm": 3.2017758248112775, "learning_rate": 1.3537562530435168e-06, "loss": 0.7905, "step": 3058 }, { "epoch": 0.013541989463898358, "grad_norm": 3.103365808703881, "learning_rate": 1.3541989463898358e-06, "loss": 1.0196, "step": 3059 }, { "epoch": 0.013546416397361547, "grad_norm": 2.9035893930772128, "learning_rate": 1.3546416397361547e-06, "loss": 0.6809, "step": 3060 }, { "epoch": 0.013550843330824737, "grad_norm": 3.406436208829185, "learning_rate": 1.3550843330824739e-06, "loss": 1.0603, "step": 3061 }, { "epoch": 0.013555270264287928, "grad_norm": 3.2747323603400913, "learning_rate": 1.3555270264287928e-06, "loss": 0.8253, "step": 3062 }, { "epoch": 0.013559697197751118, "grad_norm": 2.983974455246901, "learning_rate": 1.3559697197751118e-06, "loss": 0.7331, "step": 3063 }, { "epoch": 0.013564124131214307, "grad_norm": 3.255136070444901, "learning_rate": 1.356412413121431e-06, "loss": 0.7763, "step": 3064 }, { "epoch": 0.013568551064677498, "grad_norm": 4.286720974167773, "learning_rate": 1.3568551064677499e-06, "loss": 0.8178, "step": 3065 }, { "epoch": 0.013572977998140688, "grad_norm": 2.770210693673694, "learning_rate": 1.3572977998140688e-06, "loss": 0.843, "step": 3066 }, { "epoch": 0.013577404931603879, "grad_norm": 3.063118449800877, "learning_rate": 1.357740493160388e-06, "loss": 1.0934, "step": 3067 }, { "epoch": 0.013581831865067067, "grad_norm": 2.421685506978743, "learning_rate": 1.358183186506707e-06, "loss": 0.4682, "step": 3068 }, { "epoch": 0.013586258798530258, "grad_norm": 3.0592768911944184, "learning_rate": 1.3586258798530259e-06, "loss": 0.6115, "step": 3069 }, { "epoch": 0.013590685731993448, "grad_norm": 3.0427717771646905, "learning_rate": 1.359068573199345e-06, "loss": 0.974, "step": 3070 }, { "epoch": 0.013595112665456639, "grad_norm": 3.003955700163821, "learning_rate": 1.359511266545664e-06, "loss": 1.0202, "step": 3071 }, { "epoch": 0.013599539598919828, "grad_norm": 3.15673726566962, "learning_rate": 1.3599539598919827e-06, "loss": 0.8893, "step": 3072 }, { "epoch": 0.013603966532383018, "grad_norm": 3.310764895553239, "learning_rate": 1.360396653238302e-06, "loss": 1.1005, "step": 3073 }, { "epoch": 0.013608393465846209, "grad_norm": 3.35696609617331, "learning_rate": 1.3608393465846208e-06, "loss": 1.0946, "step": 3074 }, { "epoch": 0.0136128203993094, "grad_norm": 3.0504133712153774, "learning_rate": 1.3612820399309398e-06, "loss": 0.9054, "step": 3075 }, { "epoch": 0.013617247332772588, "grad_norm": 3.8296690755974856, "learning_rate": 1.361724733277259e-06, "loss": 1.0825, "step": 3076 }, { "epoch": 0.013621674266235778, "grad_norm": 2.7806679776042684, "learning_rate": 1.3621674266235779e-06, "loss": 0.822, "step": 3077 }, { "epoch": 0.013626101199698969, "grad_norm": 2.722294601143928, "learning_rate": 1.3626101199698968e-06, "loss": 0.8209, "step": 3078 }, { "epoch": 0.013630528133162158, "grad_norm": 2.8335440860828713, "learning_rate": 1.363052813316216e-06, "loss": 0.832, "step": 3079 }, { "epoch": 0.013634955066625348, "grad_norm": 3.1932611149317385, "learning_rate": 1.363495506662535e-06, "loss": 0.9807, "step": 3080 }, { "epoch": 0.013639382000088539, "grad_norm": 3.840850992662665, "learning_rate": 1.3639382000088539e-06, "loss": 1.3022, "step": 3081 }, { "epoch": 0.01364380893355173, "grad_norm": 3.029715784233842, "learning_rate": 1.364380893355173e-06, "loss": 0.9023, "step": 3082 }, { "epoch": 0.013648235867014918, "grad_norm": 3.3118037859366907, "learning_rate": 1.364823586701492e-06, "loss": 0.9879, "step": 3083 }, { "epoch": 0.013652662800478109, "grad_norm": 2.803403006991019, "learning_rate": 1.365266280047811e-06, "loss": 0.9815, "step": 3084 }, { "epoch": 0.013657089733941299, "grad_norm": 3.4840110605610684, "learning_rate": 1.36570897339413e-06, "loss": 1.1145, "step": 3085 }, { "epoch": 0.01366151666740449, "grad_norm": 2.624021107315944, "learning_rate": 1.366151666740449e-06, "loss": 0.6956, "step": 3086 }, { "epoch": 0.013665943600867678, "grad_norm": 3.1897654655583927, "learning_rate": 1.3665943600867678e-06, "loss": 0.7437, "step": 3087 }, { "epoch": 0.013670370534330869, "grad_norm": 2.5159745322688205, "learning_rate": 1.3670370534330871e-06, "loss": 0.5888, "step": 3088 }, { "epoch": 0.01367479746779406, "grad_norm": 2.8988082784111135, "learning_rate": 1.3674797467794059e-06, "loss": 0.7172, "step": 3089 }, { "epoch": 0.01367922440125725, "grad_norm": 3.569080749076827, "learning_rate": 1.3679224401257248e-06, "loss": 0.8227, "step": 3090 }, { "epoch": 0.013683651334720439, "grad_norm": 3.105971950067423, "learning_rate": 1.368365133472044e-06, "loss": 0.9361, "step": 3091 }, { "epoch": 0.01368807826818363, "grad_norm": 3.1550601146936783, "learning_rate": 1.368807826818363e-06, "loss": 1.1116, "step": 3092 }, { "epoch": 0.01369250520164682, "grad_norm": 2.983954283073445, "learning_rate": 1.3692505201646819e-06, "loss": 0.5875, "step": 3093 }, { "epoch": 0.013696932135110008, "grad_norm": 3.8257289976799074, "learning_rate": 1.369693213511001e-06, "loss": 1.1744, "step": 3094 }, { "epoch": 0.013701359068573199, "grad_norm": 3.0396119909396337, "learning_rate": 1.37013590685732e-06, "loss": 0.9502, "step": 3095 }, { "epoch": 0.01370578600203639, "grad_norm": 2.456220539385699, "learning_rate": 1.370578600203639e-06, "loss": 0.7771, "step": 3096 }, { "epoch": 0.01371021293549958, "grad_norm": 3.3208383984655057, "learning_rate": 1.371021293549958e-06, "loss": 0.8796, "step": 3097 }, { "epoch": 0.013714639868962769, "grad_norm": 2.446265552533301, "learning_rate": 1.371463986896277e-06, "loss": 0.587, "step": 3098 }, { "epoch": 0.01371906680242596, "grad_norm": 2.6401843703996346, "learning_rate": 1.371906680242596e-06, "loss": 0.833, "step": 3099 }, { "epoch": 0.01372349373588915, "grad_norm": 2.851175365106917, "learning_rate": 1.3723493735889151e-06, "loss": 0.9272, "step": 3100 }, { "epoch": 0.01372792066935234, "grad_norm": 2.799297940990325, "learning_rate": 1.372792066935234e-06, "loss": 0.6493, "step": 3101 }, { "epoch": 0.013732347602815529, "grad_norm": 3.30242880683356, "learning_rate": 1.373234760281553e-06, "loss": 1.0974, "step": 3102 }, { "epoch": 0.01373677453627872, "grad_norm": 4.365460503142248, "learning_rate": 1.3736774536278722e-06, "loss": 0.6026, "step": 3103 }, { "epoch": 0.01374120146974191, "grad_norm": 2.5561890597792085, "learning_rate": 1.3741201469741911e-06, "loss": 0.7636, "step": 3104 }, { "epoch": 0.0137456284032051, "grad_norm": 3.254233191700607, "learning_rate": 1.3745628403205099e-06, "loss": 0.7365, "step": 3105 }, { "epoch": 0.01375005533666829, "grad_norm": 3.1550426664845515, "learning_rate": 1.3750055336668293e-06, "loss": 0.7745, "step": 3106 }, { "epoch": 0.01375448227013148, "grad_norm": 3.52495362811593, "learning_rate": 1.375448227013148e-06, "loss": 0.9979, "step": 3107 }, { "epoch": 0.01375890920359467, "grad_norm": 2.9294688543113896, "learning_rate": 1.375890920359467e-06, "loss": 0.9508, "step": 3108 }, { "epoch": 0.01376333613705786, "grad_norm": 3.2551977895992064, "learning_rate": 1.376333613705786e-06, "loss": 0.8767, "step": 3109 }, { "epoch": 0.01376776307052105, "grad_norm": 2.969837333508375, "learning_rate": 1.376776307052105e-06, "loss": 0.5965, "step": 3110 }, { "epoch": 0.01377219000398424, "grad_norm": 2.93413359816062, "learning_rate": 1.377219000398424e-06, "loss": 1.1773, "step": 3111 }, { "epoch": 0.01377661693744743, "grad_norm": 2.4829058767583834, "learning_rate": 1.3776616937447431e-06, "loss": 0.4866, "step": 3112 }, { "epoch": 0.01378104387091062, "grad_norm": 3.518102758082425, "learning_rate": 1.378104387091062e-06, "loss": 0.8199, "step": 3113 }, { "epoch": 0.01378547080437381, "grad_norm": 3.445150869495527, "learning_rate": 1.378547080437381e-06, "loss": 1.1133, "step": 3114 }, { "epoch": 0.013789897737837, "grad_norm": 3.2015977678714465, "learning_rate": 1.3789897737837002e-06, "loss": 0.8612, "step": 3115 }, { "epoch": 0.013794324671300191, "grad_norm": 2.9192591784910644, "learning_rate": 1.3794324671300191e-06, "loss": 0.9259, "step": 3116 }, { "epoch": 0.01379875160476338, "grad_norm": 3.1776329897458857, "learning_rate": 1.379875160476338e-06, "loss": 0.6943, "step": 3117 }, { "epoch": 0.01380317853822657, "grad_norm": 3.8491220778925057, "learning_rate": 1.3803178538226573e-06, "loss": 1.2305, "step": 3118 }, { "epoch": 0.01380760547168976, "grad_norm": 2.5785979999677533, "learning_rate": 1.3807605471689762e-06, "loss": 0.6823, "step": 3119 }, { "epoch": 0.013812032405152951, "grad_norm": 2.836469802548888, "learning_rate": 1.381203240515295e-06, "loss": 0.7795, "step": 3120 }, { "epoch": 0.01381645933861614, "grad_norm": 3.323096816972331, "learning_rate": 1.3816459338616143e-06, "loss": 1.0349, "step": 3121 }, { "epoch": 0.01382088627207933, "grad_norm": 3.1716031955849027, "learning_rate": 1.382088627207933e-06, "loss": 0.8544, "step": 3122 }, { "epoch": 0.013825313205542521, "grad_norm": 3.126397422234947, "learning_rate": 1.3825313205542524e-06, "loss": 0.9791, "step": 3123 }, { "epoch": 0.013829740139005712, "grad_norm": 3.5875318645183905, "learning_rate": 1.3829740139005711e-06, "loss": 0.869, "step": 3124 }, { "epoch": 0.0138341670724689, "grad_norm": 2.444475795219823, "learning_rate": 1.38341670724689e-06, "loss": 0.6875, "step": 3125 }, { "epoch": 0.01383859400593209, "grad_norm": 2.8915101831308623, "learning_rate": 1.3838594005932093e-06, "loss": 0.7984, "step": 3126 }, { "epoch": 0.013843020939395281, "grad_norm": 3.0125129911957993, "learning_rate": 1.3843020939395282e-06, "loss": 0.7647, "step": 3127 }, { "epoch": 0.01384744787285847, "grad_norm": 3.215939940147134, "learning_rate": 1.3847447872858471e-06, "loss": 1.0835, "step": 3128 }, { "epoch": 0.01385187480632166, "grad_norm": 3.35177375411386, "learning_rate": 1.3851874806321663e-06, "loss": 0.7736, "step": 3129 }, { "epoch": 0.013856301739784851, "grad_norm": 3.533314306894571, "learning_rate": 1.3856301739784853e-06, "loss": 0.8974, "step": 3130 }, { "epoch": 0.013860728673248042, "grad_norm": 2.93293071642216, "learning_rate": 1.3860728673248042e-06, "loss": 0.9135, "step": 3131 }, { "epoch": 0.01386515560671123, "grad_norm": 2.96147306939151, "learning_rate": 1.3865155606711234e-06, "loss": 0.8564, "step": 3132 }, { "epoch": 0.013869582540174421, "grad_norm": 3.1438850650043717, "learning_rate": 1.3869582540174423e-06, "loss": 0.7971, "step": 3133 }, { "epoch": 0.013874009473637611, "grad_norm": 2.741155915206747, "learning_rate": 1.3874009473637613e-06, "loss": 0.9994, "step": 3134 }, { "epoch": 0.013878436407100802, "grad_norm": 3.5768097622132076, "learning_rate": 1.3878436407100804e-06, "loss": 0.9013, "step": 3135 }, { "epoch": 0.01388286334056399, "grad_norm": 2.924002941744124, "learning_rate": 1.3882863340563994e-06, "loss": 0.5147, "step": 3136 }, { "epoch": 0.013887290274027181, "grad_norm": 4.193874057202655, "learning_rate": 1.388729027402718e-06, "loss": 1.4689, "step": 3137 }, { "epoch": 0.013891717207490372, "grad_norm": 4.255027828494279, "learning_rate": 1.3891717207490375e-06, "loss": 1.0504, "step": 3138 }, { "epoch": 0.013896144140953562, "grad_norm": 4.579306270540682, "learning_rate": 1.3896144140953562e-06, "loss": 1.3896, "step": 3139 }, { "epoch": 0.013900571074416751, "grad_norm": 2.721019929602492, "learning_rate": 1.3900571074416751e-06, "loss": 0.7204, "step": 3140 }, { "epoch": 0.013904998007879942, "grad_norm": 2.7798616005471395, "learning_rate": 1.3904998007879943e-06, "loss": 0.766, "step": 3141 }, { "epoch": 0.013909424941343132, "grad_norm": 2.488132313746705, "learning_rate": 1.3909424941343133e-06, "loss": 0.6803, "step": 3142 }, { "epoch": 0.013913851874806323, "grad_norm": 3.0787799570854126, "learning_rate": 1.3913851874806322e-06, "loss": 0.6862, "step": 3143 }, { "epoch": 0.013918278808269511, "grad_norm": 2.8663064537675256, "learning_rate": 1.3918278808269514e-06, "loss": 0.7735, "step": 3144 }, { "epoch": 0.013922705741732702, "grad_norm": 3.2831449605422036, "learning_rate": 1.3922705741732703e-06, "loss": 1.0747, "step": 3145 }, { "epoch": 0.013927132675195892, "grad_norm": 3.664860927831026, "learning_rate": 1.3927132675195893e-06, "loss": 1.081, "step": 3146 }, { "epoch": 0.013931559608659081, "grad_norm": 2.753686566015897, "learning_rate": 1.3931559608659084e-06, "loss": 0.6055, "step": 3147 }, { "epoch": 0.013935986542122272, "grad_norm": 3.2780594143327777, "learning_rate": 1.3935986542122274e-06, "loss": 1.1446, "step": 3148 }, { "epoch": 0.013940413475585462, "grad_norm": 3.0602941631747806, "learning_rate": 1.3940413475585463e-06, "loss": 0.8141, "step": 3149 }, { "epoch": 0.013944840409048653, "grad_norm": 3.9343110507680477, "learning_rate": 1.3944840409048655e-06, "loss": 1.0246, "step": 3150 }, { "epoch": 0.013949267342511841, "grad_norm": 2.84502845458596, "learning_rate": 1.3949267342511844e-06, "loss": 0.5763, "step": 3151 }, { "epoch": 0.013953694275975032, "grad_norm": 3.0995483257721204, "learning_rate": 1.3953694275975034e-06, "loss": 1.1282, "step": 3152 }, { "epoch": 0.013958121209438222, "grad_norm": 2.3249454926800674, "learning_rate": 1.3958121209438225e-06, "loss": 0.6874, "step": 3153 }, { "epoch": 0.013962548142901413, "grad_norm": 2.9593774221181337, "learning_rate": 1.3962548142901415e-06, "loss": 0.6034, "step": 3154 }, { "epoch": 0.013966975076364602, "grad_norm": 2.9926882004974944, "learning_rate": 1.3966975076364602e-06, "loss": 0.8437, "step": 3155 }, { "epoch": 0.013971402009827792, "grad_norm": 2.8268906659496507, "learning_rate": 1.3971402009827796e-06, "loss": 1.0906, "step": 3156 }, { "epoch": 0.013975828943290983, "grad_norm": 3.774263026589379, "learning_rate": 1.3975828943290983e-06, "loss": 1.1957, "step": 3157 }, { "epoch": 0.013980255876754173, "grad_norm": 3.343091600544479, "learning_rate": 1.3980255876754173e-06, "loss": 1.1336, "step": 3158 }, { "epoch": 0.013984682810217362, "grad_norm": 2.4878402412893283, "learning_rate": 1.3984682810217364e-06, "loss": 0.5223, "step": 3159 }, { "epoch": 0.013989109743680552, "grad_norm": 3.0897828119693074, "learning_rate": 1.3989109743680554e-06, "loss": 1.1697, "step": 3160 }, { "epoch": 0.013993536677143743, "grad_norm": 2.8950620497056216, "learning_rate": 1.3993536677143743e-06, "loss": 0.7743, "step": 3161 }, { "epoch": 0.013997963610606932, "grad_norm": 3.272368246805741, "learning_rate": 1.3997963610606935e-06, "loss": 0.5486, "step": 3162 }, { "epoch": 0.014002390544070122, "grad_norm": 3.1110753631226817, "learning_rate": 1.4002390544070124e-06, "loss": 0.8223, "step": 3163 }, { "epoch": 0.014006817477533313, "grad_norm": 3.0112277207976526, "learning_rate": 1.4006817477533314e-06, "loss": 1.1827, "step": 3164 }, { "epoch": 0.014011244410996503, "grad_norm": 2.9484292862738317, "learning_rate": 1.4011244410996505e-06, "loss": 0.7489, "step": 3165 }, { "epoch": 0.014015671344459692, "grad_norm": 3.6616477992196383, "learning_rate": 1.4015671344459695e-06, "loss": 1.2829, "step": 3166 }, { "epoch": 0.014020098277922883, "grad_norm": 2.9945137697365434, "learning_rate": 1.4020098277922884e-06, "loss": 0.7332, "step": 3167 }, { "epoch": 0.014024525211386073, "grad_norm": 3.2384686174484147, "learning_rate": 1.4024525211386076e-06, "loss": 0.7171, "step": 3168 }, { "epoch": 0.014028952144849264, "grad_norm": 3.5408048637059473, "learning_rate": 1.4028952144849265e-06, "loss": 0.8428, "step": 3169 }, { "epoch": 0.014033379078312452, "grad_norm": 3.2483998369873035, "learning_rate": 1.4033379078312453e-06, "loss": 0.5488, "step": 3170 }, { "epoch": 0.014037806011775643, "grad_norm": 2.699839247918376, "learning_rate": 1.4037806011775646e-06, "loss": 0.6382, "step": 3171 }, { "epoch": 0.014042232945238833, "grad_norm": 2.9986798124765937, "learning_rate": 1.4042232945238834e-06, "loss": 1.0809, "step": 3172 }, { "epoch": 0.014046659878702024, "grad_norm": 4.040807073088157, "learning_rate": 1.4046659878702023e-06, "loss": 1.174, "step": 3173 }, { "epoch": 0.014051086812165213, "grad_norm": 3.228551203366152, "learning_rate": 1.4051086812165215e-06, "loss": 1.0644, "step": 3174 }, { "epoch": 0.014055513745628403, "grad_norm": 2.998624343671557, "learning_rate": 1.4055513745628404e-06, "loss": 0.8614, "step": 3175 }, { "epoch": 0.014059940679091594, "grad_norm": 3.60183173031505, "learning_rate": 1.4059940679091594e-06, "loss": 1.0738, "step": 3176 }, { "epoch": 0.014064367612554784, "grad_norm": 3.7483801142956774, "learning_rate": 1.4064367612554785e-06, "loss": 1.2222, "step": 3177 }, { "epoch": 0.014068794546017973, "grad_norm": 2.648588614723674, "learning_rate": 1.4068794546017975e-06, "loss": 0.504, "step": 3178 }, { "epoch": 0.014073221479481163, "grad_norm": 2.858504431828886, "learning_rate": 1.4073221479481164e-06, "loss": 0.7019, "step": 3179 }, { "epoch": 0.014077648412944354, "grad_norm": 4.009974869373597, "learning_rate": 1.4077648412944356e-06, "loss": 0.7784, "step": 3180 }, { "epoch": 0.014082075346407543, "grad_norm": 3.307474380659421, "learning_rate": 1.4082075346407545e-06, "loss": 1.0217, "step": 3181 }, { "epoch": 0.014086502279870733, "grad_norm": 3.0210486614127197, "learning_rate": 1.4086502279870735e-06, "loss": 0.7113, "step": 3182 }, { "epoch": 0.014090929213333924, "grad_norm": 3.3329074991869847, "learning_rate": 1.4090929213333926e-06, "loss": 0.8601, "step": 3183 }, { "epoch": 0.014095356146797114, "grad_norm": 3.1418159119563924, "learning_rate": 1.4095356146797116e-06, "loss": 0.6081, "step": 3184 }, { "epoch": 0.014099783080260303, "grad_norm": 2.6455365756640568, "learning_rate": 1.4099783080260305e-06, "loss": 0.7646, "step": 3185 }, { "epoch": 0.014104210013723494, "grad_norm": 2.99587350816727, "learning_rate": 1.4104210013723497e-06, "loss": 1.0041, "step": 3186 }, { "epoch": 0.014108636947186684, "grad_norm": 3.5143049699590247, "learning_rate": 1.4108636947186684e-06, "loss": 0.8744, "step": 3187 }, { "epoch": 0.014113063880649875, "grad_norm": 3.0375667128895754, "learning_rate": 1.4113063880649874e-06, "loss": 0.9099, "step": 3188 }, { "epoch": 0.014117490814113063, "grad_norm": 2.7559427473500873, "learning_rate": 1.4117490814113065e-06, "loss": 0.858, "step": 3189 }, { "epoch": 0.014121917747576254, "grad_norm": 3.1694398410857962, "learning_rate": 1.4121917747576255e-06, "loss": 1.0078, "step": 3190 }, { "epoch": 0.014126344681039444, "grad_norm": 3.136322556982128, "learning_rate": 1.4126344681039444e-06, "loss": 1.221, "step": 3191 }, { "epoch": 0.014130771614502635, "grad_norm": 2.85245827580345, "learning_rate": 1.4130771614502636e-06, "loss": 0.7413, "step": 3192 }, { "epoch": 0.014135198547965824, "grad_norm": 2.729849430753159, "learning_rate": 1.4135198547965825e-06, "loss": 0.6694, "step": 3193 }, { "epoch": 0.014139625481429014, "grad_norm": 3.1033384580196866, "learning_rate": 1.4139625481429015e-06, "loss": 0.7042, "step": 3194 }, { "epoch": 0.014144052414892205, "grad_norm": 3.5657374056556796, "learning_rate": 1.4144052414892206e-06, "loss": 0.9762, "step": 3195 }, { "epoch": 0.014148479348355393, "grad_norm": 2.730229211341086, "learning_rate": 1.4148479348355396e-06, "loss": 0.9017, "step": 3196 }, { "epoch": 0.014152906281818584, "grad_norm": 3.008612137552622, "learning_rate": 1.4152906281818585e-06, "loss": 0.9191, "step": 3197 }, { "epoch": 0.014157333215281774, "grad_norm": 3.0827134558435354, "learning_rate": 1.4157333215281777e-06, "loss": 0.8836, "step": 3198 }, { "epoch": 0.014161760148744965, "grad_norm": 3.306413364310489, "learning_rate": 1.4161760148744966e-06, "loss": 1.0166, "step": 3199 }, { "epoch": 0.014166187082208154, "grad_norm": 3.0628673002694584, "learning_rate": 1.4166187082208156e-06, "loss": 1.024, "step": 3200 }, { "epoch": 0.014170614015671344, "grad_norm": 2.9655773611333465, "learning_rate": 1.4170614015671347e-06, "loss": 0.6398, "step": 3201 }, { "epoch": 0.014175040949134535, "grad_norm": 3.9232562720494903, "learning_rate": 1.4175040949134537e-06, "loss": 0.7383, "step": 3202 }, { "epoch": 0.014179467882597725, "grad_norm": 3.089982797111563, "learning_rate": 1.4179467882597724e-06, "loss": 0.8427, "step": 3203 }, { "epoch": 0.014183894816060914, "grad_norm": 2.577055835417993, "learning_rate": 1.4183894816060918e-06, "loss": 0.8904, "step": 3204 }, { "epoch": 0.014188321749524105, "grad_norm": 2.5577328595938176, "learning_rate": 1.4188321749524105e-06, "loss": 0.8738, "step": 3205 }, { "epoch": 0.014192748682987295, "grad_norm": 3.807176782556339, "learning_rate": 1.4192748682987295e-06, "loss": 1.7193, "step": 3206 }, { "epoch": 0.014197175616450486, "grad_norm": 3.049171676567945, "learning_rate": 1.4197175616450486e-06, "loss": 0.7537, "step": 3207 }, { "epoch": 0.014201602549913674, "grad_norm": 3.6239460111161756, "learning_rate": 1.4201602549913676e-06, "loss": 1.0678, "step": 3208 }, { "epoch": 0.014206029483376865, "grad_norm": 2.8034501302169312, "learning_rate": 1.4206029483376865e-06, "loss": 0.7193, "step": 3209 }, { "epoch": 0.014210456416840055, "grad_norm": 2.472631049353419, "learning_rate": 1.4210456416840057e-06, "loss": 0.6794, "step": 3210 }, { "epoch": 0.014214883350303244, "grad_norm": 3.2166358339231556, "learning_rate": 1.4214883350303246e-06, "loss": 0.6052, "step": 3211 }, { "epoch": 0.014219310283766435, "grad_norm": 2.787608769102594, "learning_rate": 1.4219310283766436e-06, "loss": 0.7233, "step": 3212 }, { "epoch": 0.014223737217229625, "grad_norm": 2.970272749096959, "learning_rate": 1.4223737217229627e-06, "loss": 0.8492, "step": 3213 }, { "epoch": 0.014228164150692816, "grad_norm": 3.821021152637422, "learning_rate": 1.4228164150692817e-06, "loss": 1.1351, "step": 3214 }, { "epoch": 0.014232591084156004, "grad_norm": 2.780065464192654, "learning_rate": 1.4232591084156006e-06, "loss": 0.6835, "step": 3215 }, { "epoch": 0.014237018017619195, "grad_norm": 3.6262109873961967, "learning_rate": 1.4237018017619198e-06, "loss": 0.55, "step": 3216 }, { "epoch": 0.014241444951082385, "grad_norm": 3.212187458311215, "learning_rate": 1.4241444951082387e-06, "loss": 0.8449, "step": 3217 }, { "epoch": 0.014245871884545576, "grad_norm": 3.3430545707242603, "learning_rate": 1.4245871884545575e-06, "loss": 0.9271, "step": 3218 }, { "epoch": 0.014250298818008765, "grad_norm": 2.755828701099461, "learning_rate": 1.4250298818008768e-06, "loss": 0.6002, "step": 3219 }, { "epoch": 0.014254725751471955, "grad_norm": 2.9794488038645186, "learning_rate": 1.4254725751471956e-06, "loss": 1.1392, "step": 3220 }, { "epoch": 0.014259152684935146, "grad_norm": 3.2727467644509725, "learning_rate": 1.4259152684935145e-06, "loss": 0.9811, "step": 3221 }, { "epoch": 0.014263579618398336, "grad_norm": 2.828770033682858, "learning_rate": 1.4263579618398337e-06, "loss": 0.8784, "step": 3222 }, { "epoch": 0.014268006551861525, "grad_norm": 2.753193602638478, "learning_rate": 1.4268006551861526e-06, "loss": 0.8837, "step": 3223 }, { "epoch": 0.014272433485324715, "grad_norm": 2.748695614062214, "learning_rate": 1.4272433485324716e-06, "loss": 0.773, "step": 3224 }, { "epoch": 0.014276860418787906, "grad_norm": 3.082751802736375, "learning_rate": 1.4276860418787907e-06, "loss": 0.7751, "step": 3225 }, { "epoch": 0.014281287352251096, "grad_norm": 2.342604915205627, "learning_rate": 1.4281287352251097e-06, "loss": 0.6649, "step": 3226 }, { "epoch": 0.014285714285714285, "grad_norm": 3.0327070606690523, "learning_rate": 1.4285714285714286e-06, "loss": 0.8372, "step": 3227 }, { "epoch": 0.014290141219177476, "grad_norm": 3.635280419119372, "learning_rate": 1.4290141219177478e-06, "loss": 0.8799, "step": 3228 }, { "epoch": 0.014294568152640666, "grad_norm": 2.7666752201247395, "learning_rate": 1.4294568152640667e-06, "loss": 0.7071, "step": 3229 }, { "epoch": 0.014298995086103855, "grad_norm": 2.601945315839842, "learning_rate": 1.4298995086103857e-06, "loss": 0.7243, "step": 3230 }, { "epoch": 0.014303422019567046, "grad_norm": 2.9758513623162037, "learning_rate": 1.4303422019567048e-06, "loss": 0.7822, "step": 3231 }, { "epoch": 0.014307848953030236, "grad_norm": 3.4846082826911213, "learning_rate": 1.4307848953030238e-06, "loss": 0.9253, "step": 3232 }, { "epoch": 0.014312275886493427, "grad_norm": 2.785055410708, "learning_rate": 1.4312275886493427e-06, "loss": 0.8661, "step": 3233 }, { "epoch": 0.014316702819956615, "grad_norm": 2.6603632590667807, "learning_rate": 1.4316702819956619e-06, "loss": 0.7233, "step": 3234 }, { "epoch": 0.014321129753419806, "grad_norm": 3.9983220521383056, "learning_rate": 1.4321129753419808e-06, "loss": 0.9843, "step": 3235 }, { "epoch": 0.014325556686882996, "grad_norm": 3.5810684898600513, "learning_rate": 1.4325556686882996e-06, "loss": 1.0819, "step": 3236 }, { "epoch": 0.014329983620346187, "grad_norm": 2.8370593258304697, "learning_rate": 1.4329983620346187e-06, "loss": 1.0349, "step": 3237 }, { "epoch": 0.014334410553809376, "grad_norm": 3.121256817281554, "learning_rate": 1.4334410553809377e-06, "loss": 0.8113, "step": 3238 }, { "epoch": 0.014338837487272566, "grad_norm": 2.93718505290864, "learning_rate": 1.4338837487272566e-06, "loss": 0.9676, "step": 3239 }, { "epoch": 0.014343264420735757, "grad_norm": 4.092969636555787, "learning_rate": 1.4343264420735758e-06, "loss": 0.4934, "step": 3240 }, { "epoch": 0.014347691354198947, "grad_norm": 3.8917353965257977, "learning_rate": 1.4347691354198947e-06, "loss": 0.9157, "step": 3241 }, { "epoch": 0.014352118287662136, "grad_norm": 2.5623979596269844, "learning_rate": 1.4352118287662137e-06, "loss": 0.6278, "step": 3242 }, { "epoch": 0.014356545221125326, "grad_norm": 3.08724145376279, "learning_rate": 1.4356545221125328e-06, "loss": 1.0336, "step": 3243 }, { "epoch": 0.014360972154588517, "grad_norm": 2.6486570034857397, "learning_rate": 1.4360972154588518e-06, "loss": 0.6139, "step": 3244 }, { "epoch": 0.014365399088051706, "grad_norm": 3.403257611268265, "learning_rate": 1.4365399088051707e-06, "loss": 1.0742, "step": 3245 }, { "epoch": 0.014369826021514896, "grad_norm": 3.7041681546729777, "learning_rate": 1.4369826021514899e-06, "loss": 0.9082, "step": 3246 }, { "epoch": 0.014374252954978087, "grad_norm": 3.027372348451169, "learning_rate": 1.4374252954978088e-06, "loss": 1.1523, "step": 3247 }, { "epoch": 0.014378679888441277, "grad_norm": 2.6721991498517657, "learning_rate": 1.4378679888441278e-06, "loss": 0.9664, "step": 3248 }, { "epoch": 0.014383106821904466, "grad_norm": 3.359068292661891, "learning_rate": 1.438310682190447e-06, "loss": 1.1509, "step": 3249 }, { "epoch": 0.014387533755367657, "grad_norm": 2.9571142978089737, "learning_rate": 1.4387533755367659e-06, "loss": 0.5293, "step": 3250 }, { "epoch": 0.014391960688830847, "grad_norm": 2.830632714354606, "learning_rate": 1.4391960688830846e-06, "loss": 0.982, "step": 3251 }, { "epoch": 0.014396387622294038, "grad_norm": 2.591230453278638, "learning_rate": 1.439638762229404e-06, "loss": 0.8781, "step": 3252 }, { "epoch": 0.014400814555757226, "grad_norm": 3.5368972257123557, "learning_rate": 1.4400814555757227e-06, "loss": 0.876, "step": 3253 }, { "epoch": 0.014405241489220417, "grad_norm": 3.078379670194707, "learning_rate": 1.4405241489220417e-06, "loss": 0.8552, "step": 3254 }, { "epoch": 0.014409668422683607, "grad_norm": 3.6676135242719523, "learning_rate": 1.4409668422683608e-06, "loss": 1.328, "step": 3255 }, { "epoch": 0.014414095356146798, "grad_norm": 3.865688500058449, "learning_rate": 1.4414095356146798e-06, "loss": 0.94, "step": 3256 }, { "epoch": 0.014418522289609987, "grad_norm": 3.6353411632882775, "learning_rate": 1.4418522289609987e-06, "loss": 1.0016, "step": 3257 }, { "epoch": 0.014422949223073177, "grad_norm": 3.5706514576854014, "learning_rate": 1.4422949223073179e-06, "loss": 0.6672, "step": 3258 }, { "epoch": 0.014427376156536368, "grad_norm": 2.731552669014305, "learning_rate": 1.4427376156536368e-06, "loss": 0.5789, "step": 3259 }, { "epoch": 0.014431803089999558, "grad_norm": 3.3634652331307673, "learning_rate": 1.4431803089999558e-06, "loss": 0.9807, "step": 3260 }, { "epoch": 0.014436230023462747, "grad_norm": 3.662269491260725, "learning_rate": 1.443623002346275e-06, "loss": 0.9008, "step": 3261 }, { "epoch": 0.014440656956925937, "grad_norm": 3.0959930363706567, "learning_rate": 1.4440656956925939e-06, "loss": 0.6355, "step": 3262 }, { "epoch": 0.014445083890389128, "grad_norm": 2.8845491716787457, "learning_rate": 1.4445083890389128e-06, "loss": 0.9203, "step": 3263 }, { "epoch": 0.014449510823852317, "grad_norm": 2.768471345377662, "learning_rate": 1.444951082385232e-06, "loss": 0.7504, "step": 3264 }, { "epoch": 0.014453937757315507, "grad_norm": 2.914644830234974, "learning_rate": 1.445393775731551e-06, "loss": 0.6967, "step": 3265 }, { "epoch": 0.014458364690778698, "grad_norm": 3.310524771083725, "learning_rate": 1.4458364690778697e-06, "loss": 0.8204, "step": 3266 }, { "epoch": 0.014462791624241888, "grad_norm": 3.363162426559112, "learning_rate": 1.446279162424189e-06, "loss": 1.0357, "step": 3267 }, { "epoch": 0.014467218557705077, "grad_norm": 2.9177990316266667, "learning_rate": 1.4467218557705078e-06, "loss": 1.0021, "step": 3268 }, { "epoch": 0.014471645491168268, "grad_norm": 2.7880244850491014, "learning_rate": 1.4471645491168267e-06, "loss": 0.8602, "step": 3269 }, { "epoch": 0.014476072424631458, "grad_norm": 3.272237527360255, "learning_rate": 1.4476072424631459e-06, "loss": 1.0229, "step": 3270 }, { "epoch": 0.014480499358094649, "grad_norm": 2.899046784614757, "learning_rate": 1.4480499358094648e-06, "loss": 0.8497, "step": 3271 }, { "epoch": 0.014484926291557837, "grad_norm": 3.009778072567318, "learning_rate": 1.4484926291557838e-06, "loss": 0.915, "step": 3272 }, { "epoch": 0.014489353225021028, "grad_norm": 3.170577423855442, "learning_rate": 1.448935322502103e-06, "loss": 0.8676, "step": 3273 }, { "epoch": 0.014493780158484218, "grad_norm": 3.337808504242211, "learning_rate": 1.449378015848422e-06, "loss": 0.7469, "step": 3274 }, { "epoch": 0.014498207091947409, "grad_norm": 2.6559430199385896, "learning_rate": 1.4498207091947408e-06, "loss": 0.8857, "step": 3275 }, { "epoch": 0.014502634025410598, "grad_norm": 2.771879248865497, "learning_rate": 1.45026340254106e-06, "loss": 0.8397, "step": 3276 }, { "epoch": 0.014507060958873788, "grad_norm": 2.696729917497376, "learning_rate": 1.450706095887379e-06, "loss": 0.8686, "step": 3277 }, { "epoch": 0.014511487892336979, "grad_norm": 3.1004728751331694, "learning_rate": 1.451148789233698e-06, "loss": 0.8481, "step": 3278 }, { "epoch": 0.014515914825800167, "grad_norm": 3.1480116985493027, "learning_rate": 1.451591482580017e-06, "loss": 0.7288, "step": 3279 }, { "epoch": 0.014520341759263358, "grad_norm": 3.1707932218049293, "learning_rate": 1.452034175926336e-06, "loss": 1.1028, "step": 3280 }, { "epoch": 0.014524768692726548, "grad_norm": 3.856118136280934, "learning_rate": 1.452476869272655e-06, "loss": 0.9858, "step": 3281 }, { "epoch": 0.014529195626189739, "grad_norm": 3.5216587514940443, "learning_rate": 1.4529195626189741e-06, "loss": 0.7368, "step": 3282 }, { "epoch": 0.014533622559652928, "grad_norm": 3.4736900259805297, "learning_rate": 1.453362255965293e-06, "loss": 0.6634, "step": 3283 }, { "epoch": 0.014538049493116118, "grad_norm": 3.022484701616674, "learning_rate": 1.4538049493116118e-06, "loss": 0.757, "step": 3284 }, { "epoch": 0.014542476426579309, "grad_norm": 2.845410139410133, "learning_rate": 1.4542476426579312e-06, "loss": 0.6508, "step": 3285 }, { "epoch": 0.0145469033600425, "grad_norm": 3.901113121719759, "learning_rate": 1.45469033600425e-06, "loss": 0.6407, "step": 3286 }, { "epoch": 0.014551330293505688, "grad_norm": 2.452671014144479, "learning_rate": 1.4551330293505688e-06, "loss": 0.9114, "step": 3287 }, { "epoch": 0.014555757226968878, "grad_norm": 3.392162132396016, "learning_rate": 1.455575722696888e-06, "loss": 1.3066, "step": 3288 }, { "epoch": 0.014560184160432069, "grad_norm": 2.6972197334668624, "learning_rate": 1.456018416043207e-06, "loss": 0.68, "step": 3289 }, { "epoch": 0.01456461109389526, "grad_norm": 2.410752504788972, "learning_rate": 1.456461109389526e-06, "loss": 0.8581, "step": 3290 }, { "epoch": 0.014569038027358448, "grad_norm": 3.4584300832540023, "learning_rate": 1.456903802735845e-06, "loss": 0.5965, "step": 3291 }, { "epoch": 0.014573464960821639, "grad_norm": 3.1939431620886674, "learning_rate": 1.457346496082164e-06, "loss": 0.8924, "step": 3292 }, { "epoch": 0.01457789189428483, "grad_norm": 2.7109040454097846, "learning_rate": 1.457789189428483e-06, "loss": 0.5539, "step": 3293 }, { "epoch": 0.01458231882774802, "grad_norm": 3.446451403908207, "learning_rate": 1.4582318827748021e-06, "loss": 1.1618, "step": 3294 }, { "epoch": 0.014586745761211209, "grad_norm": 3.0807972253567644, "learning_rate": 1.458674576121121e-06, "loss": 1.0681, "step": 3295 }, { "epoch": 0.014591172694674399, "grad_norm": 3.187550492113312, "learning_rate": 1.45911726946744e-06, "loss": 0.83, "step": 3296 }, { "epoch": 0.01459559962813759, "grad_norm": 3.3127900157602883, "learning_rate": 1.4595599628137592e-06, "loss": 0.713, "step": 3297 }, { "epoch": 0.014600026561600778, "grad_norm": 2.854131692174952, "learning_rate": 1.4600026561600781e-06, "loss": 0.7283, "step": 3298 }, { "epoch": 0.014604453495063969, "grad_norm": 3.418516545474854, "learning_rate": 1.4604453495063968e-06, "loss": 0.7605, "step": 3299 }, { "epoch": 0.01460888042852716, "grad_norm": 2.682827482885113, "learning_rate": 1.4608880428527162e-06, "loss": 0.8162, "step": 3300 }, { "epoch": 0.01461330736199035, "grad_norm": 3.1055156488397673, "learning_rate": 1.461330736199035e-06, "loss": 0.9244, "step": 3301 }, { "epoch": 0.014617734295453539, "grad_norm": 2.8965512782358447, "learning_rate": 1.461773429545354e-06, "loss": 0.7883, "step": 3302 }, { "epoch": 0.01462216122891673, "grad_norm": 2.9251113605650585, "learning_rate": 1.462216122891673e-06, "loss": 0.6711, "step": 3303 }, { "epoch": 0.01462658816237992, "grad_norm": 2.687035240369629, "learning_rate": 1.462658816237992e-06, "loss": 0.8917, "step": 3304 }, { "epoch": 0.01463101509584311, "grad_norm": 3.2203645648606725, "learning_rate": 1.463101509584311e-06, "loss": 0.6473, "step": 3305 }, { "epoch": 0.014635442029306299, "grad_norm": 3.0216267347042907, "learning_rate": 1.4635442029306301e-06, "loss": 0.9785, "step": 3306 }, { "epoch": 0.01463986896276949, "grad_norm": 3.6451976902050154, "learning_rate": 1.463986896276949e-06, "loss": 0.7677, "step": 3307 }, { "epoch": 0.01464429589623268, "grad_norm": 3.5312100646746734, "learning_rate": 1.464429589623268e-06, "loss": 0.8848, "step": 3308 }, { "epoch": 0.01464872282969587, "grad_norm": 2.4993256134897655, "learning_rate": 1.4648722829695872e-06, "loss": 0.4638, "step": 3309 }, { "epoch": 0.01465314976315906, "grad_norm": 2.551494670955481, "learning_rate": 1.4653149763159061e-06, "loss": 0.7206, "step": 3310 }, { "epoch": 0.01465757669662225, "grad_norm": 2.670143511276574, "learning_rate": 1.465757669662225e-06, "loss": 0.7409, "step": 3311 }, { "epoch": 0.01466200363008544, "grad_norm": 3.314716582637272, "learning_rate": 1.4662003630085442e-06, "loss": 0.5915, "step": 3312 }, { "epoch": 0.014666430563548629, "grad_norm": 3.82278756167572, "learning_rate": 1.4666430563548632e-06, "loss": 0.9818, "step": 3313 }, { "epoch": 0.01467085749701182, "grad_norm": 2.5711068497095875, "learning_rate": 1.4670857497011821e-06, "loss": 0.7134, "step": 3314 }, { "epoch": 0.01467528443047501, "grad_norm": 2.790393780917593, "learning_rate": 1.4675284430475013e-06, "loss": 0.7547, "step": 3315 }, { "epoch": 0.0146797113639382, "grad_norm": 2.5703657687753627, "learning_rate": 1.46797113639382e-06, "loss": 0.6738, "step": 3316 }, { "epoch": 0.01468413829740139, "grad_norm": 2.783746920466608, "learning_rate": 1.468413829740139e-06, "loss": 0.771, "step": 3317 }, { "epoch": 0.01468856523086458, "grad_norm": 3.0479122673169874, "learning_rate": 1.4688565230864581e-06, "loss": 0.7138, "step": 3318 }, { "epoch": 0.01469299216432777, "grad_norm": 2.863064282484826, "learning_rate": 1.469299216432777e-06, "loss": 0.6776, "step": 3319 }, { "epoch": 0.01469741909779096, "grad_norm": 3.4579244390525647, "learning_rate": 1.469741909779096e-06, "loss": 0.843, "step": 3320 }, { "epoch": 0.01470184603125415, "grad_norm": 3.1464293819838227, "learning_rate": 1.4701846031254152e-06, "loss": 0.5479, "step": 3321 }, { "epoch": 0.01470627296471734, "grad_norm": 3.1117993973078284, "learning_rate": 1.4706272964717341e-06, "loss": 0.9805, "step": 3322 }, { "epoch": 0.01471069989818053, "grad_norm": 3.1254856483253994, "learning_rate": 1.471069989818053e-06, "loss": 0.9627, "step": 3323 }, { "epoch": 0.014715126831643721, "grad_norm": 3.1120210096402112, "learning_rate": 1.4715126831643722e-06, "loss": 0.6793, "step": 3324 }, { "epoch": 0.01471955376510691, "grad_norm": 2.4263169118253303, "learning_rate": 1.4719553765106912e-06, "loss": 0.5997, "step": 3325 }, { "epoch": 0.0147239806985701, "grad_norm": 2.6479588814418427, "learning_rate": 1.4723980698570101e-06, "loss": 0.6501, "step": 3326 }, { "epoch": 0.014728407632033291, "grad_norm": 3.345821615042833, "learning_rate": 1.4728407632033293e-06, "loss": 1.2385, "step": 3327 }, { "epoch": 0.014732834565496481, "grad_norm": 3.008018760241592, "learning_rate": 1.4732834565496482e-06, "loss": 0.9822, "step": 3328 }, { "epoch": 0.01473726149895967, "grad_norm": 3.8816008469324212, "learning_rate": 1.4737261498959672e-06, "loss": 0.9538, "step": 3329 }, { "epoch": 0.01474168843242286, "grad_norm": 2.7498653245896354, "learning_rate": 1.4741688432422863e-06, "loss": 0.6743, "step": 3330 }, { "epoch": 0.014746115365886051, "grad_norm": 3.7904921051547626, "learning_rate": 1.4746115365886053e-06, "loss": 1.0515, "step": 3331 }, { "epoch": 0.01475054229934924, "grad_norm": 2.6860537152755555, "learning_rate": 1.475054229934924e-06, "loss": 0.7917, "step": 3332 }, { "epoch": 0.01475496923281243, "grad_norm": 2.3551935494655307, "learning_rate": 1.4754969232812434e-06, "loss": 0.5781, "step": 3333 }, { "epoch": 0.014759396166275621, "grad_norm": 3.6036681638622223, "learning_rate": 1.4759396166275621e-06, "loss": 1.1044, "step": 3334 }, { "epoch": 0.014763823099738812, "grad_norm": 2.6763808828836733, "learning_rate": 1.476382309973881e-06, "loss": 0.7817, "step": 3335 }, { "epoch": 0.014768250033202, "grad_norm": 3.2385388659416887, "learning_rate": 1.4768250033202002e-06, "loss": 0.9281, "step": 3336 }, { "epoch": 0.01477267696666519, "grad_norm": 3.0022879673957914, "learning_rate": 1.4772676966665192e-06, "loss": 0.8207, "step": 3337 }, { "epoch": 0.014777103900128381, "grad_norm": 2.8625110134991614, "learning_rate": 1.4777103900128381e-06, "loss": 0.7671, "step": 3338 }, { "epoch": 0.014781530833591572, "grad_norm": 3.4324361690910923, "learning_rate": 1.4781530833591573e-06, "loss": 0.5072, "step": 3339 }, { "epoch": 0.01478595776705476, "grad_norm": 3.2250063635065884, "learning_rate": 1.4785957767054762e-06, "loss": 0.7895, "step": 3340 }, { "epoch": 0.014790384700517951, "grad_norm": 3.9431799012751547, "learning_rate": 1.4790384700517952e-06, "loss": 1.0528, "step": 3341 }, { "epoch": 0.014794811633981142, "grad_norm": 2.762013931405411, "learning_rate": 1.4794811633981143e-06, "loss": 0.7839, "step": 3342 }, { "epoch": 0.014799238567444332, "grad_norm": 2.920824094163376, "learning_rate": 1.4799238567444333e-06, "loss": 0.3663, "step": 3343 }, { "epoch": 0.014803665500907521, "grad_norm": 3.3375405993757843, "learning_rate": 1.4803665500907522e-06, "loss": 1.0933, "step": 3344 }, { "epoch": 0.014808092434370711, "grad_norm": 2.915484338336724, "learning_rate": 1.4808092434370714e-06, "loss": 0.7144, "step": 3345 }, { "epoch": 0.014812519367833902, "grad_norm": 2.7900885460393043, "learning_rate": 1.4812519367833903e-06, "loss": 1.0836, "step": 3346 }, { "epoch": 0.01481694630129709, "grad_norm": 3.079968946877246, "learning_rate": 1.481694630129709e-06, "loss": 0.6321, "step": 3347 }, { "epoch": 0.014821373234760281, "grad_norm": 3.7419635700949314, "learning_rate": 1.4821373234760284e-06, "loss": 0.9596, "step": 3348 }, { "epoch": 0.014825800168223472, "grad_norm": 3.194885765007474, "learning_rate": 1.4825800168223472e-06, "loss": 1.2019, "step": 3349 }, { "epoch": 0.014830227101686662, "grad_norm": 3.3008763486182957, "learning_rate": 1.4830227101686661e-06, "loss": 1.4249, "step": 3350 }, { "epoch": 0.014834654035149851, "grad_norm": 3.028897897323548, "learning_rate": 1.4834654035149853e-06, "loss": 0.7795, "step": 3351 }, { "epoch": 0.014839080968613041, "grad_norm": 2.941719049878655, "learning_rate": 1.4839080968613042e-06, "loss": 0.9127, "step": 3352 }, { "epoch": 0.014843507902076232, "grad_norm": 2.6666854221931886, "learning_rate": 1.4843507902076232e-06, "loss": 0.7246, "step": 3353 }, { "epoch": 0.014847934835539423, "grad_norm": 3.451736930444538, "learning_rate": 1.4847934835539423e-06, "loss": 0.7334, "step": 3354 }, { "epoch": 0.014852361769002611, "grad_norm": 3.2389762420369537, "learning_rate": 1.4852361769002613e-06, "loss": 1.3709, "step": 3355 }, { "epoch": 0.014856788702465802, "grad_norm": 2.568807295559085, "learning_rate": 1.4856788702465802e-06, "loss": 0.859, "step": 3356 }, { "epoch": 0.014861215635928992, "grad_norm": 2.848229686188389, "learning_rate": 1.4861215635928994e-06, "loss": 0.7945, "step": 3357 }, { "epoch": 0.014865642569392183, "grad_norm": 2.8883794679735293, "learning_rate": 1.4865642569392183e-06, "loss": 0.7543, "step": 3358 }, { "epoch": 0.014870069502855372, "grad_norm": 3.014971305323427, "learning_rate": 1.4870069502855373e-06, "loss": 0.8786, "step": 3359 }, { "epoch": 0.014874496436318562, "grad_norm": 3.433974243054392, "learning_rate": 1.4874496436318564e-06, "loss": 0.9265, "step": 3360 }, { "epoch": 0.014878923369781753, "grad_norm": 2.755507600511741, "learning_rate": 1.4878923369781754e-06, "loss": 0.5936, "step": 3361 }, { "epoch": 0.014883350303244941, "grad_norm": 2.885008754187599, "learning_rate": 1.4883350303244943e-06, "loss": 0.7187, "step": 3362 }, { "epoch": 0.014887777236708132, "grad_norm": 2.4856167199755186, "learning_rate": 1.4887777236708135e-06, "loss": 0.7276, "step": 3363 }, { "epoch": 0.014892204170171322, "grad_norm": 3.03354522992113, "learning_rate": 1.4892204170171324e-06, "loss": 0.927, "step": 3364 }, { "epoch": 0.014896631103634513, "grad_norm": 2.7325807605472874, "learning_rate": 1.4896631103634512e-06, "loss": 0.7893, "step": 3365 }, { "epoch": 0.014901058037097702, "grad_norm": 3.902668496515313, "learning_rate": 1.4901058037097703e-06, "loss": 0.9066, "step": 3366 }, { "epoch": 0.014905484970560892, "grad_norm": 2.6271453961754716, "learning_rate": 1.4905484970560893e-06, "loss": 0.7093, "step": 3367 }, { "epoch": 0.014909911904024083, "grad_norm": 3.160039432676945, "learning_rate": 1.4909911904024082e-06, "loss": 1.0114, "step": 3368 }, { "epoch": 0.014914338837487273, "grad_norm": 4.116753904695278, "learning_rate": 1.4914338837487274e-06, "loss": 1.111, "step": 3369 }, { "epoch": 0.014918765770950462, "grad_norm": 3.6079660103397644, "learning_rate": 1.4918765770950463e-06, "loss": 1.4032, "step": 3370 }, { "epoch": 0.014923192704413652, "grad_norm": 3.114259753976035, "learning_rate": 1.4923192704413653e-06, "loss": 0.5798, "step": 3371 }, { "epoch": 0.014927619637876843, "grad_norm": 3.2846719475524067, "learning_rate": 1.4927619637876844e-06, "loss": 1.1007, "step": 3372 }, { "epoch": 0.014932046571340033, "grad_norm": 2.7278649929394154, "learning_rate": 1.4932046571340034e-06, "loss": 0.6813, "step": 3373 }, { "epoch": 0.014936473504803222, "grad_norm": 3.2936973449838765, "learning_rate": 1.4936473504803223e-06, "loss": 0.6297, "step": 3374 }, { "epoch": 0.014940900438266413, "grad_norm": 2.9731715653029447, "learning_rate": 1.4940900438266415e-06, "loss": 0.6984, "step": 3375 }, { "epoch": 0.014945327371729603, "grad_norm": 2.736132864505277, "learning_rate": 1.4945327371729604e-06, "loss": 0.6815, "step": 3376 }, { "epoch": 0.014949754305192794, "grad_norm": 2.5310661674530337, "learning_rate": 1.4949754305192794e-06, "loss": 0.7329, "step": 3377 }, { "epoch": 0.014954181238655983, "grad_norm": 3.1793010852670314, "learning_rate": 1.4954181238655985e-06, "loss": 0.9073, "step": 3378 }, { "epoch": 0.014958608172119173, "grad_norm": 2.9385372969913317, "learning_rate": 1.4958608172119175e-06, "loss": 0.8646, "step": 3379 }, { "epoch": 0.014963035105582364, "grad_norm": 3.065214565975281, "learning_rate": 1.4963035105582362e-06, "loss": 0.9666, "step": 3380 }, { "epoch": 0.014967462039045552, "grad_norm": 3.1402089999196305, "learning_rate": 1.4967462039045556e-06, "loss": 1.003, "step": 3381 }, { "epoch": 0.014971888972508743, "grad_norm": 2.492212237167225, "learning_rate": 1.4971888972508743e-06, "loss": 0.8441, "step": 3382 }, { "epoch": 0.014976315905971933, "grad_norm": 6.375635892176533, "learning_rate": 1.4976315905971933e-06, "loss": 1.8338, "step": 3383 }, { "epoch": 0.014980742839435124, "grad_norm": 3.1853198342505573, "learning_rate": 1.4980742839435124e-06, "loss": 0.6553, "step": 3384 }, { "epoch": 0.014985169772898313, "grad_norm": 3.340768760274044, "learning_rate": 1.4985169772898314e-06, "loss": 0.6292, "step": 3385 }, { "epoch": 0.014989596706361503, "grad_norm": 2.4410589905565936, "learning_rate": 1.4989596706361503e-06, "loss": 0.894, "step": 3386 }, { "epoch": 0.014994023639824694, "grad_norm": 3.8773087616974253, "learning_rate": 1.4994023639824695e-06, "loss": 0.9442, "step": 3387 }, { "epoch": 0.014998450573287884, "grad_norm": 2.8230138012512827, "learning_rate": 1.4998450573287884e-06, "loss": 0.8675, "step": 3388 }, { "epoch": 0.015002877506751073, "grad_norm": 2.792549084712176, "learning_rate": 1.5002877506751074e-06, "loss": 0.6849, "step": 3389 }, { "epoch": 0.015007304440214263, "grad_norm": 3.21348888093802, "learning_rate": 1.5007304440214265e-06, "loss": 0.7915, "step": 3390 }, { "epoch": 0.015011731373677454, "grad_norm": 3.0234506510330825, "learning_rate": 1.5011731373677455e-06, "loss": 0.9369, "step": 3391 }, { "epoch": 0.015016158307140644, "grad_norm": 2.9218654107505917, "learning_rate": 1.5016158307140644e-06, "loss": 0.732, "step": 3392 }, { "epoch": 0.015020585240603833, "grad_norm": 3.2265416395821434, "learning_rate": 1.5020585240603836e-06, "loss": 0.7701, "step": 3393 }, { "epoch": 0.015025012174067024, "grad_norm": 4.056467258488318, "learning_rate": 1.5025012174067025e-06, "loss": 0.9153, "step": 3394 }, { "epoch": 0.015029439107530214, "grad_norm": 2.9474439626397415, "learning_rate": 1.5029439107530213e-06, "loss": 0.6488, "step": 3395 }, { "epoch": 0.015033866040993403, "grad_norm": 2.637096383062085, "learning_rate": 1.5033866040993406e-06, "loss": 0.7907, "step": 3396 }, { "epoch": 0.015038292974456594, "grad_norm": 2.8187344302514923, "learning_rate": 1.5038292974456594e-06, "loss": 0.8878, "step": 3397 }, { "epoch": 0.015042719907919784, "grad_norm": 2.8174114585174013, "learning_rate": 1.5042719907919783e-06, "loss": 1.0344, "step": 3398 }, { "epoch": 0.015047146841382975, "grad_norm": 2.8720413446341757, "learning_rate": 1.5047146841382975e-06, "loss": 0.6556, "step": 3399 }, { "epoch": 0.015051573774846163, "grad_norm": 2.481577268177867, "learning_rate": 1.5051573774846164e-06, "loss": 0.8373, "step": 3400 }, { "epoch": 0.015056000708309354, "grad_norm": 2.9719411384992247, "learning_rate": 1.5056000708309354e-06, "loss": 0.5472, "step": 3401 }, { "epoch": 0.015060427641772544, "grad_norm": 2.6014648134862393, "learning_rate": 1.5060427641772545e-06, "loss": 0.633, "step": 3402 }, { "epoch": 0.015064854575235735, "grad_norm": 4.281176832067156, "learning_rate": 1.5064854575235735e-06, "loss": 1.0409, "step": 3403 }, { "epoch": 0.015069281508698924, "grad_norm": 2.609484332557554, "learning_rate": 1.5069281508698924e-06, "loss": 0.8104, "step": 3404 }, { "epoch": 0.015073708442162114, "grad_norm": 4.440653699476884, "learning_rate": 1.5073708442162116e-06, "loss": 1.085, "step": 3405 }, { "epoch": 0.015078135375625305, "grad_norm": 3.4030840410144276, "learning_rate": 1.5078135375625305e-06, "loss": 0.9574, "step": 3406 }, { "epoch": 0.015082562309088495, "grad_norm": 3.5956194298432314, "learning_rate": 1.5082562309088495e-06, "loss": 1.1448, "step": 3407 }, { "epoch": 0.015086989242551684, "grad_norm": 2.8585700459579546, "learning_rate": 1.5086989242551686e-06, "loss": 0.5838, "step": 3408 }, { "epoch": 0.015091416176014874, "grad_norm": 3.827224951148886, "learning_rate": 1.5091416176014876e-06, "loss": 1.0427, "step": 3409 }, { "epoch": 0.015095843109478065, "grad_norm": 2.8003874730776466, "learning_rate": 1.5095843109478065e-06, "loss": 0.9175, "step": 3410 }, { "epoch": 0.015100270042941255, "grad_norm": 3.1599877057310377, "learning_rate": 1.5100270042941257e-06, "loss": 1.107, "step": 3411 }, { "epoch": 0.015104696976404444, "grad_norm": 3.179480560833532, "learning_rate": 1.5104696976404446e-06, "loss": 0.7909, "step": 3412 }, { "epoch": 0.015109123909867635, "grad_norm": 2.6931236754484966, "learning_rate": 1.5109123909867634e-06, "loss": 0.5729, "step": 3413 }, { "epoch": 0.015113550843330825, "grad_norm": 2.6950987526492, "learning_rate": 1.5113550843330827e-06, "loss": 0.4947, "step": 3414 }, { "epoch": 0.015117977776794014, "grad_norm": 2.7076472539269694, "learning_rate": 1.5117977776794015e-06, "loss": 0.7017, "step": 3415 }, { "epoch": 0.015122404710257204, "grad_norm": 2.713199647426037, "learning_rate": 1.5122404710257204e-06, "loss": 0.8989, "step": 3416 }, { "epoch": 0.015126831643720395, "grad_norm": 2.836626517300508, "learning_rate": 1.5126831643720396e-06, "loss": 0.8984, "step": 3417 }, { "epoch": 0.015131258577183586, "grad_norm": 2.9712845197751165, "learning_rate": 1.5131258577183585e-06, "loss": 0.7618, "step": 3418 }, { "epoch": 0.015135685510646774, "grad_norm": 2.706850685001386, "learning_rate": 1.5135685510646777e-06, "loss": 0.7139, "step": 3419 }, { "epoch": 0.015140112444109965, "grad_norm": 4.3230649348843695, "learning_rate": 1.5140112444109966e-06, "loss": 1.3443, "step": 3420 }, { "epoch": 0.015144539377573155, "grad_norm": 2.593635377691468, "learning_rate": 1.5144539377573156e-06, "loss": 0.5365, "step": 3421 }, { "epoch": 0.015148966311036346, "grad_norm": 2.9167985203751843, "learning_rate": 1.5148966311036347e-06, "loss": 0.6299, "step": 3422 }, { "epoch": 0.015153393244499535, "grad_norm": 2.359070871893583, "learning_rate": 1.5153393244499537e-06, "loss": 0.8278, "step": 3423 }, { "epoch": 0.015157820177962725, "grad_norm": 3.6457460112485975, "learning_rate": 1.5157820177962726e-06, "loss": 1.138, "step": 3424 }, { "epoch": 0.015162247111425916, "grad_norm": 2.9382332702865197, "learning_rate": 1.5162247111425918e-06, "loss": 0.7764, "step": 3425 }, { "epoch": 0.015166674044889106, "grad_norm": 3.198105905938217, "learning_rate": 1.5166674044889107e-06, "loss": 0.7249, "step": 3426 }, { "epoch": 0.015171100978352295, "grad_norm": 2.647929304151845, "learning_rate": 1.5171100978352297e-06, "loss": 0.8212, "step": 3427 }, { "epoch": 0.015175527911815485, "grad_norm": 3.381318240035632, "learning_rate": 1.5175527911815489e-06, "loss": 0.9796, "step": 3428 }, { "epoch": 0.015179954845278676, "grad_norm": 3.9319773024153575, "learning_rate": 1.5179954845278678e-06, "loss": 1.1972, "step": 3429 }, { "epoch": 0.015184381778741865, "grad_norm": 2.952297001161198, "learning_rate": 1.5184381778741865e-06, "loss": 0.8605, "step": 3430 }, { "epoch": 0.015188808712205055, "grad_norm": 2.8178730792366844, "learning_rate": 1.518880871220506e-06, "loss": 0.7184, "step": 3431 }, { "epoch": 0.015193235645668246, "grad_norm": 3.029745496759928, "learning_rate": 1.5193235645668246e-06, "loss": 0.811, "step": 3432 }, { "epoch": 0.015197662579131436, "grad_norm": 3.9705086699913066, "learning_rate": 1.5197662579131436e-06, "loss": 1.1653, "step": 3433 }, { "epoch": 0.015202089512594625, "grad_norm": 2.6357977589206274, "learning_rate": 1.5202089512594627e-06, "loss": 0.5275, "step": 3434 }, { "epoch": 0.015206516446057815, "grad_norm": 2.7630539206269407, "learning_rate": 1.5206516446057817e-06, "loss": 0.598, "step": 3435 }, { "epoch": 0.015210943379521006, "grad_norm": 2.9393741511448694, "learning_rate": 1.5210943379521006e-06, "loss": 0.8022, "step": 3436 }, { "epoch": 0.015215370312984196, "grad_norm": 3.468886513728764, "learning_rate": 1.5215370312984198e-06, "loss": 0.6329, "step": 3437 }, { "epoch": 0.015219797246447385, "grad_norm": 3.520625184696694, "learning_rate": 1.5219797246447387e-06, "loss": 0.7323, "step": 3438 }, { "epoch": 0.015224224179910576, "grad_norm": 2.922204436271383, "learning_rate": 1.5224224179910577e-06, "loss": 0.7149, "step": 3439 }, { "epoch": 0.015228651113373766, "grad_norm": 2.6337403673964657, "learning_rate": 1.5228651113373769e-06, "loss": 0.9579, "step": 3440 }, { "epoch": 0.015233078046836957, "grad_norm": 3.3451716620189127, "learning_rate": 1.5233078046836958e-06, "loss": 0.9875, "step": 3441 }, { "epoch": 0.015237504980300146, "grad_norm": 3.353571483371727, "learning_rate": 1.5237504980300147e-06, "loss": 1.035, "step": 3442 }, { "epoch": 0.015241931913763336, "grad_norm": 3.0633694152280446, "learning_rate": 1.524193191376334e-06, "loss": 0.9303, "step": 3443 }, { "epoch": 0.015246358847226527, "grad_norm": 2.491969170289517, "learning_rate": 1.5246358847226529e-06, "loss": 0.5267, "step": 3444 }, { "epoch": 0.015250785780689717, "grad_norm": 2.489442645643696, "learning_rate": 1.5250785780689716e-06, "loss": 0.5365, "step": 3445 }, { "epoch": 0.015255212714152906, "grad_norm": 2.8489776694891735, "learning_rate": 1.525521271415291e-06, "loss": 0.8, "step": 3446 }, { "epoch": 0.015259639647616096, "grad_norm": 3.0478242979259225, "learning_rate": 1.5259639647616097e-06, "loss": 0.8111, "step": 3447 }, { "epoch": 0.015264066581079287, "grad_norm": 3.4612023753889734, "learning_rate": 1.5264066581079286e-06, "loss": 1.076, "step": 3448 }, { "epoch": 0.015268493514542476, "grad_norm": 3.1360754824167896, "learning_rate": 1.5268493514542478e-06, "loss": 0.5076, "step": 3449 }, { "epoch": 0.015272920448005666, "grad_norm": 2.8802396417011433, "learning_rate": 1.5272920448005667e-06, "loss": 0.8285, "step": 3450 }, { "epoch": 0.015277347381468857, "grad_norm": 2.9477346677128757, "learning_rate": 1.5277347381468857e-06, "loss": 1.1714, "step": 3451 }, { "epoch": 0.015281774314932047, "grad_norm": 4.144238266492551, "learning_rate": 1.5281774314932049e-06, "loss": 1.2504, "step": 3452 }, { "epoch": 0.015286201248395236, "grad_norm": 2.697207131049028, "learning_rate": 1.5286201248395238e-06, "loss": 0.6361, "step": 3453 }, { "epoch": 0.015290628181858426, "grad_norm": 3.3019987133801223, "learning_rate": 1.5290628181858427e-06, "loss": 0.9528, "step": 3454 }, { "epoch": 0.015295055115321617, "grad_norm": 3.674218734947987, "learning_rate": 1.529505511532162e-06, "loss": 0.9111, "step": 3455 }, { "epoch": 0.015299482048784807, "grad_norm": 3.2436551771850377, "learning_rate": 1.5299482048784809e-06, "loss": 1.0777, "step": 3456 }, { "epoch": 0.015303908982247996, "grad_norm": 3.1410103146666066, "learning_rate": 1.5303908982247998e-06, "loss": 0.6752, "step": 3457 }, { "epoch": 0.015308335915711187, "grad_norm": 3.019760114612302, "learning_rate": 1.530833591571119e-06, "loss": 0.8534, "step": 3458 }, { "epoch": 0.015312762849174377, "grad_norm": 3.2930581721060443, "learning_rate": 1.531276284917438e-06, "loss": 0.9624, "step": 3459 }, { "epoch": 0.015317189782637568, "grad_norm": 2.8464408310329232, "learning_rate": 1.5317189782637569e-06, "loss": 1.0073, "step": 3460 }, { "epoch": 0.015321616716100757, "grad_norm": 3.1380815582664545, "learning_rate": 1.532161671610076e-06, "loss": 0.7795, "step": 3461 }, { "epoch": 0.015326043649563947, "grad_norm": 2.7608857808550824, "learning_rate": 1.532604364956395e-06, "loss": 0.4089, "step": 3462 }, { "epoch": 0.015330470583027138, "grad_norm": 3.0166367233698157, "learning_rate": 1.5330470583027137e-06, "loss": 0.7879, "step": 3463 }, { "epoch": 0.015334897516490326, "grad_norm": 2.3408662184962012, "learning_rate": 1.533489751649033e-06, "loss": 0.7084, "step": 3464 }, { "epoch": 0.015339324449953517, "grad_norm": 3.3682022450502056, "learning_rate": 1.5339324449953518e-06, "loss": 0.9963, "step": 3465 }, { "epoch": 0.015343751383416707, "grad_norm": 2.712279085675758, "learning_rate": 1.5343751383416707e-06, "loss": 0.9757, "step": 3466 }, { "epoch": 0.015348178316879898, "grad_norm": 2.6672429381682776, "learning_rate": 1.53481783168799e-06, "loss": 0.754, "step": 3467 }, { "epoch": 0.015352605250343087, "grad_norm": 2.7280462618695154, "learning_rate": 1.5352605250343089e-06, "loss": 0.8139, "step": 3468 }, { "epoch": 0.015357032183806277, "grad_norm": 3.514880312840966, "learning_rate": 1.5357032183806278e-06, "loss": 0.7397, "step": 3469 }, { "epoch": 0.015361459117269468, "grad_norm": 2.9853836238315052, "learning_rate": 1.536145911726947e-06, "loss": 0.8537, "step": 3470 }, { "epoch": 0.015365886050732658, "grad_norm": 2.931577474952247, "learning_rate": 1.536588605073266e-06, "loss": 0.6809, "step": 3471 }, { "epoch": 0.015370312984195847, "grad_norm": 3.1022187275771747, "learning_rate": 1.5370312984195849e-06, "loss": 0.722, "step": 3472 }, { "epoch": 0.015374739917659037, "grad_norm": 3.048870615330171, "learning_rate": 1.537473991765904e-06, "loss": 0.9576, "step": 3473 }, { "epoch": 0.015379166851122228, "grad_norm": 3.13353620591637, "learning_rate": 1.537916685112223e-06, "loss": 0.8446, "step": 3474 }, { "epoch": 0.015383593784585418, "grad_norm": 3.304148696604237, "learning_rate": 1.538359378458542e-06, "loss": 0.979, "step": 3475 }, { "epoch": 0.015388020718048607, "grad_norm": 3.4916349215725058, "learning_rate": 1.538802071804861e-06, "loss": 1.1264, "step": 3476 }, { "epoch": 0.015392447651511798, "grad_norm": 2.6089551823874215, "learning_rate": 1.53924476515118e-06, "loss": 0.5509, "step": 3477 }, { "epoch": 0.015396874584974988, "grad_norm": 2.5346148941744917, "learning_rate": 1.5396874584974987e-06, "loss": 0.8621, "step": 3478 }, { "epoch": 0.015401301518438179, "grad_norm": 2.7593117399496303, "learning_rate": 1.5401301518438181e-06, "loss": 0.7385, "step": 3479 }, { "epoch": 0.015405728451901367, "grad_norm": 3.522895955498592, "learning_rate": 1.5405728451901369e-06, "loss": 0.8163, "step": 3480 }, { "epoch": 0.015410155385364558, "grad_norm": 2.16717652079374, "learning_rate": 1.5410155385364558e-06, "loss": 0.4192, "step": 3481 }, { "epoch": 0.015414582318827749, "grad_norm": 2.6063817379239396, "learning_rate": 1.541458231882775e-06, "loss": 0.8363, "step": 3482 }, { "epoch": 0.015419009252290937, "grad_norm": 2.717823416036241, "learning_rate": 1.541900925229094e-06, "loss": 0.8205, "step": 3483 }, { "epoch": 0.015423436185754128, "grad_norm": 2.2490048322067784, "learning_rate": 1.5423436185754129e-06, "loss": 0.5769, "step": 3484 }, { "epoch": 0.015427863119217318, "grad_norm": 3.4690023629526463, "learning_rate": 1.542786311921732e-06, "loss": 1.0944, "step": 3485 }, { "epoch": 0.015432290052680509, "grad_norm": 3.903701374041067, "learning_rate": 1.543229005268051e-06, "loss": 1.0754, "step": 3486 }, { "epoch": 0.015436716986143698, "grad_norm": 2.659683514272689, "learning_rate": 1.54367169861437e-06, "loss": 0.8261, "step": 3487 }, { "epoch": 0.015441143919606888, "grad_norm": 2.636860988354934, "learning_rate": 1.544114391960689e-06, "loss": 0.7728, "step": 3488 }, { "epoch": 0.015445570853070079, "grad_norm": 4.367350302363418, "learning_rate": 1.544557085307008e-06, "loss": 0.7168, "step": 3489 }, { "epoch": 0.015449997786533269, "grad_norm": 3.643325642984016, "learning_rate": 1.544999778653327e-06, "loss": 1.2296, "step": 3490 }, { "epoch": 0.015454424719996458, "grad_norm": 2.9099852390260947, "learning_rate": 1.5454424719996461e-06, "loss": 0.8143, "step": 3491 }, { "epoch": 0.015458851653459648, "grad_norm": 2.9475623378307625, "learning_rate": 1.545885165345965e-06, "loss": 1.001, "step": 3492 }, { "epoch": 0.015463278586922839, "grad_norm": 3.1148148866504326, "learning_rate": 1.546327858692284e-06, "loss": 0.8417, "step": 3493 }, { "epoch": 0.01546770552038603, "grad_norm": 3.688975051165829, "learning_rate": 1.5467705520386032e-06, "loss": 1.1848, "step": 3494 }, { "epoch": 0.015472132453849218, "grad_norm": 2.554411135584981, "learning_rate": 1.547213245384922e-06, "loss": 0.8596, "step": 3495 }, { "epoch": 0.015476559387312409, "grad_norm": 2.4988694410133347, "learning_rate": 1.5476559387312409e-06, "loss": 0.5931, "step": 3496 }, { "epoch": 0.0154809863207756, "grad_norm": 3.099183254798585, "learning_rate": 1.54809863207756e-06, "loss": 0.6861, "step": 3497 }, { "epoch": 0.015485413254238788, "grad_norm": 4.181529762084913, "learning_rate": 1.548541325423879e-06, "loss": 1.1171, "step": 3498 }, { "epoch": 0.015489840187701978, "grad_norm": 2.6835255284029755, "learning_rate": 1.548984018770198e-06, "loss": 0.7559, "step": 3499 }, { "epoch": 0.015494267121165169, "grad_norm": 2.9379134726181966, "learning_rate": 1.549426712116517e-06, "loss": 0.583, "step": 3500 }, { "epoch": 0.01549869405462836, "grad_norm": 3.1553183382815906, "learning_rate": 1.549869405462836e-06, "loss": 0.8666, "step": 3501 }, { "epoch": 0.015503120988091548, "grad_norm": 2.7388888335854804, "learning_rate": 1.550312098809155e-06, "loss": 0.7503, "step": 3502 }, { "epoch": 0.015507547921554739, "grad_norm": 3.576701559528241, "learning_rate": 1.5507547921554741e-06, "loss": 0.8674, "step": 3503 }, { "epoch": 0.01551197485501793, "grad_norm": 3.6113558649651205, "learning_rate": 1.551197485501793e-06, "loss": 0.7837, "step": 3504 }, { "epoch": 0.01551640178848112, "grad_norm": 3.4194913746186595, "learning_rate": 1.551640178848112e-06, "loss": 1.1917, "step": 3505 }, { "epoch": 0.015520828721944309, "grad_norm": 2.9939479544831356, "learning_rate": 1.5520828721944312e-06, "loss": 0.7527, "step": 3506 }, { "epoch": 0.015525255655407499, "grad_norm": 3.365868682426884, "learning_rate": 1.5525255655407501e-06, "loss": 1.1193, "step": 3507 }, { "epoch": 0.01552968258887069, "grad_norm": 3.077189353253704, "learning_rate": 1.552968258887069e-06, "loss": 1.1036, "step": 3508 }, { "epoch": 0.01553410952233388, "grad_norm": 2.8835127419398656, "learning_rate": 1.5534109522333882e-06, "loss": 1.1077, "step": 3509 }, { "epoch": 0.015538536455797069, "grad_norm": 3.215674166493078, "learning_rate": 1.5538536455797072e-06, "loss": 1.0276, "step": 3510 }, { "epoch": 0.01554296338926026, "grad_norm": 2.6099904628747015, "learning_rate": 1.554296338926026e-06, "loss": 0.8082, "step": 3511 }, { "epoch": 0.01554739032272345, "grad_norm": 2.953618522456119, "learning_rate": 1.5547390322723453e-06, "loss": 0.911, "step": 3512 }, { "epoch": 0.01555181725618664, "grad_norm": 3.1277145604667367, "learning_rate": 1.555181725618664e-06, "loss": 0.7072, "step": 3513 }, { "epoch": 0.01555624418964983, "grad_norm": 2.9084441651220057, "learning_rate": 1.555624418964983e-06, "loss": 0.9877, "step": 3514 }, { "epoch": 0.01556067112311302, "grad_norm": 2.6497578203363052, "learning_rate": 1.5560671123113021e-06, "loss": 0.5276, "step": 3515 }, { "epoch": 0.01556509805657621, "grad_norm": 3.1024673635002284, "learning_rate": 1.556509805657621e-06, "loss": 0.8079, "step": 3516 }, { "epoch": 0.015569524990039399, "grad_norm": 2.7858819973349958, "learning_rate": 1.55695249900394e-06, "loss": 0.6829, "step": 3517 }, { "epoch": 0.01557395192350259, "grad_norm": 2.5865397872817653, "learning_rate": 1.5573951923502592e-06, "loss": 0.6746, "step": 3518 }, { "epoch": 0.01557837885696578, "grad_norm": 2.675388346608944, "learning_rate": 1.5578378856965781e-06, "loss": 0.531, "step": 3519 }, { "epoch": 0.01558280579042897, "grad_norm": 4.824855192091376, "learning_rate": 1.558280579042897e-06, "loss": 1.5235, "step": 3520 }, { "epoch": 0.01558723272389216, "grad_norm": 3.853178953006227, "learning_rate": 1.5587232723892162e-06, "loss": 1.0495, "step": 3521 }, { "epoch": 0.01559165965735535, "grad_norm": 2.6368320829946743, "learning_rate": 1.5591659657355352e-06, "loss": 0.6856, "step": 3522 }, { "epoch": 0.01559608659081854, "grad_norm": 2.922548684399094, "learning_rate": 1.5596086590818541e-06, "loss": 0.9166, "step": 3523 }, { "epoch": 0.01560051352428173, "grad_norm": 2.7472656011770162, "learning_rate": 1.5600513524281733e-06, "loss": 0.9157, "step": 3524 }, { "epoch": 0.01560494045774492, "grad_norm": 2.8314488415936574, "learning_rate": 1.5604940457744922e-06, "loss": 0.9845, "step": 3525 }, { "epoch": 0.01560936739120811, "grad_norm": 2.6283754410287923, "learning_rate": 1.560936739120811e-06, "loss": 0.9079, "step": 3526 }, { "epoch": 0.0156137943246713, "grad_norm": 3.145701230324901, "learning_rate": 1.5613794324671303e-06, "loss": 0.6509, "step": 3527 }, { "epoch": 0.015618221258134491, "grad_norm": 2.726999869534538, "learning_rate": 1.561822125813449e-06, "loss": 0.7397, "step": 3528 }, { "epoch": 0.01562264819159768, "grad_norm": 4.100645745186504, "learning_rate": 1.562264819159768e-06, "loss": 1.0599, "step": 3529 }, { "epoch": 0.015627075125060872, "grad_norm": 2.9509285076110965, "learning_rate": 1.5627075125060872e-06, "loss": 0.8687, "step": 3530 }, { "epoch": 0.01563150205852406, "grad_norm": 2.6092268737808855, "learning_rate": 1.5631502058524061e-06, "loss": 0.5403, "step": 3531 }, { "epoch": 0.01563592899198725, "grad_norm": 2.6779413952984004, "learning_rate": 1.563592899198725e-06, "loss": 0.7827, "step": 3532 }, { "epoch": 0.01564035592545044, "grad_norm": 3.1061948854562322, "learning_rate": 1.5640355925450442e-06, "loss": 0.6445, "step": 3533 }, { "epoch": 0.01564478285891363, "grad_norm": 2.4283003061810637, "learning_rate": 1.5644782858913632e-06, "loss": 0.4734, "step": 3534 }, { "epoch": 0.01564920979237682, "grad_norm": 3.714041603631345, "learning_rate": 1.5649209792376821e-06, "loss": 1.1922, "step": 3535 }, { "epoch": 0.01565363672584001, "grad_norm": 3.1651430959674074, "learning_rate": 1.5653636725840013e-06, "loss": 0.8736, "step": 3536 }, { "epoch": 0.015658063659303202, "grad_norm": 3.3860032493066465, "learning_rate": 1.5658063659303202e-06, "loss": 1.2702, "step": 3537 }, { "epoch": 0.01566249059276639, "grad_norm": 4.94411111456212, "learning_rate": 1.5662490592766392e-06, "loss": 1.5042, "step": 3538 }, { "epoch": 0.01566691752622958, "grad_norm": 2.768717911009561, "learning_rate": 1.5666917526229583e-06, "loss": 1.0631, "step": 3539 }, { "epoch": 0.01567134445969277, "grad_norm": 2.8894435292750766, "learning_rate": 1.5671344459692773e-06, "loss": 0.8478, "step": 3540 }, { "epoch": 0.01567577139315596, "grad_norm": 2.682927107209387, "learning_rate": 1.5675771393155962e-06, "loss": 0.9704, "step": 3541 }, { "epoch": 0.01568019832661915, "grad_norm": 2.97035222539463, "learning_rate": 1.5680198326619154e-06, "loss": 0.976, "step": 3542 }, { "epoch": 0.01568462526008234, "grad_norm": 2.884848477187179, "learning_rate": 1.5684625260082343e-06, "loss": 0.808, "step": 3543 }, { "epoch": 0.015689052193545532, "grad_norm": 2.508531430791158, "learning_rate": 1.568905219354553e-06, "loss": 0.6149, "step": 3544 }, { "epoch": 0.015693479127008723, "grad_norm": 3.7423943229895933, "learning_rate": 1.5693479127008724e-06, "loss": 1.04, "step": 3545 }, { "epoch": 0.01569790606047191, "grad_norm": 3.307063172808267, "learning_rate": 1.5697906060471912e-06, "loss": 0.7753, "step": 3546 }, { "epoch": 0.0157023329939351, "grad_norm": 3.0947196994706996, "learning_rate": 1.5702332993935101e-06, "loss": 1.1236, "step": 3547 }, { "epoch": 0.01570675992739829, "grad_norm": 2.93658753826742, "learning_rate": 1.5706759927398293e-06, "loss": 1.016, "step": 3548 }, { "epoch": 0.01571118686086148, "grad_norm": 2.8044226259746923, "learning_rate": 1.5711186860861482e-06, "loss": 0.83, "step": 3549 }, { "epoch": 0.015715613794324672, "grad_norm": 2.9357259367962047, "learning_rate": 1.5715613794324672e-06, "loss": 0.7064, "step": 3550 }, { "epoch": 0.015720040727787862, "grad_norm": 2.635046570610781, "learning_rate": 1.5720040727787863e-06, "loss": 0.7235, "step": 3551 }, { "epoch": 0.015724467661251053, "grad_norm": 3.0068221593791273, "learning_rate": 1.5724467661251053e-06, "loss": 1.0067, "step": 3552 }, { "epoch": 0.01572889459471424, "grad_norm": 2.6656653490179782, "learning_rate": 1.5728894594714242e-06, "loss": 0.8335, "step": 3553 }, { "epoch": 0.01573332152817743, "grad_norm": 3.0647926618603036, "learning_rate": 1.5733321528177434e-06, "loss": 0.7167, "step": 3554 }, { "epoch": 0.01573774846164062, "grad_norm": 2.5131228436438406, "learning_rate": 1.5737748461640623e-06, "loss": 0.6188, "step": 3555 }, { "epoch": 0.01574217539510381, "grad_norm": 2.440408953633796, "learning_rate": 1.5742175395103813e-06, "loss": 0.5882, "step": 3556 }, { "epoch": 0.015746602328567002, "grad_norm": 2.469250567702864, "learning_rate": 1.5746602328567004e-06, "loss": 0.7876, "step": 3557 }, { "epoch": 0.015751029262030192, "grad_norm": 3.1516110826134764, "learning_rate": 1.5751029262030194e-06, "loss": 1.0276, "step": 3558 }, { "epoch": 0.015755456195493383, "grad_norm": 2.6579109724419263, "learning_rate": 1.5755456195493381e-06, "loss": 0.4163, "step": 3559 }, { "epoch": 0.015759883128956573, "grad_norm": 3.04951032484824, "learning_rate": 1.5759883128956575e-06, "loss": 0.8444, "step": 3560 }, { "epoch": 0.01576431006241976, "grad_norm": 2.8021697713362443, "learning_rate": 1.5764310062419762e-06, "loss": 0.9434, "step": 3561 }, { "epoch": 0.01576873699588295, "grad_norm": 3.2313998609189176, "learning_rate": 1.5768736995882952e-06, "loss": 0.9008, "step": 3562 }, { "epoch": 0.01577316392934614, "grad_norm": 2.8438942904149958, "learning_rate": 1.5773163929346143e-06, "loss": 0.9963, "step": 3563 }, { "epoch": 0.015777590862809332, "grad_norm": 4.2295202422497775, "learning_rate": 1.5777590862809333e-06, "loss": 1.2015, "step": 3564 }, { "epoch": 0.015782017796272522, "grad_norm": 3.4925103431881768, "learning_rate": 1.5782017796272522e-06, "loss": 0.84, "step": 3565 }, { "epoch": 0.015786444729735713, "grad_norm": 3.2546251669810875, "learning_rate": 1.5786444729735714e-06, "loss": 0.6459, "step": 3566 }, { "epoch": 0.015790871663198904, "grad_norm": 3.5780629349736053, "learning_rate": 1.5790871663198903e-06, "loss": 0.8043, "step": 3567 }, { "epoch": 0.01579529859666209, "grad_norm": 3.3339215039497256, "learning_rate": 1.5795298596662093e-06, "loss": 0.8831, "step": 3568 }, { "epoch": 0.01579972553012528, "grad_norm": 3.0834069984513564, "learning_rate": 1.5799725530125284e-06, "loss": 0.7462, "step": 3569 }, { "epoch": 0.01580415246358847, "grad_norm": 3.5833169386256722, "learning_rate": 1.5804152463588474e-06, "loss": 0.9813, "step": 3570 }, { "epoch": 0.015808579397051662, "grad_norm": 3.2394064286634188, "learning_rate": 1.5808579397051663e-06, "loss": 0.8852, "step": 3571 }, { "epoch": 0.015813006330514853, "grad_norm": 2.6523747420169084, "learning_rate": 1.5813006330514855e-06, "loss": 0.4825, "step": 3572 }, { "epoch": 0.015817433263978043, "grad_norm": 2.88888989543102, "learning_rate": 1.5817433263978044e-06, "loss": 0.7328, "step": 3573 }, { "epoch": 0.015821860197441234, "grad_norm": 3.0198106979348105, "learning_rate": 1.5821860197441232e-06, "loss": 0.6744, "step": 3574 }, { "epoch": 0.015826287130904424, "grad_norm": 2.8257859891078687, "learning_rate": 1.5826287130904425e-06, "loss": 0.9082, "step": 3575 }, { "epoch": 0.01583071406436761, "grad_norm": 3.0569527997673323, "learning_rate": 1.5830714064367613e-06, "loss": 0.7899, "step": 3576 }, { "epoch": 0.0158351409978308, "grad_norm": 2.657657036230457, "learning_rate": 1.5835140997830802e-06, "loss": 0.8207, "step": 3577 }, { "epoch": 0.015839567931293992, "grad_norm": 2.798685820758782, "learning_rate": 1.5839567931293994e-06, "loss": 1.045, "step": 3578 }, { "epoch": 0.015843994864757183, "grad_norm": 3.021374987697879, "learning_rate": 1.5843994864757183e-06, "loss": 1.0121, "step": 3579 }, { "epoch": 0.015848421798220373, "grad_norm": 3.128250443514748, "learning_rate": 1.5848421798220373e-06, "loss": 0.8703, "step": 3580 }, { "epoch": 0.015852848731683564, "grad_norm": 2.968104815606327, "learning_rate": 1.5852848731683564e-06, "loss": 1.0885, "step": 3581 }, { "epoch": 0.015857275665146754, "grad_norm": 3.1058618287474786, "learning_rate": 1.5857275665146754e-06, "loss": 0.8298, "step": 3582 }, { "epoch": 0.01586170259860994, "grad_norm": 3.0186139474722076, "learning_rate": 1.5861702598609943e-06, "loss": 1.0557, "step": 3583 }, { "epoch": 0.01586612953207313, "grad_norm": 2.8927234400241044, "learning_rate": 1.5866129532073135e-06, "loss": 0.9894, "step": 3584 }, { "epoch": 0.015870556465536322, "grad_norm": 2.624535319766538, "learning_rate": 1.5870556465536324e-06, "loss": 0.6688, "step": 3585 }, { "epoch": 0.015874983398999513, "grad_norm": 2.676014874625987, "learning_rate": 1.5874983398999514e-06, "loss": 0.705, "step": 3586 }, { "epoch": 0.015879410332462703, "grad_norm": 3.485664693757638, "learning_rate": 1.5879410332462705e-06, "loss": 0.9443, "step": 3587 }, { "epoch": 0.015883837265925894, "grad_norm": 3.6613607475080916, "learning_rate": 1.5883837265925895e-06, "loss": 1.0472, "step": 3588 }, { "epoch": 0.015888264199389084, "grad_norm": 2.95154482579951, "learning_rate": 1.5888264199389084e-06, "loss": 0.7239, "step": 3589 }, { "epoch": 0.015892691132852275, "grad_norm": 3.6788584533806614, "learning_rate": 1.5892691132852276e-06, "loss": 1.0534, "step": 3590 }, { "epoch": 0.015897118066315462, "grad_norm": 2.699125324892854, "learning_rate": 1.5897118066315465e-06, "loss": 0.7315, "step": 3591 }, { "epoch": 0.015901544999778652, "grad_norm": 3.176714928285383, "learning_rate": 1.5901544999778653e-06, "loss": 0.4722, "step": 3592 }, { "epoch": 0.015905971933241843, "grad_norm": 3.2123946633502682, "learning_rate": 1.5905971933241847e-06, "loss": 0.5476, "step": 3593 }, { "epoch": 0.015910398866705033, "grad_norm": 2.6830886420257403, "learning_rate": 1.5910398866705034e-06, "loss": 0.5914, "step": 3594 }, { "epoch": 0.015914825800168224, "grad_norm": 3.122808384984061, "learning_rate": 1.5914825800168223e-06, "loss": 0.9328, "step": 3595 }, { "epoch": 0.015919252733631414, "grad_norm": 2.754109104980052, "learning_rate": 1.5919252733631415e-06, "loss": 0.8147, "step": 3596 }, { "epoch": 0.015923679667094605, "grad_norm": 3.4254002878771166, "learning_rate": 1.5923679667094604e-06, "loss": 0.6258, "step": 3597 }, { "epoch": 0.015928106600557792, "grad_norm": 2.800727198602105, "learning_rate": 1.5928106600557794e-06, "loss": 0.9627, "step": 3598 }, { "epoch": 0.015932533534020982, "grad_norm": 2.4427849176342047, "learning_rate": 1.5932533534020985e-06, "loss": 0.7311, "step": 3599 }, { "epoch": 0.015936960467484173, "grad_norm": 2.7630108856596594, "learning_rate": 1.5936960467484175e-06, "loss": 0.7358, "step": 3600 }, { "epoch": 0.015941387400947363, "grad_norm": 2.4579410998948474, "learning_rate": 1.5941387400947364e-06, "loss": 0.7811, "step": 3601 }, { "epoch": 0.015945814334410554, "grad_norm": 2.8893586826793207, "learning_rate": 1.5945814334410556e-06, "loss": 0.763, "step": 3602 }, { "epoch": 0.015950241267873744, "grad_norm": 3.0541094651828633, "learning_rate": 1.5950241267873745e-06, "loss": 0.7125, "step": 3603 }, { "epoch": 0.015954668201336935, "grad_norm": 2.735832340872831, "learning_rate": 1.5954668201336935e-06, "loss": 0.7832, "step": 3604 }, { "epoch": 0.015959095134800125, "grad_norm": 3.4637196721050514, "learning_rate": 1.5959095134800127e-06, "loss": 1.1773, "step": 3605 }, { "epoch": 0.015963522068263312, "grad_norm": 3.113470571625366, "learning_rate": 1.5963522068263316e-06, "loss": 0.7579, "step": 3606 }, { "epoch": 0.015967949001726503, "grad_norm": 2.724003835800461, "learning_rate": 1.5967949001726503e-06, "loss": 0.8654, "step": 3607 }, { "epoch": 0.015972375935189694, "grad_norm": 3.0139482522841567, "learning_rate": 1.5972375935189697e-06, "loss": 0.7125, "step": 3608 }, { "epoch": 0.015976802868652884, "grad_norm": 4.239252770850329, "learning_rate": 1.5976802868652884e-06, "loss": 0.9609, "step": 3609 }, { "epoch": 0.015981229802116075, "grad_norm": 2.8479411326016915, "learning_rate": 1.5981229802116074e-06, "loss": 1.0072, "step": 3610 }, { "epoch": 0.015985656735579265, "grad_norm": 3.4035138739223325, "learning_rate": 1.5985656735579265e-06, "loss": 0.6867, "step": 3611 }, { "epoch": 0.015990083669042456, "grad_norm": 2.566087773088606, "learning_rate": 1.5990083669042455e-06, "loss": 0.88, "step": 3612 }, { "epoch": 0.015994510602505646, "grad_norm": 2.675717064586619, "learning_rate": 1.5994510602505644e-06, "loss": 0.7946, "step": 3613 }, { "epoch": 0.015998937535968833, "grad_norm": 2.778357292476927, "learning_rate": 1.5998937535968836e-06, "loss": 0.9427, "step": 3614 }, { "epoch": 0.016003364469432024, "grad_norm": 2.97264117363331, "learning_rate": 1.6003364469432025e-06, "loss": 0.7333, "step": 3615 }, { "epoch": 0.016007791402895214, "grad_norm": 3.1065931752244498, "learning_rate": 1.6007791402895215e-06, "loss": 0.8133, "step": 3616 }, { "epoch": 0.016012218336358405, "grad_norm": 2.834724850853262, "learning_rate": 1.6012218336358407e-06, "loss": 0.6799, "step": 3617 }, { "epoch": 0.016016645269821595, "grad_norm": 3.354589091112184, "learning_rate": 1.6016645269821596e-06, "loss": 0.8779, "step": 3618 }, { "epoch": 0.016021072203284786, "grad_norm": 2.9324176498721846, "learning_rate": 1.6021072203284785e-06, "loss": 0.6602, "step": 3619 }, { "epoch": 0.016025499136747976, "grad_norm": 2.9319687226390174, "learning_rate": 1.6025499136747977e-06, "loss": 0.8571, "step": 3620 }, { "epoch": 0.016029926070211163, "grad_norm": 3.578983580729361, "learning_rate": 1.6029926070211167e-06, "loss": 1.323, "step": 3621 }, { "epoch": 0.016034353003674354, "grad_norm": 3.408954241500829, "learning_rate": 1.6034353003674356e-06, "loss": 0.9474, "step": 3622 }, { "epoch": 0.016038779937137544, "grad_norm": 3.47975985846004, "learning_rate": 1.6038779937137548e-06, "loss": 1.2992, "step": 3623 }, { "epoch": 0.016043206870600735, "grad_norm": 3.4773249407114086, "learning_rate": 1.6043206870600735e-06, "loss": 0.9831, "step": 3624 }, { "epoch": 0.016047633804063925, "grad_norm": 2.9490042211891376, "learning_rate": 1.6047633804063924e-06, "loss": 0.9623, "step": 3625 }, { "epoch": 0.016052060737527116, "grad_norm": 2.545113699098619, "learning_rate": 1.6052060737527116e-06, "loss": 0.7026, "step": 3626 }, { "epoch": 0.016056487670990306, "grad_norm": 3.3079106619991623, "learning_rate": 1.6056487670990305e-06, "loss": 0.8901, "step": 3627 }, { "epoch": 0.016060914604453497, "grad_norm": 2.6556030860433024, "learning_rate": 1.6060914604453495e-06, "loss": 0.4891, "step": 3628 }, { "epoch": 0.016065341537916684, "grad_norm": 2.5369599365902227, "learning_rate": 1.6065341537916687e-06, "loss": 0.7126, "step": 3629 }, { "epoch": 0.016069768471379874, "grad_norm": 3.274306722910032, "learning_rate": 1.6069768471379876e-06, "loss": 0.8661, "step": 3630 }, { "epoch": 0.016074195404843065, "grad_norm": 3.027854968045882, "learning_rate": 1.6074195404843065e-06, "loss": 0.874, "step": 3631 }, { "epoch": 0.016078622338306255, "grad_norm": 2.8052999403234717, "learning_rate": 1.6078622338306257e-06, "loss": 0.8278, "step": 3632 }, { "epoch": 0.016083049271769446, "grad_norm": 3.3951235277335687, "learning_rate": 1.6083049271769447e-06, "loss": 1.2847, "step": 3633 }, { "epoch": 0.016087476205232636, "grad_norm": 3.3043691901535293, "learning_rate": 1.6087476205232636e-06, "loss": 0.7731, "step": 3634 }, { "epoch": 0.016091903138695827, "grad_norm": 2.9240978058810647, "learning_rate": 1.6091903138695828e-06, "loss": 0.7776, "step": 3635 }, { "epoch": 0.016096330072159014, "grad_norm": 3.234737327880613, "learning_rate": 1.6096330072159017e-06, "loss": 0.8787, "step": 3636 }, { "epoch": 0.016100757005622204, "grad_norm": 2.7489618663118796, "learning_rate": 1.6100757005622207e-06, "loss": 0.9066, "step": 3637 }, { "epoch": 0.016105183939085395, "grad_norm": 3.01167886033951, "learning_rate": 1.6105183939085398e-06, "loss": 1.05, "step": 3638 }, { "epoch": 0.016109610872548585, "grad_norm": 2.2988812782291617, "learning_rate": 1.6109610872548588e-06, "loss": 0.6353, "step": 3639 }, { "epoch": 0.016114037806011776, "grad_norm": 2.9233881104382453, "learning_rate": 1.6114037806011775e-06, "loss": 0.8055, "step": 3640 }, { "epoch": 0.016118464739474966, "grad_norm": 3.395318028593771, "learning_rate": 1.6118464739474969e-06, "loss": 0.9498, "step": 3641 }, { "epoch": 0.016122891672938157, "grad_norm": 2.8349165748242457, "learning_rate": 1.6122891672938156e-06, "loss": 0.8181, "step": 3642 }, { "epoch": 0.016127318606401347, "grad_norm": 3.089133741582312, "learning_rate": 1.6127318606401345e-06, "loss": 0.9641, "step": 3643 }, { "epoch": 0.016131745539864534, "grad_norm": 3.410944068703974, "learning_rate": 1.6131745539864537e-06, "loss": 0.7987, "step": 3644 }, { "epoch": 0.016136172473327725, "grad_norm": 2.6978543322818593, "learning_rate": 1.6136172473327727e-06, "loss": 0.7904, "step": 3645 }, { "epoch": 0.016140599406790915, "grad_norm": 2.9190282816101414, "learning_rate": 1.6140599406790916e-06, "loss": 0.6538, "step": 3646 }, { "epoch": 0.016145026340254106, "grad_norm": 2.565000675399038, "learning_rate": 1.6145026340254108e-06, "loss": 0.8553, "step": 3647 }, { "epoch": 0.016149453273717296, "grad_norm": 3.681693958195204, "learning_rate": 1.6149453273717297e-06, "loss": 1.0054, "step": 3648 }, { "epoch": 0.016153880207180487, "grad_norm": 3.1820101416492563, "learning_rate": 1.6153880207180487e-06, "loss": 0.87, "step": 3649 }, { "epoch": 0.016158307140643677, "grad_norm": 2.6791705949151208, "learning_rate": 1.6158307140643678e-06, "loss": 0.6286, "step": 3650 }, { "epoch": 0.016162734074106865, "grad_norm": 2.8090926094621427, "learning_rate": 1.6162734074106868e-06, "loss": 0.541, "step": 3651 }, { "epoch": 0.016167161007570055, "grad_norm": 2.3410525990340285, "learning_rate": 1.6167161007570057e-06, "loss": 0.6357, "step": 3652 }, { "epoch": 0.016171587941033246, "grad_norm": 2.672576531748015, "learning_rate": 1.6171587941033249e-06, "loss": 0.5836, "step": 3653 }, { "epoch": 0.016176014874496436, "grad_norm": 2.691648328662078, "learning_rate": 1.6176014874496438e-06, "loss": 0.781, "step": 3654 }, { "epoch": 0.016180441807959627, "grad_norm": 2.8357835296949854, "learning_rate": 1.6180441807959625e-06, "loss": 0.8419, "step": 3655 }, { "epoch": 0.016184868741422817, "grad_norm": 2.961392506058208, "learning_rate": 1.618486874142282e-06, "loss": 0.9596, "step": 3656 }, { "epoch": 0.016189295674886008, "grad_norm": 3.383060322994821, "learning_rate": 1.6189295674886007e-06, "loss": 0.8423, "step": 3657 }, { "epoch": 0.016193722608349198, "grad_norm": 3.1347407794568554, "learning_rate": 1.6193722608349196e-06, "loss": 0.8557, "step": 3658 }, { "epoch": 0.016198149541812385, "grad_norm": 2.6925051537006657, "learning_rate": 1.6198149541812388e-06, "loss": 0.6224, "step": 3659 }, { "epoch": 0.016202576475275576, "grad_norm": 2.4256027347693547, "learning_rate": 1.6202576475275577e-06, "loss": 0.6081, "step": 3660 }, { "epoch": 0.016207003408738766, "grad_norm": 2.994372405035978, "learning_rate": 1.6207003408738767e-06, "loss": 0.7348, "step": 3661 }, { "epoch": 0.016211430342201957, "grad_norm": 2.753669861141697, "learning_rate": 1.6211430342201958e-06, "loss": 0.5948, "step": 3662 }, { "epoch": 0.016215857275665147, "grad_norm": 4.437925802521538, "learning_rate": 1.6215857275665148e-06, "loss": 0.9548, "step": 3663 }, { "epoch": 0.016220284209128338, "grad_norm": 3.188469362614156, "learning_rate": 1.6220284209128337e-06, "loss": 0.5743, "step": 3664 }, { "epoch": 0.016224711142591528, "grad_norm": 3.5357688737190562, "learning_rate": 1.6224711142591529e-06, "loss": 0.862, "step": 3665 }, { "epoch": 0.016229138076054715, "grad_norm": 2.7618757802665965, "learning_rate": 1.6229138076054718e-06, "loss": 0.8057, "step": 3666 }, { "epoch": 0.016233565009517906, "grad_norm": 2.690236881007154, "learning_rate": 1.6233565009517908e-06, "loss": 0.8422, "step": 3667 }, { "epoch": 0.016237991942981096, "grad_norm": 3.222912711102922, "learning_rate": 1.62379919429811e-06, "loss": 0.8598, "step": 3668 }, { "epoch": 0.016242418876444287, "grad_norm": 3.8172467341318796, "learning_rate": 1.6242418876444289e-06, "loss": 1.0544, "step": 3669 }, { "epoch": 0.016246845809907477, "grad_norm": 3.573357231376633, "learning_rate": 1.6246845809907478e-06, "loss": 1.2099, "step": 3670 }, { "epoch": 0.016251272743370668, "grad_norm": 2.67206405179542, "learning_rate": 1.625127274337067e-06, "loss": 1.0423, "step": 3671 }, { "epoch": 0.016255699676833858, "grad_norm": 2.912357126028766, "learning_rate": 1.625569967683386e-06, "loss": 0.9787, "step": 3672 }, { "epoch": 0.01626012661029705, "grad_norm": 3.5375148666587832, "learning_rate": 1.6260126610297047e-06, "loss": 1.1764, "step": 3673 }, { "epoch": 0.016264553543760236, "grad_norm": 3.0442463380973326, "learning_rate": 1.626455354376024e-06, "loss": 0.9808, "step": 3674 }, { "epoch": 0.016268980477223426, "grad_norm": 2.6472315684583694, "learning_rate": 1.6268980477223428e-06, "loss": 0.4335, "step": 3675 }, { "epoch": 0.016273407410686617, "grad_norm": 6.447430363028068, "learning_rate": 1.6273407410686617e-06, "loss": 1.8753, "step": 3676 }, { "epoch": 0.016277834344149807, "grad_norm": 2.923222168403921, "learning_rate": 1.6277834344149809e-06, "loss": 0.719, "step": 3677 }, { "epoch": 0.016282261277612998, "grad_norm": 3.3000468343379405, "learning_rate": 1.6282261277612998e-06, "loss": 1.005, "step": 3678 }, { "epoch": 0.01628668821107619, "grad_norm": 2.5468820473181335, "learning_rate": 1.6286688211076188e-06, "loss": 0.5724, "step": 3679 }, { "epoch": 0.01629111514453938, "grad_norm": 2.74741181813776, "learning_rate": 1.629111514453938e-06, "loss": 0.9494, "step": 3680 }, { "epoch": 0.01629554207800257, "grad_norm": 2.2440808482033097, "learning_rate": 1.6295542078002569e-06, "loss": 0.4679, "step": 3681 }, { "epoch": 0.016299969011465756, "grad_norm": 2.792438828063964, "learning_rate": 1.6299969011465758e-06, "loss": 0.7338, "step": 3682 }, { "epoch": 0.016304395944928947, "grad_norm": 4.125506249104031, "learning_rate": 1.630439594492895e-06, "loss": 1.0442, "step": 3683 }, { "epoch": 0.016308822878392137, "grad_norm": 2.74454763804645, "learning_rate": 1.630882287839214e-06, "loss": 0.68, "step": 3684 }, { "epoch": 0.016313249811855328, "grad_norm": 2.8392980832797265, "learning_rate": 1.6313249811855329e-06, "loss": 1.0242, "step": 3685 }, { "epoch": 0.01631767674531852, "grad_norm": 3.1909229298029116, "learning_rate": 1.631767674531852e-06, "loss": 0.8236, "step": 3686 }, { "epoch": 0.01632210367878171, "grad_norm": 2.612113928876529, "learning_rate": 1.632210367878171e-06, "loss": 0.7048, "step": 3687 }, { "epoch": 0.0163265306122449, "grad_norm": 3.0800197957136772, "learning_rate": 1.6326530612244897e-06, "loss": 0.8728, "step": 3688 }, { "epoch": 0.016330957545708086, "grad_norm": 3.5040396004317484, "learning_rate": 1.633095754570809e-06, "loss": 0.9351, "step": 3689 }, { "epoch": 0.016335384479171277, "grad_norm": 3.6459955885292272, "learning_rate": 1.6335384479171278e-06, "loss": 1.0336, "step": 3690 }, { "epoch": 0.016339811412634467, "grad_norm": 3.7421747727113703, "learning_rate": 1.6339811412634468e-06, "loss": 0.917, "step": 3691 }, { "epoch": 0.016344238346097658, "grad_norm": 2.6409350485509018, "learning_rate": 1.634423834609766e-06, "loss": 0.9163, "step": 3692 }, { "epoch": 0.01634866527956085, "grad_norm": 2.5948293193537224, "learning_rate": 1.6348665279560849e-06, "loss": 0.5012, "step": 3693 }, { "epoch": 0.01635309221302404, "grad_norm": 3.0632901079185664, "learning_rate": 1.6353092213024038e-06, "loss": 0.8579, "step": 3694 }, { "epoch": 0.01635751914648723, "grad_norm": 2.76417341582196, "learning_rate": 1.635751914648723e-06, "loss": 0.6768, "step": 3695 }, { "epoch": 0.01636194607995042, "grad_norm": 2.7415137134100296, "learning_rate": 1.636194607995042e-06, "loss": 0.8508, "step": 3696 }, { "epoch": 0.016366373013413607, "grad_norm": 2.8179660915381772, "learning_rate": 1.6366373013413609e-06, "loss": 0.8008, "step": 3697 }, { "epoch": 0.016370799946876798, "grad_norm": 3.449408540155003, "learning_rate": 1.63707999468768e-06, "loss": 1.3129, "step": 3698 }, { "epoch": 0.016375226880339988, "grad_norm": 2.6462188056643945, "learning_rate": 1.637522688033999e-06, "loss": 0.7982, "step": 3699 }, { "epoch": 0.01637965381380318, "grad_norm": 3.3907466041330667, "learning_rate": 1.637965381380318e-06, "loss": 0.7596, "step": 3700 }, { "epoch": 0.01638408074726637, "grad_norm": 2.5375835404304086, "learning_rate": 1.638408074726637e-06, "loss": 0.6398, "step": 3701 }, { "epoch": 0.01638850768072956, "grad_norm": 2.7835521220011774, "learning_rate": 1.638850768072956e-06, "loss": 1.0235, "step": 3702 }, { "epoch": 0.01639293461419275, "grad_norm": 2.8007198049812096, "learning_rate": 1.6392934614192748e-06, "loss": 0.9152, "step": 3703 }, { "epoch": 0.016397361547655937, "grad_norm": 2.8870391933781443, "learning_rate": 1.6397361547655941e-06, "loss": 0.7359, "step": 3704 }, { "epoch": 0.016401788481119128, "grad_norm": 2.8670610817865914, "learning_rate": 1.6401788481119129e-06, "loss": 0.5907, "step": 3705 }, { "epoch": 0.016406215414582318, "grad_norm": 3.325675355902704, "learning_rate": 1.6406215414582318e-06, "loss": 1.0665, "step": 3706 }, { "epoch": 0.01641064234804551, "grad_norm": 3.0226267746622946, "learning_rate": 1.641064234804551e-06, "loss": 0.9474, "step": 3707 }, { "epoch": 0.0164150692815087, "grad_norm": 2.3501345530243243, "learning_rate": 1.64150692815087e-06, "loss": 0.547, "step": 3708 }, { "epoch": 0.01641949621497189, "grad_norm": 3.542646957082915, "learning_rate": 1.6419496214971889e-06, "loss": 0.9069, "step": 3709 }, { "epoch": 0.01642392314843508, "grad_norm": 2.4212162154424823, "learning_rate": 1.642392314843508e-06, "loss": 0.744, "step": 3710 }, { "epoch": 0.01642835008189827, "grad_norm": 2.750848478484111, "learning_rate": 1.642835008189827e-06, "loss": 0.4609, "step": 3711 }, { "epoch": 0.016432777015361458, "grad_norm": 2.7270795961213645, "learning_rate": 1.643277701536146e-06, "loss": 0.7614, "step": 3712 }, { "epoch": 0.016437203948824648, "grad_norm": 2.804110345329747, "learning_rate": 1.643720394882465e-06, "loss": 0.7723, "step": 3713 }, { "epoch": 0.01644163088228784, "grad_norm": 3.642435096899856, "learning_rate": 1.644163088228784e-06, "loss": 1.4132, "step": 3714 }, { "epoch": 0.01644605781575103, "grad_norm": 2.6003016430791766, "learning_rate": 1.644605781575103e-06, "loss": 0.7482, "step": 3715 }, { "epoch": 0.01645048474921422, "grad_norm": 4.178809107803249, "learning_rate": 1.6450484749214221e-06, "loss": 0.9053, "step": 3716 }, { "epoch": 0.01645491168267741, "grad_norm": 2.851589362376572, "learning_rate": 1.645491168267741e-06, "loss": 0.7926, "step": 3717 }, { "epoch": 0.0164593386161406, "grad_norm": 4.289916416270282, "learning_rate": 1.6459338616140602e-06, "loss": 0.8703, "step": 3718 }, { "epoch": 0.016463765549603788, "grad_norm": 3.464436962345318, "learning_rate": 1.6463765549603792e-06, "loss": 1.248, "step": 3719 }, { "epoch": 0.01646819248306698, "grad_norm": 2.5726587012595457, "learning_rate": 1.6468192483066981e-06, "loss": 0.8369, "step": 3720 }, { "epoch": 0.01647261941653017, "grad_norm": 2.6526871104014265, "learning_rate": 1.6472619416530173e-06, "loss": 0.6434, "step": 3721 }, { "epoch": 0.01647704634999336, "grad_norm": 2.451912371912052, "learning_rate": 1.6477046349993362e-06, "loss": 0.5955, "step": 3722 }, { "epoch": 0.01648147328345655, "grad_norm": 2.8845529509636485, "learning_rate": 1.648147328345655e-06, "loss": 0.7196, "step": 3723 }, { "epoch": 0.01648590021691974, "grad_norm": 3.0442153995353745, "learning_rate": 1.6485900216919743e-06, "loss": 0.6632, "step": 3724 }, { "epoch": 0.01649032715038293, "grad_norm": 2.4945483003628137, "learning_rate": 1.649032715038293e-06, "loss": 0.7085, "step": 3725 }, { "epoch": 0.01649475408384612, "grad_norm": 2.8101528667966207, "learning_rate": 1.649475408384612e-06, "loss": 0.7158, "step": 3726 }, { "epoch": 0.01649918101730931, "grad_norm": 2.272056646975871, "learning_rate": 1.6499181017309312e-06, "loss": 0.6009, "step": 3727 }, { "epoch": 0.0165036079507725, "grad_norm": 2.436663548965249, "learning_rate": 1.6503607950772501e-06, "loss": 0.5518, "step": 3728 }, { "epoch": 0.01650803488423569, "grad_norm": 3.3514785937547344, "learning_rate": 1.650803488423569e-06, "loss": 0.8167, "step": 3729 }, { "epoch": 0.01651246181769888, "grad_norm": 3.2750283888534657, "learning_rate": 1.6512461817698882e-06, "loss": 1.0686, "step": 3730 }, { "epoch": 0.01651688875116207, "grad_norm": 3.6874248107193046, "learning_rate": 1.6516888751162072e-06, "loss": 1.3824, "step": 3731 }, { "epoch": 0.01652131568462526, "grad_norm": 2.7044153724668636, "learning_rate": 1.6521315684625261e-06, "loss": 0.7286, "step": 3732 }, { "epoch": 0.01652574261808845, "grad_norm": 2.8573433921024076, "learning_rate": 1.6525742618088453e-06, "loss": 0.784, "step": 3733 }, { "epoch": 0.01653016955155164, "grad_norm": 3.3287929751269956, "learning_rate": 1.6530169551551642e-06, "loss": 0.8898, "step": 3734 }, { "epoch": 0.01653459648501483, "grad_norm": 2.6274387622526354, "learning_rate": 1.6534596485014832e-06, "loss": 0.875, "step": 3735 }, { "epoch": 0.01653902341847802, "grad_norm": 2.5834411031714795, "learning_rate": 1.6539023418478023e-06, "loss": 0.6498, "step": 3736 }, { "epoch": 0.01654345035194121, "grad_norm": 3.0085272040891846, "learning_rate": 1.6543450351941213e-06, "loss": 0.907, "step": 3737 }, { "epoch": 0.0165478772854044, "grad_norm": 3.3542273340866133, "learning_rate": 1.65478772854044e-06, "loss": 1.1399, "step": 3738 }, { "epoch": 0.01655230421886759, "grad_norm": 2.9542429702281754, "learning_rate": 1.6552304218867594e-06, "loss": 0.6488, "step": 3739 }, { "epoch": 0.01655673115233078, "grad_norm": 2.5751830528697552, "learning_rate": 1.6556731152330781e-06, "loss": 0.7834, "step": 3740 }, { "epoch": 0.016561158085793972, "grad_norm": 3.0283154580204927, "learning_rate": 1.656115808579397e-06, "loss": 0.7971, "step": 3741 }, { "epoch": 0.01656558501925716, "grad_norm": 3.1599047321637515, "learning_rate": 1.6565585019257162e-06, "loss": 0.5351, "step": 3742 }, { "epoch": 0.01657001195272035, "grad_norm": 3.4100817158446786, "learning_rate": 1.6570011952720352e-06, "loss": 0.7733, "step": 3743 }, { "epoch": 0.01657443888618354, "grad_norm": 4.39501675191258, "learning_rate": 1.6574438886183541e-06, "loss": 1.2441, "step": 3744 }, { "epoch": 0.01657886581964673, "grad_norm": 2.6456782372273957, "learning_rate": 1.6578865819646733e-06, "loss": 0.7491, "step": 3745 }, { "epoch": 0.01658329275310992, "grad_norm": 3.313273667597062, "learning_rate": 1.6583292753109922e-06, "loss": 0.8443, "step": 3746 }, { "epoch": 0.01658771968657311, "grad_norm": 3.2708037819312765, "learning_rate": 1.6587719686573112e-06, "loss": 0.8502, "step": 3747 }, { "epoch": 0.016592146620036302, "grad_norm": 2.719865626664456, "learning_rate": 1.6592146620036303e-06, "loss": 0.986, "step": 3748 }, { "epoch": 0.01659657355349949, "grad_norm": 3.135979223617, "learning_rate": 1.6596573553499493e-06, "loss": 0.891, "step": 3749 }, { "epoch": 0.01660100048696268, "grad_norm": 2.7559503378529184, "learning_rate": 1.6601000486962682e-06, "loss": 0.6461, "step": 3750 }, { "epoch": 0.01660542742042587, "grad_norm": 2.888978196655017, "learning_rate": 1.6605427420425874e-06, "loss": 0.5616, "step": 3751 }, { "epoch": 0.01660985435388906, "grad_norm": 2.5854319099746035, "learning_rate": 1.6609854353889063e-06, "loss": 0.8493, "step": 3752 }, { "epoch": 0.01661428128735225, "grad_norm": 2.4010047540418995, "learning_rate": 1.661428128735225e-06, "loss": 0.6714, "step": 3753 }, { "epoch": 0.01661870822081544, "grad_norm": 3.2019811186789306, "learning_rate": 1.6618708220815445e-06, "loss": 0.9118, "step": 3754 }, { "epoch": 0.016623135154278632, "grad_norm": 2.8419163561513536, "learning_rate": 1.6623135154278632e-06, "loss": 0.895, "step": 3755 }, { "epoch": 0.016627562087741823, "grad_norm": 3.181995967880877, "learning_rate": 1.6627562087741821e-06, "loss": 0.5287, "step": 3756 }, { "epoch": 0.01663198902120501, "grad_norm": 2.577951662394244, "learning_rate": 1.6631989021205013e-06, "loss": 0.7124, "step": 3757 }, { "epoch": 0.0166364159546682, "grad_norm": 3.1698470226926694, "learning_rate": 1.6636415954668202e-06, "loss": 0.8725, "step": 3758 }, { "epoch": 0.01664084288813139, "grad_norm": 2.9987521719862866, "learning_rate": 1.6640842888131392e-06, "loss": 0.7693, "step": 3759 }, { "epoch": 0.01664526982159458, "grad_norm": 2.911751342156978, "learning_rate": 1.6645269821594583e-06, "loss": 0.7134, "step": 3760 }, { "epoch": 0.016649696755057772, "grad_norm": 2.715012616577982, "learning_rate": 1.6649696755057773e-06, "loss": 0.738, "step": 3761 }, { "epoch": 0.016654123688520962, "grad_norm": 2.6871473653492868, "learning_rate": 1.6654123688520962e-06, "loss": 0.8403, "step": 3762 }, { "epoch": 0.016658550621984153, "grad_norm": 2.8211049846612775, "learning_rate": 1.6658550621984154e-06, "loss": 0.8931, "step": 3763 }, { "epoch": 0.016662977555447343, "grad_norm": 3.457573608219877, "learning_rate": 1.6662977555447343e-06, "loss": 0.9962, "step": 3764 }, { "epoch": 0.01666740448891053, "grad_norm": 2.527163785133425, "learning_rate": 1.6667404488910533e-06, "loss": 0.786, "step": 3765 }, { "epoch": 0.01667183142237372, "grad_norm": 2.7094217267965712, "learning_rate": 1.6671831422373725e-06, "loss": 0.6306, "step": 3766 }, { "epoch": 0.01667625835583691, "grad_norm": 2.3954506106807227, "learning_rate": 1.6676258355836914e-06, "loss": 0.7308, "step": 3767 }, { "epoch": 0.016680685289300102, "grad_norm": 4.004454607786643, "learning_rate": 1.6680685289300103e-06, "loss": 0.855, "step": 3768 }, { "epoch": 0.016685112222763292, "grad_norm": 2.765958644693945, "learning_rate": 1.6685112222763295e-06, "loss": 0.6205, "step": 3769 }, { "epoch": 0.016689539156226483, "grad_norm": 2.4007049035093324, "learning_rate": 1.6689539156226485e-06, "loss": 0.5946, "step": 3770 }, { "epoch": 0.016693966089689673, "grad_norm": 3.7231941509028985, "learning_rate": 1.6693966089689672e-06, "loss": 1.0806, "step": 3771 }, { "epoch": 0.01669839302315286, "grad_norm": 2.801316066758607, "learning_rate": 1.6698393023152866e-06, "loss": 0.9073, "step": 3772 }, { "epoch": 0.01670281995661605, "grad_norm": 2.8085206157316667, "learning_rate": 1.6702819956616053e-06, "loss": 0.7653, "step": 3773 }, { "epoch": 0.01670724689007924, "grad_norm": 3.4528663749754065, "learning_rate": 1.6707246890079242e-06, "loss": 0.9917, "step": 3774 }, { "epoch": 0.016711673823542432, "grad_norm": 3.2444733677144817, "learning_rate": 1.6711673823542434e-06, "loss": 1.0255, "step": 3775 }, { "epoch": 0.016716100757005622, "grad_norm": 3.0765029225341882, "learning_rate": 1.6716100757005623e-06, "loss": 0.8983, "step": 3776 }, { "epoch": 0.016720527690468813, "grad_norm": 2.451654909598495, "learning_rate": 1.6720527690468813e-06, "loss": 0.6992, "step": 3777 }, { "epoch": 0.016724954623932003, "grad_norm": 3.0933294890823486, "learning_rate": 1.6724954623932005e-06, "loss": 0.7822, "step": 3778 }, { "epoch": 0.016729381557395194, "grad_norm": 3.6436337777403636, "learning_rate": 1.6729381557395194e-06, "loss": 1.1345, "step": 3779 }, { "epoch": 0.01673380849085838, "grad_norm": 2.9905270949967133, "learning_rate": 1.6733808490858383e-06, "loss": 0.861, "step": 3780 }, { "epoch": 0.01673823542432157, "grad_norm": 2.755507898536756, "learning_rate": 1.6738235424321575e-06, "loss": 0.5596, "step": 3781 }, { "epoch": 0.016742662357784762, "grad_norm": 2.931859134674878, "learning_rate": 1.6742662357784765e-06, "loss": 0.6548, "step": 3782 }, { "epoch": 0.016747089291247953, "grad_norm": 3.9714662430575065, "learning_rate": 1.6747089291247954e-06, "loss": 1.3334, "step": 3783 }, { "epoch": 0.016751516224711143, "grad_norm": 2.7053666039681907, "learning_rate": 1.6751516224711146e-06, "loss": 0.8207, "step": 3784 }, { "epoch": 0.016755943158174334, "grad_norm": 4.136463780770617, "learning_rate": 1.6755943158174335e-06, "loss": 1.4242, "step": 3785 }, { "epoch": 0.016760370091637524, "grad_norm": 2.5322904354954217, "learning_rate": 1.6760370091637522e-06, "loss": 0.8811, "step": 3786 }, { "epoch": 0.01676479702510071, "grad_norm": 2.9445098827540166, "learning_rate": 1.6764797025100716e-06, "loss": 0.6178, "step": 3787 }, { "epoch": 0.0167692239585639, "grad_norm": 3.1537432963526966, "learning_rate": 1.6769223958563903e-06, "loss": 0.6332, "step": 3788 }, { "epoch": 0.016773650892027092, "grad_norm": 3.143016946557452, "learning_rate": 1.6773650892027093e-06, "loss": 1.1798, "step": 3789 }, { "epoch": 0.016778077825490283, "grad_norm": 3.0390929241131968, "learning_rate": 1.6778077825490285e-06, "loss": 0.724, "step": 3790 }, { "epoch": 0.016782504758953473, "grad_norm": 2.9490970568064165, "learning_rate": 1.6782504758953474e-06, "loss": 0.9311, "step": 3791 }, { "epoch": 0.016786931692416664, "grad_norm": 2.526584201823748, "learning_rate": 1.6786931692416663e-06, "loss": 0.7205, "step": 3792 }, { "epoch": 0.016791358625879854, "grad_norm": 3.661057267443886, "learning_rate": 1.6791358625879855e-06, "loss": 1.2812, "step": 3793 }, { "epoch": 0.016795785559343045, "grad_norm": 2.638081788264193, "learning_rate": 1.6795785559343045e-06, "loss": 0.632, "step": 3794 }, { "epoch": 0.01680021249280623, "grad_norm": 2.462870965987474, "learning_rate": 1.6800212492806234e-06, "loss": 0.6734, "step": 3795 }, { "epoch": 0.016804639426269422, "grad_norm": 4.073778706532516, "learning_rate": 1.6804639426269426e-06, "loss": 1.0399, "step": 3796 }, { "epoch": 0.016809066359732613, "grad_norm": 2.729378492739424, "learning_rate": 1.6809066359732615e-06, "loss": 0.9967, "step": 3797 }, { "epoch": 0.016813493293195803, "grad_norm": 3.9922919613609094, "learning_rate": 1.6813493293195805e-06, "loss": 1.0293, "step": 3798 }, { "epoch": 0.016817920226658994, "grad_norm": 3.4202273187785357, "learning_rate": 1.6817920226658996e-06, "loss": 1.1238, "step": 3799 }, { "epoch": 0.016822347160122184, "grad_norm": 3.107368134968925, "learning_rate": 1.6822347160122186e-06, "loss": 0.7942, "step": 3800 }, { "epoch": 0.016826774093585375, "grad_norm": 2.854544247750824, "learning_rate": 1.6826774093585375e-06, "loss": 0.8012, "step": 3801 }, { "epoch": 0.016831201027048562, "grad_norm": 2.6563292472039994, "learning_rate": 1.6831201027048567e-06, "loss": 0.7827, "step": 3802 }, { "epoch": 0.016835627960511752, "grad_norm": 2.334042835479191, "learning_rate": 1.6835627960511756e-06, "loss": 0.4514, "step": 3803 }, { "epoch": 0.016840054893974943, "grad_norm": 3.153180994677383, "learning_rate": 1.6840054893974943e-06, "loss": 0.8817, "step": 3804 }, { "epoch": 0.016844481827438133, "grad_norm": 2.633081486641202, "learning_rate": 1.6844481827438135e-06, "loss": 0.5537, "step": 3805 }, { "epoch": 0.016848908760901324, "grad_norm": 2.9620126330510788, "learning_rate": 1.6848908760901325e-06, "loss": 0.792, "step": 3806 }, { "epoch": 0.016853335694364514, "grad_norm": 3.1699284359935787, "learning_rate": 1.6853335694364514e-06, "loss": 1.1131, "step": 3807 }, { "epoch": 0.016857762627827705, "grad_norm": 3.9809430982388996, "learning_rate": 1.6857762627827706e-06, "loss": 1.0699, "step": 3808 }, { "epoch": 0.016862189561290895, "grad_norm": 2.9297287731267354, "learning_rate": 1.6862189561290895e-06, "loss": 0.811, "step": 3809 }, { "epoch": 0.016866616494754082, "grad_norm": 3.344133373927534, "learning_rate": 1.6866616494754085e-06, "loss": 0.7189, "step": 3810 }, { "epoch": 0.016871043428217273, "grad_norm": 3.188013504995113, "learning_rate": 1.6871043428217276e-06, "loss": 0.6885, "step": 3811 }, { "epoch": 0.016875470361680463, "grad_norm": 2.3008696466148826, "learning_rate": 1.6875470361680466e-06, "loss": 0.5025, "step": 3812 }, { "epoch": 0.016879897295143654, "grad_norm": 2.677914700855473, "learning_rate": 1.6879897295143655e-06, "loss": 0.823, "step": 3813 }, { "epoch": 0.016884324228606844, "grad_norm": 2.6112481000679995, "learning_rate": 1.6884324228606847e-06, "loss": 0.762, "step": 3814 }, { "epoch": 0.016888751162070035, "grad_norm": 2.85520231855189, "learning_rate": 1.6888751162070036e-06, "loss": 1.061, "step": 3815 }, { "epoch": 0.016893178095533225, "grad_norm": 3.095402370558029, "learning_rate": 1.6893178095533226e-06, "loss": 0.6602, "step": 3816 }, { "epoch": 0.016897605028996412, "grad_norm": 2.3314883948230656, "learning_rate": 1.6897605028996417e-06, "loss": 0.6024, "step": 3817 }, { "epoch": 0.016902031962459603, "grad_norm": 3.4315589869303795, "learning_rate": 1.6902031962459607e-06, "loss": 0.8416, "step": 3818 }, { "epoch": 0.016906458895922793, "grad_norm": 3.122582374501398, "learning_rate": 1.6906458895922794e-06, "loss": 1.1107, "step": 3819 }, { "epoch": 0.016910885829385984, "grad_norm": 3.596237741991738, "learning_rate": 1.6910885829385988e-06, "loss": 1.0151, "step": 3820 }, { "epoch": 0.016915312762849175, "grad_norm": 2.9257367746106273, "learning_rate": 1.6915312762849175e-06, "loss": 0.6407, "step": 3821 }, { "epoch": 0.016919739696312365, "grad_norm": 3.0824303290464448, "learning_rate": 1.6919739696312365e-06, "loss": 0.8675, "step": 3822 }, { "epoch": 0.016924166629775556, "grad_norm": 3.0437937536436714, "learning_rate": 1.6924166629775556e-06, "loss": 1.0816, "step": 3823 }, { "epoch": 0.016928593563238746, "grad_norm": 2.966923917447272, "learning_rate": 1.6928593563238746e-06, "loss": 0.6855, "step": 3824 }, { "epoch": 0.016933020496701933, "grad_norm": 3.0430900717995826, "learning_rate": 1.6933020496701935e-06, "loss": 0.9801, "step": 3825 }, { "epoch": 0.016937447430165124, "grad_norm": 2.849782336509599, "learning_rate": 1.6937447430165127e-06, "loss": 1.0175, "step": 3826 }, { "epoch": 0.016941874363628314, "grad_norm": 3.0181803178521704, "learning_rate": 1.6941874363628316e-06, "loss": 0.8845, "step": 3827 }, { "epoch": 0.016946301297091505, "grad_norm": 3.1525165700304933, "learning_rate": 1.6946301297091506e-06, "loss": 0.5046, "step": 3828 }, { "epoch": 0.016950728230554695, "grad_norm": 2.5843141785383796, "learning_rate": 1.6950728230554697e-06, "loss": 0.5957, "step": 3829 }, { "epoch": 0.016955155164017886, "grad_norm": 3.453257784270664, "learning_rate": 1.6955155164017887e-06, "loss": 0.975, "step": 3830 }, { "epoch": 0.016959582097481076, "grad_norm": 3.7454008337374134, "learning_rate": 1.6959582097481076e-06, "loss": 1.0112, "step": 3831 }, { "epoch": 0.016964009030944267, "grad_norm": 3.464152963779453, "learning_rate": 1.6964009030944268e-06, "loss": 1.0401, "step": 3832 }, { "epoch": 0.016968435964407454, "grad_norm": 3.1317488719517375, "learning_rate": 1.6968435964407457e-06, "loss": 0.6763, "step": 3833 }, { "epoch": 0.016972862897870644, "grad_norm": 3.405095825613958, "learning_rate": 1.6972862897870645e-06, "loss": 0.9786, "step": 3834 }, { "epoch": 0.016977289831333835, "grad_norm": 3.705353824995939, "learning_rate": 1.6977289831333838e-06, "loss": 1.1165, "step": 3835 }, { "epoch": 0.016981716764797025, "grad_norm": 3.122051538243051, "learning_rate": 1.6981716764797026e-06, "loss": 1.1015, "step": 3836 }, { "epoch": 0.016986143698260216, "grad_norm": 2.382417482281608, "learning_rate": 1.6986143698260215e-06, "loss": 0.6786, "step": 3837 }, { "epoch": 0.016990570631723406, "grad_norm": 3.5756193393108746, "learning_rate": 1.6990570631723407e-06, "loss": 0.7125, "step": 3838 }, { "epoch": 0.016994997565186597, "grad_norm": 3.915678371833362, "learning_rate": 1.6994997565186596e-06, "loss": 1.2819, "step": 3839 }, { "epoch": 0.016999424498649784, "grad_norm": 3.338383087971275, "learning_rate": 1.6999424498649786e-06, "loss": 1.0331, "step": 3840 }, { "epoch": 0.017003851432112974, "grad_norm": 2.9369539871588275, "learning_rate": 1.7003851432112977e-06, "loss": 0.6925, "step": 3841 }, { "epoch": 0.017008278365576165, "grad_norm": 3.35719583766334, "learning_rate": 1.7008278365576167e-06, "loss": 0.7483, "step": 3842 }, { "epoch": 0.017012705299039355, "grad_norm": 2.697727275493436, "learning_rate": 1.7012705299039356e-06, "loss": 0.9219, "step": 3843 }, { "epoch": 0.017017132232502546, "grad_norm": 4.013603218298534, "learning_rate": 1.7017132232502548e-06, "loss": 1.2257, "step": 3844 }, { "epoch": 0.017021559165965736, "grad_norm": 2.5292645127994073, "learning_rate": 1.7021559165965737e-06, "loss": 0.6403, "step": 3845 }, { "epoch": 0.017025986099428927, "grad_norm": 3.2678672092803613, "learning_rate": 1.7025986099428927e-06, "loss": 0.9691, "step": 3846 }, { "epoch": 0.017030413032892117, "grad_norm": 3.9061330926843603, "learning_rate": 1.7030413032892118e-06, "loss": 1.2598, "step": 3847 }, { "epoch": 0.017034839966355304, "grad_norm": 2.4708678592962277, "learning_rate": 1.7034839966355308e-06, "loss": 0.7865, "step": 3848 }, { "epoch": 0.017039266899818495, "grad_norm": 2.7715071009452674, "learning_rate": 1.7039266899818497e-06, "loss": 0.7941, "step": 3849 }, { "epoch": 0.017043693833281685, "grad_norm": 2.910214985578328, "learning_rate": 1.7043693833281689e-06, "loss": 0.774, "step": 3850 }, { "epoch": 0.017048120766744876, "grad_norm": 3.204479828833712, "learning_rate": 1.7048120766744878e-06, "loss": 0.8627, "step": 3851 }, { "epoch": 0.017052547700208066, "grad_norm": 2.392732318806, "learning_rate": 1.7052547700208066e-06, "loss": 0.3984, "step": 3852 }, { "epoch": 0.017056974633671257, "grad_norm": 3.111321893401225, "learning_rate": 1.705697463367126e-06, "loss": 1.1564, "step": 3853 }, { "epoch": 0.017061401567134447, "grad_norm": 3.4378472074633253, "learning_rate": 1.7061401567134447e-06, "loss": 0.6625, "step": 3854 }, { "epoch": 0.017065828500597634, "grad_norm": 2.8300450546569498, "learning_rate": 1.7065828500597636e-06, "loss": 0.851, "step": 3855 }, { "epoch": 0.017070255434060825, "grad_norm": 2.778325590366212, "learning_rate": 1.7070255434060828e-06, "loss": 0.6813, "step": 3856 }, { "epoch": 0.017074682367524015, "grad_norm": 3.3317264858037765, "learning_rate": 1.7074682367524017e-06, "loss": 0.9684, "step": 3857 }, { "epoch": 0.017079109300987206, "grad_norm": 3.053893269968303, "learning_rate": 1.7079109300987207e-06, "loss": 0.5506, "step": 3858 }, { "epoch": 0.017083536234450396, "grad_norm": 3.013132104330856, "learning_rate": 1.7083536234450398e-06, "loss": 1.1765, "step": 3859 }, { "epoch": 0.017087963167913587, "grad_norm": 3.114726324018863, "learning_rate": 1.7087963167913588e-06, "loss": 0.9988, "step": 3860 }, { "epoch": 0.017092390101376777, "grad_norm": 2.397819252191555, "learning_rate": 1.7092390101376777e-06, "loss": 0.5813, "step": 3861 }, { "epoch": 0.017096817034839968, "grad_norm": 2.9550908479210585, "learning_rate": 1.7096817034839969e-06, "loss": 0.8349, "step": 3862 }, { "epoch": 0.017101243968303155, "grad_norm": 2.698473318539608, "learning_rate": 1.7101243968303158e-06, "loss": 0.8023, "step": 3863 }, { "epoch": 0.017105670901766346, "grad_norm": 2.765076894997254, "learning_rate": 1.7105670901766348e-06, "loss": 0.8675, "step": 3864 }, { "epoch": 0.017110097835229536, "grad_norm": 3.9019577227052067, "learning_rate": 1.711009783522954e-06, "loss": 1.081, "step": 3865 }, { "epoch": 0.017114524768692727, "grad_norm": 2.9065587361085474, "learning_rate": 1.7114524768692729e-06, "loss": 0.8219, "step": 3866 }, { "epoch": 0.017118951702155917, "grad_norm": 3.7706848398727497, "learning_rate": 1.7118951702155916e-06, "loss": 1.0248, "step": 3867 }, { "epoch": 0.017123378635619108, "grad_norm": 2.6904648352781364, "learning_rate": 1.712337863561911e-06, "loss": 0.9711, "step": 3868 }, { "epoch": 0.017127805569082298, "grad_norm": 2.313761747566206, "learning_rate": 1.7127805569082297e-06, "loss": 0.4546, "step": 3869 }, { "epoch": 0.017132232502545485, "grad_norm": 3.1470235890933447, "learning_rate": 1.7132232502545487e-06, "loss": 1.0657, "step": 3870 }, { "epoch": 0.017136659436008676, "grad_norm": 3.9976518278051625, "learning_rate": 1.7136659436008678e-06, "loss": 1.0682, "step": 3871 }, { "epoch": 0.017141086369471866, "grad_norm": 4.184820199970674, "learning_rate": 1.7141086369471868e-06, "loss": 1.1425, "step": 3872 }, { "epoch": 0.017145513302935057, "grad_norm": 3.4766932705012965, "learning_rate": 1.7145513302935057e-06, "loss": 0.6341, "step": 3873 }, { "epoch": 0.017149940236398247, "grad_norm": 2.8457599137444465, "learning_rate": 1.7149940236398249e-06, "loss": 0.9798, "step": 3874 }, { "epoch": 0.017154367169861438, "grad_norm": 3.5025558679972266, "learning_rate": 1.7154367169861438e-06, "loss": 1.0777, "step": 3875 }, { "epoch": 0.017158794103324628, "grad_norm": 4.130832828386288, "learning_rate": 1.7158794103324628e-06, "loss": 1.448, "step": 3876 }, { "epoch": 0.01716322103678782, "grad_norm": 2.7913319371900376, "learning_rate": 1.716322103678782e-06, "loss": 0.5303, "step": 3877 }, { "epoch": 0.017167647970251006, "grad_norm": 3.3223882790598918, "learning_rate": 1.7167647970251009e-06, "loss": 1.1194, "step": 3878 }, { "epoch": 0.017172074903714196, "grad_norm": 3.5524525297123675, "learning_rate": 1.7172074903714198e-06, "loss": 1.2485, "step": 3879 }, { "epoch": 0.017176501837177387, "grad_norm": 3.1752576222237905, "learning_rate": 1.717650183717739e-06, "loss": 0.7896, "step": 3880 }, { "epoch": 0.017180928770640577, "grad_norm": 2.8641600672962193, "learning_rate": 1.718092877064058e-06, "loss": 0.6996, "step": 3881 }, { "epoch": 0.017185355704103768, "grad_norm": 2.9618945515091224, "learning_rate": 1.7185355704103767e-06, "loss": 0.891, "step": 3882 }, { "epoch": 0.017189782637566958, "grad_norm": 3.594944026532145, "learning_rate": 1.718978263756696e-06, "loss": 1.0064, "step": 3883 }, { "epoch": 0.01719420957103015, "grad_norm": 2.8498114545920172, "learning_rate": 1.7194209571030148e-06, "loss": 0.836, "step": 3884 }, { "epoch": 0.017198636504493336, "grad_norm": 2.4908609526411767, "learning_rate": 1.7198636504493337e-06, "loss": 0.8147, "step": 3885 }, { "epoch": 0.017203063437956526, "grad_norm": 3.125985813200785, "learning_rate": 1.7203063437956529e-06, "loss": 0.8804, "step": 3886 }, { "epoch": 0.017207490371419717, "grad_norm": 2.5006722996225985, "learning_rate": 1.7207490371419718e-06, "loss": 0.5341, "step": 3887 }, { "epoch": 0.017211917304882907, "grad_norm": 3.034565488261091, "learning_rate": 1.7211917304882908e-06, "loss": 0.8086, "step": 3888 }, { "epoch": 0.017216344238346098, "grad_norm": 3.149964580871383, "learning_rate": 1.72163442383461e-06, "loss": 0.9946, "step": 3889 }, { "epoch": 0.01722077117180929, "grad_norm": 3.7771358110729887, "learning_rate": 1.7220771171809289e-06, "loss": 1.2077, "step": 3890 }, { "epoch": 0.01722519810527248, "grad_norm": 2.9997045811904908, "learning_rate": 1.7225198105272478e-06, "loss": 0.5683, "step": 3891 }, { "epoch": 0.01722962503873567, "grad_norm": 3.0931007585111137, "learning_rate": 1.722962503873567e-06, "loss": 0.7557, "step": 3892 }, { "epoch": 0.017234051972198856, "grad_norm": 2.809029932255592, "learning_rate": 1.723405197219886e-06, "loss": 1.0137, "step": 3893 }, { "epoch": 0.017238478905662047, "grad_norm": 2.974115072904784, "learning_rate": 1.7238478905662049e-06, "loss": 0.7176, "step": 3894 }, { "epoch": 0.017242905839125237, "grad_norm": 2.629712687882522, "learning_rate": 1.724290583912524e-06, "loss": 0.6609, "step": 3895 }, { "epoch": 0.017247332772588428, "grad_norm": 2.8510887801155054, "learning_rate": 1.724733277258843e-06, "loss": 0.7605, "step": 3896 }, { "epoch": 0.01725175970605162, "grad_norm": 3.379913646515439, "learning_rate": 1.725175970605162e-06, "loss": 0.8572, "step": 3897 }, { "epoch": 0.01725618663951481, "grad_norm": 2.834722309034113, "learning_rate": 1.725618663951481e-06, "loss": 0.7114, "step": 3898 }, { "epoch": 0.017260613572978, "grad_norm": 2.6098398890967403, "learning_rate": 1.7260613572978e-06, "loss": 0.8633, "step": 3899 }, { "epoch": 0.017265040506441186, "grad_norm": 3.6013687456427435, "learning_rate": 1.7265040506441188e-06, "loss": 0.9693, "step": 3900 }, { "epoch": 0.017269467439904377, "grad_norm": 3.467403706911126, "learning_rate": 1.7269467439904381e-06, "loss": 1.1218, "step": 3901 }, { "epoch": 0.017273894373367567, "grad_norm": 3.4029750497239437, "learning_rate": 1.7273894373367569e-06, "loss": 1.29, "step": 3902 }, { "epoch": 0.017278321306830758, "grad_norm": 2.9807968593788843, "learning_rate": 1.7278321306830758e-06, "loss": 0.7729, "step": 3903 }, { "epoch": 0.01728274824029395, "grad_norm": 2.755827692709053, "learning_rate": 1.728274824029395e-06, "loss": 0.8205, "step": 3904 }, { "epoch": 0.01728717517375714, "grad_norm": 3.5806838968223644, "learning_rate": 1.728717517375714e-06, "loss": 0.8896, "step": 3905 }, { "epoch": 0.01729160210722033, "grad_norm": 3.082740971302052, "learning_rate": 1.7291602107220329e-06, "loss": 0.8395, "step": 3906 }, { "epoch": 0.01729602904068352, "grad_norm": 2.4075719748284556, "learning_rate": 1.729602904068352e-06, "loss": 0.6163, "step": 3907 }, { "epoch": 0.017300455974146707, "grad_norm": 2.93200159942866, "learning_rate": 1.730045597414671e-06, "loss": 0.9134, "step": 3908 }, { "epoch": 0.017304882907609898, "grad_norm": 2.50201953292444, "learning_rate": 1.73048829076099e-06, "loss": 0.5012, "step": 3909 }, { "epoch": 0.017309309841073088, "grad_norm": 3.6811083835779512, "learning_rate": 1.730930984107309e-06, "loss": 0.9845, "step": 3910 }, { "epoch": 0.01731373677453628, "grad_norm": 3.3870450442136137, "learning_rate": 1.731373677453628e-06, "loss": 1.0535, "step": 3911 }, { "epoch": 0.01731816370799947, "grad_norm": 2.6880264861922516, "learning_rate": 1.731816370799947e-06, "loss": 0.7487, "step": 3912 }, { "epoch": 0.01732259064146266, "grad_norm": 2.4138724152422593, "learning_rate": 1.7322590641462661e-06, "loss": 0.5549, "step": 3913 }, { "epoch": 0.01732701757492585, "grad_norm": 2.922301776024741, "learning_rate": 1.732701757492585e-06, "loss": 0.6038, "step": 3914 }, { "epoch": 0.01733144450838904, "grad_norm": 2.6487971438898876, "learning_rate": 1.7331444508389038e-06, "loss": 0.8386, "step": 3915 }, { "epoch": 0.017335871441852228, "grad_norm": 2.5599700418142928, "learning_rate": 1.7335871441852232e-06, "loss": 0.7352, "step": 3916 }, { "epoch": 0.017340298375315418, "grad_norm": 3.507613338274039, "learning_rate": 1.734029837531542e-06, "loss": 0.703, "step": 3917 }, { "epoch": 0.01734472530877861, "grad_norm": 3.2132372979909145, "learning_rate": 1.7344725308778609e-06, "loss": 1.0517, "step": 3918 }, { "epoch": 0.0173491522422418, "grad_norm": 3.610070164168388, "learning_rate": 1.73491522422418e-06, "loss": 0.6672, "step": 3919 }, { "epoch": 0.01735357917570499, "grad_norm": 2.7787896782560577, "learning_rate": 1.735357917570499e-06, "loss": 0.7382, "step": 3920 }, { "epoch": 0.01735800610916818, "grad_norm": 3.1351657224922156, "learning_rate": 1.735800610916818e-06, "loss": 0.6504, "step": 3921 }, { "epoch": 0.01736243304263137, "grad_norm": 2.9419577936510786, "learning_rate": 1.736243304263137e-06, "loss": 0.9383, "step": 3922 }, { "epoch": 0.017366859976094558, "grad_norm": 3.2018453736013885, "learning_rate": 1.736685997609456e-06, "loss": 1.056, "step": 3923 }, { "epoch": 0.017371286909557748, "grad_norm": 2.708416986581597, "learning_rate": 1.737128690955775e-06, "loss": 0.883, "step": 3924 }, { "epoch": 0.01737571384302094, "grad_norm": 2.6051258529333516, "learning_rate": 1.7375713843020941e-06, "loss": 0.7105, "step": 3925 }, { "epoch": 0.01738014077648413, "grad_norm": 2.73023865977049, "learning_rate": 1.738014077648413e-06, "loss": 0.7471, "step": 3926 }, { "epoch": 0.01738456770994732, "grad_norm": 2.690649913826841, "learning_rate": 1.738456770994732e-06, "loss": 0.7766, "step": 3927 }, { "epoch": 0.01738899464341051, "grad_norm": 2.5253687772834694, "learning_rate": 1.7388994643410512e-06, "loss": 0.7005, "step": 3928 }, { "epoch": 0.0173934215768737, "grad_norm": 2.506332154302534, "learning_rate": 1.7393421576873701e-06, "loss": 0.6912, "step": 3929 }, { "epoch": 0.01739784851033689, "grad_norm": 2.618018871671262, "learning_rate": 1.739784851033689e-06, "loss": 0.7303, "step": 3930 }, { "epoch": 0.01740227544380008, "grad_norm": 3.0348339671384275, "learning_rate": 1.7402275443800083e-06, "loss": 0.9802, "step": 3931 }, { "epoch": 0.01740670237726327, "grad_norm": 2.4660508107151653, "learning_rate": 1.7406702377263272e-06, "loss": 0.8376, "step": 3932 }, { "epoch": 0.01741112931072646, "grad_norm": 3.1013805778583223, "learning_rate": 1.741112931072646e-06, "loss": 0.624, "step": 3933 }, { "epoch": 0.01741555624418965, "grad_norm": 3.1302935452908343, "learning_rate": 1.741555624418965e-06, "loss": 1.1395, "step": 3934 }, { "epoch": 0.01741998317765284, "grad_norm": 2.8731819109691847, "learning_rate": 1.741998317765284e-06, "loss": 0.7373, "step": 3935 }, { "epoch": 0.01742441011111603, "grad_norm": 3.2464209712444827, "learning_rate": 1.742441011111603e-06, "loss": 1.0206, "step": 3936 }, { "epoch": 0.01742883704457922, "grad_norm": 3.0790491496178065, "learning_rate": 1.7428837044579221e-06, "loss": 1.0061, "step": 3937 }, { "epoch": 0.01743326397804241, "grad_norm": 2.756958600913359, "learning_rate": 1.743326397804241e-06, "loss": 0.909, "step": 3938 }, { "epoch": 0.0174376909115056, "grad_norm": 2.7289486004794665, "learning_rate": 1.74376909115056e-06, "loss": 0.9049, "step": 3939 }, { "epoch": 0.01744211784496879, "grad_norm": 2.576258683559734, "learning_rate": 1.7442117844968792e-06, "loss": 0.7345, "step": 3940 }, { "epoch": 0.01744654477843198, "grad_norm": 2.518754593972757, "learning_rate": 1.7446544778431981e-06, "loss": 0.6085, "step": 3941 }, { "epoch": 0.01745097171189517, "grad_norm": 2.697798430778784, "learning_rate": 1.745097171189517e-06, "loss": 0.8503, "step": 3942 }, { "epoch": 0.01745539864535836, "grad_norm": 2.6751072675599494, "learning_rate": 1.7455398645358363e-06, "loss": 0.7914, "step": 3943 }, { "epoch": 0.01745982557882155, "grad_norm": 2.589888568058722, "learning_rate": 1.7459825578821552e-06, "loss": 0.7, "step": 3944 }, { "epoch": 0.017464252512284742, "grad_norm": 2.7466814824607297, "learning_rate": 1.7464252512284741e-06, "loss": 0.9833, "step": 3945 }, { "epoch": 0.01746867944574793, "grad_norm": 2.7216125351196316, "learning_rate": 1.7468679445747933e-06, "loss": 0.7137, "step": 3946 }, { "epoch": 0.01747310637921112, "grad_norm": 2.8672364524916305, "learning_rate": 1.7473106379211123e-06, "loss": 0.8141, "step": 3947 }, { "epoch": 0.01747753331267431, "grad_norm": 3.6640263328427913, "learning_rate": 1.747753331267431e-06, "loss": 0.9206, "step": 3948 }, { "epoch": 0.0174819602461375, "grad_norm": 3.4820110513706517, "learning_rate": 1.7481960246137504e-06, "loss": 1.0201, "step": 3949 }, { "epoch": 0.01748638717960069, "grad_norm": 2.46120959785742, "learning_rate": 1.748638717960069e-06, "loss": 0.7794, "step": 3950 }, { "epoch": 0.01749081411306388, "grad_norm": 3.035742732332923, "learning_rate": 1.749081411306388e-06, "loss": 0.8355, "step": 3951 }, { "epoch": 0.017495241046527072, "grad_norm": 2.7211647224027478, "learning_rate": 1.7495241046527072e-06, "loss": 0.5832, "step": 3952 }, { "epoch": 0.01749966797999026, "grad_norm": 2.783839651582143, "learning_rate": 1.7499667979990261e-06, "loss": 0.6197, "step": 3953 }, { "epoch": 0.01750409491345345, "grad_norm": 2.584378299003127, "learning_rate": 1.750409491345345e-06, "loss": 0.6897, "step": 3954 }, { "epoch": 0.01750852184691664, "grad_norm": 2.841142167687989, "learning_rate": 1.7508521846916643e-06, "loss": 0.9971, "step": 3955 }, { "epoch": 0.01751294878037983, "grad_norm": 2.469349958970825, "learning_rate": 1.7512948780379832e-06, "loss": 0.8098, "step": 3956 }, { "epoch": 0.01751737571384302, "grad_norm": 2.925483688348765, "learning_rate": 1.7517375713843021e-06, "loss": 0.9294, "step": 3957 }, { "epoch": 0.01752180264730621, "grad_norm": 3.284392513165227, "learning_rate": 1.7521802647306213e-06, "loss": 1.1722, "step": 3958 }, { "epoch": 0.017526229580769402, "grad_norm": 3.0148076577355973, "learning_rate": 1.7526229580769403e-06, "loss": 0.7558, "step": 3959 }, { "epoch": 0.017530656514232593, "grad_norm": 4.661715322121891, "learning_rate": 1.7530656514232592e-06, "loss": 1.578, "step": 3960 }, { "epoch": 0.01753508344769578, "grad_norm": 3.5807501200219916, "learning_rate": 1.7535083447695784e-06, "loss": 1.1456, "step": 3961 }, { "epoch": 0.01753951038115897, "grad_norm": 3.2637934804674504, "learning_rate": 1.7539510381158973e-06, "loss": 0.7484, "step": 3962 }, { "epoch": 0.01754393731462216, "grad_norm": 2.590379466977558, "learning_rate": 1.754393731462216e-06, "loss": 0.5054, "step": 3963 }, { "epoch": 0.01754836424808535, "grad_norm": 2.434719646444467, "learning_rate": 1.7548364248085354e-06, "loss": 0.5643, "step": 3964 }, { "epoch": 0.01755279118154854, "grad_norm": 2.8170342702023157, "learning_rate": 1.7552791181548541e-06, "loss": 0.774, "step": 3965 }, { "epoch": 0.017557218115011732, "grad_norm": 2.9634665749508735, "learning_rate": 1.755721811501173e-06, "loss": 0.7916, "step": 3966 }, { "epoch": 0.017561645048474923, "grad_norm": 2.4444807846250947, "learning_rate": 1.7561645048474923e-06, "loss": 0.6216, "step": 3967 }, { "epoch": 0.01756607198193811, "grad_norm": 2.766998893345153, "learning_rate": 1.7566071981938112e-06, "loss": 0.8945, "step": 3968 }, { "epoch": 0.0175704989154013, "grad_norm": 3.4815166425709556, "learning_rate": 1.7570498915401301e-06, "loss": 1.0326, "step": 3969 }, { "epoch": 0.01757492584886449, "grad_norm": 3.1158034292386865, "learning_rate": 1.7574925848864493e-06, "loss": 0.7426, "step": 3970 }, { "epoch": 0.01757935278232768, "grad_norm": 2.726893723561289, "learning_rate": 1.7579352782327683e-06, "loss": 0.6298, "step": 3971 }, { "epoch": 0.017583779715790872, "grad_norm": 2.38420392886664, "learning_rate": 1.7583779715790872e-06, "loss": 0.8418, "step": 3972 }, { "epoch": 0.017588206649254062, "grad_norm": 2.8295381379946156, "learning_rate": 1.7588206649254064e-06, "loss": 0.7569, "step": 3973 }, { "epoch": 0.017592633582717253, "grad_norm": 2.4212143741402685, "learning_rate": 1.7592633582717253e-06, "loss": 0.596, "step": 3974 }, { "epoch": 0.017597060516180443, "grad_norm": 4.068629763622809, "learning_rate": 1.7597060516180443e-06, "loss": 0.9854, "step": 3975 }, { "epoch": 0.01760148744964363, "grad_norm": 3.4957469070734013, "learning_rate": 1.7601487449643634e-06, "loss": 0.8093, "step": 3976 }, { "epoch": 0.01760591438310682, "grad_norm": 2.68385395312238, "learning_rate": 1.7605914383106824e-06, "loss": 0.7522, "step": 3977 }, { "epoch": 0.01761034131657001, "grad_norm": 4.761374733623879, "learning_rate": 1.7610341316570013e-06, "loss": 0.8877, "step": 3978 }, { "epoch": 0.017614768250033202, "grad_norm": 2.6031610227702604, "learning_rate": 1.7614768250033205e-06, "loss": 0.7537, "step": 3979 }, { "epoch": 0.017619195183496392, "grad_norm": 3.5312321941902414, "learning_rate": 1.7619195183496394e-06, "loss": 1.2687, "step": 3980 }, { "epoch": 0.017623622116959583, "grad_norm": 2.753424055391606, "learning_rate": 1.7623622116959581e-06, "loss": 0.6892, "step": 3981 }, { "epoch": 0.017628049050422773, "grad_norm": 2.795127868978786, "learning_rate": 1.7628049050422775e-06, "loss": 0.6021, "step": 3982 }, { "epoch": 0.017632475983885964, "grad_norm": 2.5401494009083994, "learning_rate": 1.7632475983885963e-06, "loss": 0.5863, "step": 3983 }, { "epoch": 0.01763690291734915, "grad_norm": 2.3785114277896056, "learning_rate": 1.7636902917349152e-06, "loss": 0.5284, "step": 3984 }, { "epoch": 0.01764132985081234, "grad_norm": 3.7173895408438042, "learning_rate": 1.7641329850812344e-06, "loss": 0.7162, "step": 3985 }, { "epoch": 0.017645756784275532, "grad_norm": 4.243106417036904, "learning_rate": 1.7645756784275533e-06, "loss": 1.1805, "step": 3986 }, { "epoch": 0.017650183717738722, "grad_norm": 2.9223819174595875, "learning_rate": 1.7650183717738723e-06, "loss": 0.8931, "step": 3987 }, { "epoch": 0.017654610651201913, "grad_norm": 2.8644797396566153, "learning_rate": 1.7654610651201914e-06, "loss": 0.7222, "step": 3988 }, { "epoch": 0.017659037584665103, "grad_norm": 2.634489596695874, "learning_rate": 1.7659037584665104e-06, "loss": 0.6898, "step": 3989 }, { "epoch": 0.017663464518128294, "grad_norm": 3.120137912441874, "learning_rate": 1.7663464518128293e-06, "loss": 0.6927, "step": 3990 }, { "epoch": 0.01766789145159148, "grad_norm": 3.280442477068643, "learning_rate": 1.7667891451591485e-06, "loss": 0.7264, "step": 3991 }, { "epoch": 0.01767231838505467, "grad_norm": 2.614056708981012, "learning_rate": 1.7672318385054674e-06, "loss": 0.7779, "step": 3992 }, { "epoch": 0.017676745318517862, "grad_norm": 4.024261563293432, "learning_rate": 1.7676745318517864e-06, "loss": 0.9604, "step": 3993 }, { "epoch": 0.017681172251981053, "grad_norm": 2.918886025120752, "learning_rate": 1.7681172251981055e-06, "loss": 0.8667, "step": 3994 }, { "epoch": 0.017685599185444243, "grad_norm": 2.3679398891391523, "learning_rate": 1.7685599185444245e-06, "loss": 0.5837, "step": 3995 }, { "epoch": 0.017690026118907434, "grad_norm": 2.8922405721211515, "learning_rate": 1.7690026118907432e-06, "loss": 0.8544, "step": 3996 }, { "epoch": 0.017694453052370624, "grad_norm": 3.5991711140996565, "learning_rate": 1.7694453052370626e-06, "loss": 0.9018, "step": 3997 }, { "epoch": 0.017698879985833815, "grad_norm": 3.0238009346674866, "learning_rate": 1.7698879985833813e-06, "loss": 1.0014, "step": 3998 }, { "epoch": 0.017703306919297, "grad_norm": 3.5565245804603367, "learning_rate": 1.7703306919297003e-06, "loss": 0.8289, "step": 3999 }, { "epoch": 0.017707733852760192, "grad_norm": 2.8861050946308424, "learning_rate": 1.7707733852760194e-06, "loss": 0.6483, "step": 4000 }, { "epoch": 0.017712160786223383, "grad_norm": 3.1896724689145755, "learning_rate": 1.7712160786223384e-06, "loss": 0.9941, "step": 4001 }, { "epoch": 0.017716587719686573, "grad_norm": 3.272355943823857, "learning_rate": 1.7716587719686573e-06, "loss": 0.965, "step": 4002 }, { "epoch": 0.017721014653149764, "grad_norm": 2.9693155384933267, "learning_rate": 1.7721014653149765e-06, "loss": 0.7809, "step": 4003 }, { "epoch": 0.017725441586612954, "grad_norm": 3.428516730290199, "learning_rate": 1.7725441586612954e-06, "loss": 0.9603, "step": 4004 }, { "epoch": 0.017729868520076145, "grad_norm": 3.5880136719148172, "learning_rate": 1.7729868520076144e-06, "loss": 0.9412, "step": 4005 }, { "epoch": 0.01773429545353933, "grad_norm": 2.648074238701115, "learning_rate": 1.7734295453539335e-06, "loss": 0.6827, "step": 4006 }, { "epoch": 0.017738722387002522, "grad_norm": 2.6866215993067653, "learning_rate": 1.7738722387002525e-06, "loss": 0.9766, "step": 4007 }, { "epoch": 0.017743149320465713, "grad_norm": 2.6134773714092017, "learning_rate": 1.7743149320465714e-06, "loss": 0.6595, "step": 4008 }, { "epoch": 0.017747576253928903, "grad_norm": 3.9312377985677416, "learning_rate": 1.7747576253928906e-06, "loss": 1.1874, "step": 4009 }, { "epoch": 0.017752003187392094, "grad_norm": 3.2869478363522786, "learning_rate": 1.7752003187392095e-06, "loss": 1.0938, "step": 4010 }, { "epoch": 0.017756430120855284, "grad_norm": 2.9221561524727613, "learning_rate": 1.7756430120855283e-06, "loss": 0.8774, "step": 4011 }, { "epoch": 0.017760857054318475, "grad_norm": 3.0240665556854145, "learning_rate": 1.7760857054318476e-06, "loss": 0.7723, "step": 4012 }, { "epoch": 0.017765283987781665, "grad_norm": 3.05063251731821, "learning_rate": 1.7765283987781664e-06, "loss": 0.8808, "step": 4013 }, { "epoch": 0.017769710921244852, "grad_norm": 3.2535878852271, "learning_rate": 1.7769710921244857e-06, "loss": 0.9601, "step": 4014 }, { "epoch": 0.017774137854708043, "grad_norm": 3.558284411629979, "learning_rate": 1.7774137854708045e-06, "loss": 0.432, "step": 4015 }, { "epoch": 0.017778564788171233, "grad_norm": 2.7673037979341903, "learning_rate": 1.7778564788171234e-06, "loss": 0.8359, "step": 4016 }, { "epoch": 0.017782991721634424, "grad_norm": 3.0127364539544046, "learning_rate": 1.7782991721634426e-06, "loss": 0.7322, "step": 4017 }, { "epoch": 0.017787418655097614, "grad_norm": 3.5138411405902334, "learning_rate": 1.7787418655097615e-06, "loss": 1.0909, "step": 4018 }, { "epoch": 0.017791845588560805, "grad_norm": 3.5449946166849915, "learning_rate": 1.7791845588560805e-06, "loss": 0.9515, "step": 4019 }, { "epoch": 0.017796272522023995, "grad_norm": 3.074077049473447, "learning_rate": 1.7796272522023996e-06, "loss": 0.8896, "step": 4020 }, { "epoch": 0.017800699455487182, "grad_norm": 3.399236776513104, "learning_rate": 1.7800699455487186e-06, "loss": 1.2859, "step": 4021 }, { "epoch": 0.017805126388950373, "grad_norm": 2.6815427393838513, "learning_rate": 1.7805126388950375e-06, "loss": 0.7094, "step": 4022 }, { "epoch": 0.017809553322413563, "grad_norm": 3.352556907108384, "learning_rate": 1.7809553322413567e-06, "loss": 0.8528, "step": 4023 }, { "epoch": 0.017813980255876754, "grad_norm": 3.7242011506419006, "learning_rate": 1.7813980255876756e-06, "loss": 0.8196, "step": 4024 }, { "epoch": 0.017818407189339944, "grad_norm": 4.131213213799301, "learning_rate": 1.7818407189339946e-06, "loss": 1.1267, "step": 4025 }, { "epoch": 0.017822834122803135, "grad_norm": 2.900415831084947, "learning_rate": 1.7822834122803137e-06, "loss": 0.5817, "step": 4026 }, { "epoch": 0.017827261056266325, "grad_norm": 2.830927245511024, "learning_rate": 1.7827261056266327e-06, "loss": 0.8076, "step": 4027 }, { "epoch": 0.017831687989729516, "grad_norm": 4.159282445445757, "learning_rate": 1.7831687989729516e-06, "loss": 1.0404, "step": 4028 }, { "epoch": 0.017836114923192703, "grad_norm": 2.769436016378074, "learning_rate": 1.7836114923192708e-06, "loss": 0.798, "step": 4029 }, { "epoch": 0.017840541856655893, "grad_norm": 3.171041371093006, "learning_rate": 1.7840541856655897e-06, "loss": 1.0734, "step": 4030 }, { "epoch": 0.017844968790119084, "grad_norm": 3.1516561299784702, "learning_rate": 1.7844968790119085e-06, "loss": 0.8267, "step": 4031 }, { "epoch": 0.017849395723582274, "grad_norm": 2.608671231770159, "learning_rate": 1.7849395723582278e-06, "loss": 0.6102, "step": 4032 }, { "epoch": 0.017853822657045465, "grad_norm": 2.8540839693354765, "learning_rate": 1.7853822657045466e-06, "loss": 0.5992, "step": 4033 }, { "epoch": 0.017858249590508656, "grad_norm": 3.2402916909401336, "learning_rate": 1.7858249590508655e-06, "loss": 0.7702, "step": 4034 }, { "epoch": 0.017862676523971846, "grad_norm": 2.834278412856642, "learning_rate": 1.7862676523971847e-06, "loss": 0.5351, "step": 4035 }, { "epoch": 0.017867103457435033, "grad_norm": 2.5656945896164443, "learning_rate": 1.7867103457435036e-06, "loss": 0.8389, "step": 4036 }, { "epoch": 0.017871530390898224, "grad_norm": 2.996604326742681, "learning_rate": 1.7871530390898226e-06, "loss": 0.6527, "step": 4037 }, { "epoch": 0.017875957324361414, "grad_norm": 3.062787277445687, "learning_rate": 1.7875957324361417e-06, "loss": 0.6268, "step": 4038 }, { "epoch": 0.017880384257824605, "grad_norm": 3.766932864320317, "learning_rate": 1.7880384257824607e-06, "loss": 0.6667, "step": 4039 }, { "epoch": 0.017884811191287795, "grad_norm": 2.742832724848574, "learning_rate": 1.7884811191287796e-06, "loss": 0.7236, "step": 4040 }, { "epoch": 0.017889238124750986, "grad_norm": 2.88685523299303, "learning_rate": 1.7889238124750988e-06, "loss": 0.8543, "step": 4041 }, { "epoch": 0.017893665058214176, "grad_norm": 2.7741949117391402, "learning_rate": 1.7893665058214177e-06, "loss": 0.9559, "step": 4042 }, { "epoch": 0.017898091991677367, "grad_norm": 2.9366053023318752, "learning_rate": 1.7898091991677367e-06, "loss": 0.7756, "step": 4043 }, { "epoch": 0.017902518925140554, "grad_norm": 3.0752322488744213, "learning_rate": 1.7902518925140558e-06, "loss": 0.8453, "step": 4044 }, { "epoch": 0.017906945858603744, "grad_norm": 3.1976903491786257, "learning_rate": 1.7906945858603748e-06, "loss": 0.6721, "step": 4045 }, { "epoch": 0.017911372792066935, "grad_norm": 3.8282434339611173, "learning_rate": 1.7911372792066935e-06, "loss": 0.9847, "step": 4046 }, { "epoch": 0.017915799725530125, "grad_norm": 4.561962099799048, "learning_rate": 1.7915799725530129e-06, "loss": 0.9545, "step": 4047 }, { "epoch": 0.017920226658993316, "grad_norm": 3.7308187967437654, "learning_rate": 1.7920226658993316e-06, "loss": 1.083, "step": 4048 }, { "epoch": 0.017924653592456506, "grad_norm": 2.7826788368394153, "learning_rate": 1.7924653592456506e-06, "loss": 0.8544, "step": 4049 }, { "epoch": 0.017929080525919697, "grad_norm": 3.120613180392675, "learning_rate": 1.7929080525919697e-06, "loss": 1.0671, "step": 4050 }, { "epoch": 0.017933507459382887, "grad_norm": 3.5118244486347767, "learning_rate": 1.7933507459382887e-06, "loss": 1.1191, "step": 4051 }, { "epoch": 0.017937934392846074, "grad_norm": 2.9340386138105656, "learning_rate": 1.7937934392846076e-06, "loss": 0.9427, "step": 4052 }, { "epoch": 0.017942361326309265, "grad_norm": 3.4098546849675375, "learning_rate": 1.7942361326309268e-06, "loss": 0.9691, "step": 4053 }, { "epoch": 0.017946788259772455, "grad_norm": 2.8673186517604305, "learning_rate": 1.7946788259772457e-06, "loss": 0.8202, "step": 4054 }, { "epoch": 0.017951215193235646, "grad_norm": 3.0576556442128178, "learning_rate": 1.7951215193235647e-06, "loss": 0.6337, "step": 4055 }, { "epoch": 0.017955642126698836, "grad_norm": 4.027860116904567, "learning_rate": 1.7955642126698838e-06, "loss": 1.1501, "step": 4056 }, { "epoch": 0.017960069060162027, "grad_norm": 3.600020221150418, "learning_rate": 1.7960069060162028e-06, "loss": 0.7982, "step": 4057 }, { "epoch": 0.017964495993625217, "grad_norm": 3.8583115889419908, "learning_rate": 1.7964495993625217e-06, "loss": 0.9656, "step": 4058 }, { "epoch": 0.017968922927088404, "grad_norm": 2.984419405897531, "learning_rate": 1.7968922927088409e-06, "loss": 0.5667, "step": 4059 }, { "epoch": 0.017973349860551595, "grad_norm": 2.720627250024041, "learning_rate": 1.7973349860551598e-06, "loss": 0.674, "step": 4060 }, { "epoch": 0.017977776794014785, "grad_norm": 3.7847713832501504, "learning_rate": 1.7977776794014788e-06, "loss": 0.8555, "step": 4061 }, { "epoch": 0.017982203727477976, "grad_norm": 2.79084018452243, "learning_rate": 1.798220372747798e-06, "loss": 0.9252, "step": 4062 }, { "epoch": 0.017986630660941166, "grad_norm": 2.4078582577771996, "learning_rate": 1.7986630660941167e-06, "loss": 0.6707, "step": 4063 }, { "epoch": 0.017991057594404357, "grad_norm": 3.6622181250849386, "learning_rate": 1.7991057594404356e-06, "loss": 1.0035, "step": 4064 }, { "epoch": 0.017995484527867547, "grad_norm": 3.24177702355569, "learning_rate": 1.7995484527867548e-06, "loss": 0.5717, "step": 4065 }, { "epoch": 0.017999911461330738, "grad_norm": 2.6921506895014353, "learning_rate": 1.7999911461330737e-06, "loss": 0.6671, "step": 4066 }, { "epoch": 0.018004338394793925, "grad_norm": 3.264222940855457, "learning_rate": 1.8004338394793927e-06, "loss": 1.0859, "step": 4067 }, { "epoch": 0.018008765328257115, "grad_norm": 3.7946829521006475, "learning_rate": 1.8008765328257118e-06, "loss": 1.1205, "step": 4068 }, { "epoch": 0.018013192261720306, "grad_norm": 2.8299235115358226, "learning_rate": 1.8013192261720308e-06, "loss": 0.997, "step": 4069 }, { "epoch": 0.018017619195183496, "grad_norm": 3.289547040964947, "learning_rate": 1.8017619195183497e-06, "loss": 0.9153, "step": 4070 }, { "epoch": 0.018022046128646687, "grad_norm": 3.1669754618685335, "learning_rate": 1.8022046128646689e-06, "loss": 0.8698, "step": 4071 }, { "epoch": 0.018026473062109877, "grad_norm": 3.1612062509675374, "learning_rate": 1.8026473062109878e-06, "loss": 0.8026, "step": 4072 }, { "epoch": 0.018030899995573068, "grad_norm": 2.591358673721889, "learning_rate": 1.8030899995573068e-06, "loss": 0.8693, "step": 4073 }, { "epoch": 0.018035326929036255, "grad_norm": 2.722934744679768, "learning_rate": 1.803532692903626e-06, "loss": 0.7761, "step": 4074 }, { "epoch": 0.018039753862499446, "grad_norm": 3.4124088660462086, "learning_rate": 1.8039753862499449e-06, "loss": 0.987, "step": 4075 }, { "epoch": 0.018044180795962636, "grad_norm": 2.6955290340366678, "learning_rate": 1.8044180795962638e-06, "loss": 0.6591, "step": 4076 }, { "epoch": 0.018048607729425827, "grad_norm": 3.1555570011939147, "learning_rate": 1.804860772942583e-06, "loss": 1.0323, "step": 4077 }, { "epoch": 0.018053034662889017, "grad_norm": 2.7457228936048614, "learning_rate": 1.805303466288902e-06, "loss": 0.8304, "step": 4078 }, { "epoch": 0.018057461596352208, "grad_norm": 2.9048400865397848, "learning_rate": 1.8057461596352207e-06, "loss": 0.8034, "step": 4079 }, { "epoch": 0.018061888529815398, "grad_norm": 2.7825401870789124, "learning_rate": 1.80618885298154e-06, "loss": 0.8482, "step": 4080 }, { "epoch": 0.01806631546327859, "grad_norm": 2.664680096407671, "learning_rate": 1.8066315463278588e-06, "loss": 0.6638, "step": 4081 }, { "epoch": 0.018070742396741776, "grad_norm": 2.5872718129296373, "learning_rate": 1.8070742396741777e-06, "loss": 0.7666, "step": 4082 }, { "epoch": 0.018075169330204966, "grad_norm": 3.004719903113049, "learning_rate": 1.8075169330204969e-06, "loss": 0.5229, "step": 4083 }, { "epoch": 0.018079596263668157, "grad_norm": 3.3901311582471183, "learning_rate": 1.8079596263668158e-06, "loss": 0.6433, "step": 4084 }, { "epoch": 0.018084023197131347, "grad_norm": 2.8581374879470705, "learning_rate": 1.8084023197131348e-06, "loss": 1.0725, "step": 4085 }, { "epoch": 0.018088450130594538, "grad_norm": 2.6797077991894147, "learning_rate": 1.808845013059454e-06, "loss": 0.7634, "step": 4086 }, { "epoch": 0.018092877064057728, "grad_norm": 3.64991257224687, "learning_rate": 1.8092877064057729e-06, "loss": 0.9418, "step": 4087 }, { "epoch": 0.01809730399752092, "grad_norm": 3.484466320174413, "learning_rate": 1.8097303997520918e-06, "loss": 0.8368, "step": 4088 }, { "epoch": 0.018101730930984106, "grad_norm": 2.884548761592113, "learning_rate": 1.810173093098411e-06, "loss": 0.8237, "step": 4089 }, { "epoch": 0.018106157864447296, "grad_norm": 2.6398719979572745, "learning_rate": 1.81061578644473e-06, "loss": 0.5708, "step": 4090 }, { "epoch": 0.018110584797910487, "grad_norm": 2.7860142951213396, "learning_rate": 1.811058479791049e-06, "loss": 0.7214, "step": 4091 }, { "epoch": 0.018115011731373677, "grad_norm": 2.4248313061760873, "learning_rate": 1.811501173137368e-06, "loss": 0.8777, "step": 4092 }, { "epoch": 0.018119438664836868, "grad_norm": 2.5758663324649618, "learning_rate": 1.811943866483687e-06, "loss": 0.6211, "step": 4093 }, { "epoch": 0.018123865598300058, "grad_norm": 3.136968604704979, "learning_rate": 1.8123865598300057e-06, "loss": 0.6549, "step": 4094 }, { "epoch": 0.01812829253176325, "grad_norm": 2.5641361983674846, "learning_rate": 1.8128292531763251e-06, "loss": 0.5878, "step": 4095 }, { "epoch": 0.01813271946522644, "grad_norm": 2.800831211200282, "learning_rate": 1.8132719465226438e-06, "loss": 0.8535, "step": 4096 }, { "epoch": 0.018137146398689626, "grad_norm": 3.015775801414551, "learning_rate": 1.8137146398689628e-06, "loss": 0.7808, "step": 4097 }, { "epoch": 0.018141573332152817, "grad_norm": 3.4941016542030123, "learning_rate": 1.814157333215282e-06, "loss": 1.077, "step": 4098 }, { "epoch": 0.018146000265616007, "grad_norm": 2.8390294448633604, "learning_rate": 1.814600026561601e-06, "loss": 1.1147, "step": 4099 }, { "epoch": 0.018150427199079198, "grad_norm": 3.4935287757573272, "learning_rate": 1.8150427199079198e-06, "loss": 0.7621, "step": 4100 }, { "epoch": 0.01815485413254239, "grad_norm": 3.1101502782301194, "learning_rate": 1.815485413254239e-06, "loss": 1.1263, "step": 4101 }, { "epoch": 0.01815928106600558, "grad_norm": 3.2795455636766926, "learning_rate": 1.815928106600558e-06, "loss": 0.9401, "step": 4102 }, { "epoch": 0.01816370799946877, "grad_norm": 2.7424540053046114, "learning_rate": 1.816370799946877e-06, "loss": 0.7648, "step": 4103 }, { "epoch": 0.018168134932931956, "grad_norm": 2.7818229410731643, "learning_rate": 1.816813493293196e-06, "loss": 0.7588, "step": 4104 }, { "epoch": 0.018172561866395147, "grad_norm": 3.1430900486845466, "learning_rate": 1.817256186639515e-06, "loss": 1.1131, "step": 4105 }, { "epoch": 0.018176988799858337, "grad_norm": 3.1344781257506558, "learning_rate": 1.817698879985834e-06, "loss": 0.8235, "step": 4106 }, { "epoch": 0.018181415733321528, "grad_norm": 2.8208847670413126, "learning_rate": 1.8181415733321531e-06, "loss": 0.9462, "step": 4107 }, { "epoch": 0.01818584266678472, "grad_norm": 2.7348208612701135, "learning_rate": 1.818584266678472e-06, "loss": 0.9475, "step": 4108 }, { "epoch": 0.01819026960024791, "grad_norm": 3.8096737232422977, "learning_rate": 1.819026960024791e-06, "loss": 0.6296, "step": 4109 }, { "epoch": 0.0181946965337111, "grad_norm": 2.5423787025700335, "learning_rate": 1.8194696533711102e-06, "loss": 0.6636, "step": 4110 }, { "epoch": 0.01819912346717429, "grad_norm": 2.805810806971552, "learning_rate": 1.8199123467174291e-06, "loss": 0.9201, "step": 4111 }, { "epoch": 0.018203550400637477, "grad_norm": 2.6900888212708054, "learning_rate": 1.8203550400637478e-06, "loss": 0.5947, "step": 4112 }, { "epoch": 0.018207977334100667, "grad_norm": 2.699904182540002, "learning_rate": 1.820797733410067e-06, "loss": 0.6648, "step": 4113 }, { "epoch": 0.018212404267563858, "grad_norm": 3.0145138081539744, "learning_rate": 1.821240426756386e-06, "loss": 0.8116, "step": 4114 }, { "epoch": 0.01821683120102705, "grad_norm": 3.1933511126516128, "learning_rate": 1.821683120102705e-06, "loss": 0.7993, "step": 4115 }, { "epoch": 0.01822125813449024, "grad_norm": 3.1062207028286246, "learning_rate": 1.822125813449024e-06, "loss": 0.9346, "step": 4116 }, { "epoch": 0.01822568506795343, "grad_norm": 3.037587022916365, "learning_rate": 1.822568506795343e-06, "loss": 0.5883, "step": 4117 }, { "epoch": 0.01823011200141662, "grad_norm": 3.5145029872728513, "learning_rate": 1.823011200141662e-06, "loss": 0.864, "step": 4118 }, { "epoch": 0.018234538934879807, "grad_norm": 2.710916521215645, "learning_rate": 1.8234538934879811e-06, "loss": 0.641, "step": 4119 }, { "epoch": 0.018238965868342998, "grad_norm": 2.913051885377074, "learning_rate": 1.8238965868343e-06, "loss": 0.7893, "step": 4120 }, { "epoch": 0.018243392801806188, "grad_norm": 3.3335924304517373, "learning_rate": 1.824339280180619e-06, "loss": 1.0569, "step": 4121 }, { "epoch": 0.01824781973526938, "grad_norm": 2.883042028253609, "learning_rate": 1.8247819735269382e-06, "loss": 0.6526, "step": 4122 }, { "epoch": 0.01825224666873257, "grad_norm": 3.2532008458385917, "learning_rate": 1.8252246668732571e-06, "loss": 0.5323, "step": 4123 }, { "epoch": 0.01825667360219576, "grad_norm": 3.3973588352273953, "learning_rate": 1.825667360219576e-06, "loss": 1.0334, "step": 4124 }, { "epoch": 0.01826110053565895, "grad_norm": 3.1163069357754924, "learning_rate": 1.8261100535658952e-06, "loss": 0.6024, "step": 4125 }, { "epoch": 0.01826552746912214, "grad_norm": 2.513910432119277, "learning_rate": 1.8265527469122142e-06, "loss": 0.649, "step": 4126 }, { "epoch": 0.018269954402585328, "grad_norm": 2.769275435510727, "learning_rate": 1.826995440258533e-06, "loss": 0.6233, "step": 4127 }, { "epoch": 0.018274381336048518, "grad_norm": 3.987149848453916, "learning_rate": 1.8274381336048523e-06, "loss": 1.4641, "step": 4128 }, { "epoch": 0.01827880826951171, "grad_norm": 2.506030679526138, "learning_rate": 1.827880826951171e-06, "loss": 0.6068, "step": 4129 }, { "epoch": 0.0182832352029749, "grad_norm": 2.6428239282845265, "learning_rate": 1.82832352029749e-06, "loss": 0.637, "step": 4130 }, { "epoch": 0.01828766213643809, "grad_norm": 3.1761044769789306, "learning_rate": 1.8287662136438091e-06, "loss": 1.0028, "step": 4131 }, { "epoch": 0.01829208906990128, "grad_norm": 2.6300518916825752, "learning_rate": 1.829208906990128e-06, "loss": 0.5933, "step": 4132 }, { "epoch": 0.01829651600336447, "grad_norm": 3.8669115615998604, "learning_rate": 1.829651600336447e-06, "loss": 0.7467, "step": 4133 }, { "epoch": 0.01830094293682766, "grad_norm": 2.599908030336568, "learning_rate": 1.8300942936827662e-06, "loss": 0.7858, "step": 4134 }, { "epoch": 0.018305369870290848, "grad_norm": 2.860047980285959, "learning_rate": 1.8305369870290851e-06, "loss": 1.312, "step": 4135 }, { "epoch": 0.01830979680375404, "grad_norm": 2.9196980037476283, "learning_rate": 1.830979680375404e-06, "loss": 0.9645, "step": 4136 }, { "epoch": 0.01831422373721723, "grad_norm": 3.185648215213407, "learning_rate": 1.8314223737217232e-06, "loss": 0.8517, "step": 4137 }, { "epoch": 0.01831865067068042, "grad_norm": 3.012525142126389, "learning_rate": 1.8318650670680422e-06, "loss": 0.7464, "step": 4138 }, { "epoch": 0.01832307760414361, "grad_norm": 2.7763587870155537, "learning_rate": 1.8323077604143611e-06, "loss": 0.8982, "step": 4139 }, { "epoch": 0.0183275045376068, "grad_norm": 2.5638936552392595, "learning_rate": 1.8327504537606803e-06, "loss": 0.7015, "step": 4140 }, { "epoch": 0.01833193147106999, "grad_norm": 2.966172838414199, "learning_rate": 1.8331931471069992e-06, "loss": 0.751, "step": 4141 }, { "epoch": 0.01833635840453318, "grad_norm": 3.103842412662737, "learning_rate": 1.833635840453318e-06, "loss": 0.8696, "step": 4142 }, { "epoch": 0.01834078533799637, "grad_norm": 2.7681797619012203, "learning_rate": 1.8340785337996373e-06, "loss": 0.8166, "step": 4143 }, { "epoch": 0.01834521227145956, "grad_norm": 3.0071443883339635, "learning_rate": 1.834521227145956e-06, "loss": 1.1107, "step": 4144 }, { "epoch": 0.01834963920492275, "grad_norm": 2.5351086372681833, "learning_rate": 1.834963920492275e-06, "loss": 0.694, "step": 4145 }, { "epoch": 0.01835406613838594, "grad_norm": 2.589867478281244, "learning_rate": 1.8354066138385942e-06, "loss": 0.5881, "step": 4146 }, { "epoch": 0.01835849307184913, "grad_norm": 2.978341152749452, "learning_rate": 1.8358493071849131e-06, "loss": 0.8418, "step": 4147 }, { "epoch": 0.01836292000531232, "grad_norm": 3.324121530346008, "learning_rate": 1.836292000531232e-06, "loss": 0.8357, "step": 4148 }, { "epoch": 0.018367346938775512, "grad_norm": 2.561744703672665, "learning_rate": 1.8367346938775512e-06, "loss": 0.735, "step": 4149 }, { "epoch": 0.0183717738722387, "grad_norm": 3.1128554767106036, "learning_rate": 1.8371773872238702e-06, "loss": 0.4434, "step": 4150 }, { "epoch": 0.01837620080570189, "grad_norm": 2.884009982622305, "learning_rate": 1.8376200805701891e-06, "loss": 0.7637, "step": 4151 }, { "epoch": 0.01838062773916508, "grad_norm": 3.1801997027815845, "learning_rate": 1.8380627739165083e-06, "loss": 1.17, "step": 4152 }, { "epoch": 0.01838505467262827, "grad_norm": 3.4933194900762157, "learning_rate": 1.8385054672628272e-06, "loss": 0.9561, "step": 4153 }, { "epoch": 0.01838948160609146, "grad_norm": 3.0919967153898087, "learning_rate": 1.8389481606091462e-06, "loss": 0.7657, "step": 4154 }, { "epoch": 0.01839390853955465, "grad_norm": 2.981661508376456, "learning_rate": 1.8393908539554653e-06, "loss": 0.7032, "step": 4155 }, { "epoch": 0.018398335473017842, "grad_norm": 3.023829961170357, "learning_rate": 1.8398335473017843e-06, "loss": 0.9251, "step": 4156 }, { "epoch": 0.01840276240648103, "grad_norm": 2.3215649352593393, "learning_rate": 1.8402762406481032e-06, "loss": 0.6341, "step": 4157 }, { "epoch": 0.01840718933994422, "grad_norm": 3.610069219040919, "learning_rate": 1.8407189339944224e-06, "loss": 1.1911, "step": 4158 }, { "epoch": 0.01841161627340741, "grad_norm": 2.7494230535756174, "learning_rate": 1.8411616273407413e-06, "loss": 0.64, "step": 4159 }, { "epoch": 0.0184160432068706, "grad_norm": 3.104638003673055, "learning_rate": 1.84160432068706e-06, "loss": 0.9051, "step": 4160 }, { "epoch": 0.01842047014033379, "grad_norm": 2.3907771901006276, "learning_rate": 1.8420470140333794e-06, "loss": 0.7196, "step": 4161 }, { "epoch": 0.01842489707379698, "grad_norm": 2.6396249657454476, "learning_rate": 1.8424897073796982e-06, "loss": 0.7789, "step": 4162 }, { "epoch": 0.018429324007260172, "grad_norm": 2.802586671205745, "learning_rate": 1.8429324007260171e-06, "loss": 0.9482, "step": 4163 }, { "epoch": 0.018433750940723363, "grad_norm": 3.174794371026004, "learning_rate": 1.8433750940723363e-06, "loss": 1.0508, "step": 4164 }, { "epoch": 0.01843817787418655, "grad_norm": 2.788857414303914, "learning_rate": 1.8438177874186552e-06, "loss": 0.838, "step": 4165 }, { "epoch": 0.01844260480764974, "grad_norm": 2.6627083459314225, "learning_rate": 1.8442604807649742e-06, "loss": 0.8358, "step": 4166 }, { "epoch": 0.01844703174111293, "grad_norm": 3.4167995034878365, "learning_rate": 1.8447031741112933e-06, "loss": 0.6063, "step": 4167 }, { "epoch": 0.01845145867457612, "grad_norm": 3.2829200506758665, "learning_rate": 1.8451458674576123e-06, "loss": 0.8847, "step": 4168 }, { "epoch": 0.01845588560803931, "grad_norm": 3.7776552431661687, "learning_rate": 1.8455885608039312e-06, "loss": 0.8483, "step": 4169 }, { "epoch": 0.018460312541502502, "grad_norm": 3.815908816645149, "learning_rate": 1.8460312541502504e-06, "loss": 1.1679, "step": 4170 }, { "epoch": 0.018464739474965693, "grad_norm": 3.646046141432344, "learning_rate": 1.8464739474965693e-06, "loss": 0.9895, "step": 4171 }, { "epoch": 0.01846916640842888, "grad_norm": 2.556666708888905, "learning_rate": 1.8469166408428883e-06, "loss": 0.733, "step": 4172 }, { "epoch": 0.01847359334189207, "grad_norm": 2.9201454210145696, "learning_rate": 1.8473593341892074e-06, "loss": 1.0147, "step": 4173 }, { "epoch": 0.01847802027535526, "grad_norm": 3.2464668357381954, "learning_rate": 1.8478020275355264e-06, "loss": 1.1808, "step": 4174 }, { "epoch": 0.01848244720881845, "grad_norm": 3.1195779025604455, "learning_rate": 1.8482447208818451e-06, "loss": 0.7795, "step": 4175 }, { "epoch": 0.01848687414228164, "grad_norm": 3.385507856198327, "learning_rate": 1.8486874142281645e-06, "loss": 1.1988, "step": 4176 }, { "epoch": 0.018491301075744832, "grad_norm": 2.980493089115654, "learning_rate": 1.8491301075744832e-06, "loss": 0.7098, "step": 4177 }, { "epoch": 0.018495728009208023, "grad_norm": 2.569522716970246, "learning_rate": 1.8495728009208022e-06, "loss": 0.7764, "step": 4178 }, { "epoch": 0.018500154942671213, "grad_norm": 3.9807161546842362, "learning_rate": 1.8500154942671213e-06, "loss": 1.3478, "step": 4179 }, { "epoch": 0.0185045818761344, "grad_norm": 3.0806246888756252, "learning_rate": 1.8504581876134403e-06, "loss": 0.901, "step": 4180 }, { "epoch": 0.01850900880959759, "grad_norm": 2.7503979913718934, "learning_rate": 1.8509008809597592e-06, "loss": 0.5644, "step": 4181 }, { "epoch": 0.01851343574306078, "grad_norm": 2.9406508244411054, "learning_rate": 1.8513435743060784e-06, "loss": 1.1171, "step": 4182 }, { "epoch": 0.018517862676523972, "grad_norm": 3.055698510255476, "learning_rate": 1.8517862676523973e-06, "loss": 0.7767, "step": 4183 }, { "epoch": 0.018522289609987162, "grad_norm": 3.08801845135705, "learning_rate": 1.8522289609987163e-06, "loss": 1.0106, "step": 4184 }, { "epoch": 0.018526716543450353, "grad_norm": 2.7865644291168588, "learning_rate": 1.8526716543450354e-06, "loss": 0.9822, "step": 4185 }, { "epoch": 0.018531143476913543, "grad_norm": 2.9244636667756825, "learning_rate": 1.8531143476913544e-06, "loss": 0.4464, "step": 4186 }, { "epoch": 0.01853557041037673, "grad_norm": 3.6188534516313617, "learning_rate": 1.8535570410376733e-06, "loss": 0.8209, "step": 4187 }, { "epoch": 0.01853999734383992, "grad_norm": 2.5792732526052142, "learning_rate": 1.8539997343839925e-06, "loss": 0.8102, "step": 4188 }, { "epoch": 0.01854442427730311, "grad_norm": 2.548201091177267, "learning_rate": 1.8544424277303114e-06, "loss": 0.6435, "step": 4189 }, { "epoch": 0.018548851210766302, "grad_norm": 2.5332868135961597, "learning_rate": 1.8548851210766304e-06, "loss": 0.7755, "step": 4190 }, { "epoch": 0.018553278144229492, "grad_norm": 3.084580996440112, "learning_rate": 1.8553278144229495e-06, "loss": 0.6724, "step": 4191 }, { "epoch": 0.018557705077692683, "grad_norm": 3.193804946350779, "learning_rate": 1.8557705077692683e-06, "loss": 0.9243, "step": 4192 }, { "epoch": 0.018562132011155873, "grad_norm": 2.827992900681477, "learning_rate": 1.8562132011155872e-06, "loss": 0.6998, "step": 4193 }, { "epoch": 0.018566558944619064, "grad_norm": 3.785886026317059, "learning_rate": 1.8566558944619064e-06, "loss": 1.1069, "step": 4194 }, { "epoch": 0.01857098587808225, "grad_norm": 2.750948981776596, "learning_rate": 1.8570985878082253e-06, "loss": 0.7075, "step": 4195 }, { "epoch": 0.01857541281154544, "grad_norm": 2.6363936560871304, "learning_rate": 1.8575412811545443e-06, "loss": 0.6023, "step": 4196 }, { "epoch": 0.018579839745008632, "grad_norm": 3.562007153845104, "learning_rate": 1.8579839745008634e-06, "loss": 1.0173, "step": 4197 }, { "epoch": 0.018584266678471822, "grad_norm": 3.1474658798922417, "learning_rate": 1.8584266678471824e-06, "loss": 0.8666, "step": 4198 }, { "epoch": 0.018588693611935013, "grad_norm": 3.685010007664972, "learning_rate": 1.8588693611935013e-06, "loss": 0.9239, "step": 4199 }, { "epoch": 0.018593120545398203, "grad_norm": 2.7825561694789043, "learning_rate": 1.8593120545398205e-06, "loss": 0.5266, "step": 4200 }, { "epoch": 0.018597547478861394, "grad_norm": 4.188873921969247, "learning_rate": 1.8597547478861394e-06, "loss": 1.2176, "step": 4201 }, { "epoch": 0.018601974412324584, "grad_norm": 3.3992880032072588, "learning_rate": 1.8601974412324584e-06, "loss": 0.8786, "step": 4202 }, { "epoch": 0.01860640134578777, "grad_norm": 3.4296375097104073, "learning_rate": 1.8606401345787775e-06, "loss": 0.7611, "step": 4203 }, { "epoch": 0.018610828279250962, "grad_norm": 3.05984751660995, "learning_rate": 1.8610828279250965e-06, "loss": 0.8643, "step": 4204 }, { "epoch": 0.018615255212714153, "grad_norm": 3.125650531575726, "learning_rate": 1.8615255212714154e-06, "loss": 1.0165, "step": 4205 }, { "epoch": 0.018619682146177343, "grad_norm": 2.651570265381256, "learning_rate": 1.8619682146177346e-06, "loss": 0.7132, "step": 4206 }, { "epoch": 0.018624109079640534, "grad_norm": 3.085942530668123, "learning_rate": 1.8624109079640535e-06, "loss": 1.1489, "step": 4207 }, { "epoch": 0.018628536013103724, "grad_norm": 3.337858706100045, "learning_rate": 1.8628536013103723e-06, "loss": 0.8258, "step": 4208 }, { "epoch": 0.018632962946566915, "grad_norm": 2.6764774937080915, "learning_rate": 1.8632962946566916e-06, "loss": 0.8445, "step": 4209 }, { "epoch": 0.0186373898800301, "grad_norm": 2.9228774030580853, "learning_rate": 1.8637389880030104e-06, "loss": 0.8006, "step": 4210 }, { "epoch": 0.018641816813493292, "grad_norm": 3.0465059923649855, "learning_rate": 1.8641816813493293e-06, "loss": 1.2434, "step": 4211 }, { "epoch": 0.018646243746956483, "grad_norm": 2.972998255114267, "learning_rate": 1.8646243746956485e-06, "loss": 0.711, "step": 4212 }, { "epoch": 0.018650670680419673, "grad_norm": 3.072870970514238, "learning_rate": 1.8650670680419674e-06, "loss": 0.6954, "step": 4213 }, { "epoch": 0.018655097613882864, "grad_norm": 2.608713225209165, "learning_rate": 1.8655097613882864e-06, "loss": 0.558, "step": 4214 }, { "epoch": 0.018659524547346054, "grad_norm": 3.4568451502695985, "learning_rate": 1.8659524547346055e-06, "loss": 1.0828, "step": 4215 }, { "epoch": 0.018663951480809245, "grad_norm": 2.754290421884754, "learning_rate": 1.8663951480809245e-06, "loss": 0.6803, "step": 4216 }, { "epoch": 0.018668378414272435, "grad_norm": 2.732292983665867, "learning_rate": 1.8668378414272434e-06, "loss": 0.8707, "step": 4217 }, { "epoch": 0.018672805347735622, "grad_norm": 3.1387528056690437, "learning_rate": 1.8672805347735626e-06, "loss": 0.8501, "step": 4218 }, { "epoch": 0.018677232281198813, "grad_norm": 2.993402514166269, "learning_rate": 1.8677232281198815e-06, "loss": 0.7208, "step": 4219 }, { "epoch": 0.018681659214662003, "grad_norm": 2.5963338381784156, "learning_rate": 1.8681659214662005e-06, "loss": 0.548, "step": 4220 }, { "epoch": 0.018686086148125194, "grad_norm": 3.780257668550155, "learning_rate": 1.8686086148125196e-06, "loss": 0.9239, "step": 4221 }, { "epoch": 0.018690513081588384, "grad_norm": 3.140208227715496, "learning_rate": 1.8690513081588386e-06, "loss": 0.8168, "step": 4222 }, { "epoch": 0.018694940015051575, "grad_norm": 2.416509957464158, "learning_rate": 1.8694940015051573e-06, "loss": 0.6696, "step": 4223 }, { "epoch": 0.018699366948514765, "grad_norm": 3.024457109327861, "learning_rate": 1.8699366948514767e-06, "loss": 0.799, "step": 4224 }, { "epoch": 0.018703793881977952, "grad_norm": 2.717656279164691, "learning_rate": 1.8703793881977954e-06, "loss": 0.5981, "step": 4225 }, { "epoch": 0.018708220815441143, "grad_norm": 2.9173523118074547, "learning_rate": 1.8708220815441144e-06, "loss": 0.8564, "step": 4226 }, { "epoch": 0.018712647748904333, "grad_norm": 3.1143418401080405, "learning_rate": 1.8712647748904335e-06, "loss": 0.7869, "step": 4227 }, { "epoch": 0.018717074682367524, "grad_norm": 2.517780867021228, "learning_rate": 1.8717074682367525e-06, "loss": 0.5925, "step": 4228 }, { "epoch": 0.018721501615830714, "grad_norm": 3.621722577134451, "learning_rate": 1.8721501615830714e-06, "loss": 1.2015, "step": 4229 }, { "epoch": 0.018725928549293905, "grad_norm": 3.6354299720375556, "learning_rate": 1.8725928549293906e-06, "loss": 0.8008, "step": 4230 }, { "epoch": 0.018730355482757095, "grad_norm": 2.5684787526297663, "learning_rate": 1.8730355482757095e-06, "loss": 0.5399, "step": 4231 }, { "epoch": 0.018734782416220286, "grad_norm": 3.3478778781599043, "learning_rate": 1.8734782416220285e-06, "loss": 1.0593, "step": 4232 }, { "epoch": 0.018739209349683473, "grad_norm": 3.2480425632905288, "learning_rate": 1.8739209349683476e-06, "loss": 0.7693, "step": 4233 }, { "epoch": 0.018743636283146663, "grad_norm": 2.6446706780579383, "learning_rate": 1.8743636283146666e-06, "loss": 0.7953, "step": 4234 }, { "epoch": 0.018748063216609854, "grad_norm": 3.2905105321655572, "learning_rate": 1.8748063216609855e-06, "loss": 0.8867, "step": 4235 }, { "epoch": 0.018752490150073044, "grad_norm": 3.2652362268634882, "learning_rate": 1.8752490150073047e-06, "loss": 0.7467, "step": 4236 }, { "epoch": 0.018756917083536235, "grad_norm": 3.517716855377114, "learning_rate": 1.8756917083536236e-06, "loss": 0.9743, "step": 4237 }, { "epoch": 0.018761344016999425, "grad_norm": 2.671130353566024, "learning_rate": 1.8761344016999426e-06, "loss": 0.844, "step": 4238 }, { "epoch": 0.018765770950462616, "grad_norm": 2.8522852928377396, "learning_rate": 1.8765770950462617e-06, "loss": 0.7592, "step": 4239 }, { "epoch": 0.018770197883925803, "grad_norm": 2.9508442209536554, "learning_rate": 1.8770197883925807e-06, "loss": 0.8202, "step": 4240 }, { "epoch": 0.018774624817388993, "grad_norm": 2.7712752554244595, "learning_rate": 1.8774624817388994e-06, "loss": 0.7859, "step": 4241 }, { "epoch": 0.018779051750852184, "grad_norm": 2.791407895812249, "learning_rate": 1.8779051750852186e-06, "loss": 0.8161, "step": 4242 }, { "epoch": 0.018783478684315374, "grad_norm": 3.671835155486193, "learning_rate": 1.8783478684315375e-06, "loss": 1.1192, "step": 4243 }, { "epoch": 0.018787905617778565, "grad_norm": 3.690862263771022, "learning_rate": 1.8787905617778565e-06, "loss": 1.01, "step": 4244 }, { "epoch": 0.018792332551241756, "grad_norm": 3.6339021934138276, "learning_rate": 1.8792332551241756e-06, "loss": 0.7422, "step": 4245 }, { "epoch": 0.018796759484704946, "grad_norm": 3.3503901110611, "learning_rate": 1.8796759484704946e-06, "loss": 0.7374, "step": 4246 }, { "epoch": 0.018801186418168137, "grad_norm": 4.064204295327199, "learning_rate": 1.8801186418168135e-06, "loss": 1.3547, "step": 4247 }, { "epoch": 0.018805613351631324, "grad_norm": 2.941387595996578, "learning_rate": 1.8805613351631327e-06, "loss": 0.9307, "step": 4248 }, { "epoch": 0.018810040285094514, "grad_norm": 3.2676342816350425, "learning_rate": 1.8810040285094516e-06, "loss": 1.2932, "step": 4249 }, { "epoch": 0.018814467218557705, "grad_norm": 2.710017673432951, "learning_rate": 1.8814467218557706e-06, "loss": 0.7923, "step": 4250 }, { "epoch": 0.018818894152020895, "grad_norm": 3.1608131051030965, "learning_rate": 1.8818894152020897e-06, "loss": 0.9368, "step": 4251 }, { "epoch": 0.018823321085484086, "grad_norm": 2.7256259440976005, "learning_rate": 1.8823321085484087e-06, "loss": 0.8307, "step": 4252 }, { "epoch": 0.018827748018947276, "grad_norm": 2.9635801689951724, "learning_rate": 1.8827748018947276e-06, "loss": 1.0412, "step": 4253 }, { "epoch": 0.018832174952410467, "grad_norm": 2.7430245887269264, "learning_rate": 1.8832174952410468e-06, "loss": 0.7471, "step": 4254 }, { "epoch": 0.018836601885873654, "grad_norm": 3.217741213607261, "learning_rate": 1.8836601885873657e-06, "loss": 0.974, "step": 4255 }, { "epoch": 0.018841028819336844, "grad_norm": 2.684802058640511, "learning_rate": 1.8841028819336845e-06, "loss": 0.6909, "step": 4256 }, { "epoch": 0.018845455752800035, "grad_norm": 3.2066915827832982, "learning_rate": 1.8845455752800039e-06, "loss": 0.8944, "step": 4257 }, { "epoch": 0.018849882686263225, "grad_norm": 2.710374630431547, "learning_rate": 1.8849882686263226e-06, "loss": 0.9017, "step": 4258 }, { "epoch": 0.018854309619726416, "grad_norm": 3.0329392497254593, "learning_rate": 1.8854309619726415e-06, "loss": 0.7804, "step": 4259 }, { "epoch": 0.018858736553189606, "grad_norm": 2.802381896854465, "learning_rate": 1.8858736553189607e-06, "loss": 0.8292, "step": 4260 }, { "epoch": 0.018863163486652797, "grad_norm": 3.071924115449336, "learning_rate": 1.8863163486652796e-06, "loss": 0.713, "step": 4261 }, { "epoch": 0.018867590420115987, "grad_norm": 2.6835370311143967, "learning_rate": 1.8867590420115986e-06, "loss": 0.7729, "step": 4262 }, { "epoch": 0.018872017353579174, "grad_norm": 2.7222494729239495, "learning_rate": 1.8872017353579177e-06, "loss": 0.7309, "step": 4263 }, { "epoch": 0.018876444287042365, "grad_norm": 3.2543238856062144, "learning_rate": 1.8876444287042367e-06, "loss": 0.7139, "step": 4264 }, { "epoch": 0.018880871220505555, "grad_norm": 2.6465218715155427, "learning_rate": 1.8880871220505556e-06, "loss": 0.6046, "step": 4265 }, { "epoch": 0.018885298153968746, "grad_norm": 2.985641499033928, "learning_rate": 1.8885298153968748e-06, "loss": 0.9262, "step": 4266 }, { "epoch": 0.018889725087431936, "grad_norm": 2.944418073690175, "learning_rate": 1.8889725087431937e-06, "loss": 0.9261, "step": 4267 }, { "epoch": 0.018894152020895127, "grad_norm": 2.995763670214849, "learning_rate": 1.8894152020895127e-06, "loss": 1.0181, "step": 4268 }, { "epoch": 0.018898578954358317, "grad_norm": 2.442511076613808, "learning_rate": 1.8898578954358319e-06, "loss": 0.5879, "step": 4269 }, { "epoch": 0.018903005887821504, "grad_norm": 3.225970739150269, "learning_rate": 1.8903005887821508e-06, "loss": 0.9348, "step": 4270 }, { "epoch": 0.018907432821284695, "grad_norm": 3.6844898634498184, "learning_rate": 1.8907432821284695e-06, "loss": 0.7851, "step": 4271 }, { "epoch": 0.018911859754747885, "grad_norm": 2.572685238173319, "learning_rate": 1.891185975474789e-06, "loss": 0.7082, "step": 4272 }, { "epoch": 0.018916286688211076, "grad_norm": 2.903181818804419, "learning_rate": 1.8916286688211076e-06, "loss": 0.6994, "step": 4273 }, { "epoch": 0.018920713621674266, "grad_norm": 3.236351348520556, "learning_rate": 1.8920713621674266e-06, "loss": 0.922, "step": 4274 }, { "epoch": 0.018925140555137457, "grad_norm": 2.8561284547935313, "learning_rate": 1.8925140555137457e-06, "loss": 0.5684, "step": 4275 }, { "epoch": 0.018929567488600647, "grad_norm": 2.6245193529536293, "learning_rate": 1.8929567488600647e-06, "loss": 0.678, "step": 4276 }, { "epoch": 0.018933994422063838, "grad_norm": 3.180292206187626, "learning_rate": 1.8933994422063836e-06, "loss": 1.2445, "step": 4277 }, { "epoch": 0.018938421355527025, "grad_norm": 3.470340467706899, "learning_rate": 1.8938421355527028e-06, "loss": 1.0333, "step": 4278 }, { "epoch": 0.018942848288990215, "grad_norm": 3.0100305944343333, "learning_rate": 1.8942848288990217e-06, "loss": 0.8222, "step": 4279 }, { "epoch": 0.018947275222453406, "grad_norm": 3.344754397155081, "learning_rate": 1.8947275222453407e-06, "loss": 0.771, "step": 4280 }, { "epoch": 0.018951702155916596, "grad_norm": 2.6810350918008354, "learning_rate": 1.8951702155916599e-06, "loss": 0.8477, "step": 4281 }, { "epoch": 0.018956129089379787, "grad_norm": 2.728261324324055, "learning_rate": 1.8956129089379788e-06, "loss": 0.6565, "step": 4282 }, { "epoch": 0.018960556022842977, "grad_norm": 4.072762437703003, "learning_rate": 1.8960556022842977e-06, "loss": 1.5931, "step": 4283 }, { "epoch": 0.018964982956306168, "grad_norm": 3.5150690895310053, "learning_rate": 1.896498295630617e-06, "loss": 1.3365, "step": 4284 }, { "epoch": 0.01896940988976936, "grad_norm": 2.327145300594196, "learning_rate": 1.8969409889769359e-06, "loss": 0.4175, "step": 4285 }, { "epoch": 0.018973836823232546, "grad_norm": 2.8949886379308643, "learning_rate": 1.8973836823232548e-06, "loss": 0.6238, "step": 4286 }, { "epoch": 0.018978263756695736, "grad_norm": 2.8776189594618033, "learning_rate": 1.897826375669574e-06, "loss": 0.9084, "step": 4287 }, { "epoch": 0.018982690690158927, "grad_norm": 2.7734564910672783, "learning_rate": 1.898269069015893e-06, "loss": 0.5908, "step": 4288 }, { "epoch": 0.018987117623622117, "grad_norm": 3.223582605175141, "learning_rate": 1.8987117623622116e-06, "loss": 0.9114, "step": 4289 }, { "epoch": 0.018991544557085308, "grad_norm": 3.3155728144633376, "learning_rate": 1.899154455708531e-06, "loss": 1.0047, "step": 4290 }, { "epoch": 0.018995971490548498, "grad_norm": 3.0804185543678284, "learning_rate": 1.8995971490548497e-06, "loss": 0.8919, "step": 4291 }, { "epoch": 0.01900039842401169, "grad_norm": 4.122712716386959, "learning_rate": 1.9000398424011687e-06, "loss": 1.2767, "step": 4292 }, { "epoch": 0.019004825357474876, "grad_norm": 3.182308882221571, "learning_rate": 1.9004825357474879e-06, "loss": 0.7034, "step": 4293 }, { "epoch": 0.019009252290938066, "grad_norm": 2.8762235327687655, "learning_rate": 1.9009252290938068e-06, "loss": 0.7468, "step": 4294 }, { "epoch": 0.019013679224401257, "grad_norm": 2.401496601824448, "learning_rate": 1.9013679224401257e-06, "loss": 0.5394, "step": 4295 }, { "epoch": 0.019018106157864447, "grad_norm": 2.709402088903131, "learning_rate": 1.901810615786445e-06, "loss": 0.837, "step": 4296 }, { "epoch": 0.019022533091327638, "grad_norm": 3.276462073844833, "learning_rate": 1.9022533091327639e-06, "loss": 0.7755, "step": 4297 }, { "epoch": 0.019026960024790828, "grad_norm": 3.151889819094651, "learning_rate": 1.9026960024790828e-06, "loss": 1.0978, "step": 4298 }, { "epoch": 0.01903138695825402, "grad_norm": 2.7521390101555827, "learning_rate": 1.903138695825402e-06, "loss": 0.9934, "step": 4299 }, { "epoch": 0.01903581389171721, "grad_norm": 3.010182970410621, "learning_rate": 1.903581389171721e-06, "loss": 1.0866, "step": 4300 }, { "epoch": 0.019040240825180396, "grad_norm": 2.8067858760331843, "learning_rate": 1.9040240825180399e-06, "loss": 0.9173, "step": 4301 }, { "epoch": 0.019044667758643587, "grad_norm": 2.7557886234603717, "learning_rate": 1.904466775864359e-06, "loss": 0.6825, "step": 4302 }, { "epoch": 0.019049094692106777, "grad_norm": 5.1747833676493835, "learning_rate": 1.904909469210678e-06, "loss": 1.064, "step": 4303 }, { "epoch": 0.019053521625569968, "grad_norm": 2.97138043112023, "learning_rate": 1.9053521625569967e-06, "loss": 0.6729, "step": 4304 }, { "epoch": 0.019057948559033158, "grad_norm": 2.772488514398333, "learning_rate": 1.905794855903316e-06, "loss": 0.8783, "step": 4305 }, { "epoch": 0.01906237549249635, "grad_norm": 3.834076730351971, "learning_rate": 1.9062375492496348e-06, "loss": 1.0093, "step": 4306 }, { "epoch": 0.01906680242595954, "grad_norm": 2.6226412379425534, "learning_rate": 1.9066802425959537e-06, "loss": 0.4441, "step": 4307 }, { "epoch": 0.019071229359422726, "grad_norm": 4.262810373886254, "learning_rate": 1.907122935942273e-06, "loss": 1.4476, "step": 4308 }, { "epoch": 0.019075656292885917, "grad_norm": 3.4021190886867116, "learning_rate": 1.907565629288592e-06, "loss": 0.9453, "step": 4309 }, { "epoch": 0.019080083226349107, "grad_norm": 3.0543669679220162, "learning_rate": 1.908008322634911e-06, "loss": 0.7973, "step": 4310 }, { "epoch": 0.019084510159812298, "grad_norm": 2.867460525720438, "learning_rate": 1.90845101598123e-06, "loss": 0.6963, "step": 4311 }, { "epoch": 0.01908893709327549, "grad_norm": 3.3725575446054425, "learning_rate": 1.908893709327549e-06, "loss": 0.8773, "step": 4312 }, { "epoch": 0.01909336402673868, "grad_norm": 2.9513968583762087, "learning_rate": 1.9093364026738683e-06, "loss": 0.709, "step": 4313 }, { "epoch": 0.01909779096020187, "grad_norm": 2.8402053660264546, "learning_rate": 1.9097790960201872e-06, "loss": 0.8506, "step": 4314 }, { "epoch": 0.01910221789366506, "grad_norm": 2.9328202219880293, "learning_rate": 1.9102217893665057e-06, "loss": 0.9232, "step": 4315 }, { "epoch": 0.019106644827128247, "grad_norm": 2.9004401207273287, "learning_rate": 1.910664482712825e-06, "loss": 1.0029, "step": 4316 }, { "epoch": 0.019111071760591437, "grad_norm": 2.93832426112673, "learning_rate": 1.911107176059144e-06, "loss": 0.7731, "step": 4317 }, { "epoch": 0.019115498694054628, "grad_norm": 3.112070603066063, "learning_rate": 1.911549869405463e-06, "loss": 0.9626, "step": 4318 }, { "epoch": 0.01911992562751782, "grad_norm": 3.7963839774142976, "learning_rate": 1.911992562751782e-06, "loss": 0.9151, "step": 4319 }, { "epoch": 0.01912435256098101, "grad_norm": 2.3277037681548287, "learning_rate": 1.912435256098101e-06, "loss": 0.7321, "step": 4320 }, { "epoch": 0.0191287794944442, "grad_norm": 3.3011074134825877, "learning_rate": 1.91287794944442e-06, "loss": 0.7309, "step": 4321 }, { "epoch": 0.01913320642790739, "grad_norm": 3.2407627888900197, "learning_rate": 1.9133206427907392e-06, "loss": 1.0679, "step": 4322 }, { "epoch": 0.019137633361370577, "grad_norm": 2.850857268156585, "learning_rate": 1.913763336137058e-06, "loss": 0.7668, "step": 4323 }, { "epoch": 0.019142060294833767, "grad_norm": 3.4209953372140554, "learning_rate": 1.914206029483377e-06, "loss": 0.9914, "step": 4324 }, { "epoch": 0.019146487228296958, "grad_norm": 2.8264944287654687, "learning_rate": 1.914648722829696e-06, "loss": 0.818, "step": 4325 }, { "epoch": 0.01915091416176015, "grad_norm": 3.600524853132072, "learning_rate": 1.915091416176015e-06, "loss": 0.8872, "step": 4326 }, { "epoch": 0.01915534109522334, "grad_norm": 2.6053363253342217, "learning_rate": 1.915534109522334e-06, "loss": 0.9222, "step": 4327 }, { "epoch": 0.01915976802868653, "grad_norm": 2.964224520069457, "learning_rate": 1.9159768028686533e-06, "loss": 0.7619, "step": 4328 }, { "epoch": 0.01916419496214972, "grad_norm": 3.1449338581792623, "learning_rate": 1.9164194962149723e-06, "loss": 0.8109, "step": 4329 }, { "epoch": 0.01916862189561291, "grad_norm": 2.842405452453796, "learning_rate": 1.916862189561291e-06, "loss": 0.7129, "step": 4330 }, { "epoch": 0.019173048829076098, "grad_norm": 2.9843072430717035, "learning_rate": 1.91730488290761e-06, "loss": 0.4906, "step": 4331 }, { "epoch": 0.019177475762539288, "grad_norm": 3.181140550513297, "learning_rate": 1.917747576253929e-06, "loss": 1.177, "step": 4332 }, { "epoch": 0.01918190269600248, "grad_norm": 3.195882710228813, "learning_rate": 1.918190269600248e-06, "loss": 1.0892, "step": 4333 }, { "epoch": 0.01918632962946567, "grad_norm": 3.6138247535421786, "learning_rate": 1.918632962946567e-06, "loss": 0.9659, "step": 4334 }, { "epoch": 0.01919075656292886, "grad_norm": 3.425985280196456, "learning_rate": 1.919075656292886e-06, "loss": 0.6325, "step": 4335 }, { "epoch": 0.01919518349639205, "grad_norm": 2.8595019123100713, "learning_rate": 1.919518349639205e-06, "loss": 0.7412, "step": 4336 }, { "epoch": 0.01919961042985524, "grad_norm": 2.858534183681264, "learning_rate": 1.9199610429855243e-06, "loss": 0.8023, "step": 4337 }, { "epoch": 0.019204037363318428, "grad_norm": 2.977341094399524, "learning_rate": 1.9204037363318432e-06, "loss": 0.9264, "step": 4338 }, { "epoch": 0.019208464296781618, "grad_norm": 3.2049287586514215, "learning_rate": 1.920846429678162e-06, "loss": 1.0778, "step": 4339 }, { "epoch": 0.01921289123024481, "grad_norm": 3.0909701698648826, "learning_rate": 1.921289123024481e-06, "loss": 0.8878, "step": 4340 }, { "epoch": 0.019217318163708, "grad_norm": 2.73240665132247, "learning_rate": 1.9217318163708e-06, "loss": 0.6415, "step": 4341 }, { "epoch": 0.01922174509717119, "grad_norm": 2.9113679273047124, "learning_rate": 1.922174509717119e-06, "loss": 0.4356, "step": 4342 }, { "epoch": 0.01922617203063438, "grad_norm": 3.147542732467225, "learning_rate": 1.9226172030634384e-06, "loss": 0.8223, "step": 4343 }, { "epoch": 0.01923059896409757, "grad_norm": 3.247737821886338, "learning_rate": 1.9230598964097573e-06, "loss": 0.7194, "step": 4344 }, { "epoch": 0.01923502589756076, "grad_norm": 2.9294317643960546, "learning_rate": 1.923502589756076e-06, "loss": 0.8432, "step": 4345 }, { "epoch": 0.019239452831023948, "grad_norm": 2.5520820932053176, "learning_rate": 1.9239452831023952e-06, "loss": 0.7379, "step": 4346 }, { "epoch": 0.01924387976448714, "grad_norm": 3.4833477679429103, "learning_rate": 1.924387976448714e-06, "loss": 0.797, "step": 4347 }, { "epoch": 0.01924830669795033, "grad_norm": 3.633908605585666, "learning_rate": 1.924830669795033e-06, "loss": 0.8445, "step": 4348 }, { "epoch": 0.01925273363141352, "grad_norm": 2.6562247470857883, "learning_rate": 1.925273363141352e-06, "loss": 0.7922, "step": 4349 }, { "epoch": 0.01925716056487671, "grad_norm": 2.398066004824764, "learning_rate": 1.925716056487671e-06, "loss": 0.4634, "step": 4350 }, { "epoch": 0.0192615874983399, "grad_norm": 4.469993846157134, "learning_rate": 1.92615874983399e-06, "loss": 1.3693, "step": 4351 }, { "epoch": 0.01926601443180309, "grad_norm": 3.2093334724762257, "learning_rate": 1.9266014431803093e-06, "loss": 1.0913, "step": 4352 }, { "epoch": 0.019270441365266282, "grad_norm": 3.1897236707851366, "learning_rate": 1.9270441365266283e-06, "loss": 0.8869, "step": 4353 }, { "epoch": 0.01927486829872947, "grad_norm": 2.519701820441367, "learning_rate": 1.9274868298729472e-06, "loss": 0.5186, "step": 4354 }, { "epoch": 0.01927929523219266, "grad_norm": 3.3216567065014573, "learning_rate": 1.927929523219266e-06, "loss": 1.1758, "step": 4355 }, { "epoch": 0.01928372216565585, "grad_norm": 4.760181927266488, "learning_rate": 1.928372216565585e-06, "loss": 1.1883, "step": 4356 }, { "epoch": 0.01928814909911904, "grad_norm": 2.702712324482375, "learning_rate": 1.928814909911904e-06, "loss": 0.6629, "step": 4357 }, { "epoch": 0.01929257603258223, "grad_norm": 2.6670654409170673, "learning_rate": 1.9292576032582234e-06, "loss": 0.5918, "step": 4358 }, { "epoch": 0.01929700296604542, "grad_norm": 2.433421527742359, "learning_rate": 1.9297002966045424e-06, "loss": 0.5801, "step": 4359 }, { "epoch": 0.019301429899508612, "grad_norm": 2.7118853114904895, "learning_rate": 1.9301429899508613e-06, "loss": 0.8169, "step": 4360 }, { "epoch": 0.0193058568329718, "grad_norm": 3.3210630025939376, "learning_rate": 1.9305856832971803e-06, "loss": 0.8632, "step": 4361 }, { "epoch": 0.01931028376643499, "grad_norm": 2.346445577195379, "learning_rate": 1.9310283766434992e-06, "loss": 0.4736, "step": 4362 }, { "epoch": 0.01931471069989818, "grad_norm": 2.640589458558285, "learning_rate": 1.931471069989818e-06, "loss": 0.5806, "step": 4363 }, { "epoch": 0.01931913763336137, "grad_norm": 2.9033690307473363, "learning_rate": 1.9319137633361375e-06, "loss": 0.669, "step": 4364 }, { "epoch": 0.01932356456682456, "grad_norm": 2.7212909386607635, "learning_rate": 1.932356456682456e-06, "loss": 0.8215, "step": 4365 }, { "epoch": 0.01932799150028775, "grad_norm": 2.6543707387688285, "learning_rate": 1.932799150028775e-06, "loss": 0.9073, "step": 4366 }, { "epoch": 0.019332418433750942, "grad_norm": 3.7488506023800046, "learning_rate": 1.9332418433750944e-06, "loss": 1.1219, "step": 4367 }, { "epoch": 0.019336845367214132, "grad_norm": 3.6829752802752815, "learning_rate": 1.9336845367214133e-06, "loss": 1.1775, "step": 4368 }, { "epoch": 0.01934127230067732, "grad_norm": 2.880733761338702, "learning_rate": 1.9341272300677323e-06, "loss": 0.7592, "step": 4369 }, { "epoch": 0.01934569923414051, "grad_norm": 3.7046440184565603, "learning_rate": 1.9345699234140512e-06, "loss": 0.824, "step": 4370 }, { "epoch": 0.0193501261676037, "grad_norm": 2.9488631222794135, "learning_rate": 1.93501261676037e-06, "loss": 0.8441, "step": 4371 }, { "epoch": 0.01935455310106689, "grad_norm": 2.7891502502467724, "learning_rate": 1.935455310106689e-06, "loss": 0.5952, "step": 4372 }, { "epoch": 0.01935898003453008, "grad_norm": 3.1647311605376203, "learning_rate": 1.9358980034530085e-06, "loss": 0.614, "step": 4373 }, { "epoch": 0.019363406967993272, "grad_norm": 2.7525279288226825, "learning_rate": 1.9363406967993274e-06, "loss": 0.9402, "step": 4374 }, { "epoch": 0.019367833901456463, "grad_norm": 3.3517659259623778, "learning_rate": 1.9367833901456464e-06, "loss": 1.2046, "step": 4375 }, { "epoch": 0.01937226083491965, "grad_norm": 3.622791867657955, "learning_rate": 1.9372260834919653e-06, "loss": 1.2434, "step": 4376 }, { "epoch": 0.01937668776838284, "grad_norm": 3.3236064032145687, "learning_rate": 1.9376687768382843e-06, "loss": 1.2346, "step": 4377 }, { "epoch": 0.01938111470184603, "grad_norm": 3.2623675120004543, "learning_rate": 1.9381114701846032e-06, "loss": 0.8952, "step": 4378 }, { "epoch": 0.01938554163530922, "grad_norm": 2.832498279731854, "learning_rate": 1.9385541635309226e-06, "loss": 0.7551, "step": 4379 }, { "epoch": 0.01938996856877241, "grad_norm": 3.0548527349347157, "learning_rate": 1.938996856877241e-06, "loss": 0.8532, "step": 4380 }, { "epoch": 0.019394395502235602, "grad_norm": 3.2185439318434415, "learning_rate": 1.93943955022356e-06, "loss": 0.9106, "step": 4381 }, { "epoch": 0.019398822435698793, "grad_norm": 3.1151286335277657, "learning_rate": 1.9398822435698794e-06, "loss": 0.8077, "step": 4382 }, { "epoch": 0.019403249369161983, "grad_norm": 3.255418721697376, "learning_rate": 1.9403249369161984e-06, "loss": 0.8719, "step": 4383 }, { "epoch": 0.01940767630262517, "grad_norm": 3.2323373182743764, "learning_rate": 1.9407676302625173e-06, "loss": 0.7218, "step": 4384 }, { "epoch": 0.01941210323608836, "grad_norm": 2.7084599880141, "learning_rate": 1.9412103236088363e-06, "loss": 0.7254, "step": 4385 }, { "epoch": 0.01941653016955155, "grad_norm": 3.216098685445626, "learning_rate": 1.9416530169551552e-06, "loss": 0.9114, "step": 4386 }, { "epoch": 0.01942095710301474, "grad_norm": 3.6300898034331257, "learning_rate": 1.942095710301474e-06, "loss": 1.1753, "step": 4387 }, { "epoch": 0.019425384036477932, "grad_norm": 3.1761868957303445, "learning_rate": 1.9425384036477935e-06, "loss": 0.7141, "step": 4388 }, { "epoch": 0.019429810969941123, "grad_norm": 3.319505941532238, "learning_rate": 1.9429810969941125e-06, "loss": 1.0667, "step": 4389 }, { "epoch": 0.019434237903404313, "grad_norm": 2.9487049229317055, "learning_rate": 1.9434237903404314e-06, "loss": 0.8033, "step": 4390 }, { "epoch": 0.0194386648368675, "grad_norm": 2.559757115455164, "learning_rate": 1.9438664836867504e-06, "loss": 0.6227, "step": 4391 }, { "epoch": 0.01944309177033069, "grad_norm": 2.9859692053618083, "learning_rate": 1.9443091770330693e-06, "loss": 0.8033, "step": 4392 }, { "epoch": 0.01944751870379388, "grad_norm": 2.5704377046124804, "learning_rate": 1.9447518703793883e-06, "loss": 0.5727, "step": 4393 }, { "epoch": 0.019451945637257072, "grad_norm": 4.182377910403017, "learning_rate": 1.9451945637257077e-06, "loss": 1.1474, "step": 4394 }, { "epoch": 0.019456372570720262, "grad_norm": 3.330675254867097, "learning_rate": 1.945637257072026e-06, "loss": 0.8965, "step": 4395 }, { "epoch": 0.019460799504183453, "grad_norm": 2.414752634385827, "learning_rate": 1.946079950418345e-06, "loss": 0.7287, "step": 4396 }, { "epoch": 0.019465226437646643, "grad_norm": 3.308490886062579, "learning_rate": 1.9465226437646645e-06, "loss": 0.907, "step": 4397 }, { "epoch": 0.019469653371109834, "grad_norm": 2.9919261143798277, "learning_rate": 1.9469653371109834e-06, "loss": 0.8448, "step": 4398 }, { "epoch": 0.01947408030457302, "grad_norm": 2.902816124309811, "learning_rate": 1.9474080304573024e-06, "loss": 0.7282, "step": 4399 }, { "epoch": 0.01947850723803621, "grad_norm": 3.271425415531642, "learning_rate": 1.9478507238036213e-06, "loss": 0.914, "step": 4400 }, { "epoch": 0.019482934171499402, "grad_norm": 3.636147488674811, "learning_rate": 1.9482934171499403e-06, "loss": 1.2897, "step": 4401 }, { "epoch": 0.019487361104962592, "grad_norm": 2.4210114882681304, "learning_rate": 1.9487361104962592e-06, "loss": 0.7772, "step": 4402 }, { "epoch": 0.019491788038425783, "grad_norm": 2.692923086237237, "learning_rate": 1.9491788038425786e-06, "loss": 0.6386, "step": 4403 }, { "epoch": 0.019496214971888973, "grad_norm": 3.0404402149455385, "learning_rate": 1.9496214971888975e-06, "loss": 0.8297, "step": 4404 }, { "epoch": 0.019500641905352164, "grad_norm": 3.563146806156778, "learning_rate": 1.9500641905352165e-06, "loss": 1.1757, "step": 4405 }, { "epoch": 0.01950506883881535, "grad_norm": 2.9891046407470947, "learning_rate": 1.9505068838815354e-06, "loss": 0.9427, "step": 4406 }, { "epoch": 0.01950949577227854, "grad_norm": 2.7257357815817613, "learning_rate": 1.9509495772278544e-06, "loss": 0.5161, "step": 4407 }, { "epoch": 0.019513922705741732, "grad_norm": 2.7425125943615227, "learning_rate": 1.9513922705741733e-06, "loss": 0.6824, "step": 4408 }, { "epoch": 0.019518349639204922, "grad_norm": 3.0818089916502167, "learning_rate": 1.9518349639204927e-06, "loss": 0.8249, "step": 4409 }, { "epoch": 0.019522776572668113, "grad_norm": 3.9084841083386315, "learning_rate": 1.9522776572668117e-06, "loss": 1.0644, "step": 4410 }, { "epoch": 0.019527203506131303, "grad_norm": 2.2905718816615495, "learning_rate": 1.95272035061313e-06, "loss": 0.5227, "step": 4411 }, { "epoch": 0.019531630439594494, "grad_norm": 3.2753181269564084, "learning_rate": 1.9531630439594495e-06, "loss": 1.1108, "step": 4412 }, { "epoch": 0.019536057373057684, "grad_norm": 2.89342742623554, "learning_rate": 1.9536057373057685e-06, "loss": 0.9189, "step": 4413 }, { "epoch": 0.01954048430652087, "grad_norm": 2.8071464505529673, "learning_rate": 1.9540484306520874e-06, "loss": 0.6311, "step": 4414 }, { "epoch": 0.019544911239984062, "grad_norm": 2.91369905750997, "learning_rate": 1.9544911239984064e-06, "loss": 0.831, "step": 4415 }, { "epoch": 0.019549338173447253, "grad_norm": 3.6563717935052216, "learning_rate": 1.9549338173447253e-06, "loss": 0.6045, "step": 4416 }, { "epoch": 0.019553765106910443, "grad_norm": 3.0986273720692794, "learning_rate": 1.9553765106910443e-06, "loss": 0.8598, "step": 4417 }, { "epoch": 0.019558192040373634, "grad_norm": 3.160944473095314, "learning_rate": 1.9558192040373637e-06, "loss": 0.5919, "step": 4418 }, { "epoch": 0.019562618973836824, "grad_norm": 2.6439309311284864, "learning_rate": 1.9562618973836826e-06, "loss": 0.7482, "step": 4419 }, { "epoch": 0.019567045907300015, "grad_norm": 3.411675747186765, "learning_rate": 1.9567045907300015e-06, "loss": 0.6472, "step": 4420 }, { "epoch": 0.0195714728407632, "grad_norm": 2.644526528749203, "learning_rate": 1.9571472840763205e-06, "loss": 0.8464, "step": 4421 }, { "epoch": 0.019575899774226392, "grad_norm": 2.597873952717109, "learning_rate": 1.9575899774226394e-06, "loss": 0.6555, "step": 4422 }, { "epoch": 0.019580326707689583, "grad_norm": 3.4270425415915517, "learning_rate": 1.9580326707689584e-06, "loss": 0.8049, "step": 4423 }, { "epoch": 0.019584753641152773, "grad_norm": 2.8402482110680864, "learning_rate": 1.9584753641152778e-06, "loss": 0.6941, "step": 4424 }, { "epoch": 0.019589180574615964, "grad_norm": 3.1540152715670833, "learning_rate": 1.9589180574615967e-06, "loss": 0.8566, "step": 4425 }, { "epoch": 0.019593607508079154, "grad_norm": 2.42062388667607, "learning_rate": 1.9593607508079152e-06, "loss": 0.5712, "step": 4426 }, { "epoch": 0.019598034441542345, "grad_norm": 2.6086279077278074, "learning_rate": 1.9598034441542346e-06, "loss": 0.7987, "step": 4427 }, { "epoch": 0.019602461375005535, "grad_norm": 3.67375819119218, "learning_rate": 1.9602461375005535e-06, "loss": 0.7026, "step": 4428 }, { "epoch": 0.019606888308468722, "grad_norm": 3.1430328226545026, "learning_rate": 1.9606888308468725e-06, "loss": 0.9797, "step": 4429 }, { "epoch": 0.019611315241931913, "grad_norm": 3.552560583517022, "learning_rate": 1.9611315241931914e-06, "loss": 0.7255, "step": 4430 }, { "epoch": 0.019615742175395103, "grad_norm": 2.5919329890775398, "learning_rate": 1.9615742175395104e-06, "loss": 0.68, "step": 4431 }, { "epoch": 0.019620169108858294, "grad_norm": 2.914044742209991, "learning_rate": 1.9620169108858293e-06, "loss": 0.4175, "step": 4432 }, { "epoch": 0.019624596042321484, "grad_norm": 3.536324209829546, "learning_rate": 1.9624596042321487e-06, "loss": 1.1038, "step": 4433 }, { "epoch": 0.019629022975784675, "grad_norm": 3.4339111875526784, "learning_rate": 1.9629022975784677e-06, "loss": 0.9795, "step": 4434 }, { "epoch": 0.019633449909247865, "grad_norm": 2.7957389764128417, "learning_rate": 1.9633449909247866e-06, "loss": 0.8091, "step": 4435 }, { "epoch": 0.019637876842711056, "grad_norm": 3.2731206945855957, "learning_rate": 1.9637876842711055e-06, "loss": 0.8158, "step": 4436 }, { "epoch": 0.019642303776174243, "grad_norm": 2.8981905953853295, "learning_rate": 1.9642303776174245e-06, "loss": 0.6331, "step": 4437 }, { "epoch": 0.019646730709637433, "grad_norm": 3.496504749022775, "learning_rate": 1.9646730709637434e-06, "loss": 1.0231, "step": 4438 }, { "epoch": 0.019651157643100624, "grad_norm": 2.572824159984013, "learning_rate": 1.965115764310063e-06, "loss": 0.5273, "step": 4439 }, { "epoch": 0.019655584576563814, "grad_norm": 3.6115560517943397, "learning_rate": 1.9655584576563818e-06, "loss": 0.9326, "step": 4440 }, { "epoch": 0.019660011510027005, "grad_norm": 3.0865298351179504, "learning_rate": 1.9660011510027007e-06, "loss": 0.9967, "step": 4441 }, { "epoch": 0.019664438443490195, "grad_norm": 3.029935459576286, "learning_rate": 1.9664438443490197e-06, "loss": 0.6921, "step": 4442 }, { "epoch": 0.019668865376953386, "grad_norm": 2.530366659589065, "learning_rate": 1.9668865376953386e-06, "loss": 0.8191, "step": 4443 }, { "epoch": 0.019673292310416573, "grad_norm": 3.8394767969168595, "learning_rate": 1.9673292310416575e-06, "loss": 0.9633, "step": 4444 }, { "epoch": 0.019677719243879763, "grad_norm": 3.138354536709432, "learning_rate": 1.9677719243879765e-06, "loss": 0.6433, "step": 4445 }, { "epoch": 0.019682146177342954, "grad_norm": 2.7901880029036756, "learning_rate": 1.9682146177342954e-06, "loss": 0.7606, "step": 4446 }, { "epoch": 0.019686573110806144, "grad_norm": 3.167391009734188, "learning_rate": 1.9686573110806144e-06, "loss": 0.6819, "step": 4447 }, { "epoch": 0.019691000044269335, "grad_norm": 3.6527216478489835, "learning_rate": 1.9691000044269338e-06, "loss": 0.9841, "step": 4448 }, { "epoch": 0.019695426977732525, "grad_norm": 2.602303981394018, "learning_rate": 1.9695426977732527e-06, "loss": 0.825, "step": 4449 }, { "epoch": 0.019699853911195716, "grad_norm": 2.4841810565198656, "learning_rate": 1.9699853911195717e-06, "loss": 0.6305, "step": 4450 }, { "epoch": 0.019704280844658906, "grad_norm": 2.7369244757501834, "learning_rate": 1.9704280844658906e-06, "loss": 0.7634, "step": 4451 }, { "epoch": 0.019708707778122093, "grad_norm": 3.0973410491732927, "learning_rate": 1.9708707778122095e-06, "loss": 0.7204, "step": 4452 }, { "epoch": 0.019713134711585284, "grad_norm": 3.0901446523554346, "learning_rate": 1.9713134711585285e-06, "loss": 0.7872, "step": 4453 }, { "epoch": 0.019717561645048474, "grad_norm": 2.8842334483476657, "learning_rate": 1.971756164504848e-06, "loss": 0.7136, "step": 4454 }, { "epoch": 0.019721988578511665, "grad_norm": 2.5639153891968913, "learning_rate": 1.972198857851167e-06, "loss": 0.5455, "step": 4455 }, { "epoch": 0.019726415511974855, "grad_norm": 3.284167740943217, "learning_rate": 1.9726415511974858e-06, "loss": 0.846, "step": 4456 }, { "epoch": 0.019730842445438046, "grad_norm": 2.885386423130801, "learning_rate": 1.9730842445438047e-06, "loss": 0.6163, "step": 4457 }, { "epoch": 0.019735269378901237, "grad_norm": 3.69390892818896, "learning_rate": 1.9735269378901237e-06, "loss": 0.8162, "step": 4458 }, { "epoch": 0.019739696312364424, "grad_norm": 2.949182201633014, "learning_rate": 1.9739696312364426e-06, "loss": 0.7842, "step": 4459 }, { "epoch": 0.019744123245827614, "grad_norm": 3.01741287712677, "learning_rate": 1.974412324582762e-06, "loss": 0.8391, "step": 4460 }, { "epoch": 0.019748550179290805, "grad_norm": 3.505087779867136, "learning_rate": 1.9748550179290805e-06, "loss": 1.2049, "step": 4461 }, { "epoch": 0.019752977112753995, "grad_norm": 2.9047027997087507, "learning_rate": 1.9752977112753994e-06, "loss": 1.0821, "step": 4462 }, { "epoch": 0.019757404046217186, "grad_norm": 3.266726849017217, "learning_rate": 1.975740404621719e-06, "loss": 0.5906, "step": 4463 }, { "epoch": 0.019761830979680376, "grad_norm": 2.9288187936683205, "learning_rate": 1.9761830979680378e-06, "loss": 0.8728, "step": 4464 }, { "epoch": 0.019766257913143567, "grad_norm": 3.260371256169631, "learning_rate": 1.9766257913143567e-06, "loss": 0.6477, "step": 4465 }, { "epoch": 0.019770684846606757, "grad_norm": 2.8779386996377783, "learning_rate": 1.9770684846606757e-06, "loss": 0.9047, "step": 4466 }, { "epoch": 0.019775111780069944, "grad_norm": 3.312400331318192, "learning_rate": 1.9775111780069946e-06, "loss": 1.2364, "step": 4467 }, { "epoch": 0.019779538713533135, "grad_norm": 2.822436165922614, "learning_rate": 1.9779538713533135e-06, "loss": 1.0535, "step": 4468 }, { "epoch": 0.019783965646996325, "grad_norm": 2.6230777697349015, "learning_rate": 1.978396564699633e-06, "loss": 0.4286, "step": 4469 }, { "epoch": 0.019788392580459516, "grad_norm": 2.663195804367687, "learning_rate": 1.978839258045952e-06, "loss": 0.6439, "step": 4470 }, { "epoch": 0.019792819513922706, "grad_norm": 2.550769508403563, "learning_rate": 1.979281951392271e-06, "loss": 0.6545, "step": 4471 }, { "epoch": 0.019797246447385897, "grad_norm": 2.961622312422264, "learning_rate": 1.9797246447385898e-06, "loss": 0.834, "step": 4472 }, { "epoch": 0.019801673380849087, "grad_norm": 2.3647269046030477, "learning_rate": 1.9801673380849087e-06, "loss": 0.6249, "step": 4473 }, { "epoch": 0.019806100314312274, "grad_norm": 2.906545579069421, "learning_rate": 1.9806100314312277e-06, "loss": 0.8019, "step": 4474 }, { "epoch": 0.019810527247775465, "grad_norm": 2.863187027431324, "learning_rate": 1.981052724777547e-06, "loss": 0.9059, "step": 4475 }, { "epoch": 0.019814954181238655, "grad_norm": 3.0141863522431502, "learning_rate": 1.9814954181238655e-06, "loss": 0.8455, "step": 4476 }, { "epoch": 0.019819381114701846, "grad_norm": 2.744349415615406, "learning_rate": 1.9819381114701845e-06, "loss": 0.7644, "step": 4477 }, { "epoch": 0.019823808048165036, "grad_norm": 3.026394623789486, "learning_rate": 1.982380804816504e-06, "loss": 0.8364, "step": 4478 }, { "epoch": 0.019828234981628227, "grad_norm": 4.858582167663962, "learning_rate": 1.982823498162823e-06, "loss": 1.2239, "step": 4479 }, { "epoch": 0.019832661915091417, "grad_norm": 2.7530498104363037, "learning_rate": 1.9832661915091418e-06, "loss": 0.7875, "step": 4480 }, { "epoch": 0.019837088848554608, "grad_norm": 3.531101546287302, "learning_rate": 1.9837088848554607e-06, "loss": 1.0381, "step": 4481 }, { "epoch": 0.019841515782017795, "grad_norm": 2.60354210239609, "learning_rate": 1.9841515782017797e-06, "loss": 0.5702, "step": 4482 }, { "epoch": 0.019845942715480985, "grad_norm": 3.439704152863709, "learning_rate": 1.9845942715480986e-06, "loss": 0.9716, "step": 4483 }, { "epoch": 0.019850369648944176, "grad_norm": 2.996341732898985, "learning_rate": 1.985036964894418e-06, "loss": 0.6439, "step": 4484 }, { "epoch": 0.019854796582407366, "grad_norm": 2.7909786951687496, "learning_rate": 1.985479658240737e-06, "loss": 0.7395, "step": 4485 }, { "epoch": 0.019859223515870557, "grad_norm": 2.3126150983645206, "learning_rate": 1.985922351587056e-06, "loss": 0.5956, "step": 4486 }, { "epoch": 0.019863650449333747, "grad_norm": 2.689034754795183, "learning_rate": 1.986365044933375e-06, "loss": 1.0523, "step": 4487 }, { "epoch": 0.019868077382796938, "grad_norm": 2.908979945386624, "learning_rate": 1.9868077382796938e-06, "loss": 0.8469, "step": 4488 }, { "epoch": 0.019872504316260125, "grad_norm": 2.9424208807617065, "learning_rate": 1.9872504316260127e-06, "loss": 0.9758, "step": 4489 }, { "epoch": 0.019876931249723315, "grad_norm": 2.9124799214080963, "learning_rate": 1.987693124972332e-06, "loss": 0.5803, "step": 4490 }, { "epoch": 0.019881358183186506, "grad_norm": 2.526196895486222, "learning_rate": 1.988135818318651e-06, "loss": 0.4315, "step": 4491 }, { "epoch": 0.019885785116649696, "grad_norm": 3.1089069602174764, "learning_rate": 1.9885785116649695e-06, "loss": 0.8742, "step": 4492 }, { "epoch": 0.019890212050112887, "grad_norm": 2.937152438214036, "learning_rate": 1.989021205011289e-06, "loss": 0.9564, "step": 4493 }, { "epoch": 0.019894638983576077, "grad_norm": 2.7617582509217504, "learning_rate": 1.989463898357608e-06, "loss": 0.8553, "step": 4494 }, { "epoch": 0.019899065917039268, "grad_norm": 2.6762037624154065, "learning_rate": 1.989906591703927e-06, "loss": 0.7245, "step": 4495 }, { "epoch": 0.01990349285050246, "grad_norm": 3.6466914020957657, "learning_rate": 1.9903492850502458e-06, "loss": 0.8344, "step": 4496 }, { "epoch": 0.019907919783965645, "grad_norm": 3.159901766921339, "learning_rate": 1.9907919783965647e-06, "loss": 0.9011, "step": 4497 }, { "epoch": 0.019912346717428836, "grad_norm": 3.880246589705765, "learning_rate": 1.9912346717428837e-06, "loss": 0.7409, "step": 4498 }, { "epoch": 0.019916773650892027, "grad_norm": 2.75988567454038, "learning_rate": 1.991677365089203e-06, "loss": 0.6877, "step": 4499 }, { "epoch": 0.019921200584355217, "grad_norm": 2.682235247222495, "learning_rate": 1.992120058435522e-06, "loss": 1.0857, "step": 4500 }, { "epoch": 0.019925627517818408, "grad_norm": 4.190739481052816, "learning_rate": 1.992562751781841e-06, "loss": 1.0993, "step": 4501 }, { "epoch": 0.019930054451281598, "grad_norm": 2.8269350739579266, "learning_rate": 1.99300544512816e-06, "loss": 0.7147, "step": 4502 }, { "epoch": 0.01993448138474479, "grad_norm": 3.266328008891678, "learning_rate": 1.993448138474479e-06, "loss": 0.8801, "step": 4503 }, { "epoch": 0.01993890831820798, "grad_norm": 2.703383062594097, "learning_rate": 1.9938908318207978e-06, "loss": 0.7411, "step": 4504 }, { "epoch": 0.019943335251671166, "grad_norm": 3.391945795101353, "learning_rate": 1.994333525167117e-06, "loss": 1.0439, "step": 4505 }, { "epoch": 0.019947762185134357, "grad_norm": 3.4283503004377818, "learning_rate": 1.994776218513436e-06, "loss": 0.9588, "step": 4506 }, { "epoch": 0.019952189118597547, "grad_norm": 3.3844870228532935, "learning_rate": 1.9952189118597546e-06, "loss": 0.8623, "step": 4507 }, { "epoch": 0.019956616052060738, "grad_norm": 2.8076545451521953, "learning_rate": 1.995661605206074e-06, "loss": 0.6863, "step": 4508 }, { "epoch": 0.019961042985523928, "grad_norm": 3.4061169382057175, "learning_rate": 1.996104298552393e-06, "loss": 0.9706, "step": 4509 }, { "epoch": 0.01996546991898712, "grad_norm": 3.4801823604457693, "learning_rate": 1.996546991898712e-06, "loss": 0.5092, "step": 4510 }, { "epoch": 0.01996989685245031, "grad_norm": 3.159539095665815, "learning_rate": 1.996989685245031e-06, "loss": 1.048, "step": 4511 }, { "epoch": 0.019974323785913496, "grad_norm": 2.8772180852857137, "learning_rate": 1.9974323785913498e-06, "loss": 0.6641, "step": 4512 }, { "epoch": 0.019978750719376687, "grad_norm": 2.9217532448622388, "learning_rate": 1.9978750719376687e-06, "loss": 0.9297, "step": 4513 }, { "epoch": 0.019983177652839877, "grad_norm": 2.6866913377984045, "learning_rate": 1.998317765283988e-06, "loss": 0.5599, "step": 4514 }, { "epoch": 0.019987604586303068, "grad_norm": 3.8583955946882327, "learning_rate": 1.998760458630307e-06, "loss": 0.9405, "step": 4515 }, { "epoch": 0.019992031519766258, "grad_norm": 2.6984633410323644, "learning_rate": 1.999203151976626e-06, "loss": 0.765, "step": 4516 }, { "epoch": 0.01999645845322945, "grad_norm": 3.013009232078191, "learning_rate": 1.999645845322945e-06, "loss": 0.7752, "step": 4517 }, { "epoch": 0.02000088538669264, "grad_norm": 3.231759687671692, "learning_rate": 2.000088538669264e-06, "loss": 1.1083, "step": 4518 }, { "epoch": 0.02000531232015583, "grad_norm": 3.138463640169736, "learning_rate": 2.000531232015583e-06, "loss": 0.6061, "step": 4519 }, { "epoch": 0.020009739253619017, "grad_norm": 2.7363874641744643, "learning_rate": 2.000973925361902e-06, "loss": 0.8529, "step": 4520 }, { "epoch": 0.020014166187082207, "grad_norm": 2.6816991150974467, "learning_rate": 2.001416618708221e-06, "loss": 0.8401, "step": 4521 }, { "epoch": 0.020018593120545398, "grad_norm": 3.351593286274506, "learning_rate": 2.00185931205454e-06, "loss": 0.8545, "step": 4522 }, { "epoch": 0.02002302005400859, "grad_norm": 3.4822730304329337, "learning_rate": 2.002302005400859e-06, "loss": 1.0481, "step": 4523 }, { "epoch": 0.02002744698747178, "grad_norm": 2.591602490944211, "learning_rate": 2.002744698747178e-06, "loss": 0.8636, "step": 4524 }, { "epoch": 0.02003187392093497, "grad_norm": 2.828838144359925, "learning_rate": 2.003187392093497e-06, "loss": 0.668, "step": 4525 }, { "epoch": 0.02003630085439816, "grad_norm": 3.369124566511135, "learning_rate": 2.003630085439816e-06, "loss": 1.1367, "step": 4526 }, { "epoch": 0.020040727787861347, "grad_norm": 4.474268922427524, "learning_rate": 2.004072778786135e-06, "loss": 1.3916, "step": 4527 }, { "epoch": 0.020045154721324537, "grad_norm": 3.068278851938695, "learning_rate": 2.0045154721324538e-06, "loss": 1.01, "step": 4528 }, { "epoch": 0.020049581654787728, "grad_norm": 2.961803114286147, "learning_rate": 2.004958165478773e-06, "loss": 0.6322, "step": 4529 }, { "epoch": 0.02005400858825092, "grad_norm": 2.6551777807055057, "learning_rate": 2.005400858825092e-06, "loss": 0.7948, "step": 4530 }, { "epoch": 0.02005843552171411, "grad_norm": 2.900189972363904, "learning_rate": 2.005843552171411e-06, "loss": 0.5859, "step": 4531 }, { "epoch": 0.0200628624551773, "grad_norm": 2.5744549744203304, "learning_rate": 2.00628624551773e-06, "loss": 0.7146, "step": 4532 }, { "epoch": 0.02006728938864049, "grad_norm": 2.7840836810641756, "learning_rate": 2.006728938864049e-06, "loss": 0.8706, "step": 4533 }, { "epoch": 0.02007171632210368, "grad_norm": 2.9992176111794757, "learning_rate": 2.007171632210368e-06, "loss": 0.9087, "step": 4534 }, { "epoch": 0.020076143255566867, "grad_norm": 3.7876857484358193, "learning_rate": 2.0076143255566872e-06, "loss": 1.1295, "step": 4535 }, { "epoch": 0.020080570189030058, "grad_norm": 2.528707682545844, "learning_rate": 2.008057018903006e-06, "loss": 0.7812, "step": 4536 }, { "epoch": 0.02008499712249325, "grad_norm": 3.1398154238009184, "learning_rate": 2.008499712249325e-06, "loss": 0.7802, "step": 4537 }, { "epoch": 0.02008942405595644, "grad_norm": 3.3096981700107966, "learning_rate": 2.008942405595644e-06, "loss": 0.6711, "step": 4538 }, { "epoch": 0.02009385098941963, "grad_norm": 2.55528893204154, "learning_rate": 2.009385098941963e-06, "loss": 0.9039, "step": 4539 }, { "epoch": 0.02009827792288282, "grad_norm": 2.830437172912794, "learning_rate": 2.009827792288282e-06, "loss": 0.883, "step": 4540 }, { "epoch": 0.02010270485634601, "grad_norm": 2.825935359999058, "learning_rate": 2.0102704856346013e-06, "loss": 0.7654, "step": 4541 }, { "epoch": 0.020107131789809198, "grad_norm": 3.0310002206891227, "learning_rate": 2.01071317898092e-06, "loss": 0.5379, "step": 4542 }, { "epoch": 0.020111558723272388, "grad_norm": 3.1810775510140448, "learning_rate": 2.011155872327239e-06, "loss": 0.5924, "step": 4543 }, { "epoch": 0.02011598565673558, "grad_norm": 2.339671460011546, "learning_rate": 2.011598565673558e-06, "loss": 0.7302, "step": 4544 }, { "epoch": 0.02012041259019877, "grad_norm": 2.84093513137457, "learning_rate": 2.012041259019877e-06, "loss": 0.8884, "step": 4545 }, { "epoch": 0.02012483952366196, "grad_norm": 3.665841893665531, "learning_rate": 2.012483952366196e-06, "loss": 1.1825, "step": 4546 }, { "epoch": 0.02012926645712515, "grad_norm": 2.4967388499987844, "learning_rate": 2.012926645712515e-06, "loss": 0.7521, "step": 4547 }, { "epoch": 0.02013369339058834, "grad_norm": 2.8126294432473533, "learning_rate": 2.013369339058834e-06, "loss": 0.9949, "step": 4548 }, { "epoch": 0.02013812032405153, "grad_norm": 3.212380216401587, "learning_rate": 2.013812032405153e-06, "loss": 1.0953, "step": 4549 }, { "epoch": 0.020142547257514718, "grad_norm": 2.775662742862046, "learning_rate": 2.0142547257514723e-06, "loss": 0.7554, "step": 4550 }, { "epoch": 0.02014697419097791, "grad_norm": 2.9295205961506405, "learning_rate": 2.0146974190977912e-06, "loss": 0.6112, "step": 4551 }, { "epoch": 0.0201514011244411, "grad_norm": 3.1699042231188126, "learning_rate": 2.01514011244411e-06, "loss": 0.8919, "step": 4552 }, { "epoch": 0.02015582805790429, "grad_norm": 3.7507238342608837, "learning_rate": 2.015582805790429e-06, "loss": 0.8019, "step": 4553 }, { "epoch": 0.02016025499136748, "grad_norm": 3.294321065880847, "learning_rate": 2.016025499136748e-06, "loss": 0.7741, "step": 4554 }, { "epoch": 0.02016468192483067, "grad_norm": 2.732386051653706, "learning_rate": 2.016468192483067e-06, "loss": 0.5982, "step": 4555 }, { "epoch": 0.02016910885829386, "grad_norm": 4.378040765475408, "learning_rate": 2.0169108858293864e-06, "loss": 1.377, "step": 4556 }, { "epoch": 0.020173535791757048, "grad_norm": 2.883982617268843, "learning_rate": 2.017353579175705e-06, "loss": 0.9386, "step": 4557 }, { "epoch": 0.02017796272522024, "grad_norm": 3.7115938372036776, "learning_rate": 2.017796272522024e-06, "loss": 0.7969, "step": 4558 }, { "epoch": 0.02018238965868343, "grad_norm": 3.595835414892053, "learning_rate": 2.0182389658683432e-06, "loss": 1.1824, "step": 4559 }, { "epoch": 0.02018681659214662, "grad_norm": 3.565044917471168, "learning_rate": 2.018681659214662e-06, "loss": 0.9946, "step": 4560 }, { "epoch": 0.02019124352560981, "grad_norm": 2.61953707205859, "learning_rate": 2.019124352560981e-06, "loss": 0.8316, "step": 4561 }, { "epoch": 0.020195670459073, "grad_norm": 3.750567485887139, "learning_rate": 2.0195670459073e-06, "loss": 1.1254, "step": 4562 }, { "epoch": 0.02020009739253619, "grad_norm": 4.435491393077941, "learning_rate": 2.020009739253619e-06, "loss": 0.9976, "step": 4563 }, { "epoch": 0.020204524325999382, "grad_norm": 2.5432854673738747, "learning_rate": 2.020452432599938e-06, "loss": 0.3544, "step": 4564 }, { "epoch": 0.02020895125946257, "grad_norm": 2.557185699855796, "learning_rate": 2.0208951259462573e-06, "loss": 0.8169, "step": 4565 }, { "epoch": 0.02021337819292576, "grad_norm": 2.633320961258444, "learning_rate": 2.0213378192925763e-06, "loss": 0.7182, "step": 4566 }, { "epoch": 0.02021780512638895, "grad_norm": 3.1801417340108458, "learning_rate": 2.0217805126388952e-06, "loss": 1.0027, "step": 4567 }, { "epoch": 0.02022223205985214, "grad_norm": 3.7879329006640576, "learning_rate": 2.022223205985214e-06, "loss": 0.6465, "step": 4568 }, { "epoch": 0.02022665899331533, "grad_norm": 2.814036934449125, "learning_rate": 2.022665899331533e-06, "loss": 0.7091, "step": 4569 }, { "epoch": 0.02023108592677852, "grad_norm": 2.575729541416774, "learning_rate": 2.023108592677852e-06, "loss": 0.7895, "step": 4570 }, { "epoch": 0.020235512860241712, "grad_norm": 3.314358037385208, "learning_rate": 2.0235512860241715e-06, "loss": 0.9198, "step": 4571 }, { "epoch": 0.020239939793704902, "grad_norm": 2.9177349865006796, "learning_rate": 2.0239939793704904e-06, "loss": 0.4844, "step": 4572 }, { "epoch": 0.02024436672716809, "grad_norm": 2.567659171903688, "learning_rate": 2.024436672716809e-06, "loss": 0.6279, "step": 4573 }, { "epoch": 0.02024879366063128, "grad_norm": 3.2383465253741957, "learning_rate": 2.0248793660631283e-06, "loss": 0.8128, "step": 4574 }, { "epoch": 0.02025322059409447, "grad_norm": 2.5050635823812746, "learning_rate": 2.0253220594094472e-06, "loss": 0.5568, "step": 4575 }, { "epoch": 0.02025764752755766, "grad_norm": 2.7647313481914195, "learning_rate": 2.025764752755766e-06, "loss": 0.613, "step": 4576 }, { "epoch": 0.02026207446102085, "grad_norm": 2.729802495375313, "learning_rate": 2.026207446102085e-06, "loss": 0.7337, "step": 4577 }, { "epoch": 0.020266501394484042, "grad_norm": 2.730022945314114, "learning_rate": 2.026650139448404e-06, "loss": 0.6595, "step": 4578 }, { "epoch": 0.020270928327947232, "grad_norm": 3.2278523311135214, "learning_rate": 2.027092832794723e-06, "loss": 0.6453, "step": 4579 }, { "epoch": 0.02027535526141042, "grad_norm": 3.593985034560546, "learning_rate": 2.0275355261410424e-06, "loss": 1.1536, "step": 4580 }, { "epoch": 0.02027978219487361, "grad_norm": 3.271240004312197, "learning_rate": 2.0279782194873613e-06, "loss": 0.6506, "step": 4581 }, { "epoch": 0.0202842091283368, "grad_norm": 2.9442916626572293, "learning_rate": 2.0284209128336803e-06, "loss": 0.9077, "step": 4582 }, { "epoch": 0.02028863606179999, "grad_norm": 3.3175437584307987, "learning_rate": 2.0288636061799992e-06, "loss": 0.7096, "step": 4583 }, { "epoch": 0.02029306299526318, "grad_norm": 2.562468132580404, "learning_rate": 2.029306299526318e-06, "loss": 0.5597, "step": 4584 }, { "epoch": 0.020297489928726372, "grad_norm": 2.418294159906998, "learning_rate": 2.029748992872637e-06, "loss": 0.6031, "step": 4585 }, { "epoch": 0.020301916862189563, "grad_norm": 2.5823987540377424, "learning_rate": 2.0301916862189565e-06, "loss": 0.8378, "step": 4586 }, { "epoch": 0.020306343795652753, "grad_norm": 3.2793942521790083, "learning_rate": 2.0306343795652755e-06, "loss": 0.8004, "step": 4587 }, { "epoch": 0.02031077072911594, "grad_norm": 2.819982604001564, "learning_rate": 2.031077072911594e-06, "loss": 0.9654, "step": 4588 }, { "epoch": 0.02031519766257913, "grad_norm": 3.3086153906044498, "learning_rate": 2.0315197662579133e-06, "loss": 0.9889, "step": 4589 }, { "epoch": 0.02031962459604232, "grad_norm": 2.4759582283498385, "learning_rate": 2.0319624596042323e-06, "loss": 0.5781, "step": 4590 }, { "epoch": 0.02032405152950551, "grad_norm": 2.5143976688027445, "learning_rate": 2.0324051529505512e-06, "loss": 0.3685, "step": 4591 }, { "epoch": 0.020328478462968702, "grad_norm": 3.413773641441334, "learning_rate": 2.03284784629687e-06, "loss": 0.7714, "step": 4592 }, { "epoch": 0.020332905396431893, "grad_norm": 2.5238909976660553, "learning_rate": 2.033290539643189e-06, "loss": 0.8854, "step": 4593 }, { "epoch": 0.020337332329895083, "grad_norm": 2.6141007413324817, "learning_rate": 2.033733232989508e-06, "loss": 0.824, "step": 4594 }, { "epoch": 0.02034175926335827, "grad_norm": 2.592358573991603, "learning_rate": 2.0341759263358275e-06, "loss": 0.6835, "step": 4595 }, { "epoch": 0.02034618619682146, "grad_norm": 2.874750480056578, "learning_rate": 2.0346186196821464e-06, "loss": 0.4972, "step": 4596 }, { "epoch": 0.02035061313028465, "grad_norm": 2.490879509722716, "learning_rate": 2.0350613130284653e-06, "loss": 0.7706, "step": 4597 }, { "epoch": 0.02035504006374784, "grad_norm": 3.3177586858093933, "learning_rate": 2.0355040063747843e-06, "loss": 0.873, "step": 4598 }, { "epoch": 0.020359466997211032, "grad_norm": 2.9581623077866546, "learning_rate": 2.0359466997211032e-06, "loss": 0.9354, "step": 4599 }, { "epoch": 0.020363893930674223, "grad_norm": 2.617587668365156, "learning_rate": 2.036389393067422e-06, "loss": 0.6698, "step": 4600 }, { "epoch": 0.020368320864137413, "grad_norm": 2.9266130788197113, "learning_rate": 2.0368320864137416e-06, "loss": 1.0818, "step": 4601 }, { "epoch": 0.020372747797600604, "grad_norm": 4.627226109631649, "learning_rate": 2.0372747797600605e-06, "loss": 1.1869, "step": 4602 }, { "epoch": 0.02037717473106379, "grad_norm": 3.3073685029772286, "learning_rate": 2.037717473106379e-06, "loss": 1.1028, "step": 4603 }, { "epoch": 0.02038160166452698, "grad_norm": 3.4614993465257284, "learning_rate": 2.0381601664526984e-06, "loss": 0.9914, "step": 4604 }, { "epoch": 0.020386028597990172, "grad_norm": 2.7120750504887727, "learning_rate": 2.0386028597990173e-06, "loss": 0.5893, "step": 4605 }, { "epoch": 0.020390455531453362, "grad_norm": 4.33168669961742, "learning_rate": 2.0390455531453363e-06, "loss": 1.3299, "step": 4606 }, { "epoch": 0.020394882464916553, "grad_norm": 3.0145257670077155, "learning_rate": 2.0394882464916552e-06, "loss": 0.5819, "step": 4607 }, { "epoch": 0.020399309398379743, "grad_norm": 2.6996615594271947, "learning_rate": 2.039930939837974e-06, "loss": 0.9105, "step": 4608 }, { "epoch": 0.020403736331842934, "grad_norm": 3.752119285754552, "learning_rate": 2.0403736331842936e-06, "loss": 1.1053, "step": 4609 }, { "epoch": 0.02040816326530612, "grad_norm": 2.7334215498522436, "learning_rate": 2.0408163265306125e-06, "loss": 0.6774, "step": 4610 }, { "epoch": 0.02041259019876931, "grad_norm": 2.9625699580160254, "learning_rate": 2.0412590198769315e-06, "loss": 0.8501, "step": 4611 }, { "epoch": 0.020417017132232502, "grad_norm": 3.2314704361436197, "learning_rate": 2.0417017132232504e-06, "loss": 0.7039, "step": 4612 }, { "epoch": 0.020421444065695692, "grad_norm": 2.5724748983391232, "learning_rate": 2.0421444065695693e-06, "loss": 0.852, "step": 4613 }, { "epoch": 0.020425870999158883, "grad_norm": 2.7199702433085324, "learning_rate": 2.0425870999158883e-06, "loss": 0.9509, "step": 4614 }, { "epoch": 0.020430297932622073, "grad_norm": 2.7107379942863394, "learning_rate": 2.0430297932622077e-06, "loss": 0.678, "step": 4615 }, { "epoch": 0.020434724866085264, "grad_norm": 2.715605841537059, "learning_rate": 2.0434724866085266e-06, "loss": 0.8368, "step": 4616 }, { "epoch": 0.020439151799548454, "grad_norm": 2.821081961162029, "learning_rate": 2.0439151799548456e-06, "loss": 0.7111, "step": 4617 }, { "epoch": 0.02044357873301164, "grad_norm": 2.864353306064132, "learning_rate": 2.0443578733011645e-06, "loss": 0.5354, "step": 4618 }, { "epoch": 0.020448005666474832, "grad_norm": 3.396383205905001, "learning_rate": 2.0448005666474835e-06, "loss": 0.9919, "step": 4619 }, { "epoch": 0.020452432599938022, "grad_norm": 3.615757193544559, "learning_rate": 2.0452432599938024e-06, "loss": 0.9293, "step": 4620 }, { "epoch": 0.020456859533401213, "grad_norm": 3.859876747419594, "learning_rate": 2.0456859533401218e-06, "loss": 1.3218, "step": 4621 }, { "epoch": 0.020461286466864403, "grad_norm": 3.0254798314821136, "learning_rate": 2.0461286466864407e-06, "loss": 0.8488, "step": 4622 }, { "epoch": 0.020465713400327594, "grad_norm": 3.222606097661817, "learning_rate": 2.0465713400327592e-06, "loss": 0.9213, "step": 4623 }, { "epoch": 0.020470140333790784, "grad_norm": 2.954486755485601, "learning_rate": 2.0470140333790786e-06, "loss": 0.5429, "step": 4624 }, { "epoch": 0.02047456726725397, "grad_norm": 2.8572563902395873, "learning_rate": 2.0474567267253976e-06, "loss": 0.7031, "step": 4625 }, { "epoch": 0.020478994200717162, "grad_norm": 3.8932663400444607, "learning_rate": 2.0478994200717165e-06, "loss": 1.2196, "step": 4626 }, { "epoch": 0.020483421134180353, "grad_norm": 3.519266764882402, "learning_rate": 2.0483421134180355e-06, "loss": 0.914, "step": 4627 }, { "epoch": 0.020487848067643543, "grad_norm": 2.7003933133070586, "learning_rate": 2.0487848067643544e-06, "loss": 0.7002, "step": 4628 }, { "epoch": 0.020492275001106734, "grad_norm": 3.305937412215715, "learning_rate": 2.0492275001106733e-06, "loss": 1.2001, "step": 4629 }, { "epoch": 0.020496701934569924, "grad_norm": 2.681437372272191, "learning_rate": 2.0496701934569927e-06, "loss": 0.8472, "step": 4630 }, { "epoch": 0.020501128868033115, "grad_norm": 3.2957557239142603, "learning_rate": 2.0501128868033117e-06, "loss": 0.817, "step": 4631 }, { "epoch": 0.020505555801496305, "grad_norm": 2.634034974381627, "learning_rate": 2.0505555801496306e-06, "loss": 0.6811, "step": 4632 }, { "epoch": 0.020509982734959492, "grad_norm": 3.2679173373891297, "learning_rate": 2.0509982734959496e-06, "loss": 1.1356, "step": 4633 }, { "epoch": 0.020514409668422683, "grad_norm": 2.8413499539652314, "learning_rate": 2.0514409668422685e-06, "loss": 0.9327, "step": 4634 }, { "epoch": 0.020518836601885873, "grad_norm": 3.296999058029905, "learning_rate": 2.0518836601885875e-06, "loss": 1.108, "step": 4635 }, { "epoch": 0.020523263535349064, "grad_norm": 3.1674908506532296, "learning_rate": 2.052326353534907e-06, "loss": 1.0062, "step": 4636 }, { "epoch": 0.020527690468812254, "grad_norm": 3.0203977761710856, "learning_rate": 2.0527690468812258e-06, "loss": 0.8132, "step": 4637 }, { "epoch": 0.020532117402275445, "grad_norm": 3.0048307184828236, "learning_rate": 2.0532117402275443e-06, "loss": 0.672, "step": 4638 }, { "epoch": 0.020536544335738635, "grad_norm": 2.5446203164264283, "learning_rate": 2.0536544335738637e-06, "loss": 0.905, "step": 4639 }, { "epoch": 0.020540971269201822, "grad_norm": 2.937597745072611, "learning_rate": 2.0540971269201826e-06, "loss": 0.7073, "step": 4640 }, { "epoch": 0.020545398202665013, "grad_norm": 2.9611234705374243, "learning_rate": 2.0545398202665016e-06, "loss": 0.7754, "step": 4641 }, { "epoch": 0.020549825136128203, "grad_norm": 2.2672262705079684, "learning_rate": 2.0549825136128205e-06, "loss": 0.621, "step": 4642 }, { "epoch": 0.020554252069591394, "grad_norm": 2.921733039111175, "learning_rate": 2.0554252069591395e-06, "loss": 0.6894, "step": 4643 }, { "epoch": 0.020558679003054584, "grad_norm": 2.5331054014800647, "learning_rate": 2.0558679003054584e-06, "loss": 0.4909, "step": 4644 }, { "epoch": 0.020563105936517775, "grad_norm": 3.2105470741810023, "learning_rate": 2.0563105936517778e-06, "loss": 0.9575, "step": 4645 }, { "epoch": 0.020567532869980965, "grad_norm": 3.1512510321554807, "learning_rate": 2.0567532869980967e-06, "loss": 1.0472, "step": 4646 }, { "epoch": 0.020571959803444156, "grad_norm": 2.5429510447713213, "learning_rate": 2.0571959803444157e-06, "loss": 1.0012, "step": 4647 }, { "epoch": 0.020576386736907343, "grad_norm": 4.636747617810149, "learning_rate": 2.0576386736907346e-06, "loss": 1.2423, "step": 4648 }, { "epoch": 0.020580813670370533, "grad_norm": 3.24272198840294, "learning_rate": 2.0580813670370536e-06, "loss": 0.7487, "step": 4649 }, { "epoch": 0.020585240603833724, "grad_norm": 2.8463433736266315, "learning_rate": 2.0585240603833725e-06, "loss": 0.6273, "step": 4650 }, { "epoch": 0.020589667537296914, "grad_norm": 2.8350822162418936, "learning_rate": 2.058966753729692e-06, "loss": 0.8204, "step": 4651 }, { "epoch": 0.020594094470760105, "grad_norm": 3.098450950341693, "learning_rate": 2.059409447076011e-06, "loss": 0.6573, "step": 4652 }, { "epoch": 0.020598521404223295, "grad_norm": 2.7058738688102513, "learning_rate": 2.0598521404223293e-06, "loss": 0.8471, "step": 4653 }, { "epoch": 0.020602948337686486, "grad_norm": 3.1990314161231397, "learning_rate": 2.0602948337686487e-06, "loss": 0.9903, "step": 4654 }, { "epoch": 0.020607375271149676, "grad_norm": 2.5107213566330544, "learning_rate": 2.0607375271149677e-06, "loss": 0.6472, "step": 4655 }, { "epoch": 0.020611802204612863, "grad_norm": 3.608081537516346, "learning_rate": 2.0611802204612866e-06, "loss": 1.1222, "step": 4656 }, { "epoch": 0.020616229138076054, "grad_norm": 2.4413036012470615, "learning_rate": 2.0616229138076056e-06, "loss": 0.6033, "step": 4657 }, { "epoch": 0.020620656071539244, "grad_norm": 2.5726552720390496, "learning_rate": 2.0620656071539245e-06, "loss": 0.8058, "step": 4658 }, { "epoch": 0.020625083005002435, "grad_norm": 2.523925754593306, "learning_rate": 2.0625083005002435e-06, "loss": 0.4725, "step": 4659 }, { "epoch": 0.020629509938465625, "grad_norm": 2.5901417806311384, "learning_rate": 2.062950993846563e-06, "loss": 0.6479, "step": 4660 }, { "epoch": 0.020633936871928816, "grad_norm": 2.947741753655185, "learning_rate": 2.0633936871928818e-06, "loss": 0.9043, "step": 4661 }, { "epoch": 0.020638363805392006, "grad_norm": 3.1987122834641935, "learning_rate": 2.0638363805392007e-06, "loss": 0.6216, "step": 4662 }, { "epoch": 0.020642790738855193, "grad_norm": 2.724645482902487, "learning_rate": 2.0642790738855197e-06, "loss": 0.6488, "step": 4663 }, { "epoch": 0.020647217672318384, "grad_norm": 3.22200602942612, "learning_rate": 2.0647217672318386e-06, "loss": 0.5497, "step": 4664 }, { "epoch": 0.020651644605781574, "grad_norm": 2.6707563087140636, "learning_rate": 2.0651644605781576e-06, "loss": 0.8188, "step": 4665 }, { "epoch": 0.020656071539244765, "grad_norm": 3.7424684885294157, "learning_rate": 2.065607153924477e-06, "loss": 1.0506, "step": 4666 }, { "epoch": 0.020660498472707955, "grad_norm": 3.0535867801055665, "learning_rate": 2.066049847270796e-06, "loss": 0.8238, "step": 4667 }, { "epoch": 0.020664925406171146, "grad_norm": 4.219569378938682, "learning_rate": 2.066492540617115e-06, "loss": 1.2456, "step": 4668 }, { "epoch": 0.020669352339634336, "grad_norm": 3.1134911494025426, "learning_rate": 2.0669352339634338e-06, "loss": 0.9983, "step": 4669 }, { "epoch": 0.020673779273097527, "grad_norm": 2.805993834209374, "learning_rate": 2.0673779273097527e-06, "loss": 0.8058, "step": 4670 }, { "epoch": 0.020678206206560714, "grad_norm": 3.1094553369868323, "learning_rate": 2.0678206206560717e-06, "loss": 0.8744, "step": 4671 }, { "epoch": 0.020682633140023905, "grad_norm": 2.875644457456882, "learning_rate": 2.068263314002391e-06, "loss": 0.7932, "step": 4672 }, { "epoch": 0.020687060073487095, "grad_norm": 2.475034198521194, "learning_rate": 2.0687060073487096e-06, "loss": 0.6013, "step": 4673 }, { "epoch": 0.020691487006950286, "grad_norm": 2.7267841238797157, "learning_rate": 2.0691487006950285e-06, "loss": 0.8319, "step": 4674 }, { "epoch": 0.020695913940413476, "grad_norm": 2.488468445305372, "learning_rate": 2.069591394041348e-06, "loss": 0.8408, "step": 4675 }, { "epoch": 0.020700340873876667, "grad_norm": 3.271629449498727, "learning_rate": 2.070034087387667e-06, "loss": 0.8853, "step": 4676 }, { "epoch": 0.020704767807339857, "grad_norm": 2.979869962309571, "learning_rate": 2.0704767807339858e-06, "loss": 0.8013, "step": 4677 }, { "epoch": 0.020709194740803044, "grad_norm": 2.5991640052293676, "learning_rate": 2.0709194740803047e-06, "loss": 0.6131, "step": 4678 }, { "epoch": 0.020713621674266235, "grad_norm": 3.9106856923397233, "learning_rate": 2.0713621674266237e-06, "loss": 1.4502, "step": 4679 }, { "epoch": 0.020718048607729425, "grad_norm": 2.645677674292685, "learning_rate": 2.0718048607729426e-06, "loss": 0.6886, "step": 4680 }, { "epoch": 0.020722475541192616, "grad_norm": 2.548858816585673, "learning_rate": 2.072247554119262e-06, "loss": 0.7897, "step": 4681 }, { "epoch": 0.020726902474655806, "grad_norm": 3.495485175726434, "learning_rate": 2.072690247465581e-06, "loss": 0.5912, "step": 4682 }, { "epoch": 0.020731329408118997, "grad_norm": 2.7341865056930077, "learning_rate": 2.0731329408119e-06, "loss": 0.7155, "step": 4683 }, { "epoch": 0.020735756341582187, "grad_norm": 3.482656289690899, "learning_rate": 2.073575634158219e-06, "loss": 0.9317, "step": 4684 }, { "epoch": 0.020740183275045378, "grad_norm": 3.087732802782592, "learning_rate": 2.0740183275045378e-06, "loss": 0.9299, "step": 4685 }, { "epoch": 0.020744610208508565, "grad_norm": 3.231462490193393, "learning_rate": 2.0744610208508567e-06, "loss": 0.9544, "step": 4686 }, { "epoch": 0.020749037141971755, "grad_norm": 2.456106393933518, "learning_rate": 2.074903714197176e-06, "loss": 0.8272, "step": 4687 }, { "epoch": 0.020753464075434946, "grad_norm": 3.1092124590186203, "learning_rate": 2.0753464075434946e-06, "loss": 0.9505, "step": 4688 }, { "epoch": 0.020757891008898136, "grad_norm": 2.9261164428222615, "learning_rate": 2.0757891008898136e-06, "loss": 0.7054, "step": 4689 }, { "epoch": 0.020762317942361327, "grad_norm": 3.7072217196072814, "learning_rate": 2.076231794236133e-06, "loss": 0.7267, "step": 4690 }, { "epoch": 0.020766744875824517, "grad_norm": 2.873186255044963, "learning_rate": 2.076674487582452e-06, "loss": 0.9811, "step": 4691 }, { "epoch": 0.020771171809287708, "grad_norm": 2.8915793264275966, "learning_rate": 2.077117180928771e-06, "loss": 0.6246, "step": 4692 }, { "epoch": 0.020775598742750895, "grad_norm": 3.7266746764021166, "learning_rate": 2.0775598742750898e-06, "loss": 1.122, "step": 4693 }, { "epoch": 0.020780025676214085, "grad_norm": 3.0554105834187677, "learning_rate": 2.0780025676214087e-06, "loss": 0.891, "step": 4694 }, { "epoch": 0.020784452609677276, "grad_norm": 3.3148261905275027, "learning_rate": 2.0784452609677277e-06, "loss": 0.9652, "step": 4695 }, { "epoch": 0.020788879543140466, "grad_norm": 2.5763435585589094, "learning_rate": 2.078887954314047e-06, "loss": 0.7391, "step": 4696 }, { "epoch": 0.020793306476603657, "grad_norm": 3.3272561907283063, "learning_rate": 2.079330647660366e-06, "loss": 0.9409, "step": 4697 }, { "epoch": 0.020797733410066847, "grad_norm": 3.664139962737659, "learning_rate": 2.079773341006685e-06, "loss": 0.9736, "step": 4698 }, { "epoch": 0.020802160343530038, "grad_norm": 3.5254144650427075, "learning_rate": 2.080216034353004e-06, "loss": 1.1066, "step": 4699 }, { "epoch": 0.02080658727699323, "grad_norm": 2.996862273997893, "learning_rate": 2.080658727699323e-06, "loss": 0.9069, "step": 4700 }, { "epoch": 0.020811014210456415, "grad_norm": 2.447513331377893, "learning_rate": 2.0811014210456418e-06, "loss": 0.7333, "step": 4701 }, { "epoch": 0.020815441143919606, "grad_norm": 2.5773214126308526, "learning_rate": 2.081544114391961e-06, "loss": 0.842, "step": 4702 }, { "epoch": 0.020819868077382796, "grad_norm": 3.8051645984283753, "learning_rate": 2.0819868077382797e-06, "loss": 0.9893, "step": 4703 }, { "epoch": 0.020824295010845987, "grad_norm": 2.6138382954128607, "learning_rate": 2.0824295010845986e-06, "loss": 0.8494, "step": 4704 }, { "epoch": 0.020828721944309177, "grad_norm": 2.9109205289011846, "learning_rate": 2.082872194430918e-06, "loss": 0.8505, "step": 4705 }, { "epoch": 0.020833148877772368, "grad_norm": 2.8941307098647573, "learning_rate": 2.083314887777237e-06, "loss": 0.7403, "step": 4706 }, { "epoch": 0.02083757581123556, "grad_norm": 3.5582127334275606, "learning_rate": 2.083757581123556e-06, "loss": 0.8336, "step": 4707 }, { "epoch": 0.020842002744698745, "grad_norm": 2.5545704991182503, "learning_rate": 2.084200274469875e-06, "loss": 0.7626, "step": 4708 }, { "epoch": 0.020846429678161936, "grad_norm": 2.8753229221139294, "learning_rate": 2.0846429678161938e-06, "loss": 0.7714, "step": 4709 }, { "epoch": 0.020850856611625126, "grad_norm": 4.626487486280563, "learning_rate": 2.0850856611625127e-06, "loss": 1.01, "step": 4710 }, { "epoch": 0.020855283545088317, "grad_norm": 3.248287818787261, "learning_rate": 2.085528354508832e-06, "loss": 1.0672, "step": 4711 }, { "epoch": 0.020859710478551508, "grad_norm": 2.5971045509443873, "learning_rate": 2.085971047855151e-06, "loss": 0.6092, "step": 4712 }, { "epoch": 0.020864137412014698, "grad_norm": 3.4495865990728594, "learning_rate": 2.08641374120147e-06, "loss": 1.087, "step": 4713 }, { "epoch": 0.02086856434547789, "grad_norm": 3.9737008849374824, "learning_rate": 2.086856434547789e-06, "loss": 1.043, "step": 4714 }, { "epoch": 0.02087299127894108, "grad_norm": 3.6547163854609446, "learning_rate": 2.087299127894108e-06, "loss": 0.9087, "step": 4715 }, { "epoch": 0.020877418212404266, "grad_norm": 3.8446132034507987, "learning_rate": 2.087741821240427e-06, "loss": 0.9876, "step": 4716 }, { "epoch": 0.020881845145867457, "grad_norm": 3.5671197107113084, "learning_rate": 2.088184514586746e-06, "loss": 0.701, "step": 4717 }, { "epoch": 0.020886272079330647, "grad_norm": 2.636975200420914, "learning_rate": 2.088627207933065e-06, "loss": 0.6706, "step": 4718 }, { "epoch": 0.020890699012793838, "grad_norm": 3.298439425700491, "learning_rate": 2.0890699012793837e-06, "loss": 0.7564, "step": 4719 }, { "epoch": 0.020895125946257028, "grad_norm": 4.212614720083829, "learning_rate": 2.089512594625703e-06, "loss": 0.9724, "step": 4720 }, { "epoch": 0.02089955287972022, "grad_norm": 2.577982099784352, "learning_rate": 2.089955287972022e-06, "loss": 0.6833, "step": 4721 }, { "epoch": 0.02090397981318341, "grad_norm": 2.876355261826741, "learning_rate": 2.090397981318341e-06, "loss": 0.7717, "step": 4722 }, { "epoch": 0.0209084067466466, "grad_norm": 4.595106953108873, "learning_rate": 2.09084067466466e-06, "loss": 1.5381, "step": 4723 }, { "epoch": 0.020912833680109787, "grad_norm": 3.2376042322196015, "learning_rate": 2.091283368010979e-06, "loss": 0.4047, "step": 4724 }, { "epoch": 0.020917260613572977, "grad_norm": 3.434789181161734, "learning_rate": 2.0917260613572978e-06, "loss": 0.9089, "step": 4725 }, { "epoch": 0.020921687547036168, "grad_norm": 2.906448873215583, "learning_rate": 2.092168754703617e-06, "loss": 0.6619, "step": 4726 }, { "epoch": 0.020926114480499358, "grad_norm": 2.7320388785050795, "learning_rate": 2.092611448049936e-06, "loss": 0.7737, "step": 4727 }, { "epoch": 0.02093054141396255, "grad_norm": 2.5060342940537734, "learning_rate": 2.093054141396255e-06, "loss": 0.7255, "step": 4728 }, { "epoch": 0.02093496834742574, "grad_norm": 3.4412211268177866, "learning_rate": 2.093496834742574e-06, "loss": 1.212, "step": 4729 }, { "epoch": 0.02093939528088893, "grad_norm": 3.1574822806980767, "learning_rate": 2.093939528088893e-06, "loss": 1.0223, "step": 4730 }, { "epoch": 0.020943822214352117, "grad_norm": 2.928797108662572, "learning_rate": 2.094382221435212e-06, "loss": 0.9712, "step": 4731 }, { "epoch": 0.020948249147815307, "grad_norm": 3.745649642736777, "learning_rate": 2.0948249147815313e-06, "loss": 1.0098, "step": 4732 }, { "epoch": 0.020952676081278498, "grad_norm": 3.1194941950752315, "learning_rate": 2.09526760812785e-06, "loss": 0.5193, "step": 4733 }, { "epoch": 0.02095710301474169, "grad_norm": 3.1686286247553532, "learning_rate": 2.0957103014741687e-06, "loss": 0.529, "step": 4734 }, { "epoch": 0.02096152994820488, "grad_norm": 4.127416350857117, "learning_rate": 2.096152994820488e-06, "loss": 0.932, "step": 4735 }, { "epoch": 0.02096595688166807, "grad_norm": 2.639982400378178, "learning_rate": 2.096595688166807e-06, "loss": 0.755, "step": 4736 }, { "epoch": 0.02097038381513126, "grad_norm": 2.530027969753604, "learning_rate": 2.097038381513126e-06, "loss": 0.7274, "step": 4737 }, { "epoch": 0.02097481074859445, "grad_norm": 3.030060889394552, "learning_rate": 2.097481074859445e-06, "loss": 0.6974, "step": 4738 }, { "epoch": 0.020979237682057637, "grad_norm": 2.9670765740065095, "learning_rate": 2.097923768205764e-06, "loss": 0.814, "step": 4739 }, { "epoch": 0.020983664615520828, "grad_norm": 2.7455770803075836, "learning_rate": 2.098366461552083e-06, "loss": 0.3873, "step": 4740 }, { "epoch": 0.02098809154898402, "grad_norm": 3.0048651985383725, "learning_rate": 2.098809154898402e-06, "loss": 0.9589, "step": 4741 }, { "epoch": 0.02099251848244721, "grad_norm": 3.0637523403789206, "learning_rate": 2.099251848244721e-06, "loss": 0.8822, "step": 4742 }, { "epoch": 0.0209969454159104, "grad_norm": 3.083925811868094, "learning_rate": 2.09969454159104e-06, "loss": 0.9889, "step": 4743 }, { "epoch": 0.02100137234937359, "grad_norm": 3.1667081010431417, "learning_rate": 2.100137234937359e-06, "loss": 0.8592, "step": 4744 }, { "epoch": 0.02100579928283678, "grad_norm": 2.609534413952617, "learning_rate": 2.100579928283678e-06, "loss": 0.5069, "step": 4745 }, { "epoch": 0.021010226216299967, "grad_norm": 3.03531599606542, "learning_rate": 2.101022621629997e-06, "loss": 0.912, "step": 4746 }, { "epoch": 0.021014653149763158, "grad_norm": 2.8987438117537905, "learning_rate": 2.1014653149763163e-06, "loss": 0.8054, "step": 4747 }, { "epoch": 0.02101908008322635, "grad_norm": 2.4192075175554013, "learning_rate": 2.1019080083226353e-06, "loss": 0.8615, "step": 4748 }, { "epoch": 0.02102350701668954, "grad_norm": 2.9179926805281005, "learning_rate": 2.102350701668954e-06, "loss": 0.9927, "step": 4749 }, { "epoch": 0.02102793395015273, "grad_norm": 2.868454869646188, "learning_rate": 2.102793395015273e-06, "loss": 0.8285, "step": 4750 }, { "epoch": 0.02103236088361592, "grad_norm": 3.2542747649107646, "learning_rate": 2.103236088361592e-06, "loss": 1.0598, "step": 4751 }, { "epoch": 0.02103678781707911, "grad_norm": 2.8504803537054606, "learning_rate": 2.103678781707911e-06, "loss": 1.0512, "step": 4752 }, { "epoch": 0.0210412147505423, "grad_norm": 3.331948546000464, "learning_rate": 2.1041214750542304e-06, "loss": 0.8902, "step": 4753 }, { "epoch": 0.021045641684005488, "grad_norm": 3.1152872585615112, "learning_rate": 2.104564168400549e-06, "loss": 0.6031, "step": 4754 }, { "epoch": 0.02105006861746868, "grad_norm": 3.5249420904919977, "learning_rate": 2.105006861746868e-06, "loss": 1.2599, "step": 4755 }, { "epoch": 0.02105449555093187, "grad_norm": 2.7843299610481265, "learning_rate": 2.1054495550931873e-06, "loss": 0.8782, "step": 4756 }, { "epoch": 0.02105892248439506, "grad_norm": 3.6390827458628765, "learning_rate": 2.105892248439506e-06, "loss": 1.1045, "step": 4757 }, { "epoch": 0.02106334941785825, "grad_norm": 2.643221406877179, "learning_rate": 2.106334941785825e-06, "loss": 0.8677, "step": 4758 }, { "epoch": 0.02106777635132144, "grad_norm": 2.5840995622012515, "learning_rate": 2.106777635132144e-06, "loss": 0.9497, "step": 4759 }, { "epoch": 0.02107220328478463, "grad_norm": 3.135907540413849, "learning_rate": 2.107220328478463e-06, "loss": 0.9937, "step": 4760 }, { "epoch": 0.021076630218247818, "grad_norm": 3.4287848740949696, "learning_rate": 2.107663021824782e-06, "loss": 0.7143, "step": 4761 }, { "epoch": 0.02108105715171101, "grad_norm": 2.5687603643761, "learning_rate": 2.1081057151711014e-06, "loss": 0.5274, "step": 4762 }, { "epoch": 0.0210854840851742, "grad_norm": 3.203466629201951, "learning_rate": 2.1085484085174203e-06, "loss": 0.6477, "step": 4763 }, { "epoch": 0.02108991101863739, "grad_norm": 2.878554020557315, "learning_rate": 2.1089911018637393e-06, "loss": 0.5533, "step": 4764 }, { "epoch": 0.02109433795210058, "grad_norm": 2.8378155569655794, "learning_rate": 2.109433795210058e-06, "loss": 0.6415, "step": 4765 }, { "epoch": 0.02109876488556377, "grad_norm": 2.736030217303956, "learning_rate": 2.109876488556377e-06, "loss": 0.6817, "step": 4766 }, { "epoch": 0.02110319181902696, "grad_norm": 3.5239580337165672, "learning_rate": 2.110319181902696e-06, "loss": 0.9364, "step": 4767 }, { "epoch": 0.02110761875249015, "grad_norm": 3.48424113687236, "learning_rate": 2.1107618752490155e-06, "loss": 0.6359, "step": 4768 }, { "epoch": 0.02111204568595334, "grad_norm": 2.7505616960897807, "learning_rate": 2.111204568595334e-06, "loss": 0.6477, "step": 4769 }, { "epoch": 0.02111647261941653, "grad_norm": 3.629266196799732, "learning_rate": 2.111647261941653e-06, "loss": 1.0527, "step": 4770 }, { "epoch": 0.02112089955287972, "grad_norm": 3.0383653714212664, "learning_rate": 2.1120899552879723e-06, "loss": 1.029, "step": 4771 }, { "epoch": 0.02112532648634291, "grad_norm": 2.7537379868754654, "learning_rate": 2.1125326486342913e-06, "loss": 1.0364, "step": 4772 }, { "epoch": 0.0211297534198061, "grad_norm": 3.028715482169117, "learning_rate": 2.11297534198061e-06, "loss": 0.9798, "step": 4773 }, { "epoch": 0.02113418035326929, "grad_norm": 4.060121359581325, "learning_rate": 2.113418035326929e-06, "loss": 1.1543, "step": 4774 }, { "epoch": 0.021138607286732482, "grad_norm": 2.69116342078236, "learning_rate": 2.113860728673248e-06, "loss": 0.6048, "step": 4775 }, { "epoch": 0.02114303422019567, "grad_norm": 3.6593152934456556, "learning_rate": 2.114303422019567e-06, "loss": 0.9235, "step": 4776 }, { "epoch": 0.02114746115365886, "grad_norm": 2.896285396424255, "learning_rate": 2.1147461153658864e-06, "loss": 0.748, "step": 4777 }, { "epoch": 0.02115188808712205, "grad_norm": 2.5621673854096216, "learning_rate": 2.1151888087122054e-06, "loss": 0.8115, "step": 4778 }, { "epoch": 0.02115631502058524, "grad_norm": 3.0166420349118663, "learning_rate": 2.1156315020585243e-06, "loss": 0.7647, "step": 4779 }, { "epoch": 0.02116074195404843, "grad_norm": 3.7559434314393685, "learning_rate": 2.1160741954048433e-06, "loss": 1.0512, "step": 4780 }, { "epoch": 0.02116516888751162, "grad_norm": 2.758106590401377, "learning_rate": 2.116516888751162e-06, "loss": 0.7637, "step": 4781 }, { "epoch": 0.021169595820974812, "grad_norm": 4.543132596042578, "learning_rate": 2.116959582097481e-06, "loss": 1.4613, "step": 4782 }, { "epoch": 0.021174022754438002, "grad_norm": 3.314946600183015, "learning_rate": 2.1174022754438005e-06, "loss": 0.7299, "step": 4783 }, { "epoch": 0.02117844968790119, "grad_norm": 3.555125455046262, "learning_rate": 2.117844968790119e-06, "loss": 0.8283, "step": 4784 }, { "epoch": 0.02118287662136438, "grad_norm": 2.472077458316079, "learning_rate": 2.118287662136438e-06, "loss": 0.7651, "step": 4785 }, { "epoch": 0.02118730355482757, "grad_norm": 2.5183029224903173, "learning_rate": 2.1187303554827574e-06, "loss": 0.7223, "step": 4786 }, { "epoch": 0.02119173048829076, "grad_norm": 3.9670636420472514, "learning_rate": 2.1191730488290763e-06, "loss": 1.1289, "step": 4787 }, { "epoch": 0.02119615742175395, "grad_norm": 2.6225044286894, "learning_rate": 2.1196157421753953e-06, "loss": 0.6942, "step": 4788 }, { "epoch": 0.021200584355217142, "grad_norm": 3.4705324150228134, "learning_rate": 2.120058435521714e-06, "loss": 0.9125, "step": 4789 }, { "epoch": 0.021205011288680332, "grad_norm": 2.4629484671495936, "learning_rate": 2.120501128868033e-06, "loss": 0.5559, "step": 4790 }, { "epoch": 0.02120943822214352, "grad_norm": 2.7304084920239933, "learning_rate": 2.120943822214352e-06, "loss": 0.7573, "step": 4791 }, { "epoch": 0.02121386515560671, "grad_norm": 2.8106050921261914, "learning_rate": 2.1213865155606715e-06, "loss": 0.6043, "step": 4792 }, { "epoch": 0.0212182920890699, "grad_norm": 2.6081909592579264, "learning_rate": 2.1218292089069904e-06, "loss": 0.6455, "step": 4793 }, { "epoch": 0.02122271902253309, "grad_norm": 3.589572446094443, "learning_rate": 2.1222719022533094e-06, "loss": 1.0551, "step": 4794 }, { "epoch": 0.02122714595599628, "grad_norm": 3.132339515691189, "learning_rate": 2.1227145955996283e-06, "loss": 0.5935, "step": 4795 }, { "epoch": 0.021231572889459472, "grad_norm": 3.1275532657642544, "learning_rate": 2.1231572889459473e-06, "loss": 0.89, "step": 4796 }, { "epoch": 0.021235999822922662, "grad_norm": 2.5832254949011038, "learning_rate": 2.123599982292266e-06, "loss": 0.8028, "step": 4797 }, { "epoch": 0.021240426756385853, "grad_norm": 3.357546422970553, "learning_rate": 2.1240426756385856e-06, "loss": 0.6233, "step": 4798 }, { "epoch": 0.02124485368984904, "grad_norm": 4.232016119680998, "learning_rate": 2.1244853689849045e-06, "loss": 1.3675, "step": 4799 }, { "epoch": 0.02124928062331223, "grad_norm": 4.361775514593512, "learning_rate": 2.124928062331223e-06, "loss": 1.3263, "step": 4800 }, { "epoch": 0.02125370755677542, "grad_norm": 2.725769780591238, "learning_rate": 2.1253707556775424e-06, "loss": 0.8201, "step": 4801 }, { "epoch": 0.02125813449023861, "grad_norm": 2.6346568145401927, "learning_rate": 2.1258134490238614e-06, "loss": 0.9199, "step": 4802 }, { "epoch": 0.021262561423701802, "grad_norm": 3.3035171031749115, "learning_rate": 2.1262561423701803e-06, "loss": 0.6835, "step": 4803 }, { "epoch": 0.021266988357164993, "grad_norm": 2.9424444246646955, "learning_rate": 2.1266988357164993e-06, "loss": 0.8237, "step": 4804 }, { "epoch": 0.021271415290628183, "grad_norm": 3.9627216467476027, "learning_rate": 2.127141529062818e-06, "loss": 1.0745, "step": 4805 }, { "epoch": 0.021275842224091374, "grad_norm": 3.015959244517856, "learning_rate": 2.127584222409137e-06, "loss": 0.956, "step": 4806 }, { "epoch": 0.02128026915755456, "grad_norm": 3.1711763085004563, "learning_rate": 2.1280269157554565e-06, "loss": 0.7205, "step": 4807 }, { "epoch": 0.02128469609101775, "grad_norm": 2.7827090711557387, "learning_rate": 2.1284696091017755e-06, "loss": 0.8321, "step": 4808 }, { "epoch": 0.02128912302448094, "grad_norm": 3.326977004707805, "learning_rate": 2.1289123024480944e-06, "loss": 0.6009, "step": 4809 }, { "epoch": 0.021293549957944132, "grad_norm": 2.930203091686157, "learning_rate": 2.1293549957944134e-06, "loss": 0.4489, "step": 4810 }, { "epoch": 0.021297976891407323, "grad_norm": 2.651590590381563, "learning_rate": 2.1297976891407323e-06, "loss": 0.7687, "step": 4811 }, { "epoch": 0.021302403824870513, "grad_norm": 2.400036639268727, "learning_rate": 2.1302403824870513e-06, "loss": 0.6783, "step": 4812 }, { "epoch": 0.021306830758333704, "grad_norm": 2.960543177059527, "learning_rate": 2.1306830758333706e-06, "loss": 0.8928, "step": 4813 }, { "epoch": 0.02131125769179689, "grad_norm": 3.3398716951140375, "learning_rate": 2.1311257691796896e-06, "loss": 0.9812, "step": 4814 }, { "epoch": 0.02131568462526008, "grad_norm": 3.094154489748423, "learning_rate": 2.131568462526008e-06, "loss": 0.9236, "step": 4815 }, { "epoch": 0.021320111558723272, "grad_norm": 3.9431084440378648, "learning_rate": 2.1320111558723275e-06, "loss": 1.3333, "step": 4816 }, { "epoch": 0.021324538492186462, "grad_norm": 2.9903892332013786, "learning_rate": 2.1324538492186464e-06, "loss": 0.7048, "step": 4817 }, { "epoch": 0.021328965425649653, "grad_norm": 3.7006172448709767, "learning_rate": 2.1328965425649654e-06, "loss": 1.08, "step": 4818 }, { "epoch": 0.021333392359112843, "grad_norm": 2.570155163719801, "learning_rate": 2.1333392359112843e-06, "loss": 0.8249, "step": 4819 }, { "epoch": 0.021337819292576034, "grad_norm": 3.399200443955746, "learning_rate": 2.1337819292576033e-06, "loss": 0.8274, "step": 4820 }, { "epoch": 0.021342246226039224, "grad_norm": 2.8355070698840157, "learning_rate": 2.134224622603922e-06, "loss": 0.5283, "step": 4821 }, { "epoch": 0.02134667315950241, "grad_norm": 2.921261074615868, "learning_rate": 2.1346673159502416e-06, "loss": 0.7447, "step": 4822 }, { "epoch": 0.021351100092965602, "grad_norm": 2.9284701571909393, "learning_rate": 2.1351100092965605e-06, "loss": 0.8525, "step": 4823 }, { "epoch": 0.021355527026428792, "grad_norm": 3.420078796686423, "learning_rate": 2.1355527026428795e-06, "loss": 0.7952, "step": 4824 }, { "epoch": 0.021359953959891983, "grad_norm": 2.994641974252969, "learning_rate": 2.1359953959891984e-06, "loss": 0.8333, "step": 4825 }, { "epoch": 0.021364380893355173, "grad_norm": 3.2296983613060615, "learning_rate": 2.1364380893355174e-06, "loss": 0.9301, "step": 4826 }, { "epoch": 0.021368807826818364, "grad_norm": 3.1846888857278297, "learning_rate": 2.1368807826818363e-06, "loss": 1.1182, "step": 4827 }, { "epoch": 0.021373234760281554, "grad_norm": 3.2378581407291738, "learning_rate": 2.1373234760281557e-06, "loss": 0.9312, "step": 4828 }, { "epoch": 0.02137766169374474, "grad_norm": 3.3012245758768604, "learning_rate": 2.1377661693744746e-06, "loss": 0.9144, "step": 4829 }, { "epoch": 0.021382088627207932, "grad_norm": 2.9373904273289617, "learning_rate": 2.1382088627207936e-06, "loss": 0.9386, "step": 4830 }, { "epoch": 0.021386515560671122, "grad_norm": 2.4780193240015596, "learning_rate": 2.1386515560671125e-06, "loss": 0.5713, "step": 4831 }, { "epoch": 0.021390942494134313, "grad_norm": 2.9140610639573605, "learning_rate": 2.1390942494134315e-06, "loss": 0.8021, "step": 4832 }, { "epoch": 0.021395369427597503, "grad_norm": 3.2828116757723564, "learning_rate": 2.1395369427597504e-06, "loss": 1.0381, "step": 4833 }, { "epoch": 0.021399796361060694, "grad_norm": 2.623391300116836, "learning_rate": 2.1399796361060694e-06, "loss": 0.6911, "step": 4834 }, { "epoch": 0.021404223294523884, "grad_norm": 2.518296256199514, "learning_rate": 2.1404223294523883e-06, "loss": 0.6884, "step": 4835 }, { "epoch": 0.021408650227987075, "grad_norm": 3.154501781604165, "learning_rate": 2.1408650227987073e-06, "loss": 0.8659, "step": 4836 }, { "epoch": 0.021413077161450262, "grad_norm": 2.6548553851924988, "learning_rate": 2.1413077161450266e-06, "loss": 0.7936, "step": 4837 }, { "epoch": 0.021417504094913453, "grad_norm": 2.8193216258555927, "learning_rate": 2.1417504094913456e-06, "loss": 0.5593, "step": 4838 }, { "epoch": 0.021421931028376643, "grad_norm": 3.0935800606514836, "learning_rate": 2.1421931028376645e-06, "loss": 0.6522, "step": 4839 }, { "epoch": 0.021426357961839834, "grad_norm": 3.1060980812792502, "learning_rate": 2.1426357961839835e-06, "loss": 0.6002, "step": 4840 }, { "epoch": 0.021430784895303024, "grad_norm": 2.919171436501327, "learning_rate": 2.1430784895303024e-06, "loss": 0.8784, "step": 4841 }, { "epoch": 0.021435211828766215, "grad_norm": 2.652345982040232, "learning_rate": 2.1435211828766214e-06, "loss": 0.8339, "step": 4842 }, { "epoch": 0.021439638762229405, "grad_norm": 2.851211754996658, "learning_rate": 2.1439638762229407e-06, "loss": 0.6236, "step": 4843 }, { "epoch": 0.021444065695692592, "grad_norm": 3.202293069016315, "learning_rate": 2.1444065695692597e-06, "loss": 0.76, "step": 4844 }, { "epoch": 0.021448492629155783, "grad_norm": 2.8758009775176334, "learning_rate": 2.1448492629155786e-06, "loss": 0.838, "step": 4845 }, { "epoch": 0.021452919562618973, "grad_norm": 2.8810346715433495, "learning_rate": 2.1452919562618976e-06, "loss": 0.9435, "step": 4846 }, { "epoch": 0.021457346496082164, "grad_norm": 2.812703964064941, "learning_rate": 2.1457346496082165e-06, "loss": 0.9207, "step": 4847 }, { "epoch": 0.021461773429545354, "grad_norm": 2.916681976071273, "learning_rate": 2.1461773429545355e-06, "loss": 0.5094, "step": 4848 }, { "epoch": 0.021466200363008545, "grad_norm": 2.726277500852308, "learning_rate": 2.146620036300855e-06, "loss": 1.0242, "step": 4849 }, { "epoch": 0.021470627296471735, "grad_norm": 2.6556180013848465, "learning_rate": 2.1470627296471734e-06, "loss": 0.7467, "step": 4850 }, { "epoch": 0.021475054229934926, "grad_norm": 2.6999255259321013, "learning_rate": 2.1475054229934923e-06, "loss": 0.558, "step": 4851 }, { "epoch": 0.021479481163398113, "grad_norm": 3.6332697901211706, "learning_rate": 2.1479481163398117e-06, "loss": 1.2359, "step": 4852 }, { "epoch": 0.021483908096861303, "grad_norm": 3.546549991677004, "learning_rate": 2.1483908096861306e-06, "loss": 1.1644, "step": 4853 }, { "epoch": 0.021488335030324494, "grad_norm": 2.9522242409876016, "learning_rate": 2.1488335030324496e-06, "loss": 0.7698, "step": 4854 }, { "epoch": 0.021492761963787684, "grad_norm": 3.1214420358489616, "learning_rate": 2.1492761963787685e-06, "loss": 0.7215, "step": 4855 }, { "epoch": 0.021497188897250875, "grad_norm": 2.972989172631353, "learning_rate": 2.1497188897250875e-06, "loss": 0.6818, "step": 4856 }, { "epoch": 0.021501615830714065, "grad_norm": 2.651690854251923, "learning_rate": 2.1501615830714064e-06, "loss": 0.5405, "step": 4857 }, { "epoch": 0.021506042764177256, "grad_norm": 3.240458332779428, "learning_rate": 2.1506042764177258e-06, "loss": 0.9165, "step": 4858 }, { "epoch": 0.021510469697640443, "grad_norm": 2.457930235432962, "learning_rate": 2.1510469697640447e-06, "loss": 0.4783, "step": 4859 }, { "epoch": 0.021514896631103633, "grad_norm": 3.118518161107613, "learning_rate": 2.1514896631103637e-06, "loss": 0.9146, "step": 4860 }, { "epoch": 0.021519323564566824, "grad_norm": 2.9699205387456216, "learning_rate": 2.1519323564566826e-06, "loss": 0.7884, "step": 4861 }, { "epoch": 0.021523750498030014, "grad_norm": 2.479287728131258, "learning_rate": 2.1523750498030016e-06, "loss": 0.7321, "step": 4862 }, { "epoch": 0.021528177431493205, "grad_norm": 2.8533143773967087, "learning_rate": 2.1528177431493205e-06, "loss": 1.0522, "step": 4863 }, { "epoch": 0.021532604364956395, "grad_norm": 3.6363598666815915, "learning_rate": 2.15326043649564e-06, "loss": 0.845, "step": 4864 }, { "epoch": 0.021537031298419586, "grad_norm": 3.3711291989425245, "learning_rate": 2.1537031298419584e-06, "loss": 1.2816, "step": 4865 }, { "epoch": 0.021541458231882776, "grad_norm": 2.933757017002156, "learning_rate": 2.1541458231882774e-06, "loss": 0.8094, "step": 4866 }, { "epoch": 0.021545885165345963, "grad_norm": 3.7089335983832883, "learning_rate": 2.1545885165345967e-06, "loss": 0.9789, "step": 4867 }, { "epoch": 0.021550312098809154, "grad_norm": 3.4817209707115464, "learning_rate": 2.1550312098809157e-06, "loss": 0.9466, "step": 4868 }, { "epoch": 0.021554739032272344, "grad_norm": 2.560161336250751, "learning_rate": 2.1554739032272346e-06, "loss": 0.809, "step": 4869 }, { "epoch": 0.021559165965735535, "grad_norm": 2.5023445162721574, "learning_rate": 2.1559165965735536e-06, "loss": 0.6758, "step": 4870 }, { "epoch": 0.021563592899198725, "grad_norm": 3.090655324691516, "learning_rate": 2.1563592899198725e-06, "loss": 0.9807, "step": 4871 }, { "epoch": 0.021568019832661916, "grad_norm": 2.59650843665958, "learning_rate": 2.1568019832661915e-06, "loss": 0.7062, "step": 4872 }, { "epoch": 0.021572446766125106, "grad_norm": 3.823432224820704, "learning_rate": 2.157244676612511e-06, "loss": 1.2427, "step": 4873 }, { "epoch": 0.021576873699588297, "grad_norm": 3.0813997857697277, "learning_rate": 2.1576873699588298e-06, "loss": 0.5494, "step": 4874 }, { "epoch": 0.021581300633051484, "grad_norm": 2.4398746718094553, "learning_rate": 2.1581300633051487e-06, "loss": 0.4546, "step": 4875 }, { "epoch": 0.021585727566514674, "grad_norm": 3.073957639364871, "learning_rate": 2.1585727566514677e-06, "loss": 0.9584, "step": 4876 }, { "epoch": 0.021590154499977865, "grad_norm": 3.013409976724622, "learning_rate": 2.1590154499977866e-06, "loss": 1.1075, "step": 4877 }, { "epoch": 0.021594581433441055, "grad_norm": 2.7406540838317563, "learning_rate": 2.1594581433441056e-06, "loss": 0.7233, "step": 4878 }, { "epoch": 0.021599008366904246, "grad_norm": 2.7422762172705992, "learning_rate": 2.159900836690425e-06, "loss": 0.9679, "step": 4879 }, { "epoch": 0.021603435300367436, "grad_norm": 2.6648882478600497, "learning_rate": 2.160343530036744e-06, "loss": 0.5847, "step": 4880 }, { "epoch": 0.021607862233830627, "grad_norm": 2.8552254598692737, "learning_rate": 2.1607862233830624e-06, "loss": 0.9152, "step": 4881 }, { "epoch": 0.021612289167293814, "grad_norm": 2.7278103649149115, "learning_rate": 2.1612289167293818e-06, "loss": 1.0068, "step": 4882 }, { "epoch": 0.021616716100757005, "grad_norm": 3.114456363648348, "learning_rate": 2.1616716100757007e-06, "loss": 1.0892, "step": 4883 }, { "epoch": 0.021621143034220195, "grad_norm": 2.8382918183648456, "learning_rate": 2.1621143034220197e-06, "loss": 0.7824, "step": 4884 }, { "epoch": 0.021625569967683386, "grad_norm": 2.8088946705035434, "learning_rate": 2.1625569967683386e-06, "loss": 0.9438, "step": 4885 }, { "epoch": 0.021629996901146576, "grad_norm": 3.2374630016633956, "learning_rate": 2.1629996901146576e-06, "loss": 0.9497, "step": 4886 }, { "epoch": 0.021634423834609767, "grad_norm": 3.1590970300014742, "learning_rate": 2.1634423834609765e-06, "loss": 0.6022, "step": 4887 }, { "epoch": 0.021638850768072957, "grad_norm": 2.694884626133314, "learning_rate": 2.163885076807296e-06, "loss": 0.6851, "step": 4888 }, { "epoch": 0.021643277701536148, "grad_norm": 2.5157372783231082, "learning_rate": 2.164327770153615e-06, "loss": 0.5845, "step": 4889 }, { "epoch": 0.021647704634999335, "grad_norm": 2.7129686950743133, "learning_rate": 2.1647704634999338e-06, "loss": 0.7711, "step": 4890 }, { "epoch": 0.021652131568462525, "grad_norm": 3.1814008342884477, "learning_rate": 2.1652131568462527e-06, "loss": 0.9384, "step": 4891 }, { "epoch": 0.021656558501925716, "grad_norm": 2.882980425923283, "learning_rate": 2.1656558501925717e-06, "loss": 0.6374, "step": 4892 }, { "epoch": 0.021660985435388906, "grad_norm": 2.7700794765358916, "learning_rate": 2.1660985435388906e-06, "loss": 0.6286, "step": 4893 }, { "epoch": 0.021665412368852097, "grad_norm": 2.6355540064111866, "learning_rate": 2.16654123688521e-06, "loss": 0.7904, "step": 4894 }, { "epoch": 0.021669839302315287, "grad_norm": 3.059687442036678, "learning_rate": 2.166983930231529e-06, "loss": 0.8517, "step": 4895 }, { "epoch": 0.021674266235778478, "grad_norm": 3.0397212286159636, "learning_rate": 2.1674266235778475e-06, "loss": 0.6883, "step": 4896 }, { "epoch": 0.021678693169241665, "grad_norm": 2.9161202027440374, "learning_rate": 2.167869316924167e-06, "loss": 1.1055, "step": 4897 }, { "epoch": 0.021683120102704855, "grad_norm": 2.8697059634952593, "learning_rate": 2.1683120102704858e-06, "loss": 0.7295, "step": 4898 }, { "epoch": 0.021687547036168046, "grad_norm": 2.9533150111066835, "learning_rate": 2.1687547036168047e-06, "loss": 1.0874, "step": 4899 }, { "epoch": 0.021691973969631236, "grad_norm": 3.9809783225169104, "learning_rate": 2.1691973969631237e-06, "loss": 0.9088, "step": 4900 }, { "epoch": 0.021696400903094427, "grad_norm": 2.663797250802988, "learning_rate": 2.1696400903094426e-06, "loss": 0.6982, "step": 4901 }, { "epoch": 0.021700827836557617, "grad_norm": 2.5905682712065277, "learning_rate": 2.1700827836557616e-06, "loss": 0.6524, "step": 4902 }, { "epoch": 0.021705254770020808, "grad_norm": 3.5862371524606615, "learning_rate": 2.170525477002081e-06, "loss": 1.1073, "step": 4903 }, { "epoch": 0.021709681703483998, "grad_norm": 2.510554851135709, "learning_rate": 2.1709681703484e-06, "loss": 0.832, "step": 4904 }, { "epoch": 0.021714108636947185, "grad_norm": 3.069432885544987, "learning_rate": 2.171410863694719e-06, "loss": 1.2218, "step": 4905 }, { "epoch": 0.021718535570410376, "grad_norm": 2.7813675091997827, "learning_rate": 2.1718535570410378e-06, "loss": 0.773, "step": 4906 }, { "epoch": 0.021722962503873566, "grad_norm": 2.5079306493972675, "learning_rate": 2.1722962503873567e-06, "loss": 0.7322, "step": 4907 }, { "epoch": 0.021727389437336757, "grad_norm": 2.677209726156444, "learning_rate": 2.172738943733676e-06, "loss": 0.8757, "step": 4908 }, { "epoch": 0.021731816370799947, "grad_norm": 2.8130180981015545, "learning_rate": 2.173181637079995e-06, "loss": 0.5314, "step": 4909 }, { "epoch": 0.021736243304263138, "grad_norm": 2.956175004800286, "learning_rate": 2.173624330426314e-06, "loss": 0.7085, "step": 4910 }, { "epoch": 0.02174067023772633, "grad_norm": 2.662311035560589, "learning_rate": 2.174067023772633e-06, "loss": 0.7484, "step": 4911 }, { "epoch": 0.021745097171189515, "grad_norm": 3.3332166575197184, "learning_rate": 2.174509717118952e-06, "loss": 0.4268, "step": 4912 }, { "epoch": 0.021749524104652706, "grad_norm": 3.4931144398675578, "learning_rate": 2.174952410465271e-06, "loss": 0.7917, "step": 4913 }, { "epoch": 0.021753951038115896, "grad_norm": 4.086097713437346, "learning_rate": 2.1753951038115902e-06, "loss": 1.2133, "step": 4914 }, { "epoch": 0.021758377971579087, "grad_norm": 3.803571836984303, "learning_rate": 2.1758377971579087e-06, "loss": 0.7907, "step": 4915 }, { "epoch": 0.021762804905042277, "grad_norm": 2.4842136644294164, "learning_rate": 2.1762804905042277e-06, "loss": 0.5451, "step": 4916 }, { "epoch": 0.021767231838505468, "grad_norm": 3.606287395953796, "learning_rate": 2.176723183850547e-06, "loss": 0.9877, "step": 4917 }, { "epoch": 0.02177165877196866, "grad_norm": 2.6914115602549384, "learning_rate": 2.177165877196866e-06, "loss": 0.8853, "step": 4918 }, { "epoch": 0.02177608570543185, "grad_norm": 3.105615843447518, "learning_rate": 2.177608570543185e-06, "loss": 0.631, "step": 4919 }, { "epoch": 0.021780512638895036, "grad_norm": 3.2830844763660947, "learning_rate": 2.178051263889504e-06, "loss": 0.7814, "step": 4920 }, { "epoch": 0.021784939572358226, "grad_norm": 3.0526410474852295, "learning_rate": 2.178493957235823e-06, "loss": 0.9771, "step": 4921 }, { "epoch": 0.021789366505821417, "grad_norm": 3.0151390785495327, "learning_rate": 2.178936650582142e-06, "loss": 0.6388, "step": 4922 }, { "epoch": 0.021793793439284607, "grad_norm": 3.330272756526424, "learning_rate": 2.179379343928461e-06, "loss": 1.0441, "step": 4923 }, { "epoch": 0.021798220372747798, "grad_norm": 2.445763001285058, "learning_rate": 2.17982203727478e-06, "loss": 0.8265, "step": 4924 }, { "epoch": 0.02180264730621099, "grad_norm": 2.6749112723656947, "learning_rate": 2.180264730621099e-06, "loss": 0.5153, "step": 4925 }, { "epoch": 0.02180707423967418, "grad_norm": 2.7321540773219146, "learning_rate": 2.180707423967418e-06, "loss": 0.6847, "step": 4926 }, { "epoch": 0.021811501173137366, "grad_norm": 2.8248590401734757, "learning_rate": 2.181150117313737e-06, "loss": 0.7674, "step": 4927 }, { "epoch": 0.021815928106600557, "grad_norm": 2.7897103342225944, "learning_rate": 2.181592810660056e-06, "loss": 0.98, "step": 4928 }, { "epoch": 0.021820355040063747, "grad_norm": 3.3267766407320174, "learning_rate": 2.1820355040063753e-06, "loss": 0.7905, "step": 4929 }, { "epoch": 0.021824781973526938, "grad_norm": 3.7999575538507533, "learning_rate": 2.1824781973526942e-06, "loss": 1.0558, "step": 4930 }, { "epoch": 0.021829208906990128, "grad_norm": 2.2937076412810256, "learning_rate": 2.1829208906990127e-06, "loss": 0.6352, "step": 4931 }, { "epoch": 0.02183363584045332, "grad_norm": 3.229373076109276, "learning_rate": 2.183363584045332e-06, "loss": 0.7775, "step": 4932 }, { "epoch": 0.02183806277391651, "grad_norm": 2.787620700718599, "learning_rate": 2.183806277391651e-06, "loss": 1.0368, "step": 4933 }, { "epoch": 0.0218424897073797, "grad_norm": 2.848589538155587, "learning_rate": 2.18424897073797e-06, "loss": 0.7801, "step": 4934 }, { "epoch": 0.021846916640842887, "grad_norm": 2.915808366739952, "learning_rate": 2.184691664084289e-06, "loss": 0.7774, "step": 4935 }, { "epoch": 0.021851343574306077, "grad_norm": 2.4773141709174, "learning_rate": 2.185134357430608e-06, "loss": 0.6058, "step": 4936 }, { "epoch": 0.021855770507769268, "grad_norm": 3.008612461533925, "learning_rate": 2.185577050776927e-06, "loss": 0.6487, "step": 4937 }, { "epoch": 0.021860197441232458, "grad_norm": 4.36342210685195, "learning_rate": 2.1860197441232462e-06, "loss": 1.1893, "step": 4938 }, { "epoch": 0.02186462437469565, "grad_norm": 2.9564780664455688, "learning_rate": 2.186462437469565e-06, "loss": 0.7011, "step": 4939 }, { "epoch": 0.02186905130815884, "grad_norm": 2.597768065424085, "learning_rate": 2.186905130815884e-06, "loss": 0.7999, "step": 4940 }, { "epoch": 0.02187347824162203, "grad_norm": 4.188320821983719, "learning_rate": 2.187347824162203e-06, "loss": 1.2004, "step": 4941 }, { "epoch": 0.02187790517508522, "grad_norm": 3.209834673849684, "learning_rate": 2.187790517508522e-06, "loss": 0.7768, "step": 4942 }, { "epoch": 0.021882332108548407, "grad_norm": 2.650997049516534, "learning_rate": 2.188233210854841e-06, "loss": 0.9442, "step": 4943 }, { "epoch": 0.021886759042011598, "grad_norm": 3.2087465946074287, "learning_rate": 2.1886759042011603e-06, "loss": 0.9586, "step": 4944 }, { "epoch": 0.021891185975474788, "grad_norm": 3.5568697774435933, "learning_rate": 2.1891185975474793e-06, "loss": 1.0094, "step": 4945 }, { "epoch": 0.02189561290893798, "grad_norm": 2.8707159309243915, "learning_rate": 2.189561290893798e-06, "loss": 1.0342, "step": 4946 }, { "epoch": 0.02190003984240117, "grad_norm": 2.8358968256025485, "learning_rate": 2.190003984240117e-06, "loss": 0.741, "step": 4947 }, { "epoch": 0.02190446677586436, "grad_norm": 3.3449317914447034, "learning_rate": 2.190446677586436e-06, "loss": 0.7552, "step": 4948 }, { "epoch": 0.02190889370932755, "grad_norm": 2.9714151429780884, "learning_rate": 2.190889370932755e-06, "loss": 0.8486, "step": 4949 }, { "epoch": 0.021913320642790737, "grad_norm": 2.782922017553589, "learning_rate": 2.191332064279074e-06, "loss": 0.899, "step": 4950 }, { "epoch": 0.021917747576253928, "grad_norm": 2.947256195721753, "learning_rate": 2.191774757625393e-06, "loss": 0.779, "step": 4951 }, { "epoch": 0.02192217450971712, "grad_norm": 2.7739139951946243, "learning_rate": 2.192217450971712e-06, "loss": 0.7268, "step": 4952 }, { "epoch": 0.02192660144318031, "grad_norm": 2.7693095822533196, "learning_rate": 2.1926601443180313e-06, "loss": 0.6231, "step": 4953 }, { "epoch": 0.0219310283766435, "grad_norm": 3.0164792039277772, "learning_rate": 2.1931028376643502e-06, "loss": 0.9585, "step": 4954 }, { "epoch": 0.02193545531010669, "grad_norm": 3.337815520650048, "learning_rate": 2.193545531010669e-06, "loss": 0.9531, "step": 4955 }, { "epoch": 0.02193988224356988, "grad_norm": 2.7449839192117804, "learning_rate": 2.193988224356988e-06, "loss": 0.587, "step": 4956 }, { "epoch": 0.02194430917703307, "grad_norm": 2.300961966285502, "learning_rate": 2.194430917703307e-06, "loss": 0.719, "step": 4957 }, { "epoch": 0.021948736110496258, "grad_norm": 3.214729435722144, "learning_rate": 2.194873611049626e-06, "loss": 1.0908, "step": 4958 }, { "epoch": 0.02195316304395945, "grad_norm": 3.6359902310131282, "learning_rate": 2.1953163043959454e-06, "loss": 0.6191, "step": 4959 }, { "epoch": 0.02195758997742264, "grad_norm": 2.545959557115297, "learning_rate": 2.1957589977422643e-06, "loss": 0.7993, "step": 4960 }, { "epoch": 0.02196201691088583, "grad_norm": 3.0840546600601733, "learning_rate": 2.196201691088583e-06, "loss": 0.8989, "step": 4961 }, { "epoch": 0.02196644384434902, "grad_norm": 3.286599446733877, "learning_rate": 2.1966443844349022e-06, "loss": 1.2603, "step": 4962 }, { "epoch": 0.02197087077781221, "grad_norm": 3.81702134744096, "learning_rate": 2.197087077781221e-06, "loss": 0.9994, "step": 4963 }, { "epoch": 0.0219752977112754, "grad_norm": 5.0056415180347855, "learning_rate": 2.19752977112754e-06, "loss": 1.4097, "step": 4964 }, { "epoch": 0.021979724644738588, "grad_norm": 4.236062124483914, "learning_rate": 2.197972464473859e-06, "loss": 1.1669, "step": 4965 }, { "epoch": 0.02198415157820178, "grad_norm": 2.763234831721033, "learning_rate": 2.198415157820178e-06, "loss": 0.7225, "step": 4966 }, { "epoch": 0.02198857851166497, "grad_norm": 2.766820282650296, "learning_rate": 2.198857851166497e-06, "loss": 0.9156, "step": 4967 }, { "epoch": 0.02199300544512816, "grad_norm": 2.7282854480209373, "learning_rate": 2.1993005445128163e-06, "loss": 0.7873, "step": 4968 }, { "epoch": 0.02199743237859135, "grad_norm": 3.602982569080336, "learning_rate": 2.1997432378591353e-06, "loss": 0.65, "step": 4969 }, { "epoch": 0.02200185931205454, "grad_norm": 2.9806526490461085, "learning_rate": 2.2001859312054542e-06, "loss": 0.8638, "step": 4970 }, { "epoch": 0.02200628624551773, "grad_norm": 3.086795014721265, "learning_rate": 2.200628624551773e-06, "loss": 0.9153, "step": 4971 }, { "epoch": 0.02201071317898092, "grad_norm": 3.189479547543231, "learning_rate": 2.201071317898092e-06, "loss": 1.25, "step": 4972 }, { "epoch": 0.02201514011244411, "grad_norm": 3.594087425188869, "learning_rate": 2.201514011244411e-06, "loss": 1.0758, "step": 4973 }, { "epoch": 0.0220195670459073, "grad_norm": 3.060038803030758, "learning_rate": 2.2019567045907304e-06, "loss": 0.6559, "step": 4974 }, { "epoch": 0.02202399397937049, "grad_norm": 2.9935425716825046, "learning_rate": 2.2023993979370494e-06, "loss": 1.0875, "step": 4975 }, { "epoch": 0.02202842091283368, "grad_norm": 3.603088187062189, "learning_rate": 2.2028420912833683e-06, "loss": 1.1121, "step": 4976 }, { "epoch": 0.02203284784629687, "grad_norm": 2.754649718856009, "learning_rate": 2.2032847846296873e-06, "loss": 0.7977, "step": 4977 }, { "epoch": 0.02203727477976006, "grad_norm": 2.9975250549357257, "learning_rate": 2.2037274779760062e-06, "loss": 0.7167, "step": 4978 }, { "epoch": 0.02204170171322325, "grad_norm": 2.38384499788958, "learning_rate": 2.204170171322325e-06, "loss": 0.72, "step": 4979 }, { "epoch": 0.02204612864668644, "grad_norm": 2.921247017765762, "learning_rate": 2.2046128646686445e-06, "loss": 0.5896, "step": 4980 }, { "epoch": 0.02205055558014963, "grad_norm": 3.1124177817458376, "learning_rate": 2.205055558014963e-06, "loss": 0.9533, "step": 4981 }, { "epoch": 0.02205498251361282, "grad_norm": 4.018447417498361, "learning_rate": 2.205498251361282e-06, "loss": 0.732, "step": 4982 }, { "epoch": 0.02205940944707601, "grad_norm": 3.2316168479343483, "learning_rate": 2.2059409447076014e-06, "loss": 0.7879, "step": 4983 }, { "epoch": 0.0220638363805392, "grad_norm": 3.5174911069978894, "learning_rate": 2.2063836380539203e-06, "loss": 0.6483, "step": 4984 }, { "epoch": 0.02206826331400239, "grad_norm": 4.293441846020871, "learning_rate": 2.2068263314002393e-06, "loss": 1.158, "step": 4985 }, { "epoch": 0.02207269024746558, "grad_norm": 3.44140266172161, "learning_rate": 2.2072690247465582e-06, "loss": 0.6256, "step": 4986 }, { "epoch": 0.022077117180928772, "grad_norm": 2.634244780571985, "learning_rate": 2.207711718092877e-06, "loss": 0.9147, "step": 4987 }, { "epoch": 0.02208154411439196, "grad_norm": 3.3242314771241968, "learning_rate": 2.208154411439196e-06, "loss": 0.8938, "step": 4988 }, { "epoch": 0.02208597104785515, "grad_norm": 2.935834307809851, "learning_rate": 2.2085971047855155e-06, "loss": 0.8084, "step": 4989 }, { "epoch": 0.02209039798131834, "grad_norm": 3.032451303213991, "learning_rate": 2.2090397981318344e-06, "loss": 0.8109, "step": 4990 }, { "epoch": 0.02209482491478153, "grad_norm": 2.417473954917858, "learning_rate": 2.2094824914781534e-06, "loss": 0.6758, "step": 4991 }, { "epoch": 0.02209925184824472, "grad_norm": 2.7472490471008384, "learning_rate": 2.2099251848244723e-06, "loss": 0.683, "step": 4992 }, { "epoch": 0.022103678781707912, "grad_norm": 2.75167567203064, "learning_rate": 2.2103678781707913e-06, "loss": 0.6734, "step": 4993 }, { "epoch": 0.022108105715171102, "grad_norm": 2.9178271432550624, "learning_rate": 2.2108105715171102e-06, "loss": 0.7578, "step": 4994 }, { "epoch": 0.02211253264863429, "grad_norm": 2.7193300987559597, "learning_rate": 2.2112532648634296e-06, "loss": 0.6785, "step": 4995 }, { "epoch": 0.02211695958209748, "grad_norm": 2.5526596470128515, "learning_rate": 2.211695958209748e-06, "loss": 0.7676, "step": 4996 }, { "epoch": 0.02212138651556067, "grad_norm": 2.79022587835212, "learning_rate": 2.212138651556067e-06, "loss": 0.8511, "step": 4997 }, { "epoch": 0.02212581344902386, "grad_norm": 2.823778401509011, "learning_rate": 2.2125813449023864e-06, "loss": 0.9662, "step": 4998 }, { "epoch": 0.02213024038248705, "grad_norm": 3.7829566281231366, "learning_rate": 2.2130240382487054e-06, "loss": 1.1086, "step": 4999 }, { "epoch": 0.022134667315950242, "grad_norm": 2.694575304195397, "learning_rate": 2.2134667315950243e-06, "loss": 0.8251, "step": 5000 }, { "epoch": 0.022139094249413432, "grad_norm": 3.0483481292007175, "learning_rate": 2.2139094249413433e-06, "loss": 0.8519, "step": 5001 }, { "epoch": 0.022143521182876623, "grad_norm": 3.5763709282074894, "learning_rate": 2.2143521182876622e-06, "loss": 1.1288, "step": 5002 }, { "epoch": 0.02214794811633981, "grad_norm": 3.268024411300944, "learning_rate": 2.214794811633981e-06, "loss": 0.917, "step": 5003 }, { "epoch": 0.022152375049803, "grad_norm": 2.5280048962701596, "learning_rate": 2.2152375049803005e-06, "loss": 0.6193, "step": 5004 }, { "epoch": 0.02215680198326619, "grad_norm": 3.3347818695217355, "learning_rate": 2.2156801983266195e-06, "loss": 0.631, "step": 5005 }, { "epoch": 0.02216122891672938, "grad_norm": 4.095070070354242, "learning_rate": 2.2161228916729384e-06, "loss": 1.0147, "step": 5006 }, { "epoch": 0.022165655850192572, "grad_norm": 3.423124033018769, "learning_rate": 2.2165655850192574e-06, "loss": 1.0073, "step": 5007 }, { "epoch": 0.022170082783655762, "grad_norm": 3.1320874748172436, "learning_rate": 2.2170082783655763e-06, "loss": 0.7834, "step": 5008 }, { "epoch": 0.022174509717118953, "grad_norm": 3.6885640162471596, "learning_rate": 2.2174509717118953e-06, "loss": 0.9526, "step": 5009 }, { "epoch": 0.02217893665058214, "grad_norm": 3.2185493690198275, "learning_rate": 2.2178936650582146e-06, "loss": 0.7754, "step": 5010 }, { "epoch": 0.02218336358404533, "grad_norm": 2.4427144064500306, "learning_rate": 2.2183363584045336e-06, "loss": 0.5531, "step": 5011 }, { "epoch": 0.02218779051750852, "grad_norm": 3.039894505342443, "learning_rate": 2.218779051750852e-06, "loss": 0.9032, "step": 5012 }, { "epoch": 0.02219221745097171, "grad_norm": 2.9392182073645783, "learning_rate": 2.2192217450971715e-06, "loss": 0.8372, "step": 5013 }, { "epoch": 0.022196644384434902, "grad_norm": 2.842178571163699, "learning_rate": 2.2196644384434904e-06, "loss": 0.9226, "step": 5014 }, { "epoch": 0.022201071317898093, "grad_norm": 3.373440173174426, "learning_rate": 2.2201071317898094e-06, "loss": 0.9415, "step": 5015 }, { "epoch": 0.022205498251361283, "grad_norm": 2.771377999611899, "learning_rate": 2.2205498251361283e-06, "loss": 0.8373, "step": 5016 }, { "epoch": 0.022209925184824474, "grad_norm": 3.8708227818069423, "learning_rate": 2.2209925184824473e-06, "loss": 0.7832, "step": 5017 }, { "epoch": 0.02221435211828766, "grad_norm": 2.868963056398307, "learning_rate": 2.2214352118287662e-06, "loss": 0.6697, "step": 5018 }, { "epoch": 0.02221877905175085, "grad_norm": 2.9514590350208976, "learning_rate": 2.2218779051750856e-06, "loss": 0.8009, "step": 5019 }, { "epoch": 0.02222320598521404, "grad_norm": 2.6815018055154263, "learning_rate": 2.2223205985214045e-06, "loss": 0.7492, "step": 5020 }, { "epoch": 0.022227632918677232, "grad_norm": 3.179241101686209, "learning_rate": 2.2227632918677235e-06, "loss": 0.8675, "step": 5021 }, { "epoch": 0.022232059852140423, "grad_norm": 4.033738029209883, "learning_rate": 2.2232059852140424e-06, "loss": 1.5815, "step": 5022 }, { "epoch": 0.022236486785603613, "grad_norm": 2.625080450638483, "learning_rate": 2.2236486785603614e-06, "loss": 0.5278, "step": 5023 }, { "epoch": 0.022240913719066804, "grad_norm": 3.1970113026273035, "learning_rate": 2.2240913719066803e-06, "loss": 0.7963, "step": 5024 }, { "epoch": 0.022245340652529994, "grad_norm": 2.56559573143664, "learning_rate": 2.2245340652529997e-06, "loss": 0.7966, "step": 5025 }, { "epoch": 0.02224976758599318, "grad_norm": 4.240267034799344, "learning_rate": 2.2249767585993186e-06, "loss": 0.8065, "step": 5026 }, { "epoch": 0.02225419451945637, "grad_norm": 3.617919275390195, "learning_rate": 2.225419451945637e-06, "loss": 1.0105, "step": 5027 }, { "epoch": 0.022258621452919562, "grad_norm": 3.276371464250043, "learning_rate": 2.2258621452919565e-06, "loss": 0.5177, "step": 5028 }, { "epoch": 0.022263048386382753, "grad_norm": 3.402593553448175, "learning_rate": 2.2263048386382755e-06, "loss": 0.9156, "step": 5029 }, { "epoch": 0.022267475319845943, "grad_norm": 3.1138484548230694, "learning_rate": 2.2267475319845944e-06, "loss": 0.8174, "step": 5030 }, { "epoch": 0.022271902253309134, "grad_norm": 2.8626868344691876, "learning_rate": 2.2271902253309134e-06, "loss": 0.5264, "step": 5031 }, { "epoch": 0.022276329186772324, "grad_norm": 3.1509792503967757, "learning_rate": 2.2276329186772323e-06, "loss": 0.8367, "step": 5032 }, { "epoch": 0.02228075612023551, "grad_norm": 3.2042684777153183, "learning_rate": 2.2280756120235513e-06, "loss": 0.7836, "step": 5033 }, { "epoch": 0.022285183053698702, "grad_norm": 2.8512435408151457, "learning_rate": 2.2285183053698706e-06, "loss": 1.0514, "step": 5034 }, { "epoch": 0.022289609987161892, "grad_norm": 2.691018269800425, "learning_rate": 2.2289609987161896e-06, "loss": 0.7888, "step": 5035 }, { "epoch": 0.022294036920625083, "grad_norm": 2.9355629367749527, "learning_rate": 2.2294036920625085e-06, "loss": 0.9437, "step": 5036 }, { "epoch": 0.022298463854088273, "grad_norm": 2.8254067488827928, "learning_rate": 2.2298463854088275e-06, "loss": 0.8933, "step": 5037 }, { "epoch": 0.022302890787551464, "grad_norm": 3.29670947213268, "learning_rate": 2.2302890787551464e-06, "loss": 0.8464, "step": 5038 }, { "epoch": 0.022307317721014654, "grad_norm": 3.4190965396821453, "learning_rate": 2.2307317721014654e-06, "loss": 0.7405, "step": 5039 }, { "epoch": 0.022311744654477845, "grad_norm": 3.9609318134295792, "learning_rate": 2.2311744654477847e-06, "loss": 1.4887, "step": 5040 }, { "epoch": 0.022316171587941032, "grad_norm": 2.611177530982834, "learning_rate": 2.2316171587941037e-06, "loss": 0.6142, "step": 5041 }, { "epoch": 0.022320598521404222, "grad_norm": 2.48830963402966, "learning_rate": 2.2320598521404222e-06, "loss": 0.692, "step": 5042 }, { "epoch": 0.022325025454867413, "grad_norm": 2.6023460164132426, "learning_rate": 2.2325025454867416e-06, "loss": 0.6476, "step": 5043 }, { "epoch": 0.022329452388330603, "grad_norm": 2.386085114516229, "learning_rate": 2.2329452388330605e-06, "loss": 0.5983, "step": 5044 }, { "epoch": 0.022333879321793794, "grad_norm": 3.4667967910881723, "learning_rate": 2.2333879321793795e-06, "loss": 1.0334, "step": 5045 }, { "epoch": 0.022338306255256984, "grad_norm": 2.4941164993560583, "learning_rate": 2.2338306255256984e-06, "loss": 0.8072, "step": 5046 }, { "epoch": 0.022342733188720175, "grad_norm": 3.5496310805811944, "learning_rate": 2.2342733188720174e-06, "loss": 1.0634, "step": 5047 }, { "epoch": 0.022347160122183362, "grad_norm": 4.5810593367964705, "learning_rate": 2.2347160122183363e-06, "loss": 1.2444, "step": 5048 }, { "epoch": 0.022351587055646552, "grad_norm": 2.99728584208954, "learning_rate": 2.2351587055646557e-06, "loss": 0.8992, "step": 5049 }, { "epoch": 0.022356013989109743, "grad_norm": 2.5919145539985284, "learning_rate": 2.2356013989109746e-06, "loss": 0.7599, "step": 5050 }, { "epoch": 0.022360440922572934, "grad_norm": 2.7877374096819545, "learning_rate": 2.2360440922572936e-06, "loss": 0.8887, "step": 5051 }, { "epoch": 0.022364867856036124, "grad_norm": 3.3200339175106817, "learning_rate": 2.2364867856036125e-06, "loss": 1.0411, "step": 5052 }, { "epoch": 0.022369294789499315, "grad_norm": 3.065532697876167, "learning_rate": 2.2369294789499315e-06, "loss": 0.8176, "step": 5053 }, { "epoch": 0.022373721722962505, "grad_norm": 3.1505958704287393, "learning_rate": 2.2373721722962504e-06, "loss": 0.9775, "step": 5054 }, { "epoch": 0.022378148656425696, "grad_norm": 3.4464362908457806, "learning_rate": 2.23781486564257e-06, "loss": 0.81, "step": 5055 }, { "epoch": 0.022382575589888883, "grad_norm": 3.3714261632817766, "learning_rate": 2.2382575589888887e-06, "loss": 0.6499, "step": 5056 }, { "epoch": 0.022387002523352073, "grad_norm": 2.832486561031407, "learning_rate": 2.2387002523352077e-06, "loss": 0.7995, "step": 5057 }, { "epoch": 0.022391429456815264, "grad_norm": 2.7497039950312203, "learning_rate": 2.2391429456815266e-06, "loss": 0.9476, "step": 5058 }, { "epoch": 0.022395856390278454, "grad_norm": 2.6670039606150175, "learning_rate": 2.2395856390278456e-06, "loss": 0.7493, "step": 5059 }, { "epoch": 0.022400283323741645, "grad_norm": 2.62151864034442, "learning_rate": 2.2400283323741645e-06, "loss": 0.7154, "step": 5060 }, { "epoch": 0.022404710257204835, "grad_norm": 2.869112792748835, "learning_rate": 2.240471025720484e-06, "loss": 0.514, "step": 5061 }, { "epoch": 0.022409137190668026, "grad_norm": 3.2663519439103648, "learning_rate": 2.2409137190668024e-06, "loss": 0.7789, "step": 5062 }, { "epoch": 0.022413564124131213, "grad_norm": 2.72001657772931, "learning_rate": 2.2413564124131214e-06, "loss": 0.8195, "step": 5063 }, { "epoch": 0.022417991057594403, "grad_norm": 4.47778817452352, "learning_rate": 2.2417991057594407e-06, "loss": 1.2095, "step": 5064 }, { "epoch": 0.022422417991057594, "grad_norm": 3.1311002473803287, "learning_rate": 2.2422417991057597e-06, "loss": 0.6939, "step": 5065 }, { "epoch": 0.022426844924520784, "grad_norm": 2.5877063865439416, "learning_rate": 2.2426844924520786e-06, "loss": 0.5282, "step": 5066 }, { "epoch": 0.022431271857983975, "grad_norm": 2.7015001704052284, "learning_rate": 2.2431271857983976e-06, "loss": 0.5055, "step": 5067 }, { "epoch": 0.022435698791447165, "grad_norm": 4.133964173094745, "learning_rate": 2.2435698791447165e-06, "loss": 1.3415, "step": 5068 }, { "epoch": 0.022440125724910356, "grad_norm": 2.9498441944230156, "learning_rate": 2.2440125724910355e-06, "loss": 0.7707, "step": 5069 }, { "epoch": 0.022444552658373546, "grad_norm": 2.946913454761717, "learning_rate": 2.244455265837355e-06, "loss": 0.5887, "step": 5070 }, { "epoch": 0.022448979591836733, "grad_norm": 4.271328811504945, "learning_rate": 2.244897959183674e-06, "loss": 0.9799, "step": 5071 }, { "epoch": 0.022453406525299924, "grad_norm": 3.189992011705309, "learning_rate": 2.2453406525299927e-06, "loss": 1.2475, "step": 5072 }, { "epoch": 0.022457833458763114, "grad_norm": 2.609228252604911, "learning_rate": 2.2457833458763117e-06, "loss": 0.6082, "step": 5073 }, { "epoch": 0.022462260392226305, "grad_norm": 3.799839979267779, "learning_rate": 2.2462260392226306e-06, "loss": 0.9134, "step": 5074 }, { "epoch": 0.022466687325689495, "grad_norm": 3.93710684975912, "learning_rate": 2.2466687325689496e-06, "loss": 1.0631, "step": 5075 }, { "epoch": 0.022471114259152686, "grad_norm": 3.2621325194329565, "learning_rate": 2.247111425915269e-06, "loss": 0.7543, "step": 5076 }, { "epoch": 0.022475541192615876, "grad_norm": 3.394866136969129, "learning_rate": 2.2475541192615875e-06, "loss": 0.6893, "step": 5077 }, { "epoch": 0.022479968126079063, "grad_norm": 3.731669579897913, "learning_rate": 2.2479968126079064e-06, "loss": 1.0005, "step": 5078 }, { "epoch": 0.022484395059542254, "grad_norm": 3.0135264350472264, "learning_rate": 2.248439505954226e-06, "loss": 0.8956, "step": 5079 }, { "epoch": 0.022488821993005444, "grad_norm": 2.7043626424370357, "learning_rate": 2.2488821993005447e-06, "loss": 0.5633, "step": 5080 }, { "epoch": 0.022493248926468635, "grad_norm": 3.1988185126240425, "learning_rate": 2.2493248926468637e-06, "loss": 0.6564, "step": 5081 }, { "epoch": 0.022497675859931825, "grad_norm": 3.066351846614778, "learning_rate": 2.2497675859931826e-06, "loss": 0.99, "step": 5082 }, { "epoch": 0.022502102793395016, "grad_norm": 3.123975834023661, "learning_rate": 2.2502102793395016e-06, "loss": 0.8467, "step": 5083 }, { "epoch": 0.022506529726858206, "grad_norm": 3.1458546068467768, "learning_rate": 2.2506529726858205e-06, "loss": 0.8362, "step": 5084 }, { "epoch": 0.022510956660321397, "grad_norm": 3.9536114864841347, "learning_rate": 2.25109566603214e-06, "loss": 1.007, "step": 5085 }, { "epoch": 0.022515383593784584, "grad_norm": 3.1703338580955807, "learning_rate": 2.251538359378459e-06, "loss": 1.0045, "step": 5086 }, { "epoch": 0.022519810527247774, "grad_norm": 3.230128617554299, "learning_rate": 2.251981052724778e-06, "loss": 0.8414, "step": 5087 }, { "epoch": 0.022524237460710965, "grad_norm": 5.49843141447937, "learning_rate": 2.2524237460710967e-06, "loss": 1.2461, "step": 5088 }, { "epoch": 0.022528664394174155, "grad_norm": 2.818612078300497, "learning_rate": 2.2528664394174157e-06, "loss": 0.6774, "step": 5089 }, { "epoch": 0.022533091327637346, "grad_norm": 3.0622879139775008, "learning_rate": 2.2533091327637346e-06, "loss": 0.7541, "step": 5090 }, { "epoch": 0.022537518261100536, "grad_norm": 2.692999286334608, "learning_rate": 2.253751826110054e-06, "loss": 0.6677, "step": 5091 }, { "epoch": 0.022541945194563727, "grad_norm": 3.733438832463289, "learning_rate": 2.2541945194563725e-06, "loss": 1.3185, "step": 5092 }, { "epoch": 0.022546372128026917, "grad_norm": 2.824772256714168, "learning_rate": 2.2546372128026915e-06, "loss": 0.895, "step": 5093 }, { "epoch": 0.022550799061490105, "grad_norm": 2.9051445342869906, "learning_rate": 2.255079906149011e-06, "loss": 0.8278, "step": 5094 }, { "epoch": 0.022555225994953295, "grad_norm": 3.940482709727989, "learning_rate": 2.25552259949533e-06, "loss": 0.973, "step": 5095 }, { "epoch": 0.022559652928416486, "grad_norm": 3.1625092117302294, "learning_rate": 2.2559652928416487e-06, "loss": 1.0496, "step": 5096 }, { "epoch": 0.022564079861879676, "grad_norm": 3.441436435338724, "learning_rate": 2.2564079861879677e-06, "loss": 0.7511, "step": 5097 }, { "epoch": 0.022568506795342867, "grad_norm": 3.2959795228287065, "learning_rate": 2.2568506795342866e-06, "loss": 1.1185, "step": 5098 }, { "epoch": 0.022572933728806057, "grad_norm": 2.88139091443833, "learning_rate": 2.2572933728806056e-06, "loss": 0.9717, "step": 5099 }, { "epoch": 0.022577360662269248, "grad_norm": 3.194966825129321, "learning_rate": 2.257736066226925e-06, "loss": 0.9986, "step": 5100 }, { "epoch": 0.022581787595732435, "grad_norm": 2.4989604560252436, "learning_rate": 2.258178759573244e-06, "loss": 0.597, "step": 5101 }, { "epoch": 0.022586214529195625, "grad_norm": 3.317075730080358, "learning_rate": 2.258621452919563e-06, "loss": 1.0944, "step": 5102 }, { "epoch": 0.022590641462658816, "grad_norm": 3.234853913733378, "learning_rate": 2.259064146265882e-06, "loss": 0.9637, "step": 5103 }, { "epoch": 0.022595068396122006, "grad_norm": 2.6741892489937347, "learning_rate": 2.2595068396122007e-06, "loss": 0.781, "step": 5104 }, { "epoch": 0.022599495329585197, "grad_norm": 3.404217977759384, "learning_rate": 2.2599495329585197e-06, "loss": 0.868, "step": 5105 }, { "epoch": 0.022603922263048387, "grad_norm": 2.5319547342301663, "learning_rate": 2.260392226304839e-06, "loss": 0.8371, "step": 5106 }, { "epoch": 0.022608349196511578, "grad_norm": 4.736167048693432, "learning_rate": 2.260834919651158e-06, "loss": 1.1159, "step": 5107 }, { "epoch": 0.022612776129974768, "grad_norm": 3.667566726577176, "learning_rate": 2.2612776129974765e-06, "loss": 1.1882, "step": 5108 }, { "epoch": 0.022617203063437955, "grad_norm": 3.0599562649897085, "learning_rate": 2.261720306343796e-06, "loss": 0.7387, "step": 5109 }, { "epoch": 0.022621629996901146, "grad_norm": 3.412786422275757, "learning_rate": 2.262162999690115e-06, "loss": 1.0933, "step": 5110 }, { "epoch": 0.022626056930364336, "grad_norm": 3.1749036961639647, "learning_rate": 2.262605693036434e-06, "loss": 1.0191, "step": 5111 }, { "epoch": 0.022630483863827527, "grad_norm": 3.485848270982882, "learning_rate": 2.2630483863827527e-06, "loss": 0.9121, "step": 5112 }, { "epoch": 0.022634910797290717, "grad_norm": 2.752183318994591, "learning_rate": 2.2634910797290717e-06, "loss": 0.7401, "step": 5113 }, { "epoch": 0.022639337730753908, "grad_norm": 2.764913161705837, "learning_rate": 2.2639337730753906e-06, "loss": 0.7967, "step": 5114 }, { "epoch": 0.022643764664217098, "grad_norm": 2.9202224052755845, "learning_rate": 2.26437646642171e-06, "loss": 0.7977, "step": 5115 }, { "epoch": 0.022648191597680285, "grad_norm": 3.303939447679496, "learning_rate": 2.264819159768029e-06, "loss": 0.7576, "step": 5116 }, { "epoch": 0.022652618531143476, "grad_norm": 3.4660263206642705, "learning_rate": 2.265261853114348e-06, "loss": 0.4221, "step": 5117 }, { "epoch": 0.022657045464606666, "grad_norm": 3.4144010893750427, "learning_rate": 2.265704546460667e-06, "loss": 1.1307, "step": 5118 }, { "epoch": 0.022661472398069857, "grad_norm": 3.0168902569888427, "learning_rate": 2.266147239806986e-06, "loss": 0.9424, "step": 5119 }, { "epoch": 0.022665899331533047, "grad_norm": 3.381851606320884, "learning_rate": 2.2665899331533047e-06, "loss": 0.8643, "step": 5120 }, { "epoch": 0.022670326264996238, "grad_norm": 2.6691905115661196, "learning_rate": 2.267032626499624e-06, "loss": 0.7001, "step": 5121 }, { "epoch": 0.02267475319845943, "grad_norm": 3.181486953973689, "learning_rate": 2.267475319845943e-06, "loss": 1.2137, "step": 5122 }, { "epoch": 0.02267918013192262, "grad_norm": 3.1914105747395127, "learning_rate": 2.2679180131922616e-06, "loss": 0.5593, "step": 5123 }, { "epoch": 0.022683607065385806, "grad_norm": 3.9602261838637154, "learning_rate": 2.268360706538581e-06, "loss": 0.8608, "step": 5124 }, { "epoch": 0.022688033998848996, "grad_norm": 3.2378069428891396, "learning_rate": 2.2688033998849e-06, "loss": 0.7837, "step": 5125 }, { "epoch": 0.022692460932312187, "grad_norm": 3.2515188993752844, "learning_rate": 2.269246093231219e-06, "loss": 1.0535, "step": 5126 }, { "epoch": 0.022696887865775377, "grad_norm": 2.810872732098905, "learning_rate": 2.269688786577538e-06, "loss": 0.8241, "step": 5127 }, { "epoch": 0.022701314799238568, "grad_norm": 3.806919985313353, "learning_rate": 2.2701314799238567e-06, "loss": 0.5646, "step": 5128 }, { "epoch": 0.02270574173270176, "grad_norm": 3.1056585492708035, "learning_rate": 2.2705741732701757e-06, "loss": 0.7956, "step": 5129 }, { "epoch": 0.02271016866616495, "grad_norm": 3.4619733078585506, "learning_rate": 2.271016866616495e-06, "loss": 1.0034, "step": 5130 }, { "epoch": 0.022714595599628136, "grad_norm": 3.5212418123400404, "learning_rate": 2.271459559962814e-06, "loss": 1.1316, "step": 5131 }, { "epoch": 0.022719022533091326, "grad_norm": 3.441636810863508, "learning_rate": 2.271902253309133e-06, "loss": 1.1727, "step": 5132 }, { "epoch": 0.022723449466554517, "grad_norm": 2.897524423582801, "learning_rate": 2.272344946655452e-06, "loss": 0.6525, "step": 5133 }, { "epoch": 0.022727876400017707, "grad_norm": 2.7342742812501473, "learning_rate": 2.272787640001771e-06, "loss": 0.9629, "step": 5134 }, { "epoch": 0.022732303333480898, "grad_norm": 3.4397820006248216, "learning_rate": 2.27323033334809e-06, "loss": 0.6602, "step": 5135 }, { "epoch": 0.02273673026694409, "grad_norm": 3.0654867677183453, "learning_rate": 2.273673026694409e-06, "loss": 0.9179, "step": 5136 }, { "epoch": 0.02274115720040728, "grad_norm": 4.470940248367324, "learning_rate": 2.274115720040728e-06, "loss": 1.3063, "step": 5137 }, { "epoch": 0.02274558413387047, "grad_norm": 2.958238730807821, "learning_rate": 2.274558413387047e-06, "loss": 0.6444, "step": 5138 }, { "epoch": 0.022750011067333657, "grad_norm": 2.8979222333875443, "learning_rate": 2.275001106733366e-06, "loss": 0.7153, "step": 5139 }, { "epoch": 0.022754438000796847, "grad_norm": 2.7560736056882478, "learning_rate": 2.275443800079685e-06, "loss": 0.8006, "step": 5140 }, { "epoch": 0.022758864934260038, "grad_norm": 3.7292592552461623, "learning_rate": 2.275886493426004e-06, "loss": 1.1766, "step": 5141 }, { "epoch": 0.022763291867723228, "grad_norm": 3.6847016928359375, "learning_rate": 2.276329186772323e-06, "loss": 1.1156, "step": 5142 }, { "epoch": 0.02276771880118642, "grad_norm": 2.588705488082734, "learning_rate": 2.276771880118642e-06, "loss": 0.6583, "step": 5143 }, { "epoch": 0.02277214573464961, "grad_norm": 2.9912634743235347, "learning_rate": 2.2772145734649607e-06, "loss": 0.9179, "step": 5144 }, { "epoch": 0.0227765726681128, "grad_norm": 3.123505709919515, "learning_rate": 2.27765726681128e-06, "loss": 0.8778, "step": 5145 }, { "epoch": 0.022780999601575987, "grad_norm": 2.597678217617757, "learning_rate": 2.278099960157599e-06, "loss": 0.644, "step": 5146 }, { "epoch": 0.022785426535039177, "grad_norm": 2.815116072753709, "learning_rate": 2.278542653503918e-06, "loss": 0.6625, "step": 5147 }, { "epoch": 0.022789853468502368, "grad_norm": 2.8678925213095114, "learning_rate": 2.278985346850237e-06, "loss": 0.9402, "step": 5148 }, { "epoch": 0.022794280401965558, "grad_norm": 2.655493958390291, "learning_rate": 2.279428040196556e-06, "loss": 0.8051, "step": 5149 }, { "epoch": 0.02279870733542875, "grad_norm": 3.3734598769409923, "learning_rate": 2.279870733542875e-06, "loss": 0.9657, "step": 5150 }, { "epoch": 0.02280313426889194, "grad_norm": 2.860565044881338, "learning_rate": 2.2803134268891942e-06, "loss": 0.7156, "step": 5151 }, { "epoch": 0.02280756120235513, "grad_norm": 3.4045803970673667, "learning_rate": 2.280756120235513e-06, "loss": 1.0231, "step": 5152 }, { "epoch": 0.02281198813581832, "grad_norm": 2.9255014624193665, "learning_rate": 2.281198813581832e-06, "loss": 0.8712, "step": 5153 }, { "epoch": 0.022816415069281507, "grad_norm": 4.242445961862077, "learning_rate": 2.281641506928151e-06, "loss": 1.1775, "step": 5154 }, { "epoch": 0.022820842002744698, "grad_norm": 3.151898854912111, "learning_rate": 2.28208420027447e-06, "loss": 0.5586, "step": 5155 }, { "epoch": 0.022825268936207888, "grad_norm": 2.8045147909978665, "learning_rate": 2.282526893620789e-06, "loss": 0.7868, "step": 5156 }, { "epoch": 0.02282969586967108, "grad_norm": 2.9396487284361954, "learning_rate": 2.2829695869671083e-06, "loss": 0.5415, "step": 5157 }, { "epoch": 0.02283412280313427, "grad_norm": 2.609182570009344, "learning_rate": 2.283412280313427e-06, "loss": 0.7955, "step": 5158 }, { "epoch": 0.02283854973659746, "grad_norm": 3.160711283683701, "learning_rate": 2.283854973659746e-06, "loss": 1.1499, "step": 5159 }, { "epoch": 0.02284297667006065, "grad_norm": 3.26146635697062, "learning_rate": 2.284297667006065e-06, "loss": 1.0033, "step": 5160 }, { "epoch": 0.022847403603523837, "grad_norm": 3.2782350776523383, "learning_rate": 2.284740360352384e-06, "loss": 0.8465, "step": 5161 }, { "epoch": 0.022851830536987028, "grad_norm": 3.0125493244079222, "learning_rate": 2.285183053698703e-06, "loss": 0.6528, "step": 5162 }, { "epoch": 0.02285625747045022, "grad_norm": 2.6553920792425396, "learning_rate": 2.285625747045022e-06, "loss": 0.8691, "step": 5163 }, { "epoch": 0.02286068440391341, "grad_norm": 2.5647529260489916, "learning_rate": 2.286068440391341e-06, "loss": 0.6723, "step": 5164 }, { "epoch": 0.0228651113373766, "grad_norm": 2.5067914826811926, "learning_rate": 2.28651113373766e-06, "loss": 0.538, "step": 5165 }, { "epoch": 0.02286953827083979, "grad_norm": 2.7377299992195008, "learning_rate": 2.2869538270839793e-06, "loss": 0.8969, "step": 5166 }, { "epoch": 0.02287396520430298, "grad_norm": 2.9284992868992834, "learning_rate": 2.2873965204302982e-06, "loss": 0.7286, "step": 5167 }, { "epoch": 0.02287839213776617, "grad_norm": 4.579677356615409, "learning_rate": 2.287839213776617e-06, "loss": 1.0314, "step": 5168 }, { "epoch": 0.022882819071229358, "grad_norm": 2.590209941730104, "learning_rate": 2.288281907122936e-06, "loss": 0.6481, "step": 5169 }, { "epoch": 0.02288724600469255, "grad_norm": 3.2771653087879393, "learning_rate": 2.288724600469255e-06, "loss": 0.9852, "step": 5170 }, { "epoch": 0.02289167293815574, "grad_norm": 2.7795027367771374, "learning_rate": 2.289167293815574e-06, "loss": 0.8148, "step": 5171 }, { "epoch": 0.02289609987161893, "grad_norm": 3.0398800943619726, "learning_rate": 2.2896099871618934e-06, "loss": 0.6943, "step": 5172 }, { "epoch": 0.02290052680508212, "grad_norm": 2.6749062558706567, "learning_rate": 2.290052680508212e-06, "loss": 0.6599, "step": 5173 }, { "epoch": 0.02290495373854531, "grad_norm": 3.095352511089965, "learning_rate": 2.290495373854531e-06, "loss": 0.6268, "step": 5174 }, { "epoch": 0.0229093806720085, "grad_norm": 2.9245936530972694, "learning_rate": 2.2909380672008502e-06, "loss": 0.5992, "step": 5175 }, { "epoch": 0.02291380760547169, "grad_norm": 2.817559209932439, "learning_rate": 2.291380760547169e-06, "loss": 0.4537, "step": 5176 }, { "epoch": 0.02291823453893488, "grad_norm": 2.4629949913178906, "learning_rate": 2.291823453893488e-06, "loss": 0.5193, "step": 5177 }, { "epoch": 0.02292266147239807, "grad_norm": 3.0900617128856585, "learning_rate": 2.292266147239807e-06, "loss": 0.7369, "step": 5178 }, { "epoch": 0.02292708840586126, "grad_norm": 2.8710421663098358, "learning_rate": 2.292708840586126e-06, "loss": 0.9108, "step": 5179 }, { "epoch": 0.02293151533932445, "grad_norm": 4.104415165243576, "learning_rate": 2.293151533932445e-06, "loss": 1.1522, "step": 5180 }, { "epoch": 0.02293594227278764, "grad_norm": 3.089589843571092, "learning_rate": 2.2935942272787643e-06, "loss": 0.7189, "step": 5181 }, { "epoch": 0.02294036920625083, "grad_norm": 3.457646318235271, "learning_rate": 2.2940369206250833e-06, "loss": 0.7756, "step": 5182 }, { "epoch": 0.02294479613971402, "grad_norm": 3.5278153571609296, "learning_rate": 2.2944796139714022e-06, "loss": 1.3249, "step": 5183 }, { "epoch": 0.02294922307317721, "grad_norm": 2.4796081802205734, "learning_rate": 2.294922307317721e-06, "loss": 0.9599, "step": 5184 }, { "epoch": 0.0229536500066404, "grad_norm": 2.8608664705522764, "learning_rate": 2.29536500066404e-06, "loss": 0.7587, "step": 5185 }, { "epoch": 0.02295807694010359, "grad_norm": 5.189775643616017, "learning_rate": 2.295807694010359e-06, "loss": 1.4065, "step": 5186 }, { "epoch": 0.02296250387356678, "grad_norm": 2.9201301346836677, "learning_rate": 2.2962503873566784e-06, "loss": 0.747, "step": 5187 }, { "epoch": 0.02296693080702997, "grad_norm": 2.636832267272097, "learning_rate": 2.2966930807029974e-06, "loss": 0.6835, "step": 5188 }, { "epoch": 0.02297135774049316, "grad_norm": 2.4500863902250725, "learning_rate": 2.297135774049316e-06, "loss": 0.4601, "step": 5189 }, { "epoch": 0.02297578467395635, "grad_norm": 2.8634469350646032, "learning_rate": 2.2975784673956353e-06, "loss": 0.8585, "step": 5190 }, { "epoch": 0.022980211607419542, "grad_norm": 2.4020507768432275, "learning_rate": 2.2980211607419542e-06, "loss": 0.7717, "step": 5191 }, { "epoch": 0.02298463854088273, "grad_norm": 2.540897825092273, "learning_rate": 2.298463854088273e-06, "loss": 0.6809, "step": 5192 }, { "epoch": 0.02298906547434592, "grad_norm": 3.145888685816304, "learning_rate": 2.298906547434592e-06, "loss": 0.9081, "step": 5193 }, { "epoch": 0.02299349240780911, "grad_norm": 3.1530810075406657, "learning_rate": 2.299349240780911e-06, "loss": 0.745, "step": 5194 }, { "epoch": 0.0229979193412723, "grad_norm": 2.5563648592235, "learning_rate": 2.29979193412723e-06, "loss": 0.7958, "step": 5195 }, { "epoch": 0.02300234627473549, "grad_norm": 2.6886930049036204, "learning_rate": 2.3002346274735494e-06, "loss": 0.6149, "step": 5196 }, { "epoch": 0.02300677320819868, "grad_norm": 4.961125964843074, "learning_rate": 2.3006773208198683e-06, "loss": 1.5682, "step": 5197 }, { "epoch": 0.023011200141661872, "grad_norm": 3.3186372666226487, "learning_rate": 2.3011200141661873e-06, "loss": 0.7856, "step": 5198 }, { "epoch": 0.02301562707512506, "grad_norm": 2.909858741264868, "learning_rate": 2.3015627075125062e-06, "loss": 0.8859, "step": 5199 }, { "epoch": 0.02302005400858825, "grad_norm": 3.705885400732143, "learning_rate": 2.302005400858825e-06, "loss": 0.9166, "step": 5200 }, { "epoch": 0.02302448094205144, "grad_norm": 2.930447844958645, "learning_rate": 2.302448094205144e-06, "loss": 0.577, "step": 5201 }, { "epoch": 0.02302890787551463, "grad_norm": 2.788925723341051, "learning_rate": 2.3028907875514635e-06, "loss": 0.7724, "step": 5202 }, { "epoch": 0.02303333480897782, "grad_norm": 2.5392501197541293, "learning_rate": 2.3033334808977824e-06, "loss": 0.6108, "step": 5203 }, { "epoch": 0.023037761742441012, "grad_norm": 2.747757014582366, "learning_rate": 2.3037761742441014e-06, "loss": 0.7065, "step": 5204 }, { "epoch": 0.023042188675904202, "grad_norm": 2.5465161771423155, "learning_rate": 2.3042188675904203e-06, "loss": 0.6154, "step": 5205 }, { "epoch": 0.023046615609367393, "grad_norm": 3.2464224294943573, "learning_rate": 2.3046615609367393e-06, "loss": 0.8994, "step": 5206 }, { "epoch": 0.02305104254283058, "grad_norm": 3.6886804938446653, "learning_rate": 2.3051042542830587e-06, "loss": 0.9883, "step": 5207 }, { "epoch": 0.02305546947629377, "grad_norm": 3.5739562884323712, "learning_rate": 2.305546947629377e-06, "loss": 1.0424, "step": 5208 }, { "epoch": 0.02305989640975696, "grad_norm": 2.5229660103642524, "learning_rate": 2.305989640975696e-06, "loss": 0.5951, "step": 5209 }, { "epoch": 0.02306432334322015, "grad_norm": 3.9662533646887397, "learning_rate": 2.3064323343220155e-06, "loss": 1.136, "step": 5210 }, { "epoch": 0.023068750276683342, "grad_norm": 4.104265429611539, "learning_rate": 2.3068750276683344e-06, "loss": 1.0773, "step": 5211 }, { "epoch": 0.023073177210146532, "grad_norm": 3.494506989027822, "learning_rate": 2.3073177210146534e-06, "loss": 0.9698, "step": 5212 }, { "epoch": 0.023077604143609723, "grad_norm": 3.145527157204262, "learning_rate": 2.3077604143609723e-06, "loss": 0.776, "step": 5213 }, { "epoch": 0.02308203107707291, "grad_norm": 3.160510237650232, "learning_rate": 2.3082031077072913e-06, "loss": 0.8661, "step": 5214 }, { "epoch": 0.0230864580105361, "grad_norm": 2.6880132757347077, "learning_rate": 2.3086458010536102e-06, "loss": 0.6418, "step": 5215 }, { "epoch": 0.02309088494399929, "grad_norm": 2.758834511328317, "learning_rate": 2.3090884943999296e-06, "loss": 0.7094, "step": 5216 }, { "epoch": 0.02309531187746248, "grad_norm": 2.9594284754584286, "learning_rate": 2.3095311877462485e-06, "loss": 0.7486, "step": 5217 }, { "epoch": 0.023099738810925672, "grad_norm": 3.1881708374790043, "learning_rate": 2.3099738810925675e-06, "loss": 0.7144, "step": 5218 }, { "epoch": 0.023104165744388862, "grad_norm": 2.4292339472992874, "learning_rate": 2.3104165744388864e-06, "loss": 0.6229, "step": 5219 }, { "epoch": 0.023108592677852053, "grad_norm": 3.633426139690182, "learning_rate": 2.3108592677852054e-06, "loss": 1.2458, "step": 5220 }, { "epoch": 0.023113019611315243, "grad_norm": 4.194910357518328, "learning_rate": 2.3113019611315243e-06, "loss": 1.1736, "step": 5221 }, { "epoch": 0.02311744654477843, "grad_norm": 2.8246939490504936, "learning_rate": 2.3117446544778437e-06, "loss": 0.8448, "step": 5222 }, { "epoch": 0.02312187347824162, "grad_norm": 2.7437155034179743, "learning_rate": 2.3121873478241622e-06, "loss": 0.6604, "step": 5223 }, { "epoch": 0.02312630041170481, "grad_norm": 3.19184806316437, "learning_rate": 2.312630041170481e-06, "loss": 0.8175, "step": 5224 }, { "epoch": 0.023130727345168002, "grad_norm": 2.6020940537494672, "learning_rate": 2.3130727345168005e-06, "loss": 0.5914, "step": 5225 }, { "epoch": 0.023135154278631193, "grad_norm": 2.648436424739347, "learning_rate": 2.3135154278631195e-06, "loss": 0.994, "step": 5226 }, { "epoch": 0.023139581212094383, "grad_norm": 3.1325908954675015, "learning_rate": 2.3139581212094384e-06, "loss": 0.9582, "step": 5227 }, { "epoch": 0.023144008145557574, "grad_norm": 2.6501114201866183, "learning_rate": 2.3144008145557574e-06, "loss": 0.7025, "step": 5228 }, { "epoch": 0.02314843507902076, "grad_norm": 2.71800419588528, "learning_rate": 2.3148435079020763e-06, "loss": 0.573, "step": 5229 }, { "epoch": 0.02315286201248395, "grad_norm": 4.238614556067417, "learning_rate": 2.3152862012483953e-06, "loss": 0.6735, "step": 5230 }, { "epoch": 0.02315728894594714, "grad_norm": 2.6682209857524275, "learning_rate": 2.3157288945947147e-06, "loss": 0.7783, "step": 5231 }, { "epoch": 0.023161715879410332, "grad_norm": 3.3868525210526093, "learning_rate": 2.3161715879410336e-06, "loss": 1.0744, "step": 5232 }, { "epoch": 0.023166142812873523, "grad_norm": 2.7684192155036937, "learning_rate": 2.3166142812873525e-06, "loss": 0.6126, "step": 5233 }, { "epoch": 0.023170569746336713, "grad_norm": 3.2259187749635556, "learning_rate": 2.3170569746336715e-06, "loss": 0.8822, "step": 5234 }, { "epoch": 0.023174996679799904, "grad_norm": 2.948344928175423, "learning_rate": 2.3174996679799904e-06, "loss": 0.8705, "step": 5235 }, { "epoch": 0.023179423613263094, "grad_norm": 3.7728897485708637, "learning_rate": 2.3179423613263094e-06, "loss": 1.3172, "step": 5236 }, { "epoch": 0.02318385054672628, "grad_norm": 3.171056728231383, "learning_rate": 2.3183850546726288e-06, "loss": 0.9736, "step": 5237 }, { "epoch": 0.02318827748018947, "grad_norm": 3.3030344915881393, "learning_rate": 2.3188277480189477e-06, "loss": 0.6858, "step": 5238 }, { "epoch": 0.023192704413652662, "grad_norm": 3.696960145463627, "learning_rate": 2.3192704413652662e-06, "loss": 0.8969, "step": 5239 }, { "epoch": 0.023197131347115853, "grad_norm": 2.9947981739975122, "learning_rate": 2.3197131347115856e-06, "loss": 1.1313, "step": 5240 }, { "epoch": 0.023201558280579043, "grad_norm": 2.698942734660797, "learning_rate": 2.3201558280579045e-06, "loss": 0.7081, "step": 5241 }, { "epoch": 0.023205985214042234, "grad_norm": 2.852872245885489, "learning_rate": 2.3205985214042235e-06, "loss": 1.0147, "step": 5242 }, { "epoch": 0.023210412147505424, "grad_norm": 2.7786393243702654, "learning_rate": 2.3210412147505424e-06, "loss": 0.8248, "step": 5243 }, { "epoch": 0.023214839080968615, "grad_norm": 4.4401867909402215, "learning_rate": 2.3214839080968614e-06, "loss": 1.1814, "step": 5244 }, { "epoch": 0.023219266014431802, "grad_norm": 2.400947467591933, "learning_rate": 2.3219266014431803e-06, "loss": 0.5729, "step": 5245 }, { "epoch": 0.023223692947894992, "grad_norm": 2.8922510771004997, "learning_rate": 2.3223692947894997e-06, "loss": 0.7324, "step": 5246 }, { "epoch": 0.023228119881358183, "grad_norm": 2.6463643994426413, "learning_rate": 2.3228119881358187e-06, "loss": 0.7489, "step": 5247 }, { "epoch": 0.023232546814821373, "grad_norm": 2.9344227213451854, "learning_rate": 2.3232546814821376e-06, "loss": 0.7228, "step": 5248 }, { "epoch": 0.023236973748284564, "grad_norm": 2.8938866977582207, "learning_rate": 2.3236973748284565e-06, "loss": 0.7659, "step": 5249 }, { "epoch": 0.023241400681747754, "grad_norm": 2.8721951388749387, "learning_rate": 2.3241400681747755e-06, "loss": 0.7003, "step": 5250 }, { "epoch": 0.023245827615210945, "grad_norm": 3.4825918073925135, "learning_rate": 2.3245827615210944e-06, "loss": 1.3259, "step": 5251 }, { "epoch": 0.023250254548674132, "grad_norm": 3.33220829464527, "learning_rate": 2.325025454867414e-06, "loss": 1.0251, "step": 5252 }, { "epoch": 0.023254681482137322, "grad_norm": 2.55687671715812, "learning_rate": 2.3254681482137328e-06, "loss": 0.6145, "step": 5253 }, { "epoch": 0.023259108415600513, "grad_norm": 2.9603026999079596, "learning_rate": 2.3259108415600513e-06, "loss": 0.7647, "step": 5254 }, { "epoch": 0.023263535349063703, "grad_norm": 2.6719026180795558, "learning_rate": 2.3263535349063707e-06, "loss": 0.7521, "step": 5255 }, { "epoch": 0.023267962282526894, "grad_norm": 3.0749756434761664, "learning_rate": 2.3267962282526896e-06, "loss": 0.8321, "step": 5256 }, { "epoch": 0.023272389215990084, "grad_norm": 3.3123731409774866, "learning_rate": 2.3272389215990085e-06, "loss": 0.6706, "step": 5257 }, { "epoch": 0.023276816149453275, "grad_norm": 2.8604319891830374, "learning_rate": 2.3276816149453275e-06, "loss": 0.8451, "step": 5258 }, { "epoch": 0.023281243082916465, "grad_norm": 2.970868027020506, "learning_rate": 2.3281243082916464e-06, "loss": 0.8159, "step": 5259 }, { "epoch": 0.023285670016379652, "grad_norm": 2.931558641784057, "learning_rate": 2.3285670016379654e-06, "loss": 1.0854, "step": 5260 }, { "epoch": 0.023290096949842843, "grad_norm": 2.712550610915514, "learning_rate": 2.3290096949842848e-06, "loss": 0.73, "step": 5261 }, { "epoch": 0.023294523883306033, "grad_norm": 3.732316544869987, "learning_rate": 2.3294523883306037e-06, "loss": 1.0322, "step": 5262 }, { "epoch": 0.023298950816769224, "grad_norm": 2.7201951784861413, "learning_rate": 2.3298950816769227e-06, "loss": 0.6191, "step": 5263 }, { "epoch": 0.023303377750232415, "grad_norm": 2.622658194945728, "learning_rate": 2.3303377750232416e-06, "loss": 0.6366, "step": 5264 }, { "epoch": 0.023307804683695605, "grad_norm": 2.9207388256859077, "learning_rate": 2.3307804683695605e-06, "loss": 0.7395, "step": 5265 }, { "epoch": 0.023312231617158796, "grad_norm": 4.3694197642681, "learning_rate": 2.3312231617158795e-06, "loss": 1.5051, "step": 5266 }, { "epoch": 0.023316658550621983, "grad_norm": 3.2318851087770124, "learning_rate": 2.331665855062199e-06, "loss": 0.8402, "step": 5267 }, { "epoch": 0.023321085484085173, "grad_norm": 2.7277416770257052, "learning_rate": 2.332108548408518e-06, "loss": 0.6507, "step": 5268 }, { "epoch": 0.023325512417548364, "grad_norm": 2.9205670558289993, "learning_rate": 2.3325512417548368e-06, "loss": 0.6678, "step": 5269 }, { "epoch": 0.023329939351011554, "grad_norm": 3.361228807094223, "learning_rate": 2.3329939351011557e-06, "loss": 0.891, "step": 5270 }, { "epoch": 0.023334366284474745, "grad_norm": 2.5293484736599674, "learning_rate": 2.3334366284474747e-06, "loss": 0.656, "step": 5271 }, { "epoch": 0.023338793217937935, "grad_norm": 3.309670655751659, "learning_rate": 2.3338793217937936e-06, "loss": 1.0205, "step": 5272 }, { "epoch": 0.023343220151401126, "grad_norm": 3.395810188544061, "learning_rate": 2.3343220151401125e-06, "loss": 1.1261, "step": 5273 }, { "epoch": 0.023347647084864316, "grad_norm": 2.608131472185824, "learning_rate": 2.3347647084864315e-06, "loss": 0.6654, "step": 5274 }, { "epoch": 0.023352074018327503, "grad_norm": 2.7961459184558963, "learning_rate": 2.3352074018327504e-06, "loss": 0.7441, "step": 5275 }, { "epoch": 0.023356500951790694, "grad_norm": 2.7548240612065737, "learning_rate": 2.33565009517907e-06, "loss": 0.7854, "step": 5276 }, { "epoch": 0.023360927885253884, "grad_norm": 2.7420269051152806, "learning_rate": 2.3360927885253888e-06, "loss": 0.7762, "step": 5277 }, { "epoch": 0.023365354818717075, "grad_norm": 3.076817319056078, "learning_rate": 2.3365354818717077e-06, "loss": 0.9071, "step": 5278 }, { "epoch": 0.023369781752180265, "grad_norm": 2.6404259062631743, "learning_rate": 2.3369781752180267e-06, "loss": 0.8861, "step": 5279 }, { "epoch": 0.023374208685643456, "grad_norm": 2.4487426037880993, "learning_rate": 2.3374208685643456e-06, "loss": 0.6379, "step": 5280 }, { "epoch": 0.023378635619106646, "grad_norm": 2.7815463808941185, "learning_rate": 2.3378635619106645e-06, "loss": 0.873, "step": 5281 }, { "epoch": 0.023383062552569833, "grad_norm": 2.4453014967864886, "learning_rate": 2.338306255256984e-06, "loss": 0.7002, "step": 5282 }, { "epoch": 0.023387489486033024, "grad_norm": 2.8775032865261534, "learning_rate": 2.338748948603303e-06, "loss": 0.7091, "step": 5283 }, { "epoch": 0.023391916419496214, "grad_norm": 2.685079174465489, "learning_rate": 2.339191641949622e-06, "loss": 0.8031, "step": 5284 }, { "epoch": 0.023396343352959405, "grad_norm": 3.718372473016823, "learning_rate": 2.3396343352959408e-06, "loss": 1.1457, "step": 5285 }, { "epoch": 0.023400770286422595, "grad_norm": 2.670415529761915, "learning_rate": 2.3400770286422597e-06, "loss": 0.9345, "step": 5286 }, { "epoch": 0.023405197219885786, "grad_norm": 2.9162734510749804, "learning_rate": 2.3405197219885787e-06, "loss": 1.036, "step": 5287 }, { "epoch": 0.023409624153348976, "grad_norm": 3.210508615184295, "learning_rate": 2.340962415334898e-06, "loss": 1.0578, "step": 5288 }, { "epoch": 0.023414051086812167, "grad_norm": 2.622093542031645, "learning_rate": 2.3414051086812165e-06, "loss": 0.5136, "step": 5289 }, { "epoch": 0.023418478020275354, "grad_norm": 3.7455966586423086, "learning_rate": 2.3418478020275355e-06, "loss": 1.1561, "step": 5290 }, { "epoch": 0.023422904953738544, "grad_norm": 2.9924578083699305, "learning_rate": 2.342290495373855e-06, "loss": 0.8306, "step": 5291 }, { "epoch": 0.023427331887201735, "grad_norm": 3.0755913475139596, "learning_rate": 2.342733188720174e-06, "loss": 0.8111, "step": 5292 }, { "epoch": 0.023431758820664925, "grad_norm": 3.07061836592213, "learning_rate": 2.3431758820664928e-06, "loss": 0.6746, "step": 5293 }, { "epoch": 0.023436185754128116, "grad_norm": 2.712531469708873, "learning_rate": 2.3436185754128117e-06, "loss": 0.6591, "step": 5294 }, { "epoch": 0.023440612687591306, "grad_norm": 3.012023271533721, "learning_rate": 2.3440612687591307e-06, "loss": 0.6186, "step": 5295 }, { "epoch": 0.023445039621054497, "grad_norm": 3.8191072936626886, "learning_rate": 2.3445039621054496e-06, "loss": 0.9545, "step": 5296 }, { "epoch": 0.023449466554517684, "grad_norm": 3.5330467952019085, "learning_rate": 2.344946655451769e-06, "loss": 1.1068, "step": 5297 }, { "epoch": 0.023453893487980874, "grad_norm": 3.564260327140983, "learning_rate": 2.345389348798088e-06, "loss": 0.7131, "step": 5298 }, { "epoch": 0.023458320421444065, "grad_norm": 2.466718309939612, "learning_rate": 2.345832042144407e-06, "loss": 0.4184, "step": 5299 }, { "epoch": 0.023462747354907255, "grad_norm": 3.54917841942039, "learning_rate": 2.346274735490726e-06, "loss": 0.8586, "step": 5300 }, { "epoch": 0.023467174288370446, "grad_norm": 3.6442621019491352, "learning_rate": 2.3467174288370448e-06, "loss": 1.032, "step": 5301 }, { "epoch": 0.023471601221833636, "grad_norm": 3.0462737941774196, "learning_rate": 2.3471601221833637e-06, "loss": 0.7931, "step": 5302 }, { "epoch": 0.023476028155296827, "grad_norm": 3.181023126177322, "learning_rate": 2.347602815529683e-06, "loss": 1.0333, "step": 5303 }, { "epoch": 0.023480455088760017, "grad_norm": 2.742031348263924, "learning_rate": 2.3480455088760016e-06, "loss": 0.6464, "step": 5304 }, { "epoch": 0.023484882022223205, "grad_norm": 2.9842077892743215, "learning_rate": 2.3484882022223205e-06, "loss": 0.6189, "step": 5305 }, { "epoch": 0.023489308955686395, "grad_norm": 3.1795882097853476, "learning_rate": 2.34893089556864e-06, "loss": 0.7905, "step": 5306 }, { "epoch": 0.023493735889149586, "grad_norm": 4.12801016157952, "learning_rate": 2.349373588914959e-06, "loss": 1.2513, "step": 5307 }, { "epoch": 0.023498162822612776, "grad_norm": 3.8456069679004314, "learning_rate": 2.349816282261278e-06, "loss": 0.7534, "step": 5308 }, { "epoch": 0.023502589756075967, "grad_norm": 3.2527788966901325, "learning_rate": 2.3502589756075968e-06, "loss": 0.926, "step": 5309 }, { "epoch": 0.023507016689539157, "grad_norm": 2.970576173950588, "learning_rate": 2.3507016689539157e-06, "loss": 0.7067, "step": 5310 }, { "epoch": 0.023511443623002348, "grad_norm": 3.469426688238255, "learning_rate": 2.3511443623002347e-06, "loss": 0.7132, "step": 5311 }, { "epoch": 0.023515870556465535, "grad_norm": 2.898010391328929, "learning_rate": 2.351587055646554e-06, "loss": 0.8327, "step": 5312 }, { "epoch": 0.023520297489928725, "grad_norm": 3.0485813253861913, "learning_rate": 2.352029748992873e-06, "loss": 0.5288, "step": 5313 }, { "epoch": 0.023524724423391916, "grad_norm": 3.580223564290821, "learning_rate": 2.352472442339192e-06, "loss": 1.1634, "step": 5314 }, { "epoch": 0.023529151356855106, "grad_norm": 2.707514156000128, "learning_rate": 2.352915135685511e-06, "loss": 0.3965, "step": 5315 }, { "epoch": 0.023533578290318297, "grad_norm": 2.772080632345123, "learning_rate": 2.35335782903183e-06, "loss": 0.8087, "step": 5316 }, { "epoch": 0.023538005223781487, "grad_norm": 3.109246764334841, "learning_rate": 2.3538005223781488e-06, "loss": 0.9979, "step": 5317 }, { "epoch": 0.023542432157244678, "grad_norm": 3.5276158732549927, "learning_rate": 2.354243215724468e-06, "loss": 0.4134, "step": 5318 }, { "epoch": 0.023546859090707868, "grad_norm": 3.293672850745508, "learning_rate": 2.354685909070787e-06, "loss": 0.6552, "step": 5319 }, { "epoch": 0.023551286024171055, "grad_norm": 3.141932629544709, "learning_rate": 2.3551286024171056e-06, "loss": 0.9285, "step": 5320 }, { "epoch": 0.023555712957634246, "grad_norm": 3.4422419333990746, "learning_rate": 2.355571295763425e-06, "loss": 1.276, "step": 5321 }, { "epoch": 0.023560139891097436, "grad_norm": 4.093105495419478, "learning_rate": 2.356013989109744e-06, "loss": 1.1151, "step": 5322 }, { "epoch": 0.023564566824560627, "grad_norm": 2.6567461468306393, "learning_rate": 2.356456682456063e-06, "loss": 0.5586, "step": 5323 }, { "epoch": 0.023568993758023817, "grad_norm": 3.0780712295681507, "learning_rate": 2.356899375802382e-06, "loss": 0.8035, "step": 5324 }, { "epoch": 0.023573420691487008, "grad_norm": 3.1537604744545575, "learning_rate": 2.3573420691487008e-06, "loss": 0.7705, "step": 5325 }, { "epoch": 0.023577847624950198, "grad_norm": 3.0726712004144443, "learning_rate": 2.3577847624950197e-06, "loss": 0.6446, "step": 5326 }, { "epoch": 0.02358227455841339, "grad_norm": 2.7720074059479516, "learning_rate": 2.358227455841339e-06, "loss": 0.8906, "step": 5327 }, { "epoch": 0.023586701491876576, "grad_norm": 2.8036669517015733, "learning_rate": 2.358670149187658e-06, "loss": 0.6151, "step": 5328 }, { "epoch": 0.023591128425339766, "grad_norm": 3.7295997812019355, "learning_rate": 2.359112842533977e-06, "loss": 0.5395, "step": 5329 }, { "epoch": 0.023595555358802957, "grad_norm": 2.546369206761419, "learning_rate": 2.359555535880296e-06, "loss": 0.692, "step": 5330 }, { "epoch": 0.023599982292266147, "grad_norm": 2.7625645970571004, "learning_rate": 2.359998229226615e-06, "loss": 0.5542, "step": 5331 }, { "epoch": 0.023604409225729338, "grad_norm": 3.0974027463611895, "learning_rate": 2.360440922572934e-06, "loss": 0.7646, "step": 5332 }, { "epoch": 0.02360883615919253, "grad_norm": 3.0019629209346954, "learning_rate": 2.360883615919253e-06, "loss": 0.6169, "step": 5333 }, { "epoch": 0.02361326309265572, "grad_norm": 2.8271480101937585, "learning_rate": 2.361326309265572e-06, "loss": 0.6677, "step": 5334 }, { "epoch": 0.023617690026118906, "grad_norm": 3.049518266767875, "learning_rate": 2.3617690026118907e-06, "loss": 0.981, "step": 5335 }, { "epoch": 0.023622116959582096, "grad_norm": 3.3113870265603746, "learning_rate": 2.36221169595821e-06, "loss": 1.0975, "step": 5336 }, { "epoch": 0.023626543893045287, "grad_norm": 3.136644754605814, "learning_rate": 2.362654389304529e-06, "loss": 0.9759, "step": 5337 }, { "epoch": 0.023630970826508477, "grad_norm": 3.8025768057799736, "learning_rate": 2.363097082650848e-06, "loss": 0.9422, "step": 5338 }, { "epoch": 0.023635397759971668, "grad_norm": 2.495829844957923, "learning_rate": 2.363539775997167e-06, "loss": 0.663, "step": 5339 }, { "epoch": 0.02363982469343486, "grad_norm": 3.3157515688693557, "learning_rate": 2.363982469343486e-06, "loss": 0.8034, "step": 5340 }, { "epoch": 0.02364425162689805, "grad_norm": 3.160948196384829, "learning_rate": 2.3644251626898048e-06, "loss": 0.9608, "step": 5341 }, { "epoch": 0.02364867856036124, "grad_norm": 2.529984510192185, "learning_rate": 2.364867856036124e-06, "loss": 0.5588, "step": 5342 }, { "epoch": 0.023653105493824426, "grad_norm": 2.843921689490544, "learning_rate": 2.365310549382443e-06, "loss": 0.4723, "step": 5343 }, { "epoch": 0.023657532427287617, "grad_norm": 3.259334664496429, "learning_rate": 2.365753242728762e-06, "loss": 1.1409, "step": 5344 }, { "epoch": 0.023661959360750807, "grad_norm": 3.4868839948202397, "learning_rate": 2.366195936075081e-06, "loss": 0.6572, "step": 5345 }, { "epoch": 0.023666386294213998, "grad_norm": 2.7747401182706284, "learning_rate": 2.3666386294214e-06, "loss": 0.8042, "step": 5346 }, { "epoch": 0.02367081322767719, "grad_norm": 2.8211692564460717, "learning_rate": 2.367081322767719e-06, "loss": 0.6335, "step": 5347 }, { "epoch": 0.02367524016114038, "grad_norm": 3.0873779241536194, "learning_rate": 2.3675240161140382e-06, "loss": 0.5655, "step": 5348 }, { "epoch": 0.02367966709460357, "grad_norm": 2.257286625667477, "learning_rate": 2.367966709460357e-06, "loss": 0.4652, "step": 5349 }, { "epoch": 0.023684094028066757, "grad_norm": 2.7685620844505814, "learning_rate": 2.3684094028066757e-06, "loss": 0.6999, "step": 5350 }, { "epoch": 0.023688520961529947, "grad_norm": 2.806199032740958, "learning_rate": 2.368852096152995e-06, "loss": 1.0774, "step": 5351 }, { "epoch": 0.023692947894993138, "grad_norm": 2.952840770499623, "learning_rate": 2.369294789499314e-06, "loss": 0.8668, "step": 5352 }, { "epoch": 0.023697374828456328, "grad_norm": 2.6028738086301475, "learning_rate": 2.369737482845633e-06, "loss": 0.6322, "step": 5353 }, { "epoch": 0.02370180176191952, "grad_norm": 2.951992129539879, "learning_rate": 2.370180176191952e-06, "loss": 0.6752, "step": 5354 }, { "epoch": 0.02370622869538271, "grad_norm": 3.3329958850052677, "learning_rate": 2.370622869538271e-06, "loss": 1.04, "step": 5355 }, { "epoch": 0.0237106556288459, "grad_norm": 2.9784412337386206, "learning_rate": 2.37106556288459e-06, "loss": 0.7934, "step": 5356 }, { "epoch": 0.02371508256230909, "grad_norm": 3.2797170404397864, "learning_rate": 2.371508256230909e-06, "loss": 0.9712, "step": 5357 }, { "epoch": 0.023719509495772277, "grad_norm": 3.7946515695651404, "learning_rate": 2.371950949577228e-06, "loss": 0.6927, "step": 5358 }, { "epoch": 0.023723936429235468, "grad_norm": 3.520857461066973, "learning_rate": 2.372393642923547e-06, "loss": 1.0979, "step": 5359 }, { "epoch": 0.023728363362698658, "grad_norm": 3.374550941470205, "learning_rate": 2.372836336269866e-06, "loss": 0.9849, "step": 5360 }, { "epoch": 0.02373279029616185, "grad_norm": 2.9851432614231546, "learning_rate": 2.373279029616185e-06, "loss": 0.7086, "step": 5361 }, { "epoch": 0.02373721722962504, "grad_norm": 2.6367748623934144, "learning_rate": 2.373721722962504e-06, "loss": 0.8654, "step": 5362 }, { "epoch": 0.02374164416308823, "grad_norm": 3.324905589394009, "learning_rate": 2.3741644163088233e-06, "loss": 0.9991, "step": 5363 }, { "epoch": 0.02374607109655142, "grad_norm": 3.175002097311877, "learning_rate": 2.3746071096551422e-06, "loss": 0.7815, "step": 5364 }, { "epoch": 0.023750498030014607, "grad_norm": 2.6390879566721006, "learning_rate": 2.375049803001461e-06, "loss": 0.5802, "step": 5365 }, { "epoch": 0.023754924963477798, "grad_norm": 3.389502394504446, "learning_rate": 2.37549249634778e-06, "loss": 0.9784, "step": 5366 }, { "epoch": 0.023759351896940988, "grad_norm": 2.5441239671943663, "learning_rate": 2.375935189694099e-06, "loss": 0.6439, "step": 5367 }, { "epoch": 0.02376377883040418, "grad_norm": 3.1698339667735445, "learning_rate": 2.376377883040418e-06, "loss": 0.9488, "step": 5368 }, { "epoch": 0.02376820576386737, "grad_norm": 2.6246245004064135, "learning_rate": 2.3768205763867374e-06, "loss": 0.6595, "step": 5369 }, { "epoch": 0.02377263269733056, "grad_norm": 3.0565678075759917, "learning_rate": 2.377263269733056e-06, "loss": 1.0118, "step": 5370 }, { "epoch": 0.02377705963079375, "grad_norm": 3.3378124992004836, "learning_rate": 2.377705963079375e-06, "loss": 0.86, "step": 5371 }, { "epoch": 0.02378148656425694, "grad_norm": 3.2948196703378416, "learning_rate": 2.3781486564256942e-06, "loss": 1.281, "step": 5372 }, { "epoch": 0.023785913497720128, "grad_norm": 2.669192871166026, "learning_rate": 2.378591349772013e-06, "loss": 0.8389, "step": 5373 }, { "epoch": 0.02379034043118332, "grad_norm": 2.4217429164302486, "learning_rate": 2.379034043118332e-06, "loss": 0.733, "step": 5374 }, { "epoch": 0.02379476736464651, "grad_norm": 3.1305712483372785, "learning_rate": 2.379476736464651e-06, "loss": 0.8212, "step": 5375 }, { "epoch": 0.0237991942981097, "grad_norm": 2.510417667798383, "learning_rate": 2.37991942981097e-06, "loss": 0.6868, "step": 5376 }, { "epoch": 0.02380362123157289, "grad_norm": 3.758822470353302, "learning_rate": 2.380362123157289e-06, "loss": 1.2139, "step": 5377 }, { "epoch": 0.02380804816503608, "grad_norm": 2.630609943832385, "learning_rate": 2.3808048165036083e-06, "loss": 0.6539, "step": 5378 }, { "epoch": 0.02381247509849927, "grad_norm": 3.1887785013547485, "learning_rate": 2.3812475098499273e-06, "loss": 0.958, "step": 5379 }, { "epoch": 0.023816902031962458, "grad_norm": 2.802779492023076, "learning_rate": 2.3816902031962462e-06, "loss": 0.6331, "step": 5380 }, { "epoch": 0.02382132896542565, "grad_norm": 2.7474644247285154, "learning_rate": 2.382132896542565e-06, "loss": 0.6262, "step": 5381 }, { "epoch": 0.02382575589888884, "grad_norm": 2.636728902647992, "learning_rate": 2.382575589888884e-06, "loss": 0.7136, "step": 5382 }, { "epoch": 0.02383018283235203, "grad_norm": 2.9636472653578023, "learning_rate": 2.383018283235203e-06, "loss": 1.0189, "step": 5383 }, { "epoch": 0.02383460976581522, "grad_norm": 2.5268900516048167, "learning_rate": 2.3834609765815225e-06, "loss": 0.7726, "step": 5384 }, { "epoch": 0.02383903669927841, "grad_norm": 2.6055994489964442, "learning_rate": 2.383903669927841e-06, "loss": 0.8698, "step": 5385 }, { "epoch": 0.0238434636327416, "grad_norm": 2.4807121197679853, "learning_rate": 2.38434636327416e-06, "loss": 0.5234, "step": 5386 }, { "epoch": 0.02384789056620479, "grad_norm": 2.783778378387572, "learning_rate": 2.3847890566204793e-06, "loss": 1.031, "step": 5387 }, { "epoch": 0.02385231749966798, "grad_norm": 2.613178625447113, "learning_rate": 2.3852317499667982e-06, "loss": 0.8797, "step": 5388 }, { "epoch": 0.02385674443313117, "grad_norm": 2.7600252719846945, "learning_rate": 2.385674443313117e-06, "loss": 0.7707, "step": 5389 }, { "epoch": 0.02386117136659436, "grad_norm": 3.133340692261252, "learning_rate": 2.386117136659436e-06, "loss": 0.8386, "step": 5390 }, { "epoch": 0.02386559830005755, "grad_norm": 3.423201408536608, "learning_rate": 2.386559830005755e-06, "loss": 0.7906, "step": 5391 }, { "epoch": 0.02387002523352074, "grad_norm": 3.779869779507435, "learning_rate": 2.387002523352074e-06, "loss": 1.2722, "step": 5392 }, { "epoch": 0.02387445216698393, "grad_norm": 3.247987906430153, "learning_rate": 2.3874452166983934e-06, "loss": 1.0613, "step": 5393 }, { "epoch": 0.02387887910044712, "grad_norm": 3.367379005548387, "learning_rate": 2.3878879100447123e-06, "loss": 0.6742, "step": 5394 }, { "epoch": 0.023883306033910312, "grad_norm": 3.8090634972571875, "learning_rate": 2.3883306033910313e-06, "loss": 0.6254, "step": 5395 }, { "epoch": 0.0238877329673735, "grad_norm": 2.6320695034733457, "learning_rate": 2.3887732967373502e-06, "loss": 0.8404, "step": 5396 }, { "epoch": 0.02389215990083669, "grad_norm": 3.086391397955624, "learning_rate": 2.389215990083669e-06, "loss": 0.8635, "step": 5397 }, { "epoch": 0.02389658683429988, "grad_norm": 3.9725550476880804, "learning_rate": 2.389658683429988e-06, "loss": 0.6252, "step": 5398 }, { "epoch": 0.02390101376776307, "grad_norm": 3.5922106515831387, "learning_rate": 2.3901013767763075e-06, "loss": 0.9762, "step": 5399 }, { "epoch": 0.02390544070122626, "grad_norm": 2.8191304630547354, "learning_rate": 2.390544070122626e-06, "loss": 0.5373, "step": 5400 }, { "epoch": 0.02390986763468945, "grad_norm": 3.1914521050262517, "learning_rate": 2.390986763468945e-06, "loss": 0.7957, "step": 5401 }, { "epoch": 0.023914294568152642, "grad_norm": 3.460506909052379, "learning_rate": 2.3914294568152643e-06, "loss": 1.11, "step": 5402 }, { "epoch": 0.02391872150161583, "grad_norm": 2.675718046429941, "learning_rate": 2.3918721501615833e-06, "loss": 0.9499, "step": 5403 }, { "epoch": 0.02392314843507902, "grad_norm": 3.25606706746817, "learning_rate": 2.3923148435079022e-06, "loss": 1.1694, "step": 5404 }, { "epoch": 0.02392757536854221, "grad_norm": 2.7551154085812395, "learning_rate": 2.392757536854221e-06, "loss": 0.7515, "step": 5405 }, { "epoch": 0.0239320023020054, "grad_norm": 2.951379906040304, "learning_rate": 2.39320023020054e-06, "loss": 0.6864, "step": 5406 }, { "epoch": 0.02393642923546859, "grad_norm": 2.9173881433630644, "learning_rate": 2.393642923546859e-06, "loss": 0.8314, "step": 5407 }, { "epoch": 0.02394085616893178, "grad_norm": 3.349989945419668, "learning_rate": 2.3940856168931785e-06, "loss": 1.0074, "step": 5408 }, { "epoch": 0.023945283102394972, "grad_norm": 3.6058824663552125, "learning_rate": 2.3945283102394974e-06, "loss": 1.1836, "step": 5409 }, { "epoch": 0.023949710035858163, "grad_norm": 3.3356633781000045, "learning_rate": 2.3949710035858163e-06, "loss": 0.8452, "step": 5410 }, { "epoch": 0.02395413696932135, "grad_norm": 2.668876604065506, "learning_rate": 2.3954136969321353e-06, "loss": 0.7588, "step": 5411 }, { "epoch": 0.02395856390278454, "grad_norm": 3.3244721026961543, "learning_rate": 2.3958563902784542e-06, "loss": 0.7728, "step": 5412 }, { "epoch": 0.02396299083624773, "grad_norm": 3.063152020440571, "learning_rate": 2.396299083624773e-06, "loss": 1.0616, "step": 5413 }, { "epoch": 0.02396741776971092, "grad_norm": 2.7651766080472537, "learning_rate": 2.3967417769710926e-06, "loss": 0.9501, "step": 5414 }, { "epoch": 0.023971844703174112, "grad_norm": 3.2528798278886804, "learning_rate": 2.3971844703174115e-06, "loss": 0.9693, "step": 5415 }, { "epoch": 0.023976271636637302, "grad_norm": 3.5462732968751927, "learning_rate": 2.39762716366373e-06, "loss": 1.1437, "step": 5416 }, { "epoch": 0.023980698570100493, "grad_norm": 2.831085377599394, "learning_rate": 2.3980698570100494e-06, "loss": 0.6721, "step": 5417 }, { "epoch": 0.02398512550356368, "grad_norm": 2.4283159429150665, "learning_rate": 2.3985125503563683e-06, "loss": 0.8749, "step": 5418 }, { "epoch": 0.02398955243702687, "grad_norm": 2.7822187918543966, "learning_rate": 2.3989552437026873e-06, "loss": 0.6007, "step": 5419 }, { "epoch": 0.02399397937049006, "grad_norm": 2.9200478659649347, "learning_rate": 2.3993979370490062e-06, "loss": 0.8416, "step": 5420 }, { "epoch": 0.02399840630395325, "grad_norm": 3.438420170297589, "learning_rate": 2.399840630395325e-06, "loss": 0.8787, "step": 5421 }, { "epoch": 0.024002833237416442, "grad_norm": 2.828772582334573, "learning_rate": 2.400283323741644e-06, "loss": 0.6764, "step": 5422 }, { "epoch": 0.024007260170879632, "grad_norm": 3.493236427458106, "learning_rate": 2.4007260170879635e-06, "loss": 0.9966, "step": 5423 }, { "epoch": 0.024011687104342823, "grad_norm": 2.7197490867698577, "learning_rate": 2.4011687104342825e-06, "loss": 0.6677, "step": 5424 }, { "epoch": 0.024016114037806013, "grad_norm": 2.563845065149939, "learning_rate": 2.4016114037806014e-06, "loss": 0.6482, "step": 5425 }, { "epoch": 0.0240205409712692, "grad_norm": 3.1485399923955324, "learning_rate": 2.4020540971269203e-06, "loss": 0.6461, "step": 5426 }, { "epoch": 0.02402496790473239, "grad_norm": 3.081772288885693, "learning_rate": 2.4024967904732393e-06, "loss": 0.7158, "step": 5427 }, { "epoch": 0.02402939483819558, "grad_norm": 2.953314612282631, "learning_rate": 2.4029394838195582e-06, "loss": 0.8194, "step": 5428 }, { "epoch": 0.024033821771658772, "grad_norm": 2.6857546622077946, "learning_rate": 2.4033821771658776e-06, "loss": 0.7414, "step": 5429 }, { "epoch": 0.024038248705121962, "grad_norm": 2.963997947562501, "learning_rate": 2.4038248705121966e-06, "loss": 0.8707, "step": 5430 }, { "epoch": 0.024042675638585153, "grad_norm": 2.73150380708267, "learning_rate": 2.404267563858515e-06, "loss": 0.5501, "step": 5431 }, { "epoch": 0.024047102572048343, "grad_norm": 2.476827547179021, "learning_rate": 2.4047102572048345e-06, "loss": 0.9257, "step": 5432 }, { "epoch": 0.02405152950551153, "grad_norm": 2.75639864889178, "learning_rate": 2.4051529505511534e-06, "loss": 0.5844, "step": 5433 }, { "epoch": 0.02405595643897472, "grad_norm": 2.7235098965359663, "learning_rate": 2.4055956438974723e-06, "loss": 0.8128, "step": 5434 }, { "epoch": 0.02406038337243791, "grad_norm": 5.069475765633217, "learning_rate": 2.4060383372437913e-06, "loss": 1.0155, "step": 5435 }, { "epoch": 0.024064810305901102, "grad_norm": 3.282809878066888, "learning_rate": 2.4064810305901102e-06, "loss": 1.0745, "step": 5436 }, { "epoch": 0.024069237239364293, "grad_norm": 3.8139557778328155, "learning_rate": 2.406923723936429e-06, "loss": 1.2724, "step": 5437 }, { "epoch": 0.024073664172827483, "grad_norm": 3.400788723862436, "learning_rate": 2.4073664172827486e-06, "loss": 0.6761, "step": 5438 }, { "epoch": 0.024078091106290674, "grad_norm": 2.822602894754302, "learning_rate": 2.4078091106290675e-06, "loss": 0.5627, "step": 5439 }, { "epoch": 0.024082518039753864, "grad_norm": 2.8589727091961903, "learning_rate": 2.4082518039753865e-06, "loss": 0.7788, "step": 5440 }, { "epoch": 0.02408694497321705, "grad_norm": 3.4098890102938753, "learning_rate": 2.4086944973217054e-06, "loss": 1.038, "step": 5441 }, { "epoch": 0.02409137190668024, "grad_norm": 3.636708107143203, "learning_rate": 2.4091371906680243e-06, "loss": 1.0022, "step": 5442 }, { "epoch": 0.024095798840143432, "grad_norm": 3.403517001191369, "learning_rate": 2.4095798840143433e-06, "loss": 0.866, "step": 5443 }, { "epoch": 0.024100225773606623, "grad_norm": 3.2114800699105768, "learning_rate": 2.4100225773606627e-06, "loss": 0.9673, "step": 5444 }, { "epoch": 0.024104652707069813, "grad_norm": 3.9251888308312672, "learning_rate": 2.4104652707069816e-06, "loss": 1.2186, "step": 5445 }, { "epoch": 0.024109079640533004, "grad_norm": 3.238711074055549, "learning_rate": 2.4109079640533006e-06, "loss": 1.0056, "step": 5446 }, { "epoch": 0.024113506573996194, "grad_norm": 3.216400086074945, "learning_rate": 2.4113506573996195e-06, "loss": 0.9968, "step": 5447 }, { "epoch": 0.02411793350745938, "grad_norm": 3.283179382505363, "learning_rate": 2.4117933507459385e-06, "loss": 1.0335, "step": 5448 }, { "epoch": 0.02412236044092257, "grad_norm": 3.6359988429477483, "learning_rate": 2.4122360440922574e-06, "loss": 1.1139, "step": 5449 }, { "epoch": 0.024126787374385762, "grad_norm": 2.550561593373565, "learning_rate": 2.4126787374385763e-06, "loss": 0.789, "step": 5450 }, { "epoch": 0.024131214307848953, "grad_norm": 2.924454391075289, "learning_rate": 2.4131214307848953e-06, "loss": 0.9653, "step": 5451 }, { "epoch": 0.024135641241312143, "grad_norm": 3.094022955346179, "learning_rate": 2.4135641241312142e-06, "loss": 0.9466, "step": 5452 }, { "epoch": 0.024140068174775334, "grad_norm": 3.100413901101376, "learning_rate": 2.4140068174775336e-06, "loss": 0.6972, "step": 5453 }, { "epoch": 0.024144495108238524, "grad_norm": 3.2384028977196433, "learning_rate": 2.4144495108238526e-06, "loss": 1.1041, "step": 5454 }, { "epoch": 0.024148922041701715, "grad_norm": 3.765956483393755, "learning_rate": 2.4148922041701715e-06, "loss": 1.1469, "step": 5455 }, { "epoch": 0.024153348975164902, "grad_norm": 3.0947288967422155, "learning_rate": 2.4153348975164905e-06, "loss": 0.7257, "step": 5456 }, { "epoch": 0.024157775908628092, "grad_norm": 2.8492539847698213, "learning_rate": 2.4157775908628094e-06, "loss": 0.7951, "step": 5457 }, { "epoch": 0.024162202842091283, "grad_norm": 3.1017951306841676, "learning_rate": 2.4162202842091283e-06, "loss": 0.6444, "step": 5458 }, { "epoch": 0.024166629775554473, "grad_norm": 3.224342777651962, "learning_rate": 2.4166629775554477e-06, "loss": 0.884, "step": 5459 }, { "epoch": 0.024171056709017664, "grad_norm": 3.048951941994116, "learning_rate": 2.4171056709017667e-06, "loss": 0.7512, "step": 5460 }, { "epoch": 0.024175483642480854, "grad_norm": 2.505843707544981, "learning_rate": 2.4175483642480856e-06, "loss": 0.5849, "step": 5461 }, { "epoch": 0.024179910575944045, "grad_norm": 3.085761563084889, "learning_rate": 2.4179910575944046e-06, "loss": 0.6762, "step": 5462 }, { "epoch": 0.024184337509407235, "grad_norm": 2.7497074252338654, "learning_rate": 2.4184337509407235e-06, "loss": 0.7176, "step": 5463 }, { "epoch": 0.024188764442870422, "grad_norm": 3.843399759616768, "learning_rate": 2.4188764442870425e-06, "loss": 1.3117, "step": 5464 }, { "epoch": 0.024193191376333613, "grad_norm": 3.4321092670522964, "learning_rate": 2.419319137633362e-06, "loss": 0.6568, "step": 5465 }, { "epoch": 0.024197618309796803, "grad_norm": 2.9441007534954333, "learning_rate": 2.4197618309796803e-06, "loss": 0.9171, "step": 5466 }, { "epoch": 0.024202045243259994, "grad_norm": 2.5298074526296186, "learning_rate": 2.4202045243259993e-06, "loss": 0.7044, "step": 5467 }, { "epoch": 0.024206472176723184, "grad_norm": 2.6032958134659485, "learning_rate": 2.4206472176723187e-06, "loss": 0.604, "step": 5468 }, { "epoch": 0.024210899110186375, "grad_norm": 2.5882163138978216, "learning_rate": 2.4210899110186376e-06, "loss": 0.4702, "step": 5469 }, { "epoch": 0.024215326043649565, "grad_norm": 2.6740063629554056, "learning_rate": 2.4215326043649566e-06, "loss": 0.786, "step": 5470 }, { "epoch": 0.024219752977112752, "grad_norm": 2.3804637074145987, "learning_rate": 2.4219752977112755e-06, "loss": 0.637, "step": 5471 }, { "epoch": 0.024224179910575943, "grad_norm": 2.8954153367741586, "learning_rate": 2.4224179910575945e-06, "loss": 0.6825, "step": 5472 }, { "epoch": 0.024228606844039133, "grad_norm": 3.774735014313208, "learning_rate": 2.4228606844039134e-06, "loss": 0.9864, "step": 5473 }, { "epoch": 0.024233033777502324, "grad_norm": 2.448203421073391, "learning_rate": 2.4233033777502328e-06, "loss": 0.5744, "step": 5474 }, { "epoch": 0.024237460710965514, "grad_norm": 3.65615260223565, "learning_rate": 2.4237460710965517e-06, "loss": 0.9649, "step": 5475 }, { "epoch": 0.024241887644428705, "grad_norm": 3.936088289709899, "learning_rate": 2.4241887644428707e-06, "loss": 1.2471, "step": 5476 }, { "epoch": 0.024246314577891896, "grad_norm": 2.765275956819232, "learning_rate": 2.4246314577891896e-06, "loss": 0.7787, "step": 5477 }, { "epoch": 0.024250741511355086, "grad_norm": 2.6011666402250655, "learning_rate": 2.4250741511355086e-06, "loss": 0.7896, "step": 5478 }, { "epoch": 0.024255168444818273, "grad_norm": 4.7741208365797965, "learning_rate": 2.4255168444818275e-06, "loss": 1.3795, "step": 5479 }, { "epoch": 0.024259595378281464, "grad_norm": 2.4921525036799665, "learning_rate": 2.425959537828147e-06, "loss": 0.7951, "step": 5480 }, { "epoch": 0.024264022311744654, "grad_norm": 3.136491348464285, "learning_rate": 2.4264022311744654e-06, "loss": 1.0634, "step": 5481 }, { "epoch": 0.024268449245207845, "grad_norm": 3.1125497243848472, "learning_rate": 2.4268449245207844e-06, "loss": 0.5317, "step": 5482 }, { "epoch": 0.024272876178671035, "grad_norm": 2.81939853793006, "learning_rate": 2.4272876178671037e-06, "loss": 0.79, "step": 5483 }, { "epoch": 0.024277303112134226, "grad_norm": 4.538092205230235, "learning_rate": 2.4277303112134227e-06, "loss": 1.109, "step": 5484 }, { "epoch": 0.024281730045597416, "grad_norm": 2.5754122246442828, "learning_rate": 2.4281730045597416e-06, "loss": 0.5821, "step": 5485 }, { "epoch": 0.024286156979060603, "grad_norm": 3.390052838510209, "learning_rate": 2.4286156979060606e-06, "loss": 1.1717, "step": 5486 }, { "epoch": 0.024290583912523794, "grad_norm": 2.4260101275584924, "learning_rate": 2.4290583912523795e-06, "loss": 0.6231, "step": 5487 }, { "epoch": 0.024295010845986984, "grad_norm": 2.66063815873461, "learning_rate": 2.4295010845986985e-06, "loss": 0.7087, "step": 5488 }, { "epoch": 0.024299437779450175, "grad_norm": 2.7758026345095312, "learning_rate": 2.429943777945018e-06, "loss": 0.8003, "step": 5489 }, { "epoch": 0.024303864712913365, "grad_norm": 3.5046493735385695, "learning_rate": 2.4303864712913368e-06, "loss": 0.6472, "step": 5490 }, { "epoch": 0.024308291646376556, "grad_norm": 2.5734341347277767, "learning_rate": 2.4308291646376557e-06, "loss": 0.6906, "step": 5491 }, { "epoch": 0.024312718579839746, "grad_norm": 3.2028683907214868, "learning_rate": 2.4312718579839747e-06, "loss": 0.7261, "step": 5492 }, { "epoch": 0.024317145513302937, "grad_norm": 2.9454495611625475, "learning_rate": 2.4317145513302936e-06, "loss": 0.6252, "step": 5493 }, { "epoch": 0.024321572446766124, "grad_norm": 2.8762327521828834, "learning_rate": 2.4321572446766126e-06, "loss": 0.7577, "step": 5494 }, { "epoch": 0.024325999380229314, "grad_norm": 3.122090756028119, "learning_rate": 2.432599938022932e-06, "loss": 0.7134, "step": 5495 }, { "epoch": 0.024330426313692505, "grad_norm": 2.9220781506806146, "learning_rate": 2.433042631369251e-06, "loss": 0.4771, "step": 5496 }, { "epoch": 0.024334853247155695, "grad_norm": 3.2859631394738193, "learning_rate": 2.4334853247155694e-06, "loss": 1.1331, "step": 5497 }, { "epoch": 0.024339280180618886, "grad_norm": 2.9314361236193314, "learning_rate": 2.4339280180618888e-06, "loss": 0.8897, "step": 5498 }, { "epoch": 0.024343707114082076, "grad_norm": 3.3535304657799307, "learning_rate": 2.4343707114082077e-06, "loss": 1.0626, "step": 5499 }, { "epoch": 0.024348134047545267, "grad_norm": 2.7293515358258396, "learning_rate": 2.4348134047545267e-06, "loss": 0.7694, "step": 5500 }, { "epoch": 0.024352560981008454, "grad_norm": 2.63656867040419, "learning_rate": 2.4352560981008456e-06, "loss": 0.9575, "step": 5501 }, { "epoch": 0.024356987914471644, "grad_norm": 3.106155399034639, "learning_rate": 2.4356987914471646e-06, "loss": 0.8884, "step": 5502 }, { "epoch": 0.024361414847934835, "grad_norm": 2.88359967821344, "learning_rate": 2.436141484793484e-06, "loss": 0.7189, "step": 5503 }, { "epoch": 0.024365841781398025, "grad_norm": 2.9264750196510723, "learning_rate": 2.436584178139803e-06, "loss": 0.6337, "step": 5504 }, { "epoch": 0.024370268714861216, "grad_norm": 2.547216024547163, "learning_rate": 2.437026871486122e-06, "loss": 0.7201, "step": 5505 }, { "epoch": 0.024374695648324406, "grad_norm": 4.465372028843106, "learning_rate": 2.4374695648324408e-06, "loss": 1.2033, "step": 5506 }, { "epoch": 0.024379122581787597, "grad_norm": 3.2577872790216307, "learning_rate": 2.4379122581787597e-06, "loss": 0.7567, "step": 5507 }, { "epoch": 0.024383549515250787, "grad_norm": 3.3068097100884875, "learning_rate": 2.4383549515250787e-06, "loss": 0.762, "step": 5508 }, { "epoch": 0.024387976448713974, "grad_norm": 3.2306787209855776, "learning_rate": 2.438797644871398e-06, "loss": 1.0518, "step": 5509 }, { "epoch": 0.024392403382177165, "grad_norm": 4.208003245841179, "learning_rate": 2.439240338217717e-06, "loss": 1.2882, "step": 5510 }, { "epoch": 0.024396830315640355, "grad_norm": 2.9713456245834786, "learning_rate": 2.439683031564036e-06, "loss": 0.8155, "step": 5511 }, { "epoch": 0.024401257249103546, "grad_norm": 3.2576958865613874, "learning_rate": 2.440125724910355e-06, "loss": 0.708, "step": 5512 }, { "epoch": 0.024405684182566736, "grad_norm": 3.718453022997201, "learning_rate": 2.440568418256674e-06, "loss": 0.8089, "step": 5513 }, { "epoch": 0.024410111116029927, "grad_norm": 3.1258310931430913, "learning_rate": 2.4410111116029928e-06, "loss": 1.0777, "step": 5514 }, { "epoch": 0.024414538049493117, "grad_norm": 2.8132366304802905, "learning_rate": 2.441453804949312e-06, "loss": 0.9362, "step": 5515 }, { "epoch": 0.024418964982956304, "grad_norm": 3.5628895286764375, "learning_rate": 2.4418964982956307e-06, "loss": 0.8007, "step": 5516 }, { "epoch": 0.024423391916419495, "grad_norm": 2.7778831163210014, "learning_rate": 2.4423391916419496e-06, "loss": 0.7237, "step": 5517 }, { "epoch": 0.024427818849882686, "grad_norm": 3.3055098521367468, "learning_rate": 2.442781884988269e-06, "loss": 0.7841, "step": 5518 }, { "epoch": 0.024432245783345876, "grad_norm": 3.4177249802555827, "learning_rate": 2.443224578334588e-06, "loss": 1.0152, "step": 5519 }, { "epoch": 0.024436672716809067, "grad_norm": 2.7661741688261814, "learning_rate": 2.443667271680907e-06, "loss": 0.8588, "step": 5520 }, { "epoch": 0.024441099650272257, "grad_norm": 3.2930599428235277, "learning_rate": 2.444109965027226e-06, "loss": 0.5669, "step": 5521 }, { "epoch": 0.024445526583735448, "grad_norm": 2.5849541315173683, "learning_rate": 2.4445526583735448e-06, "loss": 0.7688, "step": 5522 }, { "epoch": 0.024449953517198638, "grad_norm": 2.9331827346510315, "learning_rate": 2.4449953517198637e-06, "loss": 0.7997, "step": 5523 }, { "epoch": 0.024454380450661825, "grad_norm": 3.3567372634410724, "learning_rate": 2.445438045066183e-06, "loss": 1.1431, "step": 5524 }, { "epoch": 0.024458807384125016, "grad_norm": 2.627917874989615, "learning_rate": 2.445880738412502e-06, "loss": 0.7482, "step": 5525 }, { "epoch": 0.024463234317588206, "grad_norm": 2.9453637656816714, "learning_rate": 2.446323431758821e-06, "loss": 0.7075, "step": 5526 }, { "epoch": 0.024467661251051397, "grad_norm": 2.835012418216499, "learning_rate": 2.44676612510514e-06, "loss": 0.6529, "step": 5527 }, { "epoch": 0.024472088184514587, "grad_norm": 2.5482653512881286, "learning_rate": 2.447208818451459e-06, "loss": 1.0117, "step": 5528 }, { "epoch": 0.024476515117977778, "grad_norm": 3.1431179262194187, "learning_rate": 2.447651511797778e-06, "loss": 0.7307, "step": 5529 }, { "epoch": 0.024480942051440968, "grad_norm": 3.008165575873125, "learning_rate": 2.448094205144097e-06, "loss": 0.7576, "step": 5530 }, { "epoch": 0.024485368984904155, "grad_norm": 2.510120864967746, "learning_rate": 2.4485368984904157e-06, "loss": 0.6679, "step": 5531 }, { "epoch": 0.024489795918367346, "grad_norm": 2.9245828868073134, "learning_rate": 2.4489795918367347e-06, "loss": 0.5296, "step": 5532 }, { "epoch": 0.024494222851830536, "grad_norm": 2.8251457440685517, "learning_rate": 2.449422285183054e-06, "loss": 0.7915, "step": 5533 }, { "epoch": 0.024498649785293727, "grad_norm": 3.5877425450014244, "learning_rate": 2.449864978529373e-06, "loss": 0.9864, "step": 5534 }, { "epoch": 0.024503076718756917, "grad_norm": 2.8168728604650712, "learning_rate": 2.450307671875692e-06, "loss": 0.6033, "step": 5535 }, { "epoch": 0.024507503652220108, "grad_norm": 4.166223630180548, "learning_rate": 2.450750365222011e-06, "loss": 0.7406, "step": 5536 }, { "epoch": 0.024511930585683298, "grad_norm": 2.3424426670929086, "learning_rate": 2.45119305856833e-06, "loss": 0.5147, "step": 5537 }, { "epoch": 0.02451635751914649, "grad_norm": 2.917702804449518, "learning_rate": 2.4516357519146488e-06, "loss": 0.8842, "step": 5538 }, { "epoch": 0.024520784452609676, "grad_norm": 2.7545473195721426, "learning_rate": 2.452078445260968e-06, "loss": 0.7897, "step": 5539 }, { "epoch": 0.024525211386072866, "grad_norm": 2.9283329964988534, "learning_rate": 2.452521138607287e-06, "loss": 0.5079, "step": 5540 }, { "epoch": 0.024529638319536057, "grad_norm": 2.5898016610003145, "learning_rate": 2.452963831953606e-06, "loss": 0.5561, "step": 5541 }, { "epoch": 0.024534065252999247, "grad_norm": 2.6189690354694157, "learning_rate": 2.453406525299925e-06, "loss": 0.7231, "step": 5542 }, { "epoch": 0.024538492186462438, "grad_norm": 2.701120344333089, "learning_rate": 2.453849218646244e-06, "loss": 0.9462, "step": 5543 }, { "epoch": 0.02454291911992563, "grad_norm": 2.8401713800370976, "learning_rate": 2.454291911992563e-06, "loss": 0.8963, "step": 5544 }, { "epoch": 0.02454734605338882, "grad_norm": 3.0765307061831386, "learning_rate": 2.4547346053388823e-06, "loss": 0.7785, "step": 5545 }, { "epoch": 0.02455177298685201, "grad_norm": 2.9029116839080986, "learning_rate": 2.455177298685201e-06, "loss": 0.5787, "step": 5546 }, { "epoch": 0.024556199920315196, "grad_norm": 3.0079504134170403, "learning_rate": 2.4556199920315197e-06, "loss": 0.7661, "step": 5547 }, { "epoch": 0.024560626853778387, "grad_norm": 3.4801820879094985, "learning_rate": 2.456062685377839e-06, "loss": 1.3172, "step": 5548 }, { "epoch": 0.024565053787241577, "grad_norm": 2.814762405420292, "learning_rate": 2.456505378724158e-06, "loss": 0.7474, "step": 5549 }, { "epoch": 0.024569480720704768, "grad_norm": 3.1424415729135866, "learning_rate": 2.456948072070477e-06, "loss": 0.9741, "step": 5550 }, { "epoch": 0.02457390765416796, "grad_norm": 4.10971840799614, "learning_rate": 2.457390765416796e-06, "loss": 1.379, "step": 5551 }, { "epoch": 0.02457833458763115, "grad_norm": 3.1010243750996422, "learning_rate": 2.457833458763115e-06, "loss": 1.0017, "step": 5552 }, { "epoch": 0.02458276152109434, "grad_norm": 2.8121932030391803, "learning_rate": 2.458276152109434e-06, "loss": 0.8054, "step": 5553 }, { "epoch": 0.024587188454557526, "grad_norm": 2.5576548463251245, "learning_rate": 2.458718845455753e-06, "loss": 0.83, "step": 5554 }, { "epoch": 0.024591615388020717, "grad_norm": 3.539566887670237, "learning_rate": 2.459161538802072e-06, "loss": 1.2408, "step": 5555 }, { "epoch": 0.024596042321483907, "grad_norm": 3.481180919124117, "learning_rate": 2.459604232148391e-06, "loss": 0.9544, "step": 5556 }, { "epoch": 0.024600469254947098, "grad_norm": 2.9487785243511855, "learning_rate": 2.46004692549471e-06, "loss": 0.7397, "step": 5557 }, { "epoch": 0.02460489618841029, "grad_norm": 3.7047035477499866, "learning_rate": 2.460489618841029e-06, "loss": 1.193, "step": 5558 }, { "epoch": 0.02460932312187348, "grad_norm": 2.7336008510101766, "learning_rate": 2.460932312187348e-06, "loss": 0.863, "step": 5559 }, { "epoch": 0.02461375005533667, "grad_norm": 3.3755625774330946, "learning_rate": 2.4613750055336673e-06, "loss": 0.6275, "step": 5560 }, { "epoch": 0.02461817698879986, "grad_norm": 2.778933749098714, "learning_rate": 2.4618176988799863e-06, "loss": 0.9568, "step": 5561 }, { "epoch": 0.024622603922263047, "grad_norm": 4.07792204024816, "learning_rate": 2.4622603922263048e-06, "loss": 1.2798, "step": 5562 }, { "epoch": 0.024627030855726238, "grad_norm": 3.0872668874652307, "learning_rate": 2.462703085572624e-06, "loss": 0.7787, "step": 5563 }, { "epoch": 0.024631457789189428, "grad_norm": 2.8475665161083, "learning_rate": 2.463145778918943e-06, "loss": 0.9924, "step": 5564 }, { "epoch": 0.02463588472265262, "grad_norm": 3.3639228952887525, "learning_rate": 2.463588472265262e-06, "loss": 0.6524, "step": 5565 }, { "epoch": 0.02464031165611581, "grad_norm": 2.870968448824767, "learning_rate": 2.464031165611581e-06, "loss": 0.7333, "step": 5566 }, { "epoch": 0.024644738589579, "grad_norm": 2.657518218351935, "learning_rate": 2.4644738589579e-06, "loss": 0.6822, "step": 5567 }, { "epoch": 0.02464916552304219, "grad_norm": 2.861107166716297, "learning_rate": 2.464916552304219e-06, "loss": 0.7708, "step": 5568 }, { "epoch": 0.024653592456505377, "grad_norm": 2.543401519138791, "learning_rate": 2.4653592456505383e-06, "loss": 0.5913, "step": 5569 }, { "epoch": 0.024658019389968568, "grad_norm": 2.5219535538994737, "learning_rate": 2.465801938996857e-06, "loss": 0.6786, "step": 5570 }, { "epoch": 0.024662446323431758, "grad_norm": 4.169480985381916, "learning_rate": 2.466244632343176e-06, "loss": 1.1648, "step": 5571 }, { "epoch": 0.02466687325689495, "grad_norm": 2.557451832706628, "learning_rate": 2.466687325689495e-06, "loss": 0.8106, "step": 5572 }, { "epoch": 0.02467130019035814, "grad_norm": 3.403328675945525, "learning_rate": 2.467130019035814e-06, "loss": 0.9958, "step": 5573 }, { "epoch": 0.02467572712382133, "grad_norm": 3.1694873389290645, "learning_rate": 2.467572712382133e-06, "loss": 1.0917, "step": 5574 }, { "epoch": 0.02468015405728452, "grad_norm": 3.208753030330455, "learning_rate": 2.4680154057284524e-06, "loss": 0.7396, "step": 5575 }, { "epoch": 0.02468458099074771, "grad_norm": 2.673740499548303, "learning_rate": 2.4684580990747713e-06, "loss": 0.8551, "step": 5576 }, { "epoch": 0.024689007924210898, "grad_norm": 2.3349410092061618, "learning_rate": 2.4689007924210903e-06, "loss": 0.8305, "step": 5577 }, { "epoch": 0.024693434857674088, "grad_norm": 2.6461298356357696, "learning_rate": 2.469343485767409e-06, "loss": 0.6358, "step": 5578 }, { "epoch": 0.02469786179113728, "grad_norm": 2.5863299687857575, "learning_rate": 2.469786179113728e-06, "loss": 0.6871, "step": 5579 }, { "epoch": 0.02470228872460047, "grad_norm": 2.84771575823915, "learning_rate": 2.470228872460047e-06, "loss": 0.9098, "step": 5580 }, { "epoch": 0.02470671565806366, "grad_norm": 2.4491196849609302, "learning_rate": 2.470671565806366e-06, "loss": 0.4253, "step": 5581 }, { "epoch": 0.02471114259152685, "grad_norm": 3.0036569514722946, "learning_rate": 2.471114259152685e-06, "loss": 0.8414, "step": 5582 }, { "epoch": 0.02471556952499004, "grad_norm": 2.7641384084610037, "learning_rate": 2.471556952499004e-06, "loss": 0.5241, "step": 5583 }, { "epoch": 0.024719996458453228, "grad_norm": 3.788352236974054, "learning_rate": 2.4719996458453233e-06, "loss": 1.0347, "step": 5584 }, { "epoch": 0.02472442339191642, "grad_norm": 2.8117613435718103, "learning_rate": 2.4724423391916423e-06, "loss": 0.7346, "step": 5585 }, { "epoch": 0.02472885032537961, "grad_norm": 2.7520446035564428, "learning_rate": 2.472885032537961e-06, "loss": 0.7041, "step": 5586 }, { "epoch": 0.0247332772588428, "grad_norm": 2.74865440492239, "learning_rate": 2.47332772588428e-06, "loss": 0.5159, "step": 5587 }, { "epoch": 0.02473770419230599, "grad_norm": 3.129797666059615, "learning_rate": 2.473770419230599e-06, "loss": 0.6571, "step": 5588 }, { "epoch": 0.02474213112576918, "grad_norm": 2.8719391477794654, "learning_rate": 2.474213112576918e-06, "loss": 0.7182, "step": 5589 }, { "epoch": 0.02474655805923237, "grad_norm": 2.5939426265258887, "learning_rate": 2.4746558059232374e-06, "loss": 0.5071, "step": 5590 }, { "epoch": 0.02475098499269556, "grad_norm": 2.5543984369585866, "learning_rate": 2.4750984992695564e-06, "loss": 0.5931, "step": 5591 }, { "epoch": 0.02475541192615875, "grad_norm": 2.6136259770234855, "learning_rate": 2.4755411926158753e-06, "loss": 0.5817, "step": 5592 }, { "epoch": 0.02475983885962194, "grad_norm": 2.6832982027388024, "learning_rate": 2.4759838859621943e-06, "loss": 0.7466, "step": 5593 }, { "epoch": 0.02476426579308513, "grad_norm": 3.881343634325751, "learning_rate": 2.476426579308513e-06, "loss": 0.796, "step": 5594 }, { "epoch": 0.02476869272654832, "grad_norm": 3.2195455246190403, "learning_rate": 2.476869272654832e-06, "loss": 0.7079, "step": 5595 }, { "epoch": 0.02477311966001151, "grad_norm": 2.9428882941098675, "learning_rate": 2.4773119660011515e-06, "loss": 1.0013, "step": 5596 }, { "epoch": 0.0247775465934747, "grad_norm": 2.7592387216113017, "learning_rate": 2.47775465934747e-06, "loss": 0.9151, "step": 5597 }, { "epoch": 0.02478197352693789, "grad_norm": 2.5105363632418354, "learning_rate": 2.478197352693789e-06, "loss": 0.7173, "step": 5598 }, { "epoch": 0.02478640046040108, "grad_norm": 3.467470540679371, "learning_rate": 2.4786400460401084e-06, "loss": 1.1577, "step": 5599 }, { "epoch": 0.02479082739386427, "grad_norm": 3.0529183435852265, "learning_rate": 2.4790827393864273e-06, "loss": 0.8024, "step": 5600 }, { "epoch": 0.02479525432732746, "grad_norm": 2.6508082817815923, "learning_rate": 2.4795254327327463e-06, "loss": 0.7342, "step": 5601 }, { "epoch": 0.02479968126079065, "grad_norm": 2.774515507306768, "learning_rate": 2.479968126079065e-06, "loss": 0.8178, "step": 5602 }, { "epoch": 0.02480410819425384, "grad_norm": 3.932869444800299, "learning_rate": 2.480410819425384e-06, "loss": 0.9385, "step": 5603 }, { "epoch": 0.02480853512771703, "grad_norm": 3.3507236205614253, "learning_rate": 2.480853512771703e-06, "loss": 0.9651, "step": 5604 }, { "epoch": 0.02481296206118022, "grad_norm": 3.078209109278039, "learning_rate": 2.4812962061180225e-06, "loss": 0.9099, "step": 5605 }, { "epoch": 0.024817388994643412, "grad_norm": 3.261253148063059, "learning_rate": 2.4817388994643414e-06, "loss": 1.173, "step": 5606 }, { "epoch": 0.0248218159281066, "grad_norm": 3.5215635207582427, "learning_rate": 2.4821815928106604e-06, "loss": 1.0352, "step": 5607 }, { "epoch": 0.02482624286156979, "grad_norm": 3.473672990411988, "learning_rate": 2.4826242861569793e-06, "loss": 1.2274, "step": 5608 }, { "epoch": 0.02483066979503298, "grad_norm": 2.70493887375051, "learning_rate": 2.4830669795032983e-06, "loss": 1.0321, "step": 5609 }, { "epoch": 0.02483509672849617, "grad_norm": 2.599169762961759, "learning_rate": 2.483509672849617e-06, "loss": 0.4654, "step": 5610 }, { "epoch": 0.02483952366195936, "grad_norm": 4.677949843886644, "learning_rate": 2.4839523661959366e-06, "loss": 0.8951, "step": 5611 }, { "epoch": 0.02484395059542255, "grad_norm": 3.28042051528546, "learning_rate": 2.484395059542255e-06, "loss": 0.8687, "step": 5612 }, { "epoch": 0.024848377528885742, "grad_norm": 3.3209112902993514, "learning_rate": 2.484837752888574e-06, "loss": 1.1701, "step": 5613 }, { "epoch": 0.024852804462348933, "grad_norm": 2.6599761002389384, "learning_rate": 2.4852804462348934e-06, "loss": 0.738, "step": 5614 }, { "epoch": 0.02485723139581212, "grad_norm": 2.4845878910384362, "learning_rate": 2.4857231395812124e-06, "loss": 0.6744, "step": 5615 }, { "epoch": 0.02486165832927531, "grad_norm": 2.59307791698928, "learning_rate": 2.4861658329275313e-06, "loss": 0.5625, "step": 5616 }, { "epoch": 0.0248660852627385, "grad_norm": 2.617722615015166, "learning_rate": 2.4866085262738503e-06, "loss": 0.8968, "step": 5617 }, { "epoch": 0.02487051219620169, "grad_norm": 3.427515073917516, "learning_rate": 2.487051219620169e-06, "loss": 0.7791, "step": 5618 }, { "epoch": 0.02487493912966488, "grad_norm": 2.4336332031348347, "learning_rate": 2.487493912966488e-06, "loss": 0.6977, "step": 5619 }, { "epoch": 0.024879366063128072, "grad_norm": 2.823216961156952, "learning_rate": 2.4879366063128075e-06, "loss": 0.7656, "step": 5620 }, { "epoch": 0.024883792996591263, "grad_norm": 2.782133164325076, "learning_rate": 2.4883792996591265e-06, "loss": 0.6847, "step": 5621 }, { "epoch": 0.02488821993005445, "grad_norm": 2.6270986656201765, "learning_rate": 2.4888219930054454e-06, "loss": 0.6382, "step": 5622 }, { "epoch": 0.02489264686351764, "grad_norm": 3.3139671594255593, "learning_rate": 2.4892646863517644e-06, "loss": 1.0755, "step": 5623 }, { "epoch": 0.02489707379698083, "grad_norm": 2.737175325685319, "learning_rate": 2.4897073796980833e-06, "loss": 0.7527, "step": 5624 }, { "epoch": 0.02490150073044402, "grad_norm": 3.0490809090094264, "learning_rate": 2.4901500730444023e-06, "loss": 0.9321, "step": 5625 }, { "epoch": 0.024905927663907212, "grad_norm": 3.2445147651726063, "learning_rate": 2.4905927663907216e-06, "loss": 0.8618, "step": 5626 }, { "epoch": 0.024910354597370402, "grad_norm": 3.8595784302895013, "learning_rate": 2.4910354597370406e-06, "loss": 0.9642, "step": 5627 }, { "epoch": 0.024914781530833593, "grad_norm": 2.6814549048627403, "learning_rate": 2.491478153083359e-06, "loss": 0.8514, "step": 5628 }, { "epoch": 0.024919208464296783, "grad_norm": 2.793251892936, "learning_rate": 2.4919208464296785e-06, "loss": 0.7183, "step": 5629 }, { "epoch": 0.02492363539775997, "grad_norm": 3.2560094204646655, "learning_rate": 2.4923635397759974e-06, "loss": 0.7507, "step": 5630 }, { "epoch": 0.02492806233122316, "grad_norm": 2.5770538897608817, "learning_rate": 2.4928062331223164e-06, "loss": 0.6998, "step": 5631 }, { "epoch": 0.02493248926468635, "grad_norm": 2.4562400207163244, "learning_rate": 2.4932489264686353e-06, "loss": 0.5203, "step": 5632 }, { "epoch": 0.024936916198149542, "grad_norm": 3.1457289523825764, "learning_rate": 2.4936916198149543e-06, "loss": 0.721, "step": 5633 }, { "epoch": 0.024941343131612732, "grad_norm": 3.1401954148768105, "learning_rate": 2.494134313161273e-06, "loss": 0.7446, "step": 5634 }, { "epoch": 0.024945770065075923, "grad_norm": 3.510920699328035, "learning_rate": 2.4945770065075926e-06, "loss": 1.0073, "step": 5635 }, { "epoch": 0.024950196998539113, "grad_norm": 3.106738652710491, "learning_rate": 2.4950196998539115e-06, "loss": 0.8941, "step": 5636 }, { "epoch": 0.0249546239320023, "grad_norm": 2.764198497052168, "learning_rate": 2.4954623932002305e-06, "loss": 0.7374, "step": 5637 }, { "epoch": 0.02495905086546549, "grad_norm": 2.6690590875995794, "learning_rate": 2.4959050865465494e-06, "loss": 0.7768, "step": 5638 }, { "epoch": 0.02496347779892868, "grad_norm": 2.9846018089451647, "learning_rate": 2.4963477798928684e-06, "loss": 0.5644, "step": 5639 }, { "epoch": 0.024967904732391872, "grad_norm": 2.448102435289741, "learning_rate": 2.4967904732391873e-06, "loss": 0.3984, "step": 5640 }, { "epoch": 0.024972331665855062, "grad_norm": 2.8292579866036736, "learning_rate": 2.4972331665855067e-06, "loss": 1.0271, "step": 5641 }, { "epoch": 0.024976758599318253, "grad_norm": 2.83387575355128, "learning_rate": 2.4976758599318256e-06, "loss": 0.6339, "step": 5642 }, { "epoch": 0.024981185532781443, "grad_norm": 3.013328465584641, "learning_rate": 2.498118553278144e-06, "loss": 1.0173, "step": 5643 }, { "epoch": 0.024985612466244634, "grad_norm": 3.6436372326077437, "learning_rate": 2.4985612466244635e-06, "loss": 0.7729, "step": 5644 }, { "epoch": 0.02499003939970782, "grad_norm": 2.9010621746827057, "learning_rate": 2.4990039399707825e-06, "loss": 0.7319, "step": 5645 }, { "epoch": 0.02499446633317101, "grad_norm": 2.3487358473245976, "learning_rate": 2.4994466333171014e-06, "loss": 0.3898, "step": 5646 }, { "epoch": 0.024998893266634202, "grad_norm": 3.4228605649779458, "learning_rate": 2.4998893266634204e-06, "loss": 1.0564, "step": 5647 }, { "epoch": 0.025003320200097393, "grad_norm": 2.9076880467766477, "learning_rate": 2.5003320200097397e-06, "loss": 0.9806, "step": 5648 }, { "epoch": 0.025007747133560583, "grad_norm": 2.9790919653219174, "learning_rate": 2.5007747133560583e-06, "loss": 0.9383, "step": 5649 }, { "epoch": 0.025012174067023774, "grad_norm": 2.9639443419405187, "learning_rate": 2.5012174067023776e-06, "loss": 0.5457, "step": 5650 }, { "epoch": 0.025016601000486964, "grad_norm": 2.8957687980028504, "learning_rate": 2.5016601000486966e-06, "loss": 1.0428, "step": 5651 }, { "epoch": 0.02502102793395015, "grad_norm": 3.909419332088613, "learning_rate": 2.5021027933950155e-06, "loss": 1.4314, "step": 5652 }, { "epoch": 0.02502545486741334, "grad_norm": 2.5963881835752445, "learning_rate": 2.5025454867413345e-06, "loss": 0.7991, "step": 5653 }, { "epoch": 0.025029881800876532, "grad_norm": 3.6049654041367063, "learning_rate": 2.502988180087654e-06, "loss": 1.3183, "step": 5654 }, { "epoch": 0.025034308734339723, "grad_norm": 3.4562307323647747, "learning_rate": 2.5034308734339724e-06, "loss": 0.9569, "step": 5655 }, { "epoch": 0.025038735667802913, "grad_norm": 3.2812672725010623, "learning_rate": 2.5038735667802917e-06, "loss": 0.7827, "step": 5656 }, { "epoch": 0.025043162601266104, "grad_norm": 2.7590636131125983, "learning_rate": 2.5043162601266107e-06, "loss": 0.8951, "step": 5657 }, { "epoch": 0.025047589534729294, "grad_norm": 3.7241572275626504, "learning_rate": 2.504758953472929e-06, "loss": 0.6324, "step": 5658 }, { "epoch": 0.025052016468192485, "grad_norm": 2.574301123234833, "learning_rate": 2.5052016468192486e-06, "loss": 0.7775, "step": 5659 }, { "epoch": 0.02505644340165567, "grad_norm": 3.0671395727450994, "learning_rate": 2.505644340165568e-06, "loss": 0.5508, "step": 5660 }, { "epoch": 0.025060870335118862, "grad_norm": 2.8530221081076643, "learning_rate": 2.5060870335118865e-06, "loss": 0.9016, "step": 5661 }, { "epoch": 0.025065297268582053, "grad_norm": 3.5503920902390083, "learning_rate": 2.5065297268582054e-06, "loss": 0.6007, "step": 5662 }, { "epoch": 0.025069724202045243, "grad_norm": 2.746223149695707, "learning_rate": 2.5069724202045248e-06, "loss": 0.7348, "step": 5663 }, { "epoch": 0.025074151135508434, "grad_norm": 4.607478756986962, "learning_rate": 2.5074151135508433e-06, "loss": 1.3872, "step": 5664 }, { "epoch": 0.025078578068971624, "grad_norm": 2.689886820997907, "learning_rate": 2.5078578068971627e-06, "loss": 0.8051, "step": 5665 }, { "epoch": 0.025083005002434815, "grad_norm": 2.798069289284905, "learning_rate": 2.5083005002434816e-06, "loss": 0.8723, "step": 5666 }, { "epoch": 0.025087431935898002, "grad_norm": 3.0882839558131248, "learning_rate": 2.5087431935898006e-06, "loss": 0.9361, "step": 5667 }, { "epoch": 0.025091858869361192, "grad_norm": 3.1922982484603892, "learning_rate": 2.5091858869361195e-06, "loss": 0.9447, "step": 5668 }, { "epoch": 0.025096285802824383, "grad_norm": 2.509524734335159, "learning_rate": 2.509628580282439e-06, "loss": 0.5782, "step": 5669 }, { "epoch": 0.025100712736287573, "grad_norm": 3.3536371837187127, "learning_rate": 2.5100712736287574e-06, "loss": 0.5592, "step": 5670 }, { "epoch": 0.025105139669750764, "grad_norm": 3.7253947160281737, "learning_rate": 2.5105139669750768e-06, "loss": 1.1478, "step": 5671 }, { "epoch": 0.025109566603213954, "grad_norm": 2.999230631925397, "learning_rate": 2.5109566603213957e-06, "loss": 0.7775, "step": 5672 }, { "epoch": 0.025113993536677145, "grad_norm": 3.3123239128399895, "learning_rate": 2.5113993536677147e-06, "loss": 0.8027, "step": 5673 }, { "epoch": 0.025118420470140335, "grad_norm": 2.972595022448705, "learning_rate": 2.5118420470140336e-06, "loss": 0.9078, "step": 5674 }, { "epoch": 0.025122847403603522, "grad_norm": 3.513238890372724, "learning_rate": 2.512284740360353e-06, "loss": 0.9531, "step": 5675 }, { "epoch": 0.025127274337066713, "grad_norm": 4.05103101476584, "learning_rate": 2.5127274337066715e-06, "loss": 1.343, "step": 5676 }, { "epoch": 0.025131701270529903, "grad_norm": 3.5566872414568556, "learning_rate": 2.513170127052991e-06, "loss": 0.9291, "step": 5677 }, { "epoch": 0.025136128203993094, "grad_norm": 3.1633579119741313, "learning_rate": 2.51361282039931e-06, "loss": 0.8601, "step": 5678 }, { "epoch": 0.025140555137456284, "grad_norm": 2.9035335061305347, "learning_rate": 2.5140555137456284e-06, "loss": 0.5757, "step": 5679 }, { "epoch": 0.025144982070919475, "grad_norm": 3.522027713410584, "learning_rate": 2.5144982070919477e-06, "loss": 0.8995, "step": 5680 }, { "epoch": 0.025149409004382665, "grad_norm": 2.689127252850257, "learning_rate": 2.5149409004382667e-06, "loss": 0.8427, "step": 5681 }, { "epoch": 0.025153835937845852, "grad_norm": 2.836138945106648, "learning_rate": 2.5153835937845856e-06, "loss": 0.769, "step": 5682 }, { "epoch": 0.025158262871309043, "grad_norm": 3.3803833517698405, "learning_rate": 2.5158262871309046e-06, "loss": 1.0291, "step": 5683 }, { "epoch": 0.025162689804772233, "grad_norm": 3.887304272900594, "learning_rate": 2.516268980477224e-06, "loss": 1.1819, "step": 5684 }, { "epoch": 0.025167116738235424, "grad_norm": 3.120232011045095, "learning_rate": 2.5167116738235425e-06, "loss": 0.5311, "step": 5685 }, { "epoch": 0.025171543671698614, "grad_norm": 3.1845423669442394, "learning_rate": 2.517154367169862e-06, "loss": 0.6588, "step": 5686 }, { "epoch": 0.025175970605161805, "grad_norm": 3.038726177567694, "learning_rate": 2.5175970605161808e-06, "loss": 0.8924, "step": 5687 }, { "epoch": 0.025180397538624995, "grad_norm": 3.53941441846276, "learning_rate": 2.5180397538624997e-06, "loss": 0.8939, "step": 5688 }, { "epoch": 0.025184824472088186, "grad_norm": 3.0298698181224957, "learning_rate": 2.5184824472088187e-06, "loss": 0.8727, "step": 5689 }, { "epoch": 0.025189251405551373, "grad_norm": 2.802709248091157, "learning_rate": 2.518925140555138e-06, "loss": 0.7713, "step": 5690 }, { "epoch": 0.025193678339014564, "grad_norm": 2.7993072490355773, "learning_rate": 2.5193678339014566e-06, "loss": 0.664, "step": 5691 }, { "epoch": 0.025198105272477754, "grad_norm": 2.8362811278359708, "learning_rate": 2.519810527247776e-06, "loss": 0.8285, "step": 5692 }, { "epoch": 0.025202532205940945, "grad_norm": 3.141249108898603, "learning_rate": 2.520253220594095e-06, "loss": 0.7626, "step": 5693 }, { "epoch": 0.025206959139404135, "grad_norm": 3.102813156939588, "learning_rate": 2.5206959139404134e-06, "loss": 0.8256, "step": 5694 }, { "epoch": 0.025211386072867326, "grad_norm": 2.9405659880385033, "learning_rate": 2.5211386072867328e-06, "loss": 0.5615, "step": 5695 }, { "epoch": 0.025215813006330516, "grad_norm": 3.07936469690359, "learning_rate": 2.521581300633052e-06, "loss": 0.874, "step": 5696 }, { "epoch": 0.025220239939793707, "grad_norm": 2.774991857645244, "learning_rate": 2.5220239939793707e-06, "loss": 0.803, "step": 5697 }, { "epoch": 0.025224666873256894, "grad_norm": 2.5541143033291718, "learning_rate": 2.5224666873256896e-06, "loss": 0.7869, "step": 5698 }, { "epoch": 0.025229093806720084, "grad_norm": 2.670191884471416, "learning_rate": 2.522909380672009e-06, "loss": 1.0661, "step": 5699 }, { "epoch": 0.025233520740183275, "grad_norm": 2.662308590745087, "learning_rate": 2.5233520740183275e-06, "loss": 0.8923, "step": 5700 }, { "epoch": 0.025237947673646465, "grad_norm": 2.938972736468588, "learning_rate": 2.523794767364647e-06, "loss": 0.7411, "step": 5701 }, { "epoch": 0.025242374607109656, "grad_norm": 3.3125064296505466, "learning_rate": 2.524237460710966e-06, "loss": 0.8138, "step": 5702 }, { "epoch": 0.025246801540572846, "grad_norm": 2.7701564163634087, "learning_rate": 2.5246801540572848e-06, "loss": 0.8152, "step": 5703 }, { "epoch": 0.025251228474036037, "grad_norm": 3.1030837362299093, "learning_rate": 2.5251228474036037e-06, "loss": 0.7774, "step": 5704 }, { "epoch": 0.025255655407499224, "grad_norm": 2.8529897497107894, "learning_rate": 2.525565540749923e-06, "loss": 0.8171, "step": 5705 }, { "epoch": 0.025260082340962414, "grad_norm": 3.0750103814992595, "learning_rate": 2.5260082340962416e-06, "loss": 1.0785, "step": 5706 }, { "epoch": 0.025264509274425605, "grad_norm": 2.7100456508158626, "learning_rate": 2.526450927442561e-06, "loss": 0.7322, "step": 5707 }, { "epoch": 0.025268936207888795, "grad_norm": 3.2521365373788025, "learning_rate": 2.52689362078888e-06, "loss": 0.5339, "step": 5708 }, { "epoch": 0.025273363141351986, "grad_norm": 3.096588714639428, "learning_rate": 2.5273363141351985e-06, "loss": 0.8329, "step": 5709 }, { "epoch": 0.025277790074815176, "grad_norm": 2.8526436418321905, "learning_rate": 2.527779007481518e-06, "loss": 1.0679, "step": 5710 }, { "epoch": 0.025282217008278367, "grad_norm": 4.23889362785745, "learning_rate": 2.528221700827837e-06, "loss": 1.1652, "step": 5711 }, { "epoch": 0.025286643941741557, "grad_norm": 4.606038808312937, "learning_rate": 2.5286643941741557e-06, "loss": 1.242, "step": 5712 }, { "epoch": 0.025291070875204744, "grad_norm": 2.6224997551361504, "learning_rate": 2.5291070875204747e-06, "loss": 0.7371, "step": 5713 }, { "epoch": 0.025295497808667935, "grad_norm": 3.1507068716405473, "learning_rate": 2.529549780866794e-06, "loss": 0.542, "step": 5714 }, { "epoch": 0.025299924742131125, "grad_norm": 3.083928572011853, "learning_rate": 2.5299924742131126e-06, "loss": 0.6695, "step": 5715 }, { "epoch": 0.025304351675594316, "grad_norm": 2.925299433649599, "learning_rate": 2.530435167559432e-06, "loss": 0.8853, "step": 5716 }, { "epoch": 0.025308778609057506, "grad_norm": 2.7671595794028456, "learning_rate": 2.530877860905751e-06, "loss": 0.8349, "step": 5717 }, { "epoch": 0.025313205542520697, "grad_norm": 3.329162257092078, "learning_rate": 2.53132055425207e-06, "loss": 1.0779, "step": 5718 }, { "epoch": 0.025317632475983887, "grad_norm": 2.61021205745343, "learning_rate": 2.5317632475983888e-06, "loss": 0.7621, "step": 5719 }, { "epoch": 0.025322059409447074, "grad_norm": 2.502778641583667, "learning_rate": 2.532205940944708e-06, "loss": 0.6535, "step": 5720 }, { "epoch": 0.025326486342910265, "grad_norm": 3.2969880534090423, "learning_rate": 2.5326486342910267e-06, "loss": 0.879, "step": 5721 }, { "epoch": 0.025330913276373455, "grad_norm": 2.607471196675586, "learning_rate": 2.533091327637346e-06, "loss": 0.6795, "step": 5722 }, { "epoch": 0.025335340209836646, "grad_norm": 2.256596835539074, "learning_rate": 2.533534020983665e-06, "loss": 0.6668, "step": 5723 }, { "epoch": 0.025339767143299836, "grad_norm": 3.075177666188208, "learning_rate": 2.5339767143299835e-06, "loss": 0.5988, "step": 5724 }, { "epoch": 0.025344194076763027, "grad_norm": 2.7758932061045334, "learning_rate": 2.534419407676303e-06, "loss": 0.9218, "step": 5725 }, { "epoch": 0.025348621010226217, "grad_norm": 2.8609786122295415, "learning_rate": 2.5348621010226223e-06, "loss": 0.8821, "step": 5726 }, { "epoch": 0.025353047943689408, "grad_norm": 3.5641289575489696, "learning_rate": 2.535304794368941e-06, "loss": 1.1798, "step": 5727 }, { "epoch": 0.025357474877152595, "grad_norm": 2.8295763882204708, "learning_rate": 2.5357474877152597e-06, "loss": 0.7433, "step": 5728 }, { "epoch": 0.025361901810615785, "grad_norm": 3.8766928710054134, "learning_rate": 2.536190181061579e-06, "loss": 1.028, "step": 5729 }, { "epoch": 0.025366328744078976, "grad_norm": 2.970492468808249, "learning_rate": 2.5366328744078976e-06, "loss": 0.9773, "step": 5730 }, { "epoch": 0.025370755677542167, "grad_norm": 3.4178665983206638, "learning_rate": 2.537075567754217e-06, "loss": 1.0704, "step": 5731 }, { "epoch": 0.025375182611005357, "grad_norm": 2.8971556614568112, "learning_rate": 2.537518261100536e-06, "loss": 0.7117, "step": 5732 }, { "epoch": 0.025379609544468548, "grad_norm": 2.8351757924460177, "learning_rate": 2.537960954446855e-06, "loss": 0.7551, "step": 5733 }, { "epoch": 0.025384036477931738, "grad_norm": 2.9357136041537197, "learning_rate": 2.538403647793174e-06, "loss": 0.692, "step": 5734 }, { "epoch": 0.025388463411394925, "grad_norm": 2.746375927242338, "learning_rate": 2.5388463411394932e-06, "loss": 0.7809, "step": 5735 }, { "epoch": 0.025392890344858116, "grad_norm": 2.4339973225603937, "learning_rate": 2.5392890344858117e-06, "loss": 0.6982, "step": 5736 }, { "epoch": 0.025397317278321306, "grad_norm": 2.7657451734374088, "learning_rate": 2.539731727832131e-06, "loss": 0.7737, "step": 5737 }, { "epoch": 0.025401744211784497, "grad_norm": 3.181217968093082, "learning_rate": 2.54017442117845e-06, "loss": 0.6947, "step": 5738 }, { "epoch": 0.025406171145247687, "grad_norm": 2.678063155260316, "learning_rate": 2.5406171145247686e-06, "loss": 0.7273, "step": 5739 }, { "epoch": 0.025410598078710878, "grad_norm": 2.5965642900601393, "learning_rate": 2.541059807871088e-06, "loss": 0.643, "step": 5740 }, { "epoch": 0.025415025012174068, "grad_norm": 3.4989484353818465, "learning_rate": 2.5415025012174073e-06, "loss": 0.9683, "step": 5741 }, { "epoch": 0.02541945194563726, "grad_norm": 3.1340367031910055, "learning_rate": 2.541945194563726e-06, "loss": 0.6856, "step": 5742 }, { "epoch": 0.025423878879100446, "grad_norm": 3.072833937749289, "learning_rate": 2.542387887910045e-06, "loss": 0.6914, "step": 5743 }, { "epoch": 0.025428305812563636, "grad_norm": 3.207813670024648, "learning_rate": 2.542830581256364e-06, "loss": 0.8479, "step": 5744 }, { "epoch": 0.025432732746026827, "grad_norm": 3.0801399834888263, "learning_rate": 2.5432732746026827e-06, "loss": 0.8975, "step": 5745 }, { "epoch": 0.025437159679490017, "grad_norm": 3.4682631273016558, "learning_rate": 2.543715967949002e-06, "loss": 0.9597, "step": 5746 }, { "epoch": 0.025441586612953208, "grad_norm": 3.987061982002673, "learning_rate": 2.544158661295321e-06, "loss": 1.2609, "step": 5747 }, { "epoch": 0.025446013546416398, "grad_norm": 2.414807770361831, "learning_rate": 2.54460135464164e-06, "loss": 0.709, "step": 5748 }, { "epoch": 0.02545044047987959, "grad_norm": 2.5814092438925895, "learning_rate": 2.545044047987959e-06, "loss": 0.7423, "step": 5749 }, { "epoch": 0.025454867413342776, "grad_norm": 2.9241344758140877, "learning_rate": 2.5454867413342783e-06, "loss": 0.8027, "step": 5750 }, { "epoch": 0.025459294346805966, "grad_norm": 3.433758836715842, "learning_rate": 2.545929434680597e-06, "loss": 0.8921, "step": 5751 }, { "epoch": 0.025463721280269157, "grad_norm": 2.7060848736273284, "learning_rate": 2.546372128026916e-06, "loss": 0.6877, "step": 5752 }, { "epoch": 0.025468148213732347, "grad_norm": 2.942756280872778, "learning_rate": 2.546814821373235e-06, "loss": 0.8942, "step": 5753 }, { "epoch": 0.025472575147195538, "grad_norm": 2.6357255956755794, "learning_rate": 2.547257514719554e-06, "loss": 0.5654, "step": 5754 }, { "epoch": 0.02547700208065873, "grad_norm": 3.990226999634525, "learning_rate": 2.547700208065873e-06, "loss": 0.9549, "step": 5755 }, { "epoch": 0.02548142901412192, "grad_norm": 2.738328765350239, "learning_rate": 2.5481429014121924e-06, "loss": 0.7793, "step": 5756 }, { "epoch": 0.02548585594758511, "grad_norm": 2.697589759209869, "learning_rate": 2.548585594758511e-06, "loss": 0.5733, "step": 5757 }, { "epoch": 0.025490282881048296, "grad_norm": 3.0815103548321927, "learning_rate": 2.54902828810483e-06, "loss": 0.7655, "step": 5758 }, { "epoch": 0.025494709814511487, "grad_norm": 3.3905121134240996, "learning_rate": 2.5494709814511492e-06, "loss": 0.828, "step": 5759 }, { "epoch": 0.025499136747974677, "grad_norm": 4.107849282272303, "learning_rate": 2.5499136747974677e-06, "loss": 0.8681, "step": 5760 }, { "epoch": 0.025503563681437868, "grad_norm": 2.88429574742858, "learning_rate": 2.550356368143787e-06, "loss": 0.9235, "step": 5761 }, { "epoch": 0.02550799061490106, "grad_norm": 3.4240004099169963, "learning_rate": 2.550799061490106e-06, "loss": 0.7964, "step": 5762 }, { "epoch": 0.02551241754836425, "grad_norm": 2.725778897918494, "learning_rate": 2.551241754836425e-06, "loss": 0.7984, "step": 5763 }, { "epoch": 0.02551684448182744, "grad_norm": 3.3418102898631834, "learning_rate": 2.551684448182744e-06, "loss": 1.0373, "step": 5764 }, { "epoch": 0.02552127141529063, "grad_norm": 2.904945905481321, "learning_rate": 2.5521271415290633e-06, "loss": 0.5151, "step": 5765 }, { "epoch": 0.025525698348753817, "grad_norm": 3.06529138250997, "learning_rate": 2.552569834875382e-06, "loss": 0.9893, "step": 5766 }, { "epoch": 0.025530125282217007, "grad_norm": 3.042172564793341, "learning_rate": 2.5530125282217012e-06, "loss": 0.6769, "step": 5767 }, { "epoch": 0.025534552215680198, "grad_norm": 2.5651053794806407, "learning_rate": 2.55345522156802e-06, "loss": 0.6693, "step": 5768 }, { "epoch": 0.02553897914914339, "grad_norm": 2.7561698502188583, "learning_rate": 2.553897914914339e-06, "loss": 0.6181, "step": 5769 }, { "epoch": 0.02554340608260658, "grad_norm": 3.3251341112006405, "learning_rate": 2.554340608260658e-06, "loss": 0.6783, "step": 5770 }, { "epoch": 0.02554783301606977, "grad_norm": 3.5611301033258513, "learning_rate": 2.5547833016069774e-06, "loss": 1.102, "step": 5771 }, { "epoch": 0.02555225994953296, "grad_norm": 3.7636221991156127, "learning_rate": 2.555225994953296e-06, "loss": 0.9624, "step": 5772 }, { "epoch": 0.025556686882996147, "grad_norm": 2.6912419203216693, "learning_rate": 2.5556686882996153e-06, "loss": 0.7902, "step": 5773 }, { "epoch": 0.025561113816459338, "grad_norm": 3.0696628986014423, "learning_rate": 2.5561113816459343e-06, "loss": 0.7421, "step": 5774 }, { "epoch": 0.025565540749922528, "grad_norm": 2.6802556272919174, "learning_rate": 2.556554074992253e-06, "loss": 0.779, "step": 5775 }, { "epoch": 0.02556996768338572, "grad_norm": 2.740895826985201, "learning_rate": 2.556996768338572e-06, "loss": 0.9346, "step": 5776 }, { "epoch": 0.02557439461684891, "grad_norm": 3.3382684112186065, "learning_rate": 2.5574394616848915e-06, "loss": 0.9693, "step": 5777 }, { "epoch": 0.0255788215503121, "grad_norm": 2.898658737156241, "learning_rate": 2.55788215503121e-06, "loss": 0.7741, "step": 5778 }, { "epoch": 0.02558324848377529, "grad_norm": 2.5465469485828556, "learning_rate": 2.558324848377529e-06, "loss": 0.477, "step": 5779 }, { "epoch": 0.02558767541723848, "grad_norm": 2.3976608950630007, "learning_rate": 2.5587675417238484e-06, "loss": 0.6014, "step": 5780 }, { "epoch": 0.025592102350701668, "grad_norm": 3.187080752998972, "learning_rate": 2.559210235070167e-06, "loss": 0.8445, "step": 5781 }, { "epoch": 0.025596529284164858, "grad_norm": 3.1773827692970156, "learning_rate": 2.5596529284164863e-06, "loss": 0.8836, "step": 5782 }, { "epoch": 0.02560095621762805, "grad_norm": 2.7394317580979304, "learning_rate": 2.5600956217628052e-06, "loss": 0.8955, "step": 5783 }, { "epoch": 0.02560538315109124, "grad_norm": 2.671743830760237, "learning_rate": 2.560538315109124e-06, "loss": 0.7276, "step": 5784 }, { "epoch": 0.02560981008455443, "grad_norm": 2.9751785385933647, "learning_rate": 2.560981008455443e-06, "loss": 0.7075, "step": 5785 }, { "epoch": 0.02561423701801762, "grad_norm": 2.6119199940022937, "learning_rate": 2.5614237018017625e-06, "loss": 0.8804, "step": 5786 }, { "epoch": 0.02561866395148081, "grad_norm": 2.742885486665806, "learning_rate": 2.561866395148081e-06, "loss": 0.7752, "step": 5787 }, { "epoch": 0.025623090884943998, "grad_norm": 3.21526441853295, "learning_rate": 2.5623090884944004e-06, "loss": 0.9259, "step": 5788 }, { "epoch": 0.025627517818407188, "grad_norm": 3.059138734540437, "learning_rate": 2.5627517818407193e-06, "loss": 0.8145, "step": 5789 }, { "epoch": 0.02563194475187038, "grad_norm": 2.8435271443987613, "learning_rate": 2.563194475187038e-06, "loss": 0.9531, "step": 5790 }, { "epoch": 0.02563637168533357, "grad_norm": 3.0736670098186156, "learning_rate": 2.5636371685333572e-06, "loss": 0.7735, "step": 5791 }, { "epoch": 0.02564079861879676, "grad_norm": 3.382026856805165, "learning_rate": 2.5640798618796766e-06, "loss": 0.8755, "step": 5792 }, { "epoch": 0.02564522555225995, "grad_norm": 3.3048642185448656, "learning_rate": 2.564522555225995e-06, "loss": 0.8193, "step": 5793 }, { "epoch": 0.02564965248572314, "grad_norm": 3.1649000525445725, "learning_rate": 2.564965248572314e-06, "loss": 0.6468, "step": 5794 }, { "epoch": 0.02565407941918633, "grad_norm": 2.663252419276861, "learning_rate": 2.5654079419186334e-06, "loss": 0.6198, "step": 5795 }, { "epoch": 0.02565850635264952, "grad_norm": 2.8231336457885967, "learning_rate": 2.565850635264952e-06, "loss": 0.9597, "step": 5796 }, { "epoch": 0.02566293328611271, "grad_norm": 2.864087670648462, "learning_rate": 2.5662933286112713e-06, "loss": 0.761, "step": 5797 }, { "epoch": 0.0256673602195759, "grad_norm": 2.423597787830815, "learning_rate": 2.5667360219575903e-06, "loss": 0.723, "step": 5798 }, { "epoch": 0.02567178715303909, "grad_norm": 2.9995812321754536, "learning_rate": 2.5671787153039092e-06, "loss": 0.9497, "step": 5799 }, { "epoch": 0.02567621408650228, "grad_norm": 2.5513236779005637, "learning_rate": 2.567621408650228e-06, "loss": 0.5764, "step": 5800 }, { "epoch": 0.02568064101996547, "grad_norm": 3.7923147764972627, "learning_rate": 2.5680641019965475e-06, "loss": 0.5796, "step": 5801 }, { "epoch": 0.02568506795342866, "grad_norm": 3.0596467298323655, "learning_rate": 2.568506795342866e-06, "loss": 0.8403, "step": 5802 }, { "epoch": 0.02568949488689185, "grad_norm": 2.561478039123088, "learning_rate": 2.5689494886891854e-06, "loss": 0.6575, "step": 5803 }, { "epoch": 0.02569392182035504, "grad_norm": 2.86326904154422, "learning_rate": 2.5693921820355044e-06, "loss": 0.9289, "step": 5804 }, { "epoch": 0.02569834875381823, "grad_norm": 2.7387258728037827, "learning_rate": 2.569834875381823e-06, "loss": 0.6072, "step": 5805 }, { "epoch": 0.02570277568728142, "grad_norm": 2.9352085626814772, "learning_rate": 2.5702775687281423e-06, "loss": 0.8928, "step": 5806 }, { "epoch": 0.02570720262074461, "grad_norm": 3.0089432013565087, "learning_rate": 2.5707202620744616e-06, "loss": 0.8459, "step": 5807 }, { "epoch": 0.0257116295542078, "grad_norm": 3.201945304204818, "learning_rate": 2.57116295542078e-06, "loss": 0.8167, "step": 5808 }, { "epoch": 0.02571605648767099, "grad_norm": 2.6460422101904175, "learning_rate": 2.571605648767099e-06, "loss": 0.4995, "step": 5809 }, { "epoch": 0.025720483421134182, "grad_norm": 3.095763177378782, "learning_rate": 2.5720483421134185e-06, "loss": 0.7897, "step": 5810 }, { "epoch": 0.02572491035459737, "grad_norm": 2.721118222863486, "learning_rate": 2.572491035459737e-06, "loss": 0.5302, "step": 5811 }, { "epoch": 0.02572933728806056, "grad_norm": 2.949910814542469, "learning_rate": 2.5729337288060564e-06, "loss": 0.4957, "step": 5812 }, { "epoch": 0.02573376422152375, "grad_norm": 3.2159801916035553, "learning_rate": 2.5733764221523753e-06, "loss": 1.1341, "step": 5813 }, { "epoch": 0.02573819115498694, "grad_norm": 2.8187509122142966, "learning_rate": 2.5738191154986943e-06, "loss": 0.6376, "step": 5814 }, { "epoch": 0.02574261808845013, "grad_norm": 2.7296971678783724, "learning_rate": 2.5742618088450132e-06, "loss": 0.8107, "step": 5815 }, { "epoch": 0.02574704502191332, "grad_norm": 3.3427538102203824, "learning_rate": 2.5747045021913326e-06, "loss": 1.0763, "step": 5816 }, { "epoch": 0.025751471955376512, "grad_norm": 4.189103343136405, "learning_rate": 2.575147195537651e-06, "loss": 1.3981, "step": 5817 }, { "epoch": 0.0257558988888397, "grad_norm": 2.6247987056769158, "learning_rate": 2.5755898888839705e-06, "loss": 0.8104, "step": 5818 }, { "epoch": 0.02576032582230289, "grad_norm": 3.2249890672980253, "learning_rate": 2.5760325822302894e-06, "loss": 0.8892, "step": 5819 }, { "epoch": 0.02576475275576608, "grad_norm": 2.9027716109940034, "learning_rate": 2.576475275576608e-06, "loss": 0.6866, "step": 5820 }, { "epoch": 0.02576917968922927, "grad_norm": 2.9871691114288583, "learning_rate": 2.5769179689229273e-06, "loss": 0.8128, "step": 5821 }, { "epoch": 0.02577360662269246, "grad_norm": 3.321531892567238, "learning_rate": 2.5773606622692467e-06, "loss": 0.7348, "step": 5822 }, { "epoch": 0.02577803355615565, "grad_norm": 2.305397886189885, "learning_rate": 2.5778033556155652e-06, "loss": 0.6225, "step": 5823 }, { "epoch": 0.025782460489618842, "grad_norm": 3.242061250671034, "learning_rate": 2.578246048961884e-06, "loss": 1.058, "step": 5824 }, { "epoch": 0.025786887423082033, "grad_norm": 2.8232644002883203, "learning_rate": 2.5786887423082035e-06, "loss": 0.8653, "step": 5825 }, { "epoch": 0.02579131435654522, "grad_norm": 3.1309856589209137, "learning_rate": 2.579131435654522e-06, "loss": 0.8336, "step": 5826 }, { "epoch": 0.02579574129000841, "grad_norm": 2.7514048000874234, "learning_rate": 2.5795741290008414e-06, "loss": 0.7949, "step": 5827 }, { "epoch": 0.0258001682234716, "grad_norm": 3.3462426863975985, "learning_rate": 2.5800168223471604e-06, "loss": 0.8617, "step": 5828 }, { "epoch": 0.02580459515693479, "grad_norm": 2.729001191052122, "learning_rate": 2.5804595156934793e-06, "loss": 0.8605, "step": 5829 }, { "epoch": 0.02580902209039798, "grad_norm": 2.7667987858798115, "learning_rate": 2.5809022090397983e-06, "loss": 0.8972, "step": 5830 }, { "epoch": 0.025813449023861172, "grad_norm": 2.5313268616170284, "learning_rate": 2.5813449023861176e-06, "loss": 0.7205, "step": 5831 }, { "epoch": 0.025817875957324363, "grad_norm": 2.7248925031479954, "learning_rate": 2.581787595732436e-06, "loss": 0.7926, "step": 5832 }, { "epoch": 0.02582230289078755, "grad_norm": 2.4706895797602986, "learning_rate": 2.5822302890787555e-06, "loss": 0.7493, "step": 5833 }, { "epoch": 0.02582672982425074, "grad_norm": 2.8058847456286395, "learning_rate": 2.5826729824250745e-06, "loss": 0.6468, "step": 5834 }, { "epoch": 0.02583115675771393, "grad_norm": 2.7622949848477067, "learning_rate": 2.5831156757713934e-06, "loss": 0.8633, "step": 5835 }, { "epoch": 0.02583558369117712, "grad_norm": 2.921396442389128, "learning_rate": 2.5835583691177124e-06, "loss": 0.6424, "step": 5836 }, { "epoch": 0.025840010624640312, "grad_norm": 3.343166515935657, "learning_rate": 2.5840010624640317e-06, "loss": 1.2042, "step": 5837 }, { "epoch": 0.025844437558103502, "grad_norm": 2.253034165018203, "learning_rate": 2.5844437558103503e-06, "loss": 0.6485, "step": 5838 }, { "epoch": 0.025848864491566693, "grad_norm": 3.4610170394712054, "learning_rate": 2.5848864491566692e-06, "loss": 0.8905, "step": 5839 }, { "epoch": 0.025853291425029883, "grad_norm": 2.6858536129963255, "learning_rate": 2.5853291425029886e-06, "loss": 0.7582, "step": 5840 }, { "epoch": 0.02585771835849307, "grad_norm": 2.368986786595815, "learning_rate": 2.585771835849307e-06, "loss": 0.5274, "step": 5841 }, { "epoch": 0.02586214529195626, "grad_norm": 3.0530184102953313, "learning_rate": 2.5862145291956265e-06, "loss": 0.774, "step": 5842 }, { "epoch": 0.02586657222541945, "grad_norm": 3.2315646358531507, "learning_rate": 2.5866572225419454e-06, "loss": 0.915, "step": 5843 }, { "epoch": 0.025870999158882642, "grad_norm": 3.5594199177648056, "learning_rate": 2.5870999158882644e-06, "loss": 0.9291, "step": 5844 }, { "epoch": 0.025875426092345832, "grad_norm": 2.3844751922789786, "learning_rate": 2.5875426092345833e-06, "loss": 0.8309, "step": 5845 }, { "epoch": 0.025879853025809023, "grad_norm": 3.0720527529733315, "learning_rate": 2.5879853025809027e-06, "loss": 0.7052, "step": 5846 }, { "epoch": 0.025884279959272213, "grad_norm": 3.1546885431392746, "learning_rate": 2.5884279959272212e-06, "loss": 0.7299, "step": 5847 }, { "epoch": 0.025888706892735404, "grad_norm": 2.7044949456853837, "learning_rate": 2.5888706892735406e-06, "loss": 0.7159, "step": 5848 }, { "epoch": 0.02589313382619859, "grad_norm": 3.3537448643275956, "learning_rate": 2.5893133826198595e-06, "loss": 0.7363, "step": 5849 }, { "epoch": 0.02589756075966178, "grad_norm": 3.3285310714528924, "learning_rate": 2.5897560759661785e-06, "loss": 0.6402, "step": 5850 }, { "epoch": 0.025901987693124972, "grad_norm": 3.4348818720646803, "learning_rate": 2.5901987693124974e-06, "loss": 1.0697, "step": 5851 }, { "epoch": 0.025906414626588162, "grad_norm": 3.973239402447446, "learning_rate": 2.590641462658817e-06, "loss": 0.7725, "step": 5852 }, { "epoch": 0.025910841560051353, "grad_norm": 2.69386313828683, "learning_rate": 2.5910841560051353e-06, "loss": 0.7862, "step": 5853 }, { "epoch": 0.025915268493514543, "grad_norm": 3.5969120376322237, "learning_rate": 2.5915268493514547e-06, "loss": 0.6662, "step": 5854 }, { "epoch": 0.025919695426977734, "grad_norm": 2.556478176498717, "learning_rate": 2.5919695426977736e-06, "loss": 0.5854, "step": 5855 }, { "epoch": 0.02592412236044092, "grad_norm": 2.5071351265833495, "learning_rate": 2.592412236044092e-06, "loss": 0.5573, "step": 5856 }, { "epoch": 0.02592854929390411, "grad_norm": 2.805179772269046, "learning_rate": 2.5928549293904115e-06, "loss": 0.7352, "step": 5857 }, { "epoch": 0.025932976227367302, "grad_norm": 2.745624133590659, "learning_rate": 2.5932976227367305e-06, "loss": 0.6579, "step": 5858 }, { "epoch": 0.025937403160830493, "grad_norm": 3.0700683667687283, "learning_rate": 2.5937403160830494e-06, "loss": 0.6328, "step": 5859 }, { "epoch": 0.025941830094293683, "grad_norm": 3.0716654858815273, "learning_rate": 2.5941830094293684e-06, "loss": 0.8763, "step": 5860 }, { "epoch": 0.025946257027756874, "grad_norm": 3.4805269903578835, "learning_rate": 2.5946257027756877e-06, "loss": 0.7465, "step": 5861 }, { "epoch": 0.025950683961220064, "grad_norm": 2.759318137408308, "learning_rate": 2.5950683961220063e-06, "loss": 0.7391, "step": 5862 }, { "epoch": 0.025955110894683255, "grad_norm": 2.6714185897898113, "learning_rate": 2.5955110894683256e-06, "loss": 0.7046, "step": 5863 }, { "epoch": 0.02595953782814644, "grad_norm": 4.169817508278297, "learning_rate": 2.5959537828146446e-06, "loss": 1.3157, "step": 5864 }, { "epoch": 0.025963964761609632, "grad_norm": 2.4509718386751755, "learning_rate": 2.5963964761609635e-06, "loss": 0.6896, "step": 5865 }, { "epoch": 0.025968391695072823, "grad_norm": 2.5901013958528423, "learning_rate": 2.5968391695072825e-06, "loss": 0.7378, "step": 5866 }, { "epoch": 0.025972818628536013, "grad_norm": 2.90500871729208, "learning_rate": 2.597281862853602e-06, "loss": 0.8, "step": 5867 }, { "epoch": 0.025977245561999204, "grad_norm": 3.8208098092217844, "learning_rate": 2.5977245561999204e-06, "loss": 1.0761, "step": 5868 }, { "epoch": 0.025981672495462394, "grad_norm": 3.660556845791156, "learning_rate": 2.5981672495462397e-06, "loss": 1.14, "step": 5869 }, { "epoch": 0.025986099428925585, "grad_norm": 3.0433127360771013, "learning_rate": 2.5986099428925587e-06, "loss": 0.9787, "step": 5870 }, { "epoch": 0.02599052636238877, "grad_norm": 2.5944706274564684, "learning_rate": 2.5990526362388772e-06, "loss": 0.6203, "step": 5871 }, { "epoch": 0.025994953295851962, "grad_norm": 2.7201083008586115, "learning_rate": 2.5994953295851966e-06, "loss": 0.8821, "step": 5872 }, { "epoch": 0.025999380229315153, "grad_norm": 2.7112395695072102, "learning_rate": 2.599938022931516e-06, "loss": 0.7001, "step": 5873 }, { "epoch": 0.026003807162778343, "grad_norm": 3.5495074922932375, "learning_rate": 2.6003807162778345e-06, "loss": 0.9802, "step": 5874 }, { "epoch": 0.026008234096241534, "grad_norm": 2.8054859853273526, "learning_rate": 2.6008234096241534e-06, "loss": 0.7205, "step": 5875 }, { "epoch": 0.026012661029704724, "grad_norm": 3.519947553094154, "learning_rate": 2.601266102970473e-06, "loss": 1.1585, "step": 5876 }, { "epoch": 0.026017087963167915, "grad_norm": 2.5766278007953907, "learning_rate": 2.6017087963167913e-06, "loss": 0.8274, "step": 5877 }, { "epoch": 0.026021514896631105, "grad_norm": 3.1549070916563506, "learning_rate": 2.6021514896631107e-06, "loss": 1.0762, "step": 5878 }, { "epoch": 0.026025941830094292, "grad_norm": 2.4737315765951218, "learning_rate": 2.6025941830094296e-06, "loss": 0.5228, "step": 5879 }, { "epoch": 0.026030368763557483, "grad_norm": 3.5102922281089324, "learning_rate": 2.6030368763557486e-06, "loss": 1.3128, "step": 5880 }, { "epoch": 0.026034795697020673, "grad_norm": 2.9354693685876447, "learning_rate": 2.6034795697020675e-06, "loss": 0.9612, "step": 5881 }, { "epoch": 0.026039222630483864, "grad_norm": 2.9101473093257137, "learning_rate": 2.603922263048387e-06, "loss": 0.708, "step": 5882 }, { "epoch": 0.026043649563947054, "grad_norm": 2.8866373820389053, "learning_rate": 2.6043649563947054e-06, "loss": 0.5717, "step": 5883 }, { "epoch": 0.026048076497410245, "grad_norm": 2.4220504818789026, "learning_rate": 2.604807649741025e-06, "loss": 0.6533, "step": 5884 }, { "epoch": 0.026052503430873435, "grad_norm": 3.815296195312277, "learning_rate": 2.6052503430873437e-06, "loss": 0.441, "step": 5885 }, { "epoch": 0.026056930364336622, "grad_norm": 3.210629491712237, "learning_rate": 2.6056930364336623e-06, "loss": 0.9259, "step": 5886 }, { "epoch": 0.026061357297799813, "grad_norm": 2.851834834820927, "learning_rate": 2.6061357297799816e-06, "loss": 0.6544, "step": 5887 }, { "epoch": 0.026065784231263003, "grad_norm": 3.116516943555133, "learning_rate": 2.606578423126301e-06, "loss": 0.8766, "step": 5888 }, { "epoch": 0.026070211164726194, "grad_norm": 4.0825363323644055, "learning_rate": 2.6070211164726195e-06, "loss": 1.1846, "step": 5889 }, { "epoch": 0.026074638098189384, "grad_norm": 3.5373056271422705, "learning_rate": 2.6074638098189385e-06, "loss": 0.7934, "step": 5890 }, { "epoch": 0.026079065031652575, "grad_norm": 2.5275937463345373, "learning_rate": 2.607906503165258e-06, "loss": 0.4855, "step": 5891 }, { "epoch": 0.026083491965115765, "grad_norm": 2.9898598892663792, "learning_rate": 2.6083491965115764e-06, "loss": 0.7782, "step": 5892 }, { "epoch": 0.026087918898578956, "grad_norm": 3.200077165997033, "learning_rate": 2.6087918898578957e-06, "loss": 0.7479, "step": 5893 }, { "epoch": 0.026092345832042143, "grad_norm": 2.826350782880107, "learning_rate": 2.6092345832042147e-06, "loss": 0.6335, "step": 5894 }, { "epoch": 0.026096772765505333, "grad_norm": 3.4954184167245645, "learning_rate": 2.6096772765505336e-06, "loss": 0.9362, "step": 5895 }, { "epoch": 0.026101199698968524, "grad_norm": 2.9647944054057813, "learning_rate": 2.6101199698968526e-06, "loss": 0.7553, "step": 5896 }, { "epoch": 0.026105626632431714, "grad_norm": 2.7384101885404317, "learning_rate": 2.610562663243172e-06, "loss": 0.7076, "step": 5897 }, { "epoch": 0.026110053565894905, "grad_norm": 3.48894148067908, "learning_rate": 2.6110053565894905e-06, "loss": 1.1476, "step": 5898 }, { "epoch": 0.026114480499358095, "grad_norm": 3.7411920640238256, "learning_rate": 2.61144804993581e-06, "loss": 0.9412, "step": 5899 }, { "epoch": 0.026118907432821286, "grad_norm": 2.8889367599359206, "learning_rate": 2.611890743282129e-06, "loss": 0.7356, "step": 5900 }, { "epoch": 0.026123334366284473, "grad_norm": 2.676296281817296, "learning_rate": 2.6123334366284473e-06, "loss": 0.5768, "step": 5901 }, { "epoch": 0.026127761299747664, "grad_norm": 2.8875883221938965, "learning_rate": 2.6127761299747667e-06, "loss": 0.7815, "step": 5902 }, { "epoch": 0.026132188233210854, "grad_norm": 2.5871508669301457, "learning_rate": 2.613218823321086e-06, "loss": 0.7054, "step": 5903 }, { "epoch": 0.026136615166674045, "grad_norm": 2.7660330617534115, "learning_rate": 2.6136615166674046e-06, "loss": 0.6851, "step": 5904 }, { "epoch": 0.026141042100137235, "grad_norm": 3.092389249227965, "learning_rate": 2.6141042100137235e-06, "loss": 0.7833, "step": 5905 }, { "epoch": 0.026145469033600426, "grad_norm": 2.8615966529121484, "learning_rate": 2.614546903360043e-06, "loss": 0.8519, "step": 5906 }, { "epoch": 0.026149895967063616, "grad_norm": 2.8020675860859843, "learning_rate": 2.6149895967063614e-06, "loss": 0.7981, "step": 5907 }, { "epoch": 0.026154322900526807, "grad_norm": 2.828717225986041, "learning_rate": 2.615432290052681e-06, "loss": 0.5524, "step": 5908 }, { "epoch": 0.026158749833989994, "grad_norm": 2.81250955175245, "learning_rate": 2.6158749833989997e-06, "loss": 0.9012, "step": 5909 }, { "epoch": 0.026163176767453184, "grad_norm": 3.231788035782109, "learning_rate": 2.6163176767453187e-06, "loss": 0.8005, "step": 5910 }, { "epoch": 0.026167603700916375, "grad_norm": 2.7145607838159354, "learning_rate": 2.6167603700916376e-06, "loss": 0.9377, "step": 5911 }, { "epoch": 0.026172030634379565, "grad_norm": 3.218005020496292, "learning_rate": 2.617203063437957e-06, "loss": 0.6212, "step": 5912 }, { "epoch": 0.026176457567842756, "grad_norm": 3.197197196352501, "learning_rate": 2.6176457567842755e-06, "loss": 0.8399, "step": 5913 }, { "epoch": 0.026180884501305946, "grad_norm": 3.072319743184688, "learning_rate": 2.618088450130595e-06, "loss": 0.8771, "step": 5914 }, { "epoch": 0.026185311434769137, "grad_norm": 2.6194845600624577, "learning_rate": 2.618531143476914e-06, "loss": 0.6709, "step": 5915 }, { "epoch": 0.026189738368232327, "grad_norm": 3.2934065221697133, "learning_rate": 2.6189738368232324e-06, "loss": 0.9616, "step": 5916 }, { "epoch": 0.026194165301695514, "grad_norm": 2.587458859608214, "learning_rate": 2.6194165301695517e-06, "loss": 0.8414, "step": 5917 }, { "epoch": 0.026198592235158705, "grad_norm": 2.71670098375585, "learning_rate": 2.619859223515871e-06, "loss": 0.8772, "step": 5918 }, { "epoch": 0.026203019168621895, "grad_norm": 2.580819868984814, "learning_rate": 2.6203019168621896e-06, "loss": 0.595, "step": 5919 }, { "epoch": 0.026207446102085086, "grad_norm": 2.6958278873502874, "learning_rate": 2.6207446102085086e-06, "loss": 0.8366, "step": 5920 }, { "epoch": 0.026211873035548276, "grad_norm": 3.13389294109123, "learning_rate": 2.621187303554828e-06, "loss": 0.6739, "step": 5921 }, { "epoch": 0.026216299969011467, "grad_norm": 2.4288267383785564, "learning_rate": 2.6216299969011465e-06, "loss": 0.7289, "step": 5922 }, { "epoch": 0.026220726902474657, "grad_norm": 2.5448230093463637, "learning_rate": 2.622072690247466e-06, "loss": 0.7533, "step": 5923 }, { "epoch": 0.026225153835937844, "grad_norm": 2.8477332765924634, "learning_rate": 2.622515383593785e-06, "loss": 0.8544, "step": 5924 }, { "epoch": 0.026229580769401035, "grad_norm": 3.136818099990669, "learning_rate": 2.6229580769401037e-06, "loss": 0.7181, "step": 5925 }, { "epoch": 0.026234007702864225, "grad_norm": 3.1808523829974216, "learning_rate": 2.6234007702864227e-06, "loss": 0.8244, "step": 5926 }, { "epoch": 0.026238434636327416, "grad_norm": 3.1861842636712967, "learning_rate": 2.623843463632742e-06, "loss": 0.8536, "step": 5927 }, { "epoch": 0.026242861569790606, "grad_norm": 3.009817243235192, "learning_rate": 2.6242861569790606e-06, "loss": 0.801, "step": 5928 }, { "epoch": 0.026247288503253797, "grad_norm": 3.2674170437181793, "learning_rate": 2.62472885032538e-06, "loss": 0.8993, "step": 5929 }, { "epoch": 0.026251715436716987, "grad_norm": 2.643292367633248, "learning_rate": 2.625171543671699e-06, "loss": 0.6991, "step": 5930 }, { "epoch": 0.026256142370180178, "grad_norm": 3.2108034331770425, "learning_rate": 2.625614237018018e-06, "loss": 0.9421, "step": 5931 }, { "epoch": 0.026260569303643365, "grad_norm": 2.633655545361321, "learning_rate": 2.626056930364337e-06, "loss": 0.7086, "step": 5932 }, { "epoch": 0.026264996237106555, "grad_norm": 3.2054665074495268, "learning_rate": 2.626499623710656e-06, "loss": 1.062, "step": 5933 }, { "epoch": 0.026269423170569746, "grad_norm": 3.237961917533473, "learning_rate": 2.6269423170569747e-06, "loss": 0.58, "step": 5934 }, { "epoch": 0.026273850104032936, "grad_norm": 2.923966843451228, "learning_rate": 2.627385010403294e-06, "loss": 0.6208, "step": 5935 }, { "epoch": 0.026278277037496127, "grad_norm": 2.7281309744103805, "learning_rate": 2.627827703749613e-06, "loss": 0.7205, "step": 5936 }, { "epoch": 0.026282703970959317, "grad_norm": 3.2625066977248074, "learning_rate": 2.6282703970959315e-06, "loss": 0.7715, "step": 5937 }, { "epoch": 0.026287130904422508, "grad_norm": 2.9798745898455916, "learning_rate": 2.628713090442251e-06, "loss": 0.6761, "step": 5938 }, { "epoch": 0.026291557837885695, "grad_norm": 3.3851717925737503, "learning_rate": 2.62915578378857e-06, "loss": 0.9648, "step": 5939 }, { "epoch": 0.026295984771348885, "grad_norm": 2.6892180973227444, "learning_rate": 2.629598477134889e-06, "loss": 0.5985, "step": 5940 }, { "epoch": 0.026300411704812076, "grad_norm": 2.9589349316167812, "learning_rate": 2.6300411704812077e-06, "loss": 0.6734, "step": 5941 }, { "epoch": 0.026304838638275266, "grad_norm": 3.2482138983513154, "learning_rate": 2.630483863827527e-06, "loss": 0.8448, "step": 5942 }, { "epoch": 0.026309265571738457, "grad_norm": 4.149566582021733, "learning_rate": 2.6309265571738456e-06, "loss": 1.6178, "step": 5943 }, { "epoch": 0.026313692505201648, "grad_norm": 3.32681519597684, "learning_rate": 2.631369250520165e-06, "loss": 0.9336, "step": 5944 }, { "epoch": 0.026318119438664838, "grad_norm": 3.232277167884108, "learning_rate": 2.631811943866484e-06, "loss": 0.8063, "step": 5945 }, { "epoch": 0.02632254637212803, "grad_norm": 2.880903209440779, "learning_rate": 2.632254637212803e-06, "loss": 0.8059, "step": 5946 }, { "epoch": 0.026326973305591216, "grad_norm": 3.143893640620434, "learning_rate": 2.632697330559122e-06, "loss": 0.8848, "step": 5947 }, { "epoch": 0.026331400239054406, "grad_norm": 2.7551053541263992, "learning_rate": 2.6331400239054412e-06, "loss": 0.5162, "step": 5948 }, { "epoch": 0.026335827172517597, "grad_norm": 3.1183711632494426, "learning_rate": 2.6335827172517597e-06, "loss": 1.0689, "step": 5949 }, { "epoch": 0.026340254105980787, "grad_norm": 2.954893754167803, "learning_rate": 2.634025410598079e-06, "loss": 0.9649, "step": 5950 }, { "epoch": 0.026344681039443978, "grad_norm": 3.164628040937278, "learning_rate": 2.634468103944398e-06, "loss": 0.5013, "step": 5951 }, { "epoch": 0.026349107972907168, "grad_norm": 3.2066898337224994, "learning_rate": 2.6349107972907166e-06, "loss": 0.7667, "step": 5952 }, { "epoch": 0.02635353490637036, "grad_norm": 3.5757201035633437, "learning_rate": 2.635353490637036e-06, "loss": 0.8676, "step": 5953 }, { "epoch": 0.026357961839833546, "grad_norm": 3.3418697802187958, "learning_rate": 2.6357961839833553e-06, "loss": 0.659, "step": 5954 }, { "epoch": 0.026362388773296736, "grad_norm": 3.014371533391787, "learning_rate": 2.636238877329674e-06, "loss": 0.7807, "step": 5955 }, { "epoch": 0.026366815706759927, "grad_norm": 3.3335227234265883, "learning_rate": 2.636681570675993e-06, "loss": 0.4693, "step": 5956 }, { "epoch": 0.026371242640223117, "grad_norm": 2.6057857351989786, "learning_rate": 2.637124264022312e-06, "loss": 0.9423, "step": 5957 }, { "epoch": 0.026375669573686308, "grad_norm": 3.3803419326958934, "learning_rate": 2.6375669573686307e-06, "loss": 0.973, "step": 5958 }, { "epoch": 0.026380096507149498, "grad_norm": 5.343900121280414, "learning_rate": 2.63800965071495e-06, "loss": 1.4257, "step": 5959 }, { "epoch": 0.02638452344061269, "grad_norm": 2.682962564220921, "learning_rate": 2.638452344061269e-06, "loss": 0.5564, "step": 5960 }, { "epoch": 0.02638895037407588, "grad_norm": 2.2641993646418346, "learning_rate": 2.638895037407588e-06, "loss": 0.5933, "step": 5961 }, { "epoch": 0.026393377307539066, "grad_norm": 3.127991752362665, "learning_rate": 2.639337730753907e-06, "loss": 1.0508, "step": 5962 }, { "epoch": 0.026397804241002257, "grad_norm": 3.1624351443285135, "learning_rate": 2.6397804241002263e-06, "loss": 0.8491, "step": 5963 }, { "epoch": 0.026402231174465447, "grad_norm": 2.735773690318985, "learning_rate": 2.640223117446545e-06, "loss": 0.7581, "step": 5964 }, { "epoch": 0.026406658107928638, "grad_norm": 2.7186298156168163, "learning_rate": 2.640665810792864e-06, "loss": 0.6053, "step": 5965 }, { "epoch": 0.02641108504139183, "grad_norm": 2.36932460795816, "learning_rate": 2.641108504139183e-06, "loss": 0.5401, "step": 5966 }, { "epoch": 0.02641551197485502, "grad_norm": 3.0028163279511224, "learning_rate": 2.6415511974855016e-06, "loss": 0.9536, "step": 5967 }, { "epoch": 0.02641993890831821, "grad_norm": 3.0887071415365046, "learning_rate": 2.641993890831821e-06, "loss": 0.6371, "step": 5968 }, { "epoch": 0.026424365841781396, "grad_norm": 2.6949999999908214, "learning_rate": 2.6424365841781404e-06, "loss": 0.5829, "step": 5969 }, { "epoch": 0.026428792775244587, "grad_norm": 3.2551087261727103, "learning_rate": 2.642879277524459e-06, "loss": 1.3103, "step": 5970 }, { "epoch": 0.026433219708707777, "grad_norm": 3.222837069056484, "learning_rate": 2.643321970870778e-06, "loss": 1.0324, "step": 5971 }, { "epoch": 0.026437646642170968, "grad_norm": 2.8481408792002303, "learning_rate": 2.6437646642170972e-06, "loss": 0.5732, "step": 5972 }, { "epoch": 0.02644207357563416, "grad_norm": 3.376115010715289, "learning_rate": 2.6442073575634158e-06, "loss": 0.8812, "step": 5973 }, { "epoch": 0.02644650050909735, "grad_norm": 3.1052305282759116, "learning_rate": 2.644650050909735e-06, "loss": 0.8637, "step": 5974 }, { "epoch": 0.02645092744256054, "grad_norm": 2.822050879640954, "learning_rate": 2.645092744256054e-06, "loss": 0.869, "step": 5975 }, { "epoch": 0.02645535437602373, "grad_norm": 4.114304561582088, "learning_rate": 2.645535437602373e-06, "loss": 0.9481, "step": 5976 }, { "epoch": 0.026459781309486917, "grad_norm": 2.7044384776688775, "learning_rate": 2.645978130948692e-06, "loss": 0.7645, "step": 5977 }, { "epoch": 0.026464208242950107, "grad_norm": 3.6860506621803872, "learning_rate": 2.6464208242950113e-06, "loss": 1.1601, "step": 5978 }, { "epoch": 0.026468635176413298, "grad_norm": 3.0446139760036908, "learning_rate": 2.64686351764133e-06, "loss": 0.4914, "step": 5979 }, { "epoch": 0.02647306210987649, "grad_norm": 3.141142250251563, "learning_rate": 2.6473062109876492e-06, "loss": 1.1056, "step": 5980 }, { "epoch": 0.02647748904333968, "grad_norm": 3.1115347223324457, "learning_rate": 2.647748904333968e-06, "loss": 1.0693, "step": 5981 }, { "epoch": 0.02648191597680287, "grad_norm": 2.8695997439388923, "learning_rate": 2.6481915976802867e-06, "loss": 0.999, "step": 5982 }, { "epoch": 0.02648634291026606, "grad_norm": 4.226734953952663, "learning_rate": 2.648634291026606e-06, "loss": 1.3576, "step": 5983 }, { "epoch": 0.02649076984372925, "grad_norm": 2.5354154286867843, "learning_rate": 2.6490769843729254e-06, "loss": 0.7074, "step": 5984 }, { "epoch": 0.026495196777192438, "grad_norm": 2.7489480662144548, "learning_rate": 2.649519677719244e-06, "loss": 0.6652, "step": 5985 }, { "epoch": 0.026499623710655628, "grad_norm": 2.934169796562408, "learning_rate": 2.649962371065563e-06, "loss": 0.8069, "step": 5986 }, { "epoch": 0.02650405064411882, "grad_norm": 2.9945956147156925, "learning_rate": 2.6504050644118823e-06, "loss": 0.7003, "step": 5987 }, { "epoch": 0.02650847757758201, "grad_norm": 3.1311272001714805, "learning_rate": 2.650847757758201e-06, "loss": 0.7128, "step": 5988 }, { "epoch": 0.0265129045110452, "grad_norm": 2.9294460506757405, "learning_rate": 2.65129045110452e-06, "loss": 0.8507, "step": 5989 }, { "epoch": 0.02651733144450839, "grad_norm": 3.258968638548638, "learning_rate": 2.651733144450839e-06, "loss": 0.7802, "step": 5990 }, { "epoch": 0.02652175837797158, "grad_norm": 3.7469394812207786, "learning_rate": 2.652175837797158e-06, "loss": 1.1166, "step": 5991 }, { "epoch": 0.026526185311434768, "grad_norm": 2.6056669803021517, "learning_rate": 2.652618531143477e-06, "loss": 0.6934, "step": 5992 }, { "epoch": 0.026530612244897958, "grad_norm": 3.1922699028201893, "learning_rate": 2.6530612244897964e-06, "loss": 0.993, "step": 5993 }, { "epoch": 0.02653503917836115, "grad_norm": 3.24517858920411, "learning_rate": 2.653503917836115e-06, "loss": 0.8061, "step": 5994 }, { "epoch": 0.02653946611182434, "grad_norm": 2.7148395595656387, "learning_rate": 2.6539466111824343e-06, "loss": 0.7654, "step": 5995 }, { "epoch": 0.02654389304528753, "grad_norm": 2.7685708808771325, "learning_rate": 2.6543893045287532e-06, "loss": 0.7924, "step": 5996 }, { "epoch": 0.02654831997875072, "grad_norm": 3.1247972091552283, "learning_rate": 2.6548319978750718e-06, "loss": 0.7292, "step": 5997 }, { "epoch": 0.02655274691221391, "grad_norm": 2.778155239509327, "learning_rate": 2.655274691221391e-06, "loss": 0.8256, "step": 5998 }, { "epoch": 0.0265571738456771, "grad_norm": 3.7703006043106004, "learning_rate": 2.6557173845677105e-06, "loss": 0.8623, "step": 5999 }, { "epoch": 0.026561600779140288, "grad_norm": 2.616096953693685, "learning_rate": 2.656160077914029e-06, "loss": 0.7402, "step": 6000 }, { "epoch": 0.02656602771260348, "grad_norm": 2.881613864793015, "learning_rate": 2.656602771260348e-06, "loss": 0.74, "step": 6001 }, { "epoch": 0.02657045464606667, "grad_norm": 3.415171207427074, "learning_rate": 2.6570454646066673e-06, "loss": 1.1519, "step": 6002 }, { "epoch": 0.02657488157952986, "grad_norm": 2.5530828461576225, "learning_rate": 2.657488157952986e-06, "loss": 0.5916, "step": 6003 }, { "epoch": 0.02657930851299305, "grad_norm": 2.5979923956903317, "learning_rate": 2.6579308512993052e-06, "loss": 0.7108, "step": 6004 }, { "epoch": 0.02658373544645624, "grad_norm": 3.082264950159969, "learning_rate": 2.658373544645624e-06, "loss": 1.2228, "step": 6005 }, { "epoch": 0.02658816237991943, "grad_norm": 3.4139088335724836, "learning_rate": 2.658816237991943e-06, "loss": 0.7326, "step": 6006 }, { "epoch": 0.02659258931338262, "grad_norm": 2.573258684199948, "learning_rate": 2.659258931338262e-06, "loss": 0.654, "step": 6007 }, { "epoch": 0.02659701624684581, "grad_norm": 2.5495425544145487, "learning_rate": 2.6597016246845814e-06, "loss": 0.9633, "step": 6008 }, { "epoch": 0.026601443180309, "grad_norm": 3.3998849648464256, "learning_rate": 2.6601443180309e-06, "loss": 1.1231, "step": 6009 }, { "epoch": 0.02660587011377219, "grad_norm": 4.243101705386254, "learning_rate": 2.6605870113772193e-06, "loss": 0.8907, "step": 6010 }, { "epoch": 0.02661029704723538, "grad_norm": 3.4927318116377775, "learning_rate": 2.6610297047235383e-06, "loss": 1.0842, "step": 6011 }, { "epoch": 0.02661472398069857, "grad_norm": 3.069732801321815, "learning_rate": 2.6614723980698572e-06, "loss": 0.9098, "step": 6012 }, { "epoch": 0.02661915091416176, "grad_norm": 2.8816810661682584, "learning_rate": 2.661915091416176e-06, "loss": 0.829, "step": 6013 }, { "epoch": 0.026623577847624952, "grad_norm": 2.883519513159759, "learning_rate": 2.6623577847624955e-06, "loss": 0.8075, "step": 6014 }, { "epoch": 0.02662800478108814, "grad_norm": 2.8921791308963365, "learning_rate": 2.662800478108814e-06, "loss": 0.8422, "step": 6015 }, { "epoch": 0.02663243171455133, "grad_norm": 3.296193773225054, "learning_rate": 2.663243171455133e-06, "loss": 0.8692, "step": 6016 }, { "epoch": 0.02663685864801452, "grad_norm": 2.6929755581309194, "learning_rate": 2.6636858648014524e-06, "loss": 0.5886, "step": 6017 }, { "epoch": 0.02664128558147771, "grad_norm": 3.407283173745809, "learning_rate": 2.664128558147771e-06, "loss": 0.9507, "step": 6018 }, { "epoch": 0.0266457125149409, "grad_norm": 3.986953576765534, "learning_rate": 2.6645712514940903e-06, "loss": 1.0143, "step": 6019 }, { "epoch": 0.02665013944840409, "grad_norm": 2.6806660522896775, "learning_rate": 2.6650139448404092e-06, "loss": 0.7967, "step": 6020 }, { "epoch": 0.026654566381867282, "grad_norm": 2.953763280672501, "learning_rate": 2.665456638186728e-06, "loss": 0.636, "step": 6021 }, { "epoch": 0.02665899331533047, "grad_norm": 2.366863911762487, "learning_rate": 2.665899331533047e-06, "loss": 0.8269, "step": 6022 }, { "epoch": 0.02666342024879366, "grad_norm": 2.736964508052192, "learning_rate": 2.6663420248793665e-06, "loss": 0.9615, "step": 6023 }, { "epoch": 0.02666784718225685, "grad_norm": 3.3991467037354206, "learning_rate": 2.666784718225685e-06, "loss": 0.8549, "step": 6024 }, { "epoch": 0.02667227411572004, "grad_norm": 3.296721885131288, "learning_rate": 2.6672274115720044e-06, "loss": 1.0515, "step": 6025 }, { "epoch": 0.02667670104918323, "grad_norm": 2.513355269840852, "learning_rate": 2.6676701049183233e-06, "loss": 0.753, "step": 6026 }, { "epoch": 0.02668112798264642, "grad_norm": 2.5857704686043355, "learning_rate": 2.6681127982646423e-06, "loss": 0.7797, "step": 6027 }, { "epoch": 0.026685554916109612, "grad_norm": 2.5229074133920224, "learning_rate": 2.6685554916109612e-06, "loss": 0.7455, "step": 6028 }, { "epoch": 0.026689981849572803, "grad_norm": 2.6739426279956597, "learning_rate": 2.6689981849572806e-06, "loss": 0.8031, "step": 6029 }, { "epoch": 0.02669440878303599, "grad_norm": 3.1636359583229128, "learning_rate": 2.669440878303599e-06, "loss": 0.6251, "step": 6030 }, { "epoch": 0.02669883571649918, "grad_norm": 2.970630624114814, "learning_rate": 2.6698835716499185e-06, "loss": 0.8557, "step": 6031 }, { "epoch": 0.02670326264996237, "grad_norm": 3.2981753443155277, "learning_rate": 2.6703262649962374e-06, "loss": 0.8824, "step": 6032 }, { "epoch": 0.02670768958342556, "grad_norm": 2.3565081455010883, "learning_rate": 2.670768958342556e-06, "loss": 0.7221, "step": 6033 }, { "epoch": 0.02671211651688875, "grad_norm": 3.470328791107458, "learning_rate": 2.6712116516888753e-06, "loss": 1.3643, "step": 6034 }, { "epoch": 0.026716543450351942, "grad_norm": 3.4059407916361595, "learning_rate": 2.6716543450351947e-06, "loss": 0.8581, "step": 6035 }, { "epoch": 0.026720970383815133, "grad_norm": 3.17308507698688, "learning_rate": 2.6720970383815132e-06, "loss": 0.9849, "step": 6036 }, { "epoch": 0.02672539731727832, "grad_norm": 3.046657092483055, "learning_rate": 2.672539731727832e-06, "loss": 0.8555, "step": 6037 }, { "epoch": 0.02672982425074151, "grad_norm": 3.3948881928132897, "learning_rate": 2.6729824250741515e-06, "loss": 0.7208, "step": 6038 }, { "epoch": 0.0267342511842047, "grad_norm": 2.6899644834176093, "learning_rate": 2.67342511842047e-06, "loss": 0.7605, "step": 6039 }, { "epoch": 0.02673867811766789, "grad_norm": 3.104190264336591, "learning_rate": 2.6738678117667894e-06, "loss": 0.523, "step": 6040 }, { "epoch": 0.02674310505113108, "grad_norm": 2.791611040502026, "learning_rate": 2.6743105051131084e-06, "loss": 0.7575, "step": 6041 }, { "epoch": 0.026747531984594272, "grad_norm": 2.903054672284587, "learning_rate": 2.6747531984594273e-06, "loss": 0.7262, "step": 6042 }, { "epoch": 0.026751958918057463, "grad_norm": 2.7769197893984927, "learning_rate": 2.6751958918057463e-06, "loss": 0.6008, "step": 6043 }, { "epoch": 0.026756385851520653, "grad_norm": 3.286883443581806, "learning_rate": 2.6756385851520657e-06, "loss": 0.8771, "step": 6044 }, { "epoch": 0.02676081278498384, "grad_norm": 2.612484452369731, "learning_rate": 2.676081278498384e-06, "loss": 0.8289, "step": 6045 }, { "epoch": 0.02676523971844703, "grad_norm": 2.962628278173099, "learning_rate": 2.6765239718447035e-06, "loss": 0.6506, "step": 6046 }, { "epoch": 0.02676966665191022, "grad_norm": 2.9019601393684007, "learning_rate": 2.6769666651910225e-06, "loss": 0.7307, "step": 6047 }, { "epoch": 0.026774093585373412, "grad_norm": 4.090122079390629, "learning_rate": 2.677409358537341e-06, "loss": 1.2138, "step": 6048 }, { "epoch": 0.026778520518836602, "grad_norm": 2.949851692965041, "learning_rate": 2.6778520518836604e-06, "loss": 0.4849, "step": 6049 }, { "epoch": 0.026782947452299793, "grad_norm": 2.8452039218505343, "learning_rate": 2.6782947452299798e-06, "loss": 1.1016, "step": 6050 }, { "epoch": 0.026787374385762983, "grad_norm": 2.6272157648270014, "learning_rate": 2.6787374385762983e-06, "loss": 0.8937, "step": 6051 }, { "epoch": 0.02679180131922617, "grad_norm": 3.6190190543837013, "learning_rate": 2.6791801319226172e-06, "loss": 0.9784, "step": 6052 }, { "epoch": 0.02679622825268936, "grad_norm": 4.721451970926696, "learning_rate": 2.6796228252689366e-06, "loss": 0.5764, "step": 6053 }, { "epoch": 0.02680065518615255, "grad_norm": 3.033477690235359, "learning_rate": 2.680065518615255e-06, "loss": 0.7553, "step": 6054 }, { "epoch": 0.026805082119615742, "grad_norm": 2.874597193035648, "learning_rate": 2.6805082119615745e-06, "loss": 0.7462, "step": 6055 }, { "epoch": 0.026809509053078932, "grad_norm": 2.7895209528157254, "learning_rate": 2.6809509053078934e-06, "loss": 0.8688, "step": 6056 }, { "epoch": 0.026813935986542123, "grad_norm": 2.978770340407559, "learning_rate": 2.6813935986542124e-06, "loss": 0.969, "step": 6057 }, { "epoch": 0.026818362920005313, "grad_norm": 2.98102982762637, "learning_rate": 2.6818362920005313e-06, "loss": 0.7037, "step": 6058 }, { "epoch": 0.026822789853468504, "grad_norm": 3.130169231095127, "learning_rate": 2.6822789853468507e-06, "loss": 0.6154, "step": 6059 }, { "epoch": 0.02682721678693169, "grad_norm": 2.883824204244744, "learning_rate": 2.6827216786931692e-06, "loss": 0.6867, "step": 6060 }, { "epoch": 0.02683164372039488, "grad_norm": 2.889314729773139, "learning_rate": 2.6831643720394886e-06, "loss": 0.8716, "step": 6061 }, { "epoch": 0.026836070653858072, "grad_norm": 2.6957975697004515, "learning_rate": 2.6836070653858075e-06, "loss": 0.8966, "step": 6062 }, { "epoch": 0.026840497587321262, "grad_norm": 2.7609698989024145, "learning_rate": 2.684049758732126e-06, "loss": 0.752, "step": 6063 }, { "epoch": 0.026844924520784453, "grad_norm": 2.5787310341002376, "learning_rate": 2.6844924520784454e-06, "loss": 0.8541, "step": 6064 }, { "epoch": 0.026849351454247643, "grad_norm": 3.4216806494021292, "learning_rate": 2.684935145424765e-06, "loss": 0.7322, "step": 6065 }, { "epoch": 0.026853778387710834, "grad_norm": 3.2012765761543505, "learning_rate": 2.6853778387710833e-06, "loss": 0.8475, "step": 6066 }, { "epoch": 0.026858205321174024, "grad_norm": 3.129770906911186, "learning_rate": 2.6858205321174023e-06, "loss": 0.8643, "step": 6067 }, { "epoch": 0.02686263225463721, "grad_norm": 3.5834740148610376, "learning_rate": 2.6862632254637217e-06, "loss": 1.0018, "step": 6068 }, { "epoch": 0.026867059188100402, "grad_norm": 3.0949768691892667, "learning_rate": 2.68670591881004e-06, "loss": 0.3855, "step": 6069 }, { "epoch": 0.026871486121563593, "grad_norm": 3.614619372370773, "learning_rate": 2.6871486121563595e-06, "loss": 0.6775, "step": 6070 }, { "epoch": 0.026875913055026783, "grad_norm": 2.681116630551224, "learning_rate": 2.6875913055026785e-06, "loss": 0.927, "step": 6071 }, { "epoch": 0.026880339988489974, "grad_norm": 3.0858198517492417, "learning_rate": 2.6880339988489974e-06, "loss": 1.1375, "step": 6072 }, { "epoch": 0.026884766921953164, "grad_norm": 2.997465158051266, "learning_rate": 2.6884766921953164e-06, "loss": 0.7832, "step": 6073 }, { "epoch": 0.026889193855416355, "grad_norm": 2.5062967102637224, "learning_rate": 2.6889193855416358e-06, "loss": 0.6929, "step": 6074 }, { "epoch": 0.02689362078887954, "grad_norm": 3.130402674654651, "learning_rate": 2.6893620788879543e-06, "loss": 0.769, "step": 6075 }, { "epoch": 0.026898047722342732, "grad_norm": 2.6482888904963824, "learning_rate": 2.6898047722342737e-06, "loss": 0.6744, "step": 6076 }, { "epoch": 0.026902474655805923, "grad_norm": 3.1116485169647867, "learning_rate": 2.6902474655805926e-06, "loss": 1.0793, "step": 6077 }, { "epoch": 0.026906901589269113, "grad_norm": 2.9649006018883655, "learning_rate": 2.690690158926911e-06, "loss": 0.8, "step": 6078 }, { "epoch": 0.026911328522732304, "grad_norm": 3.358567401042197, "learning_rate": 2.6911328522732305e-06, "loss": 1.0339, "step": 6079 }, { "epoch": 0.026915755456195494, "grad_norm": 3.021218724889828, "learning_rate": 2.69157554561955e-06, "loss": 0.7511, "step": 6080 }, { "epoch": 0.026920182389658685, "grad_norm": 3.079275076045482, "learning_rate": 2.6920182389658684e-06, "loss": 0.8017, "step": 6081 }, { "epoch": 0.026924609323121875, "grad_norm": 2.6194575796734596, "learning_rate": 2.6924609323121873e-06, "loss": 0.7623, "step": 6082 }, { "epoch": 0.026929036256585062, "grad_norm": 2.783626015525339, "learning_rate": 2.6929036256585067e-06, "loss": 0.6018, "step": 6083 }, { "epoch": 0.026933463190048253, "grad_norm": 3.0076115487016932, "learning_rate": 2.6933463190048252e-06, "loss": 0.9483, "step": 6084 }, { "epoch": 0.026937890123511443, "grad_norm": 2.697874936030486, "learning_rate": 2.6937890123511446e-06, "loss": 0.9501, "step": 6085 }, { "epoch": 0.026942317056974634, "grad_norm": 2.8160636308662905, "learning_rate": 2.6942317056974635e-06, "loss": 0.5596, "step": 6086 }, { "epoch": 0.026946743990437824, "grad_norm": 3.285355440300643, "learning_rate": 2.6946743990437825e-06, "loss": 1.1614, "step": 6087 }, { "epoch": 0.026951170923901015, "grad_norm": 3.75745565909625, "learning_rate": 2.6951170923901014e-06, "loss": 1.0361, "step": 6088 }, { "epoch": 0.026955597857364205, "grad_norm": 2.651641482728015, "learning_rate": 2.695559785736421e-06, "loss": 0.7468, "step": 6089 }, { "epoch": 0.026960024790827392, "grad_norm": 3.0095529002867045, "learning_rate": 2.6960024790827393e-06, "loss": 0.751, "step": 6090 }, { "epoch": 0.026964451724290583, "grad_norm": 3.4493611239719684, "learning_rate": 2.6964451724290587e-06, "loss": 0.9561, "step": 6091 }, { "epoch": 0.026968878657753773, "grad_norm": 2.83590531751133, "learning_rate": 2.6968878657753777e-06, "loss": 0.8512, "step": 6092 }, { "epoch": 0.026973305591216964, "grad_norm": 3.1108324414264557, "learning_rate": 2.6973305591216966e-06, "loss": 0.3795, "step": 6093 }, { "epoch": 0.026977732524680154, "grad_norm": 3.0552844349192623, "learning_rate": 2.6977732524680155e-06, "loss": 0.7865, "step": 6094 }, { "epoch": 0.026982159458143345, "grad_norm": 2.406449812017739, "learning_rate": 2.698215945814335e-06, "loss": 0.7169, "step": 6095 }, { "epoch": 0.026986586391606535, "grad_norm": 3.081928572590339, "learning_rate": 2.6986586391606534e-06, "loss": 0.8954, "step": 6096 }, { "epoch": 0.026991013325069726, "grad_norm": 2.6643584129750177, "learning_rate": 2.6991013325069724e-06, "loss": 0.8589, "step": 6097 }, { "epoch": 0.026995440258532913, "grad_norm": 3.506733256990969, "learning_rate": 2.6995440258532918e-06, "loss": 0.9001, "step": 6098 }, { "epoch": 0.026999867191996103, "grad_norm": 3.264727754150249, "learning_rate": 2.6999867191996103e-06, "loss": 0.9626, "step": 6099 }, { "epoch": 0.027004294125459294, "grad_norm": 2.8058490921800883, "learning_rate": 2.7004294125459297e-06, "loss": 0.7504, "step": 6100 }, { "epoch": 0.027008721058922484, "grad_norm": 2.6668611008412992, "learning_rate": 2.7008721058922486e-06, "loss": 0.6583, "step": 6101 }, { "epoch": 0.027013147992385675, "grad_norm": 3.8818672957085103, "learning_rate": 2.7013147992385675e-06, "loss": 1.2884, "step": 6102 }, { "epoch": 0.027017574925848865, "grad_norm": 3.0427509846274696, "learning_rate": 2.7017574925848865e-06, "loss": 1.02, "step": 6103 }, { "epoch": 0.027022001859312056, "grad_norm": 3.3878819715532584, "learning_rate": 2.702200185931206e-06, "loss": 0.8958, "step": 6104 }, { "epoch": 0.027026428792775243, "grad_norm": 2.8069627681906724, "learning_rate": 2.7026428792775244e-06, "loss": 0.6952, "step": 6105 }, { "epoch": 0.027030855726238433, "grad_norm": 2.6025534794010126, "learning_rate": 2.7030855726238438e-06, "loss": 0.7185, "step": 6106 }, { "epoch": 0.027035282659701624, "grad_norm": 2.8122163502155564, "learning_rate": 2.7035282659701627e-06, "loss": 0.8045, "step": 6107 }, { "epoch": 0.027039709593164814, "grad_norm": 2.9860711033049188, "learning_rate": 2.7039709593164817e-06, "loss": 0.7365, "step": 6108 }, { "epoch": 0.027044136526628005, "grad_norm": 2.965129674759866, "learning_rate": 2.7044136526628006e-06, "loss": 0.922, "step": 6109 }, { "epoch": 0.027048563460091195, "grad_norm": 2.9669513512965655, "learning_rate": 2.70485634600912e-06, "loss": 0.9248, "step": 6110 }, { "epoch": 0.027052990393554386, "grad_norm": 3.063830714029029, "learning_rate": 2.7052990393554385e-06, "loss": 0.5703, "step": 6111 }, { "epoch": 0.027057417327017576, "grad_norm": 2.8337052931744724, "learning_rate": 2.705741732701758e-06, "loss": 0.6715, "step": 6112 }, { "epoch": 0.027061844260480764, "grad_norm": 2.852808666562662, "learning_rate": 2.706184426048077e-06, "loss": 0.7267, "step": 6113 }, { "epoch": 0.027066271193943954, "grad_norm": 2.835944689339823, "learning_rate": 2.7066271193943953e-06, "loss": 0.8483, "step": 6114 }, { "epoch": 0.027070698127407145, "grad_norm": 3.1225341798215482, "learning_rate": 2.7070698127407147e-06, "loss": 0.8191, "step": 6115 }, { "epoch": 0.027075125060870335, "grad_norm": 2.9060701856688795, "learning_rate": 2.7075125060870337e-06, "loss": 0.6566, "step": 6116 }, { "epoch": 0.027079551994333526, "grad_norm": 3.2977363090745406, "learning_rate": 2.7079551994333526e-06, "loss": 0.7604, "step": 6117 }, { "epoch": 0.027083978927796716, "grad_norm": 3.3132851071924185, "learning_rate": 2.7083978927796715e-06, "loss": 0.7619, "step": 6118 }, { "epoch": 0.027088405861259907, "grad_norm": 3.0752523119605497, "learning_rate": 2.708840586125991e-06, "loss": 0.9811, "step": 6119 }, { "epoch": 0.027092832794723094, "grad_norm": 3.3944534800239787, "learning_rate": 2.7092832794723094e-06, "loss": 0.7475, "step": 6120 }, { "epoch": 0.027097259728186284, "grad_norm": 2.765135181333016, "learning_rate": 2.709725972818629e-06, "loss": 0.7195, "step": 6121 }, { "epoch": 0.027101686661649475, "grad_norm": 3.196731998367911, "learning_rate": 2.7101686661649478e-06, "loss": 0.9803, "step": 6122 }, { "epoch": 0.027106113595112665, "grad_norm": 2.614198898820745, "learning_rate": 2.7106113595112667e-06, "loss": 0.7446, "step": 6123 }, { "epoch": 0.027110540528575856, "grad_norm": 2.4313661535168003, "learning_rate": 2.7110540528575857e-06, "loss": 0.7368, "step": 6124 }, { "epoch": 0.027114967462039046, "grad_norm": 2.501290226005334, "learning_rate": 2.711496746203905e-06, "loss": 0.7248, "step": 6125 }, { "epoch": 0.027119394395502237, "grad_norm": 3.1864825986748415, "learning_rate": 2.7119394395502235e-06, "loss": 0.9697, "step": 6126 }, { "epoch": 0.027123821328965427, "grad_norm": 3.1710572836958124, "learning_rate": 2.712382132896543e-06, "loss": 0.4547, "step": 6127 }, { "epoch": 0.027128248262428614, "grad_norm": 3.921296127526907, "learning_rate": 2.712824826242862e-06, "loss": 0.9394, "step": 6128 }, { "epoch": 0.027132675195891805, "grad_norm": 2.9351694961589274, "learning_rate": 2.7132675195891804e-06, "loss": 0.895, "step": 6129 }, { "epoch": 0.027137102129354995, "grad_norm": 3.3617057751383235, "learning_rate": 2.7137102129354998e-06, "loss": 1.0789, "step": 6130 }, { "epoch": 0.027141529062818186, "grad_norm": 3.1565725039906307, "learning_rate": 2.714152906281819e-06, "loss": 0.8369, "step": 6131 }, { "epoch": 0.027145955996281376, "grad_norm": 3.248465415688521, "learning_rate": 2.7145955996281377e-06, "loss": 0.8992, "step": 6132 }, { "epoch": 0.027150382929744567, "grad_norm": 2.759423616644945, "learning_rate": 2.7150382929744566e-06, "loss": 0.6378, "step": 6133 }, { "epoch": 0.027154809863207757, "grad_norm": 3.986578998289591, "learning_rate": 2.715480986320776e-06, "loss": 1.3075, "step": 6134 }, { "epoch": 0.027159236796670948, "grad_norm": 2.6991672011759857, "learning_rate": 2.7159236796670945e-06, "loss": 0.9035, "step": 6135 }, { "epoch": 0.027163663730134135, "grad_norm": 2.752559517803752, "learning_rate": 2.716366373013414e-06, "loss": 0.8344, "step": 6136 }, { "epoch": 0.027168090663597325, "grad_norm": 4.026276418924115, "learning_rate": 2.716809066359733e-06, "loss": 0.93, "step": 6137 }, { "epoch": 0.027172517597060516, "grad_norm": 3.8534502355007336, "learning_rate": 2.7172517597060518e-06, "loss": 0.6981, "step": 6138 }, { "epoch": 0.027176944530523706, "grad_norm": 3.1202142850375196, "learning_rate": 2.7176944530523707e-06, "loss": 0.9886, "step": 6139 }, { "epoch": 0.027181371463986897, "grad_norm": 2.8528155247074274, "learning_rate": 2.71813714639869e-06, "loss": 1.1108, "step": 6140 }, { "epoch": 0.027185798397450087, "grad_norm": 3.0145811015589934, "learning_rate": 2.7185798397450086e-06, "loss": 0.8769, "step": 6141 }, { "epoch": 0.027190225330913278, "grad_norm": 3.824664791713149, "learning_rate": 2.719022533091328e-06, "loss": 0.6204, "step": 6142 }, { "epoch": 0.027194652264376465, "grad_norm": 2.66339897457749, "learning_rate": 2.719465226437647e-06, "loss": 0.7111, "step": 6143 }, { "epoch": 0.027199079197839655, "grad_norm": 3.322022448655753, "learning_rate": 2.7199079197839654e-06, "loss": 1.1108, "step": 6144 }, { "epoch": 0.027203506131302846, "grad_norm": 2.819624989802639, "learning_rate": 2.720350613130285e-06, "loss": 0.6876, "step": 6145 }, { "epoch": 0.027207933064766036, "grad_norm": 3.3376784169728944, "learning_rate": 2.720793306476604e-06, "loss": 0.7243, "step": 6146 }, { "epoch": 0.027212359998229227, "grad_norm": 3.029049448017691, "learning_rate": 2.7212359998229227e-06, "loss": 0.9902, "step": 6147 }, { "epoch": 0.027216786931692417, "grad_norm": 3.3385534700265067, "learning_rate": 2.7216786931692417e-06, "loss": 0.6828, "step": 6148 }, { "epoch": 0.027221213865155608, "grad_norm": 2.987734167128815, "learning_rate": 2.722121386515561e-06, "loss": 0.7326, "step": 6149 }, { "epoch": 0.0272256407986188, "grad_norm": 2.7982861088901925, "learning_rate": 2.7225640798618796e-06, "loss": 0.7164, "step": 6150 }, { "epoch": 0.027230067732081985, "grad_norm": 3.861090129215605, "learning_rate": 2.723006773208199e-06, "loss": 0.5293, "step": 6151 }, { "epoch": 0.027234494665545176, "grad_norm": 3.529614364027594, "learning_rate": 2.723449466554518e-06, "loss": 0.805, "step": 6152 }, { "epoch": 0.027238921599008366, "grad_norm": 2.54398394850414, "learning_rate": 2.723892159900837e-06, "loss": 0.7418, "step": 6153 }, { "epoch": 0.027243348532471557, "grad_norm": 3.1736090973157154, "learning_rate": 2.7243348532471558e-06, "loss": 1.0496, "step": 6154 }, { "epoch": 0.027247775465934748, "grad_norm": 2.874366352607597, "learning_rate": 2.724777546593475e-06, "loss": 1.0154, "step": 6155 }, { "epoch": 0.027252202399397938, "grad_norm": 2.4391019317216522, "learning_rate": 2.7252202399397937e-06, "loss": 0.6872, "step": 6156 }, { "epoch": 0.02725662933286113, "grad_norm": 2.6125644910640142, "learning_rate": 2.725662933286113e-06, "loss": 0.6746, "step": 6157 }, { "epoch": 0.027261056266324316, "grad_norm": 2.636954494975239, "learning_rate": 2.726105626632432e-06, "loss": 0.6473, "step": 6158 }, { "epoch": 0.027265483199787506, "grad_norm": 3.244023003738817, "learning_rate": 2.7265483199787505e-06, "loss": 0.9332, "step": 6159 }, { "epoch": 0.027269910133250697, "grad_norm": 3.3837867921750413, "learning_rate": 2.72699101332507e-06, "loss": 1.0043, "step": 6160 }, { "epoch": 0.027274337066713887, "grad_norm": 3.717814720402594, "learning_rate": 2.7274337066713892e-06, "loss": 0.7709, "step": 6161 }, { "epoch": 0.027278764000177078, "grad_norm": 3.343815048090911, "learning_rate": 2.7278764000177078e-06, "loss": 0.7405, "step": 6162 }, { "epoch": 0.027283190933640268, "grad_norm": 2.9957147678668634, "learning_rate": 2.7283190933640267e-06, "loss": 0.6207, "step": 6163 }, { "epoch": 0.02728761786710346, "grad_norm": 3.0217782013528436, "learning_rate": 2.728761786710346e-06, "loss": 0.732, "step": 6164 }, { "epoch": 0.02729204480056665, "grad_norm": 3.242921053742111, "learning_rate": 2.7292044800566646e-06, "loss": 0.6976, "step": 6165 }, { "epoch": 0.027296471734029836, "grad_norm": 3.272564175843078, "learning_rate": 2.729647173402984e-06, "loss": 1.2467, "step": 6166 }, { "epoch": 0.027300898667493027, "grad_norm": 2.810520881501148, "learning_rate": 2.730089866749303e-06, "loss": 0.5414, "step": 6167 }, { "epoch": 0.027305325600956217, "grad_norm": 2.6278544537072768, "learning_rate": 2.730532560095622e-06, "loss": 0.6398, "step": 6168 }, { "epoch": 0.027309752534419408, "grad_norm": 2.854554637494066, "learning_rate": 2.730975253441941e-06, "loss": 0.8058, "step": 6169 }, { "epoch": 0.027314179467882598, "grad_norm": 3.092070038194748, "learning_rate": 2.73141794678826e-06, "loss": 0.8846, "step": 6170 }, { "epoch": 0.02731860640134579, "grad_norm": 2.668966270205149, "learning_rate": 2.7318606401345787e-06, "loss": 0.8024, "step": 6171 }, { "epoch": 0.02732303333480898, "grad_norm": 2.8274527857101486, "learning_rate": 2.732303333480898e-06, "loss": 0.8404, "step": 6172 }, { "epoch": 0.027327460268272166, "grad_norm": 2.7944287975219324, "learning_rate": 2.732746026827217e-06, "loss": 0.9171, "step": 6173 }, { "epoch": 0.027331887201735357, "grad_norm": 2.6553539358484053, "learning_rate": 2.7331887201735356e-06, "loss": 0.7685, "step": 6174 }, { "epoch": 0.027336314135198547, "grad_norm": 3.451307795479541, "learning_rate": 2.733631413519855e-06, "loss": 0.813, "step": 6175 }, { "epoch": 0.027340741068661738, "grad_norm": 2.3208420197197706, "learning_rate": 2.7340741068661743e-06, "loss": 0.7672, "step": 6176 }, { "epoch": 0.027345168002124928, "grad_norm": 2.8715635527751884, "learning_rate": 2.734516800212493e-06, "loss": 0.6604, "step": 6177 }, { "epoch": 0.02734959493558812, "grad_norm": 2.784873237339675, "learning_rate": 2.7349594935588118e-06, "loss": 1.0912, "step": 6178 }, { "epoch": 0.02735402186905131, "grad_norm": 3.467939978460008, "learning_rate": 2.735402186905131e-06, "loss": 0.5348, "step": 6179 }, { "epoch": 0.0273584488025145, "grad_norm": 3.164967383474354, "learning_rate": 2.7358448802514497e-06, "loss": 0.6972, "step": 6180 }, { "epoch": 0.027362875735977687, "grad_norm": 2.830831212666996, "learning_rate": 2.736287573597769e-06, "loss": 0.8865, "step": 6181 }, { "epoch": 0.027367302669440877, "grad_norm": 3.380483741230333, "learning_rate": 2.736730266944088e-06, "loss": 0.9959, "step": 6182 }, { "epoch": 0.027371729602904068, "grad_norm": 2.5224329381071193, "learning_rate": 2.737172960290407e-06, "loss": 0.6223, "step": 6183 }, { "epoch": 0.02737615653636726, "grad_norm": 2.752873857004114, "learning_rate": 2.737615653636726e-06, "loss": 0.788, "step": 6184 }, { "epoch": 0.02738058346983045, "grad_norm": 3.055874311514271, "learning_rate": 2.7380583469830452e-06, "loss": 0.9063, "step": 6185 }, { "epoch": 0.02738501040329364, "grad_norm": 3.0505559199220236, "learning_rate": 2.7385010403293638e-06, "loss": 0.8631, "step": 6186 }, { "epoch": 0.02738943733675683, "grad_norm": 3.62483517131342, "learning_rate": 2.738943733675683e-06, "loss": 1.0319, "step": 6187 }, { "epoch": 0.027393864270220017, "grad_norm": 2.5118930574133977, "learning_rate": 2.739386427022002e-06, "loss": 0.755, "step": 6188 }, { "epoch": 0.027398291203683207, "grad_norm": 2.6248585350512923, "learning_rate": 2.739829120368321e-06, "loss": 0.6645, "step": 6189 }, { "epoch": 0.027402718137146398, "grad_norm": 3.6964332866742766, "learning_rate": 2.74027181371464e-06, "loss": 0.7469, "step": 6190 }, { "epoch": 0.02740714507060959, "grad_norm": 3.1139983115247687, "learning_rate": 2.7407145070609593e-06, "loss": 0.656, "step": 6191 }, { "epoch": 0.02741157200407278, "grad_norm": 4.000760101299833, "learning_rate": 2.741157200407278e-06, "loss": 1.057, "step": 6192 }, { "epoch": 0.02741599893753597, "grad_norm": 2.8573053908393797, "learning_rate": 2.7415998937535972e-06, "loss": 0.5811, "step": 6193 }, { "epoch": 0.02742042587099916, "grad_norm": 2.630695499664154, "learning_rate": 2.742042587099916e-06, "loss": 0.6037, "step": 6194 }, { "epoch": 0.02742485280446235, "grad_norm": 3.5372363049571343, "learning_rate": 2.7424852804462347e-06, "loss": 0.628, "step": 6195 }, { "epoch": 0.027429279737925538, "grad_norm": 3.931548977348, "learning_rate": 2.742927973792554e-06, "loss": 0.8977, "step": 6196 }, { "epoch": 0.027433706671388728, "grad_norm": 2.973524604539265, "learning_rate": 2.743370667138873e-06, "loss": 0.893, "step": 6197 }, { "epoch": 0.02743813360485192, "grad_norm": 3.4987162242731094, "learning_rate": 2.743813360485192e-06, "loss": 0.7443, "step": 6198 }, { "epoch": 0.02744256053831511, "grad_norm": 3.137351748021235, "learning_rate": 2.744256053831511e-06, "loss": 0.6382, "step": 6199 }, { "epoch": 0.0274469874717783, "grad_norm": 2.9507598763038847, "learning_rate": 2.7446987471778303e-06, "loss": 0.9786, "step": 6200 }, { "epoch": 0.02745141440524149, "grad_norm": 3.1913381080363217, "learning_rate": 2.745141440524149e-06, "loss": 0.8491, "step": 6201 }, { "epoch": 0.02745584133870468, "grad_norm": 3.2729192221449845, "learning_rate": 2.745584133870468e-06, "loss": 0.7805, "step": 6202 }, { "epoch": 0.027460268272167868, "grad_norm": 2.8927741922423618, "learning_rate": 2.746026827216787e-06, "loss": 0.7177, "step": 6203 }, { "epoch": 0.027464695205631058, "grad_norm": 2.933639702727409, "learning_rate": 2.746469520563106e-06, "loss": 0.931, "step": 6204 }, { "epoch": 0.02746912213909425, "grad_norm": 2.601208346204492, "learning_rate": 2.746912213909425e-06, "loss": 0.5331, "step": 6205 }, { "epoch": 0.02747354907255744, "grad_norm": 2.592541616958555, "learning_rate": 2.7473549072557444e-06, "loss": 0.4829, "step": 6206 }, { "epoch": 0.02747797600602063, "grad_norm": 2.8046631062543765, "learning_rate": 2.747797600602063e-06, "loss": 0.8242, "step": 6207 }, { "epoch": 0.02748240293948382, "grad_norm": 2.42199337571544, "learning_rate": 2.7482402939483823e-06, "loss": 0.6746, "step": 6208 }, { "epoch": 0.02748682987294701, "grad_norm": 3.104213274506651, "learning_rate": 2.7486829872947012e-06, "loss": 0.8855, "step": 6209 }, { "epoch": 0.0274912568064102, "grad_norm": 2.548296596161892, "learning_rate": 2.7491256806410198e-06, "loss": 0.7682, "step": 6210 }, { "epoch": 0.027495683739873388, "grad_norm": 3.171479649676698, "learning_rate": 2.749568373987339e-06, "loss": 0.9895, "step": 6211 }, { "epoch": 0.02750011067333658, "grad_norm": 2.8375749363418845, "learning_rate": 2.7500110673336585e-06, "loss": 0.7812, "step": 6212 }, { "epoch": 0.02750453760679977, "grad_norm": 2.9385534858160023, "learning_rate": 2.750453760679977e-06, "loss": 0.5804, "step": 6213 }, { "epoch": 0.02750896454026296, "grad_norm": 3.077930923101838, "learning_rate": 2.750896454026296e-06, "loss": 1.0057, "step": 6214 }, { "epoch": 0.02751339147372615, "grad_norm": 3.763501834504699, "learning_rate": 2.7513391473726153e-06, "loss": 0.6276, "step": 6215 }, { "epoch": 0.02751781840718934, "grad_norm": 3.0233240731671263, "learning_rate": 2.751781840718934e-06, "loss": 1.0464, "step": 6216 }, { "epoch": 0.02752224534065253, "grad_norm": 3.124985518959066, "learning_rate": 2.7522245340652532e-06, "loss": 1.1632, "step": 6217 }, { "epoch": 0.02752667227411572, "grad_norm": 2.739625775854822, "learning_rate": 2.752667227411572e-06, "loss": 0.7245, "step": 6218 }, { "epoch": 0.02753109920757891, "grad_norm": 2.8169284059915984, "learning_rate": 2.753109920757891e-06, "loss": 0.941, "step": 6219 }, { "epoch": 0.0275355261410421, "grad_norm": 2.5596801087701166, "learning_rate": 2.75355261410421e-06, "loss": 0.8234, "step": 6220 }, { "epoch": 0.02753995307450529, "grad_norm": 3.2117654883816065, "learning_rate": 2.7539953074505295e-06, "loss": 0.8046, "step": 6221 }, { "epoch": 0.02754438000796848, "grad_norm": 2.748793607712214, "learning_rate": 2.754438000796848e-06, "loss": 0.5928, "step": 6222 }, { "epoch": 0.02754880694143167, "grad_norm": 2.578230464351098, "learning_rate": 2.7548806941431673e-06, "loss": 0.655, "step": 6223 }, { "epoch": 0.02755323387489486, "grad_norm": 3.189894132430959, "learning_rate": 2.7553233874894863e-06, "loss": 0.8187, "step": 6224 }, { "epoch": 0.027557660808358052, "grad_norm": 2.394112676036996, "learning_rate": 2.755766080835805e-06, "loss": 0.67, "step": 6225 }, { "epoch": 0.02756208774182124, "grad_norm": 2.5576339919394364, "learning_rate": 2.756208774182124e-06, "loss": 0.5745, "step": 6226 }, { "epoch": 0.02756651467528443, "grad_norm": 3.056007882097748, "learning_rate": 2.7566514675284436e-06, "loss": 0.7486, "step": 6227 }, { "epoch": 0.02757094160874762, "grad_norm": 2.95103185341787, "learning_rate": 2.757094160874762e-06, "loss": 0.7154, "step": 6228 }, { "epoch": 0.02757536854221081, "grad_norm": 3.16294331498884, "learning_rate": 2.757536854221081e-06, "loss": 1.0593, "step": 6229 }, { "epoch": 0.027579795475674, "grad_norm": 3.1997354174538404, "learning_rate": 2.7579795475674004e-06, "loss": 0.7773, "step": 6230 }, { "epoch": 0.02758422240913719, "grad_norm": 2.8711227113519193, "learning_rate": 2.758422240913719e-06, "loss": 0.913, "step": 6231 }, { "epoch": 0.027588649342600382, "grad_norm": 2.5309014010442987, "learning_rate": 2.7588649342600383e-06, "loss": 0.5639, "step": 6232 }, { "epoch": 0.027593076276063572, "grad_norm": 2.756828697802537, "learning_rate": 2.7593076276063572e-06, "loss": 0.5649, "step": 6233 }, { "epoch": 0.02759750320952676, "grad_norm": 2.3282242288020183, "learning_rate": 2.759750320952676e-06, "loss": 0.7124, "step": 6234 }, { "epoch": 0.02760193014298995, "grad_norm": 2.8025603716923047, "learning_rate": 2.760193014298995e-06, "loss": 0.6751, "step": 6235 }, { "epoch": 0.02760635707645314, "grad_norm": 3.140326297891491, "learning_rate": 2.7606357076453145e-06, "loss": 0.667, "step": 6236 }, { "epoch": 0.02761078400991633, "grad_norm": 3.112545708308758, "learning_rate": 2.761078400991633e-06, "loss": 0.654, "step": 6237 }, { "epoch": 0.02761521094337952, "grad_norm": 3.742528718326491, "learning_rate": 2.7615210943379524e-06, "loss": 0.7834, "step": 6238 }, { "epoch": 0.027619637876842712, "grad_norm": 3.6614947176225345, "learning_rate": 2.7619637876842713e-06, "loss": 1.0722, "step": 6239 }, { "epoch": 0.027624064810305902, "grad_norm": 2.972771994245622, "learning_rate": 2.76240648103059e-06, "loss": 0.7244, "step": 6240 }, { "epoch": 0.02762849174376909, "grad_norm": 3.3835529505279345, "learning_rate": 2.7628491743769092e-06, "loss": 0.7128, "step": 6241 }, { "epoch": 0.02763291867723228, "grad_norm": 2.633296611024547, "learning_rate": 2.7632918677232286e-06, "loss": 0.7038, "step": 6242 }, { "epoch": 0.02763734561069547, "grad_norm": 3.033498127324717, "learning_rate": 2.7637345610695476e-06, "loss": 0.3784, "step": 6243 }, { "epoch": 0.02764177254415866, "grad_norm": 4.06268301132108, "learning_rate": 2.764177254415866e-06, "loss": 1.212, "step": 6244 }, { "epoch": 0.02764619947762185, "grad_norm": 2.4464426933896153, "learning_rate": 2.7646199477621855e-06, "loss": 0.4671, "step": 6245 }, { "epoch": 0.027650626411085042, "grad_norm": 3.8262779740895967, "learning_rate": 2.765062641108505e-06, "loss": 1.2567, "step": 6246 }, { "epoch": 0.027655053344548233, "grad_norm": 3.243143205644852, "learning_rate": 2.7655053344548233e-06, "loss": 0.841, "step": 6247 }, { "epoch": 0.027659480278011423, "grad_norm": 3.535814134103078, "learning_rate": 2.7659480278011423e-06, "loss": 0.9805, "step": 6248 }, { "epoch": 0.02766390721147461, "grad_norm": 2.9879875571324277, "learning_rate": 2.7663907211474617e-06, "loss": 0.6585, "step": 6249 }, { "epoch": 0.0276683341449378, "grad_norm": 2.706639093305537, "learning_rate": 2.76683341449378e-06, "loss": 0.7019, "step": 6250 }, { "epoch": 0.02767276107840099, "grad_norm": 2.4730201467656223, "learning_rate": 2.7672761078400996e-06, "loss": 0.5768, "step": 6251 }, { "epoch": 0.02767718801186418, "grad_norm": 2.794222305996564, "learning_rate": 2.7677188011864185e-06, "loss": 0.6333, "step": 6252 }, { "epoch": 0.027681614945327372, "grad_norm": 2.4649210190421313, "learning_rate": 2.7681614945327375e-06, "loss": 0.8057, "step": 6253 }, { "epoch": 0.027686041878790563, "grad_norm": 2.7102357307719753, "learning_rate": 2.7686041878790564e-06, "loss": 0.8375, "step": 6254 }, { "epoch": 0.027690468812253753, "grad_norm": 2.8807192596629005, "learning_rate": 2.7690468812253758e-06, "loss": 0.973, "step": 6255 }, { "epoch": 0.02769489574571694, "grad_norm": 3.0468673018493493, "learning_rate": 2.7694895745716943e-06, "loss": 0.646, "step": 6256 }, { "epoch": 0.02769932267918013, "grad_norm": 4.007199515197105, "learning_rate": 2.7699322679180137e-06, "loss": 0.8358, "step": 6257 }, { "epoch": 0.02770374961264332, "grad_norm": 2.8367766113797566, "learning_rate": 2.7703749612643326e-06, "loss": 0.6142, "step": 6258 }, { "epoch": 0.02770817654610651, "grad_norm": 3.192245363234375, "learning_rate": 2.770817654610651e-06, "loss": 0.8074, "step": 6259 }, { "epoch": 0.027712603479569702, "grad_norm": 2.9240956855531706, "learning_rate": 2.7712603479569705e-06, "loss": 0.4369, "step": 6260 }, { "epoch": 0.027717030413032893, "grad_norm": 2.874855279954837, "learning_rate": 2.77170304130329e-06, "loss": 0.8975, "step": 6261 }, { "epoch": 0.027721457346496083, "grad_norm": 2.5746589091395227, "learning_rate": 2.7721457346496084e-06, "loss": 0.8318, "step": 6262 }, { "epoch": 0.027725884279959274, "grad_norm": 2.8418997666055748, "learning_rate": 2.7725884279959273e-06, "loss": 0.7051, "step": 6263 }, { "epoch": 0.02773031121342246, "grad_norm": 4.446265733723131, "learning_rate": 2.7730311213422467e-06, "loss": 1.1081, "step": 6264 }, { "epoch": 0.02773473814688565, "grad_norm": 2.67926011227263, "learning_rate": 2.7734738146885652e-06, "loss": 0.7985, "step": 6265 }, { "epoch": 0.027739165080348842, "grad_norm": 4.283301206892357, "learning_rate": 2.7739165080348846e-06, "loss": 0.6858, "step": 6266 }, { "epoch": 0.027743592013812032, "grad_norm": 2.901240167578112, "learning_rate": 2.7743592013812036e-06, "loss": 0.915, "step": 6267 }, { "epoch": 0.027748018947275223, "grad_norm": 2.540349600026956, "learning_rate": 2.7748018947275225e-06, "loss": 0.5404, "step": 6268 }, { "epoch": 0.027752445880738413, "grad_norm": 2.607335824693657, "learning_rate": 2.7752445880738415e-06, "loss": 0.7335, "step": 6269 }, { "epoch": 0.027756872814201604, "grad_norm": 3.120707052825606, "learning_rate": 2.775687281420161e-06, "loss": 0.6952, "step": 6270 }, { "epoch": 0.02776129974766479, "grad_norm": 2.956261568976406, "learning_rate": 2.7761299747664793e-06, "loss": 0.8608, "step": 6271 }, { "epoch": 0.02776572668112798, "grad_norm": 3.2294424243751676, "learning_rate": 2.7765726681127987e-06, "loss": 0.8494, "step": 6272 }, { "epoch": 0.027770153614591172, "grad_norm": 2.582459048896693, "learning_rate": 2.7770153614591177e-06, "loss": 0.7933, "step": 6273 }, { "epoch": 0.027774580548054362, "grad_norm": 2.736192792503866, "learning_rate": 2.777458054805436e-06, "loss": 0.6759, "step": 6274 }, { "epoch": 0.027779007481517553, "grad_norm": 2.8625885912512157, "learning_rate": 2.7779007481517556e-06, "loss": 0.8603, "step": 6275 }, { "epoch": 0.027783434414980743, "grad_norm": 2.616132286411142, "learning_rate": 2.778343441498075e-06, "loss": 0.712, "step": 6276 }, { "epoch": 0.027787861348443934, "grad_norm": 2.984617275176359, "learning_rate": 2.7787861348443935e-06, "loss": 0.6524, "step": 6277 }, { "epoch": 0.027792288281907124, "grad_norm": 2.7646182465664926, "learning_rate": 2.7792288281907124e-06, "loss": 0.9523, "step": 6278 }, { "epoch": 0.02779671521537031, "grad_norm": 2.9093779555110455, "learning_rate": 2.7796715215370318e-06, "loss": 0.6978, "step": 6279 }, { "epoch": 0.027801142148833502, "grad_norm": 2.5226668893043738, "learning_rate": 2.7801142148833503e-06, "loss": 0.6638, "step": 6280 }, { "epoch": 0.027805569082296692, "grad_norm": 3.363492399464773, "learning_rate": 2.7805569082296697e-06, "loss": 0.8158, "step": 6281 }, { "epoch": 0.027809996015759883, "grad_norm": 2.7615873419285673, "learning_rate": 2.7809996015759886e-06, "loss": 0.7259, "step": 6282 }, { "epoch": 0.027814422949223074, "grad_norm": 3.1771572903902983, "learning_rate": 2.7814422949223076e-06, "loss": 0.8109, "step": 6283 }, { "epoch": 0.027818849882686264, "grad_norm": 3.1666548389532014, "learning_rate": 2.7818849882686265e-06, "loss": 0.6109, "step": 6284 }, { "epoch": 0.027823276816149455, "grad_norm": 2.501345650269196, "learning_rate": 2.782327681614946e-06, "loss": 0.7127, "step": 6285 }, { "epoch": 0.027827703749612645, "grad_norm": 3.4371454840223388, "learning_rate": 2.7827703749612644e-06, "loss": 1.0733, "step": 6286 }, { "epoch": 0.027832130683075832, "grad_norm": 3.089480968947409, "learning_rate": 2.7832130683075838e-06, "loss": 0.9137, "step": 6287 }, { "epoch": 0.027836557616539023, "grad_norm": 2.7279071053745576, "learning_rate": 2.7836557616539027e-06, "loss": 0.9028, "step": 6288 }, { "epoch": 0.027840984550002213, "grad_norm": 3.2547062111641023, "learning_rate": 2.7840984550002217e-06, "loss": 0.6905, "step": 6289 }, { "epoch": 0.027845411483465404, "grad_norm": 2.445939367564505, "learning_rate": 2.7845411483465406e-06, "loss": 0.6332, "step": 6290 }, { "epoch": 0.027849838416928594, "grad_norm": 4.156527910244202, "learning_rate": 2.78498384169286e-06, "loss": 1.0937, "step": 6291 }, { "epoch": 0.027854265350391785, "grad_norm": 3.0005031217974265, "learning_rate": 2.7854265350391785e-06, "loss": 1.0598, "step": 6292 }, { "epoch": 0.027858692283854975, "grad_norm": 2.6981353605593683, "learning_rate": 2.785869228385498e-06, "loss": 0.9735, "step": 6293 }, { "epoch": 0.027863119217318162, "grad_norm": 4.405695758781176, "learning_rate": 2.786311921731817e-06, "loss": 1.2273, "step": 6294 }, { "epoch": 0.027867546150781353, "grad_norm": 2.9884087215606487, "learning_rate": 2.7867546150781353e-06, "loss": 0.8001, "step": 6295 }, { "epoch": 0.027871973084244543, "grad_norm": 2.6514860406310934, "learning_rate": 2.7871973084244547e-06, "loss": 0.7157, "step": 6296 }, { "epoch": 0.027876400017707734, "grad_norm": 2.7989510122901313, "learning_rate": 2.7876400017707737e-06, "loss": 0.8688, "step": 6297 }, { "epoch": 0.027880826951170924, "grad_norm": 2.99760851605665, "learning_rate": 2.7880826951170926e-06, "loss": 1.0104, "step": 6298 }, { "epoch": 0.027885253884634115, "grad_norm": 2.532368524623095, "learning_rate": 2.7885253884634116e-06, "loss": 0.8297, "step": 6299 }, { "epoch": 0.027889680818097305, "grad_norm": 3.0638167043409448, "learning_rate": 2.788968081809731e-06, "loss": 0.6007, "step": 6300 }, { "epoch": 0.027894107751560496, "grad_norm": 2.4744426485440707, "learning_rate": 2.7894107751560495e-06, "loss": 0.6696, "step": 6301 }, { "epoch": 0.027898534685023683, "grad_norm": 2.824745425677747, "learning_rate": 2.789853468502369e-06, "loss": 0.5745, "step": 6302 }, { "epoch": 0.027902961618486873, "grad_norm": 2.5233564465571776, "learning_rate": 2.7902961618486878e-06, "loss": 0.6662, "step": 6303 }, { "epoch": 0.027907388551950064, "grad_norm": 2.572321252700362, "learning_rate": 2.7907388551950067e-06, "loss": 0.6454, "step": 6304 }, { "epoch": 0.027911815485413254, "grad_norm": 3.0241948025964467, "learning_rate": 2.7911815485413257e-06, "loss": 0.6636, "step": 6305 }, { "epoch": 0.027916242418876445, "grad_norm": 3.217172731060887, "learning_rate": 2.791624241887645e-06, "loss": 0.6728, "step": 6306 }, { "epoch": 0.027920669352339635, "grad_norm": 2.9746610937711573, "learning_rate": 2.7920669352339636e-06, "loss": 0.7012, "step": 6307 }, { "epoch": 0.027925096285802826, "grad_norm": 2.4198412510711673, "learning_rate": 2.792509628580283e-06, "loss": 0.593, "step": 6308 }, { "epoch": 0.027929523219266013, "grad_norm": 2.6531168643353173, "learning_rate": 2.792952321926602e-06, "loss": 0.4985, "step": 6309 }, { "epoch": 0.027933950152729203, "grad_norm": 2.80785596409399, "learning_rate": 2.7933950152729204e-06, "loss": 0.7539, "step": 6310 }, { "epoch": 0.027938377086192394, "grad_norm": 2.897470218062234, "learning_rate": 2.7938377086192398e-06, "loss": 0.8326, "step": 6311 }, { "epoch": 0.027942804019655584, "grad_norm": 3.387862381101596, "learning_rate": 2.794280401965559e-06, "loss": 1.2624, "step": 6312 }, { "epoch": 0.027947230953118775, "grad_norm": 2.7047520912130523, "learning_rate": 2.7947230953118777e-06, "loss": 0.8818, "step": 6313 }, { "epoch": 0.027951657886581965, "grad_norm": 3.0135469890465076, "learning_rate": 2.7951657886581966e-06, "loss": 1.0044, "step": 6314 }, { "epoch": 0.027956084820045156, "grad_norm": 2.7972609943189823, "learning_rate": 2.795608482004516e-06, "loss": 0.9487, "step": 6315 }, { "epoch": 0.027960511753508346, "grad_norm": 2.940863610187558, "learning_rate": 2.7960511753508345e-06, "loss": 0.6085, "step": 6316 }, { "epoch": 0.027964938686971533, "grad_norm": 2.750768465823898, "learning_rate": 2.796493868697154e-06, "loss": 0.8513, "step": 6317 }, { "epoch": 0.027969365620434724, "grad_norm": 2.558339554389659, "learning_rate": 2.796936562043473e-06, "loss": 0.7058, "step": 6318 }, { "epoch": 0.027973792553897914, "grad_norm": 3.3521785444166174, "learning_rate": 2.7973792553897918e-06, "loss": 1.2347, "step": 6319 }, { "epoch": 0.027978219487361105, "grad_norm": 2.661186628162168, "learning_rate": 2.7978219487361107e-06, "loss": 0.8874, "step": 6320 }, { "epoch": 0.027982646420824295, "grad_norm": 3.5110815308330094, "learning_rate": 2.79826464208243e-06, "loss": 0.6908, "step": 6321 }, { "epoch": 0.027987073354287486, "grad_norm": 2.7261109771047534, "learning_rate": 2.7987073354287486e-06, "loss": 0.8694, "step": 6322 }, { "epoch": 0.027991500287750676, "grad_norm": 5.009534935049044, "learning_rate": 2.799150028775068e-06, "loss": 1.3722, "step": 6323 }, { "epoch": 0.027995927221213864, "grad_norm": 2.9419937563066676, "learning_rate": 2.799592722121387e-06, "loss": 0.8194, "step": 6324 }, { "epoch": 0.028000354154677054, "grad_norm": 3.131418687674243, "learning_rate": 2.8000354154677055e-06, "loss": 0.9288, "step": 6325 }, { "epoch": 0.028004781088140245, "grad_norm": 2.7177426216869884, "learning_rate": 2.800478108814025e-06, "loss": 0.8344, "step": 6326 }, { "epoch": 0.028009208021603435, "grad_norm": 3.574880833560243, "learning_rate": 2.800920802160344e-06, "loss": 1.0273, "step": 6327 }, { "epoch": 0.028013634955066626, "grad_norm": 3.550809103094482, "learning_rate": 2.8013634955066627e-06, "loss": 1.0993, "step": 6328 }, { "epoch": 0.028018061888529816, "grad_norm": 3.0208614039574773, "learning_rate": 2.8018061888529817e-06, "loss": 0.9006, "step": 6329 }, { "epoch": 0.028022488821993007, "grad_norm": 3.168790151944226, "learning_rate": 2.802248882199301e-06, "loss": 0.6057, "step": 6330 }, { "epoch": 0.028026915755456197, "grad_norm": 2.839191546063642, "learning_rate": 2.8026915755456196e-06, "loss": 0.7049, "step": 6331 }, { "epoch": 0.028031342688919384, "grad_norm": 3.3978186356679725, "learning_rate": 2.803134268891939e-06, "loss": 0.8701, "step": 6332 }, { "epoch": 0.028035769622382575, "grad_norm": 2.9539418188734383, "learning_rate": 2.803576962238258e-06, "loss": 0.8294, "step": 6333 }, { "epoch": 0.028040196555845765, "grad_norm": 2.8270329661754445, "learning_rate": 2.804019655584577e-06, "loss": 0.5318, "step": 6334 }, { "epoch": 0.028044623489308956, "grad_norm": 3.2624978585660283, "learning_rate": 2.8044623489308958e-06, "loss": 0.9045, "step": 6335 }, { "epoch": 0.028049050422772146, "grad_norm": 2.7327964748207485, "learning_rate": 2.804905042277215e-06, "loss": 0.7466, "step": 6336 }, { "epoch": 0.028053477356235337, "grad_norm": 2.5202931513524782, "learning_rate": 2.8053477356235337e-06, "loss": 0.8547, "step": 6337 }, { "epoch": 0.028057904289698527, "grad_norm": 3.030313270202774, "learning_rate": 2.805790428969853e-06, "loss": 0.808, "step": 6338 }, { "epoch": 0.028062331223161714, "grad_norm": 2.835541556403227, "learning_rate": 2.806233122316172e-06, "loss": 0.7723, "step": 6339 }, { "epoch": 0.028066758156624905, "grad_norm": 2.6807553998207525, "learning_rate": 2.8066758156624905e-06, "loss": 0.9228, "step": 6340 }, { "epoch": 0.028071185090088095, "grad_norm": 3.304800145327714, "learning_rate": 2.80711850900881e-06, "loss": 1.0188, "step": 6341 }, { "epoch": 0.028075612023551286, "grad_norm": 2.292102289992031, "learning_rate": 2.8075612023551293e-06, "loss": 0.4345, "step": 6342 }, { "epoch": 0.028080038957014476, "grad_norm": 3.7310149432399613, "learning_rate": 2.8080038957014478e-06, "loss": 1.0514, "step": 6343 }, { "epoch": 0.028084465890477667, "grad_norm": 2.883642595378163, "learning_rate": 2.8084465890477667e-06, "loss": 0.8842, "step": 6344 }, { "epoch": 0.028088892823940857, "grad_norm": 3.334321388093538, "learning_rate": 2.808889282394086e-06, "loss": 0.835, "step": 6345 }, { "epoch": 0.028093319757404048, "grad_norm": 2.657834047157235, "learning_rate": 2.8093319757404046e-06, "loss": 0.7376, "step": 6346 }, { "epoch": 0.028097746690867235, "grad_norm": 2.8402613561417667, "learning_rate": 2.809774669086724e-06, "loss": 0.6444, "step": 6347 }, { "epoch": 0.028102173624330425, "grad_norm": 2.3365681705572845, "learning_rate": 2.810217362433043e-06, "loss": 0.6043, "step": 6348 }, { "epoch": 0.028106600557793616, "grad_norm": 2.7202634698770027, "learning_rate": 2.810660055779362e-06, "loss": 0.9164, "step": 6349 }, { "epoch": 0.028111027491256806, "grad_norm": 2.8586328988765306, "learning_rate": 2.811102749125681e-06, "loss": 0.6293, "step": 6350 }, { "epoch": 0.028115454424719997, "grad_norm": 2.863256103318967, "learning_rate": 2.811545442472e-06, "loss": 0.8366, "step": 6351 }, { "epoch": 0.028119881358183187, "grad_norm": 3.098571255542273, "learning_rate": 2.8119881358183187e-06, "loss": 0.8501, "step": 6352 }, { "epoch": 0.028124308291646378, "grad_norm": 2.671760049504579, "learning_rate": 2.812430829164638e-06, "loss": 0.6388, "step": 6353 }, { "epoch": 0.02812873522510957, "grad_norm": 3.3484073462153487, "learning_rate": 2.812873522510957e-06, "loss": 0.8966, "step": 6354 }, { "epoch": 0.028133162158572755, "grad_norm": 2.8342773115724387, "learning_rate": 2.8133162158572756e-06, "loss": 0.5137, "step": 6355 }, { "epoch": 0.028137589092035946, "grad_norm": 2.3700014515743995, "learning_rate": 2.813758909203595e-06, "loss": 0.5767, "step": 6356 }, { "epoch": 0.028142016025499136, "grad_norm": 3.6149462901447436, "learning_rate": 2.8142016025499143e-06, "loss": 1.0485, "step": 6357 }, { "epoch": 0.028146442958962327, "grad_norm": 2.988189631422305, "learning_rate": 2.814644295896233e-06, "loss": 0.7776, "step": 6358 }, { "epoch": 0.028150869892425517, "grad_norm": 2.976846100074315, "learning_rate": 2.8150869892425518e-06, "loss": 0.8447, "step": 6359 }, { "epoch": 0.028155296825888708, "grad_norm": 2.3420882724361447, "learning_rate": 2.815529682588871e-06, "loss": 0.6666, "step": 6360 }, { "epoch": 0.0281597237593519, "grad_norm": 3.4445734977596993, "learning_rate": 2.8159723759351897e-06, "loss": 1.2874, "step": 6361 }, { "epoch": 0.028164150692815085, "grad_norm": 2.9616083049272857, "learning_rate": 2.816415069281509e-06, "loss": 1.0381, "step": 6362 }, { "epoch": 0.028168577626278276, "grad_norm": 2.647061742988146, "learning_rate": 2.816857762627828e-06, "loss": 0.2764, "step": 6363 }, { "epoch": 0.028173004559741466, "grad_norm": 3.214387984622073, "learning_rate": 2.817300455974147e-06, "loss": 0.889, "step": 6364 }, { "epoch": 0.028177431493204657, "grad_norm": 3.0241372581788704, "learning_rate": 2.817743149320466e-06, "loss": 0.7539, "step": 6365 }, { "epoch": 0.028181858426667847, "grad_norm": 3.1512473492560993, "learning_rate": 2.8181858426667853e-06, "loss": 0.9006, "step": 6366 }, { "epoch": 0.028186285360131038, "grad_norm": 2.8733768157539625, "learning_rate": 2.8186285360131038e-06, "loss": 0.7062, "step": 6367 }, { "epoch": 0.02819071229359423, "grad_norm": 2.479668260647073, "learning_rate": 2.819071229359423e-06, "loss": 0.7453, "step": 6368 }, { "epoch": 0.02819513922705742, "grad_norm": 3.3913603606305456, "learning_rate": 2.819513922705742e-06, "loss": 0.8083, "step": 6369 }, { "epoch": 0.028199566160520606, "grad_norm": 3.5670982048346755, "learning_rate": 2.819956616052061e-06, "loss": 1.0602, "step": 6370 }, { "epoch": 0.028203993093983797, "grad_norm": 3.5489886349030493, "learning_rate": 2.82039930939838e-06, "loss": 0.998, "step": 6371 }, { "epoch": 0.028208420027446987, "grad_norm": 2.727345560252586, "learning_rate": 2.8208420027446994e-06, "loss": 0.81, "step": 6372 }, { "epoch": 0.028212846960910178, "grad_norm": 3.1240227155522318, "learning_rate": 2.821284696091018e-06, "loss": 0.9825, "step": 6373 }, { "epoch": 0.028217273894373368, "grad_norm": 3.189063573876483, "learning_rate": 2.821727389437337e-06, "loss": 0.6114, "step": 6374 }, { "epoch": 0.02822170082783656, "grad_norm": 2.914065556978392, "learning_rate": 2.822170082783656e-06, "loss": 0.6633, "step": 6375 }, { "epoch": 0.02822612776129975, "grad_norm": 2.833255971015585, "learning_rate": 2.8226127761299747e-06, "loss": 0.6938, "step": 6376 }, { "epoch": 0.028230554694762936, "grad_norm": 2.5596612882522187, "learning_rate": 2.823055469476294e-06, "loss": 0.6424, "step": 6377 }, { "epoch": 0.028234981628226127, "grad_norm": 2.707753723357867, "learning_rate": 2.823498162822613e-06, "loss": 0.6021, "step": 6378 }, { "epoch": 0.028239408561689317, "grad_norm": 2.8146622387846105, "learning_rate": 2.823940856168932e-06, "loss": 0.7753, "step": 6379 }, { "epoch": 0.028243835495152508, "grad_norm": 2.7677193531268847, "learning_rate": 2.824383549515251e-06, "loss": 1.1627, "step": 6380 }, { "epoch": 0.028248262428615698, "grad_norm": 3.1556238904359724, "learning_rate": 2.8248262428615703e-06, "loss": 1.0744, "step": 6381 }, { "epoch": 0.02825268936207889, "grad_norm": 3.1549905646880774, "learning_rate": 2.825268936207889e-06, "loss": 0.539, "step": 6382 }, { "epoch": 0.02825711629554208, "grad_norm": 2.841829271848963, "learning_rate": 2.825711629554208e-06, "loss": 0.7075, "step": 6383 }, { "epoch": 0.02826154322900527, "grad_norm": 3.0587111923138597, "learning_rate": 2.826154322900527e-06, "loss": 0.809, "step": 6384 }, { "epoch": 0.028265970162468457, "grad_norm": 3.203037743418815, "learning_rate": 2.826597016246846e-06, "loss": 1.03, "step": 6385 }, { "epoch": 0.028270397095931647, "grad_norm": 3.569707733944333, "learning_rate": 2.827039709593165e-06, "loss": 1.3033, "step": 6386 }, { "epoch": 0.028274824029394838, "grad_norm": 3.189405440384072, "learning_rate": 2.8274824029394844e-06, "loss": 0.7272, "step": 6387 }, { "epoch": 0.028279250962858028, "grad_norm": 2.8382487455544867, "learning_rate": 2.827925096285803e-06, "loss": 0.7869, "step": 6388 }, { "epoch": 0.02828367789632122, "grad_norm": 2.782302337019561, "learning_rate": 2.8283677896321223e-06, "loss": 0.569, "step": 6389 }, { "epoch": 0.02828810482978441, "grad_norm": 2.851156572242131, "learning_rate": 2.8288104829784413e-06, "loss": 0.8216, "step": 6390 }, { "epoch": 0.0282925317632476, "grad_norm": 4.8903522796227765, "learning_rate": 2.8292531763247598e-06, "loss": 1.1644, "step": 6391 }, { "epoch": 0.028296958696710787, "grad_norm": 2.417560177000073, "learning_rate": 2.829695869671079e-06, "loss": 0.8283, "step": 6392 }, { "epoch": 0.028301385630173977, "grad_norm": 2.899549749572373, "learning_rate": 2.8301385630173985e-06, "loss": 0.9762, "step": 6393 }, { "epoch": 0.028305812563637168, "grad_norm": 3.084035245008862, "learning_rate": 2.830581256363717e-06, "loss": 0.5177, "step": 6394 }, { "epoch": 0.02831023949710036, "grad_norm": 4.04434575493491, "learning_rate": 2.831023949710036e-06, "loss": 1.2391, "step": 6395 }, { "epoch": 0.02831466643056355, "grad_norm": 2.612579924139556, "learning_rate": 2.8314666430563554e-06, "loss": 0.7379, "step": 6396 }, { "epoch": 0.02831909336402674, "grad_norm": 3.4665694186275586, "learning_rate": 2.831909336402674e-06, "loss": 0.869, "step": 6397 }, { "epoch": 0.02832352029748993, "grad_norm": 3.002771112367879, "learning_rate": 2.8323520297489933e-06, "loss": 0.949, "step": 6398 }, { "epoch": 0.02832794723095312, "grad_norm": 3.3273589163928246, "learning_rate": 2.832794723095312e-06, "loss": 1.0963, "step": 6399 }, { "epoch": 0.028332374164416307, "grad_norm": 2.7038968832912387, "learning_rate": 2.833237416441631e-06, "loss": 0.9179, "step": 6400 }, { "epoch": 0.028336801097879498, "grad_norm": 3.1049990741850415, "learning_rate": 2.83368010978795e-06, "loss": 0.7997, "step": 6401 }, { "epoch": 0.02834122803134269, "grad_norm": 2.9103770375243485, "learning_rate": 2.8341228031342695e-06, "loss": 0.6484, "step": 6402 }, { "epoch": 0.02834565496480588, "grad_norm": 2.944720197974959, "learning_rate": 2.834565496480588e-06, "loss": 0.9569, "step": 6403 }, { "epoch": 0.02835008189826907, "grad_norm": 3.1127375407655773, "learning_rate": 2.8350081898269074e-06, "loss": 1.0492, "step": 6404 }, { "epoch": 0.02835450883173226, "grad_norm": 2.4522397872360115, "learning_rate": 2.8354508831732263e-06, "loss": 0.6417, "step": 6405 }, { "epoch": 0.02835893576519545, "grad_norm": 2.50551895151204, "learning_rate": 2.835893576519545e-06, "loss": 0.7207, "step": 6406 }, { "epoch": 0.028363362698658637, "grad_norm": 2.5433411481509496, "learning_rate": 2.836336269865864e-06, "loss": 0.7023, "step": 6407 }, { "epoch": 0.028367789632121828, "grad_norm": 3.2088771910774483, "learning_rate": 2.8367789632121836e-06, "loss": 0.9772, "step": 6408 }, { "epoch": 0.02837221656558502, "grad_norm": 2.6827868595764643, "learning_rate": 2.837221656558502e-06, "loss": 0.8002, "step": 6409 }, { "epoch": 0.02837664349904821, "grad_norm": 2.5633427796026234, "learning_rate": 2.837664349904821e-06, "loss": 0.6107, "step": 6410 }, { "epoch": 0.0283810704325114, "grad_norm": 2.8977844497809637, "learning_rate": 2.8381070432511404e-06, "loss": 0.9282, "step": 6411 }, { "epoch": 0.02838549736597459, "grad_norm": 2.881460060285097, "learning_rate": 2.838549736597459e-06, "loss": 0.888, "step": 6412 }, { "epoch": 0.02838992429943778, "grad_norm": 2.725965099834945, "learning_rate": 2.8389924299437783e-06, "loss": 0.8105, "step": 6413 }, { "epoch": 0.02839435123290097, "grad_norm": 2.567248184067708, "learning_rate": 2.8394351232900973e-06, "loss": 0.8267, "step": 6414 }, { "epoch": 0.028398778166364158, "grad_norm": 3.186067242876545, "learning_rate": 2.839877816636416e-06, "loss": 0.7856, "step": 6415 }, { "epoch": 0.02840320509982735, "grad_norm": 2.7075718038496035, "learning_rate": 2.840320509982735e-06, "loss": 0.7028, "step": 6416 }, { "epoch": 0.02840763203329054, "grad_norm": 2.9327937308124796, "learning_rate": 2.8407632033290545e-06, "loss": 0.7939, "step": 6417 }, { "epoch": 0.02841205896675373, "grad_norm": 2.7670398081959884, "learning_rate": 2.841205896675373e-06, "loss": 0.7411, "step": 6418 }, { "epoch": 0.02841648590021692, "grad_norm": 4.495531852699788, "learning_rate": 2.8416485900216924e-06, "loss": 1.3875, "step": 6419 }, { "epoch": 0.02842091283368011, "grad_norm": 2.571531193019465, "learning_rate": 2.8420912833680114e-06, "loss": 0.6959, "step": 6420 }, { "epoch": 0.0284253397671433, "grad_norm": 2.7584119221173315, "learning_rate": 2.84253397671433e-06, "loss": 0.5557, "step": 6421 }, { "epoch": 0.028429766700606488, "grad_norm": 4.7177637508574435, "learning_rate": 2.8429766700606493e-06, "loss": 0.5931, "step": 6422 }, { "epoch": 0.02843419363406968, "grad_norm": 2.8347464706504435, "learning_rate": 2.8434193634069686e-06, "loss": 0.8175, "step": 6423 }, { "epoch": 0.02843862056753287, "grad_norm": 3.8044611988307344, "learning_rate": 2.843862056753287e-06, "loss": 1.0434, "step": 6424 }, { "epoch": 0.02844304750099606, "grad_norm": 2.6896023077974447, "learning_rate": 2.844304750099606e-06, "loss": 0.7789, "step": 6425 }, { "epoch": 0.02844747443445925, "grad_norm": 3.4712712402023915, "learning_rate": 2.8447474434459255e-06, "loss": 0.8282, "step": 6426 }, { "epoch": 0.02845190136792244, "grad_norm": 3.713425675741754, "learning_rate": 2.845190136792244e-06, "loss": 0.645, "step": 6427 }, { "epoch": 0.02845632830138563, "grad_norm": 2.8084287525912255, "learning_rate": 2.8456328301385634e-06, "loss": 0.7524, "step": 6428 }, { "epoch": 0.02846075523484882, "grad_norm": 2.70761068042975, "learning_rate": 2.8460755234848823e-06, "loss": 0.812, "step": 6429 }, { "epoch": 0.02846518216831201, "grad_norm": 2.9964760913710657, "learning_rate": 2.8465182168312013e-06, "loss": 0.6492, "step": 6430 }, { "epoch": 0.0284696091017752, "grad_norm": 3.1321975126706287, "learning_rate": 2.84696091017752e-06, "loss": 0.9117, "step": 6431 }, { "epoch": 0.02847403603523839, "grad_norm": 2.370810779723057, "learning_rate": 2.8474036035238396e-06, "loss": 0.7507, "step": 6432 }, { "epoch": 0.02847846296870158, "grad_norm": 3.0217367597894893, "learning_rate": 2.847846296870158e-06, "loss": 0.9146, "step": 6433 }, { "epoch": 0.02848288990216477, "grad_norm": 3.29934813685153, "learning_rate": 2.8482889902164775e-06, "loss": 0.8968, "step": 6434 }, { "epoch": 0.02848731683562796, "grad_norm": 2.9974273073754323, "learning_rate": 2.8487316835627964e-06, "loss": 0.6999, "step": 6435 }, { "epoch": 0.028491743769091152, "grad_norm": 3.366747996672498, "learning_rate": 2.849174376909115e-06, "loss": 1.1528, "step": 6436 }, { "epoch": 0.028496170702554342, "grad_norm": 2.690815399241675, "learning_rate": 2.8496170702554343e-06, "loss": 0.6124, "step": 6437 }, { "epoch": 0.02850059763601753, "grad_norm": 3.339263890580513, "learning_rate": 2.8500597636017537e-06, "loss": 0.8999, "step": 6438 }, { "epoch": 0.02850502456948072, "grad_norm": 3.1236158894349124, "learning_rate": 2.850502456948072e-06, "loss": 0.5725, "step": 6439 }, { "epoch": 0.02850945150294391, "grad_norm": 2.8533843585279435, "learning_rate": 2.850945150294391e-06, "loss": 0.896, "step": 6440 }, { "epoch": 0.0285138784364071, "grad_norm": 2.365604040773084, "learning_rate": 2.8513878436407105e-06, "loss": 0.7525, "step": 6441 }, { "epoch": 0.02851830536987029, "grad_norm": 3.137508903960271, "learning_rate": 2.851830536987029e-06, "loss": 0.82, "step": 6442 }, { "epoch": 0.028522732303333482, "grad_norm": 2.6202278494947717, "learning_rate": 2.8522732303333484e-06, "loss": 0.8622, "step": 6443 }, { "epoch": 0.028527159236796672, "grad_norm": 2.904442949414452, "learning_rate": 2.8527159236796674e-06, "loss": 0.5933, "step": 6444 }, { "epoch": 0.02853158617025986, "grad_norm": 2.3644184461100566, "learning_rate": 2.8531586170259863e-06, "loss": 0.612, "step": 6445 }, { "epoch": 0.02853601310372305, "grad_norm": 3.333873247767814, "learning_rate": 2.8536013103723053e-06, "loss": 0.8363, "step": 6446 }, { "epoch": 0.02854044003718624, "grad_norm": 3.6149154388201246, "learning_rate": 2.8540440037186246e-06, "loss": 0.811, "step": 6447 }, { "epoch": 0.02854486697064943, "grad_norm": 2.7004264542929755, "learning_rate": 2.854486697064943e-06, "loss": 0.6318, "step": 6448 }, { "epoch": 0.02854929390411262, "grad_norm": 3.5920350878461913, "learning_rate": 2.8549293904112625e-06, "loss": 0.6458, "step": 6449 }, { "epoch": 0.028553720837575812, "grad_norm": 3.3656074220650334, "learning_rate": 2.8553720837575815e-06, "loss": 1.2863, "step": 6450 }, { "epoch": 0.028558147771039002, "grad_norm": 2.8163510150150497, "learning_rate": 2.8558147771039004e-06, "loss": 0.7516, "step": 6451 }, { "epoch": 0.028562574704502193, "grad_norm": 2.5518435126363506, "learning_rate": 2.8562574704502194e-06, "loss": 0.6329, "step": 6452 }, { "epoch": 0.02856700163796538, "grad_norm": 2.682864571936885, "learning_rate": 2.8567001637965387e-06, "loss": 0.4355, "step": 6453 }, { "epoch": 0.02857142857142857, "grad_norm": 2.7964579294155767, "learning_rate": 2.8571428571428573e-06, "loss": 0.7514, "step": 6454 }, { "epoch": 0.02857585550489176, "grad_norm": 2.967496247363309, "learning_rate": 2.857585550489176e-06, "loss": 0.6794, "step": 6455 }, { "epoch": 0.02858028243835495, "grad_norm": 2.9641878885647577, "learning_rate": 2.8580282438354956e-06, "loss": 1.0251, "step": 6456 }, { "epoch": 0.028584709371818142, "grad_norm": 3.8316125792384295, "learning_rate": 2.858470937181814e-06, "loss": 1.1953, "step": 6457 }, { "epoch": 0.028589136305281333, "grad_norm": 2.647925655763692, "learning_rate": 2.8589136305281335e-06, "loss": 0.8861, "step": 6458 }, { "epoch": 0.028593563238744523, "grad_norm": 3.8562417399737514, "learning_rate": 2.8593563238744524e-06, "loss": 1.2661, "step": 6459 }, { "epoch": 0.02859799017220771, "grad_norm": 4.21286271358229, "learning_rate": 2.8597990172207714e-06, "loss": 1.0741, "step": 6460 }, { "epoch": 0.0286024171056709, "grad_norm": 2.2953466059911287, "learning_rate": 2.8602417105670903e-06, "loss": 0.6782, "step": 6461 }, { "epoch": 0.02860684403913409, "grad_norm": 2.8995509418559338, "learning_rate": 2.8606844039134097e-06, "loss": 0.9096, "step": 6462 }, { "epoch": 0.02861127097259728, "grad_norm": 3.1524302222349547, "learning_rate": 2.861127097259728e-06, "loss": 0.8945, "step": 6463 }, { "epoch": 0.028615697906060472, "grad_norm": 2.966921987729771, "learning_rate": 2.8615697906060476e-06, "loss": 0.5029, "step": 6464 }, { "epoch": 0.028620124839523663, "grad_norm": 2.4774286936965337, "learning_rate": 2.8620124839523665e-06, "loss": 0.6841, "step": 6465 }, { "epoch": 0.028624551772986853, "grad_norm": 4.875673833747394, "learning_rate": 2.8624551772986855e-06, "loss": 1.4329, "step": 6466 }, { "epoch": 0.028628978706450044, "grad_norm": 2.794047608839129, "learning_rate": 2.8628978706450044e-06, "loss": 0.8811, "step": 6467 }, { "epoch": 0.02863340563991323, "grad_norm": 2.7256455515502647, "learning_rate": 2.8633405639913238e-06, "loss": 0.8615, "step": 6468 }, { "epoch": 0.02863783257337642, "grad_norm": 2.8432236864083307, "learning_rate": 2.8637832573376423e-06, "loss": 0.8919, "step": 6469 }, { "epoch": 0.02864225950683961, "grad_norm": 3.4778344769474803, "learning_rate": 2.8642259506839617e-06, "loss": 1.1068, "step": 6470 }, { "epoch": 0.028646686440302802, "grad_norm": 2.827908688682769, "learning_rate": 2.8646686440302806e-06, "loss": 0.4225, "step": 6471 }, { "epoch": 0.028651113373765993, "grad_norm": 3.013875893431785, "learning_rate": 2.865111337376599e-06, "loss": 0.5378, "step": 6472 }, { "epoch": 0.028655540307229183, "grad_norm": 2.6788950358241586, "learning_rate": 2.8655540307229185e-06, "loss": 0.7445, "step": 6473 }, { "epoch": 0.028659967240692374, "grad_norm": 2.9116972968962886, "learning_rate": 2.8659967240692375e-06, "loss": 0.6947, "step": 6474 }, { "epoch": 0.02866439417415556, "grad_norm": 3.629508121980898, "learning_rate": 2.8664394174155564e-06, "loss": 1.0155, "step": 6475 }, { "epoch": 0.02866882110761875, "grad_norm": 3.587731690001489, "learning_rate": 2.8668821107618754e-06, "loss": 1.0135, "step": 6476 }, { "epoch": 0.028673248041081942, "grad_norm": 2.9836271728667008, "learning_rate": 2.8673248041081947e-06, "loss": 0.7147, "step": 6477 }, { "epoch": 0.028677674974545132, "grad_norm": 4.066741774239654, "learning_rate": 2.8677674974545133e-06, "loss": 0.9679, "step": 6478 }, { "epoch": 0.028682101908008323, "grad_norm": 2.786170931111802, "learning_rate": 2.8682101908008326e-06, "loss": 0.6749, "step": 6479 }, { "epoch": 0.028686528841471513, "grad_norm": 3.2120685467396117, "learning_rate": 2.8686528841471516e-06, "loss": 0.6584, "step": 6480 }, { "epoch": 0.028690955774934704, "grad_norm": 3.3845065700731958, "learning_rate": 2.8690955774934705e-06, "loss": 1.0135, "step": 6481 }, { "epoch": 0.028695382708397894, "grad_norm": 2.6836105166884754, "learning_rate": 2.8695382708397895e-06, "loss": 0.8391, "step": 6482 }, { "epoch": 0.02869980964186108, "grad_norm": 4.012521057095535, "learning_rate": 2.869980964186109e-06, "loss": 1.2175, "step": 6483 }, { "epoch": 0.028704236575324272, "grad_norm": 3.0925374222428066, "learning_rate": 2.8704236575324274e-06, "loss": 0.9725, "step": 6484 }, { "epoch": 0.028708663508787462, "grad_norm": 2.7357413126213395, "learning_rate": 2.8708663508787467e-06, "loss": 0.6993, "step": 6485 }, { "epoch": 0.028713090442250653, "grad_norm": 3.582519094262149, "learning_rate": 2.8713090442250657e-06, "loss": 0.8053, "step": 6486 }, { "epoch": 0.028717517375713843, "grad_norm": 2.917459689983771, "learning_rate": 2.871751737571384e-06, "loss": 0.8271, "step": 6487 }, { "epoch": 0.028721944309177034, "grad_norm": 2.8016192267736244, "learning_rate": 2.8721944309177036e-06, "loss": 0.5882, "step": 6488 }, { "epoch": 0.028726371242640224, "grad_norm": 2.740966814726341, "learning_rate": 2.872637124264023e-06, "loss": 0.7966, "step": 6489 }, { "epoch": 0.02873079817610341, "grad_norm": 2.5263449394689577, "learning_rate": 2.8730798176103415e-06, "loss": 0.6728, "step": 6490 }, { "epoch": 0.028735225109566602, "grad_norm": 3.097755264032267, "learning_rate": 2.8735225109566604e-06, "loss": 0.8786, "step": 6491 }, { "epoch": 0.028739652043029792, "grad_norm": 3.1156942291335206, "learning_rate": 2.8739652043029798e-06, "loss": 1.15, "step": 6492 }, { "epoch": 0.028744078976492983, "grad_norm": 3.045105077240403, "learning_rate": 2.8744078976492983e-06, "loss": 0.6268, "step": 6493 }, { "epoch": 0.028748505909956173, "grad_norm": 3.2538788430664454, "learning_rate": 2.8748505909956177e-06, "loss": 0.9839, "step": 6494 }, { "epoch": 0.028752932843419364, "grad_norm": 2.672773272530359, "learning_rate": 2.8752932843419366e-06, "loss": 0.7644, "step": 6495 }, { "epoch": 0.028757359776882555, "grad_norm": 2.576297828719088, "learning_rate": 2.8757359776882556e-06, "loss": 0.9119, "step": 6496 }, { "epoch": 0.028761786710345745, "grad_norm": 4.023374732348853, "learning_rate": 2.8761786710345745e-06, "loss": 1.0282, "step": 6497 }, { "epoch": 0.028766213643808932, "grad_norm": 3.071937422142434, "learning_rate": 2.876621364380894e-06, "loss": 0.5209, "step": 6498 }, { "epoch": 0.028770640577272123, "grad_norm": 2.9172936102139233, "learning_rate": 2.8770640577272124e-06, "loss": 0.6448, "step": 6499 }, { "epoch": 0.028775067510735313, "grad_norm": 2.8545901452312776, "learning_rate": 2.8775067510735318e-06, "loss": 0.6661, "step": 6500 }, { "epoch": 0.028779494444198504, "grad_norm": 3.132934504692711, "learning_rate": 2.8779494444198507e-06, "loss": 0.797, "step": 6501 }, { "epoch": 0.028783921377661694, "grad_norm": 4.439944588809975, "learning_rate": 2.8783921377661693e-06, "loss": 0.9881, "step": 6502 }, { "epoch": 0.028788348311124885, "grad_norm": 3.17431928005242, "learning_rate": 2.8788348311124886e-06, "loss": 0.7145, "step": 6503 }, { "epoch": 0.028792775244588075, "grad_norm": 3.0909613467495713, "learning_rate": 2.879277524458808e-06, "loss": 0.7807, "step": 6504 }, { "epoch": 0.028797202178051266, "grad_norm": 3.1875029094683898, "learning_rate": 2.8797202178051265e-06, "loss": 0.7631, "step": 6505 }, { "epoch": 0.028801629111514453, "grad_norm": 3.3251992344381907, "learning_rate": 2.8801629111514455e-06, "loss": 0.9896, "step": 6506 }, { "epoch": 0.028806056044977643, "grad_norm": 2.570377415927251, "learning_rate": 2.880605604497765e-06, "loss": 0.5065, "step": 6507 }, { "epoch": 0.028810482978440834, "grad_norm": 2.9119288320471446, "learning_rate": 2.8810482978440834e-06, "loss": 0.5429, "step": 6508 }, { "epoch": 0.028814909911904024, "grad_norm": 3.0155756593878675, "learning_rate": 2.8814909911904027e-06, "loss": 0.9386, "step": 6509 }, { "epoch": 0.028819336845367215, "grad_norm": 3.0952584707723556, "learning_rate": 2.8819336845367217e-06, "loss": 0.5929, "step": 6510 }, { "epoch": 0.028823763778830405, "grad_norm": 3.0123075927530416, "learning_rate": 2.8823763778830406e-06, "loss": 0.9929, "step": 6511 }, { "epoch": 0.028828190712293596, "grad_norm": 2.7871882170594358, "learning_rate": 2.8828190712293596e-06, "loss": 0.6911, "step": 6512 }, { "epoch": 0.028832617645756783, "grad_norm": 2.613667928918865, "learning_rate": 2.883261764575679e-06, "loss": 0.597, "step": 6513 }, { "epoch": 0.028837044579219973, "grad_norm": 2.58405754025337, "learning_rate": 2.8837044579219975e-06, "loss": 0.4886, "step": 6514 }, { "epoch": 0.028841471512683164, "grad_norm": 3.0286527121271423, "learning_rate": 2.884147151268317e-06, "loss": 0.6268, "step": 6515 }, { "epoch": 0.028845898446146354, "grad_norm": 3.087334079770866, "learning_rate": 2.8845898446146358e-06, "loss": 1.066, "step": 6516 }, { "epoch": 0.028850325379609545, "grad_norm": 4.155134250095059, "learning_rate": 2.8850325379609543e-06, "loss": 1.2312, "step": 6517 }, { "epoch": 0.028854752313072735, "grad_norm": 3.5048430513092717, "learning_rate": 2.8854752313072737e-06, "loss": 1.2854, "step": 6518 }, { "epoch": 0.028859179246535926, "grad_norm": 2.701935098088227, "learning_rate": 2.885917924653593e-06, "loss": 0.6329, "step": 6519 }, { "epoch": 0.028863606179999116, "grad_norm": 3.611094245963218, "learning_rate": 2.8863606179999116e-06, "loss": 1.1481, "step": 6520 }, { "epoch": 0.028868033113462303, "grad_norm": 2.935427771238375, "learning_rate": 2.8868033113462305e-06, "loss": 0.8453, "step": 6521 }, { "epoch": 0.028872460046925494, "grad_norm": 2.7252172325177293, "learning_rate": 2.88724600469255e-06, "loss": 0.6493, "step": 6522 }, { "epoch": 0.028876886980388684, "grad_norm": 2.9274485449363183, "learning_rate": 2.8876886980388684e-06, "loss": 0.6953, "step": 6523 }, { "epoch": 0.028881313913851875, "grad_norm": 2.934795084260256, "learning_rate": 2.8881313913851878e-06, "loss": 0.9677, "step": 6524 }, { "epoch": 0.028885740847315065, "grad_norm": 2.874275731089834, "learning_rate": 2.8885740847315067e-06, "loss": 0.9102, "step": 6525 }, { "epoch": 0.028890167780778256, "grad_norm": 2.74868832889989, "learning_rate": 2.8890167780778257e-06, "loss": 0.9163, "step": 6526 }, { "epoch": 0.028894594714241446, "grad_norm": 2.4436272401559287, "learning_rate": 2.8894594714241446e-06, "loss": 0.5058, "step": 6527 }, { "epoch": 0.028899021647704633, "grad_norm": 2.8748652796855354, "learning_rate": 2.889902164770464e-06, "loss": 0.7143, "step": 6528 }, { "epoch": 0.028903448581167824, "grad_norm": 4.250351543970601, "learning_rate": 2.8903448581167825e-06, "loss": 0.8738, "step": 6529 }, { "epoch": 0.028907875514631014, "grad_norm": 3.0210605707465894, "learning_rate": 2.890787551463102e-06, "loss": 1.1751, "step": 6530 }, { "epoch": 0.028912302448094205, "grad_norm": 2.8460389906726378, "learning_rate": 2.891230244809421e-06, "loss": 0.6845, "step": 6531 }, { "epoch": 0.028916729381557395, "grad_norm": 3.031602277090719, "learning_rate": 2.8916729381557394e-06, "loss": 0.8491, "step": 6532 }, { "epoch": 0.028921156315020586, "grad_norm": 2.814331979443796, "learning_rate": 2.8921156315020587e-06, "loss": 0.7831, "step": 6533 }, { "epoch": 0.028925583248483776, "grad_norm": 2.8430362369807587, "learning_rate": 2.892558324848378e-06, "loss": 0.8734, "step": 6534 }, { "epoch": 0.028930010181946967, "grad_norm": 3.100343659460957, "learning_rate": 2.8930010181946966e-06, "loss": 0.8903, "step": 6535 }, { "epoch": 0.028934437115410154, "grad_norm": 3.617638803650921, "learning_rate": 2.8934437115410156e-06, "loss": 1.2135, "step": 6536 }, { "epoch": 0.028938864048873345, "grad_norm": 2.7949042754253726, "learning_rate": 2.893886404887335e-06, "loss": 0.6085, "step": 6537 }, { "epoch": 0.028943290982336535, "grad_norm": 3.137081498709568, "learning_rate": 2.8943290982336535e-06, "loss": 0.9003, "step": 6538 }, { "epoch": 0.028947717915799726, "grad_norm": 3.178586267569266, "learning_rate": 2.894771791579973e-06, "loss": 0.8168, "step": 6539 }, { "epoch": 0.028952144849262916, "grad_norm": 3.136635695434097, "learning_rate": 2.8952144849262918e-06, "loss": 0.9662, "step": 6540 }, { "epoch": 0.028956571782726107, "grad_norm": 3.1593019390248305, "learning_rate": 2.8956571782726107e-06, "loss": 0.9798, "step": 6541 }, { "epoch": 0.028960998716189297, "grad_norm": 2.639203900130351, "learning_rate": 2.8960998716189297e-06, "loss": 0.9914, "step": 6542 }, { "epoch": 0.028965425649652484, "grad_norm": 2.6501069473231653, "learning_rate": 2.896542564965249e-06, "loss": 0.6007, "step": 6543 }, { "epoch": 0.028969852583115675, "grad_norm": 3.0433306304578918, "learning_rate": 2.8969852583115676e-06, "loss": 0.7245, "step": 6544 }, { "epoch": 0.028974279516578865, "grad_norm": 2.6893005628459625, "learning_rate": 2.897427951657887e-06, "loss": 0.6213, "step": 6545 }, { "epoch": 0.028978706450042056, "grad_norm": 3.0069305122302445, "learning_rate": 2.897870645004206e-06, "loss": 0.6339, "step": 6546 }, { "epoch": 0.028983133383505246, "grad_norm": 2.9081570969796853, "learning_rate": 2.898313338350525e-06, "loss": 0.6487, "step": 6547 }, { "epoch": 0.028987560316968437, "grad_norm": 2.7346880714501673, "learning_rate": 2.898756031696844e-06, "loss": 0.8796, "step": 6548 }, { "epoch": 0.028991987250431627, "grad_norm": 3.2128924723992753, "learning_rate": 2.899198725043163e-06, "loss": 0.9526, "step": 6549 }, { "epoch": 0.028996414183894818, "grad_norm": 3.1029011027886235, "learning_rate": 2.8996414183894817e-06, "loss": 0.8303, "step": 6550 }, { "epoch": 0.029000841117358005, "grad_norm": 2.9322132298993875, "learning_rate": 2.900084111735801e-06, "loss": 1.0463, "step": 6551 }, { "epoch": 0.029005268050821195, "grad_norm": 3.685730499933722, "learning_rate": 2.90052680508212e-06, "loss": 1.0064, "step": 6552 }, { "epoch": 0.029009694984284386, "grad_norm": 3.250796393343884, "learning_rate": 2.9009694984284385e-06, "loss": 1.1311, "step": 6553 }, { "epoch": 0.029014121917747576, "grad_norm": 2.7874780081058304, "learning_rate": 2.901412191774758e-06, "loss": 0.481, "step": 6554 }, { "epoch": 0.029018548851210767, "grad_norm": 3.144331164371267, "learning_rate": 2.901854885121077e-06, "loss": 0.9774, "step": 6555 }, { "epoch": 0.029022975784673957, "grad_norm": 2.5146582371816026, "learning_rate": 2.902297578467396e-06, "loss": 0.6013, "step": 6556 }, { "epoch": 0.029027402718137148, "grad_norm": 3.6423291195902157, "learning_rate": 2.9027402718137147e-06, "loss": 1.2726, "step": 6557 }, { "epoch": 0.029031829651600335, "grad_norm": 2.593025854659271, "learning_rate": 2.903182965160034e-06, "loss": 0.4797, "step": 6558 }, { "epoch": 0.029036256585063525, "grad_norm": 2.672716236977905, "learning_rate": 2.9036256585063526e-06, "loss": 0.8715, "step": 6559 }, { "epoch": 0.029040683518526716, "grad_norm": 2.9182322077253757, "learning_rate": 2.904068351852672e-06, "loss": 0.7713, "step": 6560 }, { "epoch": 0.029045110451989906, "grad_norm": 2.9770348515230878, "learning_rate": 2.904511045198991e-06, "loss": 0.792, "step": 6561 }, { "epoch": 0.029049537385453097, "grad_norm": 2.69529368084661, "learning_rate": 2.90495373854531e-06, "loss": 0.668, "step": 6562 }, { "epoch": 0.029053964318916287, "grad_norm": 3.0850246343549075, "learning_rate": 2.905396431891629e-06, "loss": 0.8493, "step": 6563 }, { "epoch": 0.029058391252379478, "grad_norm": 3.111978669830556, "learning_rate": 2.9058391252379482e-06, "loss": 0.901, "step": 6564 }, { "epoch": 0.02906281818584267, "grad_norm": 3.427235302143178, "learning_rate": 2.9062818185842667e-06, "loss": 0.9954, "step": 6565 }, { "epoch": 0.029067245119305855, "grad_norm": 3.4101489366636577, "learning_rate": 2.906724511930586e-06, "loss": 0.7759, "step": 6566 }, { "epoch": 0.029071672052769046, "grad_norm": 3.306144648721213, "learning_rate": 2.907167205276905e-06, "loss": 0.8026, "step": 6567 }, { "epoch": 0.029076098986232236, "grad_norm": 2.974934575073074, "learning_rate": 2.9076098986232236e-06, "loss": 0.7933, "step": 6568 }, { "epoch": 0.029080525919695427, "grad_norm": 3.2816409844759815, "learning_rate": 2.908052591969543e-06, "loss": 0.9201, "step": 6569 }, { "epoch": 0.029084952853158617, "grad_norm": 3.465583029189486, "learning_rate": 2.9084952853158623e-06, "loss": 1.0903, "step": 6570 }, { "epoch": 0.029089379786621808, "grad_norm": 2.5655706215407483, "learning_rate": 2.908937978662181e-06, "loss": 0.7025, "step": 6571 }, { "epoch": 0.029093806720085, "grad_norm": 3.317702121192985, "learning_rate": 2.9093806720085e-06, "loss": 1.0815, "step": 6572 }, { "epoch": 0.029098233653548185, "grad_norm": 2.842638385130514, "learning_rate": 2.909823365354819e-06, "loss": 0.8292, "step": 6573 }, { "epoch": 0.029102660587011376, "grad_norm": 2.893579225833732, "learning_rate": 2.9102660587011377e-06, "loss": 0.9407, "step": 6574 }, { "epoch": 0.029107087520474566, "grad_norm": 2.5677970896065925, "learning_rate": 2.910708752047457e-06, "loss": 0.7688, "step": 6575 }, { "epoch": 0.029111514453937757, "grad_norm": 3.320831931799861, "learning_rate": 2.911151445393776e-06, "loss": 1.0213, "step": 6576 }, { "epoch": 0.029115941387400947, "grad_norm": 2.9399262638917496, "learning_rate": 2.911594138740095e-06, "loss": 0.7421, "step": 6577 }, { "epoch": 0.029120368320864138, "grad_norm": 2.486150200287373, "learning_rate": 2.912036832086414e-06, "loss": 0.7073, "step": 6578 }, { "epoch": 0.02912479525432733, "grad_norm": 2.7388149007950577, "learning_rate": 2.9124795254327333e-06, "loss": 0.6114, "step": 6579 }, { "epoch": 0.02912922218779052, "grad_norm": 2.6169299279642915, "learning_rate": 2.912922218779052e-06, "loss": 0.8177, "step": 6580 }, { "epoch": 0.029133649121253706, "grad_norm": 3.1885009781074882, "learning_rate": 2.913364912125371e-06, "loss": 0.8758, "step": 6581 }, { "epoch": 0.029138076054716897, "grad_norm": 2.513585771752577, "learning_rate": 2.91380760547169e-06, "loss": 0.7385, "step": 6582 }, { "epoch": 0.029142502988180087, "grad_norm": 2.653756695213769, "learning_rate": 2.9142502988180086e-06, "loss": 0.5457, "step": 6583 }, { "epoch": 0.029146929921643278, "grad_norm": 2.8492862586975343, "learning_rate": 2.914692992164328e-06, "loss": 0.8626, "step": 6584 }, { "epoch": 0.029151356855106468, "grad_norm": 2.946235535146167, "learning_rate": 2.9151356855106474e-06, "loss": 0.6153, "step": 6585 }, { "epoch": 0.02915578378856966, "grad_norm": 3.2938999517111074, "learning_rate": 2.915578378856966e-06, "loss": 0.7183, "step": 6586 }, { "epoch": 0.02916021072203285, "grad_norm": 3.2113636533283243, "learning_rate": 2.916021072203285e-06, "loss": 0.4281, "step": 6587 }, { "epoch": 0.02916463765549604, "grad_norm": 2.3632685832314873, "learning_rate": 2.9164637655496042e-06, "loss": 0.5661, "step": 6588 }, { "epoch": 0.029169064588959227, "grad_norm": 2.726464409658783, "learning_rate": 2.9169064588959227e-06, "loss": 0.9956, "step": 6589 }, { "epoch": 0.029173491522422417, "grad_norm": 3.122907408947614, "learning_rate": 2.917349152242242e-06, "loss": 0.7652, "step": 6590 }, { "epoch": 0.029177918455885608, "grad_norm": 4.750680161335253, "learning_rate": 2.917791845588561e-06, "loss": 1.3543, "step": 6591 }, { "epoch": 0.029182345389348798, "grad_norm": 3.62753517958974, "learning_rate": 2.91823453893488e-06, "loss": 1.0346, "step": 6592 }, { "epoch": 0.02918677232281199, "grad_norm": 2.6925778732703303, "learning_rate": 2.918677232281199e-06, "loss": 0.5267, "step": 6593 }, { "epoch": 0.02919119925627518, "grad_norm": 3.255043632597592, "learning_rate": 2.9191199256275183e-06, "loss": 1.071, "step": 6594 }, { "epoch": 0.02919562618973837, "grad_norm": 2.870425309040239, "learning_rate": 2.919562618973837e-06, "loss": 0.8543, "step": 6595 }, { "epoch": 0.029200053123201557, "grad_norm": 3.3983468395864276, "learning_rate": 2.9200053123201562e-06, "loss": 0.808, "step": 6596 }, { "epoch": 0.029204480056664747, "grad_norm": 2.4234861829793304, "learning_rate": 2.920448005666475e-06, "loss": 0.5095, "step": 6597 }, { "epoch": 0.029208906990127938, "grad_norm": 2.8409476964817824, "learning_rate": 2.9208906990127937e-06, "loss": 1.0137, "step": 6598 }, { "epoch": 0.029213333923591128, "grad_norm": 2.672943089031463, "learning_rate": 2.921333392359113e-06, "loss": 0.5927, "step": 6599 }, { "epoch": 0.02921776085705432, "grad_norm": 3.1466072467658917, "learning_rate": 2.9217760857054324e-06, "loss": 0.8372, "step": 6600 }, { "epoch": 0.02922218779051751, "grad_norm": 2.754933587014956, "learning_rate": 2.922218779051751e-06, "loss": 0.6664, "step": 6601 }, { "epoch": 0.0292266147239807, "grad_norm": 3.861982993181804, "learning_rate": 2.92266147239807e-06, "loss": 1.1336, "step": 6602 }, { "epoch": 0.02923104165744389, "grad_norm": 2.8942161270070197, "learning_rate": 2.9231041657443893e-06, "loss": 0.8239, "step": 6603 }, { "epoch": 0.029235468590907077, "grad_norm": 2.375472304359663, "learning_rate": 2.923546859090708e-06, "loss": 0.5667, "step": 6604 }, { "epoch": 0.029239895524370268, "grad_norm": 2.787438271232896, "learning_rate": 2.923989552437027e-06, "loss": 0.6498, "step": 6605 }, { "epoch": 0.02924432245783346, "grad_norm": 3.373330335271328, "learning_rate": 2.924432245783346e-06, "loss": 0.7927, "step": 6606 }, { "epoch": 0.02924874939129665, "grad_norm": 2.5639507112099102, "learning_rate": 2.924874939129665e-06, "loss": 0.8053, "step": 6607 }, { "epoch": 0.02925317632475984, "grad_norm": 2.600448043886664, "learning_rate": 2.925317632475984e-06, "loss": 0.5872, "step": 6608 }, { "epoch": 0.02925760325822303, "grad_norm": 2.748134560676255, "learning_rate": 2.9257603258223034e-06, "loss": 0.7694, "step": 6609 }, { "epoch": 0.02926203019168622, "grad_norm": 2.695169572762281, "learning_rate": 2.926203019168622e-06, "loss": 0.6761, "step": 6610 }, { "epoch": 0.029266457125149407, "grad_norm": 2.5124917853966853, "learning_rate": 2.9266457125149413e-06, "loss": 0.7571, "step": 6611 }, { "epoch": 0.029270884058612598, "grad_norm": 3.1939719681194436, "learning_rate": 2.9270884058612602e-06, "loss": 0.8143, "step": 6612 }, { "epoch": 0.02927531099207579, "grad_norm": 3.615255052196171, "learning_rate": 2.9275310992075787e-06, "loss": 1.4671, "step": 6613 }, { "epoch": 0.02927973792553898, "grad_norm": 3.2900138325085497, "learning_rate": 2.927973792553898e-06, "loss": 0.7968, "step": 6614 }, { "epoch": 0.02928416485900217, "grad_norm": 3.2707903006572203, "learning_rate": 2.9284164859002175e-06, "loss": 0.7092, "step": 6615 }, { "epoch": 0.02928859179246536, "grad_norm": 3.5255089260015344, "learning_rate": 2.928859179246536e-06, "loss": 1.202, "step": 6616 }, { "epoch": 0.02929301872592855, "grad_norm": 4.584779735097746, "learning_rate": 2.929301872592855e-06, "loss": 1.3107, "step": 6617 }, { "epoch": 0.02929744565939174, "grad_norm": 4.353224447058061, "learning_rate": 2.9297445659391743e-06, "loss": 1.5009, "step": 6618 }, { "epoch": 0.029301872592854928, "grad_norm": 2.8844888595507308, "learning_rate": 2.930187259285493e-06, "loss": 0.8754, "step": 6619 }, { "epoch": 0.02930629952631812, "grad_norm": 2.773834855679543, "learning_rate": 2.9306299526318122e-06, "loss": 0.79, "step": 6620 }, { "epoch": 0.02931072645978131, "grad_norm": 2.6703383201541016, "learning_rate": 2.931072645978131e-06, "loss": 0.874, "step": 6621 }, { "epoch": 0.0293151533932445, "grad_norm": 2.508180302785868, "learning_rate": 2.93151533932445e-06, "loss": 0.5798, "step": 6622 }, { "epoch": 0.02931958032670769, "grad_norm": 3.3942469501699555, "learning_rate": 2.931958032670769e-06, "loss": 1.1023, "step": 6623 }, { "epoch": 0.02932400726017088, "grad_norm": 3.5224088012574644, "learning_rate": 2.9324007260170884e-06, "loss": 0.8106, "step": 6624 }, { "epoch": 0.02932843419363407, "grad_norm": 3.4850224392706965, "learning_rate": 2.932843419363407e-06, "loss": 0.6938, "step": 6625 }, { "epoch": 0.029332861127097258, "grad_norm": 4.089107683367312, "learning_rate": 2.9332861127097263e-06, "loss": 1.231, "step": 6626 }, { "epoch": 0.02933728806056045, "grad_norm": 2.8545976321894644, "learning_rate": 2.9337288060560453e-06, "loss": 0.7311, "step": 6627 }, { "epoch": 0.02934171499402364, "grad_norm": 2.821931584391023, "learning_rate": 2.9341714994023642e-06, "loss": 0.828, "step": 6628 }, { "epoch": 0.02934614192748683, "grad_norm": 3.4862404095949424, "learning_rate": 2.934614192748683e-06, "loss": 0.9844, "step": 6629 }, { "epoch": 0.02935056886095002, "grad_norm": 2.6067810807050846, "learning_rate": 2.9350568860950025e-06, "loss": 0.6651, "step": 6630 }, { "epoch": 0.02935499579441321, "grad_norm": 2.307892665460926, "learning_rate": 2.935499579441321e-06, "loss": 0.5601, "step": 6631 }, { "epoch": 0.0293594227278764, "grad_norm": 2.7374597589581406, "learning_rate": 2.93594227278764e-06, "loss": 0.8633, "step": 6632 }, { "epoch": 0.02936384966133959, "grad_norm": 2.488207409836583, "learning_rate": 2.9363849661339594e-06, "loss": 0.6506, "step": 6633 }, { "epoch": 0.02936827659480278, "grad_norm": 3.726789983621912, "learning_rate": 2.936827659480278e-06, "loss": 0.868, "step": 6634 }, { "epoch": 0.02937270352826597, "grad_norm": 3.1410682144470354, "learning_rate": 2.9372703528265973e-06, "loss": 0.923, "step": 6635 }, { "epoch": 0.02937713046172916, "grad_norm": 2.643710079089087, "learning_rate": 2.9377130461729162e-06, "loss": 0.7345, "step": 6636 }, { "epoch": 0.02938155739519235, "grad_norm": 2.9932246644754206, "learning_rate": 2.938155739519235e-06, "loss": 0.7351, "step": 6637 }, { "epoch": 0.02938598432865554, "grad_norm": 2.4318986043913484, "learning_rate": 2.938598432865554e-06, "loss": 0.8445, "step": 6638 }, { "epoch": 0.02939041126211873, "grad_norm": 2.779877702028883, "learning_rate": 2.9390411262118735e-06, "loss": 0.7241, "step": 6639 }, { "epoch": 0.02939483819558192, "grad_norm": 2.8396511131480913, "learning_rate": 2.939483819558192e-06, "loss": 0.8089, "step": 6640 }, { "epoch": 0.02939926512904511, "grad_norm": 3.180700682062504, "learning_rate": 2.9399265129045114e-06, "loss": 0.7749, "step": 6641 }, { "epoch": 0.0294036920625083, "grad_norm": 2.8285391684007952, "learning_rate": 2.9403692062508303e-06, "loss": 0.7546, "step": 6642 }, { "epoch": 0.02940811899597149, "grad_norm": 2.7316965338307093, "learning_rate": 2.9408118995971493e-06, "loss": 0.6737, "step": 6643 }, { "epoch": 0.02941254592943468, "grad_norm": 3.4902600814149265, "learning_rate": 2.9412545929434682e-06, "loss": 0.9204, "step": 6644 }, { "epoch": 0.02941697286289787, "grad_norm": 3.1642228505633323, "learning_rate": 2.9416972862897876e-06, "loss": 1.0745, "step": 6645 }, { "epoch": 0.02942139979636106, "grad_norm": 2.609946589207161, "learning_rate": 2.942139979636106e-06, "loss": 0.6736, "step": 6646 }, { "epoch": 0.029425826729824252, "grad_norm": 3.370962760056527, "learning_rate": 2.9425826729824255e-06, "loss": 1.0585, "step": 6647 }, { "epoch": 0.029430253663287442, "grad_norm": 3.59454072686399, "learning_rate": 2.9430253663287444e-06, "loss": 0.5669, "step": 6648 }, { "epoch": 0.02943468059675063, "grad_norm": 4.1627351555001075, "learning_rate": 2.943468059675063e-06, "loss": 1.0217, "step": 6649 }, { "epoch": 0.02943910753021382, "grad_norm": 2.8469802201180845, "learning_rate": 2.9439107530213823e-06, "loss": 0.875, "step": 6650 }, { "epoch": 0.02944353446367701, "grad_norm": 2.9486225214002584, "learning_rate": 2.9443534463677017e-06, "loss": 0.8165, "step": 6651 }, { "epoch": 0.0294479613971402, "grad_norm": 3.3798430989752326, "learning_rate": 2.9447961397140202e-06, "loss": 0.6697, "step": 6652 }, { "epoch": 0.02945238833060339, "grad_norm": 2.9365032210455624, "learning_rate": 2.945238833060339e-06, "loss": 0.591, "step": 6653 }, { "epoch": 0.029456815264066582, "grad_norm": 2.9974680462420213, "learning_rate": 2.9456815264066585e-06, "loss": 0.902, "step": 6654 }, { "epoch": 0.029461242197529772, "grad_norm": 2.490772202879507, "learning_rate": 2.946124219752977e-06, "loss": 0.4808, "step": 6655 }, { "epoch": 0.029465669130992963, "grad_norm": 2.3886168626605935, "learning_rate": 2.9465669130992964e-06, "loss": 0.6453, "step": 6656 }, { "epoch": 0.02947009606445615, "grad_norm": 2.858630216194146, "learning_rate": 2.9470096064456154e-06, "loss": 0.737, "step": 6657 }, { "epoch": 0.02947452299791934, "grad_norm": 3.212002160106771, "learning_rate": 2.9474522997919343e-06, "loss": 0.875, "step": 6658 }, { "epoch": 0.02947894993138253, "grad_norm": 2.396343876442365, "learning_rate": 2.9478949931382533e-06, "loss": 0.5866, "step": 6659 }, { "epoch": 0.02948337686484572, "grad_norm": 2.6572627592985003, "learning_rate": 2.9483376864845726e-06, "loss": 0.5177, "step": 6660 }, { "epoch": 0.029487803798308912, "grad_norm": 2.931773855271273, "learning_rate": 2.948780379830891e-06, "loss": 0.8412, "step": 6661 }, { "epoch": 0.029492230731772102, "grad_norm": 2.807156685478042, "learning_rate": 2.9492230731772105e-06, "loss": 0.7171, "step": 6662 }, { "epoch": 0.029496657665235293, "grad_norm": 3.4628248511735804, "learning_rate": 2.9496657665235295e-06, "loss": 0.791, "step": 6663 }, { "epoch": 0.02950108459869848, "grad_norm": 3.6593421849889993, "learning_rate": 2.950108459869848e-06, "loss": 0.9003, "step": 6664 }, { "epoch": 0.02950551153216167, "grad_norm": 3.1683332732001106, "learning_rate": 2.9505511532161674e-06, "loss": 0.8066, "step": 6665 }, { "epoch": 0.02950993846562486, "grad_norm": 2.7980586898203015, "learning_rate": 2.9509938465624867e-06, "loss": 0.7761, "step": 6666 }, { "epoch": 0.02951436539908805, "grad_norm": 4.007337542434652, "learning_rate": 2.9514365399088053e-06, "loss": 0.9721, "step": 6667 }, { "epoch": 0.029518792332551242, "grad_norm": 2.584332624324193, "learning_rate": 2.9518792332551242e-06, "loss": 0.7705, "step": 6668 }, { "epoch": 0.029523219266014433, "grad_norm": 2.695580874731333, "learning_rate": 2.9523219266014436e-06, "loss": 0.9644, "step": 6669 }, { "epoch": 0.029527646199477623, "grad_norm": 3.0598805567862026, "learning_rate": 2.952764619947762e-06, "loss": 0.8521, "step": 6670 }, { "epoch": 0.029532073132940814, "grad_norm": 2.7547874389207987, "learning_rate": 2.9532073132940815e-06, "loss": 0.6642, "step": 6671 }, { "epoch": 0.029536500066404, "grad_norm": 2.769729518895553, "learning_rate": 2.9536500066404004e-06, "loss": 0.9173, "step": 6672 }, { "epoch": 0.02954092699986719, "grad_norm": 3.8157742814767017, "learning_rate": 2.9540926999867194e-06, "loss": 1.2652, "step": 6673 }, { "epoch": 0.02954535393333038, "grad_norm": 3.95935557685118, "learning_rate": 2.9545353933330383e-06, "loss": 1.1031, "step": 6674 }, { "epoch": 0.029549780866793572, "grad_norm": 2.9701343299741767, "learning_rate": 2.9549780866793577e-06, "loss": 0.8096, "step": 6675 }, { "epoch": 0.029554207800256763, "grad_norm": 3.0145028271927057, "learning_rate": 2.9554207800256762e-06, "loss": 0.5327, "step": 6676 }, { "epoch": 0.029558634733719953, "grad_norm": 3.3165789967560393, "learning_rate": 2.9558634733719956e-06, "loss": 0.6174, "step": 6677 }, { "epoch": 0.029563061667183144, "grad_norm": 3.327450060620728, "learning_rate": 2.9563061667183145e-06, "loss": 1.0742, "step": 6678 }, { "epoch": 0.02956748860064633, "grad_norm": 3.9433797073657164, "learning_rate": 2.956748860064633e-06, "loss": 1.2749, "step": 6679 }, { "epoch": 0.02957191553410952, "grad_norm": 3.488724481104261, "learning_rate": 2.9571915534109524e-06, "loss": 1.0266, "step": 6680 }, { "epoch": 0.02957634246757271, "grad_norm": 3.5944903476581556, "learning_rate": 2.957634246757272e-06, "loss": 0.9638, "step": 6681 }, { "epoch": 0.029580769401035902, "grad_norm": 2.53173125627027, "learning_rate": 2.9580769401035903e-06, "loss": 0.8255, "step": 6682 }, { "epoch": 0.029585196334499093, "grad_norm": 2.7610650464376536, "learning_rate": 2.9585196334499093e-06, "loss": 0.7064, "step": 6683 }, { "epoch": 0.029589623267962283, "grad_norm": 2.7342934591145838, "learning_rate": 2.9589623267962286e-06, "loss": 0.8313, "step": 6684 }, { "epoch": 0.029594050201425474, "grad_norm": 3.0526781758347967, "learning_rate": 2.959405020142547e-06, "loss": 0.8975, "step": 6685 }, { "epoch": 0.029598477134888664, "grad_norm": 2.9598840983332058, "learning_rate": 2.9598477134888665e-06, "loss": 0.582, "step": 6686 }, { "epoch": 0.02960290406835185, "grad_norm": 3.014103610652749, "learning_rate": 2.9602904068351855e-06, "loss": 0.667, "step": 6687 }, { "epoch": 0.029607331001815042, "grad_norm": 2.6726060358193306, "learning_rate": 2.9607331001815044e-06, "loss": 0.8428, "step": 6688 }, { "epoch": 0.029611757935278232, "grad_norm": 3.8726647134843404, "learning_rate": 2.9611757935278234e-06, "loss": 0.7667, "step": 6689 }, { "epoch": 0.029616184868741423, "grad_norm": 3.3295956645216136, "learning_rate": 2.9616184868741427e-06, "loss": 0.679, "step": 6690 }, { "epoch": 0.029620611802204613, "grad_norm": 2.4033982588106437, "learning_rate": 2.9620611802204613e-06, "loss": 0.6547, "step": 6691 }, { "epoch": 0.029625038735667804, "grad_norm": 3.70107210238619, "learning_rate": 2.9625038735667806e-06, "loss": 1.2757, "step": 6692 }, { "epoch": 0.029629465669130994, "grad_norm": 2.7268956360411574, "learning_rate": 2.9629465669130996e-06, "loss": 0.9393, "step": 6693 }, { "epoch": 0.02963389260259418, "grad_norm": 3.4937368582467725, "learning_rate": 2.963389260259418e-06, "loss": 0.5288, "step": 6694 }, { "epoch": 0.029638319536057372, "grad_norm": 2.7785473105826552, "learning_rate": 2.9638319536057375e-06, "loss": 0.437, "step": 6695 }, { "epoch": 0.029642746469520562, "grad_norm": 2.8532282997082348, "learning_rate": 2.964274646952057e-06, "loss": 1.0354, "step": 6696 }, { "epoch": 0.029647173402983753, "grad_norm": 3.337154663763085, "learning_rate": 2.9647173402983754e-06, "loss": 1.0447, "step": 6697 }, { "epoch": 0.029651600336446943, "grad_norm": 2.6553299459000743, "learning_rate": 2.9651600336446943e-06, "loss": 0.8297, "step": 6698 }, { "epoch": 0.029656027269910134, "grad_norm": 2.5664025171400615, "learning_rate": 2.9656027269910137e-06, "loss": 0.7249, "step": 6699 }, { "epoch": 0.029660454203373324, "grad_norm": 2.9776979106161106, "learning_rate": 2.9660454203373322e-06, "loss": 0.6661, "step": 6700 }, { "epoch": 0.029664881136836515, "grad_norm": 2.556697706332524, "learning_rate": 2.9664881136836516e-06, "loss": 0.7476, "step": 6701 }, { "epoch": 0.029669308070299702, "grad_norm": 2.3801500073730644, "learning_rate": 2.9669308070299705e-06, "loss": 0.5072, "step": 6702 }, { "epoch": 0.029673735003762892, "grad_norm": 2.3620091553753597, "learning_rate": 2.9673735003762895e-06, "loss": 0.7288, "step": 6703 }, { "epoch": 0.029678161937226083, "grad_norm": 2.816202310522243, "learning_rate": 2.9678161937226084e-06, "loss": 0.915, "step": 6704 }, { "epoch": 0.029682588870689273, "grad_norm": 2.915333997282888, "learning_rate": 2.968258887068928e-06, "loss": 0.8501, "step": 6705 }, { "epoch": 0.029687015804152464, "grad_norm": 3.1538292667270915, "learning_rate": 2.9687015804152463e-06, "loss": 0.7034, "step": 6706 }, { "epoch": 0.029691442737615655, "grad_norm": 3.1778708261092032, "learning_rate": 2.9691442737615657e-06, "loss": 0.696, "step": 6707 }, { "epoch": 0.029695869671078845, "grad_norm": 2.664031800446634, "learning_rate": 2.9695869671078846e-06, "loss": 0.6663, "step": 6708 }, { "epoch": 0.029700296604542032, "grad_norm": 2.4318829703148825, "learning_rate": 2.9700296604542036e-06, "loss": 0.7778, "step": 6709 }, { "epoch": 0.029704723538005223, "grad_norm": 2.5366222940790917, "learning_rate": 2.9704723538005225e-06, "loss": 0.6646, "step": 6710 }, { "epoch": 0.029709150471468413, "grad_norm": 4.838335805840271, "learning_rate": 2.970915047146842e-06, "loss": 1.5797, "step": 6711 }, { "epoch": 0.029713577404931604, "grad_norm": 3.406004426222967, "learning_rate": 2.9713577404931604e-06, "loss": 0.7204, "step": 6712 }, { "epoch": 0.029718004338394794, "grad_norm": 3.015030138496692, "learning_rate": 2.9718004338394794e-06, "loss": 0.7645, "step": 6713 }, { "epoch": 0.029722431271857985, "grad_norm": 2.572804901564409, "learning_rate": 2.9722431271857987e-06, "loss": 0.8154, "step": 6714 }, { "epoch": 0.029726858205321175, "grad_norm": 3.0445605542873486, "learning_rate": 2.9726858205321173e-06, "loss": 0.9712, "step": 6715 }, { "epoch": 0.029731285138784366, "grad_norm": 3.2284065177648156, "learning_rate": 2.9731285138784366e-06, "loss": 0.6266, "step": 6716 }, { "epoch": 0.029735712072247553, "grad_norm": 2.7642292158146393, "learning_rate": 2.9735712072247556e-06, "loss": 0.7361, "step": 6717 }, { "epoch": 0.029740139005710743, "grad_norm": 2.878309254397271, "learning_rate": 2.9740139005710745e-06, "loss": 0.7526, "step": 6718 }, { "epoch": 0.029744565939173934, "grad_norm": 2.6481792920836718, "learning_rate": 2.9744565939173935e-06, "loss": 0.749, "step": 6719 }, { "epoch": 0.029748992872637124, "grad_norm": 2.9662572559542655, "learning_rate": 2.974899287263713e-06, "loss": 0.8566, "step": 6720 }, { "epoch": 0.029753419806100315, "grad_norm": 3.7862693724213883, "learning_rate": 2.9753419806100314e-06, "loss": 1.1352, "step": 6721 }, { "epoch": 0.029757846739563505, "grad_norm": 3.0031357790358046, "learning_rate": 2.9757846739563507e-06, "loss": 0.4567, "step": 6722 }, { "epoch": 0.029762273673026696, "grad_norm": 3.3108315338192402, "learning_rate": 2.9762273673026697e-06, "loss": 0.9986, "step": 6723 }, { "epoch": 0.029766700606489883, "grad_norm": 2.840353586971191, "learning_rate": 2.9766700606489886e-06, "loss": 0.7038, "step": 6724 }, { "epoch": 0.029771127539953073, "grad_norm": 3.6512608997325304, "learning_rate": 2.9771127539953076e-06, "loss": 0.8115, "step": 6725 }, { "epoch": 0.029775554473416264, "grad_norm": 2.648391031928305, "learning_rate": 2.977555447341627e-06, "loss": 0.6052, "step": 6726 }, { "epoch": 0.029779981406879454, "grad_norm": 2.6466165189511006, "learning_rate": 2.9779981406879455e-06, "loss": 0.7558, "step": 6727 }, { "epoch": 0.029784408340342645, "grad_norm": 3.328264956547163, "learning_rate": 2.978440834034265e-06, "loss": 1.0672, "step": 6728 }, { "epoch": 0.029788835273805835, "grad_norm": 2.650188484030597, "learning_rate": 2.978883527380584e-06, "loss": 0.7643, "step": 6729 }, { "epoch": 0.029793262207269026, "grad_norm": 2.5282867632653248, "learning_rate": 2.9793262207269023e-06, "loss": 0.5549, "step": 6730 }, { "epoch": 0.029797689140732216, "grad_norm": 3.641478822330509, "learning_rate": 2.9797689140732217e-06, "loss": 0.9852, "step": 6731 }, { "epoch": 0.029802116074195403, "grad_norm": 2.6427878437594567, "learning_rate": 2.9802116074195406e-06, "loss": 0.666, "step": 6732 }, { "epoch": 0.029806543007658594, "grad_norm": 2.410773647510075, "learning_rate": 2.9806543007658596e-06, "loss": 0.6666, "step": 6733 }, { "epoch": 0.029810969941121784, "grad_norm": 3.402569394124271, "learning_rate": 2.9810969941121785e-06, "loss": 0.9135, "step": 6734 }, { "epoch": 0.029815396874584975, "grad_norm": 3.813838306978684, "learning_rate": 2.981539687458498e-06, "loss": 0.4878, "step": 6735 }, { "epoch": 0.029819823808048165, "grad_norm": 3.8062718832496567, "learning_rate": 2.9819823808048164e-06, "loss": 1.2169, "step": 6736 }, { "epoch": 0.029824250741511356, "grad_norm": 2.7478467283252788, "learning_rate": 2.982425074151136e-06, "loss": 0.7649, "step": 6737 }, { "epoch": 0.029828677674974546, "grad_norm": 3.9309420759394027, "learning_rate": 2.9828677674974547e-06, "loss": 1.055, "step": 6738 }, { "epoch": 0.029833104608437737, "grad_norm": 2.6132291301261468, "learning_rate": 2.9833104608437737e-06, "loss": 0.6098, "step": 6739 }, { "epoch": 0.029837531541900924, "grad_norm": 2.5545091180723993, "learning_rate": 2.9837531541900926e-06, "loss": 0.7325, "step": 6740 }, { "epoch": 0.029841958475364114, "grad_norm": 2.476092089064283, "learning_rate": 2.984195847536412e-06, "loss": 0.7118, "step": 6741 }, { "epoch": 0.029846385408827305, "grad_norm": 2.8020119037554942, "learning_rate": 2.9846385408827305e-06, "loss": 0.7655, "step": 6742 }, { "epoch": 0.029850812342290495, "grad_norm": 3.6399785677541403, "learning_rate": 2.98508123422905e-06, "loss": 0.8136, "step": 6743 }, { "epoch": 0.029855239275753686, "grad_norm": 2.8865961394685007, "learning_rate": 2.985523927575369e-06, "loss": 1.0456, "step": 6744 }, { "epoch": 0.029859666209216876, "grad_norm": 2.7955169154577395, "learning_rate": 2.9859666209216874e-06, "loss": 0.5503, "step": 6745 }, { "epoch": 0.029864093142680067, "grad_norm": 3.497451012346671, "learning_rate": 2.9864093142680067e-06, "loss": 1.1343, "step": 6746 }, { "epoch": 0.029868520076143254, "grad_norm": 2.994409287891423, "learning_rate": 2.986852007614326e-06, "loss": 0.6781, "step": 6747 }, { "epoch": 0.029872947009606445, "grad_norm": 2.9199826789272287, "learning_rate": 2.9872947009606446e-06, "loss": 0.6541, "step": 6748 }, { "epoch": 0.029877373943069635, "grad_norm": 2.72333520207508, "learning_rate": 2.9877373943069636e-06, "loss": 0.7199, "step": 6749 }, { "epoch": 0.029881800876532826, "grad_norm": 2.574410761969953, "learning_rate": 2.988180087653283e-06, "loss": 0.632, "step": 6750 }, { "epoch": 0.029886227809996016, "grad_norm": 4.853609549071806, "learning_rate": 2.9886227809996015e-06, "loss": 1.3604, "step": 6751 }, { "epoch": 0.029890654743459207, "grad_norm": 2.9744382838169883, "learning_rate": 2.989065474345921e-06, "loss": 1.0436, "step": 6752 }, { "epoch": 0.029895081676922397, "grad_norm": 2.7062748847807887, "learning_rate": 2.98950816769224e-06, "loss": 0.5989, "step": 6753 }, { "epoch": 0.029899508610385588, "grad_norm": 2.5506614688763225, "learning_rate": 2.9899508610385587e-06, "loss": 0.6303, "step": 6754 }, { "epoch": 0.029903935543848775, "grad_norm": 2.5535686388899785, "learning_rate": 2.9903935543848777e-06, "loss": 0.6887, "step": 6755 }, { "epoch": 0.029908362477311965, "grad_norm": 2.8124347180996994, "learning_rate": 2.990836247731197e-06, "loss": 0.6454, "step": 6756 }, { "epoch": 0.029912789410775156, "grad_norm": 3.6823697871575205, "learning_rate": 2.9912789410775156e-06, "loss": 0.8312, "step": 6757 }, { "epoch": 0.029917216344238346, "grad_norm": 4.194398274867815, "learning_rate": 2.991721634423835e-06, "loss": 1.1122, "step": 6758 }, { "epoch": 0.029921643277701537, "grad_norm": 2.7507331221256948, "learning_rate": 2.992164327770154e-06, "loss": 0.7717, "step": 6759 }, { "epoch": 0.029926070211164727, "grad_norm": 2.529720170275795, "learning_rate": 2.9926070211164724e-06, "loss": 0.8014, "step": 6760 }, { "epoch": 0.029930497144627918, "grad_norm": 3.426029184952856, "learning_rate": 2.993049714462792e-06, "loss": 0.861, "step": 6761 }, { "epoch": 0.029934924078091105, "grad_norm": 3.45434509049596, "learning_rate": 2.993492407809111e-06, "loss": 0.846, "step": 6762 }, { "epoch": 0.029939351011554295, "grad_norm": 2.8083262771567674, "learning_rate": 2.9939351011554297e-06, "loss": 0.592, "step": 6763 }, { "epoch": 0.029943777945017486, "grad_norm": 2.811081111662768, "learning_rate": 2.9943777945017486e-06, "loss": 0.9145, "step": 6764 }, { "epoch": 0.029948204878480676, "grad_norm": 3.0544746424837537, "learning_rate": 2.994820487848068e-06, "loss": 0.7511, "step": 6765 }, { "epoch": 0.029952631811943867, "grad_norm": 3.26842913467369, "learning_rate": 2.9952631811943865e-06, "loss": 0.7151, "step": 6766 }, { "epoch": 0.029957058745407057, "grad_norm": 2.8925637418196324, "learning_rate": 2.995705874540706e-06, "loss": 0.6293, "step": 6767 }, { "epoch": 0.029961485678870248, "grad_norm": 3.345174133669969, "learning_rate": 2.996148567887025e-06, "loss": 1.0066, "step": 6768 }, { "epoch": 0.029965912612333438, "grad_norm": 2.721604873666105, "learning_rate": 2.996591261233344e-06, "loss": 0.9181, "step": 6769 }, { "epoch": 0.029970339545796625, "grad_norm": 3.1423646917844583, "learning_rate": 2.9970339545796627e-06, "loss": 0.8115, "step": 6770 }, { "epoch": 0.029974766479259816, "grad_norm": 3.8083706284974315, "learning_rate": 2.997476647925982e-06, "loss": 1.2568, "step": 6771 }, { "epoch": 0.029979193412723006, "grad_norm": 2.762465074495418, "learning_rate": 2.9979193412723006e-06, "loss": 0.6534, "step": 6772 }, { "epoch": 0.029983620346186197, "grad_norm": 3.0689103883535687, "learning_rate": 2.99836203461862e-06, "loss": 0.616, "step": 6773 }, { "epoch": 0.029988047279649387, "grad_norm": 2.7743204169447555, "learning_rate": 2.998804727964939e-06, "loss": 0.6844, "step": 6774 }, { "epoch": 0.029992474213112578, "grad_norm": 3.15026776794304, "learning_rate": 2.9992474213112575e-06, "loss": 0.6316, "step": 6775 }, { "epoch": 0.02999690114657577, "grad_norm": 2.810644794393493, "learning_rate": 2.999690114657577e-06, "loss": 0.949, "step": 6776 }, { "epoch": 0.030001328080038955, "grad_norm": 2.4406733810827803, "learning_rate": 3.0001328080038962e-06, "loss": 0.6878, "step": 6777 }, { "epoch": 0.030005755013502146, "grad_norm": 3.6876316993434526, "learning_rate": 3.0005755013502147e-06, "loss": 1.1722, "step": 6778 }, { "epoch": 0.030010181946965336, "grad_norm": 3.2182499408226346, "learning_rate": 3.0010181946965337e-06, "loss": 0.8592, "step": 6779 }, { "epoch": 0.030014608880428527, "grad_norm": 3.0148776075232986, "learning_rate": 3.001460888042853e-06, "loss": 0.8382, "step": 6780 }, { "epoch": 0.030019035813891717, "grad_norm": 2.7723764410462275, "learning_rate": 3.0019035813891716e-06, "loss": 0.6777, "step": 6781 }, { "epoch": 0.030023462747354908, "grad_norm": 3.731456162445722, "learning_rate": 3.002346274735491e-06, "loss": 1.2964, "step": 6782 }, { "epoch": 0.0300278896808181, "grad_norm": 3.0155936522190068, "learning_rate": 3.00278896808181e-06, "loss": 0.797, "step": 6783 }, { "epoch": 0.03003231661428129, "grad_norm": 3.096150334306849, "learning_rate": 3.003231661428129e-06, "loss": 0.7918, "step": 6784 }, { "epoch": 0.030036743547744476, "grad_norm": 2.576255061482275, "learning_rate": 3.003674354774448e-06, "loss": 0.9216, "step": 6785 }, { "epoch": 0.030041170481207666, "grad_norm": 3.2556258752231253, "learning_rate": 3.004117048120767e-06, "loss": 1.0187, "step": 6786 }, { "epoch": 0.030045597414670857, "grad_norm": 3.6357839444849835, "learning_rate": 3.0045597414670857e-06, "loss": 1.1814, "step": 6787 }, { "epoch": 0.030050024348134047, "grad_norm": 3.205762541192068, "learning_rate": 3.005002434813405e-06, "loss": 0.6894, "step": 6788 }, { "epoch": 0.030054451281597238, "grad_norm": 2.7263883325188445, "learning_rate": 3.005445128159724e-06, "loss": 0.7772, "step": 6789 }, { "epoch": 0.03005887821506043, "grad_norm": 5.066488049897675, "learning_rate": 3.0058878215060425e-06, "loss": 1.0301, "step": 6790 }, { "epoch": 0.03006330514852362, "grad_norm": 3.1839968674882524, "learning_rate": 3.006330514852362e-06, "loss": 0.8465, "step": 6791 }, { "epoch": 0.030067732081986806, "grad_norm": 2.4396289656695993, "learning_rate": 3.0067732081986813e-06, "loss": 0.6687, "step": 6792 }, { "epoch": 0.030072159015449997, "grad_norm": 2.5950465391851005, "learning_rate": 3.007215901545e-06, "loss": 0.6033, "step": 6793 }, { "epoch": 0.030076585948913187, "grad_norm": 3.3986770343144026, "learning_rate": 3.0076585948913188e-06, "loss": 1.2369, "step": 6794 }, { "epoch": 0.030081012882376378, "grad_norm": 2.596236160143561, "learning_rate": 3.008101288237638e-06, "loss": 0.6916, "step": 6795 }, { "epoch": 0.030085439815839568, "grad_norm": 2.4624395239750765, "learning_rate": 3.0085439815839566e-06, "loss": 0.5619, "step": 6796 }, { "epoch": 0.03008986674930276, "grad_norm": 3.7162972760821824, "learning_rate": 3.008986674930276e-06, "loss": 1.14, "step": 6797 }, { "epoch": 0.03009429368276595, "grad_norm": 2.9281139518827435, "learning_rate": 3.009429368276595e-06, "loss": 0.7616, "step": 6798 }, { "epoch": 0.03009872061622914, "grad_norm": 3.084124933184535, "learning_rate": 3.009872061622914e-06, "loss": 0.3715, "step": 6799 }, { "epoch": 0.030103147549692327, "grad_norm": 3.0544141056028935, "learning_rate": 3.010314754969233e-06, "loss": 0.9015, "step": 6800 }, { "epoch": 0.030107574483155517, "grad_norm": 4.057823294768587, "learning_rate": 3.0107574483155522e-06, "loss": 0.8911, "step": 6801 }, { "epoch": 0.030112001416618708, "grad_norm": 2.7714514355173683, "learning_rate": 3.0112001416618708e-06, "loss": 0.8746, "step": 6802 }, { "epoch": 0.030116428350081898, "grad_norm": 3.2718984613566, "learning_rate": 3.01164283500819e-06, "loss": 0.9721, "step": 6803 }, { "epoch": 0.03012085528354509, "grad_norm": 2.750442106283417, "learning_rate": 3.012085528354509e-06, "loss": 0.7805, "step": 6804 }, { "epoch": 0.03012528221700828, "grad_norm": 2.6055116258246604, "learning_rate": 3.012528221700828e-06, "loss": 0.782, "step": 6805 }, { "epoch": 0.03012970915047147, "grad_norm": 2.2741059364710563, "learning_rate": 3.012970915047147e-06, "loss": 0.6743, "step": 6806 }, { "epoch": 0.03013413608393466, "grad_norm": 3.704577261586144, "learning_rate": 3.0134136083934663e-06, "loss": 0.8999, "step": 6807 }, { "epoch": 0.030138563017397847, "grad_norm": 2.5515486048096965, "learning_rate": 3.013856301739785e-06, "loss": 0.8108, "step": 6808 }, { "epoch": 0.030142989950861038, "grad_norm": 3.534648850955577, "learning_rate": 3.0142989950861042e-06, "loss": 1.1847, "step": 6809 }, { "epoch": 0.030147416884324228, "grad_norm": 3.3421507859077, "learning_rate": 3.014741688432423e-06, "loss": 0.6954, "step": 6810 }, { "epoch": 0.03015184381778742, "grad_norm": 3.1141198780040606, "learning_rate": 3.0151843817787417e-06, "loss": 0.638, "step": 6811 }, { "epoch": 0.03015627075125061, "grad_norm": 2.5721804552643257, "learning_rate": 3.015627075125061e-06, "loss": 0.5836, "step": 6812 }, { "epoch": 0.0301606976847138, "grad_norm": 3.1928095234823273, "learning_rate": 3.01606976847138e-06, "loss": 1.0037, "step": 6813 }, { "epoch": 0.03016512461817699, "grad_norm": 3.639778485981708, "learning_rate": 3.016512461817699e-06, "loss": 1.0642, "step": 6814 }, { "epoch": 0.030169551551640177, "grad_norm": 2.6205475920898826, "learning_rate": 3.016955155164018e-06, "loss": 0.7074, "step": 6815 }, { "epoch": 0.030173978485103368, "grad_norm": 2.847071755212488, "learning_rate": 3.0173978485103373e-06, "loss": 0.8342, "step": 6816 }, { "epoch": 0.03017840541856656, "grad_norm": 2.758464960905245, "learning_rate": 3.017840541856656e-06, "loss": 0.7814, "step": 6817 }, { "epoch": 0.03018283235202975, "grad_norm": 2.783185980328721, "learning_rate": 3.018283235202975e-06, "loss": 1.0395, "step": 6818 }, { "epoch": 0.03018725928549294, "grad_norm": 3.374550287847168, "learning_rate": 3.018725928549294e-06, "loss": 0.7137, "step": 6819 }, { "epoch": 0.03019168621895613, "grad_norm": 2.678575058647125, "learning_rate": 3.019168621895613e-06, "loss": 0.5004, "step": 6820 }, { "epoch": 0.03019611315241932, "grad_norm": 2.992751047091042, "learning_rate": 3.019611315241932e-06, "loss": 0.8239, "step": 6821 }, { "epoch": 0.03020054008588251, "grad_norm": 3.296000511032174, "learning_rate": 3.0200540085882514e-06, "loss": 0.6757, "step": 6822 }, { "epoch": 0.030204967019345698, "grad_norm": 2.848307767560714, "learning_rate": 3.02049670193457e-06, "loss": 0.894, "step": 6823 }, { "epoch": 0.03020939395280889, "grad_norm": 2.8088560637027378, "learning_rate": 3.0209393952808893e-06, "loss": 0.7841, "step": 6824 }, { "epoch": 0.03021382088627208, "grad_norm": 2.531156288228238, "learning_rate": 3.0213820886272082e-06, "loss": 0.8615, "step": 6825 }, { "epoch": 0.03021824781973527, "grad_norm": 2.9330304994363683, "learning_rate": 3.0218247819735268e-06, "loss": 0.7303, "step": 6826 }, { "epoch": 0.03022267475319846, "grad_norm": 2.874319617730392, "learning_rate": 3.022267475319846e-06, "loss": 0.702, "step": 6827 }, { "epoch": 0.03022710168666165, "grad_norm": 2.3877714974376225, "learning_rate": 3.0227101686661655e-06, "loss": 0.6737, "step": 6828 }, { "epoch": 0.03023152862012484, "grad_norm": 3.025654470629856, "learning_rate": 3.023152862012484e-06, "loss": 0.9139, "step": 6829 }, { "epoch": 0.030235955553588028, "grad_norm": 2.7029393230741734, "learning_rate": 3.023595555358803e-06, "loss": 0.6984, "step": 6830 }, { "epoch": 0.03024038248705122, "grad_norm": 2.8728225465772357, "learning_rate": 3.0240382487051223e-06, "loss": 0.9098, "step": 6831 }, { "epoch": 0.03024480942051441, "grad_norm": 2.6759549516199086, "learning_rate": 3.024480942051441e-06, "loss": 1.0449, "step": 6832 }, { "epoch": 0.0302492363539776, "grad_norm": 2.4869332539928166, "learning_rate": 3.0249236353977602e-06, "loss": 0.7862, "step": 6833 }, { "epoch": 0.03025366328744079, "grad_norm": 3.4237918709571136, "learning_rate": 3.025366328744079e-06, "loss": 0.7203, "step": 6834 }, { "epoch": 0.03025809022090398, "grad_norm": 3.3058885498735258, "learning_rate": 3.025809022090398e-06, "loss": 0.931, "step": 6835 }, { "epoch": 0.03026251715436717, "grad_norm": 3.2603911160186274, "learning_rate": 3.026251715436717e-06, "loss": 0.6576, "step": 6836 }, { "epoch": 0.03026694408783036, "grad_norm": 3.0826702814645772, "learning_rate": 3.0266944087830364e-06, "loss": 0.8462, "step": 6837 }, { "epoch": 0.03027137102129355, "grad_norm": 2.526882070560209, "learning_rate": 3.0271371021293554e-06, "loss": 0.6204, "step": 6838 }, { "epoch": 0.03027579795475674, "grad_norm": 3.536315041252256, "learning_rate": 3.0275797954756743e-06, "loss": 0.9657, "step": 6839 }, { "epoch": 0.03028022488821993, "grad_norm": 2.792508462263812, "learning_rate": 3.0280224888219933e-06, "loss": 0.6176, "step": 6840 }, { "epoch": 0.03028465182168312, "grad_norm": 2.9419541613107976, "learning_rate": 3.0284651821683127e-06, "loss": 0.5119, "step": 6841 }, { "epoch": 0.03028907875514631, "grad_norm": 2.2726311306555913, "learning_rate": 3.028907875514631e-06, "loss": 0.5659, "step": 6842 }, { "epoch": 0.0302935056886095, "grad_norm": 3.536672532750487, "learning_rate": 3.0293505688609505e-06, "loss": 0.5493, "step": 6843 }, { "epoch": 0.03029793262207269, "grad_norm": 2.6810673332032797, "learning_rate": 3.0297932622072695e-06, "loss": 0.7131, "step": 6844 }, { "epoch": 0.03030235955553588, "grad_norm": 2.8704114625465764, "learning_rate": 3.030235955553588e-06, "loss": 0.6478, "step": 6845 }, { "epoch": 0.03030678648899907, "grad_norm": 3.1014857088661616, "learning_rate": 3.0306786488999074e-06, "loss": 0.8349, "step": 6846 }, { "epoch": 0.03031121342246226, "grad_norm": 3.635439223770622, "learning_rate": 3.0311213422462268e-06, "loss": 1.2916, "step": 6847 }, { "epoch": 0.03031564035592545, "grad_norm": 3.0207505905948318, "learning_rate": 3.0315640355925453e-06, "loss": 0.8394, "step": 6848 }, { "epoch": 0.03032006728938864, "grad_norm": 2.7612887167989952, "learning_rate": 3.0320067289388642e-06, "loss": 0.5255, "step": 6849 }, { "epoch": 0.03032449422285183, "grad_norm": 2.4711390769960433, "learning_rate": 3.0324494222851836e-06, "loss": 0.6135, "step": 6850 }, { "epoch": 0.03032892115631502, "grad_norm": 3.906261476326929, "learning_rate": 3.032892115631502e-06, "loss": 0.7903, "step": 6851 }, { "epoch": 0.030333348089778212, "grad_norm": 2.962336799477753, "learning_rate": 3.0333348089778215e-06, "loss": 0.7173, "step": 6852 }, { "epoch": 0.0303377750232414, "grad_norm": 2.576129268056592, "learning_rate": 3.0337775023241404e-06, "loss": 0.5248, "step": 6853 }, { "epoch": 0.03034220195670459, "grad_norm": 3.2625802114487916, "learning_rate": 3.0342201956704594e-06, "loss": 0.7328, "step": 6854 }, { "epoch": 0.03034662889016778, "grad_norm": 2.811190279564735, "learning_rate": 3.0346628890167783e-06, "loss": 0.6189, "step": 6855 }, { "epoch": 0.03035105582363097, "grad_norm": 3.2260981082250995, "learning_rate": 3.0351055823630977e-06, "loss": 0.9233, "step": 6856 }, { "epoch": 0.03035548275709416, "grad_norm": 3.3354624561712996, "learning_rate": 3.0355482757094162e-06, "loss": 0.5115, "step": 6857 }, { "epoch": 0.030359909690557352, "grad_norm": 2.649374124955738, "learning_rate": 3.0359909690557356e-06, "loss": 0.5953, "step": 6858 }, { "epoch": 0.030364336624020542, "grad_norm": 3.6979069610850677, "learning_rate": 3.0364336624020545e-06, "loss": 0.9175, "step": 6859 }, { "epoch": 0.03036876355748373, "grad_norm": 2.6508655675197628, "learning_rate": 3.036876355748373e-06, "loss": 0.6431, "step": 6860 }, { "epoch": 0.03037319049094692, "grad_norm": 3.8819985753959223, "learning_rate": 3.0373190490946924e-06, "loss": 1.2604, "step": 6861 }, { "epoch": 0.03037761742441011, "grad_norm": 2.5125447604415485, "learning_rate": 3.037761742441012e-06, "loss": 0.5805, "step": 6862 }, { "epoch": 0.0303820443578733, "grad_norm": 2.6437666054561837, "learning_rate": 3.0382044357873303e-06, "loss": 0.9341, "step": 6863 }, { "epoch": 0.03038647129133649, "grad_norm": 3.9856824131600126, "learning_rate": 3.0386471291336493e-06, "loss": 1.1025, "step": 6864 }, { "epoch": 0.030390898224799682, "grad_norm": 2.636082392843153, "learning_rate": 3.0390898224799687e-06, "loss": 0.8746, "step": 6865 }, { "epoch": 0.030395325158262872, "grad_norm": 2.788120400876928, "learning_rate": 3.039532515826287e-06, "loss": 0.6244, "step": 6866 }, { "epoch": 0.030399752091726063, "grad_norm": 3.9704371800379503, "learning_rate": 3.0399752091726065e-06, "loss": 1.2839, "step": 6867 }, { "epoch": 0.03040417902518925, "grad_norm": 3.0828773073263496, "learning_rate": 3.0404179025189255e-06, "loss": 1.0634, "step": 6868 }, { "epoch": 0.03040860595865244, "grad_norm": 3.1984148734408455, "learning_rate": 3.0408605958652444e-06, "loss": 0.8618, "step": 6869 }, { "epoch": 0.03041303289211563, "grad_norm": 3.6588785082956097, "learning_rate": 3.0413032892115634e-06, "loss": 1.143, "step": 6870 }, { "epoch": 0.03041745982557882, "grad_norm": 2.6007522004395707, "learning_rate": 3.0417459825578828e-06, "loss": 0.7169, "step": 6871 }, { "epoch": 0.030421886759042012, "grad_norm": 3.5304388565265223, "learning_rate": 3.0421886759042013e-06, "loss": 0.9905, "step": 6872 }, { "epoch": 0.030426313692505202, "grad_norm": 2.9635849382371875, "learning_rate": 3.0426313692505207e-06, "loss": 0.9312, "step": 6873 }, { "epoch": 0.030430740625968393, "grad_norm": 3.1614982281264354, "learning_rate": 3.0430740625968396e-06, "loss": 1.2502, "step": 6874 }, { "epoch": 0.030435167559431583, "grad_norm": 3.4627187517945224, "learning_rate": 3.043516755943158e-06, "loss": 0.8325, "step": 6875 }, { "epoch": 0.03043959449289477, "grad_norm": 2.6685158419881154, "learning_rate": 3.0439594492894775e-06, "loss": 0.6128, "step": 6876 }, { "epoch": 0.03044402142635796, "grad_norm": 2.860656469581736, "learning_rate": 3.044402142635797e-06, "loss": 0.5994, "step": 6877 }, { "epoch": 0.03044844835982115, "grad_norm": 2.651946057237455, "learning_rate": 3.0448448359821154e-06, "loss": 0.5955, "step": 6878 }, { "epoch": 0.030452875293284342, "grad_norm": 3.0489923545514497, "learning_rate": 3.0452875293284343e-06, "loss": 0.7637, "step": 6879 }, { "epoch": 0.030457302226747533, "grad_norm": 3.133243126295013, "learning_rate": 3.0457302226747537e-06, "loss": 0.7627, "step": 6880 }, { "epoch": 0.030461729160210723, "grad_norm": 2.5803186142895465, "learning_rate": 3.0461729160210722e-06, "loss": 0.7578, "step": 6881 }, { "epoch": 0.030466156093673914, "grad_norm": 2.576637060023821, "learning_rate": 3.0466156093673916e-06, "loss": 0.6965, "step": 6882 }, { "epoch": 0.0304705830271371, "grad_norm": 3.185984827700627, "learning_rate": 3.0470583027137105e-06, "loss": 0.5989, "step": 6883 }, { "epoch": 0.03047500996060029, "grad_norm": 3.1089744693964567, "learning_rate": 3.0475009960600295e-06, "loss": 0.7646, "step": 6884 }, { "epoch": 0.03047943689406348, "grad_norm": 2.657591184537782, "learning_rate": 3.0479436894063484e-06, "loss": 0.4997, "step": 6885 }, { "epoch": 0.030483863827526672, "grad_norm": 3.063333470687638, "learning_rate": 3.048386382752668e-06, "loss": 0.6617, "step": 6886 }, { "epoch": 0.030488290760989863, "grad_norm": 3.2661118480188494, "learning_rate": 3.0488290760989863e-06, "loss": 0.7895, "step": 6887 }, { "epoch": 0.030492717694453053, "grad_norm": 3.475882327119201, "learning_rate": 3.0492717694453057e-06, "loss": 0.9473, "step": 6888 }, { "epoch": 0.030497144627916244, "grad_norm": 2.7885544418969097, "learning_rate": 3.0497144627916247e-06, "loss": 0.8825, "step": 6889 }, { "epoch": 0.030501571561379434, "grad_norm": 2.8781466288963142, "learning_rate": 3.050157156137943e-06, "loss": 0.5999, "step": 6890 }, { "epoch": 0.03050599849484262, "grad_norm": 2.7359756844187038, "learning_rate": 3.0505998494842625e-06, "loss": 0.7089, "step": 6891 }, { "epoch": 0.03051042542830581, "grad_norm": 3.468786463051958, "learning_rate": 3.051042542830582e-06, "loss": 0.772, "step": 6892 }, { "epoch": 0.030514852361769002, "grad_norm": 2.919075534733167, "learning_rate": 3.0514852361769004e-06, "loss": 0.744, "step": 6893 }, { "epoch": 0.030519279295232193, "grad_norm": 3.0926596731740803, "learning_rate": 3.0519279295232194e-06, "loss": 0.8413, "step": 6894 }, { "epoch": 0.030523706228695383, "grad_norm": 2.4928372450350036, "learning_rate": 3.0523706228695388e-06, "loss": 0.6325, "step": 6895 }, { "epoch": 0.030528133162158574, "grad_norm": 2.8676786891486485, "learning_rate": 3.0528133162158573e-06, "loss": 0.7702, "step": 6896 }, { "epoch": 0.030532560095621764, "grad_norm": 3.2203523744927267, "learning_rate": 3.0532560095621767e-06, "loss": 1.1277, "step": 6897 }, { "epoch": 0.03053698702908495, "grad_norm": 2.9181324212677793, "learning_rate": 3.0536987029084956e-06, "loss": 0.7542, "step": 6898 }, { "epoch": 0.030541413962548142, "grad_norm": 2.7881126456775465, "learning_rate": 3.0541413962548145e-06, "loss": 0.5582, "step": 6899 }, { "epoch": 0.030545840896011332, "grad_norm": 2.6228784097497106, "learning_rate": 3.0545840896011335e-06, "loss": 0.7228, "step": 6900 }, { "epoch": 0.030550267829474523, "grad_norm": 2.6193696647642395, "learning_rate": 3.055026782947453e-06, "loss": 0.5354, "step": 6901 }, { "epoch": 0.030554694762937713, "grad_norm": 2.8774346346742967, "learning_rate": 3.0554694762937714e-06, "loss": 0.6456, "step": 6902 }, { "epoch": 0.030559121696400904, "grad_norm": 2.6431226349987336, "learning_rate": 3.0559121696400908e-06, "loss": 0.8646, "step": 6903 }, { "epoch": 0.030563548629864094, "grad_norm": 2.951745109390637, "learning_rate": 3.0563548629864097e-06, "loss": 1.0296, "step": 6904 }, { "epoch": 0.030567975563327285, "grad_norm": 3.130658504995821, "learning_rate": 3.0567975563327287e-06, "loss": 0.6403, "step": 6905 }, { "epoch": 0.030572402496790472, "grad_norm": 2.3195382277343786, "learning_rate": 3.0572402496790476e-06, "loss": 0.7396, "step": 6906 }, { "epoch": 0.030576829430253662, "grad_norm": 2.7277515293744714, "learning_rate": 3.057682943025367e-06, "loss": 0.841, "step": 6907 }, { "epoch": 0.030581256363716853, "grad_norm": 3.191246216573534, "learning_rate": 3.0581256363716855e-06, "loss": 0.8584, "step": 6908 }, { "epoch": 0.030585683297180043, "grad_norm": 4.228772772668339, "learning_rate": 3.058568329718005e-06, "loss": 0.8197, "step": 6909 }, { "epoch": 0.030590110230643234, "grad_norm": 4.169248290124791, "learning_rate": 3.059011023064324e-06, "loss": 1.1888, "step": 6910 }, { "epoch": 0.030594537164106424, "grad_norm": 2.5841038739728286, "learning_rate": 3.0594537164106423e-06, "loss": 0.7117, "step": 6911 }, { "epoch": 0.030598964097569615, "grad_norm": 3.4931378721083277, "learning_rate": 3.0598964097569617e-06, "loss": 0.8281, "step": 6912 }, { "epoch": 0.030603391031032802, "grad_norm": 3.3829781701888537, "learning_rate": 3.0603391031032807e-06, "loss": 1.1702, "step": 6913 }, { "epoch": 0.030607817964495992, "grad_norm": 3.135531564615524, "learning_rate": 3.0607817964495996e-06, "loss": 0.9415, "step": 6914 }, { "epoch": 0.030612244897959183, "grad_norm": 3.865252477581645, "learning_rate": 3.0612244897959185e-06, "loss": 1.2283, "step": 6915 }, { "epoch": 0.030616671831422373, "grad_norm": 3.4228939656443385, "learning_rate": 3.061667183142238e-06, "loss": 0.8811, "step": 6916 }, { "epoch": 0.030621098764885564, "grad_norm": 3.191208839195066, "learning_rate": 3.0621098764885564e-06, "loss": 0.7061, "step": 6917 }, { "epoch": 0.030625525698348754, "grad_norm": 2.9199731933759847, "learning_rate": 3.062552569834876e-06, "loss": 0.8874, "step": 6918 }, { "epoch": 0.030629952631811945, "grad_norm": 2.4378492107246705, "learning_rate": 3.0629952631811948e-06, "loss": 0.499, "step": 6919 }, { "epoch": 0.030634379565275136, "grad_norm": 2.993934455315986, "learning_rate": 3.0634379565275137e-06, "loss": 0.7642, "step": 6920 }, { "epoch": 0.030638806498738323, "grad_norm": 3.20499125832017, "learning_rate": 3.0638806498738327e-06, "loss": 0.8786, "step": 6921 }, { "epoch": 0.030643233432201513, "grad_norm": 3.8313717069338415, "learning_rate": 3.064323343220152e-06, "loss": 0.9789, "step": 6922 }, { "epoch": 0.030647660365664704, "grad_norm": 2.5808768553698416, "learning_rate": 3.0647660365664705e-06, "loss": 0.7394, "step": 6923 }, { "epoch": 0.030652087299127894, "grad_norm": 2.5015833645279444, "learning_rate": 3.06520872991279e-06, "loss": 0.6212, "step": 6924 }, { "epoch": 0.030656514232591085, "grad_norm": 2.8698328642704487, "learning_rate": 3.065651423259109e-06, "loss": 0.6978, "step": 6925 }, { "epoch": 0.030660941166054275, "grad_norm": 3.830612939977726, "learning_rate": 3.0660941166054274e-06, "loss": 1.5712, "step": 6926 }, { "epoch": 0.030665368099517466, "grad_norm": 3.661041982232377, "learning_rate": 3.0665368099517468e-06, "loss": 1.1255, "step": 6927 }, { "epoch": 0.030669795032980653, "grad_norm": 2.4774163897347847, "learning_rate": 3.066979503298066e-06, "loss": 0.5476, "step": 6928 }, { "epoch": 0.030674221966443843, "grad_norm": 2.6335356982803537, "learning_rate": 3.0674221966443847e-06, "loss": 0.6151, "step": 6929 }, { "epoch": 0.030678648899907034, "grad_norm": 2.5959038469091635, "learning_rate": 3.0678648899907036e-06, "loss": 0.3068, "step": 6930 }, { "epoch": 0.030683075833370224, "grad_norm": 2.832293766782864, "learning_rate": 3.068307583337023e-06, "loss": 0.6665, "step": 6931 }, { "epoch": 0.030687502766833415, "grad_norm": 2.863497871435491, "learning_rate": 3.0687502766833415e-06, "loss": 0.9181, "step": 6932 }, { "epoch": 0.030691929700296605, "grad_norm": 3.059868989846533, "learning_rate": 3.069192970029661e-06, "loss": 0.7974, "step": 6933 }, { "epoch": 0.030696356633759796, "grad_norm": 3.407577129516583, "learning_rate": 3.06963566337598e-06, "loss": 1.0224, "step": 6934 }, { "epoch": 0.030700783567222986, "grad_norm": 2.577270285102182, "learning_rate": 3.0700783567222988e-06, "loss": 0.6689, "step": 6935 }, { "epoch": 0.030705210500686173, "grad_norm": 2.6211234204694476, "learning_rate": 3.0705210500686177e-06, "loss": 0.6953, "step": 6936 }, { "epoch": 0.030709637434149364, "grad_norm": 3.6768557688119468, "learning_rate": 3.070963743414937e-06, "loss": 1.0712, "step": 6937 }, { "epoch": 0.030714064367612554, "grad_norm": 2.73884566171171, "learning_rate": 3.0714064367612556e-06, "loss": 0.3985, "step": 6938 }, { "epoch": 0.030718491301075745, "grad_norm": 2.9042951419050618, "learning_rate": 3.071849130107575e-06, "loss": 0.9448, "step": 6939 }, { "epoch": 0.030722918234538935, "grad_norm": 3.7181558368501717, "learning_rate": 3.072291823453894e-06, "loss": 0.759, "step": 6940 }, { "epoch": 0.030727345168002126, "grad_norm": 2.4843550784874466, "learning_rate": 3.0727345168002124e-06, "loss": 0.7295, "step": 6941 }, { "epoch": 0.030731772101465316, "grad_norm": 3.0956389426103805, "learning_rate": 3.073177210146532e-06, "loss": 0.8126, "step": 6942 }, { "epoch": 0.030736199034928503, "grad_norm": 2.6527088776689087, "learning_rate": 3.073619903492851e-06, "loss": 0.6438, "step": 6943 }, { "epoch": 0.030740625968391694, "grad_norm": 2.25663835211193, "learning_rate": 3.0740625968391697e-06, "loss": 0.6576, "step": 6944 }, { "epoch": 0.030745052901854884, "grad_norm": 3.244373451382959, "learning_rate": 3.0745052901854887e-06, "loss": 0.899, "step": 6945 }, { "epoch": 0.030749479835318075, "grad_norm": 2.799333725852939, "learning_rate": 3.074947983531808e-06, "loss": 0.88, "step": 6946 }, { "epoch": 0.030753906768781265, "grad_norm": 2.5594270141744615, "learning_rate": 3.0753906768781265e-06, "loss": 0.6049, "step": 6947 }, { "epoch": 0.030758333702244456, "grad_norm": 2.589677370649066, "learning_rate": 3.075833370224446e-06, "loss": 0.7402, "step": 6948 }, { "epoch": 0.030762760635707646, "grad_norm": 3.0763807137125845, "learning_rate": 3.076276063570765e-06, "loss": 0.6641, "step": 6949 }, { "epoch": 0.030767187569170837, "grad_norm": 3.355201075671575, "learning_rate": 3.076718756917084e-06, "loss": 0.9245, "step": 6950 }, { "epoch": 0.030771614502634024, "grad_norm": 3.0667950536971147, "learning_rate": 3.0771614502634028e-06, "loss": 0.6447, "step": 6951 }, { "epoch": 0.030776041436097214, "grad_norm": 3.2050116011967122, "learning_rate": 3.077604143609722e-06, "loss": 1.1186, "step": 6952 }, { "epoch": 0.030780468369560405, "grad_norm": 3.6624947831528427, "learning_rate": 3.0780468369560407e-06, "loss": 1.0103, "step": 6953 }, { "epoch": 0.030784895303023595, "grad_norm": 2.6298849936791235, "learning_rate": 3.07848953030236e-06, "loss": 0.7702, "step": 6954 }, { "epoch": 0.030789322236486786, "grad_norm": 2.213355141246207, "learning_rate": 3.078932223648679e-06, "loss": 0.5687, "step": 6955 }, { "epoch": 0.030793749169949976, "grad_norm": 3.409513242617963, "learning_rate": 3.0793749169949975e-06, "loss": 1.0374, "step": 6956 }, { "epoch": 0.030798176103413167, "grad_norm": 2.718407268016861, "learning_rate": 3.079817610341317e-06, "loss": 0.6241, "step": 6957 }, { "epoch": 0.030802603036876357, "grad_norm": 2.832585522580035, "learning_rate": 3.0802603036876362e-06, "loss": 0.9528, "step": 6958 }, { "epoch": 0.030807029970339544, "grad_norm": 2.700515065288063, "learning_rate": 3.0807029970339548e-06, "loss": 0.6458, "step": 6959 }, { "epoch": 0.030811456903802735, "grad_norm": 2.7022929989007616, "learning_rate": 3.0811456903802737e-06, "loss": 0.8577, "step": 6960 }, { "epoch": 0.030815883837265926, "grad_norm": 2.729471141300736, "learning_rate": 3.081588383726593e-06, "loss": 0.6852, "step": 6961 }, { "epoch": 0.030820310770729116, "grad_norm": 3.8352106728001623, "learning_rate": 3.0820310770729116e-06, "loss": 1.4828, "step": 6962 }, { "epoch": 0.030824737704192307, "grad_norm": 3.294444913683691, "learning_rate": 3.082473770419231e-06, "loss": 0.9205, "step": 6963 }, { "epoch": 0.030829164637655497, "grad_norm": 3.1458669620785282, "learning_rate": 3.08291646376555e-06, "loss": 0.6746, "step": 6964 }, { "epoch": 0.030833591571118688, "grad_norm": 3.0728862996458637, "learning_rate": 3.083359157111869e-06, "loss": 0.6945, "step": 6965 }, { "epoch": 0.030838018504581875, "grad_norm": 4.169115929476317, "learning_rate": 3.083801850458188e-06, "loss": 1.1563, "step": 6966 }, { "epoch": 0.030842445438045065, "grad_norm": 3.8825175798291083, "learning_rate": 3.084244543804507e-06, "loss": 1.2984, "step": 6967 }, { "epoch": 0.030846872371508256, "grad_norm": 2.6685775658472473, "learning_rate": 3.0846872371508257e-06, "loss": 0.8677, "step": 6968 }, { "epoch": 0.030851299304971446, "grad_norm": 3.0152677280935767, "learning_rate": 3.085129930497145e-06, "loss": 1.0112, "step": 6969 }, { "epoch": 0.030855726238434637, "grad_norm": 2.2465792517913363, "learning_rate": 3.085572623843464e-06, "loss": 0.5809, "step": 6970 }, { "epoch": 0.030860153171897827, "grad_norm": 2.9402220426160897, "learning_rate": 3.0860153171897826e-06, "loss": 1.0157, "step": 6971 }, { "epoch": 0.030864580105361018, "grad_norm": 2.9827984748654806, "learning_rate": 3.086458010536102e-06, "loss": 0.8317, "step": 6972 }, { "epoch": 0.030869007038824208, "grad_norm": 3.2749815344877313, "learning_rate": 3.0869007038824213e-06, "loss": 0.9784, "step": 6973 }, { "epoch": 0.030873433972287395, "grad_norm": 2.6553041776142523, "learning_rate": 3.08734339722874e-06, "loss": 0.9745, "step": 6974 }, { "epoch": 0.030877860905750586, "grad_norm": 3.1263817991695686, "learning_rate": 3.0877860905750588e-06, "loss": 0.6734, "step": 6975 }, { "epoch": 0.030882287839213776, "grad_norm": 2.687298101374125, "learning_rate": 3.088228783921378e-06, "loss": 0.59, "step": 6976 }, { "epoch": 0.030886714772676967, "grad_norm": 2.4692578973651758, "learning_rate": 3.0886714772676967e-06, "loss": 0.6454, "step": 6977 }, { "epoch": 0.030891141706140157, "grad_norm": 2.5162884374088716, "learning_rate": 3.089114170614016e-06, "loss": 0.7137, "step": 6978 }, { "epoch": 0.030895568639603348, "grad_norm": 2.98627025109477, "learning_rate": 3.089556863960335e-06, "loss": 1.0984, "step": 6979 }, { "epoch": 0.030899995573066538, "grad_norm": 3.1799050015119943, "learning_rate": 3.089999557306654e-06, "loss": 0.9396, "step": 6980 }, { "epoch": 0.030904422506529725, "grad_norm": 2.724035406848809, "learning_rate": 3.090442250652973e-06, "loss": 0.7615, "step": 6981 }, { "epoch": 0.030908849439992916, "grad_norm": 3.079092225351039, "learning_rate": 3.0908849439992922e-06, "loss": 0.8152, "step": 6982 }, { "epoch": 0.030913276373456106, "grad_norm": 2.7794072746503713, "learning_rate": 3.0913276373456108e-06, "loss": 0.7495, "step": 6983 }, { "epoch": 0.030917703306919297, "grad_norm": 2.4982994340821896, "learning_rate": 3.09177033069193e-06, "loss": 0.6276, "step": 6984 }, { "epoch": 0.030922130240382487, "grad_norm": 3.1484385873205887, "learning_rate": 3.092213024038249e-06, "loss": 0.8855, "step": 6985 }, { "epoch": 0.030926557173845678, "grad_norm": 2.582742406462103, "learning_rate": 3.092655717384568e-06, "loss": 0.751, "step": 6986 }, { "epoch": 0.03093098410730887, "grad_norm": 3.188313325711996, "learning_rate": 3.093098410730887e-06, "loss": 0.8015, "step": 6987 }, { "epoch": 0.03093541104077206, "grad_norm": 2.8708862426698145, "learning_rate": 3.0935411040772063e-06, "loss": 0.6784, "step": 6988 }, { "epoch": 0.030939837974235246, "grad_norm": 2.7236041939854942, "learning_rate": 3.093983797423525e-06, "loss": 0.8552, "step": 6989 }, { "epoch": 0.030944264907698436, "grad_norm": 2.5688871636344888, "learning_rate": 3.094426490769844e-06, "loss": 0.5354, "step": 6990 }, { "epoch": 0.030948691841161627, "grad_norm": 2.4539659370210885, "learning_rate": 3.094869184116163e-06, "loss": 0.6834, "step": 6991 }, { "epoch": 0.030953118774624817, "grad_norm": 3.307098896574683, "learning_rate": 3.0953118774624817e-06, "loss": 0.8269, "step": 6992 }, { "epoch": 0.030957545708088008, "grad_norm": 2.7714601662746925, "learning_rate": 3.095754570808801e-06, "loss": 0.7902, "step": 6993 }, { "epoch": 0.0309619726415512, "grad_norm": 3.980772420906343, "learning_rate": 3.09619726415512e-06, "loss": 1.3213, "step": 6994 }, { "epoch": 0.03096639957501439, "grad_norm": 3.1376446502460347, "learning_rate": 3.096639957501439e-06, "loss": 0.7123, "step": 6995 }, { "epoch": 0.030970826508477576, "grad_norm": 2.8899339959753423, "learning_rate": 3.097082650847758e-06, "loss": 0.6188, "step": 6996 }, { "epoch": 0.030975253441940766, "grad_norm": 3.4379179877239645, "learning_rate": 3.0975253441940773e-06, "loss": 0.9928, "step": 6997 }, { "epoch": 0.030979680375403957, "grad_norm": 3.2981036331622153, "learning_rate": 3.097968037540396e-06, "loss": 0.7055, "step": 6998 }, { "epoch": 0.030984107308867147, "grad_norm": 3.09023716307243, "learning_rate": 3.098410730886715e-06, "loss": 0.7771, "step": 6999 }, { "epoch": 0.030988534242330338, "grad_norm": 2.7481548225680164, "learning_rate": 3.098853424233034e-06, "loss": 0.7032, "step": 7000 }, { "epoch": 0.03099296117579353, "grad_norm": 2.843983818764607, "learning_rate": 3.099296117579353e-06, "loss": 0.7928, "step": 7001 }, { "epoch": 0.03099738810925672, "grad_norm": 2.8530315154816486, "learning_rate": 3.099738810925672e-06, "loss": 0.8995, "step": 7002 }, { "epoch": 0.03100181504271991, "grad_norm": 3.212074322283417, "learning_rate": 3.1001815042719914e-06, "loss": 0.9845, "step": 7003 }, { "epoch": 0.031006241976183097, "grad_norm": 2.584683290341467, "learning_rate": 3.10062419761831e-06, "loss": 0.7044, "step": 7004 }, { "epoch": 0.031010668909646287, "grad_norm": 3.246112989474312, "learning_rate": 3.1010668909646293e-06, "loss": 0.5065, "step": 7005 }, { "epoch": 0.031015095843109478, "grad_norm": 5.257866683404719, "learning_rate": 3.1015095843109482e-06, "loss": 0.6343, "step": 7006 }, { "epoch": 0.031019522776572668, "grad_norm": 2.8994124899111045, "learning_rate": 3.1019522776572668e-06, "loss": 0.7026, "step": 7007 }, { "epoch": 0.03102394971003586, "grad_norm": 2.7587077969283977, "learning_rate": 3.102394971003586e-06, "loss": 0.5921, "step": 7008 }, { "epoch": 0.03102837664349905, "grad_norm": 3.8289232358445786, "learning_rate": 3.1028376643499055e-06, "loss": 1.0223, "step": 7009 }, { "epoch": 0.03103280357696224, "grad_norm": 2.524012811450832, "learning_rate": 3.103280357696224e-06, "loss": 0.7187, "step": 7010 }, { "epoch": 0.031037230510425427, "grad_norm": 2.6699035621846456, "learning_rate": 3.103723051042543e-06, "loss": 0.7105, "step": 7011 }, { "epoch": 0.031041657443888617, "grad_norm": 3.1472809614442756, "learning_rate": 3.1041657443888623e-06, "loss": 0.8457, "step": 7012 }, { "epoch": 0.031046084377351808, "grad_norm": 3.419626580074498, "learning_rate": 3.104608437735181e-06, "loss": 0.7387, "step": 7013 }, { "epoch": 0.031050511310814998, "grad_norm": 2.6054127913985545, "learning_rate": 3.1050511310815002e-06, "loss": 0.6405, "step": 7014 }, { "epoch": 0.03105493824427819, "grad_norm": 2.4413951923055213, "learning_rate": 3.105493824427819e-06, "loss": 0.5976, "step": 7015 }, { "epoch": 0.03105936517774138, "grad_norm": 3.276955869911365, "learning_rate": 3.105936517774138e-06, "loss": 1.2292, "step": 7016 }, { "epoch": 0.03106379211120457, "grad_norm": 2.6549725716106014, "learning_rate": 3.106379211120457e-06, "loss": 0.7444, "step": 7017 }, { "epoch": 0.03106821904466776, "grad_norm": 2.7491058390515106, "learning_rate": 3.1068219044667765e-06, "loss": 0.6577, "step": 7018 }, { "epoch": 0.031072645978130947, "grad_norm": 2.6017045173206266, "learning_rate": 3.107264597813095e-06, "loss": 0.5256, "step": 7019 }, { "epoch": 0.031077072911594138, "grad_norm": 2.6057325907233353, "learning_rate": 3.1077072911594143e-06, "loss": 0.7476, "step": 7020 }, { "epoch": 0.031081499845057328, "grad_norm": 2.837667919566665, "learning_rate": 3.1081499845057333e-06, "loss": 0.9232, "step": 7021 }, { "epoch": 0.03108592677852052, "grad_norm": 2.7993236028840056, "learning_rate": 3.108592677852052e-06, "loss": 0.6342, "step": 7022 }, { "epoch": 0.03109035371198371, "grad_norm": 2.6187998023351753, "learning_rate": 3.109035371198371e-06, "loss": 0.6669, "step": 7023 }, { "epoch": 0.0310947806454469, "grad_norm": 2.579961007223686, "learning_rate": 3.1094780645446906e-06, "loss": 0.5331, "step": 7024 }, { "epoch": 0.03109920757891009, "grad_norm": 2.5741402184364794, "learning_rate": 3.109920757891009e-06, "loss": 0.7692, "step": 7025 }, { "epoch": 0.03110363451237328, "grad_norm": 2.8502314133427427, "learning_rate": 3.110363451237328e-06, "loss": 0.8649, "step": 7026 }, { "epoch": 0.031108061445836468, "grad_norm": 2.8841496699236377, "learning_rate": 3.1108061445836474e-06, "loss": 0.733, "step": 7027 }, { "epoch": 0.03111248837929966, "grad_norm": 2.7636617477478054, "learning_rate": 3.111248837929966e-06, "loss": 0.7332, "step": 7028 }, { "epoch": 0.03111691531276285, "grad_norm": 3.3021209056312104, "learning_rate": 3.1116915312762853e-06, "loss": 0.9384, "step": 7029 }, { "epoch": 0.03112134224622604, "grad_norm": 2.9588609288044947, "learning_rate": 3.1121342246226042e-06, "loss": 0.5699, "step": 7030 }, { "epoch": 0.03112576917968923, "grad_norm": 3.491453399922809, "learning_rate": 3.112576917968923e-06, "loss": 0.668, "step": 7031 }, { "epoch": 0.03113019611315242, "grad_norm": 3.606051533351526, "learning_rate": 3.113019611315242e-06, "loss": 1.3397, "step": 7032 }, { "epoch": 0.03113462304661561, "grad_norm": 3.6496668314712863, "learning_rate": 3.1134623046615615e-06, "loss": 0.7587, "step": 7033 }, { "epoch": 0.031139049980078798, "grad_norm": 2.9198169821672346, "learning_rate": 3.11390499800788e-06, "loss": 0.6261, "step": 7034 }, { "epoch": 0.03114347691354199, "grad_norm": 2.731866814478189, "learning_rate": 3.1143476913541994e-06, "loss": 0.9345, "step": 7035 }, { "epoch": 0.03114790384700518, "grad_norm": 2.9621963268155813, "learning_rate": 3.1147903847005183e-06, "loss": 0.8732, "step": 7036 }, { "epoch": 0.03115233078046837, "grad_norm": 2.6707407851001674, "learning_rate": 3.115233078046837e-06, "loss": 0.692, "step": 7037 }, { "epoch": 0.03115675771393156, "grad_norm": 2.624948466181447, "learning_rate": 3.1156757713931562e-06, "loss": 0.7789, "step": 7038 }, { "epoch": 0.03116118464739475, "grad_norm": 3.27809263249245, "learning_rate": 3.1161184647394756e-06, "loss": 0.9592, "step": 7039 }, { "epoch": 0.03116561158085794, "grad_norm": 2.5590618123134816, "learning_rate": 3.116561158085794e-06, "loss": 0.6642, "step": 7040 }, { "epoch": 0.03117003851432113, "grad_norm": 3.0293862785676176, "learning_rate": 3.117003851432113e-06, "loss": 0.9295, "step": 7041 }, { "epoch": 0.03117446544778432, "grad_norm": 2.968852025153201, "learning_rate": 3.1174465447784325e-06, "loss": 0.8438, "step": 7042 }, { "epoch": 0.03117889238124751, "grad_norm": 2.6799466929979703, "learning_rate": 3.117889238124751e-06, "loss": 0.6772, "step": 7043 }, { "epoch": 0.0311833193147107, "grad_norm": 2.9592901080608214, "learning_rate": 3.1183319314710703e-06, "loss": 0.899, "step": 7044 }, { "epoch": 0.03118774624817389, "grad_norm": 3.091964199253217, "learning_rate": 3.1187746248173893e-06, "loss": 0.8185, "step": 7045 }, { "epoch": 0.03119217318163708, "grad_norm": 3.2272363515760127, "learning_rate": 3.1192173181637082e-06, "loss": 0.6769, "step": 7046 }, { "epoch": 0.03119660011510027, "grad_norm": 2.895438917289595, "learning_rate": 3.119660011510027e-06, "loss": 0.9184, "step": 7047 }, { "epoch": 0.03120102704856346, "grad_norm": 2.4438924217394593, "learning_rate": 3.1201027048563466e-06, "loss": 0.6578, "step": 7048 }, { "epoch": 0.03120545398202665, "grad_norm": 3.300315061874156, "learning_rate": 3.120545398202665e-06, "loss": 0.7675, "step": 7049 }, { "epoch": 0.03120988091548984, "grad_norm": 2.7266523606633735, "learning_rate": 3.1209880915489845e-06, "loss": 0.6478, "step": 7050 }, { "epoch": 0.03121430784895303, "grad_norm": 3.240735358571701, "learning_rate": 3.1214307848953034e-06, "loss": 0.5355, "step": 7051 }, { "epoch": 0.03121873478241622, "grad_norm": 2.618983438335959, "learning_rate": 3.121873478241622e-06, "loss": 0.6495, "step": 7052 }, { "epoch": 0.03122316171587941, "grad_norm": 2.735695852667003, "learning_rate": 3.1223161715879413e-06, "loss": 0.6781, "step": 7053 }, { "epoch": 0.0312275886493426, "grad_norm": 4.164370771924915, "learning_rate": 3.1227588649342607e-06, "loss": 1.2203, "step": 7054 }, { "epoch": 0.03123201558280579, "grad_norm": 3.1447480996081807, "learning_rate": 3.123201558280579e-06, "loss": 0.9558, "step": 7055 }, { "epoch": 0.031236442516268982, "grad_norm": 2.887573590569758, "learning_rate": 3.123644251626898e-06, "loss": 0.7591, "step": 7056 }, { "epoch": 0.03124086944973217, "grad_norm": 2.743028869086151, "learning_rate": 3.1240869449732175e-06, "loss": 0.7734, "step": 7057 }, { "epoch": 0.03124529638319536, "grad_norm": 3.9998107466894584, "learning_rate": 3.124529638319536e-06, "loss": 1.0504, "step": 7058 }, { "epoch": 0.03124972331665855, "grad_norm": 2.929312790613889, "learning_rate": 3.1249723316658554e-06, "loss": 0.6967, "step": 7059 }, { "epoch": 0.031254150250121744, "grad_norm": 2.8700745387036486, "learning_rate": 3.1254150250121743e-06, "loss": 0.8111, "step": 7060 }, { "epoch": 0.03125857718358493, "grad_norm": 2.6801347230964416, "learning_rate": 3.1258577183584933e-06, "loss": 0.9002, "step": 7061 }, { "epoch": 0.03126300411704812, "grad_norm": 3.1760935994513506, "learning_rate": 3.1263004117048122e-06, "loss": 0.8435, "step": 7062 }, { "epoch": 0.03126743105051131, "grad_norm": 2.3164087866544856, "learning_rate": 3.1267431050511316e-06, "loss": 0.5959, "step": 7063 }, { "epoch": 0.0312718579839745, "grad_norm": 2.569997038597507, "learning_rate": 3.12718579839745e-06, "loss": 0.9601, "step": 7064 }, { "epoch": 0.03127628491743769, "grad_norm": 2.8659383840985377, "learning_rate": 3.1276284917437695e-06, "loss": 1.006, "step": 7065 }, { "epoch": 0.03128071185090088, "grad_norm": 3.057210879741086, "learning_rate": 3.1280711850900885e-06, "loss": 0.8357, "step": 7066 }, { "epoch": 0.03128513878436407, "grad_norm": 5.559097932126407, "learning_rate": 3.1285138784364074e-06, "loss": 1.502, "step": 7067 }, { "epoch": 0.03128956571782726, "grad_norm": 3.2935623111709105, "learning_rate": 3.1289565717827263e-06, "loss": 1.0592, "step": 7068 }, { "epoch": 0.03129399265129045, "grad_norm": 2.6765348235268047, "learning_rate": 3.1293992651290457e-06, "loss": 0.7222, "step": 7069 }, { "epoch": 0.03129841958475364, "grad_norm": 3.1840042677562974, "learning_rate": 3.1298419584753642e-06, "loss": 0.8081, "step": 7070 }, { "epoch": 0.03130284651821683, "grad_norm": 3.276636591286941, "learning_rate": 3.130284651821683e-06, "loss": 0.9702, "step": 7071 }, { "epoch": 0.03130727345168002, "grad_norm": 2.856976361953906, "learning_rate": 3.1307273451680026e-06, "loss": 0.8609, "step": 7072 }, { "epoch": 0.031311700385143214, "grad_norm": 2.87751172316897, "learning_rate": 3.131170038514321e-06, "loss": 0.7676, "step": 7073 }, { "epoch": 0.031316127318606404, "grad_norm": 2.3672837504695923, "learning_rate": 3.1316127318606405e-06, "loss": 0.6977, "step": 7074 }, { "epoch": 0.031320554252069595, "grad_norm": 2.887774439806315, "learning_rate": 3.1320554252069594e-06, "loss": 0.7253, "step": 7075 }, { "epoch": 0.03132498118553278, "grad_norm": 3.2696531243934963, "learning_rate": 3.1324981185532783e-06, "loss": 0.6212, "step": 7076 }, { "epoch": 0.03132940811899597, "grad_norm": 2.804507618481819, "learning_rate": 3.1329408118995973e-06, "loss": 0.8714, "step": 7077 }, { "epoch": 0.03133383505245916, "grad_norm": 3.035718526604505, "learning_rate": 3.1333835052459167e-06, "loss": 0.853, "step": 7078 }, { "epoch": 0.03133826198592235, "grad_norm": 2.7977594594198285, "learning_rate": 3.133826198592235e-06, "loss": 0.709, "step": 7079 }, { "epoch": 0.03134268891938554, "grad_norm": 2.492779812970261, "learning_rate": 3.1342688919385546e-06, "loss": 0.7184, "step": 7080 }, { "epoch": 0.03134711585284873, "grad_norm": 3.250596017975733, "learning_rate": 3.1347115852848735e-06, "loss": 1.0436, "step": 7081 }, { "epoch": 0.03135154278631192, "grad_norm": 2.752000450908978, "learning_rate": 3.1351542786311925e-06, "loss": 0.6492, "step": 7082 }, { "epoch": 0.03135596971977511, "grad_norm": 2.3930480586986964, "learning_rate": 3.1355969719775114e-06, "loss": 0.7519, "step": 7083 }, { "epoch": 0.0313603966532383, "grad_norm": 3.1181586133496695, "learning_rate": 3.1360396653238308e-06, "loss": 0.7085, "step": 7084 }, { "epoch": 0.03136482358670149, "grad_norm": 3.052383089014159, "learning_rate": 3.1364823586701493e-06, "loss": 0.8569, "step": 7085 }, { "epoch": 0.03136925052016468, "grad_norm": 3.057734898283147, "learning_rate": 3.1369250520164687e-06, "loss": 0.8762, "step": 7086 }, { "epoch": 0.031373677453627874, "grad_norm": 3.526786412788804, "learning_rate": 3.1373677453627876e-06, "loss": 1.0935, "step": 7087 }, { "epoch": 0.031378104387091064, "grad_norm": 3.02010056272543, "learning_rate": 3.137810438709106e-06, "loss": 0.8108, "step": 7088 }, { "epoch": 0.031382531320554255, "grad_norm": 4.1296120344562945, "learning_rate": 3.1382531320554255e-06, "loss": 1.3431, "step": 7089 }, { "epoch": 0.031386958254017445, "grad_norm": 3.107326847228595, "learning_rate": 3.138695825401745e-06, "loss": 0.8484, "step": 7090 }, { "epoch": 0.03139138518748063, "grad_norm": 2.751323050350942, "learning_rate": 3.1391385187480634e-06, "loss": 0.8589, "step": 7091 }, { "epoch": 0.03139581212094382, "grad_norm": 2.619815276812015, "learning_rate": 3.1395812120943823e-06, "loss": 0.764, "step": 7092 }, { "epoch": 0.03140023905440701, "grad_norm": 2.7342555672283946, "learning_rate": 3.1400239054407017e-06, "loss": 0.8944, "step": 7093 }, { "epoch": 0.0314046659878702, "grad_norm": 2.4786895737347243, "learning_rate": 3.1404665987870202e-06, "loss": 0.5767, "step": 7094 }, { "epoch": 0.03140909292133339, "grad_norm": 4.052156673145462, "learning_rate": 3.1409092921333396e-06, "loss": 1.088, "step": 7095 }, { "epoch": 0.03141351985479658, "grad_norm": 3.136061843731613, "learning_rate": 3.1413519854796586e-06, "loss": 0.596, "step": 7096 }, { "epoch": 0.03141794678825977, "grad_norm": 2.736048955097817, "learning_rate": 3.1417946788259775e-06, "loss": 0.9439, "step": 7097 }, { "epoch": 0.03142237372172296, "grad_norm": 2.4351030567121796, "learning_rate": 3.1422373721722965e-06, "loss": 0.6801, "step": 7098 }, { "epoch": 0.03142680065518615, "grad_norm": 3.2039992371252386, "learning_rate": 3.142680065518616e-06, "loss": 1.0679, "step": 7099 }, { "epoch": 0.031431227588649344, "grad_norm": 4.224370820866286, "learning_rate": 3.1431227588649343e-06, "loss": 0.7233, "step": 7100 }, { "epoch": 0.031435654522112534, "grad_norm": 2.9206697405022335, "learning_rate": 3.1435654522112537e-06, "loss": 0.6483, "step": 7101 }, { "epoch": 0.031440081455575725, "grad_norm": 2.4008420202765506, "learning_rate": 3.1440081455575727e-06, "loss": 0.729, "step": 7102 }, { "epoch": 0.031444508389038915, "grad_norm": 2.815979965049697, "learning_rate": 3.144450838903891e-06, "loss": 0.6496, "step": 7103 }, { "epoch": 0.031448935322502106, "grad_norm": 2.655986191357077, "learning_rate": 3.1448935322502106e-06, "loss": 0.8945, "step": 7104 }, { "epoch": 0.031453362255965296, "grad_norm": 2.4991460935820067, "learning_rate": 3.14533622559653e-06, "loss": 0.6759, "step": 7105 }, { "epoch": 0.03145778918942848, "grad_norm": 4.160852823848135, "learning_rate": 3.1457789189428485e-06, "loss": 0.8274, "step": 7106 }, { "epoch": 0.03146221612289167, "grad_norm": 2.4401337587858887, "learning_rate": 3.1462216122891674e-06, "loss": 0.4914, "step": 7107 }, { "epoch": 0.03146664305635486, "grad_norm": 2.712184593543878, "learning_rate": 3.1466643056354868e-06, "loss": 0.7104, "step": 7108 }, { "epoch": 0.03147106998981805, "grad_norm": 2.5396450687625114, "learning_rate": 3.1471069989818053e-06, "loss": 0.6102, "step": 7109 }, { "epoch": 0.03147549692328124, "grad_norm": 3.2598032665687833, "learning_rate": 3.1475496923281247e-06, "loss": 0.8296, "step": 7110 }, { "epoch": 0.03147992385674443, "grad_norm": 3.023845589460379, "learning_rate": 3.1479923856744436e-06, "loss": 0.8677, "step": 7111 }, { "epoch": 0.03148435079020762, "grad_norm": 3.777354426944939, "learning_rate": 3.1484350790207626e-06, "loss": 0.8206, "step": 7112 }, { "epoch": 0.03148877772367081, "grad_norm": 3.1552048836741156, "learning_rate": 3.1488777723670815e-06, "loss": 0.7762, "step": 7113 }, { "epoch": 0.031493204657134004, "grad_norm": 2.4788038898167213, "learning_rate": 3.149320465713401e-06, "loss": 0.9196, "step": 7114 }, { "epoch": 0.031497631590597194, "grad_norm": 2.5986924735110026, "learning_rate": 3.1497631590597194e-06, "loss": 0.5951, "step": 7115 }, { "epoch": 0.031502058524060385, "grad_norm": 2.902555941525565, "learning_rate": 3.1502058524060388e-06, "loss": 0.9433, "step": 7116 }, { "epoch": 0.031506485457523575, "grad_norm": 3.676045418379899, "learning_rate": 3.1506485457523577e-06, "loss": 0.7247, "step": 7117 }, { "epoch": 0.031510912390986766, "grad_norm": 2.539391931013173, "learning_rate": 3.1510912390986762e-06, "loss": 0.5471, "step": 7118 }, { "epoch": 0.031515339324449956, "grad_norm": 2.7730966883985726, "learning_rate": 3.1515339324449956e-06, "loss": 0.9359, "step": 7119 }, { "epoch": 0.03151976625791315, "grad_norm": 3.1267647107648093, "learning_rate": 3.151976625791315e-06, "loss": 0.8771, "step": 7120 }, { "epoch": 0.03152419319137633, "grad_norm": 3.228993391893556, "learning_rate": 3.1524193191376335e-06, "loss": 0.9809, "step": 7121 }, { "epoch": 0.03152862012483952, "grad_norm": 2.840869838424349, "learning_rate": 3.1528620124839525e-06, "loss": 0.5788, "step": 7122 }, { "epoch": 0.03153304705830271, "grad_norm": 3.44578486880721, "learning_rate": 3.153304705830272e-06, "loss": 0.8618, "step": 7123 }, { "epoch": 0.0315374739917659, "grad_norm": 3.5356833215343317, "learning_rate": 3.1537473991765904e-06, "loss": 0.7917, "step": 7124 }, { "epoch": 0.03154190092522909, "grad_norm": 3.2772016183652224, "learning_rate": 3.1541900925229097e-06, "loss": 0.7526, "step": 7125 }, { "epoch": 0.03154632785869228, "grad_norm": 4.9156401242280765, "learning_rate": 3.1546327858692287e-06, "loss": 0.9654, "step": 7126 }, { "epoch": 0.03155075479215547, "grad_norm": 3.853630440992934, "learning_rate": 3.1550754792155476e-06, "loss": 0.9546, "step": 7127 }, { "epoch": 0.031555181725618664, "grad_norm": 2.6280356047681668, "learning_rate": 3.1555181725618666e-06, "loss": 0.8476, "step": 7128 }, { "epoch": 0.031559608659081854, "grad_norm": 3.0668450654017807, "learning_rate": 3.155960865908186e-06, "loss": 0.7608, "step": 7129 }, { "epoch": 0.031564035592545045, "grad_norm": 3.367460372827168, "learning_rate": 3.1564035592545045e-06, "loss": 0.682, "step": 7130 }, { "epoch": 0.031568462526008235, "grad_norm": 2.419285729276397, "learning_rate": 3.156846252600824e-06, "loss": 0.7425, "step": 7131 }, { "epoch": 0.031572889459471426, "grad_norm": 2.6873294946918356, "learning_rate": 3.1572889459471428e-06, "loss": 0.9368, "step": 7132 }, { "epoch": 0.031577316392934617, "grad_norm": 3.0254260177198926, "learning_rate": 3.1577316392934613e-06, "loss": 0.89, "step": 7133 }, { "epoch": 0.03158174332639781, "grad_norm": 3.3845924466882558, "learning_rate": 3.1581743326397807e-06, "loss": 1.0537, "step": 7134 }, { "epoch": 0.031586170259861, "grad_norm": 4.203571850734298, "learning_rate": 3.1586170259861e-06, "loss": 0.9616, "step": 7135 }, { "epoch": 0.03159059719332418, "grad_norm": 3.045431778739254, "learning_rate": 3.1590597193324186e-06, "loss": 0.6041, "step": 7136 }, { "epoch": 0.03159502412678737, "grad_norm": 2.969381632333399, "learning_rate": 3.1595024126787375e-06, "loss": 0.9153, "step": 7137 }, { "epoch": 0.03159945106025056, "grad_norm": 2.8146382942665786, "learning_rate": 3.159945106025057e-06, "loss": 0.6712, "step": 7138 }, { "epoch": 0.03160387799371375, "grad_norm": 3.3723283557478103, "learning_rate": 3.1603877993713754e-06, "loss": 1.0182, "step": 7139 }, { "epoch": 0.03160830492717694, "grad_norm": 2.9070444802581332, "learning_rate": 3.1608304927176948e-06, "loss": 0.6136, "step": 7140 }, { "epoch": 0.031612731860640134, "grad_norm": 3.53976919599669, "learning_rate": 3.1612731860640137e-06, "loss": 0.9516, "step": 7141 }, { "epoch": 0.031617158794103324, "grad_norm": 2.805137423030939, "learning_rate": 3.1617158794103327e-06, "loss": 0.6557, "step": 7142 }, { "epoch": 0.031621585727566515, "grad_norm": 2.911824014261469, "learning_rate": 3.1621585727566516e-06, "loss": 0.9802, "step": 7143 }, { "epoch": 0.031626012661029705, "grad_norm": 2.885235037770691, "learning_rate": 3.162601266102971e-06, "loss": 0.8772, "step": 7144 }, { "epoch": 0.031630439594492896, "grad_norm": 3.031686977147015, "learning_rate": 3.1630439594492895e-06, "loss": 0.7391, "step": 7145 }, { "epoch": 0.031634866527956086, "grad_norm": 2.9319583326988314, "learning_rate": 3.163486652795609e-06, "loss": 0.4464, "step": 7146 }, { "epoch": 0.03163929346141928, "grad_norm": 3.887002797098405, "learning_rate": 3.163929346141928e-06, "loss": 1.3671, "step": 7147 }, { "epoch": 0.03164372039488247, "grad_norm": 3.2364059526518947, "learning_rate": 3.1643720394882464e-06, "loss": 1.0222, "step": 7148 }, { "epoch": 0.03164814732834566, "grad_norm": 2.615619188725985, "learning_rate": 3.1648147328345657e-06, "loss": 0.6836, "step": 7149 }, { "epoch": 0.03165257426180885, "grad_norm": 2.984204483835673, "learning_rate": 3.165257426180885e-06, "loss": 0.9777, "step": 7150 }, { "epoch": 0.03165700119527203, "grad_norm": 3.1943345953567976, "learning_rate": 3.1657001195272036e-06, "loss": 0.9239, "step": 7151 }, { "epoch": 0.03166142812873522, "grad_norm": 2.7018355352020857, "learning_rate": 3.1661428128735226e-06, "loss": 0.5252, "step": 7152 }, { "epoch": 0.03166585506219841, "grad_norm": 2.539906613614104, "learning_rate": 3.166585506219842e-06, "loss": 0.7888, "step": 7153 }, { "epoch": 0.0316702819956616, "grad_norm": 2.915687352593671, "learning_rate": 3.1670281995661605e-06, "loss": 0.4974, "step": 7154 }, { "epoch": 0.031674708929124794, "grad_norm": 3.55134170680214, "learning_rate": 3.16747089291248e-06, "loss": 1.1192, "step": 7155 }, { "epoch": 0.031679135862587984, "grad_norm": 2.9207827936473647, "learning_rate": 3.1679135862587988e-06, "loss": 0.6708, "step": 7156 }, { "epoch": 0.031683562796051175, "grad_norm": 3.142474938231488, "learning_rate": 3.1683562796051177e-06, "loss": 0.8723, "step": 7157 }, { "epoch": 0.031687989729514365, "grad_norm": 2.9571202817302904, "learning_rate": 3.1687989729514367e-06, "loss": 0.727, "step": 7158 }, { "epoch": 0.031692416662977556, "grad_norm": 2.7650405648588, "learning_rate": 3.169241666297756e-06, "loss": 0.7509, "step": 7159 }, { "epoch": 0.031696843596440746, "grad_norm": 2.665133166481438, "learning_rate": 3.1696843596440746e-06, "loss": 0.9831, "step": 7160 }, { "epoch": 0.03170127052990394, "grad_norm": 2.5667965325218356, "learning_rate": 3.170127052990394e-06, "loss": 0.8005, "step": 7161 }, { "epoch": 0.03170569746336713, "grad_norm": 2.5141369262565885, "learning_rate": 3.170569746336713e-06, "loss": 0.7251, "step": 7162 }, { "epoch": 0.03171012439683032, "grad_norm": 2.4717503551166313, "learning_rate": 3.171012439683032e-06, "loss": 0.5696, "step": 7163 }, { "epoch": 0.03171455133029351, "grad_norm": 2.3309264135402277, "learning_rate": 3.1714551330293508e-06, "loss": 0.7887, "step": 7164 }, { "epoch": 0.0317189782637567, "grad_norm": 2.7123179677675915, "learning_rate": 3.17189782637567e-06, "loss": 0.5881, "step": 7165 }, { "epoch": 0.03172340519721988, "grad_norm": 2.9231974687518787, "learning_rate": 3.1723405197219887e-06, "loss": 0.6265, "step": 7166 }, { "epoch": 0.03172783213068307, "grad_norm": 3.187113148187288, "learning_rate": 3.172783213068308e-06, "loss": 1.0994, "step": 7167 }, { "epoch": 0.03173225906414626, "grad_norm": 2.578594049545037, "learning_rate": 3.173225906414627e-06, "loss": 0.9001, "step": 7168 }, { "epoch": 0.031736685997609454, "grad_norm": 2.875602823287184, "learning_rate": 3.1736685997609455e-06, "loss": 0.7139, "step": 7169 }, { "epoch": 0.031741112931072644, "grad_norm": 3.1052264765570814, "learning_rate": 3.174111293107265e-06, "loss": 0.8686, "step": 7170 }, { "epoch": 0.031745539864535835, "grad_norm": 2.8036074638920887, "learning_rate": 3.174553986453584e-06, "loss": 0.8324, "step": 7171 }, { "epoch": 0.031749966797999025, "grad_norm": 2.9203876681014296, "learning_rate": 3.1749966797999028e-06, "loss": 0.8844, "step": 7172 }, { "epoch": 0.031754393731462216, "grad_norm": 3.3771099299710237, "learning_rate": 3.1754393731462217e-06, "loss": 1.0063, "step": 7173 }, { "epoch": 0.031758820664925407, "grad_norm": 2.7544990902937583, "learning_rate": 3.175882066492541e-06, "loss": 0.883, "step": 7174 }, { "epoch": 0.0317632475983886, "grad_norm": 2.75208410210018, "learning_rate": 3.1763247598388596e-06, "loss": 0.7922, "step": 7175 }, { "epoch": 0.03176767453185179, "grad_norm": 2.5801766450218886, "learning_rate": 3.176767453185179e-06, "loss": 0.8304, "step": 7176 }, { "epoch": 0.03177210146531498, "grad_norm": 2.362804074464017, "learning_rate": 3.177210146531498e-06, "loss": 0.5409, "step": 7177 }, { "epoch": 0.03177652839877817, "grad_norm": 3.196763203392045, "learning_rate": 3.177652839877817e-06, "loss": 0.7908, "step": 7178 }, { "epoch": 0.03178095533224136, "grad_norm": 3.4980457032394403, "learning_rate": 3.178095533224136e-06, "loss": 1.0147, "step": 7179 }, { "epoch": 0.03178538226570455, "grad_norm": 2.519710061347197, "learning_rate": 3.178538226570455e-06, "loss": 0.772, "step": 7180 }, { "epoch": 0.03178980919916773, "grad_norm": 3.6849443448612615, "learning_rate": 3.1789809199167737e-06, "loss": 0.7306, "step": 7181 }, { "epoch": 0.031794236132630924, "grad_norm": 2.768593995955188, "learning_rate": 3.179423613263093e-06, "loss": 0.738, "step": 7182 }, { "epoch": 0.031798663066094114, "grad_norm": 3.2154679974251983, "learning_rate": 3.179866306609412e-06, "loss": 0.7392, "step": 7183 }, { "epoch": 0.031803089999557305, "grad_norm": 2.674236933296264, "learning_rate": 3.1803089999557306e-06, "loss": 0.7117, "step": 7184 }, { "epoch": 0.031807516933020495, "grad_norm": 3.517740500764957, "learning_rate": 3.18075169330205e-06, "loss": 1.1017, "step": 7185 }, { "epoch": 0.031811943866483686, "grad_norm": 3.0178499181470158, "learning_rate": 3.1811943866483693e-06, "loss": 1.0371, "step": 7186 }, { "epoch": 0.031816370799946876, "grad_norm": 2.7490284196793677, "learning_rate": 3.181637079994688e-06, "loss": 0.7095, "step": 7187 }, { "epoch": 0.03182079773341007, "grad_norm": 2.7948749101028514, "learning_rate": 3.1820797733410068e-06, "loss": 0.6942, "step": 7188 }, { "epoch": 0.03182522466687326, "grad_norm": 2.8787494722505795, "learning_rate": 3.182522466687326e-06, "loss": 0.7306, "step": 7189 }, { "epoch": 0.03182965160033645, "grad_norm": 3.253477325695595, "learning_rate": 3.1829651600336447e-06, "loss": 0.9948, "step": 7190 }, { "epoch": 0.03183407853379964, "grad_norm": 2.322318226778784, "learning_rate": 3.183407853379964e-06, "loss": 0.595, "step": 7191 }, { "epoch": 0.03183850546726283, "grad_norm": 2.7057956244712322, "learning_rate": 3.183850546726283e-06, "loss": 0.8091, "step": 7192 }, { "epoch": 0.03184293240072602, "grad_norm": 3.7303665808793416, "learning_rate": 3.184293240072602e-06, "loss": 0.8603, "step": 7193 }, { "epoch": 0.03184735933418921, "grad_norm": 2.7094864546311066, "learning_rate": 3.184735933418921e-06, "loss": 0.9017, "step": 7194 }, { "epoch": 0.0318517862676524, "grad_norm": 3.2230100709222516, "learning_rate": 3.1851786267652403e-06, "loss": 0.616, "step": 7195 }, { "epoch": 0.031856213201115584, "grad_norm": 4.09504237907442, "learning_rate": 3.1856213201115588e-06, "loss": 0.8613, "step": 7196 }, { "epoch": 0.031860640134578774, "grad_norm": 3.219052185446105, "learning_rate": 3.186064013457878e-06, "loss": 0.7765, "step": 7197 }, { "epoch": 0.031865067068041965, "grad_norm": 3.728312697792794, "learning_rate": 3.186506706804197e-06, "loss": 1.2368, "step": 7198 }, { "epoch": 0.031869494001505155, "grad_norm": 2.6582261746203404, "learning_rate": 3.1869494001505156e-06, "loss": 0.8161, "step": 7199 }, { "epoch": 0.031873920934968346, "grad_norm": 2.698380955083529, "learning_rate": 3.187392093496835e-06, "loss": 0.7839, "step": 7200 }, { "epoch": 0.031878347868431536, "grad_norm": 3.4374962706961485, "learning_rate": 3.1878347868431544e-06, "loss": 1.0067, "step": 7201 }, { "epoch": 0.03188277480189473, "grad_norm": 2.58055171574767, "learning_rate": 3.188277480189473e-06, "loss": 0.8708, "step": 7202 }, { "epoch": 0.03188720173535792, "grad_norm": 2.975337789256801, "learning_rate": 3.188720173535792e-06, "loss": 0.5797, "step": 7203 }, { "epoch": 0.03189162866882111, "grad_norm": 2.3795984401639863, "learning_rate": 3.189162866882111e-06, "loss": 0.6487, "step": 7204 }, { "epoch": 0.0318960556022843, "grad_norm": 2.948181088496799, "learning_rate": 3.1896055602284297e-06, "loss": 0.8958, "step": 7205 }, { "epoch": 0.03190048253574749, "grad_norm": 3.1919031314344872, "learning_rate": 3.190048253574749e-06, "loss": 1.0248, "step": 7206 }, { "epoch": 0.03190490946921068, "grad_norm": 2.863306601454775, "learning_rate": 3.190490946921068e-06, "loss": 0.4685, "step": 7207 }, { "epoch": 0.03190933640267387, "grad_norm": 2.7224618749106, "learning_rate": 3.190933640267387e-06, "loss": 0.6588, "step": 7208 }, { "epoch": 0.03191376333613706, "grad_norm": 2.616327601437964, "learning_rate": 3.191376333613706e-06, "loss": 0.8785, "step": 7209 }, { "epoch": 0.03191819026960025, "grad_norm": 3.8894686721279736, "learning_rate": 3.1918190269600253e-06, "loss": 0.8637, "step": 7210 }, { "epoch": 0.03192261720306344, "grad_norm": 3.491163080333462, "learning_rate": 3.192261720306344e-06, "loss": 1.1727, "step": 7211 }, { "epoch": 0.031927044136526625, "grad_norm": 3.5678597270026766, "learning_rate": 3.192704413652663e-06, "loss": 0.9325, "step": 7212 }, { "epoch": 0.031931471069989815, "grad_norm": 3.0712902640262723, "learning_rate": 3.193147106998982e-06, "loss": 0.64, "step": 7213 }, { "epoch": 0.031935898003453006, "grad_norm": 2.916980665193418, "learning_rate": 3.1935898003453007e-06, "loss": 0.6473, "step": 7214 }, { "epoch": 0.031940324936916197, "grad_norm": 3.2807816718567167, "learning_rate": 3.19403249369162e-06, "loss": 0.8037, "step": 7215 }, { "epoch": 0.03194475187037939, "grad_norm": 2.9005954896132917, "learning_rate": 3.1944751870379394e-06, "loss": 0.4543, "step": 7216 }, { "epoch": 0.03194917880384258, "grad_norm": 3.728934043062258, "learning_rate": 3.194917880384258e-06, "loss": 0.811, "step": 7217 }, { "epoch": 0.03195360573730577, "grad_norm": 2.7602687131923016, "learning_rate": 3.195360573730577e-06, "loss": 0.7243, "step": 7218 }, { "epoch": 0.03195803267076896, "grad_norm": 2.6906548803508414, "learning_rate": 3.1958032670768963e-06, "loss": 0.4804, "step": 7219 }, { "epoch": 0.03196245960423215, "grad_norm": 2.6495258689589076, "learning_rate": 3.1962459604232148e-06, "loss": 0.6512, "step": 7220 }, { "epoch": 0.03196688653769534, "grad_norm": 2.8252604941438744, "learning_rate": 3.196688653769534e-06, "loss": 0.4951, "step": 7221 }, { "epoch": 0.03197131347115853, "grad_norm": 2.592861638254879, "learning_rate": 3.197131347115853e-06, "loss": 0.7149, "step": 7222 }, { "epoch": 0.03197574040462172, "grad_norm": 2.865451817078309, "learning_rate": 3.197574040462172e-06, "loss": 0.7946, "step": 7223 }, { "epoch": 0.03198016733808491, "grad_norm": 2.5924696772720472, "learning_rate": 3.198016733808491e-06, "loss": 0.7743, "step": 7224 }, { "epoch": 0.0319845942715481, "grad_norm": 3.1099124981858957, "learning_rate": 3.1984594271548104e-06, "loss": 0.9087, "step": 7225 }, { "epoch": 0.03198902120501129, "grad_norm": 2.528341052147552, "learning_rate": 3.198902120501129e-06, "loss": 0.5977, "step": 7226 }, { "epoch": 0.031993448138474476, "grad_norm": 2.623638132623338, "learning_rate": 3.1993448138474483e-06, "loss": 0.6886, "step": 7227 }, { "epoch": 0.031997875071937666, "grad_norm": 2.6684906652770293, "learning_rate": 3.199787507193767e-06, "loss": 0.8214, "step": 7228 }, { "epoch": 0.03200230200540086, "grad_norm": 3.101169266261853, "learning_rate": 3.2002302005400857e-06, "loss": 1.0597, "step": 7229 }, { "epoch": 0.03200672893886405, "grad_norm": 3.1594724591693244, "learning_rate": 3.200672893886405e-06, "loss": 0.7076, "step": 7230 }, { "epoch": 0.03201115587232724, "grad_norm": 3.3075437738559037, "learning_rate": 3.2011155872327245e-06, "loss": 0.8625, "step": 7231 }, { "epoch": 0.03201558280579043, "grad_norm": 2.838240659531615, "learning_rate": 3.201558280579043e-06, "loss": 0.9575, "step": 7232 }, { "epoch": 0.03202000973925362, "grad_norm": 2.8289009205063174, "learning_rate": 3.202000973925362e-06, "loss": 0.6326, "step": 7233 }, { "epoch": 0.03202443667271681, "grad_norm": 2.8740731580890144, "learning_rate": 3.2024436672716813e-06, "loss": 0.9624, "step": 7234 }, { "epoch": 0.03202886360618, "grad_norm": 3.123267783720387, "learning_rate": 3.202886360618e-06, "loss": 0.826, "step": 7235 }, { "epoch": 0.03203329053964319, "grad_norm": 2.638671781461483, "learning_rate": 3.203329053964319e-06, "loss": 0.5727, "step": 7236 }, { "epoch": 0.03203771747310638, "grad_norm": 3.202796437809653, "learning_rate": 3.203771747310638e-06, "loss": 0.8876, "step": 7237 }, { "epoch": 0.03204214440656957, "grad_norm": 2.9164679094609416, "learning_rate": 3.204214440656957e-06, "loss": 0.8987, "step": 7238 }, { "epoch": 0.03204657134003276, "grad_norm": 2.6459125268200157, "learning_rate": 3.204657134003276e-06, "loss": 0.6358, "step": 7239 }, { "epoch": 0.03205099827349595, "grad_norm": 2.870979346310132, "learning_rate": 3.2050998273495954e-06, "loss": 0.828, "step": 7240 }, { "epoch": 0.03205542520695914, "grad_norm": 4.275647409998168, "learning_rate": 3.205542520695914e-06, "loss": 1.114, "step": 7241 }, { "epoch": 0.032059852140422326, "grad_norm": 3.1999518024869875, "learning_rate": 3.2059852140422333e-06, "loss": 0.7884, "step": 7242 }, { "epoch": 0.03206427907388552, "grad_norm": 3.4288295395614643, "learning_rate": 3.2064279073885523e-06, "loss": 0.6903, "step": 7243 }, { "epoch": 0.03206870600734871, "grad_norm": 2.744619956487624, "learning_rate": 3.206870600734871e-06, "loss": 0.6501, "step": 7244 }, { "epoch": 0.0320731329408119, "grad_norm": 4.0211674125870465, "learning_rate": 3.20731329408119e-06, "loss": 0.9937, "step": 7245 }, { "epoch": 0.03207755987427509, "grad_norm": 3.0659307052342233, "learning_rate": 3.2077559874275095e-06, "loss": 0.928, "step": 7246 }, { "epoch": 0.03208198680773828, "grad_norm": 2.7396705799425667, "learning_rate": 3.208198680773828e-06, "loss": 0.7509, "step": 7247 }, { "epoch": 0.03208641374120147, "grad_norm": 2.5535590424608055, "learning_rate": 3.208641374120147e-06, "loss": 0.6387, "step": 7248 }, { "epoch": 0.03209084067466466, "grad_norm": 2.754876004174327, "learning_rate": 3.2090840674664664e-06, "loss": 0.7909, "step": 7249 }, { "epoch": 0.03209526760812785, "grad_norm": 2.7092217312898184, "learning_rate": 3.209526760812785e-06, "loss": 0.5918, "step": 7250 }, { "epoch": 0.03209969454159104, "grad_norm": 2.478700645646149, "learning_rate": 3.2099694541591043e-06, "loss": 0.5875, "step": 7251 }, { "epoch": 0.03210412147505423, "grad_norm": 2.765510846834047, "learning_rate": 3.210412147505423e-06, "loss": 0.8408, "step": 7252 }, { "epoch": 0.03210854840851742, "grad_norm": 2.698058362897101, "learning_rate": 3.210854840851742e-06, "loss": 0.7104, "step": 7253 }, { "epoch": 0.03211297534198061, "grad_norm": 2.86139240858445, "learning_rate": 3.211297534198061e-06, "loss": 0.5151, "step": 7254 }, { "epoch": 0.0321174022754438, "grad_norm": 3.031855621713266, "learning_rate": 3.2117402275443805e-06, "loss": 0.9506, "step": 7255 }, { "epoch": 0.03212182920890699, "grad_norm": 2.9198792749350417, "learning_rate": 3.212182920890699e-06, "loss": 0.7336, "step": 7256 }, { "epoch": 0.03212625614237018, "grad_norm": 3.3594269876082232, "learning_rate": 3.2126256142370184e-06, "loss": 0.7491, "step": 7257 }, { "epoch": 0.03213068307583337, "grad_norm": 2.946280945171275, "learning_rate": 3.2130683075833373e-06, "loss": 0.8639, "step": 7258 }, { "epoch": 0.03213511000929656, "grad_norm": 2.692951164041204, "learning_rate": 3.2135110009296563e-06, "loss": 0.684, "step": 7259 }, { "epoch": 0.03213953694275975, "grad_norm": 4.039033709692443, "learning_rate": 3.213953694275975e-06, "loss": 1.0224, "step": 7260 }, { "epoch": 0.03214396387622294, "grad_norm": 3.7780348376923376, "learning_rate": 3.2143963876222946e-06, "loss": 1.0176, "step": 7261 }, { "epoch": 0.03214839080968613, "grad_norm": 3.3991207960002807, "learning_rate": 3.214839080968613e-06, "loss": 0.7915, "step": 7262 }, { "epoch": 0.03215281774314932, "grad_norm": 2.319430277326177, "learning_rate": 3.2152817743149325e-06, "loss": 0.5145, "step": 7263 }, { "epoch": 0.03215724467661251, "grad_norm": 3.114185690053852, "learning_rate": 3.2157244676612514e-06, "loss": 0.6504, "step": 7264 }, { "epoch": 0.0321616716100757, "grad_norm": 2.940932147062745, "learning_rate": 3.21616716100757e-06, "loss": 1.0746, "step": 7265 }, { "epoch": 0.03216609854353889, "grad_norm": 2.5158825770803084, "learning_rate": 3.2166098543538893e-06, "loss": 0.6436, "step": 7266 }, { "epoch": 0.03217052547700208, "grad_norm": 2.6215766607531488, "learning_rate": 3.2170525477002087e-06, "loss": 0.8188, "step": 7267 }, { "epoch": 0.03217495241046527, "grad_norm": 2.9184010844935506, "learning_rate": 3.217495241046527e-06, "loss": 0.7353, "step": 7268 }, { "epoch": 0.03217937934392846, "grad_norm": 2.6104312551265103, "learning_rate": 3.217937934392846e-06, "loss": 0.6066, "step": 7269 }, { "epoch": 0.032183806277391654, "grad_norm": 3.2584646691233035, "learning_rate": 3.2183806277391655e-06, "loss": 0.8824, "step": 7270 }, { "epoch": 0.032188233210854844, "grad_norm": 3.484599824584603, "learning_rate": 3.218823321085484e-06, "loss": 0.6813, "step": 7271 }, { "epoch": 0.03219266014431803, "grad_norm": 2.507161873026731, "learning_rate": 3.2192660144318034e-06, "loss": 0.8466, "step": 7272 }, { "epoch": 0.03219708707778122, "grad_norm": 3.4606198973861195, "learning_rate": 3.2197087077781224e-06, "loss": 1.0781, "step": 7273 }, { "epoch": 0.03220151401124441, "grad_norm": 2.2867299019905154, "learning_rate": 3.2201514011244413e-06, "loss": 0.4458, "step": 7274 }, { "epoch": 0.0322059409447076, "grad_norm": 2.878474111585088, "learning_rate": 3.2205940944707603e-06, "loss": 0.5436, "step": 7275 }, { "epoch": 0.03221036787817079, "grad_norm": 3.455751388476446, "learning_rate": 3.2210367878170796e-06, "loss": 1.2159, "step": 7276 }, { "epoch": 0.03221479481163398, "grad_norm": 3.694795612544349, "learning_rate": 3.221479481163398e-06, "loss": 1.0012, "step": 7277 }, { "epoch": 0.03221922174509717, "grad_norm": 2.9585262123741143, "learning_rate": 3.2219221745097175e-06, "loss": 1.0099, "step": 7278 }, { "epoch": 0.03222364867856036, "grad_norm": 2.9807170305650588, "learning_rate": 3.2223648678560365e-06, "loss": 0.911, "step": 7279 }, { "epoch": 0.03222807561202355, "grad_norm": 3.208439274960126, "learning_rate": 3.222807561202355e-06, "loss": 0.8454, "step": 7280 }, { "epoch": 0.03223250254548674, "grad_norm": 2.934332462263686, "learning_rate": 3.2232502545486744e-06, "loss": 0.6745, "step": 7281 }, { "epoch": 0.03223692947894993, "grad_norm": 3.4910913734697004, "learning_rate": 3.2236929478949937e-06, "loss": 1.0373, "step": 7282 }, { "epoch": 0.03224135641241312, "grad_norm": 2.94900248726974, "learning_rate": 3.2241356412413123e-06, "loss": 0.8963, "step": 7283 }, { "epoch": 0.032245783345876314, "grad_norm": 2.9002131397345727, "learning_rate": 3.224578334587631e-06, "loss": 0.7865, "step": 7284 }, { "epoch": 0.032250210279339504, "grad_norm": 2.791259125130142, "learning_rate": 3.2250210279339506e-06, "loss": 0.6041, "step": 7285 }, { "epoch": 0.032254637212802695, "grad_norm": 2.858419956120431, "learning_rate": 3.225463721280269e-06, "loss": 0.6801, "step": 7286 }, { "epoch": 0.03225906414626588, "grad_norm": 2.5111073407066717, "learning_rate": 3.2259064146265885e-06, "loss": 0.7107, "step": 7287 }, { "epoch": 0.03226349107972907, "grad_norm": 2.9186311414328, "learning_rate": 3.2263491079729074e-06, "loss": 0.7394, "step": 7288 }, { "epoch": 0.03226791801319226, "grad_norm": 2.6240716074116146, "learning_rate": 3.2267918013192264e-06, "loss": 0.6366, "step": 7289 }, { "epoch": 0.03227234494665545, "grad_norm": 2.2308926968239193, "learning_rate": 3.2272344946655453e-06, "loss": 0.4216, "step": 7290 }, { "epoch": 0.03227677188011864, "grad_norm": 3.283687971301174, "learning_rate": 3.2276771880118647e-06, "loss": 1.0368, "step": 7291 }, { "epoch": 0.03228119881358183, "grad_norm": 2.791786642822341, "learning_rate": 3.228119881358183e-06, "loss": 0.7215, "step": 7292 }, { "epoch": 0.03228562574704502, "grad_norm": 2.9023370264527704, "learning_rate": 3.2285625747045026e-06, "loss": 0.8524, "step": 7293 }, { "epoch": 0.03229005268050821, "grad_norm": 2.5947049476605253, "learning_rate": 3.2290052680508215e-06, "loss": 0.5879, "step": 7294 }, { "epoch": 0.0322944796139714, "grad_norm": 2.65484552743751, "learning_rate": 3.22944796139714e-06, "loss": 0.9039, "step": 7295 }, { "epoch": 0.03229890654743459, "grad_norm": 3.048708823297255, "learning_rate": 3.2298906547434594e-06, "loss": 0.9689, "step": 7296 }, { "epoch": 0.03230333348089778, "grad_norm": 2.426424057813468, "learning_rate": 3.2303333480897788e-06, "loss": 0.641, "step": 7297 }, { "epoch": 0.032307760414360974, "grad_norm": 2.4474201422792397, "learning_rate": 3.2307760414360973e-06, "loss": 0.5259, "step": 7298 }, { "epoch": 0.032312187347824164, "grad_norm": 2.6962981199542093, "learning_rate": 3.2312187347824163e-06, "loss": 0.7903, "step": 7299 }, { "epoch": 0.032316614281287355, "grad_norm": 2.3051278676154983, "learning_rate": 3.2316614281287356e-06, "loss": 0.5447, "step": 7300 }, { "epoch": 0.032321041214750545, "grad_norm": 2.7483216021087586, "learning_rate": 3.232104121475054e-06, "loss": 0.809, "step": 7301 }, { "epoch": 0.03232546814821373, "grad_norm": 2.4276868710425226, "learning_rate": 3.2325468148213735e-06, "loss": 0.5666, "step": 7302 }, { "epoch": 0.03232989508167692, "grad_norm": 2.905358550952525, "learning_rate": 3.2329895081676925e-06, "loss": 0.587, "step": 7303 }, { "epoch": 0.03233432201514011, "grad_norm": 2.7858162141204854, "learning_rate": 3.2334322015140114e-06, "loss": 0.5362, "step": 7304 }, { "epoch": 0.0323387489486033, "grad_norm": 2.997085394364713, "learning_rate": 3.2338748948603304e-06, "loss": 0.8382, "step": 7305 }, { "epoch": 0.03234317588206649, "grad_norm": 3.3163230556502286, "learning_rate": 3.2343175882066497e-06, "loss": 0.8205, "step": 7306 }, { "epoch": 0.03234760281552968, "grad_norm": 2.703291505315776, "learning_rate": 3.2347602815529683e-06, "loss": 0.6005, "step": 7307 }, { "epoch": 0.03235202974899287, "grad_norm": 2.7666469673181537, "learning_rate": 3.2352029748992876e-06, "loss": 0.6054, "step": 7308 }, { "epoch": 0.03235645668245606, "grad_norm": 2.895038974903476, "learning_rate": 3.2356456682456066e-06, "loss": 0.7165, "step": 7309 }, { "epoch": 0.03236088361591925, "grad_norm": 2.9218473384258403, "learning_rate": 3.236088361591925e-06, "loss": 0.9199, "step": 7310 }, { "epoch": 0.032365310549382444, "grad_norm": 4.033220067444336, "learning_rate": 3.2365310549382445e-06, "loss": 0.6016, "step": 7311 }, { "epoch": 0.032369737482845634, "grad_norm": 3.554882141254175, "learning_rate": 3.236973748284564e-06, "loss": 0.9318, "step": 7312 }, { "epoch": 0.032374164416308825, "grad_norm": 3.002749528364961, "learning_rate": 3.2374164416308824e-06, "loss": 0.7227, "step": 7313 }, { "epoch": 0.032378591349772015, "grad_norm": 3.2365661885483017, "learning_rate": 3.2378591349772013e-06, "loss": 0.8482, "step": 7314 }, { "epoch": 0.032383018283235206, "grad_norm": 3.1881031674541505, "learning_rate": 3.2383018283235207e-06, "loss": 0.9104, "step": 7315 }, { "epoch": 0.032387445216698396, "grad_norm": 2.863718404990159, "learning_rate": 3.238744521669839e-06, "loss": 0.6843, "step": 7316 }, { "epoch": 0.03239187215016158, "grad_norm": 2.622526469244982, "learning_rate": 3.2391872150161586e-06, "loss": 0.7062, "step": 7317 }, { "epoch": 0.03239629908362477, "grad_norm": 2.617947122203838, "learning_rate": 3.2396299083624775e-06, "loss": 0.9054, "step": 7318 }, { "epoch": 0.03240072601708796, "grad_norm": 2.6405124325706857, "learning_rate": 3.2400726017087965e-06, "loss": 0.613, "step": 7319 }, { "epoch": 0.03240515295055115, "grad_norm": 2.935330749267723, "learning_rate": 3.2405152950551154e-06, "loss": 0.8189, "step": 7320 }, { "epoch": 0.03240957988401434, "grad_norm": 2.555319209496792, "learning_rate": 3.2409579884014348e-06, "loss": 0.8987, "step": 7321 }, { "epoch": 0.03241400681747753, "grad_norm": 3.1434285170578877, "learning_rate": 3.2414006817477533e-06, "loss": 0.9334, "step": 7322 }, { "epoch": 0.03241843375094072, "grad_norm": 3.033781936566446, "learning_rate": 3.2418433750940727e-06, "loss": 0.8509, "step": 7323 }, { "epoch": 0.03242286068440391, "grad_norm": 2.992805173367896, "learning_rate": 3.2422860684403916e-06, "loss": 0.5674, "step": 7324 }, { "epoch": 0.032427287617867104, "grad_norm": 3.1771589283483377, "learning_rate": 3.2427287617867106e-06, "loss": 0.8265, "step": 7325 }, { "epoch": 0.032431714551330294, "grad_norm": 3.0935274254270495, "learning_rate": 3.2431714551330295e-06, "loss": 0.7834, "step": 7326 }, { "epoch": 0.032436141484793485, "grad_norm": 3.397767769560771, "learning_rate": 3.243614148479349e-06, "loss": 0.8986, "step": 7327 }, { "epoch": 0.032440568418256675, "grad_norm": 2.43377811487712, "learning_rate": 3.2440568418256674e-06, "loss": 0.6016, "step": 7328 }, { "epoch": 0.032444995351719866, "grad_norm": 3.085713940883155, "learning_rate": 3.2444995351719864e-06, "loss": 0.8192, "step": 7329 }, { "epoch": 0.032449422285183056, "grad_norm": 3.1568830228274667, "learning_rate": 3.2449422285183057e-06, "loss": 0.7413, "step": 7330 }, { "epoch": 0.03245384921864625, "grad_norm": 2.6713376255076104, "learning_rate": 3.2453849218646243e-06, "loss": 0.6882, "step": 7331 }, { "epoch": 0.03245827615210943, "grad_norm": 2.8532552164452243, "learning_rate": 3.2458276152109436e-06, "loss": 0.6767, "step": 7332 }, { "epoch": 0.03246270308557262, "grad_norm": 3.2970049060828757, "learning_rate": 3.2462703085572626e-06, "loss": 0.8232, "step": 7333 }, { "epoch": 0.03246713001903581, "grad_norm": 2.964093809006676, "learning_rate": 3.2467130019035815e-06, "loss": 0.8691, "step": 7334 }, { "epoch": 0.032471556952499, "grad_norm": 2.7945100830148846, "learning_rate": 3.2471556952499005e-06, "loss": 0.6567, "step": 7335 }, { "epoch": 0.03247598388596219, "grad_norm": 3.7173247992829226, "learning_rate": 3.24759838859622e-06, "loss": 0.8188, "step": 7336 }, { "epoch": 0.03248041081942538, "grad_norm": 3.1235890434342144, "learning_rate": 3.2480410819425384e-06, "loss": 0.9385, "step": 7337 }, { "epoch": 0.03248483775288857, "grad_norm": 3.8595863342580903, "learning_rate": 3.2484837752888577e-06, "loss": 0.8092, "step": 7338 }, { "epoch": 0.032489264686351764, "grad_norm": 2.6136965023153578, "learning_rate": 3.2489264686351767e-06, "loss": 0.7591, "step": 7339 }, { "epoch": 0.032493691619814954, "grad_norm": 2.712027095880442, "learning_rate": 3.2493691619814956e-06, "loss": 0.6438, "step": 7340 }, { "epoch": 0.032498118553278145, "grad_norm": 4.185104363020139, "learning_rate": 3.2498118553278146e-06, "loss": 1.3324, "step": 7341 }, { "epoch": 0.032502545486741335, "grad_norm": 3.191461189928025, "learning_rate": 3.250254548674134e-06, "loss": 0.9596, "step": 7342 }, { "epoch": 0.032506972420204526, "grad_norm": 3.5666156516790823, "learning_rate": 3.2506972420204525e-06, "loss": 0.705, "step": 7343 }, { "epoch": 0.032511399353667716, "grad_norm": 2.7662360697107977, "learning_rate": 3.251139935366772e-06, "loss": 0.6511, "step": 7344 }, { "epoch": 0.03251582628713091, "grad_norm": 2.734790695519478, "learning_rate": 3.2515826287130908e-06, "loss": 0.5583, "step": 7345 }, { "epoch": 0.0325202532205941, "grad_norm": 3.3225956083273913, "learning_rate": 3.2520253220594093e-06, "loss": 1.1307, "step": 7346 }, { "epoch": 0.03252468015405728, "grad_norm": 3.0137015111083967, "learning_rate": 3.2524680154057287e-06, "loss": 1.0284, "step": 7347 }, { "epoch": 0.03252910708752047, "grad_norm": 2.613042720796134, "learning_rate": 3.252910708752048e-06, "loss": 0.7237, "step": 7348 }, { "epoch": 0.03253353402098366, "grad_norm": 2.8468878747776643, "learning_rate": 3.2533534020983666e-06, "loss": 1.0182, "step": 7349 }, { "epoch": 0.03253796095444685, "grad_norm": 3.2336295891382414, "learning_rate": 3.2537960954446855e-06, "loss": 1.2265, "step": 7350 }, { "epoch": 0.03254238788791004, "grad_norm": 2.557190584072463, "learning_rate": 3.254238788791005e-06, "loss": 0.6139, "step": 7351 }, { "epoch": 0.032546814821373234, "grad_norm": 2.8494661539626795, "learning_rate": 3.2546814821373234e-06, "loss": 0.7355, "step": 7352 }, { "epoch": 0.032551241754836424, "grad_norm": 2.703066849296528, "learning_rate": 3.2551241754836428e-06, "loss": 0.9782, "step": 7353 }, { "epoch": 0.032555668688299615, "grad_norm": 4.599108552285836, "learning_rate": 3.2555668688299617e-06, "loss": 1.2794, "step": 7354 }, { "epoch": 0.032560095621762805, "grad_norm": 3.632910490976228, "learning_rate": 3.2560095621762807e-06, "loss": 1.0209, "step": 7355 }, { "epoch": 0.032564522555225996, "grad_norm": 4.186092293993414, "learning_rate": 3.2564522555225996e-06, "loss": 0.865, "step": 7356 }, { "epoch": 0.032568949488689186, "grad_norm": 2.5835762178701707, "learning_rate": 3.256894948868919e-06, "loss": 0.5921, "step": 7357 }, { "epoch": 0.03257337642215238, "grad_norm": 2.7710500030219, "learning_rate": 3.2573376422152375e-06, "loss": 0.9239, "step": 7358 }, { "epoch": 0.03257780335561557, "grad_norm": 2.6043971900398706, "learning_rate": 3.257780335561557e-06, "loss": 0.934, "step": 7359 }, { "epoch": 0.03258223028907876, "grad_norm": 2.349057497554511, "learning_rate": 3.258223028907876e-06, "loss": 0.562, "step": 7360 }, { "epoch": 0.03258665722254195, "grad_norm": 4.327632516786551, "learning_rate": 3.2586657222541944e-06, "loss": 1.3906, "step": 7361 }, { "epoch": 0.03259108415600514, "grad_norm": 2.598012889124882, "learning_rate": 3.2591084156005137e-06, "loss": 1.0075, "step": 7362 }, { "epoch": 0.03259551108946832, "grad_norm": 2.363843516339734, "learning_rate": 3.259551108946833e-06, "loss": 0.5909, "step": 7363 }, { "epoch": 0.03259993802293151, "grad_norm": 2.958817586472747, "learning_rate": 3.2599938022931516e-06, "loss": 0.7718, "step": 7364 }, { "epoch": 0.0326043649563947, "grad_norm": 2.688399710464045, "learning_rate": 3.2604364956394706e-06, "loss": 0.747, "step": 7365 }, { "epoch": 0.032608791889857894, "grad_norm": 2.459404262458047, "learning_rate": 3.26087918898579e-06, "loss": 0.6439, "step": 7366 }, { "epoch": 0.032613218823321084, "grad_norm": 3.075906688896797, "learning_rate": 3.2613218823321085e-06, "loss": 1.0259, "step": 7367 }, { "epoch": 0.032617645756784275, "grad_norm": 3.126322412198865, "learning_rate": 3.261764575678428e-06, "loss": 0.5567, "step": 7368 }, { "epoch": 0.032622072690247465, "grad_norm": 2.727424612134684, "learning_rate": 3.262207269024747e-06, "loss": 0.799, "step": 7369 }, { "epoch": 0.032626499623710656, "grad_norm": 2.89230058307284, "learning_rate": 3.2626499623710657e-06, "loss": 1.2625, "step": 7370 }, { "epoch": 0.032630926557173846, "grad_norm": 2.6530062214531984, "learning_rate": 3.2630926557173847e-06, "loss": 0.5751, "step": 7371 }, { "epoch": 0.03263535349063704, "grad_norm": 3.344036929485236, "learning_rate": 3.263535349063704e-06, "loss": 0.9245, "step": 7372 }, { "epoch": 0.03263978042410023, "grad_norm": 2.5044832767535214, "learning_rate": 3.2639780424100226e-06, "loss": 0.6036, "step": 7373 }, { "epoch": 0.03264420735756342, "grad_norm": 2.6468534120392198, "learning_rate": 3.264420735756342e-06, "loss": 0.9037, "step": 7374 }, { "epoch": 0.03264863429102661, "grad_norm": 2.939444126316492, "learning_rate": 3.264863429102661e-06, "loss": 0.8033, "step": 7375 }, { "epoch": 0.0326530612244898, "grad_norm": 2.986153357799402, "learning_rate": 3.2653061224489794e-06, "loss": 0.8923, "step": 7376 }, { "epoch": 0.03265748815795299, "grad_norm": 2.6804228194448045, "learning_rate": 3.265748815795299e-06, "loss": 0.668, "step": 7377 }, { "epoch": 0.03266191509141617, "grad_norm": 2.4198057729650824, "learning_rate": 3.266191509141618e-06, "loss": 0.608, "step": 7378 }, { "epoch": 0.03266634202487936, "grad_norm": 3.614194965147426, "learning_rate": 3.2666342024879367e-06, "loss": 1.0633, "step": 7379 }, { "epoch": 0.032670768958342554, "grad_norm": 3.3977676922128275, "learning_rate": 3.2670768958342556e-06, "loss": 1.0608, "step": 7380 }, { "epoch": 0.032675195891805744, "grad_norm": 2.933138320039958, "learning_rate": 3.267519589180575e-06, "loss": 0.9463, "step": 7381 }, { "epoch": 0.032679622825268935, "grad_norm": 3.1527567111726933, "learning_rate": 3.2679622825268935e-06, "loss": 0.8246, "step": 7382 }, { "epoch": 0.032684049758732125, "grad_norm": 3.363136335937424, "learning_rate": 3.268404975873213e-06, "loss": 0.8175, "step": 7383 }, { "epoch": 0.032688476692195316, "grad_norm": 3.1888354617931904, "learning_rate": 3.268847669219532e-06, "loss": 0.948, "step": 7384 }, { "epoch": 0.032692903625658506, "grad_norm": 3.110939419607474, "learning_rate": 3.269290362565851e-06, "loss": 0.6526, "step": 7385 }, { "epoch": 0.0326973305591217, "grad_norm": 2.681503338250871, "learning_rate": 3.2697330559121697e-06, "loss": 0.8956, "step": 7386 }, { "epoch": 0.03270175749258489, "grad_norm": 3.431628209510453, "learning_rate": 3.270175749258489e-06, "loss": 0.6695, "step": 7387 }, { "epoch": 0.03270618442604808, "grad_norm": 3.849478051807429, "learning_rate": 3.2706184426048076e-06, "loss": 1.1887, "step": 7388 }, { "epoch": 0.03271061135951127, "grad_norm": 3.029963115555009, "learning_rate": 3.271061135951127e-06, "loss": 0.929, "step": 7389 }, { "epoch": 0.03271503829297446, "grad_norm": 3.4847397016004567, "learning_rate": 3.271503829297446e-06, "loss": 1.0218, "step": 7390 }, { "epoch": 0.03271946522643765, "grad_norm": 2.8613834832865357, "learning_rate": 3.2719465226437645e-06, "loss": 0.69, "step": 7391 }, { "epoch": 0.03272389215990084, "grad_norm": 2.969512805333118, "learning_rate": 3.272389215990084e-06, "loss": 0.6389, "step": 7392 }, { "epoch": 0.032728319093364024, "grad_norm": 3.419609415163831, "learning_rate": 3.2728319093364032e-06, "loss": 0.8713, "step": 7393 }, { "epoch": 0.032732746026827214, "grad_norm": 3.2009890602439453, "learning_rate": 3.2732746026827217e-06, "loss": 1.0282, "step": 7394 }, { "epoch": 0.032737172960290405, "grad_norm": 3.208835758430072, "learning_rate": 3.2737172960290407e-06, "loss": 0.8202, "step": 7395 }, { "epoch": 0.032741599893753595, "grad_norm": 2.3975753996554596, "learning_rate": 3.27415998937536e-06, "loss": 0.6339, "step": 7396 }, { "epoch": 0.032746026827216786, "grad_norm": 2.6053545856473153, "learning_rate": 3.2746026827216786e-06, "loss": 0.7326, "step": 7397 }, { "epoch": 0.032750453760679976, "grad_norm": 5.779028529688619, "learning_rate": 3.275045376067998e-06, "loss": 0.9813, "step": 7398 }, { "epoch": 0.03275488069414317, "grad_norm": 2.9763953613004657, "learning_rate": 3.275488069414317e-06, "loss": 0.7369, "step": 7399 }, { "epoch": 0.03275930762760636, "grad_norm": 2.3668170370368857, "learning_rate": 3.275930762760636e-06, "loss": 0.6917, "step": 7400 }, { "epoch": 0.03276373456106955, "grad_norm": 2.7080393559043627, "learning_rate": 3.276373456106955e-06, "loss": 0.6522, "step": 7401 }, { "epoch": 0.03276816149453274, "grad_norm": 3.478069402257724, "learning_rate": 3.276816149453274e-06, "loss": 0.9008, "step": 7402 }, { "epoch": 0.03277258842799593, "grad_norm": 3.022548871495007, "learning_rate": 3.2772588427995927e-06, "loss": 1.0528, "step": 7403 }, { "epoch": 0.03277701536145912, "grad_norm": 2.5464963316099314, "learning_rate": 3.277701536145912e-06, "loss": 0.749, "step": 7404 }, { "epoch": 0.03278144229492231, "grad_norm": 3.261401055824147, "learning_rate": 3.278144229492231e-06, "loss": 0.9864, "step": 7405 }, { "epoch": 0.0327858692283855, "grad_norm": 3.3915876520633654, "learning_rate": 3.2785869228385495e-06, "loss": 0.9825, "step": 7406 }, { "epoch": 0.03279029616184869, "grad_norm": 3.0835732413554395, "learning_rate": 3.279029616184869e-06, "loss": 1.0158, "step": 7407 }, { "epoch": 0.032794723095311874, "grad_norm": 4.607519542038251, "learning_rate": 3.2794723095311883e-06, "loss": 1.0494, "step": 7408 }, { "epoch": 0.032799150028775065, "grad_norm": 2.900015108637236, "learning_rate": 3.279915002877507e-06, "loss": 0.7786, "step": 7409 }, { "epoch": 0.032803576962238255, "grad_norm": 2.6148537016631015, "learning_rate": 3.2803576962238257e-06, "loss": 0.6742, "step": 7410 }, { "epoch": 0.032808003895701446, "grad_norm": 2.5511994903908026, "learning_rate": 3.280800389570145e-06, "loss": 0.6807, "step": 7411 }, { "epoch": 0.032812430829164636, "grad_norm": 2.9022308612655636, "learning_rate": 3.2812430829164636e-06, "loss": 0.7499, "step": 7412 }, { "epoch": 0.03281685776262783, "grad_norm": 2.4863139900612197, "learning_rate": 3.281685776262783e-06, "loss": 0.7465, "step": 7413 }, { "epoch": 0.03282128469609102, "grad_norm": 2.5215002031657168, "learning_rate": 3.282128469609102e-06, "loss": 0.5696, "step": 7414 }, { "epoch": 0.03282571162955421, "grad_norm": 2.8509548700913365, "learning_rate": 3.282571162955421e-06, "loss": 0.4581, "step": 7415 }, { "epoch": 0.0328301385630174, "grad_norm": 2.6745251486133617, "learning_rate": 3.28301385630174e-06, "loss": 0.9195, "step": 7416 }, { "epoch": 0.03283456549648059, "grad_norm": 2.5644347946601616, "learning_rate": 3.2834565496480592e-06, "loss": 0.5089, "step": 7417 }, { "epoch": 0.03283899242994378, "grad_norm": 2.775151456613913, "learning_rate": 3.2838992429943777e-06, "loss": 0.8542, "step": 7418 }, { "epoch": 0.03284341936340697, "grad_norm": 2.895225632780382, "learning_rate": 3.284341936340697e-06, "loss": 0.7446, "step": 7419 }, { "epoch": 0.03284784629687016, "grad_norm": 3.3102578772612503, "learning_rate": 3.284784629687016e-06, "loss": 0.7241, "step": 7420 }, { "epoch": 0.03285227323033335, "grad_norm": 3.1844779845696674, "learning_rate": 3.285227323033335e-06, "loss": 0.7919, "step": 7421 }, { "epoch": 0.03285670016379654, "grad_norm": 2.7286502139278777, "learning_rate": 3.285670016379654e-06, "loss": 0.5585, "step": 7422 }, { "epoch": 0.032861127097259725, "grad_norm": 3.4701941882289793, "learning_rate": 3.2861127097259733e-06, "loss": 0.6566, "step": 7423 }, { "epoch": 0.032865554030722915, "grad_norm": 3.479583353820735, "learning_rate": 3.286555403072292e-06, "loss": 0.782, "step": 7424 }, { "epoch": 0.032869980964186106, "grad_norm": 2.7049157043616994, "learning_rate": 3.2869980964186112e-06, "loss": 0.5203, "step": 7425 }, { "epoch": 0.032874407897649296, "grad_norm": 3.324298202076682, "learning_rate": 3.28744078976493e-06, "loss": 0.9158, "step": 7426 }, { "epoch": 0.03287883483111249, "grad_norm": 2.7008242671933713, "learning_rate": 3.2878834831112487e-06, "loss": 0.7421, "step": 7427 }, { "epoch": 0.03288326176457568, "grad_norm": 3.0543984312679053, "learning_rate": 3.288326176457568e-06, "loss": 1.1096, "step": 7428 }, { "epoch": 0.03288768869803887, "grad_norm": 2.8150925892772296, "learning_rate": 3.288768869803887e-06, "loss": 0.8624, "step": 7429 }, { "epoch": 0.03289211563150206, "grad_norm": 2.773456880390488, "learning_rate": 3.289211563150206e-06, "loss": 0.7178, "step": 7430 }, { "epoch": 0.03289654256496525, "grad_norm": 3.0876729174427022, "learning_rate": 3.289654256496525e-06, "loss": 0.8516, "step": 7431 }, { "epoch": 0.03290096949842844, "grad_norm": 3.2741741011514276, "learning_rate": 3.2900969498428443e-06, "loss": 0.543, "step": 7432 }, { "epoch": 0.03290539643189163, "grad_norm": 3.5942062710261222, "learning_rate": 3.2905396431891632e-06, "loss": 0.6521, "step": 7433 }, { "epoch": 0.03290982336535482, "grad_norm": 2.597205163080885, "learning_rate": 3.290982336535482e-06, "loss": 0.6769, "step": 7434 }, { "epoch": 0.03291425029881801, "grad_norm": 3.1288699319636515, "learning_rate": 3.291425029881801e-06, "loss": 0.862, "step": 7435 }, { "epoch": 0.0329186772322812, "grad_norm": 2.864827579979774, "learning_rate": 3.2918677232281205e-06, "loss": 1.0112, "step": 7436 }, { "epoch": 0.03292310416574439, "grad_norm": 3.7401129062569995, "learning_rate": 3.292310416574439e-06, "loss": 1.0359, "step": 7437 }, { "epoch": 0.032927531099207576, "grad_norm": 2.6322954089510264, "learning_rate": 3.2927531099207584e-06, "loss": 0.7403, "step": 7438 }, { "epoch": 0.032931958032670766, "grad_norm": 2.5496894258359846, "learning_rate": 3.2931958032670773e-06, "loss": 0.5153, "step": 7439 }, { "epoch": 0.03293638496613396, "grad_norm": 3.2574648100935404, "learning_rate": 3.2936384966133963e-06, "loss": 0.5881, "step": 7440 }, { "epoch": 0.03294081189959715, "grad_norm": 4.138608809529056, "learning_rate": 3.2940811899597152e-06, "loss": 1.0431, "step": 7441 }, { "epoch": 0.03294523883306034, "grad_norm": 3.9025540215396366, "learning_rate": 3.2945238833060346e-06, "loss": 1.137, "step": 7442 }, { "epoch": 0.03294966576652353, "grad_norm": 2.9779850165252344, "learning_rate": 3.294966576652353e-06, "loss": 0.8883, "step": 7443 }, { "epoch": 0.03295409269998672, "grad_norm": 3.581169695986898, "learning_rate": 3.2954092699986725e-06, "loss": 1.313, "step": 7444 }, { "epoch": 0.03295851963344991, "grad_norm": 2.8289874451006405, "learning_rate": 3.2958519633449914e-06, "loss": 0.65, "step": 7445 }, { "epoch": 0.0329629465669131, "grad_norm": 2.8784260664430157, "learning_rate": 3.29629465669131e-06, "loss": 0.843, "step": 7446 }, { "epoch": 0.03296737350037629, "grad_norm": 3.142458360990308, "learning_rate": 3.2967373500376293e-06, "loss": 0.7093, "step": 7447 }, { "epoch": 0.03297180043383948, "grad_norm": 4.093241387545982, "learning_rate": 3.2971800433839487e-06, "loss": 0.8284, "step": 7448 }, { "epoch": 0.03297622736730267, "grad_norm": 3.371809912695621, "learning_rate": 3.2976227367302672e-06, "loss": 0.6377, "step": 7449 }, { "epoch": 0.03298065430076586, "grad_norm": 3.175307624362525, "learning_rate": 3.298065430076586e-06, "loss": 1.0607, "step": 7450 }, { "epoch": 0.03298508123422905, "grad_norm": 3.9043175725698025, "learning_rate": 3.2985081234229055e-06, "loss": 1.1395, "step": 7451 }, { "epoch": 0.03298950816769224, "grad_norm": 3.190117572064603, "learning_rate": 3.298950816769224e-06, "loss": 0.6907, "step": 7452 }, { "epoch": 0.032993935101155426, "grad_norm": 3.5941184171619036, "learning_rate": 3.2993935101155434e-06, "loss": 0.92, "step": 7453 }, { "epoch": 0.03299836203461862, "grad_norm": 2.5362781309917257, "learning_rate": 3.2998362034618624e-06, "loss": 0.6621, "step": 7454 }, { "epoch": 0.03300278896808181, "grad_norm": 3.1297514603905854, "learning_rate": 3.3002788968081813e-06, "loss": 0.8006, "step": 7455 }, { "epoch": 0.033007215901545, "grad_norm": 3.1498364829317675, "learning_rate": 3.3007215901545003e-06, "loss": 0.6296, "step": 7456 }, { "epoch": 0.03301164283500819, "grad_norm": 2.9280851852160286, "learning_rate": 3.3011642835008196e-06, "loss": 0.6606, "step": 7457 }, { "epoch": 0.03301606976847138, "grad_norm": 2.8775020140361462, "learning_rate": 3.301606976847138e-06, "loss": 0.5685, "step": 7458 }, { "epoch": 0.03302049670193457, "grad_norm": 3.65758237370827, "learning_rate": 3.3020496701934575e-06, "loss": 1.2557, "step": 7459 }, { "epoch": 0.03302492363539776, "grad_norm": 3.1313773806897984, "learning_rate": 3.3024923635397765e-06, "loss": 1.022, "step": 7460 }, { "epoch": 0.03302935056886095, "grad_norm": 2.41914334120346, "learning_rate": 3.302935056886095e-06, "loss": 0.6393, "step": 7461 }, { "epoch": 0.03303377750232414, "grad_norm": 3.9741256118527697, "learning_rate": 3.3033777502324144e-06, "loss": 0.6913, "step": 7462 }, { "epoch": 0.03303820443578733, "grad_norm": 2.554106232607378, "learning_rate": 3.3038204435787337e-06, "loss": 0.549, "step": 7463 }, { "epoch": 0.03304263136925052, "grad_norm": 2.7910405248847523, "learning_rate": 3.3042631369250523e-06, "loss": 0.5955, "step": 7464 }, { "epoch": 0.03304705830271371, "grad_norm": 2.7575093830419175, "learning_rate": 3.3047058302713712e-06, "loss": 0.6754, "step": 7465 }, { "epoch": 0.0330514852361769, "grad_norm": 2.702743989277386, "learning_rate": 3.3051485236176906e-06, "loss": 0.9023, "step": 7466 }, { "epoch": 0.03305591216964009, "grad_norm": 3.311757564416989, "learning_rate": 3.305591216964009e-06, "loss": 1.0984, "step": 7467 }, { "epoch": 0.03306033910310328, "grad_norm": 2.6640468538967372, "learning_rate": 3.3060339103103285e-06, "loss": 0.6241, "step": 7468 }, { "epoch": 0.03306476603656647, "grad_norm": 3.265881283386424, "learning_rate": 3.3064766036566474e-06, "loss": 0.7082, "step": 7469 }, { "epoch": 0.03306919297002966, "grad_norm": 3.892363136347343, "learning_rate": 3.3069192970029664e-06, "loss": 1.0225, "step": 7470 }, { "epoch": 0.03307361990349285, "grad_norm": 2.4816444695019255, "learning_rate": 3.3073619903492853e-06, "loss": 0.7414, "step": 7471 }, { "epoch": 0.03307804683695604, "grad_norm": 3.51678972253506, "learning_rate": 3.3078046836956047e-06, "loss": 0.9881, "step": 7472 }, { "epoch": 0.03308247377041923, "grad_norm": 2.92435065783626, "learning_rate": 3.3082473770419232e-06, "loss": 0.8442, "step": 7473 }, { "epoch": 0.03308690070388242, "grad_norm": 2.656067271451401, "learning_rate": 3.3086900703882426e-06, "loss": 0.7842, "step": 7474 }, { "epoch": 0.03309132763734561, "grad_norm": 3.8029757125747543, "learning_rate": 3.3091327637345615e-06, "loss": 1.1301, "step": 7475 }, { "epoch": 0.0330957545708088, "grad_norm": 3.4627572949197227, "learning_rate": 3.30957545708088e-06, "loss": 0.7931, "step": 7476 }, { "epoch": 0.03310018150427199, "grad_norm": 3.2902567188952827, "learning_rate": 3.3100181504271994e-06, "loss": 1.0926, "step": 7477 }, { "epoch": 0.03310460843773518, "grad_norm": 2.9766590068245833, "learning_rate": 3.310460843773519e-06, "loss": 0.8202, "step": 7478 }, { "epoch": 0.03310903537119837, "grad_norm": 2.9094039945380614, "learning_rate": 3.3109035371198373e-06, "loss": 0.7329, "step": 7479 }, { "epoch": 0.03311346230466156, "grad_norm": 2.602952191797951, "learning_rate": 3.3113462304661563e-06, "loss": 0.8688, "step": 7480 }, { "epoch": 0.033117889238124754, "grad_norm": 3.32805097772252, "learning_rate": 3.3117889238124756e-06, "loss": 1.1432, "step": 7481 }, { "epoch": 0.033122316171587944, "grad_norm": 2.5958954546951207, "learning_rate": 3.312231617158794e-06, "loss": 0.7873, "step": 7482 }, { "epoch": 0.03312674310505113, "grad_norm": 2.568037223830173, "learning_rate": 3.3126743105051135e-06, "loss": 0.545, "step": 7483 }, { "epoch": 0.03313117003851432, "grad_norm": 2.7733041283071627, "learning_rate": 3.3131170038514325e-06, "loss": 0.8189, "step": 7484 }, { "epoch": 0.03313559697197751, "grad_norm": 2.700052858795912, "learning_rate": 3.3135596971977514e-06, "loss": 0.8323, "step": 7485 }, { "epoch": 0.0331400239054407, "grad_norm": 2.951857540607713, "learning_rate": 3.3140023905440704e-06, "loss": 0.9802, "step": 7486 }, { "epoch": 0.03314445083890389, "grad_norm": 2.2644587090544, "learning_rate": 3.3144450838903897e-06, "loss": 0.5594, "step": 7487 }, { "epoch": 0.03314887777236708, "grad_norm": 3.5317286808433805, "learning_rate": 3.3148877772367083e-06, "loss": 0.8433, "step": 7488 }, { "epoch": 0.03315330470583027, "grad_norm": 3.2962797621379827, "learning_rate": 3.3153304705830276e-06, "loss": 0.8633, "step": 7489 }, { "epoch": 0.03315773163929346, "grad_norm": 3.1656623315032055, "learning_rate": 3.3157731639293466e-06, "loss": 0.7324, "step": 7490 }, { "epoch": 0.03316215857275665, "grad_norm": 2.8541441612020293, "learning_rate": 3.316215857275665e-06, "loss": 0.8219, "step": 7491 }, { "epoch": 0.03316658550621984, "grad_norm": 2.898705257688449, "learning_rate": 3.3166585506219845e-06, "loss": 0.7854, "step": 7492 }, { "epoch": 0.03317101243968303, "grad_norm": 2.997558247465425, "learning_rate": 3.317101243968304e-06, "loss": 0.8828, "step": 7493 }, { "epoch": 0.03317543937314622, "grad_norm": 3.0159399103596716, "learning_rate": 3.3175439373146224e-06, "loss": 0.7833, "step": 7494 }, { "epoch": 0.033179866306609414, "grad_norm": 2.719085944719788, "learning_rate": 3.3179866306609413e-06, "loss": 0.4979, "step": 7495 }, { "epoch": 0.033184293240072604, "grad_norm": 2.9185618231138113, "learning_rate": 3.3184293240072607e-06, "loss": 0.8334, "step": 7496 }, { "epoch": 0.033188720173535795, "grad_norm": 3.1718283870970536, "learning_rate": 3.3188720173535792e-06, "loss": 0.8506, "step": 7497 }, { "epoch": 0.03319314710699898, "grad_norm": 2.645465188914284, "learning_rate": 3.3193147106998986e-06, "loss": 0.7002, "step": 7498 }, { "epoch": 0.03319757404046217, "grad_norm": 3.300605671183836, "learning_rate": 3.3197574040462175e-06, "loss": 0.882, "step": 7499 }, { "epoch": 0.03320200097392536, "grad_norm": 3.0300658998084384, "learning_rate": 3.3202000973925365e-06, "loss": 0.9896, "step": 7500 }, { "epoch": 0.03320642790738855, "grad_norm": 3.1805104109157063, "learning_rate": 3.3206427907388554e-06, "loss": 1.0412, "step": 7501 }, { "epoch": 0.03321085484085174, "grad_norm": 2.4360644181927267, "learning_rate": 3.321085484085175e-06, "loss": 0.7985, "step": 7502 }, { "epoch": 0.03321528177431493, "grad_norm": 2.993182508291201, "learning_rate": 3.3215281774314933e-06, "loss": 0.6849, "step": 7503 }, { "epoch": 0.03321970870777812, "grad_norm": 2.7659428553875123, "learning_rate": 3.3219708707778127e-06, "loss": 0.9082, "step": 7504 }, { "epoch": 0.03322413564124131, "grad_norm": 3.2246767670635292, "learning_rate": 3.3224135641241316e-06, "loss": 0.6968, "step": 7505 }, { "epoch": 0.0332285625747045, "grad_norm": 2.5879512344945796, "learning_rate": 3.32285625747045e-06, "loss": 0.6183, "step": 7506 }, { "epoch": 0.03323298950816769, "grad_norm": 2.8878475961456602, "learning_rate": 3.3232989508167695e-06, "loss": 0.9209, "step": 7507 }, { "epoch": 0.03323741644163088, "grad_norm": 2.9706551960987952, "learning_rate": 3.323741644163089e-06, "loss": 1.0354, "step": 7508 }, { "epoch": 0.033241843375094074, "grad_norm": 2.6656536019841695, "learning_rate": 3.3241843375094074e-06, "loss": 0.5425, "step": 7509 }, { "epoch": 0.033246270308557264, "grad_norm": 2.644790921611629, "learning_rate": 3.3246270308557264e-06, "loss": 0.8492, "step": 7510 }, { "epoch": 0.033250697242020455, "grad_norm": 3.1025646897793795, "learning_rate": 3.3250697242020457e-06, "loss": 0.8084, "step": 7511 }, { "epoch": 0.033255124175483645, "grad_norm": 2.927700848014211, "learning_rate": 3.3255124175483643e-06, "loss": 0.7165, "step": 7512 }, { "epoch": 0.033259551108946836, "grad_norm": 2.7510213125136596, "learning_rate": 3.3259551108946836e-06, "loss": 0.7456, "step": 7513 }, { "epoch": 0.03326397804241002, "grad_norm": 2.4917702267771062, "learning_rate": 3.3263978042410026e-06, "loss": 0.7563, "step": 7514 }, { "epoch": 0.03326840497587321, "grad_norm": 2.432641191919122, "learning_rate": 3.3268404975873215e-06, "loss": 0.4373, "step": 7515 }, { "epoch": 0.0332728319093364, "grad_norm": 3.209152040281748, "learning_rate": 3.3272831909336405e-06, "loss": 0.8263, "step": 7516 }, { "epoch": 0.03327725884279959, "grad_norm": 3.190211646812546, "learning_rate": 3.32772588427996e-06, "loss": 0.5141, "step": 7517 }, { "epoch": 0.03328168577626278, "grad_norm": 2.5672749992884705, "learning_rate": 3.3281685776262784e-06, "loss": 0.727, "step": 7518 }, { "epoch": 0.03328611270972597, "grad_norm": 2.6806476180900307, "learning_rate": 3.3286112709725977e-06, "loss": 0.5655, "step": 7519 }, { "epoch": 0.03329053964318916, "grad_norm": 2.474000963182326, "learning_rate": 3.3290539643189167e-06, "loss": 0.7165, "step": 7520 }, { "epoch": 0.03329496657665235, "grad_norm": 3.0480236733360595, "learning_rate": 3.3294966576652356e-06, "loss": 0.8661, "step": 7521 }, { "epoch": 0.033299393510115544, "grad_norm": 2.4405151263242986, "learning_rate": 3.3299393510115546e-06, "loss": 0.6847, "step": 7522 }, { "epoch": 0.033303820443578734, "grad_norm": 2.8774187342612194, "learning_rate": 3.330382044357874e-06, "loss": 0.9942, "step": 7523 }, { "epoch": 0.033308247377041925, "grad_norm": 2.6340284571711927, "learning_rate": 3.3308247377041925e-06, "loss": 0.4396, "step": 7524 }, { "epoch": 0.033312674310505115, "grad_norm": 2.906052388981122, "learning_rate": 3.331267431050512e-06, "loss": 0.7237, "step": 7525 }, { "epoch": 0.033317101243968306, "grad_norm": 2.473224404444725, "learning_rate": 3.331710124396831e-06, "loss": 0.638, "step": 7526 }, { "epoch": 0.033321528177431496, "grad_norm": 2.7802093812020785, "learning_rate": 3.3321528177431493e-06, "loss": 0.746, "step": 7527 }, { "epoch": 0.03332595511089469, "grad_norm": 2.822777570761156, "learning_rate": 3.3325955110894687e-06, "loss": 1.0556, "step": 7528 }, { "epoch": 0.03333038204435787, "grad_norm": 2.4102622259869992, "learning_rate": 3.3330382044357876e-06, "loss": 0.7144, "step": 7529 }, { "epoch": 0.03333480897782106, "grad_norm": 2.996456418938122, "learning_rate": 3.3334808977821066e-06, "loss": 0.5771, "step": 7530 }, { "epoch": 0.03333923591128425, "grad_norm": 2.7859133184131335, "learning_rate": 3.3339235911284255e-06, "loss": 0.869, "step": 7531 }, { "epoch": 0.03334366284474744, "grad_norm": 3.861150631577692, "learning_rate": 3.334366284474745e-06, "loss": 0.8842, "step": 7532 }, { "epoch": 0.03334808977821063, "grad_norm": 3.030397819705786, "learning_rate": 3.3348089778210634e-06, "loss": 0.8639, "step": 7533 }, { "epoch": 0.03335251671167382, "grad_norm": 2.663761502798265, "learning_rate": 3.335251671167383e-06, "loss": 0.6438, "step": 7534 }, { "epoch": 0.03335694364513701, "grad_norm": 2.7893050943195337, "learning_rate": 3.3356943645137017e-06, "loss": 0.7187, "step": 7535 }, { "epoch": 0.033361370578600204, "grad_norm": 2.991519453359209, "learning_rate": 3.3361370578600207e-06, "loss": 0.6246, "step": 7536 }, { "epoch": 0.033365797512063394, "grad_norm": 3.5548254528153427, "learning_rate": 3.3365797512063396e-06, "loss": 0.9535, "step": 7537 }, { "epoch": 0.033370224445526585, "grad_norm": 2.202571085346143, "learning_rate": 3.337022444552659e-06, "loss": 0.4031, "step": 7538 }, { "epoch": 0.033374651378989775, "grad_norm": 2.467437923088351, "learning_rate": 3.3374651378989775e-06, "loss": 0.8477, "step": 7539 }, { "epoch": 0.033379078312452966, "grad_norm": 2.660441304139199, "learning_rate": 3.337907831245297e-06, "loss": 0.8283, "step": 7540 }, { "epoch": 0.033383505245916156, "grad_norm": 3.7105693097896815, "learning_rate": 3.338350524591616e-06, "loss": 0.8426, "step": 7541 }, { "epoch": 0.03338793217937935, "grad_norm": 2.716911260154036, "learning_rate": 3.3387932179379344e-06, "loss": 0.8347, "step": 7542 }, { "epoch": 0.03339235911284254, "grad_norm": 2.802973939176283, "learning_rate": 3.3392359112842537e-06, "loss": 0.9157, "step": 7543 }, { "epoch": 0.03339678604630572, "grad_norm": 2.4774197124495925, "learning_rate": 3.339678604630573e-06, "loss": 0.6022, "step": 7544 }, { "epoch": 0.03340121297976891, "grad_norm": 3.5159681596081556, "learning_rate": 3.3401212979768916e-06, "loss": 0.8356, "step": 7545 }, { "epoch": 0.0334056399132321, "grad_norm": 3.210418703382984, "learning_rate": 3.3405639913232106e-06, "loss": 0.968, "step": 7546 }, { "epoch": 0.03341006684669529, "grad_norm": 3.1466736901335186, "learning_rate": 3.34100668466953e-06, "loss": 0.9348, "step": 7547 }, { "epoch": 0.03341449378015848, "grad_norm": 3.7283632289709576, "learning_rate": 3.3414493780158485e-06, "loss": 0.726, "step": 7548 }, { "epoch": 0.03341892071362167, "grad_norm": 2.8297694602386776, "learning_rate": 3.341892071362168e-06, "loss": 0.9801, "step": 7549 }, { "epoch": 0.033423347647084864, "grad_norm": 3.0196081933074277, "learning_rate": 3.342334764708487e-06, "loss": 0.7532, "step": 7550 }, { "epoch": 0.033427774580548054, "grad_norm": 2.799739062047394, "learning_rate": 3.3427774580548057e-06, "loss": 0.7667, "step": 7551 }, { "epoch": 0.033432201514011245, "grad_norm": 2.387731846761829, "learning_rate": 3.3432201514011247e-06, "loss": 0.7064, "step": 7552 }, { "epoch": 0.033436628447474435, "grad_norm": 3.3160536063327304, "learning_rate": 3.343662844747444e-06, "loss": 0.826, "step": 7553 }, { "epoch": 0.033441055380937626, "grad_norm": 2.6763519290914313, "learning_rate": 3.3441055380937626e-06, "loss": 0.9201, "step": 7554 }, { "epoch": 0.033445482314400816, "grad_norm": 3.097400709837505, "learning_rate": 3.344548231440082e-06, "loss": 0.7123, "step": 7555 }, { "epoch": 0.03344990924786401, "grad_norm": 2.6340936352745334, "learning_rate": 3.344990924786401e-06, "loss": 0.7562, "step": 7556 }, { "epoch": 0.0334543361813272, "grad_norm": 2.99852825395952, "learning_rate": 3.3454336181327194e-06, "loss": 0.879, "step": 7557 }, { "epoch": 0.03345876311479039, "grad_norm": 3.3970793443482377, "learning_rate": 3.345876311479039e-06, "loss": 0.8213, "step": 7558 }, { "epoch": 0.03346319004825357, "grad_norm": 3.378952504097538, "learning_rate": 3.346319004825358e-06, "loss": 1.0594, "step": 7559 }, { "epoch": 0.03346761698171676, "grad_norm": 2.5025769795331616, "learning_rate": 3.3467616981716767e-06, "loss": 0.7619, "step": 7560 }, { "epoch": 0.03347204391517995, "grad_norm": 2.8318773385063514, "learning_rate": 3.3472043915179956e-06, "loss": 0.749, "step": 7561 }, { "epoch": 0.03347647084864314, "grad_norm": 3.051824940437036, "learning_rate": 3.347647084864315e-06, "loss": 0.9026, "step": 7562 }, { "epoch": 0.033480897782106334, "grad_norm": 2.8916957847084674, "learning_rate": 3.3480897782106335e-06, "loss": 0.5567, "step": 7563 }, { "epoch": 0.033485324715569524, "grad_norm": 2.7073501007226333, "learning_rate": 3.348532471556953e-06, "loss": 0.8402, "step": 7564 }, { "epoch": 0.033489751649032715, "grad_norm": 2.568390840050431, "learning_rate": 3.348975164903272e-06, "loss": 0.7886, "step": 7565 }, { "epoch": 0.033494178582495905, "grad_norm": 2.4707292237406313, "learning_rate": 3.349417858249591e-06, "loss": 0.6923, "step": 7566 }, { "epoch": 0.033498605515959096, "grad_norm": 2.551949918396836, "learning_rate": 3.3498605515959097e-06, "loss": 0.705, "step": 7567 }, { "epoch": 0.033503032449422286, "grad_norm": 3.9344934049169704, "learning_rate": 3.350303244942229e-06, "loss": 0.9829, "step": 7568 }, { "epoch": 0.03350745938288548, "grad_norm": 2.4776332559310226, "learning_rate": 3.3507459382885476e-06, "loss": 0.5713, "step": 7569 }, { "epoch": 0.03351188631634867, "grad_norm": 4.230237895313688, "learning_rate": 3.351188631634867e-06, "loss": 1.3389, "step": 7570 }, { "epoch": 0.03351631324981186, "grad_norm": 2.643565342969115, "learning_rate": 3.351631324981186e-06, "loss": 0.7389, "step": 7571 }, { "epoch": 0.03352074018327505, "grad_norm": 4.372402662297213, "learning_rate": 3.3520740183275045e-06, "loss": 1.1798, "step": 7572 }, { "epoch": 0.03352516711673824, "grad_norm": 2.8128544836733846, "learning_rate": 3.352516711673824e-06, "loss": 0.6562, "step": 7573 }, { "epoch": 0.03352959405020142, "grad_norm": 3.2387992468937954, "learning_rate": 3.3529594050201432e-06, "loss": 0.9303, "step": 7574 }, { "epoch": 0.03353402098366461, "grad_norm": 2.658945615404773, "learning_rate": 3.3534020983664617e-06, "loss": 0.6508, "step": 7575 }, { "epoch": 0.0335384479171278, "grad_norm": 3.250280920596108, "learning_rate": 3.3538447917127807e-06, "loss": 1.1032, "step": 7576 }, { "epoch": 0.033542874850590994, "grad_norm": 2.360273141742391, "learning_rate": 3.3542874850591e-06, "loss": 0.6076, "step": 7577 }, { "epoch": 0.033547301784054184, "grad_norm": 3.0182776226080965, "learning_rate": 3.3547301784054186e-06, "loss": 0.7826, "step": 7578 }, { "epoch": 0.033551728717517375, "grad_norm": 3.0483115042861892, "learning_rate": 3.355172871751738e-06, "loss": 0.8383, "step": 7579 }, { "epoch": 0.033556155650980565, "grad_norm": 2.6339752643074617, "learning_rate": 3.355615565098057e-06, "loss": 0.6443, "step": 7580 }, { "epoch": 0.033560582584443756, "grad_norm": 3.2770340735726364, "learning_rate": 3.356058258444376e-06, "loss": 0.7891, "step": 7581 }, { "epoch": 0.033565009517906946, "grad_norm": 2.7869408694107203, "learning_rate": 3.356500951790695e-06, "loss": 0.8589, "step": 7582 }, { "epoch": 0.03356943645137014, "grad_norm": 3.2607255187940267, "learning_rate": 3.356943645137014e-06, "loss": 0.6615, "step": 7583 }, { "epoch": 0.03357386338483333, "grad_norm": 2.362853688913638, "learning_rate": 3.3573863384833327e-06, "loss": 0.5378, "step": 7584 }, { "epoch": 0.03357829031829652, "grad_norm": 2.748755423445054, "learning_rate": 3.357829031829652e-06, "loss": 0.6254, "step": 7585 }, { "epoch": 0.03358271725175971, "grad_norm": 3.111416617166759, "learning_rate": 3.358271725175971e-06, "loss": 0.704, "step": 7586 }, { "epoch": 0.0335871441852229, "grad_norm": 3.0635496225766126, "learning_rate": 3.3587144185222895e-06, "loss": 0.8166, "step": 7587 }, { "epoch": 0.03359157111868609, "grad_norm": 2.647462255396281, "learning_rate": 3.359157111868609e-06, "loss": 0.7748, "step": 7588 }, { "epoch": 0.03359599805214927, "grad_norm": 3.3588802406986478, "learning_rate": 3.3595998052149283e-06, "loss": 0.6308, "step": 7589 }, { "epoch": 0.03360042498561246, "grad_norm": 3.0700088292925147, "learning_rate": 3.360042498561247e-06, "loss": 0.8957, "step": 7590 }, { "epoch": 0.033604851919075654, "grad_norm": 3.1158638151930353, "learning_rate": 3.3604851919075657e-06, "loss": 0.9001, "step": 7591 }, { "epoch": 0.033609278852538844, "grad_norm": 3.07338920291011, "learning_rate": 3.360927885253885e-06, "loss": 1.0515, "step": 7592 }, { "epoch": 0.033613705786002035, "grad_norm": 3.0990889285693326, "learning_rate": 3.3613705786002036e-06, "loss": 0.8553, "step": 7593 }, { "epoch": 0.033618132719465225, "grad_norm": 3.087415067434533, "learning_rate": 3.361813271946523e-06, "loss": 0.8079, "step": 7594 }, { "epoch": 0.033622559652928416, "grad_norm": 2.529647949933162, "learning_rate": 3.362255965292842e-06, "loss": 0.5296, "step": 7595 }, { "epoch": 0.033626986586391606, "grad_norm": 2.8818996475268195, "learning_rate": 3.362698658639161e-06, "loss": 0.6488, "step": 7596 }, { "epoch": 0.0336314135198548, "grad_norm": 2.515953148856958, "learning_rate": 3.36314135198548e-06, "loss": 0.6191, "step": 7597 }, { "epoch": 0.03363584045331799, "grad_norm": 2.996057396267459, "learning_rate": 3.3635840453317992e-06, "loss": 0.5727, "step": 7598 }, { "epoch": 0.03364026738678118, "grad_norm": 2.6792786215367324, "learning_rate": 3.3640267386781177e-06, "loss": 0.717, "step": 7599 }, { "epoch": 0.03364469432024437, "grad_norm": 2.6545678749638513, "learning_rate": 3.364469432024437e-06, "loss": 0.9364, "step": 7600 }, { "epoch": 0.03364912125370756, "grad_norm": 2.6317738483180086, "learning_rate": 3.364912125370756e-06, "loss": 0.6831, "step": 7601 }, { "epoch": 0.03365354818717075, "grad_norm": 3.018487962838136, "learning_rate": 3.365354818717075e-06, "loss": 0.5884, "step": 7602 }, { "epoch": 0.03365797512063394, "grad_norm": 2.574530107535042, "learning_rate": 3.365797512063394e-06, "loss": 0.8583, "step": 7603 }, { "epoch": 0.033662402054097124, "grad_norm": 3.5632901223294082, "learning_rate": 3.3662402054097133e-06, "loss": 1.0596, "step": 7604 }, { "epoch": 0.033666828987560314, "grad_norm": 2.858961317307755, "learning_rate": 3.366682898756032e-06, "loss": 0.7756, "step": 7605 }, { "epoch": 0.033671255921023505, "grad_norm": 2.6229023763961874, "learning_rate": 3.3671255921023512e-06, "loss": 0.7302, "step": 7606 }, { "epoch": 0.033675682854486695, "grad_norm": 2.803836720334684, "learning_rate": 3.36756828544867e-06, "loss": 0.9131, "step": 7607 }, { "epoch": 0.033680109787949886, "grad_norm": 2.549136156279228, "learning_rate": 3.3680109787949887e-06, "loss": 0.5235, "step": 7608 }, { "epoch": 0.033684536721413076, "grad_norm": 2.4361949282545643, "learning_rate": 3.368453672141308e-06, "loss": 0.7321, "step": 7609 }, { "epoch": 0.03368896365487627, "grad_norm": 2.366444803183045, "learning_rate": 3.368896365487627e-06, "loss": 0.6587, "step": 7610 }, { "epoch": 0.03369339058833946, "grad_norm": 2.758968395902666, "learning_rate": 3.369339058833946e-06, "loss": 0.9283, "step": 7611 }, { "epoch": 0.03369781752180265, "grad_norm": 3.4340806819512175, "learning_rate": 3.369781752180265e-06, "loss": 0.8945, "step": 7612 }, { "epoch": 0.03370224445526584, "grad_norm": 3.445468395936475, "learning_rate": 3.3702244455265843e-06, "loss": 0.688, "step": 7613 }, { "epoch": 0.03370667138872903, "grad_norm": 2.6706992227184747, "learning_rate": 3.370667138872903e-06, "loss": 0.5182, "step": 7614 }, { "epoch": 0.03371109832219222, "grad_norm": 2.7629844751984165, "learning_rate": 3.371109832219222e-06, "loss": 0.6696, "step": 7615 }, { "epoch": 0.03371552525565541, "grad_norm": 2.993472060332794, "learning_rate": 3.371552525565541e-06, "loss": 0.6174, "step": 7616 }, { "epoch": 0.0337199521891186, "grad_norm": 3.144874738166404, "learning_rate": 3.37199521891186e-06, "loss": 0.8808, "step": 7617 }, { "epoch": 0.03372437912258179, "grad_norm": 2.7584406582732397, "learning_rate": 3.372437912258179e-06, "loss": 0.7249, "step": 7618 }, { "epoch": 0.033728806056044974, "grad_norm": 2.4476627825419155, "learning_rate": 3.3728806056044984e-06, "loss": 0.6191, "step": 7619 }, { "epoch": 0.033733232989508165, "grad_norm": 3.229274167266505, "learning_rate": 3.373323298950817e-06, "loss": 0.7215, "step": 7620 }, { "epoch": 0.033737659922971355, "grad_norm": 3.070098465360142, "learning_rate": 3.3737659922971363e-06, "loss": 0.5699, "step": 7621 }, { "epoch": 0.033742086856434546, "grad_norm": 2.4615542869734512, "learning_rate": 3.3742086856434552e-06, "loss": 0.5152, "step": 7622 }, { "epoch": 0.033746513789897736, "grad_norm": 2.9387381253103553, "learning_rate": 3.3746513789897738e-06, "loss": 0.8786, "step": 7623 }, { "epoch": 0.03375094072336093, "grad_norm": 2.5752687357086286, "learning_rate": 3.375094072336093e-06, "loss": 0.6574, "step": 7624 }, { "epoch": 0.03375536765682412, "grad_norm": 3.0026964685578337, "learning_rate": 3.3755367656824125e-06, "loss": 0.8439, "step": 7625 }, { "epoch": 0.03375979459028731, "grad_norm": 2.4775693754636214, "learning_rate": 3.375979459028731e-06, "loss": 0.7798, "step": 7626 }, { "epoch": 0.0337642215237505, "grad_norm": 2.872533295210822, "learning_rate": 3.37642215237505e-06, "loss": 0.6431, "step": 7627 }, { "epoch": 0.03376864845721369, "grad_norm": 3.12762286430737, "learning_rate": 3.3768648457213693e-06, "loss": 0.7933, "step": 7628 }, { "epoch": 0.03377307539067688, "grad_norm": 3.380482941087795, "learning_rate": 3.377307539067688e-06, "loss": 0.9312, "step": 7629 }, { "epoch": 0.03377750232414007, "grad_norm": 3.724134372702412, "learning_rate": 3.3777502324140072e-06, "loss": 0.5419, "step": 7630 }, { "epoch": 0.03378192925760326, "grad_norm": 3.7401229518680084, "learning_rate": 3.378192925760326e-06, "loss": 1.271, "step": 7631 }, { "epoch": 0.03378635619106645, "grad_norm": 3.272995680369416, "learning_rate": 3.378635619106645e-06, "loss": 0.8898, "step": 7632 }, { "epoch": 0.03379078312452964, "grad_norm": 2.707772796368634, "learning_rate": 3.379078312452964e-06, "loss": 0.5754, "step": 7633 }, { "epoch": 0.033795210057992825, "grad_norm": 3.679046806525754, "learning_rate": 3.3795210057992834e-06, "loss": 0.9979, "step": 7634 }, { "epoch": 0.033799636991456015, "grad_norm": 2.9485839997689314, "learning_rate": 3.379963699145602e-06, "loss": 0.7623, "step": 7635 }, { "epoch": 0.033804063924919206, "grad_norm": 2.545119669390494, "learning_rate": 3.3804063924919213e-06, "loss": 0.7826, "step": 7636 }, { "epoch": 0.033808490858382396, "grad_norm": 2.5596658115153006, "learning_rate": 3.3808490858382403e-06, "loss": 0.8547, "step": 7637 }, { "epoch": 0.03381291779184559, "grad_norm": 2.9129482480470474, "learning_rate": 3.381291779184559e-06, "loss": 0.7859, "step": 7638 }, { "epoch": 0.03381734472530878, "grad_norm": 2.6511793476471093, "learning_rate": 3.381734472530878e-06, "loss": 0.6441, "step": 7639 }, { "epoch": 0.03382177165877197, "grad_norm": 2.462170618153597, "learning_rate": 3.3821771658771975e-06, "loss": 0.5641, "step": 7640 }, { "epoch": 0.03382619859223516, "grad_norm": 3.092687115879708, "learning_rate": 3.382619859223516e-06, "loss": 0.8631, "step": 7641 }, { "epoch": 0.03383062552569835, "grad_norm": 2.7157106125399118, "learning_rate": 3.383062552569835e-06, "loss": 0.8043, "step": 7642 }, { "epoch": 0.03383505245916154, "grad_norm": 2.306705311231435, "learning_rate": 3.3835052459161544e-06, "loss": 0.5784, "step": 7643 }, { "epoch": 0.03383947939262473, "grad_norm": 5.317491594637283, "learning_rate": 3.383947939262473e-06, "loss": 1.2712, "step": 7644 }, { "epoch": 0.03384390632608792, "grad_norm": 2.7584768167979696, "learning_rate": 3.3843906326087923e-06, "loss": 0.7133, "step": 7645 }, { "epoch": 0.03384833325955111, "grad_norm": 3.235473350339955, "learning_rate": 3.3848333259551112e-06, "loss": 1.1012, "step": 7646 }, { "epoch": 0.0338527601930143, "grad_norm": 3.7236102896018974, "learning_rate": 3.38527601930143e-06, "loss": 1.0206, "step": 7647 }, { "epoch": 0.03385718712647749, "grad_norm": 2.9772540019137357, "learning_rate": 3.385718712647749e-06, "loss": 0.7306, "step": 7648 }, { "epoch": 0.033861614059940676, "grad_norm": 2.7647675959441504, "learning_rate": 3.3861614059940685e-06, "loss": 0.6759, "step": 7649 }, { "epoch": 0.033866040993403866, "grad_norm": 2.9161074415534034, "learning_rate": 3.386604099340387e-06, "loss": 1.1341, "step": 7650 }, { "epoch": 0.03387046792686706, "grad_norm": 3.456965980792002, "learning_rate": 3.3870467926867064e-06, "loss": 0.8199, "step": 7651 }, { "epoch": 0.03387489486033025, "grad_norm": 3.3068091958007053, "learning_rate": 3.3874894860330253e-06, "loss": 0.9849, "step": 7652 }, { "epoch": 0.03387932179379344, "grad_norm": 3.749227895673335, "learning_rate": 3.387932179379344e-06, "loss": 1.0419, "step": 7653 }, { "epoch": 0.03388374872725663, "grad_norm": 3.2557854094821876, "learning_rate": 3.3883748727256632e-06, "loss": 1.0266, "step": 7654 }, { "epoch": 0.03388817566071982, "grad_norm": 3.6845168528065715, "learning_rate": 3.3888175660719826e-06, "loss": 1.052, "step": 7655 }, { "epoch": 0.03389260259418301, "grad_norm": 2.9569865725873847, "learning_rate": 3.389260259418301e-06, "loss": 0.7613, "step": 7656 }, { "epoch": 0.0338970295276462, "grad_norm": 2.8841461828613326, "learning_rate": 3.38970295276462e-06, "loss": 0.6503, "step": 7657 }, { "epoch": 0.03390145646110939, "grad_norm": 2.9729123128794357, "learning_rate": 3.3901456461109394e-06, "loss": 0.765, "step": 7658 }, { "epoch": 0.03390588339457258, "grad_norm": 3.308582666211766, "learning_rate": 3.390588339457258e-06, "loss": 0.5787, "step": 7659 }, { "epoch": 0.03391031032803577, "grad_norm": 2.203302313463546, "learning_rate": 3.3910310328035773e-06, "loss": 0.522, "step": 7660 }, { "epoch": 0.03391473726149896, "grad_norm": 2.681311896529342, "learning_rate": 3.3914737261498963e-06, "loss": 0.6603, "step": 7661 }, { "epoch": 0.03391916419496215, "grad_norm": 3.0603368469892938, "learning_rate": 3.3919164194962152e-06, "loss": 0.963, "step": 7662 }, { "epoch": 0.03392359112842534, "grad_norm": 2.6432258677784963, "learning_rate": 3.392359112842534e-06, "loss": 0.7634, "step": 7663 }, { "epoch": 0.03392801806188853, "grad_norm": 2.8205090918551075, "learning_rate": 3.3928018061888535e-06, "loss": 0.7751, "step": 7664 }, { "epoch": 0.03393244499535172, "grad_norm": 3.2314372867647934, "learning_rate": 3.393244499535172e-06, "loss": 0.7033, "step": 7665 }, { "epoch": 0.03393687192881491, "grad_norm": 3.354978295370899, "learning_rate": 3.3936871928814914e-06, "loss": 0.7948, "step": 7666 }, { "epoch": 0.0339412988622781, "grad_norm": 3.121953297362407, "learning_rate": 3.3941298862278104e-06, "loss": 0.6512, "step": 7667 }, { "epoch": 0.03394572579574129, "grad_norm": 2.6645152654623394, "learning_rate": 3.394572579574129e-06, "loss": 0.7786, "step": 7668 }, { "epoch": 0.03395015272920448, "grad_norm": 3.6521763813784873, "learning_rate": 3.3950152729204483e-06, "loss": 0.7285, "step": 7669 }, { "epoch": 0.03395457966266767, "grad_norm": 3.008925052199392, "learning_rate": 3.3954579662667677e-06, "loss": 0.6247, "step": 7670 }, { "epoch": 0.03395900659613086, "grad_norm": 3.1838283025251375, "learning_rate": 3.395900659613086e-06, "loss": 1.027, "step": 7671 }, { "epoch": 0.03396343352959405, "grad_norm": 2.919812444104894, "learning_rate": 3.396343352959405e-06, "loss": 0.6483, "step": 7672 }, { "epoch": 0.03396786046305724, "grad_norm": 2.705662997690998, "learning_rate": 3.3967860463057245e-06, "loss": 0.5378, "step": 7673 }, { "epoch": 0.03397228739652043, "grad_norm": 2.353029619364673, "learning_rate": 3.397228739652043e-06, "loss": 0.6705, "step": 7674 }, { "epoch": 0.03397671432998362, "grad_norm": 3.4047107175051945, "learning_rate": 3.3976714329983624e-06, "loss": 1.0661, "step": 7675 }, { "epoch": 0.03398114126344681, "grad_norm": 2.791612163065859, "learning_rate": 3.3981141263446813e-06, "loss": 0.8457, "step": 7676 }, { "epoch": 0.03398556819691, "grad_norm": 2.885577872281529, "learning_rate": 3.3985568196910003e-06, "loss": 0.5244, "step": 7677 }, { "epoch": 0.03398999513037319, "grad_norm": 2.4972220621356525, "learning_rate": 3.3989995130373192e-06, "loss": 0.7621, "step": 7678 }, { "epoch": 0.033994422063836384, "grad_norm": 2.9661570935462636, "learning_rate": 3.3994422063836386e-06, "loss": 0.7817, "step": 7679 }, { "epoch": 0.03399884899729957, "grad_norm": 2.9555431555511094, "learning_rate": 3.399884899729957e-06, "loss": 0.658, "step": 7680 }, { "epoch": 0.03400327593076276, "grad_norm": 3.9313074584556964, "learning_rate": 3.4003275930762765e-06, "loss": 1.3909, "step": 7681 }, { "epoch": 0.03400770286422595, "grad_norm": 2.9910471948615895, "learning_rate": 3.4007702864225954e-06, "loss": 0.6311, "step": 7682 }, { "epoch": 0.03401212979768914, "grad_norm": 2.840289824099919, "learning_rate": 3.4012129797689144e-06, "loss": 0.9794, "step": 7683 }, { "epoch": 0.03401655673115233, "grad_norm": 3.1644952176115653, "learning_rate": 3.4016556731152333e-06, "loss": 0.6653, "step": 7684 }, { "epoch": 0.03402098366461552, "grad_norm": 2.422195790521676, "learning_rate": 3.4020983664615527e-06, "loss": 0.5142, "step": 7685 }, { "epoch": 0.03402541059807871, "grad_norm": 2.4852875716129748, "learning_rate": 3.4025410598078712e-06, "loss": 0.5952, "step": 7686 }, { "epoch": 0.0340298375315419, "grad_norm": 3.613711930991604, "learning_rate": 3.40298375315419e-06, "loss": 1.1109, "step": 7687 }, { "epoch": 0.03403426446500509, "grad_norm": 2.815089951830925, "learning_rate": 3.4034264465005095e-06, "loss": 0.7393, "step": 7688 }, { "epoch": 0.03403869139846828, "grad_norm": 2.5444796703831036, "learning_rate": 3.403869139846828e-06, "loss": 0.9194, "step": 7689 }, { "epoch": 0.03404311833193147, "grad_norm": 2.6262518591328474, "learning_rate": 3.4043118331931474e-06, "loss": 0.572, "step": 7690 }, { "epoch": 0.03404754526539466, "grad_norm": 3.3878138995827207, "learning_rate": 3.4047545265394664e-06, "loss": 0.9802, "step": 7691 }, { "epoch": 0.034051972198857854, "grad_norm": 3.0079946398624515, "learning_rate": 3.4051972198857853e-06, "loss": 0.8683, "step": 7692 }, { "epoch": 0.034056399132321044, "grad_norm": 3.0689709274736843, "learning_rate": 3.4056399132321043e-06, "loss": 0.8125, "step": 7693 }, { "epoch": 0.034060826065784235, "grad_norm": 3.721913478209974, "learning_rate": 3.4060826065784237e-06, "loss": 0.628, "step": 7694 }, { "epoch": 0.03406525299924742, "grad_norm": 2.698169279675595, "learning_rate": 3.406525299924742e-06, "loss": 0.711, "step": 7695 }, { "epoch": 0.03406967993271061, "grad_norm": 3.908856238116026, "learning_rate": 3.4069679932710615e-06, "loss": 1.0011, "step": 7696 }, { "epoch": 0.0340741068661738, "grad_norm": 3.5401260497245217, "learning_rate": 3.4074106866173805e-06, "loss": 1.3115, "step": 7697 }, { "epoch": 0.03407853379963699, "grad_norm": 2.2834848358784052, "learning_rate": 3.4078533799636994e-06, "loss": 0.4415, "step": 7698 }, { "epoch": 0.03408296073310018, "grad_norm": 3.021107156077025, "learning_rate": 3.4082960733100184e-06, "loss": 0.7641, "step": 7699 }, { "epoch": 0.03408738766656337, "grad_norm": 3.5776018953629483, "learning_rate": 3.4087387666563378e-06, "loss": 0.9542, "step": 7700 }, { "epoch": 0.03409181460002656, "grad_norm": 3.4990666263438026, "learning_rate": 3.4091814600026563e-06, "loss": 1.0158, "step": 7701 }, { "epoch": 0.03409624153348975, "grad_norm": 3.146422459858827, "learning_rate": 3.4096241533489757e-06, "loss": 0.8321, "step": 7702 }, { "epoch": 0.03410066846695294, "grad_norm": 2.5718724608801233, "learning_rate": 3.4100668466952946e-06, "loss": 0.6239, "step": 7703 }, { "epoch": 0.03410509540041613, "grad_norm": 2.8113184955480097, "learning_rate": 3.410509540041613e-06, "loss": 0.8458, "step": 7704 }, { "epoch": 0.03410952233387932, "grad_norm": 3.3791737528758885, "learning_rate": 3.4109522333879325e-06, "loss": 0.5317, "step": 7705 }, { "epoch": 0.034113949267342514, "grad_norm": 2.6569346705581114, "learning_rate": 3.411394926734252e-06, "loss": 0.7371, "step": 7706 }, { "epoch": 0.034118376200805704, "grad_norm": 2.444034360365128, "learning_rate": 3.4118376200805704e-06, "loss": 0.7898, "step": 7707 }, { "epoch": 0.034122803134268895, "grad_norm": 2.5637874102839286, "learning_rate": 3.4122803134268893e-06, "loss": 0.8351, "step": 7708 }, { "epoch": 0.034127230067732085, "grad_norm": 2.655266527273304, "learning_rate": 3.4127230067732087e-06, "loss": 0.6746, "step": 7709 }, { "epoch": 0.03413165700119527, "grad_norm": 2.6187110199658137, "learning_rate": 3.4131657001195272e-06, "loss": 0.7039, "step": 7710 }, { "epoch": 0.03413608393465846, "grad_norm": 2.7274904665859236, "learning_rate": 3.4136083934658466e-06, "loss": 0.9356, "step": 7711 }, { "epoch": 0.03414051086812165, "grad_norm": 3.057854372930257, "learning_rate": 3.4140510868121655e-06, "loss": 1.1177, "step": 7712 }, { "epoch": 0.03414493780158484, "grad_norm": 2.777869029688645, "learning_rate": 3.4144937801584845e-06, "loss": 0.7542, "step": 7713 }, { "epoch": 0.03414936473504803, "grad_norm": 2.812185565285078, "learning_rate": 3.4149364735048034e-06, "loss": 0.7341, "step": 7714 }, { "epoch": 0.03415379166851122, "grad_norm": 2.5435655199760956, "learning_rate": 3.415379166851123e-06, "loss": 0.9606, "step": 7715 }, { "epoch": 0.03415821860197441, "grad_norm": 3.567080957883018, "learning_rate": 3.4158218601974413e-06, "loss": 1.0253, "step": 7716 }, { "epoch": 0.0341626455354376, "grad_norm": 2.7056644579310114, "learning_rate": 3.4162645535437607e-06, "loss": 0.871, "step": 7717 }, { "epoch": 0.03416707246890079, "grad_norm": 2.8991755097821237, "learning_rate": 3.4167072468900797e-06, "loss": 0.7852, "step": 7718 }, { "epoch": 0.03417149940236398, "grad_norm": 2.991357195301298, "learning_rate": 3.417149940236398e-06, "loss": 0.8515, "step": 7719 }, { "epoch": 0.034175926335827174, "grad_norm": 3.3843146538042, "learning_rate": 3.4175926335827175e-06, "loss": 1.1377, "step": 7720 }, { "epoch": 0.034180353269290364, "grad_norm": 2.6546002734771763, "learning_rate": 3.418035326929037e-06, "loss": 0.7962, "step": 7721 }, { "epoch": 0.034184780202753555, "grad_norm": 3.5088338119763938, "learning_rate": 3.4184780202753554e-06, "loss": 0.7751, "step": 7722 }, { "epoch": 0.034189207136216745, "grad_norm": 3.2829733815690787, "learning_rate": 3.4189207136216744e-06, "loss": 0.6182, "step": 7723 }, { "epoch": 0.034193634069679936, "grad_norm": 3.0199207186710737, "learning_rate": 3.4193634069679938e-06, "loss": 0.8108, "step": 7724 }, { "epoch": 0.03419806100314312, "grad_norm": 2.8210999178907454, "learning_rate": 3.4198061003143123e-06, "loss": 0.9146, "step": 7725 }, { "epoch": 0.03420248793660631, "grad_norm": 2.887171179842358, "learning_rate": 3.4202487936606317e-06, "loss": 0.5981, "step": 7726 }, { "epoch": 0.0342069148700695, "grad_norm": 2.72774777477305, "learning_rate": 3.4206914870069506e-06, "loss": 0.663, "step": 7727 }, { "epoch": 0.03421134180353269, "grad_norm": 2.429117245160065, "learning_rate": 3.4211341803532695e-06, "loss": 0.69, "step": 7728 }, { "epoch": 0.03421576873699588, "grad_norm": 3.2094851545576626, "learning_rate": 3.4215768736995885e-06, "loss": 1.0609, "step": 7729 }, { "epoch": 0.03422019567045907, "grad_norm": 3.219169047478392, "learning_rate": 3.422019567045908e-06, "loss": 1.0883, "step": 7730 }, { "epoch": 0.03422462260392226, "grad_norm": 3.8230059287669906, "learning_rate": 3.4224622603922264e-06, "loss": 0.7219, "step": 7731 }, { "epoch": 0.03422904953738545, "grad_norm": 3.5485317444078377, "learning_rate": 3.4229049537385458e-06, "loss": 1.3128, "step": 7732 }, { "epoch": 0.034233476470848644, "grad_norm": 3.334380720581328, "learning_rate": 3.4233476470848647e-06, "loss": 0.9571, "step": 7733 }, { "epoch": 0.034237903404311834, "grad_norm": 2.7613036590774356, "learning_rate": 3.4237903404311832e-06, "loss": 0.4039, "step": 7734 }, { "epoch": 0.034242330337775025, "grad_norm": 2.4995827424944808, "learning_rate": 3.4242330337775026e-06, "loss": 0.7453, "step": 7735 }, { "epoch": 0.034246757271238215, "grad_norm": 2.434999683327354, "learning_rate": 3.424675727123822e-06, "loss": 0.5758, "step": 7736 }, { "epoch": 0.034251184204701406, "grad_norm": 2.36813723265687, "learning_rate": 3.4251184204701405e-06, "loss": 0.666, "step": 7737 }, { "epoch": 0.034255611138164596, "grad_norm": 2.653045473074084, "learning_rate": 3.4255611138164594e-06, "loss": 0.8155, "step": 7738 }, { "epoch": 0.03426003807162779, "grad_norm": 2.8773498257987735, "learning_rate": 3.426003807162779e-06, "loss": 0.7891, "step": 7739 }, { "epoch": 0.03426446500509097, "grad_norm": 3.286924128913711, "learning_rate": 3.4264465005090973e-06, "loss": 0.8972, "step": 7740 }, { "epoch": 0.03426889193855416, "grad_norm": 2.8272423311151216, "learning_rate": 3.4268891938554167e-06, "loss": 0.7465, "step": 7741 }, { "epoch": 0.03427331887201735, "grad_norm": 3.5824654350205942, "learning_rate": 3.4273318872017357e-06, "loss": 1.2352, "step": 7742 }, { "epoch": 0.03427774580548054, "grad_norm": 3.0419411397320095, "learning_rate": 3.4277745805480546e-06, "loss": 0.8071, "step": 7743 }, { "epoch": 0.03428217273894373, "grad_norm": 3.1369141749598852, "learning_rate": 3.4282172738943735e-06, "loss": 0.7929, "step": 7744 }, { "epoch": 0.03428659967240692, "grad_norm": 2.874867357607173, "learning_rate": 3.428659967240693e-06, "loss": 0.7596, "step": 7745 }, { "epoch": 0.03429102660587011, "grad_norm": 2.519404025136811, "learning_rate": 3.4291026605870114e-06, "loss": 0.5395, "step": 7746 }, { "epoch": 0.034295453539333304, "grad_norm": 2.5573080816911267, "learning_rate": 3.429545353933331e-06, "loss": 0.8791, "step": 7747 }, { "epoch": 0.034299880472796494, "grad_norm": 2.7103628514393416, "learning_rate": 3.4299880472796498e-06, "loss": 0.6634, "step": 7748 }, { "epoch": 0.034304307406259685, "grad_norm": 2.8524833346572196, "learning_rate": 3.4304307406259683e-06, "loss": 0.8253, "step": 7749 }, { "epoch": 0.034308734339722875, "grad_norm": 3.1558980023147285, "learning_rate": 3.4308734339722877e-06, "loss": 0.7175, "step": 7750 }, { "epoch": 0.034313161273186066, "grad_norm": 3.5341985549664217, "learning_rate": 3.431316127318607e-06, "loss": 0.8516, "step": 7751 }, { "epoch": 0.034317588206649256, "grad_norm": 3.417186293705051, "learning_rate": 3.4317588206649255e-06, "loss": 0.5582, "step": 7752 }, { "epoch": 0.03432201514011245, "grad_norm": 2.486299949214353, "learning_rate": 3.4322015140112445e-06, "loss": 0.5893, "step": 7753 }, { "epoch": 0.03432644207357564, "grad_norm": 2.562856194998851, "learning_rate": 3.432644207357564e-06, "loss": 0.7063, "step": 7754 }, { "epoch": 0.03433086900703882, "grad_norm": 3.6883936491813323, "learning_rate": 3.4330869007038824e-06, "loss": 1.2247, "step": 7755 }, { "epoch": 0.03433529594050201, "grad_norm": 2.8091015260747465, "learning_rate": 3.4335295940502018e-06, "loss": 0.4912, "step": 7756 }, { "epoch": 0.0343397228739652, "grad_norm": 3.1390998155967624, "learning_rate": 3.4339722873965207e-06, "loss": 0.9191, "step": 7757 }, { "epoch": 0.03434414980742839, "grad_norm": 2.5985778284274548, "learning_rate": 3.4344149807428397e-06, "loss": 0.9298, "step": 7758 }, { "epoch": 0.03434857674089158, "grad_norm": 3.528120330947743, "learning_rate": 3.4348576740891586e-06, "loss": 1.0391, "step": 7759 }, { "epoch": 0.03435300367435477, "grad_norm": 2.5846013058186155, "learning_rate": 3.435300367435478e-06, "loss": 0.5806, "step": 7760 }, { "epoch": 0.034357430607817964, "grad_norm": 3.522007246221051, "learning_rate": 3.4357430607817965e-06, "loss": 0.6908, "step": 7761 }, { "epoch": 0.034361857541281154, "grad_norm": 2.9926491065804415, "learning_rate": 3.436185754128116e-06, "loss": 0.8009, "step": 7762 }, { "epoch": 0.034366284474744345, "grad_norm": 3.338065668966139, "learning_rate": 3.436628447474435e-06, "loss": 1.3254, "step": 7763 }, { "epoch": 0.034370711408207535, "grad_norm": 2.573462817443144, "learning_rate": 3.4370711408207533e-06, "loss": 0.5771, "step": 7764 }, { "epoch": 0.034375138341670726, "grad_norm": 3.3552635611371766, "learning_rate": 3.4375138341670727e-06, "loss": 0.6003, "step": 7765 }, { "epoch": 0.034379565275133916, "grad_norm": 3.3805795520710293, "learning_rate": 3.437956527513392e-06, "loss": 0.6155, "step": 7766 }, { "epoch": 0.03438399220859711, "grad_norm": 3.124329728457044, "learning_rate": 3.4383992208597106e-06, "loss": 0.9604, "step": 7767 }, { "epoch": 0.0343884191420603, "grad_norm": 3.12158483589357, "learning_rate": 3.4388419142060295e-06, "loss": 0.9182, "step": 7768 }, { "epoch": 0.03439284607552349, "grad_norm": 2.4820067164274255, "learning_rate": 3.439284607552349e-06, "loss": 0.7224, "step": 7769 }, { "epoch": 0.03439727300898667, "grad_norm": 4.4901939520502125, "learning_rate": 3.4397273008986674e-06, "loss": 1.1138, "step": 7770 }, { "epoch": 0.03440169994244986, "grad_norm": 3.226031628766715, "learning_rate": 3.440169994244987e-06, "loss": 1.2199, "step": 7771 }, { "epoch": 0.03440612687591305, "grad_norm": 2.9947337013018673, "learning_rate": 3.4406126875913058e-06, "loss": 0.9198, "step": 7772 }, { "epoch": 0.03441055380937624, "grad_norm": 2.286384631666423, "learning_rate": 3.4410553809376247e-06, "loss": 0.7468, "step": 7773 }, { "epoch": 0.034414980742839434, "grad_norm": 2.6191773478680416, "learning_rate": 3.4414980742839437e-06, "loss": 0.8015, "step": 7774 }, { "epoch": 0.034419407676302624, "grad_norm": 3.6402971520472205, "learning_rate": 3.441940767630263e-06, "loss": 1.1698, "step": 7775 }, { "epoch": 0.034423834609765815, "grad_norm": 2.813090813300282, "learning_rate": 3.4423834609765816e-06, "loss": 0.7421, "step": 7776 }, { "epoch": 0.034428261543229005, "grad_norm": 2.6661645726160264, "learning_rate": 3.442826154322901e-06, "loss": 0.6276, "step": 7777 }, { "epoch": 0.034432688476692196, "grad_norm": 2.357216811281542, "learning_rate": 3.44326884766922e-06, "loss": 0.59, "step": 7778 }, { "epoch": 0.034437115410155386, "grad_norm": 3.4601391129320027, "learning_rate": 3.443711541015539e-06, "loss": 1.2015, "step": 7779 }, { "epoch": 0.03444154234361858, "grad_norm": 2.7428392320675576, "learning_rate": 3.4441542343618578e-06, "loss": 0.8994, "step": 7780 }, { "epoch": 0.03444596927708177, "grad_norm": 3.6206417192611164, "learning_rate": 3.444596927708177e-06, "loss": 1.2393, "step": 7781 }, { "epoch": 0.03445039621054496, "grad_norm": 3.027705933694299, "learning_rate": 3.4450396210544957e-06, "loss": 0.774, "step": 7782 }, { "epoch": 0.03445482314400815, "grad_norm": 3.3203554610337176, "learning_rate": 3.445482314400815e-06, "loss": 0.839, "step": 7783 }, { "epoch": 0.03445925007747134, "grad_norm": 3.2225449914111843, "learning_rate": 3.445925007747134e-06, "loss": 1.0126, "step": 7784 }, { "epoch": 0.03446367701093452, "grad_norm": 2.977083964855324, "learning_rate": 3.4463677010934525e-06, "loss": 0.8374, "step": 7785 }, { "epoch": 0.03446810394439771, "grad_norm": 3.311180317385822, "learning_rate": 3.446810394439772e-06, "loss": 1.1132, "step": 7786 }, { "epoch": 0.0344725308778609, "grad_norm": 3.4622178432355395, "learning_rate": 3.447253087786091e-06, "loss": 1.079, "step": 7787 }, { "epoch": 0.034476957811324094, "grad_norm": 3.9400893023480346, "learning_rate": 3.4476957811324098e-06, "loss": 1.2143, "step": 7788 }, { "epoch": 0.034481384744787284, "grad_norm": 2.560844973694104, "learning_rate": 3.4481384744787287e-06, "loss": 0.7654, "step": 7789 }, { "epoch": 0.034485811678250475, "grad_norm": 3.2342354815591907, "learning_rate": 3.448581167825048e-06, "loss": 1.0918, "step": 7790 }, { "epoch": 0.034490238611713665, "grad_norm": 3.133320312579303, "learning_rate": 3.4490238611713666e-06, "loss": 0.9835, "step": 7791 }, { "epoch": 0.034494665545176856, "grad_norm": 2.952566287391742, "learning_rate": 3.449466554517686e-06, "loss": 0.7046, "step": 7792 }, { "epoch": 0.034499092478640046, "grad_norm": 2.876983759446262, "learning_rate": 3.449909247864005e-06, "loss": 0.6822, "step": 7793 }, { "epoch": 0.03450351941210324, "grad_norm": 2.9464698127987425, "learning_rate": 3.450351941210324e-06, "loss": 0.68, "step": 7794 }, { "epoch": 0.03450794634556643, "grad_norm": 3.334598804879397, "learning_rate": 3.450794634556643e-06, "loss": 1.0544, "step": 7795 }, { "epoch": 0.03451237327902962, "grad_norm": 3.3212488295240465, "learning_rate": 3.451237327902962e-06, "loss": 0.6139, "step": 7796 }, { "epoch": 0.03451680021249281, "grad_norm": 3.1548191592901147, "learning_rate": 3.4516800212492807e-06, "loss": 0.8444, "step": 7797 }, { "epoch": 0.034521227145956, "grad_norm": 2.9495981471602195, "learning_rate": 3.4521227145956e-06, "loss": 0.9758, "step": 7798 }, { "epoch": 0.03452565407941919, "grad_norm": 2.7778523156495867, "learning_rate": 3.452565407941919e-06, "loss": 0.7173, "step": 7799 }, { "epoch": 0.03453008101288237, "grad_norm": 2.820702814075089, "learning_rate": 3.4530081012882376e-06, "loss": 0.7799, "step": 7800 }, { "epoch": 0.03453450794634556, "grad_norm": 2.816092733451698, "learning_rate": 3.453450794634557e-06, "loss": 0.7835, "step": 7801 }, { "epoch": 0.034538934879808754, "grad_norm": 3.0748038216625826, "learning_rate": 3.4538934879808763e-06, "loss": 1.101, "step": 7802 }, { "epoch": 0.034543361813271944, "grad_norm": 2.634236524571932, "learning_rate": 3.454336181327195e-06, "loss": 0.3291, "step": 7803 }, { "epoch": 0.034547788746735135, "grad_norm": 2.8610904658571346, "learning_rate": 3.4547788746735138e-06, "loss": 0.9042, "step": 7804 }, { "epoch": 0.034552215680198325, "grad_norm": 2.8070275879261577, "learning_rate": 3.455221568019833e-06, "loss": 0.5782, "step": 7805 }, { "epoch": 0.034556642613661516, "grad_norm": 2.973966703369039, "learning_rate": 3.4556642613661517e-06, "loss": 0.6776, "step": 7806 }, { "epoch": 0.034561069547124706, "grad_norm": 3.0792234990226484, "learning_rate": 3.456106954712471e-06, "loss": 0.8976, "step": 7807 }, { "epoch": 0.0345654964805879, "grad_norm": 2.6309692513792933, "learning_rate": 3.45654964805879e-06, "loss": 1.0043, "step": 7808 }, { "epoch": 0.03456992341405109, "grad_norm": 2.7537436041962415, "learning_rate": 3.456992341405109e-06, "loss": 0.6343, "step": 7809 }, { "epoch": 0.03457435034751428, "grad_norm": 2.6339869313843165, "learning_rate": 3.457435034751428e-06, "loss": 0.7343, "step": 7810 }, { "epoch": 0.03457877728097747, "grad_norm": 2.350116673472407, "learning_rate": 3.4578777280977472e-06, "loss": 0.67, "step": 7811 }, { "epoch": 0.03458320421444066, "grad_norm": 3.0980240731138666, "learning_rate": 3.4583204214440658e-06, "loss": 0.7882, "step": 7812 }, { "epoch": 0.03458763114790385, "grad_norm": 2.7801500407202586, "learning_rate": 3.458763114790385e-06, "loss": 0.8499, "step": 7813 }, { "epoch": 0.03459205808136704, "grad_norm": 2.7660983495536047, "learning_rate": 3.459205808136704e-06, "loss": 0.6352, "step": 7814 }, { "epoch": 0.03459648501483023, "grad_norm": 2.584086037330808, "learning_rate": 3.4596485014830226e-06, "loss": 0.7033, "step": 7815 }, { "epoch": 0.034600911948293414, "grad_norm": 2.786779095787737, "learning_rate": 3.460091194829342e-06, "loss": 0.77, "step": 7816 }, { "epoch": 0.034605338881756605, "grad_norm": 3.4463835378879497, "learning_rate": 3.4605338881756613e-06, "loss": 0.9865, "step": 7817 }, { "epoch": 0.034609765815219795, "grad_norm": 3.1252307075632793, "learning_rate": 3.46097658152198e-06, "loss": 0.7086, "step": 7818 }, { "epoch": 0.034614192748682986, "grad_norm": 3.436417314809619, "learning_rate": 3.461419274868299e-06, "loss": 0.7263, "step": 7819 }, { "epoch": 0.034618619682146176, "grad_norm": 4.135501100218695, "learning_rate": 3.461861968214618e-06, "loss": 0.9092, "step": 7820 }, { "epoch": 0.03462304661560937, "grad_norm": 3.2863043885243637, "learning_rate": 3.4623046615609367e-06, "loss": 1.0008, "step": 7821 }, { "epoch": 0.03462747354907256, "grad_norm": 3.1102483640799767, "learning_rate": 3.462747354907256e-06, "loss": 0.9518, "step": 7822 }, { "epoch": 0.03463190048253575, "grad_norm": 2.902218613954329, "learning_rate": 3.463190048253575e-06, "loss": 0.7514, "step": 7823 }, { "epoch": 0.03463632741599894, "grad_norm": 2.8974742360838714, "learning_rate": 3.463632741599894e-06, "loss": 0.7737, "step": 7824 }, { "epoch": 0.03464075434946213, "grad_norm": 3.2474785913982536, "learning_rate": 3.464075434946213e-06, "loss": 0.7597, "step": 7825 }, { "epoch": 0.03464518128292532, "grad_norm": 3.132951796629156, "learning_rate": 3.4645181282925323e-06, "loss": 0.9652, "step": 7826 }, { "epoch": 0.03464960821638851, "grad_norm": 3.71694500200647, "learning_rate": 3.464960821638851e-06, "loss": 0.5677, "step": 7827 }, { "epoch": 0.0346540351498517, "grad_norm": 2.6960684238283794, "learning_rate": 3.46540351498517e-06, "loss": 0.7267, "step": 7828 }, { "epoch": 0.03465846208331489, "grad_norm": 2.641652559200331, "learning_rate": 3.465846208331489e-06, "loss": 0.5008, "step": 7829 }, { "epoch": 0.03466288901677808, "grad_norm": 3.1643353572860153, "learning_rate": 3.4662889016778077e-06, "loss": 0.7854, "step": 7830 }, { "epoch": 0.034667315950241265, "grad_norm": 2.615217953980334, "learning_rate": 3.466731595024127e-06, "loss": 0.7128, "step": 7831 }, { "epoch": 0.034671742883704455, "grad_norm": 3.5563619557320374, "learning_rate": 3.4671742883704464e-06, "loss": 0.8853, "step": 7832 }, { "epoch": 0.034676169817167646, "grad_norm": 2.899703412119716, "learning_rate": 3.467616981716765e-06, "loss": 0.901, "step": 7833 }, { "epoch": 0.034680596750630836, "grad_norm": 2.6367355199487355, "learning_rate": 3.468059675063084e-06, "loss": 0.8194, "step": 7834 }, { "epoch": 0.03468502368409403, "grad_norm": 3.2227564074156563, "learning_rate": 3.4685023684094032e-06, "loss": 0.8201, "step": 7835 }, { "epoch": 0.03468945061755722, "grad_norm": 2.471339528369435, "learning_rate": 3.4689450617557218e-06, "loss": 0.6721, "step": 7836 }, { "epoch": 0.03469387755102041, "grad_norm": 2.348017284082285, "learning_rate": 3.469387755102041e-06, "loss": 0.6983, "step": 7837 }, { "epoch": 0.0346983044844836, "grad_norm": 3.807535577247548, "learning_rate": 3.46983044844836e-06, "loss": 0.8885, "step": 7838 }, { "epoch": 0.03470273141794679, "grad_norm": 2.107736628979283, "learning_rate": 3.470273141794679e-06, "loss": 0.4913, "step": 7839 }, { "epoch": 0.03470715835140998, "grad_norm": 2.734284551519398, "learning_rate": 3.470715835140998e-06, "loss": 0.8831, "step": 7840 }, { "epoch": 0.03471158528487317, "grad_norm": 3.2885488124985303, "learning_rate": 3.4711585284873173e-06, "loss": 0.7246, "step": 7841 }, { "epoch": 0.03471601221833636, "grad_norm": 2.7032052574139835, "learning_rate": 3.471601221833636e-06, "loss": 0.7922, "step": 7842 }, { "epoch": 0.03472043915179955, "grad_norm": 2.9149721804835336, "learning_rate": 3.4720439151799552e-06, "loss": 0.9953, "step": 7843 }, { "epoch": 0.03472486608526274, "grad_norm": 2.674830273067456, "learning_rate": 3.472486608526274e-06, "loss": 0.792, "step": 7844 }, { "epoch": 0.03472929301872593, "grad_norm": 3.4949573688631825, "learning_rate": 3.4729293018725927e-06, "loss": 1.123, "step": 7845 }, { "epoch": 0.034733719952189115, "grad_norm": 2.7993209458988297, "learning_rate": 3.473371995218912e-06, "loss": 0.7063, "step": 7846 }, { "epoch": 0.034738146885652306, "grad_norm": 3.0335380718408014, "learning_rate": 3.4738146885652315e-06, "loss": 0.6276, "step": 7847 }, { "epoch": 0.034742573819115496, "grad_norm": 2.5891037342850067, "learning_rate": 3.47425738191155e-06, "loss": 0.8748, "step": 7848 }, { "epoch": 0.03474700075257869, "grad_norm": 3.630488205780467, "learning_rate": 3.474700075257869e-06, "loss": 0.7488, "step": 7849 }, { "epoch": 0.03475142768604188, "grad_norm": 2.571403759169103, "learning_rate": 3.4751427686041883e-06, "loss": 0.7965, "step": 7850 }, { "epoch": 0.03475585461950507, "grad_norm": 3.288876314699665, "learning_rate": 3.475585461950507e-06, "loss": 0.7729, "step": 7851 }, { "epoch": 0.03476028155296826, "grad_norm": 2.5536915746754003, "learning_rate": 3.476028155296826e-06, "loss": 0.8564, "step": 7852 }, { "epoch": 0.03476470848643145, "grad_norm": 3.3373638485060697, "learning_rate": 3.476470848643145e-06, "loss": 1.062, "step": 7853 }, { "epoch": 0.03476913541989464, "grad_norm": 2.8982970907249976, "learning_rate": 3.476913541989464e-06, "loss": 0.8622, "step": 7854 }, { "epoch": 0.03477356235335783, "grad_norm": 3.905938213936532, "learning_rate": 3.477356235335783e-06, "loss": 0.9935, "step": 7855 }, { "epoch": 0.03477798928682102, "grad_norm": 3.246792296621128, "learning_rate": 3.4777989286821024e-06, "loss": 1.0474, "step": 7856 }, { "epoch": 0.03478241622028421, "grad_norm": 2.7970479298591764, "learning_rate": 3.478241622028421e-06, "loss": 0.7957, "step": 7857 }, { "epoch": 0.0347868431537474, "grad_norm": 2.9988805641985965, "learning_rate": 3.4786843153747403e-06, "loss": 0.7388, "step": 7858 }, { "epoch": 0.03479127008721059, "grad_norm": 3.02714831323829, "learning_rate": 3.4791270087210592e-06, "loss": 0.6569, "step": 7859 }, { "epoch": 0.03479569702067378, "grad_norm": 2.8646680890507668, "learning_rate": 3.479569702067378e-06, "loss": 0.6875, "step": 7860 }, { "epoch": 0.034800123954136966, "grad_norm": 3.0984942808630396, "learning_rate": 3.480012395413697e-06, "loss": 0.9523, "step": 7861 }, { "epoch": 0.03480455088760016, "grad_norm": 3.056007986987479, "learning_rate": 3.4804550887600165e-06, "loss": 0.7032, "step": 7862 }, { "epoch": 0.03480897782106335, "grad_norm": 3.13269191696554, "learning_rate": 3.480897782106335e-06, "loss": 1.1113, "step": 7863 }, { "epoch": 0.03481340475452654, "grad_norm": 2.8394802941873087, "learning_rate": 3.4813404754526544e-06, "loss": 0.7883, "step": 7864 }, { "epoch": 0.03481783168798973, "grad_norm": 2.6604722910729963, "learning_rate": 3.4817831687989733e-06, "loss": 0.8626, "step": 7865 }, { "epoch": 0.03482225862145292, "grad_norm": 2.770229119062266, "learning_rate": 3.482225862145292e-06, "loss": 1.0992, "step": 7866 }, { "epoch": 0.03482668555491611, "grad_norm": 2.1915379673796984, "learning_rate": 3.4826685554916112e-06, "loss": 0.5097, "step": 7867 }, { "epoch": 0.0348311124883793, "grad_norm": 3.1895770734874884, "learning_rate": 3.48311124883793e-06, "loss": 0.7504, "step": 7868 }, { "epoch": 0.03483553942184249, "grad_norm": 3.156722010069688, "learning_rate": 3.483553942184249e-06, "loss": 0.4942, "step": 7869 }, { "epoch": 0.03483996635530568, "grad_norm": 2.795936411214515, "learning_rate": 3.483996635530568e-06, "loss": 0.9405, "step": 7870 }, { "epoch": 0.03484439328876887, "grad_norm": 2.65894168789888, "learning_rate": 3.4844393288768875e-06, "loss": 0.7546, "step": 7871 }, { "epoch": 0.03484882022223206, "grad_norm": 2.751632301448822, "learning_rate": 3.484882022223206e-06, "loss": 0.7848, "step": 7872 }, { "epoch": 0.03485324715569525, "grad_norm": 2.672707916427194, "learning_rate": 3.4853247155695253e-06, "loss": 0.829, "step": 7873 }, { "epoch": 0.03485767408915844, "grad_norm": 2.7719901595297256, "learning_rate": 3.4857674089158443e-06, "loss": 0.8511, "step": 7874 }, { "epoch": 0.03486210102262163, "grad_norm": 3.3112021095811244, "learning_rate": 3.4862101022621632e-06, "loss": 0.7328, "step": 7875 }, { "epoch": 0.03486652795608482, "grad_norm": 3.4181352584588836, "learning_rate": 3.486652795608482e-06, "loss": 1.0727, "step": 7876 }, { "epoch": 0.03487095488954801, "grad_norm": 2.787003725870873, "learning_rate": 3.4870954889548016e-06, "loss": 0.6973, "step": 7877 }, { "epoch": 0.0348753818230112, "grad_norm": 4.0131760856314225, "learning_rate": 3.48753818230112e-06, "loss": 1.3886, "step": 7878 }, { "epoch": 0.03487980875647439, "grad_norm": 3.54293504856351, "learning_rate": 3.4879808756474395e-06, "loss": 1.0451, "step": 7879 }, { "epoch": 0.03488423568993758, "grad_norm": 2.6813510237992357, "learning_rate": 3.4884235689937584e-06, "loss": 0.5847, "step": 7880 }, { "epoch": 0.03488866262340077, "grad_norm": 3.5930380697534, "learning_rate": 3.488866262340077e-06, "loss": 0.8692, "step": 7881 }, { "epoch": 0.03489308955686396, "grad_norm": 2.4763077132459372, "learning_rate": 3.4893089556863963e-06, "loss": 0.8185, "step": 7882 }, { "epoch": 0.03489751649032715, "grad_norm": 3.247646687405074, "learning_rate": 3.4897516490327157e-06, "loss": 0.6642, "step": 7883 }, { "epoch": 0.03490194342379034, "grad_norm": 2.450442443291003, "learning_rate": 3.490194342379034e-06, "loss": 0.6004, "step": 7884 }, { "epoch": 0.03490637035725353, "grad_norm": 2.804159182539627, "learning_rate": 3.490637035725353e-06, "loss": 0.7509, "step": 7885 }, { "epoch": 0.03491079729071672, "grad_norm": 3.488813639969621, "learning_rate": 3.4910797290716725e-06, "loss": 1.0486, "step": 7886 }, { "epoch": 0.03491522422417991, "grad_norm": 3.675145281415032, "learning_rate": 3.491522422417991e-06, "loss": 1.1039, "step": 7887 }, { "epoch": 0.0349196511576431, "grad_norm": 2.6182260703636446, "learning_rate": 3.4919651157643104e-06, "loss": 0.6662, "step": 7888 }, { "epoch": 0.03492407809110629, "grad_norm": 2.3896305226886962, "learning_rate": 3.4924078091106293e-06, "loss": 0.5835, "step": 7889 }, { "epoch": 0.034928505024569484, "grad_norm": 2.5532382164357097, "learning_rate": 3.4928505024569483e-06, "loss": 0.8626, "step": 7890 }, { "epoch": 0.03493293195803267, "grad_norm": 3.185310564982488, "learning_rate": 3.4932931958032672e-06, "loss": 0.9774, "step": 7891 }, { "epoch": 0.03493735889149586, "grad_norm": 2.733908359669194, "learning_rate": 3.4937358891495866e-06, "loss": 0.906, "step": 7892 }, { "epoch": 0.03494178582495905, "grad_norm": 3.5449960577594606, "learning_rate": 3.494178582495905e-06, "loss": 0.8239, "step": 7893 }, { "epoch": 0.03494621275842224, "grad_norm": 3.6844011606113973, "learning_rate": 3.4946212758422245e-06, "loss": 1.2322, "step": 7894 }, { "epoch": 0.03495063969188543, "grad_norm": 2.6325104295074597, "learning_rate": 3.4950639691885435e-06, "loss": 0.8186, "step": 7895 }, { "epoch": 0.03495506662534862, "grad_norm": 2.5172892473062993, "learning_rate": 3.495506662534862e-06, "loss": 0.7709, "step": 7896 }, { "epoch": 0.03495949355881181, "grad_norm": 2.4787928928391527, "learning_rate": 3.4959493558811813e-06, "loss": 0.7643, "step": 7897 }, { "epoch": 0.034963920492275, "grad_norm": 2.8750458246649986, "learning_rate": 3.4963920492275007e-06, "loss": 0.7038, "step": 7898 }, { "epoch": 0.03496834742573819, "grad_norm": 2.842753299903731, "learning_rate": 3.4968347425738192e-06, "loss": 0.8294, "step": 7899 }, { "epoch": 0.03497277435920138, "grad_norm": 2.71037851321072, "learning_rate": 3.497277435920138e-06, "loss": 0.4674, "step": 7900 }, { "epoch": 0.03497720129266457, "grad_norm": 2.498920649732839, "learning_rate": 3.4977201292664576e-06, "loss": 0.7046, "step": 7901 }, { "epoch": 0.03498162822612776, "grad_norm": 3.0938242949558905, "learning_rate": 3.498162822612776e-06, "loss": 1.0037, "step": 7902 }, { "epoch": 0.034986055159590954, "grad_norm": 2.576358076212721, "learning_rate": 3.4986055159590955e-06, "loss": 0.768, "step": 7903 }, { "epoch": 0.034990482093054144, "grad_norm": 3.2282967690197117, "learning_rate": 3.4990482093054144e-06, "loss": 0.6948, "step": 7904 }, { "epoch": 0.034994909026517335, "grad_norm": 3.7442636000790683, "learning_rate": 3.4994909026517333e-06, "loss": 0.8194, "step": 7905 }, { "epoch": 0.03499933595998052, "grad_norm": 3.061546647506742, "learning_rate": 3.4999335959980523e-06, "loss": 0.6089, "step": 7906 }, { "epoch": 0.03500376289344371, "grad_norm": 3.0466133942285785, "learning_rate": 3.5003762893443717e-06, "loss": 0.9259, "step": 7907 }, { "epoch": 0.0350081898269069, "grad_norm": 3.4252678232019265, "learning_rate": 3.50081898269069e-06, "loss": 0.97, "step": 7908 }, { "epoch": 0.03501261676037009, "grad_norm": 2.766414548408582, "learning_rate": 3.5012616760370096e-06, "loss": 0.5895, "step": 7909 }, { "epoch": 0.03501704369383328, "grad_norm": 2.60867001119937, "learning_rate": 3.5017043693833285e-06, "loss": 0.562, "step": 7910 }, { "epoch": 0.03502147062729647, "grad_norm": 2.8139992973894556, "learning_rate": 3.502147062729647e-06, "loss": 0.7771, "step": 7911 }, { "epoch": 0.03502589756075966, "grad_norm": 2.5363921572623442, "learning_rate": 3.5025897560759664e-06, "loss": 0.6818, "step": 7912 }, { "epoch": 0.03503032449422285, "grad_norm": 3.1609787426383784, "learning_rate": 3.5030324494222858e-06, "loss": 0.7683, "step": 7913 }, { "epoch": 0.03503475142768604, "grad_norm": 2.6155625815535037, "learning_rate": 3.5034751427686043e-06, "loss": 0.6067, "step": 7914 }, { "epoch": 0.03503917836114923, "grad_norm": 3.412221605670991, "learning_rate": 3.5039178361149232e-06, "loss": 1.1169, "step": 7915 }, { "epoch": 0.03504360529461242, "grad_norm": 3.3719315625541406, "learning_rate": 3.5043605294612426e-06, "loss": 1.0061, "step": 7916 }, { "epoch": 0.035048032228075614, "grad_norm": 2.8645543022000357, "learning_rate": 3.504803222807561e-06, "loss": 0.9382, "step": 7917 }, { "epoch": 0.035052459161538804, "grad_norm": 3.0004173474419913, "learning_rate": 3.5052459161538805e-06, "loss": 0.9003, "step": 7918 }, { "epoch": 0.035056886095001995, "grad_norm": 3.2512823384577465, "learning_rate": 3.5056886095001995e-06, "loss": 0.9219, "step": 7919 }, { "epoch": 0.035061313028465185, "grad_norm": 2.582545052045373, "learning_rate": 3.5061313028465184e-06, "loss": 0.8552, "step": 7920 }, { "epoch": 0.03506573996192837, "grad_norm": 2.745540016275323, "learning_rate": 3.5065739961928373e-06, "loss": 0.8666, "step": 7921 }, { "epoch": 0.03507016689539156, "grad_norm": 2.568166660607488, "learning_rate": 3.5070166895391567e-06, "loss": 0.79, "step": 7922 }, { "epoch": 0.03507459382885475, "grad_norm": 2.8464762716482537, "learning_rate": 3.5074593828854752e-06, "loss": 0.5558, "step": 7923 }, { "epoch": 0.03507902076231794, "grad_norm": 3.5797883095911054, "learning_rate": 3.5079020762317946e-06, "loss": 0.9811, "step": 7924 }, { "epoch": 0.03508344769578113, "grad_norm": 3.381615710135803, "learning_rate": 3.5083447695781136e-06, "loss": 0.9511, "step": 7925 }, { "epoch": 0.03508787462924432, "grad_norm": 2.6740643071526984, "learning_rate": 3.508787462924432e-06, "loss": 0.6833, "step": 7926 }, { "epoch": 0.03509230156270751, "grad_norm": 2.716891455762179, "learning_rate": 3.5092301562707515e-06, "loss": 0.6511, "step": 7927 }, { "epoch": 0.0350967284961707, "grad_norm": 2.90898246427, "learning_rate": 3.509672849617071e-06, "loss": 0.8413, "step": 7928 }, { "epoch": 0.03510115542963389, "grad_norm": 2.868182807450218, "learning_rate": 3.5101155429633893e-06, "loss": 0.9463, "step": 7929 }, { "epoch": 0.03510558236309708, "grad_norm": 2.8830878488402027, "learning_rate": 3.5105582363097083e-06, "loss": 0.8091, "step": 7930 }, { "epoch": 0.035110009296560274, "grad_norm": 2.4478298077463267, "learning_rate": 3.5110009296560277e-06, "loss": 0.7615, "step": 7931 }, { "epoch": 0.035114436230023464, "grad_norm": 2.855612526747028, "learning_rate": 3.511443623002346e-06, "loss": 0.9158, "step": 7932 }, { "epoch": 0.035118863163486655, "grad_norm": 2.813070521169878, "learning_rate": 3.5118863163486656e-06, "loss": 0.8295, "step": 7933 }, { "epoch": 0.035123290096949845, "grad_norm": 3.720439081194329, "learning_rate": 3.5123290096949845e-06, "loss": 0.9699, "step": 7934 }, { "epoch": 0.035127717030413036, "grad_norm": 3.2596718807687797, "learning_rate": 3.5127717030413035e-06, "loss": 0.5847, "step": 7935 }, { "epoch": 0.03513214396387622, "grad_norm": 3.186807302967041, "learning_rate": 3.5132143963876224e-06, "loss": 0.864, "step": 7936 }, { "epoch": 0.03513657089733941, "grad_norm": 3.2240374809939962, "learning_rate": 3.5136570897339418e-06, "loss": 0.9274, "step": 7937 }, { "epoch": 0.0351409978308026, "grad_norm": 2.9767019499792475, "learning_rate": 3.5140997830802603e-06, "loss": 0.9256, "step": 7938 }, { "epoch": 0.03514542476426579, "grad_norm": 2.653615248866047, "learning_rate": 3.5145424764265797e-06, "loss": 0.5711, "step": 7939 }, { "epoch": 0.03514985169772898, "grad_norm": 3.100805393070256, "learning_rate": 3.5149851697728986e-06, "loss": 1.0005, "step": 7940 }, { "epoch": 0.03515427863119217, "grad_norm": 3.2724807230318023, "learning_rate": 3.5154278631192176e-06, "loss": 0.9945, "step": 7941 }, { "epoch": 0.03515870556465536, "grad_norm": 3.2338620288353566, "learning_rate": 3.5158705564655365e-06, "loss": 0.8981, "step": 7942 }, { "epoch": 0.03516313249811855, "grad_norm": 3.661067875251388, "learning_rate": 3.516313249811856e-06, "loss": 1.0858, "step": 7943 }, { "epoch": 0.035167559431581744, "grad_norm": 2.7205309315776813, "learning_rate": 3.5167559431581744e-06, "loss": 0.6628, "step": 7944 }, { "epoch": 0.035171986365044934, "grad_norm": 2.8816152481321713, "learning_rate": 3.5171986365044934e-06, "loss": 0.8608, "step": 7945 }, { "epoch": 0.035176413298508125, "grad_norm": 3.363680000226097, "learning_rate": 3.5176413298508127e-06, "loss": 0.7704, "step": 7946 }, { "epoch": 0.035180840231971315, "grad_norm": 2.6202277692099534, "learning_rate": 3.5180840231971312e-06, "loss": 0.7066, "step": 7947 }, { "epoch": 0.035185267165434506, "grad_norm": 3.021738950142473, "learning_rate": 3.5185267165434506e-06, "loss": 0.6389, "step": 7948 }, { "epoch": 0.035189694098897696, "grad_norm": 2.715646604800255, "learning_rate": 3.5189694098897696e-06, "loss": 0.6218, "step": 7949 }, { "epoch": 0.03519412103236089, "grad_norm": 3.362932173087254, "learning_rate": 3.5194121032360885e-06, "loss": 1.0185, "step": 7950 }, { "epoch": 0.03519854796582408, "grad_norm": 2.4964056992703068, "learning_rate": 3.5198547965824075e-06, "loss": 0.683, "step": 7951 }, { "epoch": 0.03520297489928726, "grad_norm": 2.921258830577063, "learning_rate": 3.520297489928727e-06, "loss": 0.7516, "step": 7952 }, { "epoch": 0.03520740183275045, "grad_norm": 2.393260082713003, "learning_rate": 3.5207401832750454e-06, "loss": 0.6519, "step": 7953 }, { "epoch": 0.03521182876621364, "grad_norm": 2.5745680817844754, "learning_rate": 3.5211828766213647e-06, "loss": 0.5139, "step": 7954 }, { "epoch": 0.03521625569967683, "grad_norm": 3.413720958944926, "learning_rate": 3.5216255699676837e-06, "loss": 0.872, "step": 7955 }, { "epoch": 0.03522068263314002, "grad_norm": 3.789350571620998, "learning_rate": 3.5220682633140026e-06, "loss": 1.0753, "step": 7956 }, { "epoch": 0.03522510956660321, "grad_norm": 2.652768598327507, "learning_rate": 3.5225109566603216e-06, "loss": 0.7325, "step": 7957 }, { "epoch": 0.035229536500066404, "grad_norm": 3.0676392191323307, "learning_rate": 3.522953650006641e-06, "loss": 0.6367, "step": 7958 }, { "epoch": 0.035233963433529594, "grad_norm": 2.447245789599331, "learning_rate": 3.5233963433529595e-06, "loss": 0.5843, "step": 7959 }, { "epoch": 0.035238390366992785, "grad_norm": 2.9058700867314564, "learning_rate": 3.523839036699279e-06, "loss": 0.9981, "step": 7960 }, { "epoch": 0.035242817300455975, "grad_norm": 2.7815765014157017, "learning_rate": 3.5242817300455978e-06, "loss": 0.7574, "step": 7961 }, { "epoch": 0.035247244233919166, "grad_norm": 2.922245338164143, "learning_rate": 3.5247244233919163e-06, "loss": 0.952, "step": 7962 }, { "epoch": 0.035251671167382356, "grad_norm": 2.7749028226978094, "learning_rate": 3.5251671167382357e-06, "loss": 0.6614, "step": 7963 }, { "epoch": 0.03525609810084555, "grad_norm": 2.674412670963222, "learning_rate": 3.525609810084555e-06, "loss": 0.6692, "step": 7964 }, { "epoch": 0.03526052503430874, "grad_norm": 3.2201942481553907, "learning_rate": 3.5260525034308736e-06, "loss": 0.8006, "step": 7965 }, { "epoch": 0.03526495196777193, "grad_norm": 2.730812770776316, "learning_rate": 3.5264951967771925e-06, "loss": 0.7849, "step": 7966 }, { "epoch": 0.03526937890123511, "grad_norm": 2.698663148396653, "learning_rate": 3.526937890123512e-06, "loss": 0.8159, "step": 7967 }, { "epoch": 0.0352738058346983, "grad_norm": 2.994174687592373, "learning_rate": 3.5273805834698304e-06, "loss": 0.6348, "step": 7968 }, { "epoch": 0.03527823276816149, "grad_norm": 2.681565099109219, "learning_rate": 3.5278232768161498e-06, "loss": 0.6415, "step": 7969 }, { "epoch": 0.03528265970162468, "grad_norm": 2.577147309985058, "learning_rate": 3.5282659701624687e-06, "loss": 0.6264, "step": 7970 }, { "epoch": 0.03528708663508787, "grad_norm": 2.608712251732979, "learning_rate": 3.5287086635087877e-06, "loss": 0.7586, "step": 7971 }, { "epoch": 0.035291513568551064, "grad_norm": 2.582304004687481, "learning_rate": 3.5291513568551066e-06, "loss": 0.6966, "step": 7972 }, { "epoch": 0.035295940502014254, "grad_norm": 2.559761815662539, "learning_rate": 3.529594050201426e-06, "loss": 0.6181, "step": 7973 }, { "epoch": 0.035300367435477445, "grad_norm": 3.4595449505438562, "learning_rate": 3.5300367435477445e-06, "loss": 1.0631, "step": 7974 }, { "epoch": 0.035304794368940635, "grad_norm": 2.5205477792646263, "learning_rate": 3.530479436894064e-06, "loss": 0.7646, "step": 7975 }, { "epoch": 0.035309221302403826, "grad_norm": 3.8567098581779646, "learning_rate": 3.530922130240383e-06, "loss": 1.1502, "step": 7976 }, { "epoch": 0.035313648235867016, "grad_norm": 2.8599032409201497, "learning_rate": 3.5313648235867014e-06, "loss": 0.595, "step": 7977 }, { "epoch": 0.03531807516933021, "grad_norm": 2.8063387793731702, "learning_rate": 3.5318075169330207e-06, "loss": 0.88, "step": 7978 }, { "epoch": 0.0353225021027934, "grad_norm": 2.6988653602618293, "learning_rate": 3.53225021027934e-06, "loss": 0.5453, "step": 7979 }, { "epoch": 0.03532692903625659, "grad_norm": 3.05156934064548, "learning_rate": 3.5326929036256586e-06, "loss": 0.9495, "step": 7980 }, { "epoch": 0.03533135596971978, "grad_norm": 2.6512711286174526, "learning_rate": 3.5331355969719776e-06, "loss": 0.7854, "step": 7981 }, { "epoch": 0.03533578290318296, "grad_norm": 3.4759610728041626, "learning_rate": 3.533578290318297e-06, "loss": 1.0375, "step": 7982 }, { "epoch": 0.03534020983664615, "grad_norm": 2.2672655867077207, "learning_rate": 3.5340209836646155e-06, "loss": 0.4827, "step": 7983 }, { "epoch": 0.03534463677010934, "grad_norm": 2.533902700664141, "learning_rate": 3.534463677010935e-06, "loss": 0.7766, "step": 7984 }, { "epoch": 0.035349063703572534, "grad_norm": 2.6563337240302163, "learning_rate": 3.5349063703572538e-06, "loss": 0.7577, "step": 7985 }, { "epoch": 0.035353490637035724, "grad_norm": 2.897439547828054, "learning_rate": 3.5353490637035727e-06, "loss": 0.8824, "step": 7986 }, { "epoch": 0.035357917570498915, "grad_norm": 3.7681560987694875, "learning_rate": 3.5357917570498917e-06, "loss": 0.9756, "step": 7987 }, { "epoch": 0.035362344503962105, "grad_norm": 2.8417664389772934, "learning_rate": 3.536234450396211e-06, "loss": 0.6929, "step": 7988 }, { "epoch": 0.035366771437425296, "grad_norm": 2.7545295107088927, "learning_rate": 3.5366771437425296e-06, "loss": 0.8199, "step": 7989 }, { "epoch": 0.035371198370888486, "grad_norm": 4.133213529519193, "learning_rate": 3.537119837088849e-06, "loss": 0.8606, "step": 7990 }, { "epoch": 0.03537562530435168, "grad_norm": 3.0527427818785666, "learning_rate": 3.537562530435168e-06, "loss": 0.5665, "step": 7991 }, { "epoch": 0.03538005223781487, "grad_norm": 2.9097191738650157, "learning_rate": 3.5380052237814864e-06, "loss": 0.7755, "step": 7992 }, { "epoch": 0.03538447917127806, "grad_norm": 2.634908124041873, "learning_rate": 3.5384479171278058e-06, "loss": 0.6489, "step": 7993 }, { "epoch": 0.03538890610474125, "grad_norm": 2.7187303394252536, "learning_rate": 3.538890610474125e-06, "loss": 0.7597, "step": 7994 }, { "epoch": 0.03539333303820444, "grad_norm": 2.5146691040658764, "learning_rate": 3.5393333038204437e-06, "loss": 0.7461, "step": 7995 }, { "epoch": 0.03539775997166763, "grad_norm": 2.958368364842259, "learning_rate": 3.5397759971667626e-06, "loss": 0.7329, "step": 7996 }, { "epoch": 0.03540218690513081, "grad_norm": 2.971593356366139, "learning_rate": 3.540218690513082e-06, "loss": 0.7782, "step": 7997 }, { "epoch": 0.035406613838594, "grad_norm": 3.5392553399593476, "learning_rate": 3.5406613838594005e-06, "loss": 1.454, "step": 7998 }, { "epoch": 0.035411040772057194, "grad_norm": 3.367591239166598, "learning_rate": 3.54110407720572e-06, "loss": 0.9957, "step": 7999 }, { "epoch": 0.035415467705520384, "grad_norm": 2.8155951426514294, "learning_rate": 3.541546770552039e-06, "loss": 0.5502, "step": 8000 }, { "epoch": 0.035419894638983575, "grad_norm": 2.8126621713289266, "learning_rate": 3.5419894638983578e-06, "loss": 0.7693, "step": 8001 }, { "epoch": 0.035424321572446765, "grad_norm": 4.66041063127821, "learning_rate": 3.5424321572446767e-06, "loss": 1.4951, "step": 8002 }, { "epoch": 0.035428748505909956, "grad_norm": 3.892877877203735, "learning_rate": 3.542874850590996e-06, "loss": 1.3222, "step": 8003 }, { "epoch": 0.035433175439373146, "grad_norm": 3.87070257988773, "learning_rate": 3.5433175439373146e-06, "loss": 1.1846, "step": 8004 }, { "epoch": 0.03543760237283634, "grad_norm": 2.456637140943259, "learning_rate": 3.543760237283634e-06, "loss": 0.7445, "step": 8005 }, { "epoch": 0.03544202930629953, "grad_norm": 2.829346712635556, "learning_rate": 3.544202930629953e-06, "loss": 0.8859, "step": 8006 }, { "epoch": 0.03544645623976272, "grad_norm": 3.3929134483884846, "learning_rate": 3.5446456239762715e-06, "loss": 1.2669, "step": 8007 }, { "epoch": 0.03545088317322591, "grad_norm": 2.5126549512619167, "learning_rate": 3.545088317322591e-06, "loss": 0.8328, "step": 8008 }, { "epoch": 0.0354553101066891, "grad_norm": 2.5126153096975132, "learning_rate": 3.54553101066891e-06, "loss": 0.7014, "step": 8009 }, { "epoch": 0.03545973704015229, "grad_norm": 2.5608521937823228, "learning_rate": 3.5459737040152287e-06, "loss": 0.7964, "step": 8010 }, { "epoch": 0.03546416397361548, "grad_norm": 2.5796003133910372, "learning_rate": 3.5464163973615477e-06, "loss": 0.7687, "step": 8011 }, { "epoch": 0.03546859090707866, "grad_norm": 3.033267494411219, "learning_rate": 3.546859090707867e-06, "loss": 0.504, "step": 8012 }, { "epoch": 0.035473017840541854, "grad_norm": 3.51886530985854, "learning_rate": 3.5473017840541856e-06, "loss": 0.7415, "step": 8013 }, { "epoch": 0.035477444774005044, "grad_norm": 2.7631150139281573, "learning_rate": 3.547744477400505e-06, "loss": 0.6938, "step": 8014 }, { "epoch": 0.035481871707468235, "grad_norm": 3.2862776548925754, "learning_rate": 3.548187170746824e-06, "loss": 0.9282, "step": 8015 }, { "epoch": 0.035486298640931425, "grad_norm": 2.784673914885178, "learning_rate": 3.548629864093143e-06, "loss": 0.5433, "step": 8016 }, { "epoch": 0.035490725574394616, "grad_norm": 2.7161946400591126, "learning_rate": 3.5490725574394618e-06, "loss": 0.5256, "step": 8017 }, { "epoch": 0.035495152507857806, "grad_norm": 3.1455547163462696, "learning_rate": 3.549515250785781e-06, "loss": 0.8268, "step": 8018 }, { "epoch": 0.035499579441321, "grad_norm": 2.814549826175777, "learning_rate": 3.5499579441320997e-06, "loss": 0.705, "step": 8019 }, { "epoch": 0.03550400637478419, "grad_norm": 2.4215682456955405, "learning_rate": 3.550400637478419e-06, "loss": 0.5579, "step": 8020 }, { "epoch": 0.03550843330824738, "grad_norm": 2.9455537282104043, "learning_rate": 3.550843330824738e-06, "loss": 0.7515, "step": 8021 }, { "epoch": 0.03551286024171057, "grad_norm": 2.9525945179675137, "learning_rate": 3.5512860241710565e-06, "loss": 0.8887, "step": 8022 }, { "epoch": 0.03551728717517376, "grad_norm": 2.493013304314749, "learning_rate": 3.551728717517376e-06, "loss": 0.6991, "step": 8023 }, { "epoch": 0.03552171410863695, "grad_norm": 2.5914783194354034, "learning_rate": 3.5521714108636953e-06, "loss": 0.6339, "step": 8024 }, { "epoch": 0.03552614104210014, "grad_norm": 2.8502299862910605, "learning_rate": 3.5526141042100138e-06, "loss": 0.7253, "step": 8025 }, { "epoch": 0.03553056797556333, "grad_norm": 2.675269733697059, "learning_rate": 3.5530567975563327e-06, "loss": 0.6033, "step": 8026 }, { "epoch": 0.035534994909026514, "grad_norm": 2.6350113238899344, "learning_rate": 3.553499490902652e-06, "loss": 0.6639, "step": 8027 }, { "epoch": 0.035539421842489705, "grad_norm": 2.8005882397879187, "learning_rate": 3.5539421842489715e-06, "loss": 0.9075, "step": 8028 }, { "epoch": 0.035543848775952895, "grad_norm": 3.5369314286228204, "learning_rate": 3.55438487759529e-06, "loss": 1.2131, "step": 8029 }, { "epoch": 0.035548275709416086, "grad_norm": 2.8109466470700966, "learning_rate": 3.554827570941609e-06, "loss": 0.8507, "step": 8030 }, { "epoch": 0.035552702642879276, "grad_norm": 2.5729960060028487, "learning_rate": 3.5552702642879283e-06, "loss": 0.7284, "step": 8031 }, { "epoch": 0.03555712957634247, "grad_norm": 2.6777070340141367, "learning_rate": 3.555712957634247e-06, "loss": 1.0792, "step": 8032 }, { "epoch": 0.03556155650980566, "grad_norm": 2.853583739052603, "learning_rate": 3.556155650980566e-06, "loss": 0.7497, "step": 8033 }, { "epoch": 0.03556598344326885, "grad_norm": 2.829186472150377, "learning_rate": 3.556598344326885e-06, "loss": 0.4565, "step": 8034 }, { "epoch": 0.03557041037673204, "grad_norm": 3.417806701362867, "learning_rate": 3.557041037673204e-06, "loss": 1.0209, "step": 8035 }, { "epoch": 0.03557483731019523, "grad_norm": 2.443723423283002, "learning_rate": 3.557483731019523e-06, "loss": 0.8344, "step": 8036 }, { "epoch": 0.03557926424365842, "grad_norm": 3.2648952734264136, "learning_rate": 3.5579264243658424e-06, "loss": 1.2199, "step": 8037 }, { "epoch": 0.03558369117712161, "grad_norm": 2.4958192211839707, "learning_rate": 3.558369117712161e-06, "loss": 0.8484, "step": 8038 }, { "epoch": 0.0355881181105848, "grad_norm": 2.6940445029714035, "learning_rate": 3.5588118110584803e-06, "loss": 0.8367, "step": 8039 }, { "epoch": 0.03559254504404799, "grad_norm": 2.433422986172456, "learning_rate": 3.5592545044047993e-06, "loss": 0.595, "step": 8040 }, { "epoch": 0.03559697197751118, "grad_norm": 2.77843488150385, "learning_rate": 3.559697197751118e-06, "loss": 0.7722, "step": 8041 }, { "epoch": 0.035601398910974365, "grad_norm": 3.7051289144244044, "learning_rate": 3.560139891097437e-06, "loss": 0.8094, "step": 8042 }, { "epoch": 0.035605825844437555, "grad_norm": 2.8702459164749516, "learning_rate": 3.5605825844437565e-06, "loss": 0.7139, "step": 8043 }, { "epoch": 0.035610252777900746, "grad_norm": 2.4566644672272826, "learning_rate": 3.561025277790075e-06, "loss": 0.7232, "step": 8044 }, { "epoch": 0.035614679711363936, "grad_norm": 3.2195048852214065, "learning_rate": 3.561467971136394e-06, "loss": 0.9268, "step": 8045 }, { "epoch": 0.03561910664482713, "grad_norm": 3.1174487280768974, "learning_rate": 3.5619106644827134e-06, "loss": 0.7035, "step": 8046 }, { "epoch": 0.03562353357829032, "grad_norm": 2.5729222474036146, "learning_rate": 3.562353357829032e-06, "loss": 0.7981, "step": 8047 }, { "epoch": 0.03562796051175351, "grad_norm": 2.4450230447583134, "learning_rate": 3.5627960511753513e-06, "loss": 0.6204, "step": 8048 }, { "epoch": 0.0356323874452167, "grad_norm": 2.964804100813771, "learning_rate": 3.56323874452167e-06, "loss": 0.8964, "step": 8049 }, { "epoch": 0.03563681437867989, "grad_norm": 2.7154134166168427, "learning_rate": 3.563681437867989e-06, "loss": 0.6511, "step": 8050 }, { "epoch": 0.03564124131214308, "grad_norm": 2.8022162922910976, "learning_rate": 3.564124131214308e-06, "loss": 0.9792, "step": 8051 }, { "epoch": 0.03564566824560627, "grad_norm": 3.552148172850259, "learning_rate": 3.5645668245606275e-06, "loss": 0.7117, "step": 8052 }, { "epoch": 0.03565009517906946, "grad_norm": 3.4554301177501405, "learning_rate": 3.565009517906946e-06, "loss": 0.8594, "step": 8053 }, { "epoch": 0.03565452211253265, "grad_norm": 2.875691120080443, "learning_rate": 3.5654522112532654e-06, "loss": 0.6345, "step": 8054 }, { "epoch": 0.03565894904599584, "grad_norm": 2.864325510553696, "learning_rate": 3.5658949045995843e-06, "loss": 0.7201, "step": 8055 }, { "epoch": 0.03566337597945903, "grad_norm": 3.3226678463128816, "learning_rate": 3.5663375979459033e-06, "loss": 1.0166, "step": 8056 }, { "epoch": 0.035667802912922215, "grad_norm": 2.7461671189636445, "learning_rate": 3.566780291292222e-06, "loss": 0.57, "step": 8057 }, { "epoch": 0.035672229846385406, "grad_norm": 3.4079016395632324, "learning_rate": 3.5672229846385416e-06, "loss": 1.1173, "step": 8058 }, { "epoch": 0.035676656779848596, "grad_norm": 2.615859690232146, "learning_rate": 3.56766567798486e-06, "loss": 0.6401, "step": 8059 }, { "epoch": 0.03568108371331179, "grad_norm": 3.230498750736748, "learning_rate": 3.5681083713311795e-06, "loss": 0.998, "step": 8060 }, { "epoch": 0.03568551064677498, "grad_norm": 2.7949225284267736, "learning_rate": 3.5685510646774984e-06, "loss": 0.7228, "step": 8061 }, { "epoch": 0.03568993758023817, "grad_norm": 3.2032462637454544, "learning_rate": 3.568993758023817e-06, "loss": 1.0314, "step": 8062 }, { "epoch": 0.03569436451370136, "grad_norm": 3.0795063416435657, "learning_rate": 3.5694364513701363e-06, "loss": 1.0466, "step": 8063 }, { "epoch": 0.03569879144716455, "grad_norm": 2.8055069076728656, "learning_rate": 3.5698791447164557e-06, "loss": 0.6409, "step": 8064 }, { "epoch": 0.03570321838062774, "grad_norm": 3.326484196408225, "learning_rate": 3.570321838062774e-06, "loss": 1.0245, "step": 8065 }, { "epoch": 0.03570764531409093, "grad_norm": 2.1995824797622, "learning_rate": 3.570764531409093e-06, "loss": 0.5784, "step": 8066 }, { "epoch": 0.03571207224755412, "grad_norm": 2.5344683198456432, "learning_rate": 3.5712072247554125e-06, "loss": 0.6598, "step": 8067 }, { "epoch": 0.03571649918101731, "grad_norm": 3.2061033473456693, "learning_rate": 3.571649918101731e-06, "loss": 0.5822, "step": 8068 }, { "epoch": 0.0357209261144805, "grad_norm": 3.22861376910068, "learning_rate": 3.5720926114480504e-06, "loss": 1.1005, "step": 8069 }, { "epoch": 0.03572535304794369, "grad_norm": 3.058569110086303, "learning_rate": 3.5725353047943694e-06, "loss": 0.592, "step": 8070 }, { "epoch": 0.03572977998140688, "grad_norm": 2.523885301109301, "learning_rate": 3.5729779981406883e-06, "loss": 0.7143, "step": 8071 }, { "epoch": 0.035734206914870066, "grad_norm": 2.5889456534493056, "learning_rate": 3.5734206914870073e-06, "loss": 0.6298, "step": 8072 }, { "epoch": 0.03573863384833326, "grad_norm": 2.4439480511618914, "learning_rate": 3.5738633848333266e-06, "loss": 0.7255, "step": 8073 }, { "epoch": 0.03574306078179645, "grad_norm": 2.91841700201576, "learning_rate": 3.574306078179645e-06, "loss": 0.9925, "step": 8074 }, { "epoch": 0.03574748771525964, "grad_norm": 3.2037505805940274, "learning_rate": 3.5747487715259645e-06, "loss": 0.8941, "step": 8075 }, { "epoch": 0.03575191464872283, "grad_norm": 3.301140633237761, "learning_rate": 3.5751914648722835e-06, "loss": 0.664, "step": 8076 }, { "epoch": 0.03575634158218602, "grad_norm": 3.187453983189332, "learning_rate": 3.575634158218602e-06, "loss": 0.9575, "step": 8077 }, { "epoch": 0.03576076851564921, "grad_norm": 3.266143248895206, "learning_rate": 3.5760768515649214e-06, "loss": 0.7921, "step": 8078 }, { "epoch": 0.0357651954491124, "grad_norm": 3.660835072491934, "learning_rate": 3.5765195449112407e-06, "loss": 0.9979, "step": 8079 }, { "epoch": 0.03576962238257559, "grad_norm": 2.7068793671293245, "learning_rate": 3.5769622382575593e-06, "loss": 0.6886, "step": 8080 }, { "epoch": 0.03577404931603878, "grad_norm": 3.008741642114013, "learning_rate": 3.577404931603878e-06, "loss": 0.5522, "step": 8081 }, { "epoch": 0.03577847624950197, "grad_norm": 2.655624199118532, "learning_rate": 3.5778476249501976e-06, "loss": 0.7078, "step": 8082 }, { "epoch": 0.03578290318296516, "grad_norm": 3.0491569957852205, "learning_rate": 3.578290318296516e-06, "loss": 0.7376, "step": 8083 }, { "epoch": 0.03578733011642835, "grad_norm": 2.6238231957180167, "learning_rate": 3.5787330116428355e-06, "loss": 0.721, "step": 8084 }, { "epoch": 0.03579175704989154, "grad_norm": 2.9529249011863317, "learning_rate": 3.5791757049891544e-06, "loss": 0.7906, "step": 8085 }, { "epoch": 0.03579618398335473, "grad_norm": 3.142214509363756, "learning_rate": 3.5796183983354734e-06, "loss": 0.8418, "step": 8086 }, { "epoch": 0.03580061091681792, "grad_norm": 2.9111418945779612, "learning_rate": 3.5800610916817923e-06, "loss": 1.0051, "step": 8087 }, { "epoch": 0.03580503785028111, "grad_norm": 2.8853932393444084, "learning_rate": 3.5805037850281117e-06, "loss": 0.5436, "step": 8088 }, { "epoch": 0.0358094647837443, "grad_norm": 2.5618178534086034, "learning_rate": 3.58094647837443e-06, "loss": 0.3647, "step": 8089 }, { "epoch": 0.03581389171720749, "grad_norm": 3.027790651380716, "learning_rate": 3.5813891717207496e-06, "loss": 1.017, "step": 8090 }, { "epoch": 0.03581831865067068, "grad_norm": 2.770535279583474, "learning_rate": 3.5818318650670685e-06, "loss": 0.6561, "step": 8091 }, { "epoch": 0.03582274558413387, "grad_norm": 2.2627663535998797, "learning_rate": 3.582274558413387e-06, "loss": 0.6118, "step": 8092 }, { "epoch": 0.03582717251759706, "grad_norm": 2.8135607540259255, "learning_rate": 3.5827172517597064e-06, "loss": 0.6955, "step": 8093 }, { "epoch": 0.03583159945106025, "grad_norm": 2.635043624819474, "learning_rate": 3.5831599451060258e-06, "loss": 0.7187, "step": 8094 }, { "epoch": 0.03583602638452344, "grad_norm": 3.2571043783720333, "learning_rate": 3.5836026384523443e-06, "loss": 0.9111, "step": 8095 }, { "epoch": 0.03584045331798663, "grad_norm": 3.525289967733912, "learning_rate": 3.5840453317986633e-06, "loss": 1.009, "step": 8096 }, { "epoch": 0.03584488025144982, "grad_norm": 2.9945978672582982, "learning_rate": 3.5844880251449826e-06, "loss": 0.6369, "step": 8097 }, { "epoch": 0.03584930718491301, "grad_norm": 3.097994233173249, "learning_rate": 3.584930718491301e-06, "loss": 0.9656, "step": 8098 }, { "epoch": 0.0358537341183762, "grad_norm": 2.6507546748571102, "learning_rate": 3.5853734118376205e-06, "loss": 0.6093, "step": 8099 }, { "epoch": 0.03585816105183939, "grad_norm": 2.7082789219425014, "learning_rate": 3.5858161051839395e-06, "loss": 0.7713, "step": 8100 }, { "epoch": 0.035862587985302584, "grad_norm": 3.3104463069877643, "learning_rate": 3.5862587985302584e-06, "loss": 1.0428, "step": 8101 }, { "epoch": 0.035867014918765774, "grad_norm": 2.48020967607478, "learning_rate": 3.5867014918765774e-06, "loss": 0.728, "step": 8102 }, { "epoch": 0.03587144185222896, "grad_norm": 2.698102044603294, "learning_rate": 3.5871441852228967e-06, "loss": 0.8678, "step": 8103 }, { "epoch": 0.03587586878569215, "grad_norm": 2.6622614168062118, "learning_rate": 3.5875868785692153e-06, "loss": 0.7085, "step": 8104 }, { "epoch": 0.03588029571915534, "grad_norm": 2.482647066358114, "learning_rate": 3.5880295719155346e-06, "loss": 0.6722, "step": 8105 }, { "epoch": 0.03588472265261853, "grad_norm": 3.100059862999843, "learning_rate": 3.5884722652618536e-06, "loss": 0.9012, "step": 8106 }, { "epoch": 0.03588914958608172, "grad_norm": 2.5968193738113166, "learning_rate": 3.588914958608172e-06, "loss": 0.6965, "step": 8107 }, { "epoch": 0.03589357651954491, "grad_norm": 3.3819442762377894, "learning_rate": 3.5893576519544915e-06, "loss": 0.6722, "step": 8108 }, { "epoch": 0.0358980034530081, "grad_norm": 2.771383469575123, "learning_rate": 3.589800345300811e-06, "loss": 0.7929, "step": 8109 }, { "epoch": 0.03590243038647129, "grad_norm": 2.8673910381575096, "learning_rate": 3.5902430386471294e-06, "loss": 1.0388, "step": 8110 }, { "epoch": 0.03590685731993448, "grad_norm": 2.9666986808612394, "learning_rate": 3.5906857319934483e-06, "loss": 0.6771, "step": 8111 }, { "epoch": 0.03591128425339767, "grad_norm": 3.1903445347551536, "learning_rate": 3.5911284253397677e-06, "loss": 0.8827, "step": 8112 }, { "epoch": 0.03591571118686086, "grad_norm": 2.796752738706456, "learning_rate": 3.591571118686086e-06, "loss": 0.558, "step": 8113 }, { "epoch": 0.035920138120324054, "grad_norm": 3.8154508660035558, "learning_rate": 3.5920138120324056e-06, "loss": 1.1824, "step": 8114 }, { "epoch": 0.035924565053787244, "grad_norm": 2.013272686966004, "learning_rate": 3.5924565053787245e-06, "loss": 0.4247, "step": 8115 }, { "epoch": 0.035928991987250435, "grad_norm": 2.815399179650827, "learning_rate": 3.5928991987250435e-06, "loss": 0.8244, "step": 8116 }, { "epoch": 0.035933418920713625, "grad_norm": 3.330242077297706, "learning_rate": 3.5933418920713624e-06, "loss": 0.9369, "step": 8117 }, { "epoch": 0.03593784585417681, "grad_norm": 3.920894485255396, "learning_rate": 3.5937845854176818e-06, "loss": 1.087, "step": 8118 }, { "epoch": 0.03594227278764, "grad_norm": 3.069554294124087, "learning_rate": 3.5942272787640003e-06, "loss": 0.5782, "step": 8119 }, { "epoch": 0.03594669972110319, "grad_norm": 3.312243146251326, "learning_rate": 3.5946699721103197e-06, "loss": 0.9054, "step": 8120 }, { "epoch": 0.03595112665456638, "grad_norm": 3.5995943398804124, "learning_rate": 3.5951126654566386e-06, "loss": 1.1088, "step": 8121 }, { "epoch": 0.03595555358802957, "grad_norm": 3.372833398536439, "learning_rate": 3.5955553588029576e-06, "loss": 0.8827, "step": 8122 }, { "epoch": 0.03595998052149276, "grad_norm": 3.6245664244835143, "learning_rate": 3.5959980521492765e-06, "loss": 0.9162, "step": 8123 }, { "epoch": 0.03596440745495595, "grad_norm": 2.438282200907543, "learning_rate": 3.596440745495596e-06, "loss": 0.5358, "step": 8124 }, { "epoch": 0.03596883438841914, "grad_norm": 3.2434363931981545, "learning_rate": 3.5968834388419144e-06, "loss": 0.8663, "step": 8125 }, { "epoch": 0.03597326132188233, "grad_norm": 2.9039434401693183, "learning_rate": 3.5973261321882334e-06, "loss": 0.9834, "step": 8126 }, { "epoch": 0.03597768825534552, "grad_norm": 2.809915325676793, "learning_rate": 3.5977688255345527e-06, "loss": 0.8542, "step": 8127 }, { "epoch": 0.035982115188808714, "grad_norm": 3.4803768969512143, "learning_rate": 3.5982115188808713e-06, "loss": 0.9864, "step": 8128 }, { "epoch": 0.035986542122271904, "grad_norm": 2.6411613771812883, "learning_rate": 3.5986542122271906e-06, "loss": 0.6268, "step": 8129 }, { "epoch": 0.035990969055735095, "grad_norm": 2.7893796857359043, "learning_rate": 3.5990969055735096e-06, "loss": 0.5533, "step": 8130 }, { "epoch": 0.035995395989198285, "grad_norm": 2.9074685469350716, "learning_rate": 3.5995395989198285e-06, "loss": 0.5812, "step": 8131 }, { "epoch": 0.035999822922661476, "grad_norm": 2.5164444041034653, "learning_rate": 3.5999822922661475e-06, "loss": 0.7378, "step": 8132 }, { "epoch": 0.03600424985612466, "grad_norm": 2.512026287077496, "learning_rate": 3.600424985612467e-06, "loss": 0.5279, "step": 8133 }, { "epoch": 0.03600867678958785, "grad_norm": 2.7155374685452665, "learning_rate": 3.6008676789587854e-06, "loss": 0.3932, "step": 8134 }, { "epoch": 0.03601310372305104, "grad_norm": 2.779767833102724, "learning_rate": 3.6013103723051047e-06, "loss": 0.6787, "step": 8135 }, { "epoch": 0.03601753065651423, "grad_norm": 2.524578426539813, "learning_rate": 3.6017530656514237e-06, "loss": 0.5768, "step": 8136 }, { "epoch": 0.03602195758997742, "grad_norm": 2.7609792760628364, "learning_rate": 3.6021957589977426e-06, "loss": 0.949, "step": 8137 }, { "epoch": 0.03602638452344061, "grad_norm": 4.452849492699267, "learning_rate": 3.6026384523440616e-06, "loss": 1.2698, "step": 8138 }, { "epoch": 0.0360308114569038, "grad_norm": 2.946100872561306, "learning_rate": 3.603081145690381e-06, "loss": 0.7594, "step": 8139 }, { "epoch": 0.03603523839036699, "grad_norm": 2.5305060261006593, "learning_rate": 3.6035238390366995e-06, "loss": 0.6311, "step": 8140 }, { "epoch": 0.03603966532383018, "grad_norm": 2.906066087337047, "learning_rate": 3.603966532383019e-06, "loss": 0.7281, "step": 8141 }, { "epoch": 0.036044092257293374, "grad_norm": 3.2391788499284817, "learning_rate": 3.6044092257293378e-06, "loss": 1.1462, "step": 8142 }, { "epoch": 0.036048519190756564, "grad_norm": 3.617054340939541, "learning_rate": 3.6048519190756563e-06, "loss": 1.199, "step": 8143 }, { "epoch": 0.036052946124219755, "grad_norm": 3.053505658420703, "learning_rate": 3.6052946124219757e-06, "loss": 0.7707, "step": 8144 }, { "epoch": 0.036057373057682945, "grad_norm": 3.882019401165755, "learning_rate": 3.6057373057682946e-06, "loss": 1.6107, "step": 8145 }, { "epoch": 0.036061799991146136, "grad_norm": 2.965663056835648, "learning_rate": 3.6061799991146136e-06, "loss": 0.9632, "step": 8146 }, { "epoch": 0.036066226924609326, "grad_norm": 3.514374961801961, "learning_rate": 3.6066226924609325e-06, "loss": 0.9639, "step": 8147 }, { "epoch": 0.03607065385807251, "grad_norm": 3.2295833840299037, "learning_rate": 3.607065385807252e-06, "loss": 0.9022, "step": 8148 }, { "epoch": 0.0360750807915357, "grad_norm": 3.0125906342956057, "learning_rate": 3.6075080791535704e-06, "loss": 0.7007, "step": 8149 }, { "epoch": 0.03607950772499889, "grad_norm": 3.75705575398552, "learning_rate": 3.6079507724998898e-06, "loss": 1.0035, "step": 8150 }, { "epoch": 0.03608393465846208, "grad_norm": 2.5991285069310877, "learning_rate": 3.6083934658462087e-06, "loss": 0.4886, "step": 8151 }, { "epoch": 0.03608836159192527, "grad_norm": 2.513107288535486, "learning_rate": 3.6088361591925277e-06, "loss": 0.442, "step": 8152 }, { "epoch": 0.03609278852538846, "grad_norm": 2.9953893935814646, "learning_rate": 3.6092788525388466e-06, "loss": 0.7381, "step": 8153 }, { "epoch": 0.03609721545885165, "grad_norm": 2.8375400788709753, "learning_rate": 3.609721545885166e-06, "loss": 0.8599, "step": 8154 }, { "epoch": 0.036101642392314844, "grad_norm": 3.794057687113391, "learning_rate": 3.6101642392314845e-06, "loss": 1.0686, "step": 8155 }, { "epoch": 0.036106069325778034, "grad_norm": 5.264290992217585, "learning_rate": 3.610606932577804e-06, "loss": 1.4158, "step": 8156 }, { "epoch": 0.036110496259241225, "grad_norm": 2.521088798722639, "learning_rate": 3.611049625924123e-06, "loss": 0.7467, "step": 8157 }, { "epoch": 0.036114923192704415, "grad_norm": 2.9817703208412065, "learning_rate": 3.6114923192704414e-06, "loss": 0.9483, "step": 8158 }, { "epoch": 0.036119350126167606, "grad_norm": 3.0090060163944927, "learning_rate": 3.6119350126167607e-06, "loss": 0.7316, "step": 8159 }, { "epoch": 0.036123777059630796, "grad_norm": 2.532829398068664, "learning_rate": 3.61237770596308e-06, "loss": 0.7937, "step": 8160 }, { "epoch": 0.03612820399309399, "grad_norm": 3.329062842179455, "learning_rate": 3.6128203993093986e-06, "loss": 0.8336, "step": 8161 }, { "epoch": 0.03613263092655718, "grad_norm": 3.7072828831088143, "learning_rate": 3.6132630926557176e-06, "loss": 1.265, "step": 8162 }, { "epoch": 0.03613705786002036, "grad_norm": 2.5811763892368482, "learning_rate": 3.613705786002037e-06, "loss": 0.666, "step": 8163 }, { "epoch": 0.03614148479348355, "grad_norm": 3.7566473704473298, "learning_rate": 3.6141484793483555e-06, "loss": 0.9088, "step": 8164 }, { "epoch": 0.03614591172694674, "grad_norm": 2.4815747961715457, "learning_rate": 3.614591172694675e-06, "loss": 0.7038, "step": 8165 }, { "epoch": 0.03615033866040993, "grad_norm": 2.839471591344458, "learning_rate": 3.6150338660409938e-06, "loss": 0.8134, "step": 8166 }, { "epoch": 0.03615476559387312, "grad_norm": 3.9834313246909305, "learning_rate": 3.6154765593873127e-06, "loss": 1.0069, "step": 8167 }, { "epoch": 0.03615919252733631, "grad_norm": 2.4116245013902087, "learning_rate": 3.6159192527336317e-06, "loss": 0.6329, "step": 8168 }, { "epoch": 0.036163619460799504, "grad_norm": 2.6605307789837447, "learning_rate": 3.616361946079951e-06, "loss": 0.7084, "step": 8169 }, { "epoch": 0.036168046394262694, "grad_norm": 4.103835439643235, "learning_rate": 3.6168046394262696e-06, "loss": 1.2802, "step": 8170 }, { "epoch": 0.036172473327725885, "grad_norm": 2.941626966313626, "learning_rate": 3.617247332772589e-06, "loss": 0.8859, "step": 8171 }, { "epoch": 0.036176900261189075, "grad_norm": 3.914506507378739, "learning_rate": 3.617690026118908e-06, "loss": 1.2449, "step": 8172 }, { "epoch": 0.036181327194652266, "grad_norm": 3.214970031252809, "learning_rate": 3.6181327194652264e-06, "loss": 1.1149, "step": 8173 }, { "epoch": 0.036185754128115456, "grad_norm": 2.303781700092508, "learning_rate": 3.6185754128115458e-06, "loss": 0.3249, "step": 8174 }, { "epoch": 0.03619018106157865, "grad_norm": 3.4970625525604424, "learning_rate": 3.619018106157865e-06, "loss": 1.132, "step": 8175 }, { "epoch": 0.03619460799504184, "grad_norm": 2.6059107814093467, "learning_rate": 3.6194607995041837e-06, "loss": 0.6006, "step": 8176 }, { "epoch": 0.03619903492850503, "grad_norm": 3.4014691791565035, "learning_rate": 3.6199034928505026e-06, "loss": 0.9, "step": 8177 }, { "epoch": 0.03620346186196821, "grad_norm": 2.6722839137824037, "learning_rate": 3.620346186196822e-06, "loss": 0.7141, "step": 8178 }, { "epoch": 0.0362078887954314, "grad_norm": 3.483786840763723, "learning_rate": 3.6207888795431405e-06, "loss": 0.984, "step": 8179 }, { "epoch": 0.03621231572889459, "grad_norm": 3.354213244806176, "learning_rate": 3.62123157288946e-06, "loss": 0.5021, "step": 8180 }, { "epoch": 0.03621674266235778, "grad_norm": 2.467306051259038, "learning_rate": 3.621674266235779e-06, "loss": 0.5755, "step": 8181 }, { "epoch": 0.03622116959582097, "grad_norm": 2.7723577202102745, "learning_rate": 3.622116959582098e-06, "loss": 0.9375, "step": 8182 }, { "epoch": 0.036225596529284164, "grad_norm": 2.7987945501198883, "learning_rate": 3.6225596529284167e-06, "loss": 0.8458, "step": 8183 }, { "epoch": 0.036230023462747354, "grad_norm": 3.3616722686434177, "learning_rate": 3.623002346274736e-06, "loss": 0.6993, "step": 8184 }, { "epoch": 0.036234450396210545, "grad_norm": 2.8243274094615525, "learning_rate": 3.6234450396210546e-06, "loss": 0.6072, "step": 8185 }, { "epoch": 0.036238877329673735, "grad_norm": 2.463422920765543, "learning_rate": 3.623887732967374e-06, "loss": 0.7714, "step": 8186 }, { "epoch": 0.036243304263136926, "grad_norm": 2.910492707085047, "learning_rate": 3.624330426313693e-06, "loss": 0.8592, "step": 8187 }, { "epoch": 0.036247731196600116, "grad_norm": 2.766727243955512, "learning_rate": 3.6247731196600115e-06, "loss": 0.6492, "step": 8188 }, { "epoch": 0.03625215813006331, "grad_norm": 2.728019404661411, "learning_rate": 3.625215813006331e-06, "loss": 0.6557, "step": 8189 }, { "epoch": 0.0362565850635265, "grad_norm": 4.022660401888966, "learning_rate": 3.6256585063526502e-06, "loss": 1.4507, "step": 8190 }, { "epoch": 0.03626101199698969, "grad_norm": 2.882578213689718, "learning_rate": 3.6261011996989687e-06, "loss": 0.8164, "step": 8191 }, { "epoch": 0.03626543893045288, "grad_norm": 3.138153981238699, "learning_rate": 3.6265438930452877e-06, "loss": 0.7159, "step": 8192 }, { "epoch": 0.03626986586391606, "grad_norm": 2.994178020290198, "learning_rate": 3.626986586391607e-06, "loss": 0.8939, "step": 8193 }, { "epoch": 0.03627429279737925, "grad_norm": 2.906771975103548, "learning_rate": 3.6274292797379256e-06, "loss": 0.7873, "step": 8194 }, { "epoch": 0.03627871973084244, "grad_norm": 2.724147218871158, "learning_rate": 3.627871973084245e-06, "loss": 0.9256, "step": 8195 }, { "epoch": 0.036283146664305634, "grad_norm": 2.7087791480416303, "learning_rate": 3.628314666430564e-06, "loss": 0.8446, "step": 8196 }, { "epoch": 0.036287573597768824, "grad_norm": 2.387479723414163, "learning_rate": 3.628757359776883e-06, "loss": 0.608, "step": 8197 }, { "epoch": 0.036292000531232015, "grad_norm": 2.5964706581656336, "learning_rate": 3.629200053123202e-06, "loss": 0.7839, "step": 8198 }, { "epoch": 0.036296427464695205, "grad_norm": 3.302251572061952, "learning_rate": 3.629642746469521e-06, "loss": 0.7308, "step": 8199 }, { "epoch": 0.036300854398158396, "grad_norm": 2.4692809214139406, "learning_rate": 3.6300854398158397e-06, "loss": 0.6156, "step": 8200 }, { "epoch": 0.036305281331621586, "grad_norm": 4.362775590280457, "learning_rate": 3.630528133162159e-06, "loss": 1.1713, "step": 8201 }, { "epoch": 0.03630970826508478, "grad_norm": 2.943677365317135, "learning_rate": 3.630970826508478e-06, "loss": 0.6951, "step": 8202 }, { "epoch": 0.03631413519854797, "grad_norm": 3.7461463254123695, "learning_rate": 3.6314135198547965e-06, "loss": 1.0073, "step": 8203 }, { "epoch": 0.03631856213201116, "grad_norm": 3.1568843582201427, "learning_rate": 3.631856213201116e-06, "loss": 0.8086, "step": 8204 }, { "epoch": 0.03632298906547435, "grad_norm": 2.8731763295032926, "learning_rate": 3.6322989065474353e-06, "loss": 0.8804, "step": 8205 }, { "epoch": 0.03632741599893754, "grad_norm": 2.548614324590499, "learning_rate": 3.632741599893754e-06, "loss": 0.5492, "step": 8206 }, { "epoch": 0.03633184293240073, "grad_norm": 2.4619444089566334, "learning_rate": 3.6331842932400727e-06, "loss": 0.6656, "step": 8207 }, { "epoch": 0.03633626986586391, "grad_norm": 2.7843278136931007, "learning_rate": 3.633626986586392e-06, "loss": 0.769, "step": 8208 }, { "epoch": 0.0363406967993271, "grad_norm": 2.624897028641731, "learning_rate": 3.6340696799327106e-06, "loss": 0.5843, "step": 8209 }, { "epoch": 0.036345123732790294, "grad_norm": 3.2475791351664536, "learning_rate": 3.63451237327903e-06, "loss": 1.0699, "step": 8210 }, { "epoch": 0.036349550666253484, "grad_norm": 3.238467313214348, "learning_rate": 3.634955066625349e-06, "loss": 1.0682, "step": 8211 }, { "epoch": 0.036353977599716675, "grad_norm": 2.646797633959829, "learning_rate": 3.635397759971668e-06, "loss": 0.8716, "step": 8212 }, { "epoch": 0.036358404533179865, "grad_norm": 2.9623056356365494, "learning_rate": 3.635840453317987e-06, "loss": 0.8739, "step": 8213 }, { "epoch": 0.036362831466643056, "grad_norm": 3.228527786913503, "learning_rate": 3.6362831466643062e-06, "loss": 0.6534, "step": 8214 }, { "epoch": 0.036367258400106246, "grad_norm": 3.440886964391597, "learning_rate": 3.6367258400106247e-06, "loss": 0.8694, "step": 8215 }, { "epoch": 0.03637168533356944, "grad_norm": 2.6315566586876202, "learning_rate": 3.637168533356944e-06, "loss": 0.7984, "step": 8216 }, { "epoch": 0.03637611226703263, "grad_norm": 2.7917475081322705, "learning_rate": 3.637611226703263e-06, "loss": 0.7052, "step": 8217 }, { "epoch": 0.03638053920049582, "grad_norm": 2.896689917412603, "learning_rate": 3.638053920049582e-06, "loss": 0.7587, "step": 8218 }, { "epoch": 0.03638496613395901, "grad_norm": 2.8183304278310706, "learning_rate": 3.638496613395901e-06, "loss": 0.8323, "step": 8219 }, { "epoch": 0.0363893930674222, "grad_norm": 3.2256347298389914, "learning_rate": 3.6389393067422203e-06, "loss": 1.1514, "step": 8220 }, { "epoch": 0.03639382000088539, "grad_norm": 2.9075841000155016, "learning_rate": 3.639382000088539e-06, "loss": 0.7395, "step": 8221 }, { "epoch": 0.03639824693434858, "grad_norm": 3.3412422958861656, "learning_rate": 3.6398246934348582e-06, "loss": 0.9527, "step": 8222 }, { "epoch": 0.03640267386781176, "grad_norm": 2.7792791741494245, "learning_rate": 3.640267386781177e-06, "loss": 0.6144, "step": 8223 }, { "epoch": 0.036407100801274954, "grad_norm": 2.3606701089780286, "learning_rate": 3.6407100801274957e-06, "loss": 0.4612, "step": 8224 }, { "epoch": 0.036411527734738144, "grad_norm": 2.8348602848665485, "learning_rate": 3.641152773473815e-06, "loss": 0.8565, "step": 8225 }, { "epoch": 0.036415954668201335, "grad_norm": 2.6065801694799275, "learning_rate": 3.641595466820134e-06, "loss": 0.6984, "step": 8226 }, { "epoch": 0.036420381601664525, "grad_norm": 2.6322312580682152, "learning_rate": 3.642038160166453e-06, "loss": 0.8547, "step": 8227 }, { "epoch": 0.036424808535127716, "grad_norm": 3.2385361864736617, "learning_rate": 3.642480853512772e-06, "loss": 0.819, "step": 8228 }, { "epoch": 0.036429235468590906, "grad_norm": 2.5928705115006045, "learning_rate": 3.6429235468590913e-06, "loss": 0.7784, "step": 8229 }, { "epoch": 0.0364336624020541, "grad_norm": 2.6727726620934185, "learning_rate": 3.64336624020541e-06, "loss": 0.929, "step": 8230 }, { "epoch": 0.03643808933551729, "grad_norm": 3.0071796720299657, "learning_rate": 3.643808933551729e-06, "loss": 0.5831, "step": 8231 }, { "epoch": 0.03644251626898048, "grad_norm": 2.8976770160714653, "learning_rate": 3.644251626898048e-06, "loss": 0.8656, "step": 8232 }, { "epoch": 0.03644694320244367, "grad_norm": 3.6170005411484993, "learning_rate": 3.644694320244367e-06, "loss": 0.6461, "step": 8233 }, { "epoch": 0.03645137013590686, "grad_norm": 2.8294290879106914, "learning_rate": 3.645137013590686e-06, "loss": 0.975, "step": 8234 }, { "epoch": 0.03645579706937005, "grad_norm": 2.8844042843897695, "learning_rate": 3.6455797069370054e-06, "loss": 0.6552, "step": 8235 }, { "epoch": 0.03646022400283324, "grad_norm": 3.7954021688469446, "learning_rate": 3.646022400283324e-06, "loss": 1.3151, "step": 8236 }, { "epoch": 0.03646465093629643, "grad_norm": 2.732131470195254, "learning_rate": 3.6464650936296433e-06, "loss": 0.9609, "step": 8237 }, { "epoch": 0.036469077869759614, "grad_norm": 2.4148701972502447, "learning_rate": 3.6469077869759622e-06, "loss": 0.6985, "step": 8238 }, { "epoch": 0.036473504803222805, "grad_norm": 3.7733792409246774, "learning_rate": 3.6473504803222807e-06, "loss": 0.7396, "step": 8239 }, { "epoch": 0.036477931736685995, "grad_norm": 2.599710187339286, "learning_rate": 3.6477931736686e-06, "loss": 0.707, "step": 8240 }, { "epoch": 0.036482358670149186, "grad_norm": 4.702272963924677, "learning_rate": 3.6482358670149195e-06, "loss": 1.5813, "step": 8241 }, { "epoch": 0.036486785603612376, "grad_norm": 3.0692701450304347, "learning_rate": 3.648678560361238e-06, "loss": 0.8203, "step": 8242 }, { "epoch": 0.03649121253707557, "grad_norm": 2.3912143273722766, "learning_rate": 3.649121253707557e-06, "loss": 0.6761, "step": 8243 }, { "epoch": 0.03649563947053876, "grad_norm": 3.5618279946733753, "learning_rate": 3.6495639470538763e-06, "loss": 0.5048, "step": 8244 }, { "epoch": 0.03650006640400195, "grad_norm": 2.7943603749120856, "learning_rate": 3.650006640400195e-06, "loss": 0.6781, "step": 8245 }, { "epoch": 0.03650449333746514, "grad_norm": 3.601175019125711, "learning_rate": 3.6504493337465142e-06, "loss": 1.0107, "step": 8246 }, { "epoch": 0.03650892027092833, "grad_norm": 3.0237777516931947, "learning_rate": 3.650892027092833e-06, "loss": 0.9658, "step": 8247 }, { "epoch": 0.03651334720439152, "grad_norm": 2.3477328687944086, "learning_rate": 3.651334720439152e-06, "loss": 0.5781, "step": 8248 }, { "epoch": 0.03651777413785471, "grad_norm": 2.7827768477224066, "learning_rate": 3.651777413785471e-06, "loss": 0.6668, "step": 8249 }, { "epoch": 0.0365222010713179, "grad_norm": 4.611845558252535, "learning_rate": 3.6522201071317904e-06, "loss": 1.7097, "step": 8250 }, { "epoch": 0.03652662800478109, "grad_norm": 3.1171814944948446, "learning_rate": 3.652662800478109e-06, "loss": 0.9055, "step": 8251 }, { "epoch": 0.03653105493824428, "grad_norm": 3.2675514866922364, "learning_rate": 3.6531054938244283e-06, "loss": 0.7411, "step": 8252 }, { "epoch": 0.03653548187170747, "grad_norm": 2.73858597359841, "learning_rate": 3.6535481871707473e-06, "loss": 0.8418, "step": 8253 }, { "epoch": 0.036539908805170655, "grad_norm": 2.5567432248863433, "learning_rate": 3.653990880517066e-06, "loss": 0.7052, "step": 8254 }, { "epoch": 0.036544335738633846, "grad_norm": 2.5032285459924073, "learning_rate": 3.654433573863385e-06, "loss": 0.7617, "step": 8255 }, { "epoch": 0.036548762672097036, "grad_norm": 2.977942466639494, "learning_rate": 3.6548762672097045e-06, "loss": 1.0668, "step": 8256 }, { "epoch": 0.03655318960556023, "grad_norm": 3.089332184584611, "learning_rate": 3.655318960556023e-06, "loss": 0.7123, "step": 8257 }, { "epoch": 0.03655761653902342, "grad_norm": 3.2867543678713558, "learning_rate": 3.655761653902342e-06, "loss": 0.6682, "step": 8258 }, { "epoch": 0.03656204347248661, "grad_norm": 2.647417107780746, "learning_rate": 3.6562043472486614e-06, "loss": 0.6292, "step": 8259 }, { "epoch": 0.0365664704059498, "grad_norm": 2.8901243113696498, "learning_rate": 3.65664704059498e-06, "loss": 1.0125, "step": 8260 }, { "epoch": 0.03657089733941299, "grad_norm": 2.7125021662865, "learning_rate": 3.6570897339412993e-06, "loss": 0.7384, "step": 8261 }, { "epoch": 0.03657532427287618, "grad_norm": 3.5661970813517434, "learning_rate": 3.6575324272876182e-06, "loss": 1.0035, "step": 8262 }, { "epoch": 0.03657975120633937, "grad_norm": 3.516072717907704, "learning_rate": 3.657975120633937e-06, "loss": 1.2222, "step": 8263 }, { "epoch": 0.03658417813980256, "grad_norm": 3.695497581013001, "learning_rate": 3.658417813980256e-06, "loss": 0.7218, "step": 8264 }, { "epoch": 0.03658860507326575, "grad_norm": 2.5885872440240827, "learning_rate": 3.6588605073265755e-06, "loss": 0.5384, "step": 8265 }, { "epoch": 0.03659303200672894, "grad_norm": 2.663246319149024, "learning_rate": 3.659303200672894e-06, "loss": 0.855, "step": 8266 }, { "epoch": 0.03659745894019213, "grad_norm": 2.4370429029433205, "learning_rate": 3.6597458940192134e-06, "loss": 0.4974, "step": 8267 }, { "epoch": 0.03660188587365532, "grad_norm": 3.4751464360849837, "learning_rate": 3.6601885873655323e-06, "loss": 1.1326, "step": 8268 }, { "epoch": 0.036606312807118506, "grad_norm": 2.8802903060367324, "learning_rate": 3.660631280711851e-06, "loss": 0.96, "step": 8269 }, { "epoch": 0.036610739740581696, "grad_norm": 2.3621492900276184, "learning_rate": 3.6610739740581702e-06, "loss": 0.5333, "step": 8270 }, { "epoch": 0.03661516667404489, "grad_norm": 3.106794952025881, "learning_rate": 3.6615166674044896e-06, "loss": 0.9871, "step": 8271 }, { "epoch": 0.03661959360750808, "grad_norm": 2.7876404868485567, "learning_rate": 3.661959360750808e-06, "loss": 0.5509, "step": 8272 }, { "epoch": 0.03662402054097127, "grad_norm": 2.869915048365946, "learning_rate": 3.662402054097127e-06, "loss": 0.9158, "step": 8273 }, { "epoch": 0.03662844747443446, "grad_norm": 2.9092489177964027, "learning_rate": 3.6628447474434464e-06, "loss": 0.8743, "step": 8274 }, { "epoch": 0.03663287440789765, "grad_norm": 3.2718953375036013, "learning_rate": 3.663287440789765e-06, "loss": 0.6976, "step": 8275 }, { "epoch": 0.03663730134136084, "grad_norm": 2.7597361095309894, "learning_rate": 3.6637301341360843e-06, "loss": 0.7313, "step": 8276 }, { "epoch": 0.03664172827482403, "grad_norm": 2.2002110902069085, "learning_rate": 3.6641728274824033e-06, "loss": 0.5869, "step": 8277 }, { "epoch": 0.03664615520828722, "grad_norm": 2.6370466798947936, "learning_rate": 3.6646155208287222e-06, "loss": 0.6323, "step": 8278 }, { "epoch": 0.03665058214175041, "grad_norm": 2.4025882402074195, "learning_rate": 3.665058214175041e-06, "loss": 0.7058, "step": 8279 }, { "epoch": 0.0366550090752136, "grad_norm": 3.7159478951179907, "learning_rate": 3.6655009075213605e-06, "loss": 1.283, "step": 8280 }, { "epoch": 0.03665943600867679, "grad_norm": 2.805626654289432, "learning_rate": 3.665943600867679e-06, "loss": 0.6829, "step": 8281 }, { "epoch": 0.03666386294213998, "grad_norm": 2.7220872177621036, "learning_rate": 3.6663862942139984e-06, "loss": 0.448, "step": 8282 }, { "epoch": 0.03666828987560317, "grad_norm": 2.729471763594204, "learning_rate": 3.6668289875603174e-06, "loss": 0.8219, "step": 8283 }, { "epoch": 0.03667271680906636, "grad_norm": 3.273361811326629, "learning_rate": 3.667271680906636e-06, "loss": 0.6513, "step": 8284 }, { "epoch": 0.03667714374252955, "grad_norm": 2.5018096253176094, "learning_rate": 3.6677143742529553e-06, "loss": 0.5868, "step": 8285 }, { "epoch": 0.03668157067599274, "grad_norm": 4.120865270383425, "learning_rate": 3.6681570675992746e-06, "loss": 1.407, "step": 8286 }, { "epoch": 0.03668599760945593, "grad_norm": 2.720706393901892, "learning_rate": 3.668599760945593e-06, "loss": 0.9238, "step": 8287 }, { "epoch": 0.03669042454291912, "grad_norm": 3.3656942903826566, "learning_rate": 3.669042454291912e-06, "loss": 0.4583, "step": 8288 }, { "epoch": 0.03669485147638231, "grad_norm": 3.203819998889009, "learning_rate": 3.6694851476382315e-06, "loss": 0.9435, "step": 8289 }, { "epoch": 0.0366992784098455, "grad_norm": 2.538185481049204, "learning_rate": 3.66992784098455e-06, "loss": 0.9257, "step": 8290 }, { "epoch": 0.03670370534330869, "grad_norm": 2.467628077766255, "learning_rate": 3.6703705343308694e-06, "loss": 0.6702, "step": 8291 }, { "epoch": 0.03670813227677188, "grad_norm": 2.5379631074741127, "learning_rate": 3.6708132276771883e-06, "loss": 0.7089, "step": 8292 }, { "epoch": 0.03671255921023507, "grad_norm": 3.08223637562083, "learning_rate": 3.6712559210235073e-06, "loss": 0.7356, "step": 8293 }, { "epoch": 0.03671698614369826, "grad_norm": 2.5846343949331447, "learning_rate": 3.6716986143698262e-06, "loss": 0.819, "step": 8294 }, { "epoch": 0.03672141307716145, "grad_norm": 3.646145467768339, "learning_rate": 3.6721413077161456e-06, "loss": 0.924, "step": 8295 }, { "epoch": 0.03672584001062464, "grad_norm": 2.456993476048143, "learning_rate": 3.672584001062464e-06, "loss": 0.7891, "step": 8296 }, { "epoch": 0.03673026694408783, "grad_norm": 3.1384326265426297, "learning_rate": 3.6730266944087835e-06, "loss": 0.6622, "step": 8297 }, { "epoch": 0.036734693877551024, "grad_norm": 2.8409476521221584, "learning_rate": 3.6734693877551024e-06, "loss": 0.4421, "step": 8298 }, { "epoch": 0.03673912081101421, "grad_norm": 2.382211554182088, "learning_rate": 3.6739120811014214e-06, "loss": 0.7021, "step": 8299 }, { "epoch": 0.0367435477444774, "grad_norm": 2.715431066805633, "learning_rate": 3.6743547744477403e-06, "loss": 0.4334, "step": 8300 }, { "epoch": 0.03674797467794059, "grad_norm": 2.5597506206730336, "learning_rate": 3.6747974677940597e-06, "loss": 0.7865, "step": 8301 }, { "epoch": 0.03675240161140378, "grad_norm": 3.4684100813037952, "learning_rate": 3.6752401611403782e-06, "loss": 0.6983, "step": 8302 }, { "epoch": 0.03675682854486697, "grad_norm": 2.6477284836018193, "learning_rate": 3.675682854486697e-06, "loss": 0.7345, "step": 8303 }, { "epoch": 0.03676125547833016, "grad_norm": 3.1480747119419212, "learning_rate": 3.6761255478330165e-06, "loss": 0.6432, "step": 8304 }, { "epoch": 0.03676568241179335, "grad_norm": 2.6051688307998724, "learning_rate": 3.676568241179335e-06, "loss": 0.7329, "step": 8305 }, { "epoch": 0.03677010934525654, "grad_norm": 3.7306628505239545, "learning_rate": 3.6770109345256544e-06, "loss": 0.955, "step": 8306 }, { "epoch": 0.03677453627871973, "grad_norm": 2.7651977141800352, "learning_rate": 3.6774536278719734e-06, "loss": 0.6, "step": 8307 }, { "epoch": 0.03677896321218292, "grad_norm": 2.2978228185695584, "learning_rate": 3.6778963212182923e-06, "loss": 0.6387, "step": 8308 }, { "epoch": 0.03678339014564611, "grad_norm": 2.7505256510375027, "learning_rate": 3.6783390145646113e-06, "loss": 0.9093, "step": 8309 }, { "epoch": 0.0367878170791093, "grad_norm": 2.9934732690800354, "learning_rate": 3.6787817079109306e-06, "loss": 0.7066, "step": 8310 }, { "epoch": 0.03679224401257249, "grad_norm": 3.2145347470960988, "learning_rate": 3.679224401257249e-06, "loss": 0.7801, "step": 8311 }, { "epoch": 0.036796670946035684, "grad_norm": 2.7673675212362077, "learning_rate": 3.6796670946035685e-06, "loss": 0.7053, "step": 8312 }, { "epoch": 0.036801097879498874, "grad_norm": 2.73756114242001, "learning_rate": 3.6801097879498875e-06, "loss": 0.5716, "step": 8313 }, { "epoch": 0.03680552481296206, "grad_norm": 3.0727063912698274, "learning_rate": 3.6805524812962064e-06, "loss": 0.8582, "step": 8314 }, { "epoch": 0.03680995174642525, "grad_norm": 2.8515195087395977, "learning_rate": 3.6809951746425254e-06, "loss": 0.9972, "step": 8315 }, { "epoch": 0.03681437867988844, "grad_norm": 2.591663074317746, "learning_rate": 3.6814378679888447e-06, "loss": 0.789, "step": 8316 }, { "epoch": 0.03681880561335163, "grad_norm": 2.6609863282137267, "learning_rate": 3.6818805613351633e-06, "loss": 0.6394, "step": 8317 }, { "epoch": 0.03682323254681482, "grad_norm": 2.655965986346416, "learning_rate": 3.6823232546814826e-06, "loss": 0.7972, "step": 8318 }, { "epoch": 0.03682765948027801, "grad_norm": 2.6718866969455495, "learning_rate": 3.6827659480278016e-06, "loss": 0.8104, "step": 8319 }, { "epoch": 0.0368320864137412, "grad_norm": 4.242443307383732, "learning_rate": 3.68320864137412e-06, "loss": 1.5012, "step": 8320 }, { "epoch": 0.03683651334720439, "grad_norm": 3.0300874222262637, "learning_rate": 3.6836513347204395e-06, "loss": 0.7368, "step": 8321 }, { "epoch": 0.03684094028066758, "grad_norm": 3.127404823812534, "learning_rate": 3.684094028066759e-06, "loss": 0.7876, "step": 8322 }, { "epoch": 0.03684536721413077, "grad_norm": 2.8167570562702067, "learning_rate": 3.6845367214130774e-06, "loss": 1.118, "step": 8323 }, { "epoch": 0.03684979414759396, "grad_norm": 2.443323189848401, "learning_rate": 3.6849794147593963e-06, "loss": 0.7128, "step": 8324 }, { "epoch": 0.036854221081057154, "grad_norm": 2.8783391504739804, "learning_rate": 3.6854221081057157e-06, "loss": 0.6701, "step": 8325 }, { "epoch": 0.036858648014520344, "grad_norm": 2.605976553689796, "learning_rate": 3.6858648014520342e-06, "loss": 0.6407, "step": 8326 }, { "epoch": 0.036863074947983535, "grad_norm": 2.3221764334497483, "learning_rate": 3.6863074947983536e-06, "loss": 0.7258, "step": 8327 }, { "epoch": 0.036867501881446725, "grad_norm": 2.6738845140280176, "learning_rate": 3.6867501881446725e-06, "loss": 0.6876, "step": 8328 }, { "epoch": 0.03687192881490991, "grad_norm": 2.3204124100735726, "learning_rate": 3.6871928814909915e-06, "loss": 0.6749, "step": 8329 }, { "epoch": 0.0368763557483731, "grad_norm": 2.884464568552702, "learning_rate": 3.6876355748373104e-06, "loss": 0.8365, "step": 8330 }, { "epoch": 0.03688078268183629, "grad_norm": 3.2894151927708033, "learning_rate": 3.68807826818363e-06, "loss": 0.7943, "step": 8331 }, { "epoch": 0.03688520961529948, "grad_norm": 3.4074866980735123, "learning_rate": 3.6885209615299483e-06, "loss": 0.8959, "step": 8332 }, { "epoch": 0.03688963654876267, "grad_norm": 2.9130349469014907, "learning_rate": 3.6889636548762677e-06, "loss": 0.72, "step": 8333 }, { "epoch": 0.03689406348222586, "grad_norm": 4.355503294886377, "learning_rate": 3.6894063482225866e-06, "loss": 1.2909, "step": 8334 }, { "epoch": 0.03689849041568905, "grad_norm": 2.5907313896543047, "learning_rate": 3.689849041568905e-06, "loss": 0.7129, "step": 8335 }, { "epoch": 0.03690291734915224, "grad_norm": 2.7393731006950484, "learning_rate": 3.6902917349152245e-06, "loss": 0.6765, "step": 8336 }, { "epoch": 0.03690734428261543, "grad_norm": 2.9354338544826883, "learning_rate": 3.690734428261544e-06, "loss": 0.8022, "step": 8337 }, { "epoch": 0.03691177121607862, "grad_norm": 3.1393127481591563, "learning_rate": 3.6911771216078624e-06, "loss": 0.8185, "step": 8338 }, { "epoch": 0.036916198149541814, "grad_norm": 3.268544515234255, "learning_rate": 3.6916198149541814e-06, "loss": 0.5252, "step": 8339 }, { "epoch": 0.036920625083005004, "grad_norm": 3.144494808014041, "learning_rate": 3.6920625083005007e-06, "loss": 0.6283, "step": 8340 }, { "epoch": 0.036925052016468195, "grad_norm": 2.652318876916285, "learning_rate": 3.6925052016468193e-06, "loss": 0.7105, "step": 8341 }, { "epoch": 0.036929478949931385, "grad_norm": 2.8955760798687975, "learning_rate": 3.6929478949931386e-06, "loss": 0.8944, "step": 8342 }, { "epoch": 0.036933905883394576, "grad_norm": 2.6703151760969943, "learning_rate": 3.6933905883394576e-06, "loss": 0.7069, "step": 8343 }, { "epoch": 0.03693833281685776, "grad_norm": 3.450867115103451, "learning_rate": 3.6938332816857765e-06, "loss": 0.828, "step": 8344 }, { "epoch": 0.03694275975032095, "grad_norm": 2.630513331619473, "learning_rate": 3.6942759750320955e-06, "loss": 0.7344, "step": 8345 }, { "epoch": 0.03694718668378414, "grad_norm": 2.8299613643875854, "learning_rate": 3.694718668378415e-06, "loss": 0.8761, "step": 8346 }, { "epoch": 0.03695161361724733, "grad_norm": 2.986928574331939, "learning_rate": 3.6951613617247334e-06, "loss": 0.9999, "step": 8347 }, { "epoch": 0.03695604055071052, "grad_norm": 2.9230503028222796, "learning_rate": 3.6956040550710527e-06, "loss": 0.6428, "step": 8348 }, { "epoch": 0.03696046748417371, "grad_norm": 2.496170298411065, "learning_rate": 3.6960467484173717e-06, "loss": 0.7477, "step": 8349 }, { "epoch": 0.0369648944176369, "grad_norm": 3.077791158309357, "learning_rate": 3.6964894417636902e-06, "loss": 1.1001, "step": 8350 }, { "epoch": 0.03696932135110009, "grad_norm": 3.086701798874673, "learning_rate": 3.6969321351100096e-06, "loss": 0.8251, "step": 8351 }, { "epoch": 0.03697374828456328, "grad_norm": 2.6296731774823607, "learning_rate": 3.697374828456329e-06, "loss": 0.6363, "step": 8352 }, { "epoch": 0.036978175218026474, "grad_norm": 2.9477996850986803, "learning_rate": 3.6978175218026475e-06, "loss": 1.1373, "step": 8353 }, { "epoch": 0.036982602151489664, "grad_norm": 2.7314634101729056, "learning_rate": 3.6982602151489664e-06, "loss": 0.7627, "step": 8354 }, { "epoch": 0.036987029084952855, "grad_norm": 3.335237891195569, "learning_rate": 3.698702908495286e-06, "loss": 1.1748, "step": 8355 }, { "epoch": 0.036991456018416045, "grad_norm": 2.998840786446608, "learning_rate": 3.6991456018416043e-06, "loss": 0.7655, "step": 8356 }, { "epoch": 0.036995882951879236, "grad_norm": 2.5544736590213057, "learning_rate": 3.6995882951879237e-06, "loss": 0.6479, "step": 8357 }, { "epoch": 0.037000309885342426, "grad_norm": 2.8109504096138704, "learning_rate": 3.7000309885342426e-06, "loss": 0.7217, "step": 8358 }, { "epoch": 0.03700473681880561, "grad_norm": 3.0661589418986366, "learning_rate": 3.7004736818805616e-06, "loss": 1.004, "step": 8359 }, { "epoch": 0.0370091637522688, "grad_norm": 3.164753808862875, "learning_rate": 3.7009163752268805e-06, "loss": 1.0853, "step": 8360 }, { "epoch": 0.03701359068573199, "grad_norm": 2.4832175136876695, "learning_rate": 3.7013590685732e-06, "loss": 0.6356, "step": 8361 }, { "epoch": 0.03701801761919518, "grad_norm": 2.9768581672780687, "learning_rate": 3.7018017619195184e-06, "loss": 0.784, "step": 8362 }, { "epoch": 0.03702244455265837, "grad_norm": 2.3271642151399456, "learning_rate": 3.702244455265838e-06, "loss": 0.56, "step": 8363 }, { "epoch": 0.03702687148612156, "grad_norm": 2.991713167953312, "learning_rate": 3.7026871486121567e-06, "loss": 0.8149, "step": 8364 }, { "epoch": 0.03703129841958475, "grad_norm": 2.980498784487842, "learning_rate": 3.7031298419584753e-06, "loss": 1.0188, "step": 8365 }, { "epoch": 0.037035725353047944, "grad_norm": 2.494253274865703, "learning_rate": 3.7035725353047946e-06, "loss": 0.6433, "step": 8366 }, { "epoch": 0.037040152286511134, "grad_norm": 3.51438048583471, "learning_rate": 3.704015228651114e-06, "loss": 0.9575, "step": 8367 }, { "epoch": 0.037044579219974325, "grad_norm": 3.474883277147884, "learning_rate": 3.7044579219974325e-06, "loss": 1.1898, "step": 8368 }, { "epoch": 0.037049006153437515, "grad_norm": 2.784752169925493, "learning_rate": 3.7049006153437515e-06, "loss": 0.6258, "step": 8369 }, { "epoch": 0.037053433086900706, "grad_norm": 2.7286452511671384, "learning_rate": 3.705343308690071e-06, "loss": 0.8734, "step": 8370 }, { "epoch": 0.037057860020363896, "grad_norm": 3.96272577994786, "learning_rate": 3.7057860020363894e-06, "loss": 1.1171, "step": 8371 }, { "epoch": 0.03706228695382709, "grad_norm": 3.0290508264501343, "learning_rate": 3.7062286953827087e-06, "loss": 1.0101, "step": 8372 }, { "epoch": 0.03706671388729028, "grad_norm": 3.4441043985475477, "learning_rate": 3.7066713887290277e-06, "loss": 0.8376, "step": 8373 }, { "epoch": 0.03707114082075346, "grad_norm": 2.659099863802756, "learning_rate": 3.7071140820753466e-06, "loss": 0.6246, "step": 8374 }, { "epoch": 0.03707556775421665, "grad_norm": 2.6859285547887644, "learning_rate": 3.7075567754216656e-06, "loss": 0.8984, "step": 8375 }, { "epoch": 0.03707999468767984, "grad_norm": 2.6112692636416885, "learning_rate": 3.707999468767985e-06, "loss": 0.6661, "step": 8376 }, { "epoch": 0.03708442162114303, "grad_norm": 3.177928644643624, "learning_rate": 3.7084421621143035e-06, "loss": 0.8004, "step": 8377 }, { "epoch": 0.03708884855460622, "grad_norm": 3.6393724807024874, "learning_rate": 3.708884855460623e-06, "loss": 1.0305, "step": 8378 }, { "epoch": 0.03709327548806941, "grad_norm": 2.6311539483754154, "learning_rate": 3.709327548806942e-06, "loss": 0.7321, "step": 8379 }, { "epoch": 0.037097702421532604, "grad_norm": 3.3064622151667566, "learning_rate": 3.7097702421532607e-06, "loss": 1.2305, "step": 8380 }, { "epoch": 0.037102129354995794, "grad_norm": 3.0359586819696296, "learning_rate": 3.7102129354995797e-06, "loss": 0.8019, "step": 8381 }, { "epoch": 0.037106556288458985, "grad_norm": 2.9963872730941863, "learning_rate": 3.710655628845899e-06, "loss": 0.9958, "step": 8382 }, { "epoch": 0.037110983221922175, "grad_norm": 2.7307924897165265, "learning_rate": 3.7110983221922176e-06, "loss": 0.6099, "step": 8383 }, { "epoch": 0.037115410155385366, "grad_norm": 3.0042158086125017, "learning_rate": 3.7115410155385365e-06, "loss": 0.6282, "step": 8384 }, { "epoch": 0.037119837088848556, "grad_norm": 3.3294346249857387, "learning_rate": 3.711983708884856e-06, "loss": 1.3206, "step": 8385 }, { "epoch": 0.03712426402231175, "grad_norm": 2.911491421399403, "learning_rate": 3.7124264022311744e-06, "loss": 0.7487, "step": 8386 }, { "epoch": 0.03712869095577494, "grad_norm": 2.819998774354183, "learning_rate": 3.712869095577494e-06, "loss": 0.7466, "step": 8387 }, { "epoch": 0.03713311788923813, "grad_norm": 2.499430264125939, "learning_rate": 3.7133117889238127e-06, "loss": 0.7034, "step": 8388 }, { "epoch": 0.03713754482270131, "grad_norm": 2.777393992865592, "learning_rate": 3.7137544822701317e-06, "loss": 0.7822, "step": 8389 }, { "epoch": 0.0371419717561645, "grad_norm": 2.6555801382443724, "learning_rate": 3.7141971756164506e-06, "loss": 0.6522, "step": 8390 }, { "epoch": 0.03714639868962769, "grad_norm": 2.8167912890275995, "learning_rate": 3.71463986896277e-06, "loss": 0.6614, "step": 8391 }, { "epoch": 0.03715082562309088, "grad_norm": 2.8417639813204536, "learning_rate": 3.7150825623090885e-06, "loss": 1.0152, "step": 8392 }, { "epoch": 0.03715525255655407, "grad_norm": 2.8154298995902054, "learning_rate": 3.715525255655408e-06, "loss": 0.6342, "step": 8393 }, { "epoch": 0.037159679490017264, "grad_norm": 2.73992770906191, "learning_rate": 3.715967949001727e-06, "loss": 0.7213, "step": 8394 }, { "epoch": 0.037164106423480454, "grad_norm": 3.0238236629526285, "learning_rate": 3.716410642348046e-06, "loss": 0.808, "step": 8395 }, { "epoch": 0.037168533356943645, "grad_norm": 3.3472361182885395, "learning_rate": 3.7168533356943647e-06, "loss": 0.9605, "step": 8396 }, { "epoch": 0.037172960290406835, "grad_norm": 2.5233477059130096, "learning_rate": 3.717296029040684e-06, "loss": 0.6506, "step": 8397 }, { "epoch": 0.037177387223870026, "grad_norm": 2.9170191170976802, "learning_rate": 3.7177387223870026e-06, "loss": 0.78, "step": 8398 }, { "epoch": 0.037181814157333216, "grad_norm": 2.918736975965361, "learning_rate": 3.718181415733322e-06, "loss": 0.7891, "step": 8399 }, { "epoch": 0.03718624109079641, "grad_norm": 3.344466696904214, "learning_rate": 3.718624109079641e-06, "loss": 0.7609, "step": 8400 }, { "epoch": 0.0371906680242596, "grad_norm": 2.9006084673399446, "learning_rate": 3.7190668024259595e-06, "loss": 0.7401, "step": 8401 }, { "epoch": 0.03719509495772279, "grad_norm": 3.179832581754313, "learning_rate": 3.719509495772279e-06, "loss": 1.1035, "step": 8402 }, { "epoch": 0.03719952189118598, "grad_norm": 3.244875176840944, "learning_rate": 3.719952189118598e-06, "loss": 0.848, "step": 8403 }, { "epoch": 0.03720394882464917, "grad_norm": 4.057174133130111, "learning_rate": 3.7203948824649167e-06, "loss": 1.2545, "step": 8404 }, { "epoch": 0.03720837575811235, "grad_norm": 3.3992148726253317, "learning_rate": 3.7208375758112357e-06, "loss": 1.0337, "step": 8405 }, { "epoch": 0.03721280269157554, "grad_norm": 2.870256386447243, "learning_rate": 3.721280269157555e-06, "loss": 0.8227, "step": 8406 }, { "epoch": 0.037217229625038734, "grad_norm": 2.845560798792387, "learning_rate": 3.7217229625038736e-06, "loss": 0.8914, "step": 8407 }, { "epoch": 0.037221656558501924, "grad_norm": 3.1342718010809896, "learning_rate": 3.722165655850193e-06, "loss": 0.8149, "step": 8408 }, { "epoch": 0.037226083491965115, "grad_norm": 2.885098317113974, "learning_rate": 3.722608349196512e-06, "loss": 1.1198, "step": 8409 }, { "epoch": 0.037230510425428305, "grad_norm": 3.329927839544073, "learning_rate": 3.723051042542831e-06, "loss": 0.4734, "step": 8410 }, { "epoch": 0.037234937358891496, "grad_norm": 2.6762163925007916, "learning_rate": 3.72349373588915e-06, "loss": 0.6513, "step": 8411 }, { "epoch": 0.037239364292354686, "grad_norm": 3.738870849885181, "learning_rate": 3.723936429235469e-06, "loss": 0.8628, "step": 8412 }, { "epoch": 0.03724379122581788, "grad_norm": 3.484191145843444, "learning_rate": 3.7243791225817877e-06, "loss": 0.8634, "step": 8413 }, { "epoch": 0.03724821815928107, "grad_norm": 2.426824289040142, "learning_rate": 3.724821815928107e-06, "loss": 0.6006, "step": 8414 }, { "epoch": 0.03725264509274426, "grad_norm": 2.8488429469083956, "learning_rate": 3.725264509274426e-06, "loss": 0.5908, "step": 8415 }, { "epoch": 0.03725707202620745, "grad_norm": 2.579698216342596, "learning_rate": 3.7257072026207445e-06, "loss": 0.6932, "step": 8416 }, { "epoch": 0.03726149895967064, "grad_norm": 2.9002264228436347, "learning_rate": 3.726149895967064e-06, "loss": 0.6862, "step": 8417 }, { "epoch": 0.03726592589313383, "grad_norm": 2.6629361600050854, "learning_rate": 3.7265925893133833e-06, "loss": 0.8529, "step": 8418 }, { "epoch": 0.03727035282659702, "grad_norm": 2.9614805698078857, "learning_rate": 3.727035282659702e-06, "loss": 0.9043, "step": 8419 }, { "epoch": 0.0372747797600602, "grad_norm": 2.6965527246848295, "learning_rate": 3.7274779760060207e-06, "loss": 0.667, "step": 8420 }, { "epoch": 0.037279206693523394, "grad_norm": 2.1627861212389994, "learning_rate": 3.72792066935234e-06, "loss": 0.4719, "step": 8421 }, { "epoch": 0.037283633626986584, "grad_norm": 2.8348354300396372, "learning_rate": 3.7283633626986586e-06, "loss": 0.8768, "step": 8422 }, { "epoch": 0.037288060560449775, "grad_norm": 2.685479676454041, "learning_rate": 3.728806056044978e-06, "loss": 0.8481, "step": 8423 }, { "epoch": 0.037292487493912965, "grad_norm": 2.6778693037895214, "learning_rate": 3.729248749391297e-06, "loss": 0.5177, "step": 8424 }, { "epoch": 0.037296914427376156, "grad_norm": 3.286983744638599, "learning_rate": 3.729691442737616e-06, "loss": 1.0762, "step": 8425 }, { "epoch": 0.037301341360839346, "grad_norm": 2.5834411277216383, "learning_rate": 3.730134136083935e-06, "loss": 0.7529, "step": 8426 }, { "epoch": 0.03730576829430254, "grad_norm": 3.3608821419645456, "learning_rate": 3.7305768294302542e-06, "loss": 1.0461, "step": 8427 }, { "epoch": 0.03731019522776573, "grad_norm": 2.8325800272099033, "learning_rate": 3.7310195227765728e-06, "loss": 0.7822, "step": 8428 }, { "epoch": 0.03731462216122892, "grad_norm": 2.680733284023253, "learning_rate": 3.731462216122892e-06, "loss": 0.6973, "step": 8429 }, { "epoch": 0.03731904909469211, "grad_norm": 3.5216144095733566, "learning_rate": 3.731904909469211e-06, "loss": 1.1697, "step": 8430 }, { "epoch": 0.0373234760281553, "grad_norm": 2.519255925918428, "learning_rate": 3.7323476028155296e-06, "loss": 0.49, "step": 8431 }, { "epoch": 0.03732790296161849, "grad_norm": 3.385007661087259, "learning_rate": 3.732790296161849e-06, "loss": 0.4494, "step": 8432 }, { "epoch": 0.03733232989508168, "grad_norm": 3.1317223632352644, "learning_rate": 3.7332329895081683e-06, "loss": 0.9158, "step": 8433 }, { "epoch": 0.03733675682854487, "grad_norm": 2.8884562987684084, "learning_rate": 3.733675682854487e-06, "loss": 0.3909, "step": 8434 }, { "epoch": 0.037341183762008054, "grad_norm": 3.1102111258061944, "learning_rate": 3.734118376200806e-06, "loss": 0.9918, "step": 8435 }, { "epoch": 0.037345610695471244, "grad_norm": 2.908110789483234, "learning_rate": 3.734561069547125e-06, "loss": 0.61, "step": 8436 }, { "epoch": 0.037350037628934435, "grad_norm": 2.2640272866866473, "learning_rate": 3.7350037628934437e-06, "loss": 0.5712, "step": 8437 }, { "epoch": 0.037354464562397625, "grad_norm": 3.7297356187291233, "learning_rate": 3.735446456239763e-06, "loss": 1.2191, "step": 8438 }, { "epoch": 0.037358891495860816, "grad_norm": 2.702433971457322, "learning_rate": 3.735889149586082e-06, "loss": 0.6442, "step": 8439 }, { "epoch": 0.037363318429324006, "grad_norm": 3.118521850676782, "learning_rate": 3.736331842932401e-06, "loss": 0.9224, "step": 8440 }, { "epoch": 0.0373677453627872, "grad_norm": 2.9142441201447125, "learning_rate": 3.73677453627872e-06, "loss": 0.929, "step": 8441 }, { "epoch": 0.03737217229625039, "grad_norm": 3.4556962118985743, "learning_rate": 3.7372172296250393e-06, "loss": 0.9871, "step": 8442 }, { "epoch": 0.03737659922971358, "grad_norm": 2.6209782933645376, "learning_rate": 3.737659922971358e-06, "loss": 0.5784, "step": 8443 }, { "epoch": 0.03738102616317677, "grad_norm": 2.364587872208211, "learning_rate": 3.738102616317677e-06, "loss": 0.5167, "step": 8444 }, { "epoch": 0.03738545309663996, "grad_norm": 2.754479994701543, "learning_rate": 3.738545309663996e-06, "loss": 0.7551, "step": 8445 }, { "epoch": 0.03738988003010315, "grad_norm": 3.455279133865303, "learning_rate": 3.7389880030103146e-06, "loss": 1.0915, "step": 8446 }, { "epoch": 0.03739430696356634, "grad_norm": 2.9225118158991155, "learning_rate": 3.739430696356634e-06, "loss": 0.7103, "step": 8447 }, { "epoch": 0.03739873389702953, "grad_norm": 2.6297462502026465, "learning_rate": 3.7398733897029534e-06, "loss": 0.4644, "step": 8448 }, { "epoch": 0.03740316083049272, "grad_norm": 2.9575353498942136, "learning_rate": 3.740316083049272e-06, "loss": 0.6024, "step": 8449 }, { "epoch": 0.037407587763955905, "grad_norm": 2.267574357961488, "learning_rate": 3.740758776395591e-06, "loss": 0.4371, "step": 8450 }, { "epoch": 0.037412014697419095, "grad_norm": 2.3471276268577275, "learning_rate": 3.7412014697419102e-06, "loss": 0.6835, "step": 8451 }, { "epoch": 0.037416441630882286, "grad_norm": 3.3935435520323454, "learning_rate": 3.7416441630882288e-06, "loss": 0.6277, "step": 8452 }, { "epoch": 0.037420868564345476, "grad_norm": 3.7819975244838835, "learning_rate": 3.742086856434548e-06, "loss": 0.7496, "step": 8453 }, { "epoch": 0.03742529549780867, "grad_norm": 3.614318738604147, "learning_rate": 3.742529549780867e-06, "loss": 1.0405, "step": 8454 }, { "epoch": 0.03742972243127186, "grad_norm": 2.896701904517489, "learning_rate": 3.742972243127186e-06, "loss": 0.6765, "step": 8455 }, { "epoch": 0.03743414936473505, "grad_norm": 2.79191000151591, "learning_rate": 3.743414936473505e-06, "loss": 0.5758, "step": 8456 }, { "epoch": 0.03743857629819824, "grad_norm": 3.4489140619881007, "learning_rate": 3.7438576298198243e-06, "loss": 1.2133, "step": 8457 }, { "epoch": 0.03744300323166143, "grad_norm": 2.6930129599313526, "learning_rate": 3.744300323166143e-06, "loss": 0.5812, "step": 8458 }, { "epoch": 0.03744743016512462, "grad_norm": 4.094473192812177, "learning_rate": 3.7447430165124622e-06, "loss": 1.2588, "step": 8459 }, { "epoch": 0.03745185709858781, "grad_norm": 2.300669676238996, "learning_rate": 3.745185709858781e-06, "loss": 0.6911, "step": 8460 }, { "epoch": 0.037456284032051, "grad_norm": 2.485230876011824, "learning_rate": 3.7456284032050997e-06, "loss": 0.6354, "step": 8461 }, { "epoch": 0.03746071096551419, "grad_norm": 3.4023299302816925, "learning_rate": 3.746071096551419e-06, "loss": 1.1826, "step": 8462 }, { "epoch": 0.03746513789897738, "grad_norm": 2.748511371746391, "learning_rate": 3.7465137898977384e-06, "loss": 0.54, "step": 8463 }, { "epoch": 0.03746956483244057, "grad_norm": 2.424939270970706, "learning_rate": 3.746956483244057e-06, "loss": 0.8003, "step": 8464 }, { "epoch": 0.037473991765903755, "grad_norm": 2.729727047777213, "learning_rate": 3.747399176590376e-06, "loss": 0.7231, "step": 8465 }, { "epoch": 0.037478418699366946, "grad_norm": 3.069206727280552, "learning_rate": 3.7478418699366953e-06, "loss": 0.8614, "step": 8466 }, { "epoch": 0.037482845632830136, "grad_norm": 3.266493604723518, "learning_rate": 3.748284563283014e-06, "loss": 0.7899, "step": 8467 }, { "epoch": 0.03748727256629333, "grad_norm": 3.2844043853912788, "learning_rate": 3.748727256629333e-06, "loss": 0.8378, "step": 8468 }, { "epoch": 0.03749169949975652, "grad_norm": 2.839793920170052, "learning_rate": 3.749169949975652e-06, "loss": 1.1018, "step": 8469 }, { "epoch": 0.03749612643321971, "grad_norm": 2.8698880465852223, "learning_rate": 3.749612643321971e-06, "loss": 0.5093, "step": 8470 }, { "epoch": 0.0375005533666829, "grad_norm": 3.4027960781663644, "learning_rate": 3.75005533666829e-06, "loss": 1.2485, "step": 8471 }, { "epoch": 0.03750498030014609, "grad_norm": 3.4677630765266603, "learning_rate": 3.7504980300146094e-06, "loss": 1.0346, "step": 8472 }, { "epoch": 0.03750940723360928, "grad_norm": 3.4067564199441605, "learning_rate": 3.750940723360928e-06, "loss": 1.1881, "step": 8473 }, { "epoch": 0.03751383416707247, "grad_norm": 2.5067594998391156, "learning_rate": 3.7513834167072473e-06, "loss": 0.5421, "step": 8474 }, { "epoch": 0.03751826110053566, "grad_norm": 2.69584572281381, "learning_rate": 3.7518261100535662e-06, "loss": 0.7011, "step": 8475 }, { "epoch": 0.03752268803399885, "grad_norm": 2.3910725847595202, "learning_rate": 3.752268803399885e-06, "loss": 0.7246, "step": 8476 }, { "epoch": 0.03752711496746204, "grad_norm": 3.3494867568704403, "learning_rate": 3.752711496746204e-06, "loss": 0.5027, "step": 8477 }, { "epoch": 0.03753154190092523, "grad_norm": 2.5070464609063765, "learning_rate": 3.7531541900925235e-06, "loss": 0.5085, "step": 8478 }, { "epoch": 0.03753596883438842, "grad_norm": 2.570632754597433, "learning_rate": 3.753596883438842e-06, "loss": 0.4824, "step": 8479 }, { "epoch": 0.037540395767851606, "grad_norm": 4.185646055155565, "learning_rate": 3.7540395767851614e-06, "loss": 1.1353, "step": 8480 }, { "epoch": 0.037544822701314796, "grad_norm": 2.9633959252510587, "learning_rate": 3.7544822701314803e-06, "loss": 0.6936, "step": 8481 }, { "epoch": 0.03754924963477799, "grad_norm": 4.184800217227894, "learning_rate": 3.754924963477799e-06, "loss": 1.2918, "step": 8482 }, { "epoch": 0.03755367656824118, "grad_norm": 3.14969244900534, "learning_rate": 3.7553676568241182e-06, "loss": 0.8603, "step": 8483 }, { "epoch": 0.03755810350170437, "grad_norm": 3.550823034394803, "learning_rate": 3.755810350170437e-06, "loss": 0.9813, "step": 8484 }, { "epoch": 0.03756253043516756, "grad_norm": 2.712310003098827, "learning_rate": 3.756253043516756e-06, "loss": 0.6919, "step": 8485 }, { "epoch": 0.03756695736863075, "grad_norm": 3.045734198092544, "learning_rate": 3.756695736863075e-06, "loss": 0.9657, "step": 8486 }, { "epoch": 0.03757138430209394, "grad_norm": 3.725233242477634, "learning_rate": 3.7571384302093944e-06, "loss": 0.7728, "step": 8487 }, { "epoch": 0.03757581123555713, "grad_norm": 3.600831230263226, "learning_rate": 3.757581123555713e-06, "loss": 0.9336, "step": 8488 }, { "epoch": 0.03758023816902032, "grad_norm": 2.7595315311848077, "learning_rate": 3.7580238169020323e-06, "loss": 0.5325, "step": 8489 }, { "epoch": 0.03758466510248351, "grad_norm": 2.6008650934509836, "learning_rate": 3.7584665102483513e-06, "loss": 0.7223, "step": 8490 }, { "epoch": 0.0375890920359467, "grad_norm": 2.9930248962601387, "learning_rate": 3.7589092035946702e-06, "loss": 0.9017, "step": 8491 }, { "epoch": 0.03759351896940989, "grad_norm": 3.0776222100090105, "learning_rate": 3.759351896940989e-06, "loss": 0.5562, "step": 8492 }, { "epoch": 0.03759794590287308, "grad_norm": 2.121169582221193, "learning_rate": 3.7597945902873085e-06, "loss": 0.5084, "step": 8493 }, { "epoch": 0.03760237283633627, "grad_norm": 2.729869812130983, "learning_rate": 3.760237283633627e-06, "loss": 0.8092, "step": 8494 }, { "epoch": 0.03760679976979946, "grad_norm": 2.4298659266795615, "learning_rate": 3.7606799769799464e-06, "loss": 0.7819, "step": 8495 }, { "epoch": 0.03761122670326265, "grad_norm": 2.4751965989629596, "learning_rate": 3.7611226703262654e-06, "loss": 0.6768, "step": 8496 }, { "epoch": 0.03761565363672584, "grad_norm": 3.2757156496407505, "learning_rate": 3.761565363672584e-06, "loss": 1.1202, "step": 8497 }, { "epoch": 0.03762008057018903, "grad_norm": 2.992529470937643, "learning_rate": 3.7620080570189033e-06, "loss": 0.693, "step": 8498 }, { "epoch": 0.03762450750365222, "grad_norm": 2.8948518382763484, "learning_rate": 3.7624507503652227e-06, "loss": 0.6077, "step": 8499 }, { "epoch": 0.03762893443711541, "grad_norm": 3.0502633730837103, "learning_rate": 3.762893443711541e-06, "loss": 0.621, "step": 8500 }, { "epoch": 0.0376333613705786, "grad_norm": 2.6940841568756855, "learning_rate": 3.76333613705786e-06, "loss": 0.6718, "step": 8501 }, { "epoch": 0.03763778830404179, "grad_norm": 4.2365921052019635, "learning_rate": 3.7637788304041795e-06, "loss": 1.0351, "step": 8502 }, { "epoch": 0.03764221523750498, "grad_norm": 2.9489569487011167, "learning_rate": 3.764221523750498e-06, "loss": 1.1453, "step": 8503 }, { "epoch": 0.03764664217096817, "grad_norm": 2.6307537424116876, "learning_rate": 3.7646642170968174e-06, "loss": 0.5499, "step": 8504 }, { "epoch": 0.03765106910443136, "grad_norm": 3.1037053675269966, "learning_rate": 3.7651069104431363e-06, "loss": 0.6331, "step": 8505 }, { "epoch": 0.03765549603789455, "grad_norm": 2.6593943157553026, "learning_rate": 3.7655496037894553e-06, "loss": 0.9807, "step": 8506 }, { "epoch": 0.03765992297135774, "grad_norm": 2.935898326078174, "learning_rate": 3.7659922971357742e-06, "loss": 0.6801, "step": 8507 }, { "epoch": 0.03766434990482093, "grad_norm": 2.9403862129042757, "learning_rate": 3.7664349904820936e-06, "loss": 1.1057, "step": 8508 }, { "epoch": 0.037668776838284124, "grad_norm": 3.1416006666484244, "learning_rate": 3.766877683828412e-06, "loss": 1.0897, "step": 8509 }, { "epoch": 0.03767320377174731, "grad_norm": 3.0815971248927085, "learning_rate": 3.7673203771747315e-06, "loss": 0.8475, "step": 8510 }, { "epoch": 0.0376776307052105, "grad_norm": 2.699976358413462, "learning_rate": 3.7677630705210504e-06, "loss": 0.686, "step": 8511 }, { "epoch": 0.03768205763867369, "grad_norm": 2.8429724083507475, "learning_rate": 3.768205763867369e-06, "loss": 0.6311, "step": 8512 }, { "epoch": 0.03768648457213688, "grad_norm": 3.026029355071907, "learning_rate": 3.7686484572136883e-06, "loss": 0.8616, "step": 8513 }, { "epoch": 0.03769091150560007, "grad_norm": 2.942304832899695, "learning_rate": 3.7690911505600077e-06, "loss": 0.9485, "step": 8514 }, { "epoch": 0.03769533843906326, "grad_norm": 2.6010672618154413, "learning_rate": 3.7695338439063262e-06, "loss": 0.379, "step": 8515 }, { "epoch": 0.03769976537252645, "grad_norm": 4.423478772683144, "learning_rate": 3.769976537252645e-06, "loss": 1.2597, "step": 8516 }, { "epoch": 0.03770419230598964, "grad_norm": 3.212316628964631, "learning_rate": 3.7704192305989645e-06, "loss": 0.6146, "step": 8517 }, { "epoch": 0.03770861923945283, "grad_norm": 3.8729550169915856, "learning_rate": 3.770861923945283e-06, "loss": 1.0859, "step": 8518 }, { "epoch": 0.03771304617291602, "grad_norm": 2.3505124643586295, "learning_rate": 3.7713046172916024e-06, "loss": 0.6207, "step": 8519 }, { "epoch": 0.03771747310637921, "grad_norm": 3.0280633302400584, "learning_rate": 3.7717473106379214e-06, "loss": 1.0585, "step": 8520 }, { "epoch": 0.0377219000398424, "grad_norm": 2.2576765558174317, "learning_rate": 3.7721900039842403e-06, "loss": 0.5948, "step": 8521 }, { "epoch": 0.03772632697330559, "grad_norm": 3.015543021427662, "learning_rate": 3.7726326973305593e-06, "loss": 1.0029, "step": 8522 }, { "epoch": 0.037730753906768784, "grad_norm": 2.8526139317158634, "learning_rate": 3.7730753906768787e-06, "loss": 0.5633, "step": 8523 }, { "epoch": 0.037735180840231974, "grad_norm": 3.0144484790655097, "learning_rate": 3.773518084023197e-06, "loss": 0.9388, "step": 8524 }, { "epoch": 0.03773960777369516, "grad_norm": 2.617755228717107, "learning_rate": 3.7739607773695165e-06, "loss": 0.6115, "step": 8525 }, { "epoch": 0.03774403470715835, "grad_norm": 3.4548058379687, "learning_rate": 3.7744034707158355e-06, "loss": 0.9153, "step": 8526 }, { "epoch": 0.03774846164062154, "grad_norm": 2.7017891638340923, "learning_rate": 3.774846164062154e-06, "loss": 0.5748, "step": 8527 }, { "epoch": 0.03775288857408473, "grad_norm": 2.9977746542452404, "learning_rate": 3.7752888574084734e-06, "loss": 0.9447, "step": 8528 }, { "epoch": 0.03775731550754792, "grad_norm": 2.481268938373947, "learning_rate": 3.7757315507547928e-06, "loss": 0.7726, "step": 8529 }, { "epoch": 0.03776174244101111, "grad_norm": 2.8055770960003286, "learning_rate": 3.7761742441011113e-06, "loss": 0.5391, "step": 8530 }, { "epoch": 0.0377661693744743, "grad_norm": 2.6111839376561217, "learning_rate": 3.7766169374474302e-06, "loss": 0.7523, "step": 8531 }, { "epoch": 0.03777059630793749, "grad_norm": 2.2570627912049117, "learning_rate": 3.7770596307937496e-06, "loss": 0.5589, "step": 8532 }, { "epoch": 0.03777502324140068, "grad_norm": 3.262746111350529, "learning_rate": 3.777502324140068e-06, "loss": 0.7899, "step": 8533 }, { "epoch": 0.03777945017486387, "grad_norm": 2.696977783528629, "learning_rate": 3.7779450174863875e-06, "loss": 0.6135, "step": 8534 }, { "epoch": 0.03778387710832706, "grad_norm": 3.2648988514718873, "learning_rate": 3.7783877108327064e-06, "loss": 0.9081, "step": 8535 }, { "epoch": 0.037788304041790254, "grad_norm": 2.9748023818208393, "learning_rate": 3.7788304041790254e-06, "loss": 0.8796, "step": 8536 }, { "epoch": 0.037792730975253444, "grad_norm": 2.287988522769701, "learning_rate": 3.7792730975253443e-06, "loss": 0.4597, "step": 8537 }, { "epoch": 0.037797157908716635, "grad_norm": 2.6733600790326317, "learning_rate": 3.7797157908716637e-06, "loss": 0.6293, "step": 8538 }, { "epoch": 0.037801584842179825, "grad_norm": 3.6304094548134933, "learning_rate": 3.7801584842179822e-06, "loss": 0.7609, "step": 8539 }, { "epoch": 0.03780601177564301, "grad_norm": 3.0604883233408735, "learning_rate": 3.7806011775643016e-06, "loss": 0.7142, "step": 8540 }, { "epoch": 0.0378104387091062, "grad_norm": 2.736827620216975, "learning_rate": 3.7810438709106205e-06, "loss": 0.6965, "step": 8541 }, { "epoch": 0.03781486564256939, "grad_norm": 2.3760689268057926, "learning_rate": 3.781486564256939e-06, "loss": 0.4766, "step": 8542 }, { "epoch": 0.03781929257603258, "grad_norm": 2.431775933529992, "learning_rate": 3.7819292576032584e-06, "loss": 0.481, "step": 8543 }, { "epoch": 0.03782371950949577, "grad_norm": 3.2960581518933747, "learning_rate": 3.782371950949578e-06, "loss": 0.7648, "step": 8544 }, { "epoch": 0.03782814644295896, "grad_norm": 2.7351066212210826, "learning_rate": 3.7828146442958963e-06, "loss": 0.9187, "step": 8545 }, { "epoch": 0.03783257337642215, "grad_norm": 2.751647980197562, "learning_rate": 3.7832573376422153e-06, "loss": 0.8501, "step": 8546 }, { "epoch": 0.03783700030988534, "grad_norm": 3.206825217844337, "learning_rate": 3.7837000309885347e-06, "loss": 0.9228, "step": 8547 }, { "epoch": 0.03784142724334853, "grad_norm": 2.6790866424584574, "learning_rate": 3.784142724334853e-06, "loss": 0.7765, "step": 8548 }, { "epoch": 0.03784585417681172, "grad_norm": 3.169842511846569, "learning_rate": 3.7845854176811725e-06, "loss": 0.9109, "step": 8549 }, { "epoch": 0.037850281110274914, "grad_norm": 2.968921089345211, "learning_rate": 3.7850281110274915e-06, "loss": 0.5878, "step": 8550 }, { "epoch": 0.037854708043738104, "grad_norm": 3.0776565794020234, "learning_rate": 3.7854708043738104e-06, "loss": 0.7609, "step": 8551 }, { "epoch": 0.037859134977201295, "grad_norm": 2.612384331867503, "learning_rate": 3.7859134977201294e-06, "loss": 0.6584, "step": 8552 }, { "epoch": 0.037863561910664485, "grad_norm": 2.991878236609281, "learning_rate": 3.7863561910664488e-06, "loss": 0.9726, "step": 8553 }, { "epoch": 0.037867988844127676, "grad_norm": 2.826523037749425, "learning_rate": 3.7867988844127673e-06, "loss": 0.8937, "step": 8554 }, { "epoch": 0.037872415777590866, "grad_norm": 3.0696754760251244, "learning_rate": 3.7872415777590867e-06, "loss": 0.8688, "step": 8555 }, { "epoch": 0.03787684271105405, "grad_norm": 3.151793754586396, "learning_rate": 3.7876842711054056e-06, "loss": 1.0597, "step": 8556 }, { "epoch": 0.03788126964451724, "grad_norm": 3.4321803545212477, "learning_rate": 3.7881269644517245e-06, "loss": 0.7274, "step": 8557 }, { "epoch": 0.03788569657798043, "grad_norm": 3.3806922396066743, "learning_rate": 3.7885696577980435e-06, "loss": 0.8376, "step": 8558 }, { "epoch": 0.03789012351144362, "grad_norm": 2.946571504684719, "learning_rate": 3.789012351144363e-06, "loss": 0.9211, "step": 8559 }, { "epoch": 0.03789455044490681, "grad_norm": 3.8459382073002346, "learning_rate": 3.7894550444906814e-06, "loss": 1.1271, "step": 8560 }, { "epoch": 0.03789897737837, "grad_norm": 2.849093474824542, "learning_rate": 3.7898977378370003e-06, "loss": 0.792, "step": 8561 }, { "epoch": 0.03790340431183319, "grad_norm": 2.7339330643339967, "learning_rate": 3.7903404311833197e-06, "loss": 0.7569, "step": 8562 }, { "epoch": 0.03790783124529638, "grad_norm": 3.1537234812198003, "learning_rate": 3.7907831245296382e-06, "loss": 0.7823, "step": 8563 }, { "epoch": 0.037912258178759574, "grad_norm": 3.0608669259773267, "learning_rate": 3.7912258178759576e-06, "loss": 0.7199, "step": 8564 }, { "epoch": 0.037916685112222764, "grad_norm": 3.9973640002395303, "learning_rate": 3.7916685112222765e-06, "loss": 1.1092, "step": 8565 }, { "epoch": 0.037921112045685955, "grad_norm": 2.9134506562911446, "learning_rate": 3.7921112045685955e-06, "loss": 0.8113, "step": 8566 }, { "epoch": 0.037925538979149145, "grad_norm": 4.158612219369911, "learning_rate": 3.7925538979149144e-06, "loss": 1.2776, "step": 8567 }, { "epoch": 0.037929965912612336, "grad_norm": 2.4859681644368496, "learning_rate": 3.792996591261234e-06, "loss": 0.6702, "step": 8568 }, { "epoch": 0.037934392846075526, "grad_norm": 2.396710925873245, "learning_rate": 3.7934392846075523e-06, "loss": 0.6012, "step": 8569 }, { "epoch": 0.03793881977953872, "grad_norm": 2.7872025412965895, "learning_rate": 3.7938819779538717e-06, "loss": 0.68, "step": 8570 }, { "epoch": 0.0379432467130019, "grad_norm": 3.1846172500672445, "learning_rate": 3.7943246713001907e-06, "loss": 0.9846, "step": 8571 }, { "epoch": 0.03794767364646509, "grad_norm": 3.927646862146754, "learning_rate": 3.7947673646465096e-06, "loss": 0.7035, "step": 8572 }, { "epoch": 0.03795210057992828, "grad_norm": 3.9639569309628784, "learning_rate": 3.7952100579928285e-06, "loss": 0.6493, "step": 8573 }, { "epoch": 0.03795652751339147, "grad_norm": 4.4106561667012265, "learning_rate": 3.795652751339148e-06, "loss": 1.0276, "step": 8574 }, { "epoch": 0.03796095444685466, "grad_norm": 2.7998315234787907, "learning_rate": 3.7960954446854664e-06, "loss": 0.6312, "step": 8575 }, { "epoch": 0.03796538138031785, "grad_norm": 2.680736542915692, "learning_rate": 3.796538138031786e-06, "loss": 0.725, "step": 8576 }, { "epoch": 0.037969808313781044, "grad_norm": 2.6253841716060506, "learning_rate": 3.7969808313781048e-06, "loss": 0.5606, "step": 8577 }, { "epoch": 0.037974235247244234, "grad_norm": 3.440041213282882, "learning_rate": 3.7974235247244233e-06, "loss": 1.0462, "step": 8578 }, { "epoch": 0.037978662180707425, "grad_norm": 2.6793827862455757, "learning_rate": 3.7978662180707427e-06, "loss": 0.766, "step": 8579 }, { "epoch": 0.037983089114170615, "grad_norm": 2.831516037170696, "learning_rate": 3.798308911417062e-06, "loss": 0.5668, "step": 8580 }, { "epoch": 0.037987516047633806, "grad_norm": 2.84565229241467, "learning_rate": 3.7987516047633805e-06, "loss": 0.8995, "step": 8581 }, { "epoch": 0.037991942981096996, "grad_norm": 3.182052940150467, "learning_rate": 3.7991942981096995e-06, "loss": 0.5091, "step": 8582 }, { "epoch": 0.03799636991456019, "grad_norm": 2.9098264294773486, "learning_rate": 3.799636991456019e-06, "loss": 0.9661, "step": 8583 }, { "epoch": 0.03800079684802338, "grad_norm": 3.3457829338636915, "learning_rate": 3.8000796848023374e-06, "loss": 1.2016, "step": 8584 }, { "epoch": 0.03800522378148657, "grad_norm": 2.583207759855768, "learning_rate": 3.8005223781486568e-06, "loss": 0.6514, "step": 8585 }, { "epoch": 0.03800965071494975, "grad_norm": 2.9092755130843426, "learning_rate": 3.8009650714949757e-06, "loss": 0.9348, "step": 8586 }, { "epoch": 0.03801407764841294, "grad_norm": 2.9247587956477434, "learning_rate": 3.8014077648412947e-06, "loss": 0.6789, "step": 8587 }, { "epoch": 0.03801850458187613, "grad_norm": 2.635725940148485, "learning_rate": 3.8018504581876136e-06, "loss": 0.7769, "step": 8588 }, { "epoch": 0.03802293151533932, "grad_norm": 3.2206438759833134, "learning_rate": 3.802293151533933e-06, "loss": 0.8515, "step": 8589 }, { "epoch": 0.03802735844880251, "grad_norm": 3.3190079492443276, "learning_rate": 3.8027358448802515e-06, "loss": 0.9507, "step": 8590 }, { "epoch": 0.038031785382265704, "grad_norm": 3.3996143869344957, "learning_rate": 3.803178538226571e-06, "loss": 1.2441, "step": 8591 }, { "epoch": 0.038036212315728894, "grad_norm": 3.1945511910091122, "learning_rate": 3.80362123157289e-06, "loss": 0.6721, "step": 8592 }, { "epoch": 0.038040639249192085, "grad_norm": 3.2692365185005174, "learning_rate": 3.8040639249192083e-06, "loss": 0.466, "step": 8593 }, { "epoch": 0.038045066182655275, "grad_norm": 2.7401089407362123, "learning_rate": 3.8045066182655277e-06, "loss": 0.658, "step": 8594 }, { "epoch": 0.038049493116118466, "grad_norm": 2.8072587438938315, "learning_rate": 3.804949311611847e-06, "loss": 0.7506, "step": 8595 }, { "epoch": 0.038053920049581656, "grad_norm": 2.80127279359656, "learning_rate": 3.8053920049581656e-06, "loss": 0.7387, "step": 8596 }, { "epoch": 0.03805834698304485, "grad_norm": 2.593518093377373, "learning_rate": 3.8058346983044846e-06, "loss": 0.7076, "step": 8597 }, { "epoch": 0.03806277391650804, "grad_norm": 3.890130941435532, "learning_rate": 3.806277391650804e-06, "loss": 1.2581, "step": 8598 }, { "epoch": 0.03806720084997123, "grad_norm": 2.5498628556824623, "learning_rate": 3.8067200849971224e-06, "loss": 0.8088, "step": 8599 }, { "epoch": 0.03807162778343442, "grad_norm": 3.508751602490799, "learning_rate": 3.807162778343442e-06, "loss": 1.0752, "step": 8600 }, { "epoch": 0.0380760547168976, "grad_norm": 3.3782710531434796, "learning_rate": 3.8076054716897608e-06, "loss": 0.9579, "step": 8601 }, { "epoch": 0.03808048165036079, "grad_norm": 3.042611466312061, "learning_rate": 3.8080481650360797e-06, "loss": 0.718, "step": 8602 }, { "epoch": 0.03808490858382398, "grad_norm": 2.9548178494459902, "learning_rate": 3.8084908583823987e-06, "loss": 0.9859, "step": 8603 }, { "epoch": 0.03808933551728717, "grad_norm": 2.6158792309818772, "learning_rate": 3.808933551728718e-06, "loss": 0.7003, "step": 8604 }, { "epoch": 0.038093762450750364, "grad_norm": 3.041392195592174, "learning_rate": 3.8093762450750366e-06, "loss": 0.8677, "step": 8605 }, { "epoch": 0.038098189384213554, "grad_norm": 2.896674615673743, "learning_rate": 3.809818938421356e-06, "loss": 0.48, "step": 8606 }, { "epoch": 0.038102616317676745, "grad_norm": 2.4171670412785358, "learning_rate": 3.810261631767675e-06, "loss": 0.4743, "step": 8607 }, { "epoch": 0.038107043251139935, "grad_norm": 3.2730630828454927, "learning_rate": 3.8107043251139934e-06, "loss": 0.7657, "step": 8608 }, { "epoch": 0.038111470184603126, "grad_norm": 3.6217986467045757, "learning_rate": 3.8111470184603128e-06, "loss": 0.955, "step": 8609 }, { "epoch": 0.038115897118066316, "grad_norm": 2.921735079758053, "learning_rate": 3.811589711806632e-06, "loss": 0.8496, "step": 8610 }, { "epoch": 0.03812032405152951, "grad_norm": 2.4851784244974513, "learning_rate": 3.8120324051529507e-06, "loss": 0.6523, "step": 8611 }, { "epoch": 0.0381247509849927, "grad_norm": 2.9169402401498314, "learning_rate": 3.8124750984992696e-06, "loss": 0.7617, "step": 8612 }, { "epoch": 0.03812917791845589, "grad_norm": 3.256912335628549, "learning_rate": 3.812917791845589e-06, "loss": 1.0015, "step": 8613 }, { "epoch": 0.03813360485191908, "grad_norm": 2.94093325677443, "learning_rate": 3.8133604851919075e-06, "loss": 0.7371, "step": 8614 }, { "epoch": 0.03813803178538227, "grad_norm": 2.721399828873708, "learning_rate": 3.813803178538227e-06, "loss": 0.6427, "step": 8615 }, { "epoch": 0.03814245871884545, "grad_norm": 2.6896872025433067, "learning_rate": 3.814245871884546e-06, "loss": 0.539, "step": 8616 }, { "epoch": 0.03814688565230864, "grad_norm": 3.1889021403971847, "learning_rate": 3.8146885652308648e-06, "loss": 1.0513, "step": 8617 }, { "epoch": 0.038151312585771834, "grad_norm": 2.868591057312025, "learning_rate": 3.815131258577184e-06, "loss": 0.5717, "step": 8618 }, { "epoch": 0.038155739519235024, "grad_norm": 2.955793833776889, "learning_rate": 3.815573951923503e-06, "loss": 1.1177, "step": 8619 }, { "epoch": 0.038160166452698215, "grad_norm": 2.7088009380959646, "learning_rate": 3.816016645269822e-06, "loss": 0.8841, "step": 8620 }, { "epoch": 0.038164593386161405, "grad_norm": 2.5189235834382413, "learning_rate": 3.8164593386161406e-06, "loss": 0.6809, "step": 8621 }, { "epoch": 0.038169020319624596, "grad_norm": 2.9461679326727923, "learning_rate": 3.81690203196246e-06, "loss": 0.9269, "step": 8622 }, { "epoch": 0.038173447253087786, "grad_norm": 3.032738534915718, "learning_rate": 3.817344725308779e-06, "loss": 1.0711, "step": 8623 }, { "epoch": 0.03817787418655098, "grad_norm": 2.324771997810477, "learning_rate": 3.817787418655098e-06, "loss": 0.6713, "step": 8624 }, { "epoch": 0.03818230112001417, "grad_norm": 2.8611669296052917, "learning_rate": 3.818230112001417e-06, "loss": 0.6311, "step": 8625 }, { "epoch": 0.03818672805347736, "grad_norm": 2.8463776157566345, "learning_rate": 3.8186728053477366e-06, "loss": 0.7536, "step": 8626 }, { "epoch": 0.03819115498694055, "grad_norm": 3.1712219159223825, "learning_rate": 3.819115498694055e-06, "loss": 0.7604, "step": 8627 }, { "epoch": 0.03819558192040374, "grad_norm": 2.9700444768685617, "learning_rate": 3.8195581920403745e-06, "loss": 1.0301, "step": 8628 }, { "epoch": 0.03820000885386693, "grad_norm": 3.232667169817758, "learning_rate": 3.820000885386693e-06, "loss": 0.9668, "step": 8629 }, { "epoch": 0.03820443578733012, "grad_norm": 2.844401903276883, "learning_rate": 3.8204435787330115e-06, "loss": 0.7738, "step": 8630 }, { "epoch": 0.0382088627207933, "grad_norm": 3.15321083499919, "learning_rate": 3.820886272079331e-06, "loss": 1.0671, "step": 8631 }, { "epoch": 0.038213289654256494, "grad_norm": 3.0035675436222444, "learning_rate": 3.82132896542565e-06, "loss": 0.5349, "step": 8632 }, { "epoch": 0.038217716587719684, "grad_norm": 2.839944373681658, "learning_rate": 3.821771658771969e-06, "loss": 0.6211, "step": 8633 }, { "epoch": 0.038222143521182875, "grad_norm": 3.245645066578961, "learning_rate": 3.822214352118288e-06, "loss": 1.0205, "step": 8634 }, { "epoch": 0.038226570454646065, "grad_norm": 2.616100199865266, "learning_rate": 3.8226570454646075e-06, "loss": 0.6621, "step": 8635 }, { "epoch": 0.038230997388109256, "grad_norm": 3.167892464586596, "learning_rate": 3.823099738810926e-06, "loss": 0.9435, "step": 8636 }, { "epoch": 0.038235424321572446, "grad_norm": 2.891607083105791, "learning_rate": 3.823542432157245e-06, "loss": 0.6256, "step": 8637 }, { "epoch": 0.03823985125503564, "grad_norm": 2.571698567839702, "learning_rate": 3.823985125503564e-06, "loss": 0.586, "step": 8638 }, { "epoch": 0.03824427818849883, "grad_norm": 2.8325972123371486, "learning_rate": 3.824427818849883e-06, "loss": 0.6913, "step": 8639 }, { "epoch": 0.03824870512196202, "grad_norm": 3.485850340120476, "learning_rate": 3.824870512196202e-06, "loss": 1.3334, "step": 8640 }, { "epoch": 0.03825313205542521, "grad_norm": 2.694271642584265, "learning_rate": 3.825313205542521e-06, "loss": 0.7582, "step": 8641 }, { "epoch": 0.0382575589888884, "grad_norm": 2.760578030145785, "learning_rate": 3.82575589888884e-06, "loss": 0.9418, "step": 8642 }, { "epoch": 0.03826198592235159, "grad_norm": 2.686250573075948, "learning_rate": 3.826198592235159e-06, "loss": 0.8268, "step": 8643 }, { "epoch": 0.03826641285581478, "grad_norm": 3.082000486870942, "learning_rate": 3.8266412855814785e-06, "loss": 0.7572, "step": 8644 }, { "epoch": 0.03827083978927797, "grad_norm": 3.1884499064484153, "learning_rate": 3.827083978927797e-06, "loss": 0.9198, "step": 8645 }, { "epoch": 0.038275266722741154, "grad_norm": 2.622379101320152, "learning_rate": 3.827526672274116e-06, "loss": 0.7699, "step": 8646 }, { "epoch": 0.038279693656204344, "grad_norm": 2.4390290522042752, "learning_rate": 3.827969365620436e-06, "loss": 0.8441, "step": 8647 }, { "epoch": 0.038284120589667535, "grad_norm": 2.339104255309195, "learning_rate": 3.828412058966754e-06, "loss": 0.4996, "step": 8648 }, { "epoch": 0.038288547523130725, "grad_norm": 2.317249193705575, "learning_rate": 3.828854752313073e-06, "loss": 0.6554, "step": 8649 }, { "epoch": 0.038292974456593916, "grad_norm": 3.592637157901963, "learning_rate": 3.829297445659392e-06, "loss": 1.0461, "step": 8650 }, { "epoch": 0.038297401390057106, "grad_norm": 2.529270221020675, "learning_rate": 3.829740139005711e-06, "loss": 1.0057, "step": 8651 }, { "epoch": 0.0383018283235203, "grad_norm": 2.454189461421632, "learning_rate": 3.83018283235203e-06, "loss": 0.6014, "step": 8652 }, { "epoch": 0.03830625525698349, "grad_norm": 2.810578286077906, "learning_rate": 3.830625525698349e-06, "loss": 0.5397, "step": 8653 }, { "epoch": 0.03831068219044668, "grad_norm": 2.5136501322170104, "learning_rate": 3.831068219044668e-06, "loss": 0.6722, "step": 8654 }, { "epoch": 0.03831510912390987, "grad_norm": 2.830379813993576, "learning_rate": 3.831510912390987e-06, "loss": 0.8076, "step": 8655 }, { "epoch": 0.03831953605737306, "grad_norm": 3.0333373557289884, "learning_rate": 3.831953605737307e-06, "loss": 0.8417, "step": 8656 }, { "epoch": 0.03832396299083625, "grad_norm": 3.0835819455802946, "learning_rate": 3.832396299083625e-06, "loss": 0.8575, "step": 8657 }, { "epoch": 0.03832838992429944, "grad_norm": 2.889109340069613, "learning_rate": 3.8328389924299446e-06, "loss": 0.626, "step": 8658 }, { "epoch": 0.03833281685776263, "grad_norm": 2.5401065240097735, "learning_rate": 3.833281685776263e-06, "loss": 0.7822, "step": 8659 }, { "epoch": 0.03833724379122582, "grad_norm": 2.8309336471069706, "learning_rate": 3.833724379122582e-06, "loss": 0.9804, "step": 8660 }, { "epoch": 0.038341670724689005, "grad_norm": 2.933259690249187, "learning_rate": 3.834167072468901e-06, "loss": 0.8602, "step": 8661 }, { "epoch": 0.038346097658152195, "grad_norm": 3.135135581821004, "learning_rate": 3.83460976581522e-06, "loss": 0.6191, "step": 8662 }, { "epoch": 0.038350524591615386, "grad_norm": 2.929780398605595, "learning_rate": 3.835052459161539e-06, "loss": 0.7741, "step": 8663 }, { "epoch": 0.038354951525078576, "grad_norm": 2.7489467207380476, "learning_rate": 3.835495152507858e-06, "loss": 0.706, "step": 8664 }, { "epoch": 0.03835937845854177, "grad_norm": 3.136921734512716, "learning_rate": 3.835937845854178e-06, "loss": 0.8551, "step": 8665 }, { "epoch": 0.03836380539200496, "grad_norm": 3.668157823760164, "learning_rate": 3.836380539200496e-06, "loss": 1.1565, "step": 8666 }, { "epoch": 0.03836823232546815, "grad_norm": 3.245834044003476, "learning_rate": 3.8368232325468155e-06, "loss": 0.8411, "step": 8667 }, { "epoch": 0.03837265925893134, "grad_norm": 3.594649142265071, "learning_rate": 3.837265925893134e-06, "loss": 0.9428, "step": 8668 }, { "epoch": 0.03837708619239453, "grad_norm": 2.634282175408912, "learning_rate": 3.837708619239453e-06, "loss": 0.7302, "step": 8669 }, { "epoch": 0.03838151312585772, "grad_norm": 3.244335368282169, "learning_rate": 3.838151312585772e-06, "loss": 1.0176, "step": 8670 }, { "epoch": 0.03838594005932091, "grad_norm": 3.0866787887585634, "learning_rate": 3.838594005932091e-06, "loss": 1.0286, "step": 8671 }, { "epoch": 0.0383903669927841, "grad_norm": 3.1267271917860926, "learning_rate": 3.83903669927841e-06, "loss": 1.0734, "step": 8672 }, { "epoch": 0.03839479392624729, "grad_norm": 2.904754011931471, "learning_rate": 3.839479392624729e-06, "loss": 0.6612, "step": 8673 }, { "epoch": 0.03839922085971048, "grad_norm": 3.5239358002656713, "learning_rate": 3.8399220859710486e-06, "loss": 1.0079, "step": 8674 }, { "epoch": 0.03840364779317367, "grad_norm": 3.0672707690980565, "learning_rate": 3.840364779317367e-06, "loss": 0.9244, "step": 8675 }, { "epoch": 0.038408074726636855, "grad_norm": 2.5827076402849345, "learning_rate": 3.8408074726636865e-06, "loss": 0.8135, "step": 8676 }, { "epoch": 0.038412501660100046, "grad_norm": 2.712657719282522, "learning_rate": 3.841250166010006e-06, "loss": 0.6569, "step": 8677 }, { "epoch": 0.038416928593563236, "grad_norm": 4.012072475210157, "learning_rate": 3.841692859356324e-06, "loss": 0.9875, "step": 8678 }, { "epoch": 0.03842135552702643, "grad_norm": 2.3179910193037125, "learning_rate": 3.842135552702643e-06, "loss": 0.5645, "step": 8679 }, { "epoch": 0.03842578246048962, "grad_norm": 2.764229901077735, "learning_rate": 3.842578246048962e-06, "loss": 0.7941, "step": 8680 }, { "epoch": 0.03843020939395281, "grad_norm": 3.3856860585194015, "learning_rate": 3.843020939395281e-06, "loss": 1.0702, "step": 8681 }, { "epoch": 0.038434636327416, "grad_norm": 2.6473444671655026, "learning_rate": 3.8434636327416e-06, "loss": 0.9339, "step": 8682 }, { "epoch": 0.03843906326087919, "grad_norm": 3.2133257687395513, "learning_rate": 3.8439063260879195e-06, "loss": 0.7563, "step": 8683 }, { "epoch": 0.03844349019434238, "grad_norm": 2.9179762067879405, "learning_rate": 3.844349019434238e-06, "loss": 0.8006, "step": 8684 }, { "epoch": 0.03844791712780557, "grad_norm": 2.3736195337471817, "learning_rate": 3.844791712780557e-06, "loss": 0.4393, "step": 8685 }, { "epoch": 0.03845234406126876, "grad_norm": 2.144472218284132, "learning_rate": 3.845234406126877e-06, "loss": 0.5158, "step": 8686 }, { "epoch": 0.03845677099473195, "grad_norm": 2.6427925440861673, "learning_rate": 3.845677099473195e-06, "loss": 0.5412, "step": 8687 }, { "epoch": 0.03846119792819514, "grad_norm": 2.5057128109089404, "learning_rate": 3.846119792819515e-06, "loss": 0.4674, "step": 8688 }, { "epoch": 0.03846562486165833, "grad_norm": 3.6409997536227907, "learning_rate": 3.846562486165833e-06, "loss": 1.0697, "step": 8689 }, { "epoch": 0.03847005179512152, "grad_norm": 2.5591839676729755, "learning_rate": 3.847005179512152e-06, "loss": 0.8302, "step": 8690 }, { "epoch": 0.038474478728584706, "grad_norm": 2.7520104377132864, "learning_rate": 3.847447872858471e-06, "loss": 0.8509, "step": 8691 }, { "epoch": 0.038478905662047896, "grad_norm": 2.635396163817549, "learning_rate": 3.8478905662047905e-06, "loss": 0.5083, "step": 8692 }, { "epoch": 0.03848333259551109, "grad_norm": 2.7863958124441934, "learning_rate": 3.848333259551109e-06, "loss": 0.5451, "step": 8693 }, { "epoch": 0.03848775952897428, "grad_norm": 2.56624189542381, "learning_rate": 3.848775952897428e-06, "loss": 0.7355, "step": 8694 }, { "epoch": 0.03849218646243747, "grad_norm": 2.9585587984402992, "learning_rate": 3.849218646243748e-06, "loss": 0.6004, "step": 8695 }, { "epoch": 0.03849661339590066, "grad_norm": 2.7324401074944067, "learning_rate": 3.849661339590066e-06, "loss": 0.5277, "step": 8696 }, { "epoch": 0.03850104032936385, "grad_norm": 3.1501070382202707, "learning_rate": 3.850104032936386e-06, "loss": 0.8205, "step": 8697 }, { "epoch": 0.03850546726282704, "grad_norm": 2.599238731846529, "learning_rate": 3.850546726282704e-06, "loss": 0.7129, "step": 8698 }, { "epoch": 0.03850989419629023, "grad_norm": 3.204858012534369, "learning_rate": 3.8509894196290235e-06, "loss": 0.8802, "step": 8699 }, { "epoch": 0.03851432112975342, "grad_norm": 3.2659769042216444, "learning_rate": 3.851432112975342e-06, "loss": 0.8336, "step": 8700 }, { "epoch": 0.03851874806321661, "grad_norm": 3.4760011450042696, "learning_rate": 3.851874806321661e-06, "loss": 1.0428, "step": 8701 }, { "epoch": 0.0385231749966798, "grad_norm": 2.68180120828587, "learning_rate": 3.85231749966798e-06, "loss": 0.6148, "step": 8702 }, { "epoch": 0.03852760193014299, "grad_norm": 3.5281612725338056, "learning_rate": 3.852760193014299e-06, "loss": 0.9189, "step": 8703 }, { "epoch": 0.03853202886360618, "grad_norm": 2.6249193922323277, "learning_rate": 3.853202886360619e-06, "loss": 0.6847, "step": 8704 }, { "epoch": 0.03853645579706937, "grad_norm": 2.6507367597979066, "learning_rate": 3.853645579706937e-06, "loss": 0.79, "step": 8705 }, { "epoch": 0.038540882730532564, "grad_norm": 2.6304101252304295, "learning_rate": 3.8540882730532566e-06, "loss": 0.671, "step": 8706 }, { "epoch": 0.03854530966399575, "grad_norm": 3.3906305528692102, "learning_rate": 3.854530966399576e-06, "loss": 0.9832, "step": 8707 }, { "epoch": 0.03854973659745894, "grad_norm": 2.8603038521489452, "learning_rate": 3.8549736597458945e-06, "loss": 0.6154, "step": 8708 }, { "epoch": 0.03855416353092213, "grad_norm": 3.202157939874632, "learning_rate": 3.855416353092214e-06, "loss": 0.9245, "step": 8709 }, { "epoch": 0.03855859046438532, "grad_norm": 3.1534594754291225, "learning_rate": 3.855859046438532e-06, "loss": 0.7671, "step": 8710 }, { "epoch": 0.03856301739784851, "grad_norm": 3.0964434525474847, "learning_rate": 3.856301739784851e-06, "loss": 0.8618, "step": 8711 }, { "epoch": 0.0385674443313117, "grad_norm": 2.8872762707657103, "learning_rate": 3.85674443313117e-06, "loss": 0.8949, "step": 8712 }, { "epoch": 0.03857187126477489, "grad_norm": 2.532800773799718, "learning_rate": 3.85718712647749e-06, "loss": 0.6586, "step": 8713 }, { "epoch": 0.03857629819823808, "grad_norm": 2.843984919708959, "learning_rate": 3.857629819823808e-06, "loss": 0.8537, "step": 8714 }, { "epoch": 0.03858072513170127, "grad_norm": 2.726991048950916, "learning_rate": 3.8580725131701275e-06, "loss": 0.8332, "step": 8715 }, { "epoch": 0.03858515206516446, "grad_norm": 2.7926911847374116, "learning_rate": 3.858515206516447e-06, "loss": 0.7842, "step": 8716 }, { "epoch": 0.03858957899862765, "grad_norm": 2.505767065723782, "learning_rate": 3.858957899862765e-06, "loss": 0.7611, "step": 8717 }, { "epoch": 0.03859400593209084, "grad_norm": 2.9940581567641775, "learning_rate": 3.859400593209085e-06, "loss": 0.6408, "step": 8718 }, { "epoch": 0.03859843286555403, "grad_norm": 2.5654503749998567, "learning_rate": 3.859843286555403e-06, "loss": 0.6122, "step": 8719 }, { "epoch": 0.038602859799017224, "grad_norm": 3.1368756742428365, "learning_rate": 3.860285979901723e-06, "loss": 1.0104, "step": 8720 }, { "epoch": 0.038607286732480414, "grad_norm": 3.7704198087006042, "learning_rate": 3.860728673248041e-06, "loss": 0.808, "step": 8721 }, { "epoch": 0.0386117136659436, "grad_norm": 3.4274090199359843, "learning_rate": 3.8611713665943606e-06, "loss": 1.311, "step": 8722 }, { "epoch": 0.03861614059940679, "grad_norm": 2.875211124117502, "learning_rate": 3.861614059940679e-06, "loss": 0.8032, "step": 8723 }, { "epoch": 0.03862056753286998, "grad_norm": 3.301792893155742, "learning_rate": 3.8620567532869985e-06, "loss": 1.1876, "step": 8724 }, { "epoch": 0.03862499446633317, "grad_norm": 4.47117156984838, "learning_rate": 3.862499446633318e-06, "loss": 1.1432, "step": 8725 }, { "epoch": 0.03862942139979636, "grad_norm": 2.683532534262483, "learning_rate": 3.862942139979636e-06, "loss": 0.6218, "step": 8726 }, { "epoch": 0.03863384833325955, "grad_norm": 2.7448534324085974, "learning_rate": 3.863384833325956e-06, "loss": 0.8664, "step": 8727 }, { "epoch": 0.03863827526672274, "grad_norm": 2.80672898367697, "learning_rate": 3.863827526672275e-06, "loss": 0.6741, "step": 8728 }, { "epoch": 0.03864270220018593, "grad_norm": 2.649755996290276, "learning_rate": 3.864270220018594e-06, "loss": 0.9358, "step": 8729 }, { "epoch": 0.03864712913364912, "grad_norm": 2.7026571690941705, "learning_rate": 3.864712913364912e-06, "loss": 0.7153, "step": 8730 }, { "epoch": 0.03865155606711231, "grad_norm": 2.486417556432448, "learning_rate": 3.8651556067112315e-06, "loss": 0.774, "step": 8731 }, { "epoch": 0.0386559830005755, "grad_norm": 3.2878851650069603, "learning_rate": 3.86559830005755e-06, "loss": 0.9711, "step": 8732 }, { "epoch": 0.03866040993403869, "grad_norm": 3.592837721583428, "learning_rate": 3.866040993403869e-06, "loss": 0.5839, "step": 8733 }, { "epoch": 0.038664836867501884, "grad_norm": 3.1233308893047407, "learning_rate": 3.866483686750189e-06, "loss": 0.6251, "step": 8734 }, { "epoch": 0.038669263800965074, "grad_norm": 2.5963038487454675, "learning_rate": 3.866926380096507e-06, "loss": 0.619, "step": 8735 }, { "epoch": 0.038673690734428265, "grad_norm": 3.0324273324454736, "learning_rate": 3.867369073442827e-06, "loss": 1.0855, "step": 8736 }, { "epoch": 0.03867811766789145, "grad_norm": 3.19416710395804, "learning_rate": 3.867811766789146e-06, "loss": 1.1055, "step": 8737 }, { "epoch": 0.03868254460135464, "grad_norm": 3.609546797016092, "learning_rate": 3.8682544601354646e-06, "loss": 1.0514, "step": 8738 }, { "epoch": 0.03868697153481783, "grad_norm": 2.4872978486472643, "learning_rate": 3.868697153481784e-06, "loss": 0.5839, "step": 8739 }, { "epoch": 0.03869139846828102, "grad_norm": 2.8620116343309707, "learning_rate": 3.8691398468281025e-06, "loss": 0.7823, "step": 8740 }, { "epoch": 0.03869582540174421, "grad_norm": 2.6070524926132754, "learning_rate": 3.869582540174421e-06, "loss": 0.7563, "step": 8741 }, { "epoch": 0.0387002523352074, "grad_norm": 2.8880008368350247, "learning_rate": 3.87002523352074e-06, "loss": 0.9024, "step": 8742 }, { "epoch": 0.03870467926867059, "grad_norm": 2.732813779786691, "learning_rate": 3.87046792686706e-06, "loss": 0.7523, "step": 8743 }, { "epoch": 0.03870910620213378, "grad_norm": 3.674422703109463, "learning_rate": 3.870910620213378e-06, "loss": 1.0599, "step": 8744 }, { "epoch": 0.03871353313559697, "grad_norm": 3.0543748887153352, "learning_rate": 3.871353313559698e-06, "loss": 0.9001, "step": 8745 }, { "epoch": 0.03871796006906016, "grad_norm": 3.0945430688810562, "learning_rate": 3.871796006906017e-06, "loss": 1.0768, "step": 8746 }, { "epoch": 0.038722387002523354, "grad_norm": 2.7180732467785393, "learning_rate": 3.8722387002523355e-06, "loss": 0.7327, "step": 8747 }, { "epoch": 0.038726813935986544, "grad_norm": 2.4943570312161585, "learning_rate": 3.872681393598655e-06, "loss": 0.7199, "step": 8748 }, { "epoch": 0.038731240869449735, "grad_norm": 2.2728408390792194, "learning_rate": 3.873124086944973e-06, "loss": 0.5661, "step": 8749 }, { "epoch": 0.038735667802912925, "grad_norm": 2.9911834304000466, "learning_rate": 3.873566780291293e-06, "loss": 0.9069, "step": 8750 }, { "epoch": 0.038740094736376116, "grad_norm": 2.568841760317663, "learning_rate": 3.874009473637611e-06, "loss": 0.6084, "step": 8751 }, { "epoch": 0.0387445216698393, "grad_norm": 2.612311733345511, "learning_rate": 3.874452166983931e-06, "loss": 0.5888, "step": 8752 }, { "epoch": 0.03874894860330249, "grad_norm": 2.9807781142284835, "learning_rate": 3.874894860330249e-06, "loss": 1.029, "step": 8753 }, { "epoch": 0.03875337553676568, "grad_norm": 2.8764583989775403, "learning_rate": 3.8753375536765686e-06, "loss": 0.7743, "step": 8754 }, { "epoch": 0.03875780247022887, "grad_norm": 4.075887141394631, "learning_rate": 3.875780247022888e-06, "loss": 0.9053, "step": 8755 }, { "epoch": 0.03876222940369206, "grad_norm": 2.679520669177097, "learning_rate": 3.8762229403692065e-06, "loss": 0.8827, "step": 8756 }, { "epoch": 0.03876665633715525, "grad_norm": 2.695939800430597, "learning_rate": 3.876665633715526e-06, "loss": 0.6497, "step": 8757 }, { "epoch": 0.03877108327061844, "grad_norm": 3.040662534881093, "learning_rate": 3.877108327061845e-06, "loss": 0.8938, "step": 8758 }, { "epoch": 0.03877551020408163, "grad_norm": 2.549749062169255, "learning_rate": 3.877551020408164e-06, "loss": 0.7118, "step": 8759 }, { "epoch": 0.03877993713754482, "grad_norm": 2.507911188865815, "learning_rate": 3.877993713754482e-06, "loss": 0.5526, "step": 8760 }, { "epoch": 0.038784364071008014, "grad_norm": 2.307150879110055, "learning_rate": 3.878436407100802e-06, "loss": 0.6616, "step": 8761 }, { "epoch": 0.038788791004471204, "grad_norm": 2.8661924919831057, "learning_rate": 3.87887910044712e-06, "loss": 1.0972, "step": 8762 }, { "epoch": 0.038793217937934395, "grad_norm": 2.6895791330254135, "learning_rate": 3.8793217937934395e-06, "loss": 0.5956, "step": 8763 }, { "epoch": 0.038797644871397585, "grad_norm": 3.5440191863431014, "learning_rate": 3.879764487139759e-06, "loss": 0.9378, "step": 8764 }, { "epoch": 0.038802071804860776, "grad_norm": 2.8513497890252593, "learning_rate": 3.880207180486077e-06, "loss": 0.7179, "step": 8765 }, { "epoch": 0.038806498738323966, "grad_norm": 2.5664733552374974, "learning_rate": 3.880649873832397e-06, "loss": 0.7849, "step": 8766 }, { "epoch": 0.03881092567178715, "grad_norm": 3.2268293437232214, "learning_rate": 3.881092567178716e-06, "loss": 0.9649, "step": 8767 }, { "epoch": 0.03881535260525034, "grad_norm": 2.977052011136424, "learning_rate": 3.881535260525035e-06, "loss": 0.6592, "step": 8768 }, { "epoch": 0.03881977953871353, "grad_norm": 2.7688014693199396, "learning_rate": 3.881977953871354e-06, "loss": 0.9058, "step": 8769 }, { "epoch": 0.03882420647217672, "grad_norm": 2.7196061386778623, "learning_rate": 3.8824206472176726e-06, "loss": 0.7688, "step": 8770 }, { "epoch": 0.03882863340563991, "grad_norm": 2.745347865460341, "learning_rate": 3.882863340563991e-06, "loss": 0.8331, "step": 8771 }, { "epoch": 0.0388330603391031, "grad_norm": 2.6402202379185713, "learning_rate": 3.8833060339103105e-06, "loss": 0.817, "step": 8772 }, { "epoch": 0.03883748727256629, "grad_norm": 2.805290252011265, "learning_rate": 3.88374872725663e-06, "loss": 0.5281, "step": 8773 }, { "epoch": 0.03884191420602948, "grad_norm": 2.8244740482256945, "learning_rate": 3.884191420602948e-06, "loss": 0.9507, "step": 8774 }, { "epoch": 0.038846341139492674, "grad_norm": 2.7231722557375075, "learning_rate": 3.884634113949268e-06, "loss": 0.6933, "step": 8775 }, { "epoch": 0.038850768072955864, "grad_norm": 2.8080096542978925, "learning_rate": 3.885076807295587e-06, "loss": 0.9904, "step": 8776 }, { "epoch": 0.038855195006419055, "grad_norm": 2.549478265869732, "learning_rate": 3.885519500641906e-06, "loss": 0.6495, "step": 8777 }, { "epoch": 0.038859621939882245, "grad_norm": 3.393136923970958, "learning_rate": 3.885962193988225e-06, "loss": 0.938, "step": 8778 }, { "epoch": 0.038864048873345436, "grad_norm": 2.945504891556573, "learning_rate": 3.8864048873345435e-06, "loss": 0.7773, "step": 8779 }, { "epoch": 0.038868475806808626, "grad_norm": 2.708989530664879, "learning_rate": 3.886847580680863e-06, "loss": 0.612, "step": 8780 }, { "epoch": 0.03887290274027182, "grad_norm": 3.093026615276355, "learning_rate": 3.887290274027181e-06, "loss": 0.696, "step": 8781 }, { "epoch": 0.038877329673735, "grad_norm": 3.513963282501656, "learning_rate": 3.887732967373501e-06, "loss": 1.2577, "step": 8782 }, { "epoch": 0.03888175660719819, "grad_norm": 2.881887708590949, "learning_rate": 3.888175660719819e-06, "loss": 0.9624, "step": 8783 }, { "epoch": 0.03888618354066138, "grad_norm": 2.618339151890415, "learning_rate": 3.888618354066139e-06, "loss": 0.7733, "step": 8784 }, { "epoch": 0.03889061047412457, "grad_norm": 2.5302213479953357, "learning_rate": 3.889061047412458e-06, "loss": 0.8068, "step": 8785 }, { "epoch": 0.03889503740758776, "grad_norm": 2.9495116098923875, "learning_rate": 3.8895037407587766e-06, "loss": 0.8347, "step": 8786 }, { "epoch": 0.03889946434105095, "grad_norm": 2.892134642032693, "learning_rate": 3.889946434105096e-06, "loss": 0.6655, "step": 8787 }, { "epoch": 0.038903891274514144, "grad_norm": 2.6844683859943985, "learning_rate": 3.890389127451415e-06, "loss": 0.5686, "step": 8788 }, { "epoch": 0.038908318207977334, "grad_norm": 3.3498746375855917, "learning_rate": 3.890831820797734e-06, "loss": 0.8156, "step": 8789 }, { "epoch": 0.038912745141440525, "grad_norm": 2.503446291053186, "learning_rate": 3.891274514144052e-06, "loss": 0.5559, "step": 8790 }, { "epoch": 0.038917172074903715, "grad_norm": 2.6277064999343493, "learning_rate": 3.891717207490372e-06, "loss": 0.6726, "step": 8791 }, { "epoch": 0.038921599008366906, "grad_norm": 4.734928501176527, "learning_rate": 3.89215990083669e-06, "loss": 1.8273, "step": 8792 }, { "epoch": 0.038926025941830096, "grad_norm": 4.790196764105228, "learning_rate": 3.89260259418301e-06, "loss": 0.9013, "step": 8793 }, { "epoch": 0.03893045287529329, "grad_norm": 3.1354236930261257, "learning_rate": 3.893045287529329e-06, "loss": 1.163, "step": 8794 }, { "epoch": 0.03893487980875648, "grad_norm": 2.754123849423538, "learning_rate": 3.8934879808756475e-06, "loss": 0.7366, "step": 8795 }, { "epoch": 0.03893930674221967, "grad_norm": 2.644360054207778, "learning_rate": 3.893930674221967e-06, "loss": 0.6693, "step": 8796 }, { "epoch": 0.03894373367568285, "grad_norm": 2.6148524076727817, "learning_rate": 3.894373367568286e-06, "loss": 0.7768, "step": 8797 }, { "epoch": 0.03894816060914604, "grad_norm": 2.9563084529808696, "learning_rate": 3.894816060914605e-06, "loss": 0.7996, "step": 8798 }, { "epoch": 0.03895258754260923, "grad_norm": 2.8796117319031356, "learning_rate": 3.895258754260924e-06, "loss": 0.8606, "step": 8799 }, { "epoch": 0.03895701447607242, "grad_norm": 2.7941446166787944, "learning_rate": 3.895701447607243e-06, "loss": 0.6834, "step": 8800 }, { "epoch": 0.03896144140953561, "grad_norm": 3.1450449173086725, "learning_rate": 3.896144140953562e-06, "loss": 0.979, "step": 8801 }, { "epoch": 0.038965868342998804, "grad_norm": 2.7326629950949797, "learning_rate": 3.8965868342998806e-06, "loss": 0.8022, "step": 8802 }, { "epoch": 0.038970295276461994, "grad_norm": 3.5302384824761055, "learning_rate": 3.8970295276462e-06, "loss": 0.9969, "step": 8803 }, { "epoch": 0.038974722209925185, "grad_norm": 2.1311104721675966, "learning_rate": 3.8974722209925185e-06, "loss": 0.5674, "step": 8804 }, { "epoch": 0.038979149143388375, "grad_norm": 3.1440783187775607, "learning_rate": 3.897914914338838e-06, "loss": 0.7584, "step": 8805 }, { "epoch": 0.038983576076851566, "grad_norm": 2.551468922433049, "learning_rate": 3.898357607685157e-06, "loss": 0.6597, "step": 8806 }, { "epoch": 0.038988003010314756, "grad_norm": 2.4041634810102885, "learning_rate": 3.898800301031476e-06, "loss": 0.6426, "step": 8807 }, { "epoch": 0.03899242994377795, "grad_norm": 2.543259569812373, "learning_rate": 3.899242994377795e-06, "loss": 0.8369, "step": 8808 }, { "epoch": 0.03899685687724114, "grad_norm": 3.1617017413117594, "learning_rate": 3.8996856877241145e-06, "loss": 0.9686, "step": 8809 }, { "epoch": 0.03900128381070433, "grad_norm": 3.090505334700194, "learning_rate": 3.900128381070433e-06, "loss": 0.8052, "step": 8810 }, { "epoch": 0.03900571074416752, "grad_norm": 3.1859175230526526, "learning_rate": 3.9005710744167515e-06, "loss": 1.0413, "step": 8811 }, { "epoch": 0.0390101376776307, "grad_norm": 2.3683851669666227, "learning_rate": 3.901013767763071e-06, "loss": 0.7638, "step": 8812 }, { "epoch": 0.03901456461109389, "grad_norm": 3.0650553355220698, "learning_rate": 3.901456461109389e-06, "loss": 0.547, "step": 8813 }, { "epoch": 0.03901899154455708, "grad_norm": 3.472993349485162, "learning_rate": 3.901899154455709e-06, "loss": 1.334, "step": 8814 }, { "epoch": 0.03902341847802027, "grad_norm": 2.5257706660874946, "learning_rate": 3.902341847802028e-06, "loss": 0.6806, "step": 8815 }, { "epoch": 0.039027845411483464, "grad_norm": 3.3180216644161584, "learning_rate": 3.902784541148347e-06, "loss": 0.9601, "step": 8816 }, { "epoch": 0.039032272344946654, "grad_norm": 2.6918080111611764, "learning_rate": 3.903227234494666e-06, "loss": 0.6919, "step": 8817 }, { "epoch": 0.039036699278409845, "grad_norm": 2.7847247527912287, "learning_rate": 3.903669927840985e-06, "loss": 0.518, "step": 8818 }, { "epoch": 0.039041126211873035, "grad_norm": 3.2348456969215755, "learning_rate": 3.904112621187304e-06, "loss": 0.6055, "step": 8819 }, { "epoch": 0.039045553145336226, "grad_norm": 3.4844392557138635, "learning_rate": 3.904555314533623e-06, "loss": 0.9952, "step": 8820 }, { "epoch": 0.039049980078799416, "grad_norm": 4.08936883977602, "learning_rate": 3.904998007879942e-06, "loss": 1.0405, "step": 8821 }, { "epoch": 0.03905440701226261, "grad_norm": 3.560703854157125, "learning_rate": 3.90544070122626e-06, "loss": 1.0792, "step": 8822 }, { "epoch": 0.0390588339457258, "grad_norm": 3.768778101186841, "learning_rate": 3.90588339457258e-06, "loss": 1.1812, "step": 8823 }, { "epoch": 0.03906326087918899, "grad_norm": 2.9302821975412865, "learning_rate": 3.906326087918899e-06, "loss": 0.6935, "step": 8824 }, { "epoch": 0.03906768781265218, "grad_norm": 3.445314939565821, "learning_rate": 3.906768781265218e-06, "loss": 1.2103, "step": 8825 }, { "epoch": 0.03907211474611537, "grad_norm": 2.7258097832049026, "learning_rate": 3.907211474611537e-06, "loss": 0.7242, "step": 8826 }, { "epoch": 0.03907654167957855, "grad_norm": 2.8276701834006115, "learning_rate": 3.907654167957856e-06, "loss": 0.7855, "step": 8827 }, { "epoch": 0.03908096861304174, "grad_norm": 2.9457421967427404, "learning_rate": 3.908096861304175e-06, "loss": 0.7186, "step": 8828 }, { "epoch": 0.039085395546504934, "grad_norm": 2.8423219447502697, "learning_rate": 3.908539554650494e-06, "loss": 0.5702, "step": 8829 }, { "epoch": 0.039089822479968124, "grad_norm": 2.60997097858749, "learning_rate": 3.908982247996813e-06, "loss": 0.5859, "step": 8830 }, { "epoch": 0.039094249413431315, "grad_norm": 3.2488721530360074, "learning_rate": 3.909424941343132e-06, "loss": 0.7709, "step": 8831 }, { "epoch": 0.039098676346894505, "grad_norm": 2.6665543737778434, "learning_rate": 3.909867634689451e-06, "loss": 0.7621, "step": 8832 }, { "epoch": 0.039103103280357696, "grad_norm": 3.3489992956750565, "learning_rate": 3.91031032803577e-06, "loss": 0.853, "step": 8833 }, { "epoch": 0.039107530213820886, "grad_norm": 3.3040330408314675, "learning_rate": 3.9107530213820886e-06, "loss": 0.8357, "step": 8834 }, { "epoch": 0.03911195714728408, "grad_norm": 3.2573374061653495, "learning_rate": 3.911195714728408e-06, "loss": 0.9309, "step": 8835 }, { "epoch": 0.03911638408074727, "grad_norm": 2.691653697916426, "learning_rate": 3.911638408074727e-06, "loss": 0.7047, "step": 8836 }, { "epoch": 0.03912081101421046, "grad_norm": 3.576256971899002, "learning_rate": 3.912081101421046e-06, "loss": 0.769, "step": 8837 }, { "epoch": 0.03912523794767365, "grad_norm": 2.880554395930091, "learning_rate": 3.912523794767365e-06, "loss": 0.654, "step": 8838 }, { "epoch": 0.03912966488113684, "grad_norm": 3.2916433810446155, "learning_rate": 3.9129664881136846e-06, "loss": 0.6317, "step": 8839 }, { "epoch": 0.03913409181460003, "grad_norm": 4.418068818230788, "learning_rate": 3.913409181460003e-06, "loss": 1.084, "step": 8840 }, { "epoch": 0.03913851874806322, "grad_norm": 3.1618620921449363, "learning_rate": 3.913851874806322e-06, "loss": 0.7642, "step": 8841 }, { "epoch": 0.0391429456815264, "grad_norm": 2.489261358130456, "learning_rate": 3.914294568152641e-06, "loss": 0.5559, "step": 8842 }, { "epoch": 0.039147372614989594, "grad_norm": 2.899155786421266, "learning_rate": 3.9147372614989595e-06, "loss": 0.5527, "step": 8843 }, { "epoch": 0.039151799548452784, "grad_norm": 2.865199863134137, "learning_rate": 3.915179954845279e-06, "loss": 1.0403, "step": 8844 }, { "epoch": 0.039156226481915975, "grad_norm": 2.7866492364626416, "learning_rate": 3.915622648191598e-06, "loss": 0.8627, "step": 8845 }, { "epoch": 0.039160653415379165, "grad_norm": 2.6550821068084867, "learning_rate": 3.916065341537917e-06, "loss": 0.7195, "step": 8846 }, { "epoch": 0.039165080348842356, "grad_norm": 2.577690237796825, "learning_rate": 3.916508034884236e-06, "loss": 0.5627, "step": 8847 }, { "epoch": 0.039169507282305546, "grad_norm": 2.4563338493730216, "learning_rate": 3.9169507282305555e-06, "loss": 0.8553, "step": 8848 }, { "epoch": 0.03917393421576874, "grad_norm": 3.0928788049289144, "learning_rate": 3.917393421576874e-06, "loss": 1.0214, "step": 8849 }, { "epoch": 0.03917836114923193, "grad_norm": 2.8718711317055727, "learning_rate": 3.917836114923193e-06, "loss": 0.8199, "step": 8850 }, { "epoch": 0.03918278808269512, "grad_norm": 2.8753892599596944, "learning_rate": 3.918278808269512e-06, "loss": 0.5548, "step": 8851 }, { "epoch": 0.03918721501615831, "grad_norm": 2.879742049012788, "learning_rate": 3.9187215016158305e-06, "loss": 0.7723, "step": 8852 }, { "epoch": 0.0391916419496215, "grad_norm": 2.5648893786124285, "learning_rate": 3.91916419496215e-06, "loss": 0.7987, "step": 8853 }, { "epoch": 0.03919606888308469, "grad_norm": 3.1397324684697807, "learning_rate": 3.919606888308469e-06, "loss": 0.9743, "step": 8854 }, { "epoch": 0.03920049581654788, "grad_norm": 2.3653014118042393, "learning_rate": 3.920049581654788e-06, "loss": 0.6742, "step": 8855 }, { "epoch": 0.03920492275001107, "grad_norm": 2.796916069259584, "learning_rate": 3.920492275001107e-06, "loss": 0.8655, "step": 8856 }, { "epoch": 0.03920934968347426, "grad_norm": 3.9443057087924593, "learning_rate": 3.9209349683474265e-06, "loss": 1.0675, "step": 8857 }, { "epoch": 0.039213776616937444, "grad_norm": 2.463233846429711, "learning_rate": 3.921377661693745e-06, "loss": 0.5666, "step": 8858 }, { "epoch": 0.039218203550400635, "grad_norm": 2.610220062533697, "learning_rate": 3.921820355040064e-06, "loss": 0.535, "step": 8859 }, { "epoch": 0.039222630483863825, "grad_norm": 3.0727111880533395, "learning_rate": 3.922263048386383e-06, "loss": 0.9112, "step": 8860 }, { "epoch": 0.039227057417327016, "grad_norm": 3.2238935791843684, "learning_rate": 3.922705741732702e-06, "loss": 1.2102, "step": 8861 }, { "epoch": 0.039231484350790206, "grad_norm": 2.7259190925863237, "learning_rate": 3.923148435079021e-06, "loss": 0.8665, "step": 8862 }, { "epoch": 0.0392359112842534, "grad_norm": 3.209910139602704, "learning_rate": 3.92359112842534e-06, "loss": 0.8285, "step": 8863 }, { "epoch": 0.03924033821771659, "grad_norm": 2.841348370372252, "learning_rate": 3.924033821771659e-06, "loss": 0.5693, "step": 8864 }, { "epoch": 0.03924476515117978, "grad_norm": 2.74405349714705, "learning_rate": 3.924476515117978e-06, "loss": 0.7565, "step": 8865 }, { "epoch": 0.03924919208464297, "grad_norm": 2.831432068085497, "learning_rate": 3.924919208464297e-06, "loss": 0.4178, "step": 8866 }, { "epoch": 0.03925361901810616, "grad_norm": 3.367050740632973, "learning_rate": 3.925361901810616e-06, "loss": 0.9636, "step": 8867 }, { "epoch": 0.03925804595156935, "grad_norm": 3.2085056717635916, "learning_rate": 3.925804595156935e-06, "loss": 0.7382, "step": 8868 }, { "epoch": 0.03926247288503254, "grad_norm": 3.219896885036398, "learning_rate": 3.926247288503255e-06, "loss": 0.9925, "step": 8869 }, { "epoch": 0.03926689981849573, "grad_norm": 3.199020043137533, "learning_rate": 3.926689981849573e-06, "loss": 0.5463, "step": 8870 }, { "epoch": 0.03927132675195892, "grad_norm": 3.057352161329629, "learning_rate": 3.927132675195892e-06, "loss": 0.6378, "step": 8871 }, { "epoch": 0.03927575368542211, "grad_norm": 3.301015359478351, "learning_rate": 3.927575368542211e-06, "loss": 0.9791, "step": 8872 }, { "epoch": 0.039280180618885295, "grad_norm": 2.7234323462562275, "learning_rate": 3.92801806188853e-06, "loss": 0.8704, "step": 8873 }, { "epoch": 0.039284607552348486, "grad_norm": 2.8475991236673512, "learning_rate": 3.928460755234849e-06, "loss": 0.918, "step": 8874 }, { "epoch": 0.039289034485811676, "grad_norm": 2.458980154492667, "learning_rate": 3.928903448581168e-06, "loss": 0.6417, "step": 8875 }, { "epoch": 0.03929346141927487, "grad_norm": 2.8713252031673098, "learning_rate": 3.929346141927487e-06, "loss": 0.8297, "step": 8876 }, { "epoch": 0.03929788835273806, "grad_norm": 2.3023863839822396, "learning_rate": 3.929788835273806e-06, "loss": 0.5426, "step": 8877 }, { "epoch": 0.03930231528620125, "grad_norm": 2.596447659460943, "learning_rate": 3.930231528620126e-06, "loss": 0.6781, "step": 8878 }, { "epoch": 0.03930674221966444, "grad_norm": 2.444767232932344, "learning_rate": 3.930674221966444e-06, "loss": 0.6886, "step": 8879 }, { "epoch": 0.03931116915312763, "grad_norm": 3.1475703644035606, "learning_rate": 3.9311169153127635e-06, "loss": 0.6353, "step": 8880 }, { "epoch": 0.03931559608659082, "grad_norm": 3.17112769712868, "learning_rate": 3.931559608659082e-06, "loss": 0.9939, "step": 8881 }, { "epoch": 0.03932002302005401, "grad_norm": 2.7464855262662766, "learning_rate": 3.932002302005401e-06, "loss": 0.9959, "step": 8882 }, { "epoch": 0.0393244499535172, "grad_norm": 3.6924535178647617, "learning_rate": 3.93244499535172e-06, "loss": 1.0696, "step": 8883 }, { "epoch": 0.03932887688698039, "grad_norm": 2.7141791671944744, "learning_rate": 3.932887688698039e-06, "loss": 0.6851, "step": 8884 }, { "epoch": 0.03933330382044358, "grad_norm": 2.7705232575377505, "learning_rate": 3.933330382044358e-06, "loss": 0.7141, "step": 8885 }, { "epoch": 0.03933773075390677, "grad_norm": 3.4030702837048565, "learning_rate": 3.933773075390677e-06, "loss": 0.9686, "step": 8886 }, { "epoch": 0.03934215768736996, "grad_norm": 2.9704961102611613, "learning_rate": 3.9342157687369966e-06, "loss": 0.7203, "step": 8887 }, { "epoch": 0.039346584620833146, "grad_norm": 4.529019593982352, "learning_rate": 3.934658462083315e-06, "loss": 0.9918, "step": 8888 }, { "epoch": 0.039351011554296336, "grad_norm": 2.438811831153057, "learning_rate": 3.9351011554296345e-06, "loss": 0.6957, "step": 8889 }, { "epoch": 0.03935543848775953, "grad_norm": 3.9709671716190393, "learning_rate": 3.935543848775953e-06, "loss": 0.8378, "step": 8890 }, { "epoch": 0.03935986542122272, "grad_norm": 3.2413577952414254, "learning_rate": 3.935986542122272e-06, "loss": 0.8128, "step": 8891 }, { "epoch": 0.03936429235468591, "grad_norm": 3.6145347595475497, "learning_rate": 3.936429235468591e-06, "loss": 0.9047, "step": 8892 }, { "epoch": 0.0393687192881491, "grad_norm": 2.675336572616572, "learning_rate": 3.93687192881491e-06, "loss": 0.669, "step": 8893 }, { "epoch": 0.03937314622161229, "grad_norm": 2.7332052879720155, "learning_rate": 3.937314622161229e-06, "loss": 0.7845, "step": 8894 }, { "epoch": 0.03937757315507548, "grad_norm": 2.415772391656132, "learning_rate": 3.937757315507548e-06, "loss": 0.7193, "step": 8895 }, { "epoch": 0.03938200008853867, "grad_norm": 2.8216705340481973, "learning_rate": 3.9382000088538675e-06, "loss": 0.9422, "step": 8896 }, { "epoch": 0.03938642702200186, "grad_norm": 3.364116927141922, "learning_rate": 3.938642702200186e-06, "loss": 0.776, "step": 8897 }, { "epoch": 0.03939085395546505, "grad_norm": 4.025921190278192, "learning_rate": 3.939085395546505e-06, "loss": 1.1329, "step": 8898 }, { "epoch": 0.03939528088892824, "grad_norm": 3.083481099260557, "learning_rate": 3.939528088892825e-06, "loss": 0.8786, "step": 8899 }, { "epoch": 0.03939970782239143, "grad_norm": 2.7900477560170023, "learning_rate": 3.939970782239143e-06, "loss": 0.9095, "step": 8900 }, { "epoch": 0.03940413475585462, "grad_norm": 2.4804693317192616, "learning_rate": 3.940413475585463e-06, "loss": 0.6478, "step": 8901 }, { "epoch": 0.03940856168931781, "grad_norm": 2.5265930445193328, "learning_rate": 3.940856168931781e-06, "loss": 0.6753, "step": 8902 }, { "epoch": 0.039412988622780996, "grad_norm": 2.8002890532429725, "learning_rate": 3.9412988622781e-06, "loss": 0.6136, "step": 8903 }, { "epoch": 0.03941741555624419, "grad_norm": 2.829370808898779, "learning_rate": 3.941741555624419e-06, "loss": 0.7127, "step": 8904 }, { "epoch": 0.03942184248970738, "grad_norm": 3.5090368502439686, "learning_rate": 3.9421842489707385e-06, "loss": 0.9821, "step": 8905 }, { "epoch": 0.03942626942317057, "grad_norm": 2.0085521912472277, "learning_rate": 3.942626942317057e-06, "loss": 0.4194, "step": 8906 }, { "epoch": 0.03943069635663376, "grad_norm": 2.7538492260055745, "learning_rate": 3.943069635663376e-06, "loss": 0.7105, "step": 8907 }, { "epoch": 0.03943512329009695, "grad_norm": 2.9129289206273494, "learning_rate": 3.943512329009696e-06, "loss": 0.8369, "step": 8908 }, { "epoch": 0.03943955022356014, "grad_norm": 3.0695966317246803, "learning_rate": 3.943955022356014e-06, "loss": 0.8917, "step": 8909 }, { "epoch": 0.03944397715702333, "grad_norm": 2.632262596733016, "learning_rate": 3.944397715702334e-06, "loss": 0.3213, "step": 8910 }, { "epoch": 0.03944840409048652, "grad_norm": 2.8378925833476396, "learning_rate": 3.944840409048652e-06, "loss": 0.6536, "step": 8911 }, { "epoch": 0.03945283102394971, "grad_norm": 2.7684576248289003, "learning_rate": 3.9452831023949715e-06, "loss": 0.6317, "step": 8912 }, { "epoch": 0.0394572579574129, "grad_norm": 3.852929335603023, "learning_rate": 3.94572579574129e-06, "loss": 1.0423, "step": 8913 }, { "epoch": 0.03946168489087609, "grad_norm": 2.7947536120250644, "learning_rate": 3.946168489087609e-06, "loss": 0.9325, "step": 8914 }, { "epoch": 0.03946611182433928, "grad_norm": 2.669952188876525, "learning_rate": 3.946611182433928e-06, "loss": 0.8187, "step": 8915 }, { "epoch": 0.03947053875780247, "grad_norm": 3.6613820220731785, "learning_rate": 3.947053875780247e-06, "loss": 0.8103, "step": 8916 }, { "epoch": 0.039474965691265664, "grad_norm": 2.6096300109545885, "learning_rate": 3.947496569126567e-06, "loss": 0.6818, "step": 8917 }, { "epoch": 0.03947939262472885, "grad_norm": 3.2247947276952194, "learning_rate": 3.947939262472885e-06, "loss": 0.6769, "step": 8918 }, { "epoch": 0.03948381955819204, "grad_norm": 3.2327573309774964, "learning_rate": 3.9483819558192046e-06, "loss": 0.8819, "step": 8919 }, { "epoch": 0.03948824649165523, "grad_norm": 4.398759111018459, "learning_rate": 3.948824649165524e-06, "loss": 0.8101, "step": 8920 }, { "epoch": 0.03949267342511842, "grad_norm": 2.9966317386392087, "learning_rate": 3.9492673425118425e-06, "loss": 0.6048, "step": 8921 }, { "epoch": 0.03949710035858161, "grad_norm": 3.368839009075189, "learning_rate": 3.949710035858161e-06, "loss": 0.9013, "step": 8922 }, { "epoch": 0.0395015272920448, "grad_norm": 3.1432492559778984, "learning_rate": 3.95015272920448e-06, "loss": 1.0448, "step": 8923 }, { "epoch": 0.03950595422550799, "grad_norm": 3.5152008241751567, "learning_rate": 3.950595422550799e-06, "loss": 0.7822, "step": 8924 }, { "epoch": 0.03951038115897118, "grad_norm": 2.9770412450191075, "learning_rate": 3.951038115897118e-06, "loss": 0.7063, "step": 8925 }, { "epoch": 0.03951480809243437, "grad_norm": 3.283398004768466, "learning_rate": 3.951480809243438e-06, "loss": 0.8584, "step": 8926 }, { "epoch": 0.03951923502589756, "grad_norm": 2.71226504158968, "learning_rate": 3.951923502589756e-06, "loss": 0.7782, "step": 8927 }, { "epoch": 0.03952366195936075, "grad_norm": 2.747731472088396, "learning_rate": 3.9523661959360755e-06, "loss": 0.9392, "step": 8928 }, { "epoch": 0.03952808889282394, "grad_norm": 2.627025949406292, "learning_rate": 3.952808889282395e-06, "loss": 0.5888, "step": 8929 }, { "epoch": 0.03953251582628713, "grad_norm": 2.6826257519197854, "learning_rate": 3.953251582628713e-06, "loss": 0.7537, "step": 8930 }, { "epoch": 0.039536942759750324, "grad_norm": 2.928130096835106, "learning_rate": 3.953694275975033e-06, "loss": 0.8796, "step": 8931 }, { "epoch": 0.039541369693213514, "grad_norm": 2.8114355375862647, "learning_rate": 3.954136969321351e-06, "loss": 1.0164, "step": 8932 }, { "epoch": 0.0395457966266767, "grad_norm": 2.810857770625599, "learning_rate": 3.95457966266767e-06, "loss": 0.4859, "step": 8933 }, { "epoch": 0.03955022356013989, "grad_norm": 4.19840524207051, "learning_rate": 3.955022356013989e-06, "loss": 1.2053, "step": 8934 }, { "epoch": 0.03955465049360308, "grad_norm": 2.8505346830056526, "learning_rate": 3.9554650493603086e-06, "loss": 0.9404, "step": 8935 }, { "epoch": 0.03955907742706627, "grad_norm": 3.1612740169967832, "learning_rate": 3.955907742706627e-06, "loss": 0.9108, "step": 8936 }, { "epoch": 0.03956350436052946, "grad_norm": 3.2088867937507795, "learning_rate": 3.9563504360529465e-06, "loss": 1.0053, "step": 8937 }, { "epoch": 0.03956793129399265, "grad_norm": 2.5111340803016655, "learning_rate": 3.956793129399266e-06, "loss": 0.6719, "step": 8938 }, { "epoch": 0.03957235822745584, "grad_norm": 3.2608273863673194, "learning_rate": 3.957235822745584e-06, "loss": 1.0914, "step": 8939 }, { "epoch": 0.03957678516091903, "grad_norm": 2.5559815156198114, "learning_rate": 3.957678516091904e-06, "loss": 0.5674, "step": 8940 }, { "epoch": 0.03958121209438222, "grad_norm": 3.120394002387, "learning_rate": 3.958121209438222e-06, "loss": 0.8228, "step": 8941 }, { "epoch": 0.03958563902784541, "grad_norm": 4.39635397361544, "learning_rate": 3.958563902784542e-06, "loss": 1.3025, "step": 8942 }, { "epoch": 0.0395900659613086, "grad_norm": 2.504794734400767, "learning_rate": 3.95900659613086e-06, "loss": 0.5812, "step": 8943 }, { "epoch": 0.03959449289477179, "grad_norm": 3.054042961184339, "learning_rate": 3.9594492894771795e-06, "loss": 0.9583, "step": 8944 }, { "epoch": 0.039598919828234984, "grad_norm": 2.6212484751896996, "learning_rate": 3.959891982823498e-06, "loss": 0.8738, "step": 8945 }, { "epoch": 0.039603346761698174, "grad_norm": 2.6375652788407216, "learning_rate": 3.960334676169817e-06, "loss": 0.7522, "step": 8946 }, { "epoch": 0.039607773695161365, "grad_norm": 3.6151581916538404, "learning_rate": 3.960777369516137e-06, "loss": 1.1369, "step": 8947 }, { "epoch": 0.03961220062862455, "grad_norm": 2.454784208736992, "learning_rate": 3.961220062862455e-06, "loss": 0.7186, "step": 8948 }, { "epoch": 0.03961662756208774, "grad_norm": 2.970840126760207, "learning_rate": 3.961662756208775e-06, "loss": 1.0989, "step": 8949 }, { "epoch": 0.03962105449555093, "grad_norm": 2.5253883658950147, "learning_rate": 3.962105449555094e-06, "loss": 0.6455, "step": 8950 }, { "epoch": 0.03962548142901412, "grad_norm": 3.1247370440188322, "learning_rate": 3.9625481429014126e-06, "loss": 0.9427, "step": 8951 }, { "epoch": 0.03962990836247731, "grad_norm": 2.897489365553299, "learning_rate": 3.962990836247731e-06, "loss": 0.8313, "step": 8952 }, { "epoch": 0.0396343352959405, "grad_norm": 3.687728600416479, "learning_rate": 3.9634335295940505e-06, "loss": 1.251, "step": 8953 }, { "epoch": 0.03963876222940369, "grad_norm": 3.3948235414669763, "learning_rate": 3.963876222940369e-06, "loss": 1.2515, "step": 8954 }, { "epoch": 0.03964318916286688, "grad_norm": 2.9546116785137793, "learning_rate": 3.964318916286688e-06, "loss": 0.8553, "step": 8955 }, { "epoch": 0.03964761609633007, "grad_norm": 2.6658733581640064, "learning_rate": 3.964761609633008e-06, "loss": 0.7369, "step": 8956 }, { "epoch": 0.03965204302979326, "grad_norm": 3.1779005333493826, "learning_rate": 3.965204302979326e-06, "loss": 0.6963, "step": 8957 }, { "epoch": 0.039656469963256454, "grad_norm": 2.752436870112405, "learning_rate": 3.965646996325646e-06, "loss": 0.7962, "step": 8958 }, { "epoch": 0.039660896896719644, "grad_norm": 3.389949225846612, "learning_rate": 3.966089689671965e-06, "loss": 0.961, "step": 8959 }, { "epoch": 0.039665323830182835, "grad_norm": 3.304851103655073, "learning_rate": 3.9665323830182835e-06, "loss": 1.0778, "step": 8960 }, { "epoch": 0.039669750763646025, "grad_norm": 2.4977914715252516, "learning_rate": 3.966975076364603e-06, "loss": 0.8093, "step": 8961 }, { "epoch": 0.039674177697109216, "grad_norm": 3.2696495640883625, "learning_rate": 3.967417769710921e-06, "loss": 1.0219, "step": 8962 }, { "epoch": 0.0396786046305724, "grad_norm": 3.183605697031841, "learning_rate": 3.967860463057241e-06, "loss": 0.5942, "step": 8963 }, { "epoch": 0.03968303156403559, "grad_norm": 2.996487633349161, "learning_rate": 3.968303156403559e-06, "loss": 0.6768, "step": 8964 }, { "epoch": 0.03968745849749878, "grad_norm": 2.614752913464397, "learning_rate": 3.968745849749879e-06, "loss": 0.8455, "step": 8965 }, { "epoch": 0.03969188543096197, "grad_norm": 2.4750166812354673, "learning_rate": 3.969188543096197e-06, "loss": 0.6156, "step": 8966 }, { "epoch": 0.03969631236442516, "grad_norm": 2.524902990146927, "learning_rate": 3.9696312364425166e-06, "loss": 0.6823, "step": 8967 }, { "epoch": 0.03970073929788835, "grad_norm": 3.408160492891061, "learning_rate": 3.970073929788836e-06, "loss": 0.7982, "step": 8968 }, { "epoch": 0.03970516623135154, "grad_norm": 2.5093420642859234, "learning_rate": 3.9705166231351545e-06, "loss": 0.7022, "step": 8969 }, { "epoch": 0.03970959316481473, "grad_norm": 3.1575493141364714, "learning_rate": 3.970959316481474e-06, "loss": 0.8226, "step": 8970 }, { "epoch": 0.03971402009827792, "grad_norm": 2.963077348811293, "learning_rate": 3.971402009827792e-06, "loss": 0.9978, "step": 8971 }, { "epoch": 0.039718447031741114, "grad_norm": 2.6854214801933107, "learning_rate": 3.971844703174112e-06, "loss": 0.7036, "step": 8972 }, { "epoch": 0.039722873965204304, "grad_norm": 2.8383716602844187, "learning_rate": 3.97228739652043e-06, "loss": 0.8511, "step": 8973 }, { "epoch": 0.039727300898667495, "grad_norm": 2.794471538427401, "learning_rate": 3.97273008986675e-06, "loss": 0.795, "step": 8974 }, { "epoch": 0.039731727832130685, "grad_norm": 2.744071909515808, "learning_rate": 3.973172783213068e-06, "loss": 0.8102, "step": 8975 }, { "epoch": 0.039736154765593876, "grad_norm": 3.2202927698625623, "learning_rate": 3.9736154765593875e-06, "loss": 0.7813, "step": 8976 }, { "epoch": 0.039740581699057066, "grad_norm": 2.793868977379155, "learning_rate": 3.974058169905707e-06, "loss": 0.7897, "step": 8977 }, { "epoch": 0.03974500863252025, "grad_norm": 3.425235353367778, "learning_rate": 3.974500863252025e-06, "loss": 0.9751, "step": 8978 }, { "epoch": 0.03974943556598344, "grad_norm": 3.44169251069921, "learning_rate": 3.974943556598345e-06, "loss": 1.0874, "step": 8979 }, { "epoch": 0.03975386249944663, "grad_norm": 3.626553051289692, "learning_rate": 3.975386249944664e-06, "loss": 1.1228, "step": 8980 }, { "epoch": 0.03975828943290982, "grad_norm": 3.0532776517849696, "learning_rate": 3.975828943290983e-06, "loss": 0.6899, "step": 8981 }, { "epoch": 0.03976271636637301, "grad_norm": 3.24131300249151, "learning_rate": 3.976271636637302e-06, "loss": 0.9051, "step": 8982 }, { "epoch": 0.0397671432998362, "grad_norm": 2.546242959383825, "learning_rate": 3.9767143299836206e-06, "loss": 0.8182, "step": 8983 }, { "epoch": 0.03977157023329939, "grad_norm": 2.6053257398296195, "learning_rate": 3.977157023329939e-06, "loss": 0.5914, "step": 8984 }, { "epoch": 0.03977599716676258, "grad_norm": 3.1884641238081306, "learning_rate": 3.9775997166762585e-06, "loss": 0.9767, "step": 8985 }, { "epoch": 0.039780424100225774, "grad_norm": 3.0873554123415867, "learning_rate": 3.978042410022578e-06, "loss": 0.7723, "step": 8986 }, { "epoch": 0.039784851033688964, "grad_norm": 3.0120464841360404, "learning_rate": 3.978485103368896e-06, "loss": 0.842, "step": 8987 }, { "epoch": 0.039789277967152155, "grad_norm": 3.492323644805827, "learning_rate": 3.978927796715216e-06, "loss": 0.7265, "step": 8988 }, { "epoch": 0.039793704900615345, "grad_norm": 3.797079740084096, "learning_rate": 3.979370490061535e-06, "loss": 1.0203, "step": 8989 }, { "epoch": 0.039798131834078536, "grad_norm": 2.399240775226593, "learning_rate": 3.979813183407854e-06, "loss": 0.5016, "step": 8990 }, { "epoch": 0.039802558767541726, "grad_norm": 2.6453793097772924, "learning_rate": 3.980255876754173e-06, "loss": 0.5462, "step": 8991 }, { "epoch": 0.03980698570100492, "grad_norm": 3.0115762295684774, "learning_rate": 3.9806985701004915e-06, "loss": 0.6555, "step": 8992 }, { "epoch": 0.03981141263446811, "grad_norm": 3.0703287840841442, "learning_rate": 3.981141263446811e-06, "loss": 0.783, "step": 8993 }, { "epoch": 0.03981583956793129, "grad_norm": 2.539517364448591, "learning_rate": 3.981583956793129e-06, "loss": 0.7573, "step": 8994 }, { "epoch": 0.03982026650139448, "grad_norm": 2.7408851136820442, "learning_rate": 3.982026650139449e-06, "loss": 0.6505, "step": 8995 }, { "epoch": 0.03982469343485767, "grad_norm": 3.03350785263478, "learning_rate": 3.982469343485767e-06, "loss": 0.7394, "step": 8996 }, { "epoch": 0.03982912036832086, "grad_norm": 2.8645244659522557, "learning_rate": 3.982912036832087e-06, "loss": 1.08, "step": 8997 }, { "epoch": 0.03983354730178405, "grad_norm": 2.8952096005848893, "learning_rate": 3.983354730178406e-06, "loss": 0.6094, "step": 8998 }, { "epoch": 0.039837974235247244, "grad_norm": 3.1357276721689904, "learning_rate": 3.983797423524725e-06, "loss": 0.7967, "step": 8999 }, { "epoch": 0.039842401168710434, "grad_norm": 2.2871863108176034, "learning_rate": 3.984240116871044e-06, "loss": 0.4368, "step": 9000 }, { "epoch": 0.039846828102173625, "grad_norm": 2.4962740647139454, "learning_rate": 3.984682810217363e-06, "loss": 0.6798, "step": 9001 }, { "epoch": 0.039851255035636815, "grad_norm": 2.7630061996055693, "learning_rate": 3.985125503563682e-06, "loss": 0.7365, "step": 9002 }, { "epoch": 0.039855681969100006, "grad_norm": 4.056072939932868, "learning_rate": 3.98556819691e-06, "loss": 0.8532, "step": 9003 }, { "epoch": 0.039860108902563196, "grad_norm": 3.0601852050508507, "learning_rate": 3.98601089025632e-06, "loss": 0.7531, "step": 9004 }, { "epoch": 0.03986453583602639, "grad_norm": 3.098573983380737, "learning_rate": 3.986453583602638e-06, "loss": 1.0318, "step": 9005 }, { "epoch": 0.03986896276948958, "grad_norm": 3.2566476207376116, "learning_rate": 3.986896276948958e-06, "loss": 0.8661, "step": 9006 }, { "epoch": 0.03987338970295277, "grad_norm": 2.536531174069205, "learning_rate": 3.987338970295277e-06, "loss": 0.7338, "step": 9007 }, { "epoch": 0.03987781663641596, "grad_norm": 2.550310739656493, "learning_rate": 3.9877816636415955e-06, "loss": 0.7156, "step": 9008 }, { "epoch": 0.03988224356987914, "grad_norm": 3.0288656973457133, "learning_rate": 3.988224356987915e-06, "loss": 0.9397, "step": 9009 }, { "epoch": 0.03988667050334233, "grad_norm": 2.9685940031115616, "learning_rate": 3.988667050334234e-06, "loss": 0.7847, "step": 9010 }, { "epoch": 0.03989109743680552, "grad_norm": 2.8225764828987767, "learning_rate": 3.989109743680553e-06, "loss": 0.6789, "step": 9011 }, { "epoch": 0.03989552437026871, "grad_norm": 2.545223591288932, "learning_rate": 3.989552437026872e-06, "loss": 0.809, "step": 9012 }, { "epoch": 0.039899951303731904, "grad_norm": 3.9662746551883714, "learning_rate": 3.989995130373191e-06, "loss": 0.8687, "step": 9013 }, { "epoch": 0.039904378237195094, "grad_norm": 2.612049210876635, "learning_rate": 3.990437823719509e-06, "loss": 0.7644, "step": 9014 }, { "epoch": 0.039908805170658285, "grad_norm": 3.122105211022773, "learning_rate": 3.990880517065829e-06, "loss": 0.7405, "step": 9015 }, { "epoch": 0.039913232104121475, "grad_norm": 3.028257391016196, "learning_rate": 3.991323210412148e-06, "loss": 0.7402, "step": 9016 }, { "epoch": 0.039917659037584666, "grad_norm": 2.735979340649022, "learning_rate": 3.9917659037584665e-06, "loss": 0.6918, "step": 9017 }, { "epoch": 0.039922085971047856, "grad_norm": 2.6475586497809225, "learning_rate": 3.992208597104786e-06, "loss": 0.4544, "step": 9018 }, { "epoch": 0.03992651290451105, "grad_norm": 2.691000035722202, "learning_rate": 3.992651290451105e-06, "loss": 0.8599, "step": 9019 }, { "epoch": 0.03993093983797424, "grad_norm": 3.0029702959610654, "learning_rate": 3.993093983797424e-06, "loss": 1.0109, "step": 9020 }, { "epoch": 0.03993536677143743, "grad_norm": 3.3941777871333505, "learning_rate": 3.993536677143743e-06, "loss": 0.8981, "step": 9021 }, { "epoch": 0.03993979370490062, "grad_norm": 2.6788082207820594, "learning_rate": 3.993979370490062e-06, "loss": 0.9979, "step": 9022 }, { "epoch": 0.03994422063836381, "grad_norm": 2.7392595959647807, "learning_rate": 3.994422063836381e-06, "loss": 0.6589, "step": 9023 }, { "epoch": 0.03994864757182699, "grad_norm": 2.7356387753363562, "learning_rate": 3.9948647571826995e-06, "loss": 1.0268, "step": 9024 }, { "epoch": 0.03995307450529018, "grad_norm": 2.658247865457686, "learning_rate": 3.995307450529019e-06, "loss": 0.7739, "step": 9025 }, { "epoch": 0.03995750143875337, "grad_norm": 2.5555555031229513, "learning_rate": 3.995750143875337e-06, "loss": 0.7995, "step": 9026 }, { "epoch": 0.039961928372216564, "grad_norm": 4.429047511262165, "learning_rate": 3.996192837221657e-06, "loss": 1.0966, "step": 9027 }, { "epoch": 0.039966355305679754, "grad_norm": 3.131087723735034, "learning_rate": 3.996635530567976e-06, "loss": 1.2638, "step": 9028 }, { "epoch": 0.039970782239142945, "grad_norm": 2.1841970029708224, "learning_rate": 3.997078223914295e-06, "loss": 0.5795, "step": 9029 }, { "epoch": 0.039975209172606135, "grad_norm": 2.5114636248455673, "learning_rate": 3.997520917260614e-06, "loss": 1.0969, "step": 9030 }, { "epoch": 0.039979636106069326, "grad_norm": 2.8881168151314074, "learning_rate": 3.9979636106069334e-06, "loss": 0.8989, "step": 9031 }, { "epoch": 0.039984063039532516, "grad_norm": 3.641759409381799, "learning_rate": 3.998406303953252e-06, "loss": 1.1094, "step": 9032 }, { "epoch": 0.03998848997299571, "grad_norm": 3.503048415395311, "learning_rate": 3.9988489972995705e-06, "loss": 1.0777, "step": 9033 }, { "epoch": 0.0399929169064589, "grad_norm": 3.6042819117163956, "learning_rate": 3.99929169064589e-06, "loss": 0.8055, "step": 9034 }, { "epoch": 0.03999734383992209, "grad_norm": 3.54852767059557, "learning_rate": 3.999734383992208e-06, "loss": 0.6647, "step": 9035 }, { "epoch": 0.04000177077338528, "grad_norm": 2.3878137652924076, "learning_rate": 4.000177077338528e-06, "loss": 0.7064, "step": 9036 }, { "epoch": 0.04000619770684847, "grad_norm": 3.9165020154126315, "learning_rate": 4.000619770684847e-06, "loss": 0.6325, "step": 9037 }, { "epoch": 0.04001062464031166, "grad_norm": 2.989807889018013, "learning_rate": 4.001062464031166e-06, "loss": 0.6529, "step": 9038 }, { "epoch": 0.04001505157377484, "grad_norm": 2.959085753366332, "learning_rate": 4.001505157377485e-06, "loss": 0.8185, "step": 9039 }, { "epoch": 0.040019478507238034, "grad_norm": 2.6139776671095296, "learning_rate": 4.001947850723804e-06, "loss": 0.8429, "step": 9040 }, { "epoch": 0.040023905440701224, "grad_norm": 3.416128459138535, "learning_rate": 4.002390544070123e-06, "loss": 0.755, "step": 9041 }, { "epoch": 0.040028332374164415, "grad_norm": 2.9957632336748357, "learning_rate": 4.002833237416442e-06, "loss": 0.8006, "step": 9042 }, { "epoch": 0.040032759307627605, "grad_norm": 2.73236551649719, "learning_rate": 4.003275930762761e-06, "loss": 0.6467, "step": 9043 }, { "epoch": 0.040037186241090796, "grad_norm": 2.8691140376368547, "learning_rate": 4.00371862410908e-06, "loss": 0.9363, "step": 9044 }, { "epoch": 0.040041613174553986, "grad_norm": 3.226150868566697, "learning_rate": 4.004161317455399e-06, "loss": 0.7459, "step": 9045 }, { "epoch": 0.04004604010801718, "grad_norm": 3.156514365079793, "learning_rate": 4.004604010801718e-06, "loss": 0.848, "step": 9046 }, { "epoch": 0.04005046704148037, "grad_norm": 2.9379165900949156, "learning_rate": 4.005046704148037e-06, "loss": 0.8569, "step": 9047 }, { "epoch": 0.04005489397494356, "grad_norm": 2.575540394874716, "learning_rate": 4.005489397494356e-06, "loss": 0.9481, "step": 9048 }, { "epoch": 0.04005932090840675, "grad_norm": 3.3965383797578776, "learning_rate": 4.005932090840675e-06, "loss": 1.0554, "step": 9049 }, { "epoch": 0.04006374784186994, "grad_norm": 3.085881187805075, "learning_rate": 4.006374784186994e-06, "loss": 0.6793, "step": 9050 }, { "epoch": 0.04006817477533313, "grad_norm": 2.781325541381274, "learning_rate": 4.006817477533313e-06, "loss": 0.773, "step": 9051 }, { "epoch": 0.04007260170879632, "grad_norm": 2.9969802698076338, "learning_rate": 4.007260170879632e-06, "loss": 0.9656, "step": 9052 }, { "epoch": 0.04007702864225951, "grad_norm": 2.6276127665778444, "learning_rate": 4.007702864225951e-06, "loss": 1.0017, "step": 9053 }, { "epoch": 0.040081455575722694, "grad_norm": 2.302727737664767, "learning_rate": 4.00814555757227e-06, "loss": 0.5596, "step": 9054 }, { "epoch": 0.040085882509185884, "grad_norm": 2.719745133656064, "learning_rate": 4.008588250918589e-06, "loss": 0.8645, "step": 9055 }, { "epoch": 0.040090309442649075, "grad_norm": 2.610064296719683, "learning_rate": 4.0090309442649075e-06, "loss": 0.4967, "step": 9056 }, { "epoch": 0.040094736376112265, "grad_norm": 2.857766216828635, "learning_rate": 4.009473637611227e-06, "loss": 0.7196, "step": 9057 }, { "epoch": 0.040099163309575456, "grad_norm": 4.159106085876264, "learning_rate": 4.009916330957546e-06, "loss": 1.0828, "step": 9058 }, { "epoch": 0.040103590243038646, "grad_norm": 2.940240386512923, "learning_rate": 4.010359024303865e-06, "loss": 0.7218, "step": 9059 }, { "epoch": 0.04010801717650184, "grad_norm": 2.7282866611512246, "learning_rate": 4.010801717650184e-06, "loss": 0.6428, "step": 9060 }, { "epoch": 0.04011244410996503, "grad_norm": 3.3759410346487746, "learning_rate": 4.0112444109965035e-06, "loss": 0.9039, "step": 9061 }, { "epoch": 0.04011687104342822, "grad_norm": 3.1521975506902042, "learning_rate": 4.011687104342822e-06, "loss": 0.6624, "step": 9062 }, { "epoch": 0.04012129797689141, "grad_norm": 3.2679107167401487, "learning_rate": 4.0121297976891414e-06, "loss": 0.8773, "step": 9063 }, { "epoch": 0.0401257249103546, "grad_norm": 3.9921092644207508, "learning_rate": 4.01257249103546e-06, "loss": 1.1329, "step": 9064 }, { "epoch": 0.04013015184381779, "grad_norm": 2.890488806512155, "learning_rate": 4.0130151843817785e-06, "loss": 1.0952, "step": 9065 }, { "epoch": 0.04013457877728098, "grad_norm": 3.0467536706937186, "learning_rate": 4.013457877728098e-06, "loss": 0.93, "step": 9066 }, { "epoch": 0.04013900571074417, "grad_norm": 3.1654303440984397, "learning_rate": 4.013900571074417e-06, "loss": 1.0325, "step": 9067 }, { "epoch": 0.04014343264420736, "grad_norm": 2.492862869806434, "learning_rate": 4.014343264420736e-06, "loss": 0.6657, "step": 9068 }, { "epoch": 0.040147859577670544, "grad_norm": 2.7894912330087167, "learning_rate": 4.014785957767055e-06, "loss": 0.8029, "step": 9069 }, { "epoch": 0.040152286511133735, "grad_norm": 2.5133943809457753, "learning_rate": 4.0152286511133745e-06, "loss": 0.5882, "step": 9070 }, { "epoch": 0.040156713444596925, "grad_norm": 2.792054714357484, "learning_rate": 4.015671344459693e-06, "loss": 0.7719, "step": 9071 }, { "epoch": 0.040161140378060116, "grad_norm": 3.2510180493768313, "learning_rate": 4.016114037806012e-06, "loss": 0.8519, "step": 9072 }, { "epoch": 0.040165567311523306, "grad_norm": 2.9903878517773297, "learning_rate": 4.016556731152331e-06, "loss": 0.9183, "step": 9073 }, { "epoch": 0.0401699942449865, "grad_norm": 2.995500409832371, "learning_rate": 4.01699942449865e-06, "loss": 1.0193, "step": 9074 }, { "epoch": 0.04017442117844969, "grad_norm": 3.629121063555162, "learning_rate": 4.017442117844969e-06, "loss": 0.7768, "step": 9075 }, { "epoch": 0.04017884811191288, "grad_norm": 3.094686184677363, "learning_rate": 4.017884811191288e-06, "loss": 0.645, "step": 9076 }, { "epoch": 0.04018327504537607, "grad_norm": 2.407194536545314, "learning_rate": 4.018327504537607e-06, "loss": 0.6655, "step": 9077 }, { "epoch": 0.04018770197883926, "grad_norm": 2.730126277971129, "learning_rate": 4.018770197883926e-06, "loss": 0.6217, "step": 9078 }, { "epoch": 0.04019212891230245, "grad_norm": 2.7794110271258314, "learning_rate": 4.0192128912302454e-06, "loss": 0.8259, "step": 9079 }, { "epoch": 0.04019655584576564, "grad_norm": 2.8837095176614684, "learning_rate": 4.019655584576564e-06, "loss": 0.6715, "step": 9080 }, { "epoch": 0.04020098277922883, "grad_norm": 2.737977219690257, "learning_rate": 4.020098277922883e-06, "loss": 0.8015, "step": 9081 }, { "epoch": 0.04020540971269202, "grad_norm": 2.80847543912325, "learning_rate": 4.020540971269203e-06, "loss": 0.5696, "step": 9082 }, { "epoch": 0.04020983664615521, "grad_norm": 3.3168491128585798, "learning_rate": 4.020983664615521e-06, "loss": 0.7942, "step": 9083 }, { "epoch": 0.040214263579618395, "grad_norm": 3.209576708482343, "learning_rate": 4.02142635796184e-06, "loss": 0.9338, "step": 9084 }, { "epoch": 0.040218690513081586, "grad_norm": 2.465289926709385, "learning_rate": 4.021869051308159e-06, "loss": 0.6068, "step": 9085 }, { "epoch": 0.040223117446544776, "grad_norm": 3.031810842433978, "learning_rate": 4.022311744654478e-06, "loss": 0.7171, "step": 9086 }, { "epoch": 0.04022754438000797, "grad_norm": 2.841793524888113, "learning_rate": 4.022754438000797e-06, "loss": 0.6831, "step": 9087 }, { "epoch": 0.04023197131347116, "grad_norm": 3.3580774520825787, "learning_rate": 4.023197131347116e-06, "loss": 0.8429, "step": 9088 }, { "epoch": 0.04023639824693435, "grad_norm": 2.5158428981848773, "learning_rate": 4.023639824693435e-06, "loss": 0.5994, "step": 9089 }, { "epoch": 0.04024082518039754, "grad_norm": 2.787071305395252, "learning_rate": 4.024082518039754e-06, "loss": 0.5779, "step": 9090 }, { "epoch": 0.04024525211386073, "grad_norm": 3.0766901451570043, "learning_rate": 4.024525211386074e-06, "loss": 0.8487, "step": 9091 }, { "epoch": 0.04024967904732392, "grad_norm": 3.164338764202694, "learning_rate": 4.024967904732392e-06, "loss": 0.923, "step": 9092 }, { "epoch": 0.04025410598078711, "grad_norm": 2.647996611830751, "learning_rate": 4.0254105980787115e-06, "loss": 0.6847, "step": 9093 }, { "epoch": 0.0402585329142503, "grad_norm": 2.7523331303210323, "learning_rate": 4.02585329142503e-06, "loss": 0.8358, "step": 9094 }, { "epoch": 0.04026295984771349, "grad_norm": 2.482305307318482, "learning_rate": 4.026295984771349e-06, "loss": 0.6324, "step": 9095 }, { "epoch": 0.04026738678117668, "grad_norm": 2.695468824910832, "learning_rate": 4.026738678117668e-06, "loss": 0.8924, "step": 9096 }, { "epoch": 0.04027181371463987, "grad_norm": 4.094279513124387, "learning_rate": 4.027181371463987e-06, "loss": 1.5055, "step": 9097 }, { "epoch": 0.04027624064810306, "grad_norm": 2.6064302553437417, "learning_rate": 4.027624064810306e-06, "loss": 0.66, "step": 9098 }, { "epoch": 0.040280667581566246, "grad_norm": 2.7186494099274507, "learning_rate": 4.028066758156625e-06, "loss": 0.8721, "step": 9099 }, { "epoch": 0.040285094515029436, "grad_norm": 2.5148507362476065, "learning_rate": 4.028509451502945e-06, "loss": 0.672, "step": 9100 }, { "epoch": 0.04028952144849263, "grad_norm": 3.13056834816192, "learning_rate": 4.028952144849263e-06, "loss": 0.8165, "step": 9101 }, { "epoch": 0.04029394838195582, "grad_norm": 3.289196173250471, "learning_rate": 4.0293948381955825e-06, "loss": 1.1082, "step": 9102 }, { "epoch": 0.04029837531541901, "grad_norm": 3.8471491558763335, "learning_rate": 4.029837531541901e-06, "loss": 1.121, "step": 9103 }, { "epoch": 0.0403028022488822, "grad_norm": 2.9260969474518803, "learning_rate": 4.03028022488822e-06, "loss": 0.8822, "step": 9104 }, { "epoch": 0.04030722918234539, "grad_norm": 2.8500377483833916, "learning_rate": 4.030722918234539e-06, "loss": 0.8904, "step": 9105 }, { "epoch": 0.04031165611580858, "grad_norm": 2.2930032845101938, "learning_rate": 4.031165611580858e-06, "loss": 0.4961, "step": 9106 }, { "epoch": 0.04031608304927177, "grad_norm": 2.432557696290942, "learning_rate": 4.031608304927177e-06, "loss": 0.6704, "step": 9107 }, { "epoch": 0.04032050998273496, "grad_norm": 2.6550682112133623, "learning_rate": 4.032050998273496e-06, "loss": 0.6932, "step": 9108 }, { "epoch": 0.04032493691619815, "grad_norm": 3.2941998855094097, "learning_rate": 4.0324936916198155e-06, "loss": 1.2293, "step": 9109 }, { "epoch": 0.04032936384966134, "grad_norm": 2.7025049069803186, "learning_rate": 4.032936384966134e-06, "loss": 0.8072, "step": 9110 }, { "epoch": 0.04033379078312453, "grad_norm": 3.0320590426835725, "learning_rate": 4.0333790783124534e-06, "loss": 0.8911, "step": 9111 }, { "epoch": 0.04033821771658772, "grad_norm": 2.393263198381947, "learning_rate": 4.033821771658773e-06, "loss": 0.8626, "step": 9112 }, { "epoch": 0.04034264465005091, "grad_norm": 2.9789414543023027, "learning_rate": 4.034264465005091e-06, "loss": 0.722, "step": 9113 }, { "epoch": 0.040347071583514096, "grad_norm": 3.286564433175087, "learning_rate": 4.03470715835141e-06, "loss": 0.9296, "step": 9114 }, { "epoch": 0.04035149851697729, "grad_norm": 2.5762013208804695, "learning_rate": 4.035149851697729e-06, "loss": 0.6856, "step": 9115 }, { "epoch": 0.04035592545044048, "grad_norm": 3.1483388688628824, "learning_rate": 4.035592545044048e-06, "loss": 0.7503, "step": 9116 }, { "epoch": 0.04036035238390367, "grad_norm": 2.6123734285384037, "learning_rate": 4.036035238390367e-06, "loss": 0.7619, "step": 9117 }, { "epoch": 0.04036477931736686, "grad_norm": 3.876234974188069, "learning_rate": 4.0364779317366865e-06, "loss": 1.0513, "step": 9118 }, { "epoch": 0.04036920625083005, "grad_norm": 2.63856621266257, "learning_rate": 4.036920625083005e-06, "loss": 0.6779, "step": 9119 }, { "epoch": 0.04037363318429324, "grad_norm": 3.131047395773274, "learning_rate": 4.037363318429324e-06, "loss": 1.2953, "step": 9120 }, { "epoch": 0.04037806011775643, "grad_norm": 3.5623648022848715, "learning_rate": 4.037806011775644e-06, "loss": 1.1552, "step": 9121 }, { "epoch": 0.04038248705121962, "grad_norm": 3.082074545598443, "learning_rate": 4.038248705121962e-06, "loss": 1.1386, "step": 9122 }, { "epoch": 0.04038691398468281, "grad_norm": 3.6818547247919517, "learning_rate": 4.038691398468282e-06, "loss": 1.1624, "step": 9123 }, { "epoch": 0.040391340918146, "grad_norm": 2.616023644748155, "learning_rate": 4.0391340918146e-06, "loss": 0.7415, "step": 9124 }, { "epoch": 0.04039576785160919, "grad_norm": 2.835055431113633, "learning_rate": 4.0395767851609195e-06, "loss": 0.6261, "step": 9125 }, { "epoch": 0.04040019478507238, "grad_norm": 2.7744085663418976, "learning_rate": 4.040019478507238e-06, "loss": 0.5232, "step": 9126 }, { "epoch": 0.04040462171853557, "grad_norm": 3.410534704240444, "learning_rate": 4.0404621718535574e-06, "loss": 1.1032, "step": 9127 }, { "epoch": 0.040409048651998764, "grad_norm": 2.9450765450509486, "learning_rate": 4.040904865199876e-06, "loss": 0.9921, "step": 9128 }, { "epoch": 0.04041347558546195, "grad_norm": 2.2589134446093886, "learning_rate": 4.041347558546195e-06, "loss": 0.3681, "step": 9129 }, { "epoch": 0.04041790251892514, "grad_norm": 2.2576312580574367, "learning_rate": 4.041790251892515e-06, "loss": 0.8045, "step": 9130 }, { "epoch": 0.04042232945238833, "grad_norm": 2.529725958095289, "learning_rate": 4.042232945238833e-06, "loss": 0.573, "step": 9131 }, { "epoch": 0.04042675638585152, "grad_norm": 2.7082859829662924, "learning_rate": 4.042675638585153e-06, "loss": 0.6322, "step": 9132 }, { "epoch": 0.04043118331931471, "grad_norm": 3.969311617192966, "learning_rate": 4.043118331931471e-06, "loss": 1.3303, "step": 9133 }, { "epoch": 0.0404356102527779, "grad_norm": 3.0710206935342295, "learning_rate": 4.0435610252777905e-06, "loss": 0.8106, "step": 9134 }, { "epoch": 0.04044003718624109, "grad_norm": 3.087814592931843, "learning_rate": 4.044003718624109e-06, "loss": 0.941, "step": 9135 }, { "epoch": 0.04044446411970428, "grad_norm": 2.5398496963998514, "learning_rate": 4.044446411970428e-06, "loss": 0.8912, "step": 9136 }, { "epoch": 0.04044889105316747, "grad_norm": 2.4617945559913226, "learning_rate": 4.044889105316747e-06, "loss": 0.668, "step": 9137 }, { "epoch": 0.04045331798663066, "grad_norm": 2.921150862529192, "learning_rate": 4.045331798663066e-06, "loss": 0.7324, "step": 9138 }, { "epoch": 0.04045774492009385, "grad_norm": 3.3307474506293433, "learning_rate": 4.045774492009386e-06, "loss": 0.9185, "step": 9139 }, { "epoch": 0.04046217185355704, "grad_norm": 3.099834509946038, "learning_rate": 4.046217185355704e-06, "loss": 0.957, "step": 9140 }, { "epoch": 0.04046659878702023, "grad_norm": 3.6283501756858003, "learning_rate": 4.0466598787020235e-06, "loss": 0.9962, "step": 9141 }, { "epoch": 0.040471025720483424, "grad_norm": 2.5921465964275248, "learning_rate": 4.047102572048343e-06, "loss": 0.6073, "step": 9142 }, { "epoch": 0.040475452653946614, "grad_norm": 2.5285260015892397, "learning_rate": 4.0475452653946614e-06, "loss": 0.6706, "step": 9143 }, { "epoch": 0.040479879587409805, "grad_norm": 3.6747748389455412, "learning_rate": 4.047987958740981e-06, "loss": 1.0906, "step": 9144 }, { "epoch": 0.04048430652087299, "grad_norm": 3.1501430104618, "learning_rate": 4.048430652087299e-06, "loss": 0.9234, "step": 9145 }, { "epoch": 0.04048873345433618, "grad_norm": 2.565437100455992, "learning_rate": 4.048873345433618e-06, "loss": 0.764, "step": 9146 }, { "epoch": 0.04049316038779937, "grad_norm": 3.3652323268735946, "learning_rate": 4.049316038779937e-06, "loss": 0.9338, "step": 9147 }, { "epoch": 0.04049758732126256, "grad_norm": 3.0502462597932793, "learning_rate": 4.049758732126257e-06, "loss": 0.9557, "step": 9148 }, { "epoch": 0.04050201425472575, "grad_norm": 3.2521313163858787, "learning_rate": 4.050201425472575e-06, "loss": 0.8496, "step": 9149 }, { "epoch": 0.04050644118818894, "grad_norm": 4.2770158284169755, "learning_rate": 4.0506441188188945e-06, "loss": 1.0804, "step": 9150 }, { "epoch": 0.04051086812165213, "grad_norm": 2.537434870338709, "learning_rate": 4.051086812165214e-06, "loss": 0.4854, "step": 9151 }, { "epoch": 0.04051529505511532, "grad_norm": 3.10963029297114, "learning_rate": 4.051529505511532e-06, "loss": 0.959, "step": 9152 }, { "epoch": 0.04051972198857851, "grad_norm": 2.8486036356435713, "learning_rate": 4.051972198857852e-06, "loss": 1.0105, "step": 9153 }, { "epoch": 0.0405241489220417, "grad_norm": 3.122393008819864, "learning_rate": 4.05241489220417e-06, "loss": 0.5173, "step": 9154 }, { "epoch": 0.04052857585550489, "grad_norm": 2.6371204127607757, "learning_rate": 4.05285758555049e-06, "loss": 0.8405, "step": 9155 }, { "epoch": 0.040533002788968084, "grad_norm": 2.707745997556161, "learning_rate": 4.053300278896808e-06, "loss": 0.5319, "step": 9156 }, { "epoch": 0.040537429722431274, "grad_norm": 2.6694647747376723, "learning_rate": 4.0537429722431275e-06, "loss": 0.9193, "step": 9157 }, { "epoch": 0.040541856655894465, "grad_norm": 2.713959736180637, "learning_rate": 4.054185665589446e-06, "loss": 0.8857, "step": 9158 }, { "epoch": 0.040546283589357655, "grad_norm": 2.959480969276418, "learning_rate": 4.0546283589357654e-06, "loss": 0.8366, "step": 9159 }, { "epoch": 0.04055071052282084, "grad_norm": 2.448848602272928, "learning_rate": 4.055071052282085e-06, "loss": 0.3985, "step": 9160 }, { "epoch": 0.04055513745628403, "grad_norm": 2.7407759779600838, "learning_rate": 4.055513745628403e-06, "loss": 0.6191, "step": 9161 }, { "epoch": 0.04055956438974722, "grad_norm": 2.715890996305074, "learning_rate": 4.055956438974723e-06, "loss": 0.9344, "step": 9162 }, { "epoch": 0.04056399132321041, "grad_norm": 2.949044954816681, "learning_rate": 4.056399132321042e-06, "loss": 0.6824, "step": 9163 }, { "epoch": 0.0405684182566736, "grad_norm": 3.1705983662575457, "learning_rate": 4.056841825667361e-06, "loss": 0.7343, "step": 9164 }, { "epoch": 0.04057284519013679, "grad_norm": 2.976420456183135, "learning_rate": 4.057284519013679e-06, "loss": 0.7864, "step": 9165 }, { "epoch": 0.04057727212359998, "grad_norm": 2.797256459534725, "learning_rate": 4.0577272123599985e-06, "loss": 0.7845, "step": 9166 }, { "epoch": 0.04058169905706317, "grad_norm": 2.969651954090975, "learning_rate": 4.058169905706317e-06, "loss": 0.8327, "step": 9167 }, { "epoch": 0.04058612599052636, "grad_norm": 2.8800940765841463, "learning_rate": 4.058612599052636e-06, "loss": 0.9235, "step": 9168 }, { "epoch": 0.040590552923989554, "grad_norm": 2.864662329391351, "learning_rate": 4.059055292398956e-06, "loss": 0.5161, "step": 9169 }, { "epoch": 0.040594979857452744, "grad_norm": 2.7984632741135727, "learning_rate": 4.059497985745274e-06, "loss": 0.7366, "step": 9170 }, { "epoch": 0.040599406790915935, "grad_norm": 2.4602490338682133, "learning_rate": 4.059940679091594e-06, "loss": 0.5136, "step": 9171 }, { "epoch": 0.040603833724379125, "grad_norm": 3.4686394233358637, "learning_rate": 4.060383372437913e-06, "loss": 0.9591, "step": 9172 }, { "epoch": 0.040608260657842316, "grad_norm": 2.830898777918055, "learning_rate": 4.0608260657842315e-06, "loss": 0.9204, "step": 9173 }, { "epoch": 0.040612687591305506, "grad_norm": 3.2502888110057526, "learning_rate": 4.061268759130551e-06, "loss": 0.9963, "step": 9174 }, { "epoch": 0.04061711452476869, "grad_norm": 2.7751322143624555, "learning_rate": 4.0617114524768694e-06, "loss": 0.8927, "step": 9175 }, { "epoch": 0.04062154145823188, "grad_norm": 2.5715833214127413, "learning_rate": 4.062154145823188e-06, "loss": 0.7119, "step": 9176 }, { "epoch": 0.04062596839169507, "grad_norm": 2.5393455206840887, "learning_rate": 4.062596839169507e-06, "loss": 0.6326, "step": 9177 }, { "epoch": 0.04063039532515826, "grad_norm": 2.897547577393649, "learning_rate": 4.063039532515827e-06, "loss": 1.1297, "step": 9178 }, { "epoch": 0.04063482225862145, "grad_norm": 2.622989131920737, "learning_rate": 4.063482225862145e-06, "loss": 0.9249, "step": 9179 }, { "epoch": 0.04063924919208464, "grad_norm": 2.4383005901530344, "learning_rate": 4.063924919208465e-06, "loss": 0.6901, "step": 9180 }, { "epoch": 0.04064367612554783, "grad_norm": 3.095836378001517, "learning_rate": 4.064367612554784e-06, "loss": 0.9407, "step": 9181 }, { "epoch": 0.04064810305901102, "grad_norm": 2.757904918710388, "learning_rate": 4.0648103059011025e-06, "loss": 0.9483, "step": 9182 }, { "epoch": 0.040652529992474214, "grad_norm": 2.5590594010590038, "learning_rate": 4.065252999247422e-06, "loss": 0.7238, "step": 9183 }, { "epoch": 0.040656956925937404, "grad_norm": 2.5830790014619143, "learning_rate": 4.06569569259374e-06, "loss": 0.7394, "step": 9184 }, { "epoch": 0.040661383859400595, "grad_norm": 2.870151348360343, "learning_rate": 4.06613838594006e-06, "loss": 0.9691, "step": 9185 }, { "epoch": 0.040665810792863785, "grad_norm": 2.2984748007610216, "learning_rate": 4.066581079286378e-06, "loss": 0.6467, "step": 9186 }, { "epoch": 0.040670237726326976, "grad_norm": 3.842822811207525, "learning_rate": 4.067023772632698e-06, "loss": 1.3588, "step": 9187 }, { "epoch": 0.040674664659790166, "grad_norm": 3.075723514031501, "learning_rate": 4.067466465979016e-06, "loss": 1.0024, "step": 9188 }, { "epoch": 0.04067909159325336, "grad_norm": 3.271924094764171, "learning_rate": 4.0679091593253355e-06, "loss": 0.9845, "step": 9189 }, { "epoch": 0.04068351852671654, "grad_norm": 3.287812443655905, "learning_rate": 4.068351852671655e-06, "loss": 0.7454, "step": 9190 }, { "epoch": 0.04068794546017973, "grad_norm": 3.926551404695853, "learning_rate": 4.0687945460179734e-06, "loss": 1.2537, "step": 9191 }, { "epoch": 0.04069237239364292, "grad_norm": 3.0155231668385065, "learning_rate": 4.069237239364293e-06, "loss": 0.7519, "step": 9192 }, { "epoch": 0.04069679932710611, "grad_norm": 2.8433171303788316, "learning_rate": 4.069679932710612e-06, "loss": 0.8876, "step": 9193 }, { "epoch": 0.0407012262605693, "grad_norm": 2.5909019666035698, "learning_rate": 4.070122626056931e-06, "loss": 0.5529, "step": 9194 }, { "epoch": 0.04070565319403249, "grad_norm": 2.661861450806499, "learning_rate": 4.070565319403249e-06, "loss": 0.7714, "step": 9195 }, { "epoch": 0.04071008012749568, "grad_norm": 3.200574960313494, "learning_rate": 4.071008012749569e-06, "loss": 1.2388, "step": 9196 }, { "epoch": 0.040714507060958874, "grad_norm": 2.4300204848069376, "learning_rate": 4.071450706095887e-06, "loss": 0.735, "step": 9197 }, { "epoch": 0.040718933994422064, "grad_norm": 3.3504893372708238, "learning_rate": 4.0718933994422065e-06, "loss": 0.9397, "step": 9198 }, { "epoch": 0.040723360927885255, "grad_norm": 2.4409493677919363, "learning_rate": 4.072336092788526e-06, "loss": 0.6811, "step": 9199 }, { "epoch": 0.040727787861348445, "grad_norm": 3.183112318026374, "learning_rate": 4.072778786134844e-06, "loss": 0.8458, "step": 9200 }, { "epoch": 0.040732214794811636, "grad_norm": 3.064910331486505, "learning_rate": 4.073221479481164e-06, "loss": 1.1153, "step": 9201 }, { "epoch": 0.040736641728274826, "grad_norm": 3.6026670557463185, "learning_rate": 4.073664172827483e-06, "loss": 1.2264, "step": 9202 }, { "epoch": 0.04074106866173802, "grad_norm": 2.7388214666338686, "learning_rate": 4.074106866173802e-06, "loss": 0.745, "step": 9203 }, { "epoch": 0.04074549559520121, "grad_norm": 2.975186826942557, "learning_rate": 4.074549559520121e-06, "loss": 0.6014, "step": 9204 }, { "epoch": 0.04074992252866439, "grad_norm": 2.7490194410681386, "learning_rate": 4.0749922528664395e-06, "loss": 0.6014, "step": 9205 }, { "epoch": 0.04075434946212758, "grad_norm": 2.541818863537109, "learning_rate": 4.075434946212758e-06, "loss": 0.7672, "step": 9206 }, { "epoch": 0.04075877639559077, "grad_norm": 2.993809859171802, "learning_rate": 4.0758776395590774e-06, "loss": 1.1469, "step": 9207 }, { "epoch": 0.04076320332905396, "grad_norm": 3.257380455446234, "learning_rate": 4.076320332905397e-06, "loss": 0.9668, "step": 9208 }, { "epoch": 0.04076763026251715, "grad_norm": 3.01633819351113, "learning_rate": 4.076763026251715e-06, "loss": 0.8175, "step": 9209 }, { "epoch": 0.040772057195980344, "grad_norm": 2.5377255229369076, "learning_rate": 4.077205719598035e-06, "loss": 0.7824, "step": 9210 }, { "epoch": 0.040776484129443534, "grad_norm": 3.083254899144547, "learning_rate": 4.077648412944354e-06, "loss": 0.8324, "step": 9211 }, { "epoch": 0.040780911062906725, "grad_norm": 3.562018932238808, "learning_rate": 4.078091106290673e-06, "loss": 1.0309, "step": 9212 }, { "epoch": 0.040785337996369915, "grad_norm": 3.57856692823342, "learning_rate": 4.078533799636992e-06, "loss": 0.9774, "step": 9213 }, { "epoch": 0.040789764929833106, "grad_norm": 3.013343835742236, "learning_rate": 4.0789764929833105e-06, "loss": 0.8016, "step": 9214 }, { "epoch": 0.040794191863296296, "grad_norm": 2.437837619787491, "learning_rate": 4.07941918632963e-06, "loss": 0.5706, "step": 9215 }, { "epoch": 0.04079861879675949, "grad_norm": 3.3539609888640696, "learning_rate": 4.079861879675948e-06, "loss": 0.9162, "step": 9216 }, { "epoch": 0.04080304573022268, "grad_norm": 2.615122953766271, "learning_rate": 4.080304573022268e-06, "loss": 0.74, "step": 9217 }, { "epoch": 0.04080747266368587, "grad_norm": 2.7217068097929737, "learning_rate": 4.080747266368587e-06, "loss": 0.5834, "step": 9218 }, { "epoch": 0.04081189959714906, "grad_norm": 2.7614997910420693, "learning_rate": 4.081189959714906e-06, "loss": 0.6224, "step": 9219 }, { "epoch": 0.04081632653061224, "grad_norm": 2.69173175773389, "learning_rate": 4.081632653061225e-06, "loss": 0.8562, "step": 9220 }, { "epoch": 0.04082075346407543, "grad_norm": 2.7122927627492674, "learning_rate": 4.082075346407544e-06, "loss": 0.818, "step": 9221 }, { "epoch": 0.04082518039753862, "grad_norm": 2.5405060645742354, "learning_rate": 4.082518039753863e-06, "loss": 0.6696, "step": 9222 }, { "epoch": 0.04082960733100181, "grad_norm": 3.161597677211825, "learning_rate": 4.082960733100182e-06, "loss": 0.7715, "step": 9223 }, { "epoch": 0.040834034264465004, "grad_norm": 2.8974085106005703, "learning_rate": 4.083403426446501e-06, "loss": 0.6642, "step": 9224 }, { "epoch": 0.040838461197928194, "grad_norm": 3.0101444389662766, "learning_rate": 4.08384611979282e-06, "loss": 1.0094, "step": 9225 }, { "epoch": 0.040842888131391385, "grad_norm": 2.9136584134959143, "learning_rate": 4.084288813139139e-06, "loss": 0.8862, "step": 9226 }, { "epoch": 0.040847315064854575, "grad_norm": 2.693425855468063, "learning_rate": 4.084731506485458e-06, "loss": 0.7288, "step": 9227 }, { "epoch": 0.040851741998317766, "grad_norm": 3.4197370468175534, "learning_rate": 4.085174199831777e-06, "loss": 1.1391, "step": 9228 }, { "epoch": 0.040856168931780956, "grad_norm": 3.1366139212193893, "learning_rate": 4.085616893178096e-06, "loss": 1.0181, "step": 9229 }, { "epoch": 0.04086059586524415, "grad_norm": 3.200504403917106, "learning_rate": 4.086059586524415e-06, "loss": 0.6787, "step": 9230 }, { "epoch": 0.04086502279870734, "grad_norm": 2.7360381549096755, "learning_rate": 4.086502279870734e-06, "loss": 0.7812, "step": 9231 }, { "epoch": 0.04086944973217053, "grad_norm": 3.2948546812222315, "learning_rate": 4.086944973217053e-06, "loss": 0.9347, "step": 9232 }, { "epoch": 0.04087387666563372, "grad_norm": 2.6387553662354266, "learning_rate": 4.087387666563372e-06, "loss": 0.7809, "step": 9233 }, { "epoch": 0.04087830359909691, "grad_norm": 2.5230808517568484, "learning_rate": 4.087830359909691e-06, "loss": 0.8244, "step": 9234 }, { "epoch": 0.04088273053256009, "grad_norm": 2.938463610360707, "learning_rate": 4.08827305325601e-06, "loss": 1.0305, "step": 9235 }, { "epoch": 0.04088715746602328, "grad_norm": 2.722516089090502, "learning_rate": 4.088715746602329e-06, "loss": 0.6616, "step": 9236 }, { "epoch": 0.04089158439948647, "grad_norm": 2.6571170147400895, "learning_rate": 4.0891584399486475e-06, "loss": 0.5418, "step": 9237 }, { "epoch": 0.040896011332949664, "grad_norm": 2.5896339236587607, "learning_rate": 4.089601133294967e-06, "loss": 0.7911, "step": 9238 }, { "epoch": 0.040900438266412854, "grad_norm": 2.6954394008035854, "learning_rate": 4.090043826641286e-06, "loss": 0.5005, "step": 9239 }, { "epoch": 0.040904865199876045, "grad_norm": 3.2532206871992573, "learning_rate": 4.090486519987605e-06, "loss": 0.9261, "step": 9240 }, { "epoch": 0.040909292133339235, "grad_norm": 2.605961276972821, "learning_rate": 4.090929213333924e-06, "loss": 0.6615, "step": 9241 }, { "epoch": 0.040913719066802426, "grad_norm": 2.6464941772250143, "learning_rate": 4.0913719066802435e-06, "loss": 0.7178, "step": 9242 }, { "epoch": 0.040918146000265616, "grad_norm": 3.024243240018372, "learning_rate": 4.091814600026562e-06, "loss": 0.8361, "step": 9243 }, { "epoch": 0.04092257293372881, "grad_norm": 4.692197379033659, "learning_rate": 4.0922572933728814e-06, "loss": 1.2861, "step": 9244 }, { "epoch": 0.040926999867192, "grad_norm": 2.3209017591638585, "learning_rate": 4.0926999867192e-06, "loss": 0.7033, "step": 9245 }, { "epoch": 0.04093142680065519, "grad_norm": 2.663785423912182, "learning_rate": 4.0931426800655185e-06, "loss": 0.6552, "step": 9246 }, { "epoch": 0.04093585373411838, "grad_norm": 2.5198837409423223, "learning_rate": 4.093585373411838e-06, "loss": 0.737, "step": 9247 }, { "epoch": 0.04094028066758157, "grad_norm": 3.6817482305146334, "learning_rate": 4.094028066758157e-06, "loss": 1.1195, "step": 9248 }, { "epoch": 0.04094470760104476, "grad_norm": 2.201562296736985, "learning_rate": 4.094470760104476e-06, "loss": 0.4037, "step": 9249 }, { "epoch": 0.04094913453450794, "grad_norm": 2.311757445415064, "learning_rate": 4.094913453450795e-06, "loss": 0.429, "step": 9250 }, { "epoch": 0.040953561467971134, "grad_norm": 2.4652131775437507, "learning_rate": 4.0953561467971145e-06, "loss": 0.5674, "step": 9251 }, { "epoch": 0.040957988401434324, "grad_norm": 2.7717025225610303, "learning_rate": 4.095798840143433e-06, "loss": 0.9385, "step": 9252 }, { "epoch": 0.040962415334897515, "grad_norm": 2.427626505063491, "learning_rate": 4.096241533489752e-06, "loss": 0.7013, "step": 9253 }, { "epoch": 0.040966842268360705, "grad_norm": 3.2366594528384933, "learning_rate": 4.096684226836071e-06, "loss": 0.7677, "step": 9254 }, { "epoch": 0.040971269201823896, "grad_norm": 3.046457616010871, "learning_rate": 4.09712692018239e-06, "loss": 0.8852, "step": 9255 }, { "epoch": 0.040975696135287086, "grad_norm": 2.7475370531309875, "learning_rate": 4.097569613528709e-06, "loss": 0.5764, "step": 9256 }, { "epoch": 0.04098012306875028, "grad_norm": 3.0215675565951052, "learning_rate": 4.098012306875028e-06, "loss": 0.9103, "step": 9257 }, { "epoch": 0.04098455000221347, "grad_norm": 2.7222160119067533, "learning_rate": 4.098455000221347e-06, "loss": 0.6655, "step": 9258 }, { "epoch": 0.04098897693567666, "grad_norm": 3.441796955771828, "learning_rate": 4.098897693567666e-06, "loss": 0.7681, "step": 9259 }, { "epoch": 0.04099340386913985, "grad_norm": 2.7225363251282566, "learning_rate": 4.0993403869139854e-06, "loss": 0.6427, "step": 9260 }, { "epoch": 0.04099783080260304, "grad_norm": 3.4522776751344164, "learning_rate": 4.099783080260304e-06, "loss": 0.781, "step": 9261 }, { "epoch": 0.04100225773606623, "grad_norm": 2.589164648025509, "learning_rate": 4.100225773606623e-06, "loss": 0.4615, "step": 9262 }, { "epoch": 0.04100668466952942, "grad_norm": 3.0040242667067587, "learning_rate": 4.100668466952943e-06, "loss": 0.6484, "step": 9263 }, { "epoch": 0.04101111160299261, "grad_norm": 2.83698109398516, "learning_rate": 4.101111160299261e-06, "loss": 0.6946, "step": 9264 }, { "epoch": 0.041015538536455794, "grad_norm": 2.562540811155809, "learning_rate": 4.10155385364558e-06, "loss": 0.6194, "step": 9265 }, { "epoch": 0.041019965469918984, "grad_norm": 2.4853051404284847, "learning_rate": 4.101996546991899e-06, "loss": 0.7901, "step": 9266 }, { "epoch": 0.041024392403382175, "grad_norm": 2.7814332715771646, "learning_rate": 4.102439240338218e-06, "loss": 0.6404, "step": 9267 }, { "epoch": 0.041028819336845365, "grad_norm": 3.148586142083781, "learning_rate": 4.102881933684537e-06, "loss": 0.9996, "step": 9268 }, { "epoch": 0.041033246270308556, "grad_norm": 3.089286087264811, "learning_rate": 4.103324627030856e-06, "loss": 0.577, "step": 9269 }, { "epoch": 0.041037673203771746, "grad_norm": 2.5695249418155734, "learning_rate": 4.103767320377175e-06, "loss": 0.5104, "step": 9270 }, { "epoch": 0.04104210013723494, "grad_norm": 3.297748111063529, "learning_rate": 4.104210013723494e-06, "loss": 0.7374, "step": 9271 }, { "epoch": 0.04104652707069813, "grad_norm": 3.0785260046691474, "learning_rate": 4.104652707069814e-06, "loss": 0.6136, "step": 9272 }, { "epoch": 0.04105095400416132, "grad_norm": 2.605756721939118, "learning_rate": 4.105095400416132e-06, "loss": 0.826, "step": 9273 }, { "epoch": 0.04105538093762451, "grad_norm": 3.5299684444154678, "learning_rate": 4.1055380937624515e-06, "loss": 1.2334, "step": 9274 }, { "epoch": 0.0410598078710877, "grad_norm": 2.7536219420825647, "learning_rate": 4.10598078710877e-06, "loss": 0.7915, "step": 9275 }, { "epoch": 0.04106423480455089, "grad_norm": 3.3122110427533342, "learning_rate": 4.106423480455089e-06, "loss": 0.9543, "step": 9276 }, { "epoch": 0.04106866173801408, "grad_norm": 3.277965958912085, "learning_rate": 4.106866173801408e-06, "loss": 0.7146, "step": 9277 }, { "epoch": 0.04107308867147727, "grad_norm": 3.7096675621968567, "learning_rate": 4.107308867147727e-06, "loss": 0.7901, "step": 9278 }, { "epoch": 0.04107751560494046, "grad_norm": 3.408706688958405, "learning_rate": 4.107751560494046e-06, "loss": 0.8784, "step": 9279 }, { "epoch": 0.041081942538403644, "grad_norm": 3.021661767860611, "learning_rate": 4.108194253840365e-06, "loss": 0.6789, "step": 9280 }, { "epoch": 0.041086369471866835, "grad_norm": 3.0264304482198976, "learning_rate": 4.108636947186685e-06, "loss": 0.9293, "step": 9281 }, { "epoch": 0.041090796405330025, "grad_norm": 2.945042546793994, "learning_rate": 4.109079640533003e-06, "loss": 0.6397, "step": 9282 }, { "epoch": 0.041095223338793216, "grad_norm": 2.8927316968090095, "learning_rate": 4.1095223338793225e-06, "loss": 0.6042, "step": 9283 }, { "epoch": 0.041099650272256406, "grad_norm": 2.450055420335312, "learning_rate": 4.109965027225641e-06, "loss": 0.5342, "step": 9284 }, { "epoch": 0.0411040772057196, "grad_norm": 3.081977493022492, "learning_rate": 4.11040772057196e-06, "loss": 1.0829, "step": 9285 }, { "epoch": 0.04110850413918279, "grad_norm": 2.8727310482267256, "learning_rate": 4.110850413918279e-06, "loss": 0.8015, "step": 9286 }, { "epoch": 0.04111293107264598, "grad_norm": 2.4752902615089463, "learning_rate": 4.111293107264598e-06, "loss": 0.6951, "step": 9287 }, { "epoch": 0.04111735800610917, "grad_norm": 3.2307692237260515, "learning_rate": 4.111735800610917e-06, "loss": 0.9253, "step": 9288 }, { "epoch": 0.04112178493957236, "grad_norm": 3.1267367144193803, "learning_rate": 4.112178493957236e-06, "loss": 0.818, "step": 9289 }, { "epoch": 0.04112621187303555, "grad_norm": 3.0542349331455045, "learning_rate": 4.1126211873035555e-06, "loss": 0.8344, "step": 9290 }, { "epoch": 0.04113063880649874, "grad_norm": 3.334837697000506, "learning_rate": 4.113063880649874e-06, "loss": 0.7092, "step": 9291 }, { "epoch": 0.04113506573996193, "grad_norm": 2.7875564893349645, "learning_rate": 4.1135065739961934e-06, "loss": 0.8186, "step": 9292 }, { "epoch": 0.04113949267342512, "grad_norm": 2.5350759137568457, "learning_rate": 4.113949267342513e-06, "loss": 0.5882, "step": 9293 }, { "epoch": 0.04114391960688831, "grad_norm": 2.76960737385139, "learning_rate": 4.114391960688831e-06, "loss": 0.6215, "step": 9294 }, { "epoch": 0.0411483465403515, "grad_norm": 2.8482319054340737, "learning_rate": 4.11483465403515e-06, "loss": 1.0271, "step": 9295 }, { "epoch": 0.041152773473814686, "grad_norm": 3.075555690668438, "learning_rate": 4.115277347381469e-06, "loss": 0.8582, "step": 9296 }, { "epoch": 0.041157200407277876, "grad_norm": 3.210770212880549, "learning_rate": 4.115720040727788e-06, "loss": 0.8092, "step": 9297 }, { "epoch": 0.04116162734074107, "grad_norm": 3.047216285596079, "learning_rate": 4.116162734074107e-06, "loss": 0.7432, "step": 9298 }, { "epoch": 0.04116605427420426, "grad_norm": 3.976690105339563, "learning_rate": 4.1166054274204265e-06, "loss": 0.8322, "step": 9299 }, { "epoch": 0.04117048120766745, "grad_norm": 2.7578911895845626, "learning_rate": 4.117048120766745e-06, "loss": 0.7751, "step": 9300 }, { "epoch": 0.04117490814113064, "grad_norm": 2.6946669701644925, "learning_rate": 4.117490814113064e-06, "loss": 0.8035, "step": 9301 }, { "epoch": 0.04117933507459383, "grad_norm": 2.91754842964892, "learning_rate": 4.117933507459384e-06, "loss": 0.9149, "step": 9302 }, { "epoch": 0.04118376200805702, "grad_norm": 3.6023630007232197, "learning_rate": 4.118376200805702e-06, "loss": 0.8412, "step": 9303 }, { "epoch": 0.04118818894152021, "grad_norm": 2.8752257783628985, "learning_rate": 4.118818894152022e-06, "loss": 0.705, "step": 9304 }, { "epoch": 0.0411926158749834, "grad_norm": 2.898760980387146, "learning_rate": 4.11926158749834e-06, "loss": 0.486, "step": 9305 }, { "epoch": 0.04119704280844659, "grad_norm": 2.8562643619761916, "learning_rate": 4.119704280844659e-06, "loss": 0.8193, "step": 9306 }, { "epoch": 0.04120146974190978, "grad_norm": 3.7812237910169753, "learning_rate": 4.120146974190978e-06, "loss": 1.7559, "step": 9307 }, { "epoch": 0.04120589667537297, "grad_norm": 2.8918044535432212, "learning_rate": 4.1205896675372974e-06, "loss": 0.9222, "step": 9308 }, { "epoch": 0.04121032360883616, "grad_norm": 2.7152874770740247, "learning_rate": 4.121032360883616e-06, "loss": 0.8793, "step": 9309 }, { "epoch": 0.04121475054229935, "grad_norm": 3.2048358835266044, "learning_rate": 4.121475054229935e-06, "loss": 0.8241, "step": 9310 }, { "epoch": 0.041219177475762536, "grad_norm": 2.688434652605655, "learning_rate": 4.121917747576255e-06, "loss": 0.6629, "step": 9311 }, { "epoch": 0.04122360440922573, "grad_norm": 3.399176853784695, "learning_rate": 4.122360440922573e-06, "loss": 1.15, "step": 9312 }, { "epoch": 0.04122803134268892, "grad_norm": 2.4658683586749213, "learning_rate": 4.122803134268893e-06, "loss": 0.6446, "step": 9313 }, { "epoch": 0.04123245827615211, "grad_norm": 2.9426806305535234, "learning_rate": 4.123245827615211e-06, "loss": 1.0273, "step": 9314 }, { "epoch": 0.0412368852096153, "grad_norm": 3.5176912093729147, "learning_rate": 4.1236885209615305e-06, "loss": 0.9875, "step": 9315 }, { "epoch": 0.04124131214307849, "grad_norm": 2.940135186530177, "learning_rate": 4.124131214307849e-06, "loss": 0.8141, "step": 9316 }, { "epoch": 0.04124573907654168, "grad_norm": 2.5671333094890585, "learning_rate": 4.124573907654168e-06, "loss": 0.574, "step": 9317 }, { "epoch": 0.04125016601000487, "grad_norm": 3.2142410224457847, "learning_rate": 4.125016601000487e-06, "loss": 0.3773, "step": 9318 }, { "epoch": 0.04125459294346806, "grad_norm": 2.43066887295827, "learning_rate": 4.125459294346806e-06, "loss": 0.6929, "step": 9319 }, { "epoch": 0.04125901987693125, "grad_norm": 3.057940183856958, "learning_rate": 4.125901987693126e-06, "loss": 0.6537, "step": 9320 }, { "epoch": 0.04126344681039444, "grad_norm": 3.0281385737527753, "learning_rate": 4.126344681039444e-06, "loss": 0.8582, "step": 9321 }, { "epoch": 0.04126787374385763, "grad_norm": 2.6967990594420836, "learning_rate": 4.1267873743857635e-06, "loss": 0.7175, "step": 9322 }, { "epoch": 0.04127230067732082, "grad_norm": 3.5160848714243156, "learning_rate": 4.127230067732083e-06, "loss": 1.0361, "step": 9323 }, { "epoch": 0.04127672761078401, "grad_norm": 2.6339068188515, "learning_rate": 4.1276727610784014e-06, "loss": 0.609, "step": 9324 }, { "epoch": 0.0412811545442472, "grad_norm": 4.063557084707709, "learning_rate": 4.128115454424721e-06, "loss": 1.2373, "step": 9325 }, { "epoch": 0.04128558147771039, "grad_norm": 3.767651485661355, "learning_rate": 4.128558147771039e-06, "loss": 1.1458, "step": 9326 }, { "epoch": 0.04129000841117358, "grad_norm": 3.572236728672352, "learning_rate": 4.129000841117358e-06, "loss": 0.6545, "step": 9327 }, { "epoch": 0.04129443534463677, "grad_norm": 3.535076059650943, "learning_rate": 4.129443534463677e-06, "loss": 1.2766, "step": 9328 }, { "epoch": 0.04129886227809996, "grad_norm": 2.551534254860819, "learning_rate": 4.129886227809997e-06, "loss": 0.6026, "step": 9329 }, { "epoch": 0.04130328921156315, "grad_norm": 2.7617366852283167, "learning_rate": 4.130328921156315e-06, "loss": 0.9142, "step": 9330 }, { "epoch": 0.04130771614502634, "grad_norm": 3.6243651775158736, "learning_rate": 4.1307716145026345e-06, "loss": 1.1065, "step": 9331 }, { "epoch": 0.04131214307848953, "grad_norm": 2.9157890948241376, "learning_rate": 4.131214307848954e-06, "loss": 0.4805, "step": 9332 }, { "epoch": 0.04131657001195272, "grad_norm": 2.7640979607924665, "learning_rate": 4.131657001195272e-06, "loss": 0.6304, "step": 9333 }, { "epoch": 0.04132099694541591, "grad_norm": 3.4704053005933324, "learning_rate": 4.132099694541592e-06, "loss": 0.6969, "step": 9334 }, { "epoch": 0.0413254238788791, "grad_norm": 3.3880776380063873, "learning_rate": 4.13254238788791e-06, "loss": 0.7079, "step": 9335 }, { "epoch": 0.04132985081234229, "grad_norm": 2.6365149625711184, "learning_rate": 4.13298508123423e-06, "loss": 0.5304, "step": 9336 }, { "epoch": 0.04133427774580548, "grad_norm": 2.538379409379446, "learning_rate": 4.133427774580548e-06, "loss": 0.6031, "step": 9337 }, { "epoch": 0.04133870467926867, "grad_norm": 2.6906800241029596, "learning_rate": 4.1338704679268675e-06, "loss": 0.8318, "step": 9338 }, { "epoch": 0.041343131612731863, "grad_norm": 3.1941650280696674, "learning_rate": 4.134313161273186e-06, "loss": 1.053, "step": 9339 }, { "epoch": 0.041347558546195054, "grad_norm": 3.041403197474406, "learning_rate": 4.1347558546195054e-06, "loss": 0.836, "step": 9340 }, { "epoch": 0.04135198547965824, "grad_norm": 3.336309861953174, "learning_rate": 4.135198547965825e-06, "loss": 0.6459, "step": 9341 }, { "epoch": 0.04135641241312143, "grad_norm": 2.5926910455773498, "learning_rate": 4.135641241312143e-06, "loss": 0.6427, "step": 9342 }, { "epoch": 0.04136083934658462, "grad_norm": 2.4255368273857774, "learning_rate": 4.136083934658463e-06, "loss": 0.6459, "step": 9343 }, { "epoch": 0.04136526628004781, "grad_norm": 3.7962185970296423, "learning_rate": 4.136526628004782e-06, "loss": 1.1066, "step": 9344 }, { "epoch": 0.041369693213511, "grad_norm": 2.814699680485418, "learning_rate": 4.136969321351101e-06, "loss": 0.8592, "step": 9345 }, { "epoch": 0.04137412014697419, "grad_norm": 2.735370156028252, "learning_rate": 4.137412014697419e-06, "loss": 0.7249, "step": 9346 }, { "epoch": 0.04137854708043738, "grad_norm": 2.4845173055778362, "learning_rate": 4.1378547080437385e-06, "loss": 0.6385, "step": 9347 }, { "epoch": 0.04138297401390057, "grad_norm": 3.753833296973511, "learning_rate": 4.138297401390057e-06, "loss": 1.2613, "step": 9348 }, { "epoch": 0.04138740094736376, "grad_norm": 3.3658044707904438, "learning_rate": 4.138740094736376e-06, "loss": 0.7558, "step": 9349 }, { "epoch": 0.04139182788082695, "grad_norm": 2.800836214581034, "learning_rate": 4.139182788082696e-06, "loss": 0.6241, "step": 9350 }, { "epoch": 0.04139625481429014, "grad_norm": 3.0532704558471413, "learning_rate": 4.139625481429014e-06, "loss": 0.5415, "step": 9351 }, { "epoch": 0.04140068174775333, "grad_norm": 3.221298855207912, "learning_rate": 4.140068174775334e-06, "loss": 0.8977, "step": 9352 }, { "epoch": 0.041405108681216524, "grad_norm": 3.1357332637116038, "learning_rate": 4.140510868121653e-06, "loss": 0.6332, "step": 9353 }, { "epoch": 0.041409535614679714, "grad_norm": 3.0266960536821452, "learning_rate": 4.1409535614679715e-06, "loss": 1.0088, "step": 9354 }, { "epoch": 0.041413962548142905, "grad_norm": 3.697697763421129, "learning_rate": 4.141396254814291e-06, "loss": 0.8868, "step": 9355 }, { "epoch": 0.04141838948160609, "grad_norm": 2.7263148202727012, "learning_rate": 4.1418389481606094e-06, "loss": 0.6713, "step": 9356 }, { "epoch": 0.04142281641506928, "grad_norm": 3.641416572279926, "learning_rate": 4.142281641506928e-06, "loss": 1.1444, "step": 9357 }, { "epoch": 0.04142724334853247, "grad_norm": 2.718209156112685, "learning_rate": 4.142724334853247e-06, "loss": 0.6272, "step": 9358 }, { "epoch": 0.04143167028199566, "grad_norm": 2.366752595649356, "learning_rate": 4.143167028199567e-06, "loss": 0.6641, "step": 9359 }, { "epoch": 0.04143609721545885, "grad_norm": 2.9203038012974356, "learning_rate": 4.143609721545885e-06, "loss": 0.6267, "step": 9360 }, { "epoch": 0.04144052414892204, "grad_norm": 3.024595121103361, "learning_rate": 4.144052414892205e-06, "loss": 0.8595, "step": 9361 }, { "epoch": 0.04144495108238523, "grad_norm": 2.6681016209911075, "learning_rate": 4.144495108238524e-06, "loss": 0.8616, "step": 9362 }, { "epoch": 0.04144937801584842, "grad_norm": 3.009593913219607, "learning_rate": 4.1449378015848425e-06, "loss": 0.5765, "step": 9363 }, { "epoch": 0.04145380494931161, "grad_norm": 2.9446593591887686, "learning_rate": 4.145380494931162e-06, "loss": 0.9212, "step": 9364 }, { "epoch": 0.0414582318827748, "grad_norm": 3.1361886521548934, "learning_rate": 4.14582318827748e-06, "loss": 0.797, "step": 9365 }, { "epoch": 0.04146265881623799, "grad_norm": 2.6929455671746294, "learning_rate": 4.1462658816238e-06, "loss": 0.5027, "step": 9366 }, { "epoch": 0.041467085749701184, "grad_norm": 2.563410150109456, "learning_rate": 4.146708574970118e-06, "loss": 0.7592, "step": 9367 }, { "epoch": 0.041471512683164374, "grad_norm": 3.819009960852605, "learning_rate": 4.147151268316438e-06, "loss": 1.0042, "step": 9368 }, { "epoch": 0.041475939616627565, "grad_norm": 3.0848772879473843, "learning_rate": 4.147593961662756e-06, "loss": 0.8003, "step": 9369 }, { "epoch": 0.041480366550090755, "grad_norm": 2.5841819152487853, "learning_rate": 4.1480366550090755e-06, "loss": 0.7879, "step": 9370 }, { "epoch": 0.04148479348355394, "grad_norm": 2.793428797961029, "learning_rate": 4.148479348355395e-06, "loss": 0.883, "step": 9371 }, { "epoch": 0.04148922041701713, "grad_norm": 2.6728441117519925, "learning_rate": 4.1489220417017134e-06, "loss": 0.7187, "step": 9372 }, { "epoch": 0.04149364735048032, "grad_norm": 2.743597775569141, "learning_rate": 4.149364735048033e-06, "loss": 0.6441, "step": 9373 }, { "epoch": 0.04149807428394351, "grad_norm": 2.8451893288901804, "learning_rate": 4.149807428394352e-06, "loss": 0.7904, "step": 9374 }, { "epoch": 0.0415025012174067, "grad_norm": 2.6933748317281236, "learning_rate": 4.150250121740671e-06, "loss": 0.8852, "step": 9375 }, { "epoch": 0.04150692815086989, "grad_norm": 2.964732635881581, "learning_rate": 4.150692815086989e-06, "loss": 0.7352, "step": 9376 }, { "epoch": 0.04151135508433308, "grad_norm": 2.450701831054908, "learning_rate": 4.151135508433309e-06, "loss": 0.7365, "step": 9377 }, { "epoch": 0.04151578201779627, "grad_norm": 2.637297786141411, "learning_rate": 4.151578201779627e-06, "loss": 0.6707, "step": 9378 }, { "epoch": 0.04152020895125946, "grad_norm": 2.860214286169136, "learning_rate": 4.1520208951259465e-06, "loss": 0.6505, "step": 9379 }, { "epoch": 0.041524635884722653, "grad_norm": 2.8844787996902785, "learning_rate": 4.152463588472266e-06, "loss": 0.887, "step": 9380 }, { "epoch": 0.041529062818185844, "grad_norm": 4.296363913656936, "learning_rate": 4.152906281818584e-06, "loss": 1.403, "step": 9381 }, { "epoch": 0.041533489751649035, "grad_norm": 3.7159755459779675, "learning_rate": 4.153348975164904e-06, "loss": 1.2714, "step": 9382 }, { "epoch": 0.041537916685112225, "grad_norm": 2.9224184293655173, "learning_rate": 4.153791668511223e-06, "loss": 0.5731, "step": 9383 }, { "epoch": 0.041542343618575416, "grad_norm": 2.541448698404706, "learning_rate": 4.154234361857542e-06, "loss": 0.4855, "step": 9384 }, { "epoch": 0.041546770552038606, "grad_norm": 2.7766774573783297, "learning_rate": 4.154677055203861e-06, "loss": 0.7836, "step": 9385 }, { "epoch": 0.04155119748550179, "grad_norm": 2.885057111969339, "learning_rate": 4.1551197485501795e-06, "loss": 0.6259, "step": 9386 }, { "epoch": 0.04155562441896498, "grad_norm": 2.5655372489403723, "learning_rate": 4.155562441896498e-06, "loss": 0.7257, "step": 9387 }, { "epoch": 0.04156005135242817, "grad_norm": 4.856743539802975, "learning_rate": 4.1560051352428174e-06, "loss": 1.1042, "step": 9388 }, { "epoch": 0.04156447828589136, "grad_norm": 4.113209566758747, "learning_rate": 4.156447828589137e-06, "loss": 1.2372, "step": 9389 }, { "epoch": 0.04156890521935455, "grad_norm": 2.878104389341319, "learning_rate": 4.156890521935455e-06, "loss": 0.5868, "step": 9390 }, { "epoch": 0.04157333215281774, "grad_norm": 2.3905941651795235, "learning_rate": 4.157333215281775e-06, "loss": 0.5431, "step": 9391 }, { "epoch": 0.04157775908628093, "grad_norm": 2.460092703243874, "learning_rate": 4.157775908628094e-06, "loss": 0.5868, "step": 9392 }, { "epoch": 0.04158218601974412, "grad_norm": 2.494306157257062, "learning_rate": 4.158218601974413e-06, "loss": 0.7014, "step": 9393 }, { "epoch": 0.041586612953207314, "grad_norm": 2.739657233630125, "learning_rate": 4.158661295320732e-06, "loss": 0.6521, "step": 9394 }, { "epoch": 0.041591039886670504, "grad_norm": 2.565913952533954, "learning_rate": 4.1591039886670505e-06, "loss": 0.7247, "step": 9395 }, { "epoch": 0.041595466820133695, "grad_norm": 2.879359113403314, "learning_rate": 4.15954668201337e-06, "loss": 0.7558, "step": 9396 }, { "epoch": 0.041599893753596885, "grad_norm": 2.970392687737244, "learning_rate": 4.159989375359688e-06, "loss": 0.6251, "step": 9397 }, { "epoch": 0.041604320687060076, "grad_norm": 2.3810310336416176, "learning_rate": 4.160432068706008e-06, "loss": 0.6335, "step": 9398 }, { "epoch": 0.041608747620523266, "grad_norm": 2.321998366135914, "learning_rate": 4.160874762052326e-06, "loss": 0.5336, "step": 9399 }, { "epoch": 0.04161317455398646, "grad_norm": 2.6854903151643956, "learning_rate": 4.161317455398646e-06, "loss": 0.8331, "step": 9400 }, { "epoch": 0.04161760148744964, "grad_norm": 3.59438862273452, "learning_rate": 4.161760148744965e-06, "loss": 1.2378, "step": 9401 }, { "epoch": 0.04162202842091283, "grad_norm": 2.856886275892515, "learning_rate": 4.1622028420912835e-06, "loss": 0.7274, "step": 9402 }, { "epoch": 0.04162645535437602, "grad_norm": 3.44469428825979, "learning_rate": 4.162645535437603e-06, "loss": 1.2018, "step": 9403 }, { "epoch": 0.04163088228783921, "grad_norm": 2.7752305243665196, "learning_rate": 4.163088228783922e-06, "loss": 0.7203, "step": 9404 }, { "epoch": 0.0416353092213024, "grad_norm": 2.564349240500064, "learning_rate": 4.163530922130241e-06, "loss": 0.7315, "step": 9405 }, { "epoch": 0.04163973615476559, "grad_norm": 3.498246473430204, "learning_rate": 4.163973615476559e-06, "loss": 0.7832, "step": 9406 }, { "epoch": 0.04164416308822878, "grad_norm": 2.7887527214632173, "learning_rate": 4.164416308822879e-06, "loss": 0.4584, "step": 9407 }, { "epoch": 0.041648590021691974, "grad_norm": 2.8089274575371372, "learning_rate": 4.164859002169197e-06, "loss": 0.7438, "step": 9408 }, { "epoch": 0.041653016955155164, "grad_norm": 2.5813960898537687, "learning_rate": 4.165301695515517e-06, "loss": 0.6879, "step": 9409 }, { "epoch": 0.041657443888618355, "grad_norm": 3.0480178303758114, "learning_rate": 4.165744388861836e-06, "loss": 0.6472, "step": 9410 }, { "epoch": 0.041661870822081545, "grad_norm": 2.6302568678508944, "learning_rate": 4.1661870822081545e-06, "loss": 0.7639, "step": 9411 }, { "epoch": 0.041666297755544736, "grad_norm": 2.4140387808424766, "learning_rate": 4.166629775554474e-06, "loss": 0.6389, "step": 9412 }, { "epoch": 0.041670724689007926, "grad_norm": 2.9058383940965764, "learning_rate": 4.167072468900793e-06, "loss": 0.9717, "step": 9413 }, { "epoch": 0.04167515162247112, "grad_norm": 2.7851316862473183, "learning_rate": 4.167515162247112e-06, "loss": 0.6469, "step": 9414 }, { "epoch": 0.04167957855593431, "grad_norm": 3.2000758558553564, "learning_rate": 4.167957855593431e-06, "loss": 0.8306, "step": 9415 }, { "epoch": 0.04168400548939749, "grad_norm": 2.7453063409090586, "learning_rate": 4.16840054893975e-06, "loss": 0.6036, "step": 9416 }, { "epoch": 0.04168843242286068, "grad_norm": 3.440032939022169, "learning_rate": 4.168843242286069e-06, "loss": 1.18, "step": 9417 }, { "epoch": 0.04169285935632387, "grad_norm": 2.557442231523357, "learning_rate": 4.1692859356323876e-06, "loss": 0.6801, "step": 9418 }, { "epoch": 0.04169728628978706, "grad_norm": 2.390168828875447, "learning_rate": 4.169728628978707e-06, "loss": 0.5821, "step": 9419 }, { "epoch": 0.04170171322325025, "grad_norm": 4.407682691208974, "learning_rate": 4.1701713223250254e-06, "loss": 1.4248, "step": 9420 }, { "epoch": 0.041706140156713443, "grad_norm": 2.447173018662014, "learning_rate": 4.170614015671345e-06, "loss": 0.6354, "step": 9421 }, { "epoch": 0.041710567090176634, "grad_norm": 3.283927254251047, "learning_rate": 4.171056709017664e-06, "loss": 0.8528, "step": 9422 }, { "epoch": 0.041714994023639825, "grad_norm": 3.171723542978144, "learning_rate": 4.171499402363983e-06, "loss": 1.2387, "step": 9423 }, { "epoch": 0.041719420957103015, "grad_norm": 2.9873263174759757, "learning_rate": 4.171942095710302e-06, "loss": 0.8466, "step": 9424 }, { "epoch": 0.041723847890566206, "grad_norm": 2.8842251222483553, "learning_rate": 4.1723847890566215e-06, "loss": 0.9624, "step": 9425 }, { "epoch": 0.041728274824029396, "grad_norm": 3.0351630571794033, "learning_rate": 4.17282748240294e-06, "loss": 0.9828, "step": 9426 }, { "epoch": 0.04173270175749259, "grad_norm": 3.3010191034916585, "learning_rate": 4.1732701757492585e-06, "loss": 0.9807, "step": 9427 }, { "epoch": 0.04173712869095578, "grad_norm": 2.652136077021117, "learning_rate": 4.173712869095578e-06, "loss": 0.8387, "step": 9428 }, { "epoch": 0.04174155562441897, "grad_norm": 2.6058059012989423, "learning_rate": 4.174155562441896e-06, "loss": 0.7583, "step": 9429 }, { "epoch": 0.04174598255788216, "grad_norm": 2.9166174671425003, "learning_rate": 4.174598255788216e-06, "loss": 0.8807, "step": 9430 }, { "epoch": 0.04175040949134534, "grad_norm": 2.3712435092883735, "learning_rate": 4.175040949134535e-06, "loss": 0.5742, "step": 9431 }, { "epoch": 0.04175483642480853, "grad_norm": 2.8496327986320473, "learning_rate": 4.175483642480854e-06, "loss": 0.8151, "step": 9432 }, { "epoch": 0.04175926335827172, "grad_norm": 2.714945156279922, "learning_rate": 4.175926335827173e-06, "loss": 0.6631, "step": 9433 }, { "epoch": 0.04176369029173491, "grad_norm": 2.7840376293272433, "learning_rate": 4.176369029173492e-06, "loss": 0.6669, "step": 9434 }, { "epoch": 0.041768117225198104, "grad_norm": 2.8030159059343105, "learning_rate": 4.176811722519811e-06, "loss": 0.5586, "step": 9435 }, { "epoch": 0.041772544158661294, "grad_norm": 2.8836058037537873, "learning_rate": 4.17725441586613e-06, "loss": 0.9088, "step": 9436 }, { "epoch": 0.041776971092124485, "grad_norm": 3.0930027070464914, "learning_rate": 4.177697109212449e-06, "loss": 0.8069, "step": 9437 }, { "epoch": 0.041781398025587675, "grad_norm": 2.50397042823935, "learning_rate": 4.178139802558767e-06, "loss": 0.5193, "step": 9438 }, { "epoch": 0.041785824959050866, "grad_norm": 2.63189873620942, "learning_rate": 4.178582495905087e-06, "loss": 0.6031, "step": 9439 }, { "epoch": 0.041790251892514056, "grad_norm": 3.0860893055688043, "learning_rate": 4.179025189251406e-06, "loss": 0.9096, "step": 9440 }, { "epoch": 0.04179467882597725, "grad_norm": 3.157465122501188, "learning_rate": 4.179467882597725e-06, "loss": 1.022, "step": 9441 }, { "epoch": 0.04179910575944044, "grad_norm": 3.1117120836929777, "learning_rate": 4.179910575944044e-06, "loss": 0.9081, "step": 9442 }, { "epoch": 0.04180353269290363, "grad_norm": 2.8566854471337204, "learning_rate": 4.180353269290363e-06, "loss": 0.776, "step": 9443 }, { "epoch": 0.04180795962636682, "grad_norm": 3.424615831780376, "learning_rate": 4.180795962636682e-06, "loss": 0.8308, "step": 9444 }, { "epoch": 0.04181238655983001, "grad_norm": 2.676308350643638, "learning_rate": 4.181238655983001e-06, "loss": 0.6731, "step": 9445 }, { "epoch": 0.0418168134932932, "grad_norm": 3.415537887404375, "learning_rate": 4.18168134932932e-06, "loss": 1.2288, "step": 9446 }, { "epoch": 0.04182124042675638, "grad_norm": 2.5867548414761288, "learning_rate": 4.182124042675639e-06, "loss": 0.8719, "step": 9447 }, { "epoch": 0.04182566736021957, "grad_norm": 3.1200445673137187, "learning_rate": 4.182566736021958e-06, "loss": 0.9673, "step": 9448 }, { "epoch": 0.041830094293682764, "grad_norm": 2.5919180306748735, "learning_rate": 4.183009429368277e-06, "loss": 0.6843, "step": 9449 }, { "epoch": 0.041834521227145954, "grad_norm": 3.5660411690429394, "learning_rate": 4.1834521227145956e-06, "loss": 1.2694, "step": 9450 }, { "epoch": 0.041838948160609145, "grad_norm": 3.049141508553924, "learning_rate": 4.183894816060915e-06, "loss": 0.8794, "step": 9451 }, { "epoch": 0.041843375094072335, "grad_norm": 3.439412002860641, "learning_rate": 4.184337509407234e-06, "loss": 1.0691, "step": 9452 }, { "epoch": 0.041847802027535526, "grad_norm": 3.1021128397833353, "learning_rate": 4.184780202753553e-06, "loss": 0.5721, "step": 9453 }, { "epoch": 0.041852228960998716, "grad_norm": 2.8783956477837997, "learning_rate": 4.185222896099872e-06, "loss": 0.5526, "step": 9454 }, { "epoch": 0.04185665589446191, "grad_norm": 2.997664720174255, "learning_rate": 4.1856655894461916e-06, "loss": 0.5434, "step": 9455 }, { "epoch": 0.0418610828279251, "grad_norm": 2.7238888745210583, "learning_rate": 4.18610828279251e-06, "loss": 0.8194, "step": 9456 }, { "epoch": 0.04186550976138829, "grad_norm": 2.599523454314437, "learning_rate": 4.186550976138829e-06, "loss": 0.6287, "step": 9457 }, { "epoch": 0.04186993669485148, "grad_norm": 2.7605980145931635, "learning_rate": 4.186993669485148e-06, "loss": 0.803, "step": 9458 }, { "epoch": 0.04187436362831467, "grad_norm": 2.8097375282319144, "learning_rate": 4.1874363628314665e-06, "loss": 0.7867, "step": 9459 }, { "epoch": 0.04187879056177786, "grad_norm": 3.0936561572149883, "learning_rate": 4.187879056177786e-06, "loss": 0.9993, "step": 9460 }, { "epoch": 0.04188321749524105, "grad_norm": 3.577458676972953, "learning_rate": 4.188321749524105e-06, "loss": 1.3279, "step": 9461 }, { "epoch": 0.041887644428704233, "grad_norm": 3.085558978840629, "learning_rate": 4.188764442870424e-06, "loss": 0.9868, "step": 9462 }, { "epoch": 0.041892071362167424, "grad_norm": 2.47983476187052, "learning_rate": 4.189207136216743e-06, "loss": 0.6246, "step": 9463 }, { "epoch": 0.041896498295630615, "grad_norm": 2.782869047355682, "learning_rate": 4.1896498295630625e-06, "loss": 0.8438, "step": 9464 }, { "epoch": 0.041900925229093805, "grad_norm": 2.9780804283027016, "learning_rate": 4.190092522909381e-06, "loss": 0.7503, "step": 9465 }, { "epoch": 0.041905352162556996, "grad_norm": 2.4974715063962134, "learning_rate": 4.1905352162557e-06, "loss": 0.6679, "step": 9466 }, { "epoch": 0.041909779096020186, "grad_norm": 3.0177132703063725, "learning_rate": 4.190977909602019e-06, "loss": 0.653, "step": 9467 }, { "epoch": 0.04191420602948338, "grad_norm": 3.1080099805656305, "learning_rate": 4.1914206029483374e-06, "loss": 0.7664, "step": 9468 }, { "epoch": 0.04191863296294657, "grad_norm": 3.2756382771040964, "learning_rate": 4.191863296294657e-06, "loss": 0.5774, "step": 9469 }, { "epoch": 0.04192305989640976, "grad_norm": 2.6558538645171312, "learning_rate": 4.192305989640976e-06, "loss": 0.6876, "step": 9470 }, { "epoch": 0.04192748682987295, "grad_norm": 3.1423153054573216, "learning_rate": 4.192748682987295e-06, "loss": 0.9642, "step": 9471 }, { "epoch": 0.04193191376333614, "grad_norm": 2.807044186246442, "learning_rate": 4.193191376333614e-06, "loss": 1.0296, "step": 9472 }, { "epoch": 0.04193634069679933, "grad_norm": 2.7200818756477343, "learning_rate": 4.1936340696799335e-06, "loss": 0.6333, "step": 9473 }, { "epoch": 0.04194076763026252, "grad_norm": 2.4866222714517288, "learning_rate": 4.194076763026252e-06, "loss": 0.6607, "step": 9474 }, { "epoch": 0.04194519456372571, "grad_norm": 2.72330500058549, "learning_rate": 4.194519456372571e-06, "loss": 0.7756, "step": 9475 }, { "epoch": 0.0419496214971889, "grad_norm": 3.3925789679380687, "learning_rate": 4.19496214971889e-06, "loss": 1.1013, "step": 9476 }, { "epoch": 0.041954048430652084, "grad_norm": 2.594268261554818, "learning_rate": 4.195404843065209e-06, "loss": 0.6457, "step": 9477 }, { "epoch": 0.041958475364115275, "grad_norm": 2.662074920623554, "learning_rate": 4.195847536411528e-06, "loss": 0.426, "step": 9478 }, { "epoch": 0.041962902297578465, "grad_norm": 3.218688361975932, "learning_rate": 4.196290229757847e-06, "loss": 1.0096, "step": 9479 }, { "epoch": 0.041967329231041656, "grad_norm": 2.6095581307926716, "learning_rate": 4.196732923104166e-06, "loss": 0.7055, "step": 9480 }, { "epoch": 0.041971756164504846, "grad_norm": 2.716251818600482, "learning_rate": 4.197175616450485e-06, "loss": 0.7531, "step": 9481 }, { "epoch": 0.04197618309796804, "grad_norm": 3.1809370276985742, "learning_rate": 4.197618309796804e-06, "loss": 0.645, "step": 9482 }, { "epoch": 0.04198061003143123, "grad_norm": 3.123873855397109, "learning_rate": 4.198061003143123e-06, "loss": 0.8657, "step": 9483 }, { "epoch": 0.04198503696489442, "grad_norm": 2.7162370451982785, "learning_rate": 4.198503696489442e-06, "loss": 0.8265, "step": 9484 }, { "epoch": 0.04198946389835761, "grad_norm": 4.400935708379886, "learning_rate": 4.198946389835762e-06, "loss": 1.0759, "step": 9485 }, { "epoch": 0.0419938908318208, "grad_norm": 3.083301449742807, "learning_rate": 4.19938908318208e-06, "loss": 0.7471, "step": 9486 }, { "epoch": 0.04199831776528399, "grad_norm": 2.8317051825512993, "learning_rate": 4.199831776528399e-06, "loss": 0.7946, "step": 9487 }, { "epoch": 0.04200274469874718, "grad_norm": 2.694669020706651, "learning_rate": 4.200274469874718e-06, "loss": 0.5725, "step": 9488 }, { "epoch": 0.04200717163221037, "grad_norm": 3.0161672938987327, "learning_rate": 4.200717163221037e-06, "loss": 1.0301, "step": 9489 }, { "epoch": 0.04201159856567356, "grad_norm": 2.221754901469001, "learning_rate": 4.201159856567356e-06, "loss": 0.4503, "step": 9490 }, { "epoch": 0.04201602549913675, "grad_norm": 2.449409976430809, "learning_rate": 4.201602549913675e-06, "loss": 0.7046, "step": 9491 }, { "epoch": 0.042020452432599935, "grad_norm": 3.201840164792975, "learning_rate": 4.202045243259994e-06, "loss": 0.7628, "step": 9492 }, { "epoch": 0.042024879366063125, "grad_norm": 3.242985019122975, "learning_rate": 4.202487936606313e-06, "loss": 0.8214, "step": 9493 }, { "epoch": 0.042029306299526316, "grad_norm": 2.7055410136049933, "learning_rate": 4.202930629952633e-06, "loss": 0.7334, "step": 9494 }, { "epoch": 0.042033733232989506, "grad_norm": 3.09144706407401, "learning_rate": 4.203373323298951e-06, "loss": 0.9278, "step": 9495 }, { "epoch": 0.0420381601664527, "grad_norm": 2.6453673826742126, "learning_rate": 4.2038160166452705e-06, "loss": 0.6386, "step": 9496 }, { "epoch": 0.04204258709991589, "grad_norm": 2.6822746966744804, "learning_rate": 4.204258709991589e-06, "loss": 0.6795, "step": 9497 }, { "epoch": 0.04204701403337908, "grad_norm": 2.981077666916468, "learning_rate": 4.204701403337908e-06, "loss": 0.7083, "step": 9498 }, { "epoch": 0.04205144096684227, "grad_norm": 3.091497758648295, "learning_rate": 4.205144096684227e-06, "loss": 0.9124, "step": 9499 }, { "epoch": 0.04205586790030546, "grad_norm": 2.7071255358295843, "learning_rate": 4.205586790030546e-06, "loss": 0.6591, "step": 9500 }, { "epoch": 0.04206029483376865, "grad_norm": 2.548302110640406, "learning_rate": 4.206029483376865e-06, "loss": 0.7278, "step": 9501 }, { "epoch": 0.04206472176723184, "grad_norm": 2.5341334027275897, "learning_rate": 4.206472176723184e-06, "loss": 0.8894, "step": 9502 }, { "epoch": 0.04206914870069503, "grad_norm": 2.661450931149777, "learning_rate": 4.2069148700695036e-06, "loss": 0.7737, "step": 9503 }, { "epoch": 0.04207357563415822, "grad_norm": 2.681702406048725, "learning_rate": 4.207357563415822e-06, "loss": 0.7801, "step": 9504 }, { "epoch": 0.04207800256762141, "grad_norm": 3.63690278254715, "learning_rate": 4.2078002567621415e-06, "loss": 0.4309, "step": 9505 }, { "epoch": 0.0420824295010846, "grad_norm": 2.7949066691478066, "learning_rate": 4.208242950108461e-06, "loss": 0.7661, "step": 9506 }, { "epoch": 0.042086856434547786, "grad_norm": 2.664498240544984, "learning_rate": 4.208685643454779e-06, "loss": 0.8787, "step": 9507 }, { "epoch": 0.042091283368010976, "grad_norm": 2.8517767799872056, "learning_rate": 4.209128336801098e-06, "loss": 0.8087, "step": 9508 }, { "epoch": 0.04209571030147417, "grad_norm": 2.5914317644658147, "learning_rate": 4.209571030147417e-06, "loss": 0.7224, "step": 9509 }, { "epoch": 0.04210013723493736, "grad_norm": 2.797870401897411, "learning_rate": 4.210013723493736e-06, "loss": 0.9022, "step": 9510 }, { "epoch": 0.04210456416840055, "grad_norm": 3.201645963021697, "learning_rate": 4.210456416840055e-06, "loss": 1.037, "step": 9511 }, { "epoch": 0.04210899110186374, "grad_norm": 3.2043053907252204, "learning_rate": 4.2108991101863745e-06, "loss": 0.8582, "step": 9512 }, { "epoch": 0.04211341803532693, "grad_norm": 2.5034833478603624, "learning_rate": 4.211341803532693e-06, "loss": 0.5593, "step": 9513 }, { "epoch": 0.04211784496879012, "grad_norm": 2.826512995883643, "learning_rate": 4.211784496879012e-06, "loss": 0.7778, "step": 9514 }, { "epoch": 0.04212227190225331, "grad_norm": 2.314717196449132, "learning_rate": 4.212227190225332e-06, "loss": 0.4829, "step": 9515 }, { "epoch": 0.0421266988357165, "grad_norm": 2.3308705481111773, "learning_rate": 4.21266988357165e-06, "loss": 0.6496, "step": 9516 }, { "epoch": 0.04213112576917969, "grad_norm": 2.5143965454495913, "learning_rate": 4.21311257691797e-06, "loss": 0.7483, "step": 9517 }, { "epoch": 0.04213555270264288, "grad_norm": 2.7361540879873307, "learning_rate": 4.213555270264288e-06, "loss": 0.7663, "step": 9518 }, { "epoch": 0.04213997963610607, "grad_norm": 2.565483261980191, "learning_rate": 4.213997963610607e-06, "loss": 0.8452, "step": 9519 }, { "epoch": 0.04214440656956926, "grad_norm": 2.4867644176351633, "learning_rate": 4.214440656956926e-06, "loss": 0.5809, "step": 9520 }, { "epoch": 0.04214883350303245, "grad_norm": 3.188766812488396, "learning_rate": 4.2148833503032455e-06, "loss": 0.823, "step": 9521 }, { "epoch": 0.042153260436495636, "grad_norm": 2.7736113227071013, "learning_rate": 4.215326043649564e-06, "loss": 0.9947, "step": 9522 }, { "epoch": 0.04215768736995883, "grad_norm": 2.7548672115634973, "learning_rate": 4.215768736995883e-06, "loss": 0.7838, "step": 9523 }, { "epoch": 0.04216211430342202, "grad_norm": 3.090095048921599, "learning_rate": 4.216211430342203e-06, "loss": 0.8944, "step": 9524 }, { "epoch": 0.04216654123688521, "grad_norm": 2.8480605040023157, "learning_rate": 4.216654123688521e-06, "loss": 0.8469, "step": 9525 }, { "epoch": 0.0421709681703484, "grad_norm": 3.5332060535365644, "learning_rate": 4.217096817034841e-06, "loss": 0.859, "step": 9526 }, { "epoch": 0.04217539510381159, "grad_norm": 3.3659171223353765, "learning_rate": 4.217539510381159e-06, "loss": 0.9569, "step": 9527 }, { "epoch": 0.04217982203727478, "grad_norm": 3.0307927211554286, "learning_rate": 4.2179822037274785e-06, "loss": 0.8901, "step": 9528 }, { "epoch": 0.04218424897073797, "grad_norm": 3.2397054518386215, "learning_rate": 4.218424897073797e-06, "loss": 0.9625, "step": 9529 }, { "epoch": 0.04218867590420116, "grad_norm": 2.8304774723794215, "learning_rate": 4.218867590420116e-06, "loss": 0.7602, "step": 9530 }, { "epoch": 0.04219310283766435, "grad_norm": 2.866081164477119, "learning_rate": 4.219310283766435e-06, "loss": 0.7881, "step": 9531 }, { "epoch": 0.04219752977112754, "grad_norm": 2.7506560921834207, "learning_rate": 4.219752977112754e-06, "loss": 0.6607, "step": 9532 }, { "epoch": 0.04220195670459073, "grad_norm": 2.611437484510764, "learning_rate": 4.220195670459074e-06, "loss": 0.6547, "step": 9533 }, { "epoch": 0.04220638363805392, "grad_norm": 3.623205164336191, "learning_rate": 4.220638363805392e-06, "loss": 1.0663, "step": 9534 }, { "epoch": 0.04221081057151711, "grad_norm": 2.754383477862991, "learning_rate": 4.2210810571517116e-06, "loss": 0.6844, "step": 9535 }, { "epoch": 0.0422152375049803, "grad_norm": 2.5897546875002644, "learning_rate": 4.221523750498031e-06, "loss": 0.7309, "step": 9536 }, { "epoch": 0.04221966443844349, "grad_norm": 2.515775556123786, "learning_rate": 4.2219664438443495e-06, "loss": 0.6004, "step": 9537 }, { "epoch": 0.04222409137190668, "grad_norm": 2.560636305955256, "learning_rate": 4.222409137190668e-06, "loss": 0.6886, "step": 9538 }, { "epoch": 0.04222851830536987, "grad_norm": 2.8639263163341697, "learning_rate": 4.222851830536987e-06, "loss": 0.9794, "step": 9539 }, { "epoch": 0.04223294523883306, "grad_norm": 2.519800889562269, "learning_rate": 4.223294523883306e-06, "loss": 0.7331, "step": 9540 }, { "epoch": 0.04223737217229625, "grad_norm": 2.997624728215942, "learning_rate": 4.223737217229625e-06, "loss": 0.8432, "step": 9541 }, { "epoch": 0.04224179910575944, "grad_norm": 4.177679193770784, "learning_rate": 4.224179910575945e-06, "loss": 0.8865, "step": 9542 }, { "epoch": 0.04224622603922263, "grad_norm": 2.4582691170140034, "learning_rate": 4.224622603922263e-06, "loss": 0.439, "step": 9543 }, { "epoch": 0.04225065297268582, "grad_norm": 3.218535041799655, "learning_rate": 4.2250652972685825e-06, "loss": 0.9279, "step": 9544 }, { "epoch": 0.04225507990614901, "grad_norm": 2.9913197292557383, "learning_rate": 4.225507990614902e-06, "loss": 0.9006, "step": 9545 }, { "epoch": 0.0422595068396122, "grad_norm": 3.012070592817895, "learning_rate": 4.22595068396122e-06, "loss": 0.5817, "step": 9546 }, { "epoch": 0.04226393377307539, "grad_norm": 2.550629779525327, "learning_rate": 4.22639337730754e-06, "loss": 0.8119, "step": 9547 }, { "epoch": 0.04226836070653858, "grad_norm": 2.523119338404898, "learning_rate": 4.226836070653858e-06, "loss": 0.5891, "step": 9548 }, { "epoch": 0.04227278764000177, "grad_norm": 3.1247382165623865, "learning_rate": 4.227278764000177e-06, "loss": 0.8605, "step": 9549 }, { "epoch": 0.042277214573464963, "grad_norm": 2.619362092516268, "learning_rate": 4.227721457346496e-06, "loss": 0.7148, "step": 9550 }, { "epoch": 0.042281641506928154, "grad_norm": 3.218427232740605, "learning_rate": 4.2281641506928156e-06, "loss": 1.0928, "step": 9551 }, { "epoch": 0.04228606844039134, "grad_norm": 3.100208341535368, "learning_rate": 4.228606844039134e-06, "loss": 0.9751, "step": 9552 }, { "epoch": 0.04229049537385453, "grad_norm": 2.6127281358659205, "learning_rate": 4.2290495373854535e-06, "loss": 0.6811, "step": 9553 }, { "epoch": 0.04229492230731772, "grad_norm": 3.223355763169369, "learning_rate": 4.229492230731773e-06, "loss": 0.891, "step": 9554 }, { "epoch": 0.04229934924078091, "grad_norm": 2.550037461270266, "learning_rate": 4.229934924078091e-06, "loss": 0.6932, "step": 9555 }, { "epoch": 0.0423037761742441, "grad_norm": 4.290049219479147, "learning_rate": 4.230377617424411e-06, "loss": 1.1439, "step": 9556 }, { "epoch": 0.04230820310770729, "grad_norm": 2.6740129893131193, "learning_rate": 4.230820310770729e-06, "loss": 0.6786, "step": 9557 }, { "epoch": 0.04231263004117048, "grad_norm": 3.1618958744111803, "learning_rate": 4.231263004117049e-06, "loss": 0.6983, "step": 9558 }, { "epoch": 0.04231705697463367, "grad_norm": 2.601908162562674, "learning_rate": 4.231705697463367e-06, "loss": 0.8781, "step": 9559 }, { "epoch": 0.04232148390809686, "grad_norm": 2.7907967723990366, "learning_rate": 4.2321483908096865e-06, "loss": 0.5696, "step": 9560 }, { "epoch": 0.04232591084156005, "grad_norm": 3.272472291056926, "learning_rate": 4.232591084156005e-06, "loss": 1.1826, "step": 9561 }, { "epoch": 0.04233033777502324, "grad_norm": 2.308474105305111, "learning_rate": 4.233033777502324e-06, "loss": 0.7795, "step": 9562 }, { "epoch": 0.04233476470848643, "grad_norm": 2.9265854646221197, "learning_rate": 4.233476470848644e-06, "loss": 0.6898, "step": 9563 }, { "epoch": 0.042339191641949624, "grad_norm": 2.8645713351377347, "learning_rate": 4.233919164194962e-06, "loss": 0.9087, "step": 9564 }, { "epoch": 0.042343618575412814, "grad_norm": 3.0048928298077193, "learning_rate": 4.234361857541282e-06, "loss": 0.7158, "step": 9565 }, { "epoch": 0.042348045508876005, "grad_norm": 2.6798188076097937, "learning_rate": 4.234804550887601e-06, "loss": 0.7839, "step": 9566 }, { "epoch": 0.04235247244233919, "grad_norm": 2.917551157954193, "learning_rate": 4.2352472442339196e-06, "loss": 0.8275, "step": 9567 }, { "epoch": 0.04235689937580238, "grad_norm": 2.5086576362667734, "learning_rate": 4.235689937580238e-06, "loss": 0.557, "step": 9568 }, { "epoch": 0.04236132630926557, "grad_norm": 2.7470891820527528, "learning_rate": 4.2361326309265575e-06, "loss": 0.4602, "step": 9569 }, { "epoch": 0.04236575324272876, "grad_norm": 2.302069553205547, "learning_rate": 4.236575324272876e-06, "loss": 0.4136, "step": 9570 }, { "epoch": 0.04237018017619195, "grad_norm": 2.4743882120257505, "learning_rate": 4.237018017619195e-06, "loss": 0.6769, "step": 9571 }, { "epoch": 0.04237460710965514, "grad_norm": 3.154727961923202, "learning_rate": 4.237460710965515e-06, "loss": 0.958, "step": 9572 }, { "epoch": 0.04237903404311833, "grad_norm": 2.877526956967524, "learning_rate": 4.237903404311833e-06, "loss": 0.3869, "step": 9573 }, { "epoch": 0.04238346097658152, "grad_norm": 2.806926327047993, "learning_rate": 4.238346097658153e-06, "loss": 0.8672, "step": 9574 }, { "epoch": 0.04238788791004471, "grad_norm": 3.051426121817871, "learning_rate": 4.238788791004472e-06, "loss": 1.0274, "step": 9575 }, { "epoch": 0.0423923148435079, "grad_norm": 2.9170073461674964, "learning_rate": 4.2392314843507905e-06, "loss": 0.8052, "step": 9576 }, { "epoch": 0.04239674177697109, "grad_norm": 2.3536019649613853, "learning_rate": 4.23967417769711e-06, "loss": 0.4997, "step": 9577 }, { "epoch": 0.042401168710434284, "grad_norm": 2.525630748510692, "learning_rate": 4.240116871043428e-06, "loss": 0.8567, "step": 9578 }, { "epoch": 0.042405595643897474, "grad_norm": 2.572504201050239, "learning_rate": 4.240559564389748e-06, "loss": 0.7711, "step": 9579 }, { "epoch": 0.042410022577360665, "grad_norm": 2.7414078210740627, "learning_rate": 4.241002257736066e-06, "loss": 0.6603, "step": 9580 }, { "epoch": 0.042414449510823855, "grad_norm": 3.1786979717290897, "learning_rate": 4.241444951082386e-06, "loss": 0.9829, "step": 9581 }, { "epoch": 0.04241887644428704, "grad_norm": 2.4903940802488544, "learning_rate": 4.241887644428704e-06, "loss": 0.6872, "step": 9582 }, { "epoch": 0.04242330337775023, "grad_norm": 2.9527440982036315, "learning_rate": 4.2423303377750236e-06, "loss": 0.9772, "step": 9583 }, { "epoch": 0.04242773031121342, "grad_norm": 2.935288079197814, "learning_rate": 4.242773031121343e-06, "loss": 0.7294, "step": 9584 }, { "epoch": 0.04243215724467661, "grad_norm": 3.8236554680712187, "learning_rate": 4.2432157244676615e-06, "loss": 1.1138, "step": 9585 }, { "epoch": 0.0424365841781398, "grad_norm": 2.7610377716810324, "learning_rate": 4.243658417813981e-06, "loss": 0.7969, "step": 9586 }, { "epoch": 0.04244101111160299, "grad_norm": 2.5340018306695433, "learning_rate": 4.244101111160299e-06, "loss": 0.6721, "step": 9587 }, { "epoch": 0.04244543804506618, "grad_norm": 3.087892345370982, "learning_rate": 4.244543804506619e-06, "loss": 0.8346, "step": 9588 }, { "epoch": 0.04244986497852937, "grad_norm": 2.8567272689741845, "learning_rate": 4.244986497852937e-06, "loss": 0.7314, "step": 9589 }, { "epoch": 0.04245429191199256, "grad_norm": 2.788981143258574, "learning_rate": 4.245429191199257e-06, "loss": 0.7819, "step": 9590 }, { "epoch": 0.042458718845455753, "grad_norm": 2.5403090439086258, "learning_rate": 4.245871884545575e-06, "loss": 0.705, "step": 9591 }, { "epoch": 0.042463145778918944, "grad_norm": 2.7736759892107967, "learning_rate": 4.2463145778918945e-06, "loss": 0.671, "step": 9592 }, { "epoch": 0.042467572712382134, "grad_norm": 2.291857114517907, "learning_rate": 4.246757271238214e-06, "loss": 0.7394, "step": 9593 }, { "epoch": 0.042471999645845325, "grad_norm": 2.504486333289536, "learning_rate": 4.247199964584532e-06, "loss": 0.6813, "step": 9594 }, { "epoch": 0.042476426579308516, "grad_norm": 2.4139334528584357, "learning_rate": 4.247642657930852e-06, "loss": 0.6254, "step": 9595 }, { "epoch": 0.042480853512771706, "grad_norm": 3.21048156376318, "learning_rate": 4.248085351277171e-06, "loss": 0.7849, "step": 9596 }, { "epoch": 0.042485280446234897, "grad_norm": 2.519903678635381, "learning_rate": 4.24852804462349e-06, "loss": 0.5213, "step": 9597 }, { "epoch": 0.04248970737969808, "grad_norm": 2.4299514193042264, "learning_rate": 4.248970737969809e-06, "loss": 0.639, "step": 9598 }, { "epoch": 0.04249413431316127, "grad_norm": 3.309849296025581, "learning_rate": 4.2494134313161276e-06, "loss": 1.1722, "step": 9599 }, { "epoch": 0.04249856124662446, "grad_norm": 2.2852192580338664, "learning_rate": 4.249856124662446e-06, "loss": 0.5704, "step": 9600 }, { "epoch": 0.04250298818008765, "grad_norm": 3.8301836607859943, "learning_rate": 4.2502988180087655e-06, "loss": 1.0822, "step": 9601 }, { "epoch": 0.04250741511355084, "grad_norm": 2.445352664899533, "learning_rate": 4.250741511355085e-06, "loss": 0.5909, "step": 9602 }, { "epoch": 0.04251184204701403, "grad_norm": 2.8516092156101926, "learning_rate": 4.251184204701403e-06, "loss": 0.6992, "step": 9603 }, { "epoch": 0.04251626898047722, "grad_norm": 4.209675268023627, "learning_rate": 4.251626898047723e-06, "loss": 1.3938, "step": 9604 }, { "epoch": 0.042520695913940414, "grad_norm": 2.4363993361175664, "learning_rate": 4.252069591394042e-06, "loss": 0.736, "step": 9605 }, { "epoch": 0.042525122847403604, "grad_norm": 2.73148276337311, "learning_rate": 4.252512284740361e-06, "loss": 0.669, "step": 9606 }, { "epoch": 0.042529549780866795, "grad_norm": 2.7552328431789816, "learning_rate": 4.25295497808668e-06, "loss": 0.7398, "step": 9607 }, { "epoch": 0.042533976714329985, "grad_norm": 2.409536432836972, "learning_rate": 4.2533976714329985e-06, "loss": 0.5138, "step": 9608 }, { "epoch": 0.042538403647793176, "grad_norm": 3.0890147648437716, "learning_rate": 4.253840364779318e-06, "loss": 0.8165, "step": 9609 }, { "epoch": 0.042542830581256366, "grad_norm": 2.9192297582738673, "learning_rate": 4.254283058125636e-06, "loss": 0.6172, "step": 9610 }, { "epoch": 0.04254725751471956, "grad_norm": 2.8577963442639285, "learning_rate": 4.254725751471956e-06, "loss": 0.8314, "step": 9611 }, { "epoch": 0.04255168444818275, "grad_norm": 2.972670941328859, "learning_rate": 4.255168444818274e-06, "loss": 0.6133, "step": 9612 }, { "epoch": 0.04255611138164593, "grad_norm": 3.096580356943986, "learning_rate": 4.255611138164594e-06, "loss": 0.9258, "step": 9613 }, { "epoch": 0.04256053831510912, "grad_norm": 3.4730523372810453, "learning_rate": 4.256053831510913e-06, "loss": 0.9683, "step": 9614 }, { "epoch": 0.04256496524857231, "grad_norm": 2.6822264524472144, "learning_rate": 4.2564965248572316e-06, "loss": 0.7379, "step": 9615 }, { "epoch": 0.0425693921820355, "grad_norm": 2.5310387762567443, "learning_rate": 4.256939218203551e-06, "loss": 0.7149, "step": 9616 }, { "epoch": 0.04257381911549869, "grad_norm": 2.589003494742552, "learning_rate": 4.25738191154987e-06, "loss": 0.6854, "step": 9617 }, { "epoch": 0.04257824604896188, "grad_norm": 2.6787414583569564, "learning_rate": 4.257824604896189e-06, "loss": 0.6506, "step": 9618 }, { "epoch": 0.042582672982425074, "grad_norm": 3.001257840603278, "learning_rate": 4.258267298242507e-06, "loss": 0.718, "step": 9619 }, { "epoch": 0.042587099915888264, "grad_norm": 2.9436560646674126, "learning_rate": 4.258709991588827e-06, "loss": 0.7212, "step": 9620 }, { "epoch": 0.042591526849351455, "grad_norm": 2.6047134152178315, "learning_rate": 4.259152684935145e-06, "loss": 0.5606, "step": 9621 }, { "epoch": 0.042595953782814645, "grad_norm": 3.480445146892542, "learning_rate": 4.259595378281465e-06, "loss": 0.8862, "step": 9622 }, { "epoch": 0.042600380716277836, "grad_norm": 3.597711159443635, "learning_rate": 4.260038071627784e-06, "loss": 0.8827, "step": 9623 }, { "epoch": 0.042604807649741026, "grad_norm": 3.689880829406846, "learning_rate": 4.2604807649741025e-06, "loss": 1.0342, "step": 9624 }, { "epoch": 0.04260923458320422, "grad_norm": 3.292356029110218, "learning_rate": 4.260923458320422e-06, "loss": 0.9266, "step": 9625 }, { "epoch": 0.04261366151666741, "grad_norm": 3.48944175808554, "learning_rate": 4.261366151666741e-06, "loss": 1.253, "step": 9626 }, { "epoch": 0.0426180884501306, "grad_norm": 3.1184762986101635, "learning_rate": 4.26180884501306e-06, "loss": 0.8642, "step": 9627 }, { "epoch": 0.04262251538359378, "grad_norm": 2.613075667400983, "learning_rate": 4.262251538359379e-06, "loss": 0.6508, "step": 9628 }, { "epoch": 0.04262694231705697, "grad_norm": 2.795511192797644, "learning_rate": 4.262694231705698e-06, "loss": 0.8259, "step": 9629 }, { "epoch": 0.04263136925052016, "grad_norm": 2.9972227738279047, "learning_rate": 4.263136925052016e-06, "loss": 0.7643, "step": 9630 }, { "epoch": 0.04263579618398335, "grad_norm": 3.37369591422778, "learning_rate": 4.2635796183983356e-06, "loss": 1.1177, "step": 9631 }, { "epoch": 0.042640223117446543, "grad_norm": 2.892234940710594, "learning_rate": 4.264022311744655e-06, "loss": 0.8304, "step": 9632 }, { "epoch": 0.042644650050909734, "grad_norm": 3.1084723726052714, "learning_rate": 4.2644650050909735e-06, "loss": 0.9347, "step": 9633 }, { "epoch": 0.042649076984372924, "grad_norm": 3.3391538798827627, "learning_rate": 4.264907698437293e-06, "loss": 1.0778, "step": 9634 }, { "epoch": 0.042653503917836115, "grad_norm": 3.2613849472010834, "learning_rate": 4.265350391783612e-06, "loss": 0.8608, "step": 9635 }, { "epoch": 0.042657930851299306, "grad_norm": 3.6981084601712744, "learning_rate": 4.265793085129931e-06, "loss": 1.1472, "step": 9636 }, { "epoch": 0.042662357784762496, "grad_norm": 3.2665682379416707, "learning_rate": 4.26623577847625e-06, "loss": 0.6636, "step": 9637 }, { "epoch": 0.042666784718225687, "grad_norm": 2.5278323216279346, "learning_rate": 4.266678471822569e-06, "loss": 0.7793, "step": 9638 }, { "epoch": 0.04267121165168888, "grad_norm": 2.5211195472246866, "learning_rate": 4.267121165168888e-06, "loss": 0.6888, "step": 9639 }, { "epoch": 0.04267563858515207, "grad_norm": 2.459866434183107, "learning_rate": 4.2675638585152065e-06, "loss": 0.8079, "step": 9640 }, { "epoch": 0.04268006551861526, "grad_norm": 2.6635914853839657, "learning_rate": 4.268006551861526e-06, "loss": 0.7682, "step": 9641 }, { "epoch": 0.04268449245207845, "grad_norm": 2.3190328620775755, "learning_rate": 4.268449245207844e-06, "loss": 0.6485, "step": 9642 }, { "epoch": 0.04268891938554163, "grad_norm": 2.8043660286566072, "learning_rate": 4.268891938554164e-06, "loss": 0.5624, "step": 9643 }, { "epoch": 0.04269334631900482, "grad_norm": 3.2407389469087886, "learning_rate": 4.269334631900483e-06, "loss": 0.9721, "step": 9644 }, { "epoch": 0.04269777325246801, "grad_norm": 3.0906834274037207, "learning_rate": 4.269777325246802e-06, "loss": 0.7211, "step": 9645 }, { "epoch": 0.042702200185931204, "grad_norm": 3.252637687176146, "learning_rate": 4.270220018593121e-06, "loss": 1.1687, "step": 9646 }, { "epoch": 0.042706627119394394, "grad_norm": 3.255123078871617, "learning_rate": 4.27066271193944e-06, "loss": 0.9157, "step": 9647 }, { "epoch": 0.042711054052857585, "grad_norm": 2.668513338626873, "learning_rate": 4.271105405285759e-06, "loss": 0.5909, "step": 9648 }, { "epoch": 0.042715480986320775, "grad_norm": 2.6303036996213596, "learning_rate": 4.2715480986320775e-06, "loss": 0.6387, "step": 9649 }, { "epoch": 0.042719907919783966, "grad_norm": 2.949579233242236, "learning_rate": 4.271990791978397e-06, "loss": 0.5252, "step": 9650 }, { "epoch": 0.042724334853247156, "grad_norm": 2.579638661322768, "learning_rate": 4.272433485324715e-06, "loss": 0.8043, "step": 9651 }, { "epoch": 0.04272876178671035, "grad_norm": 2.8230802686525576, "learning_rate": 4.272876178671035e-06, "loss": 0.809, "step": 9652 }, { "epoch": 0.04273318872017354, "grad_norm": 2.7094149320942558, "learning_rate": 4.273318872017354e-06, "loss": 0.7358, "step": 9653 }, { "epoch": 0.04273761565363673, "grad_norm": 2.796617049790207, "learning_rate": 4.273761565363673e-06, "loss": 0.6435, "step": 9654 }, { "epoch": 0.04274204258709992, "grad_norm": 2.92134626687003, "learning_rate": 4.274204258709992e-06, "loss": 0.5443, "step": 9655 }, { "epoch": 0.04274646952056311, "grad_norm": 3.7125284340731306, "learning_rate": 4.274646952056311e-06, "loss": 1.2904, "step": 9656 }, { "epoch": 0.0427508964540263, "grad_norm": 3.1205473973290827, "learning_rate": 4.27508964540263e-06, "loss": 1.0117, "step": 9657 }, { "epoch": 0.04275532338748948, "grad_norm": 2.749855072020798, "learning_rate": 4.275532338748949e-06, "loss": 0.7321, "step": 9658 }, { "epoch": 0.04275975032095267, "grad_norm": 3.026677042341787, "learning_rate": 4.275975032095268e-06, "loss": 0.9462, "step": 9659 }, { "epoch": 0.042764177254415864, "grad_norm": 2.3692101062649242, "learning_rate": 4.276417725441587e-06, "loss": 0.4641, "step": 9660 }, { "epoch": 0.042768604187879054, "grad_norm": 4.605155840303451, "learning_rate": 4.276860418787906e-06, "loss": 0.8718, "step": 9661 }, { "epoch": 0.042773031121342245, "grad_norm": 2.4332604748664073, "learning_rate": 4.277303112134225e-06, "loss": 0.6913, "step": 9662 }, { "epoch": 0.042777458054805435, "grad_norm": 2.721145458668921, "learning_rate": 4.2777458054805436e-06, "loss": 0.8558, "step": 9663 }, { "epoch": 0.042781884988268626, "grad_norm": 3.7583284116275117, "learning_rate": 4.278188498826863e-06, "loss": 1.2617, "step": 9664 }, { "epoch": 0.042786311921731816, "grad_norm": 3.294186769431652, "learning_rate": 4.278631192173182e-06, "loss": 0.9575, "step": 9665 }, { "epoch": 0.04279073885519501, "grad_norm": 2.4718574421058315, "learning_rate": 4.279073885519501e-06, "loss": 0.6944, "step": 9666 }, { "epoch": 0.0427951657886582, "grad_norm": 2.5138945508304302, "learning_rate": 4.27951657886582e-06, "loss": 0.8518, "step": 9667 }, { "epoch": 0.04279959272212139, "grad_norm": 3.296573818092199, "learning_rate": 4.279959272212139e-06, "loss": 1.1011, "step": 9668 }, { "epoch": 0.04280401965558458, "grad_norm": 3.518174740604332, "learning_rate": 4.280401965558458e-06, "loss": 0.9345, "step": 9669 }, { "epoch": 0.04280844658904777, "grad_norm": 2.6864527025485834, "learning_rate": 4.280844658904777e-06, "loss": 0.6176, "step": 9670 }, { "epoch": 0.04281287352251096, "grad_norm": 2.615702283523351, "learning_rate": 4.281287352251096e-06, "loss": 0.5134, "step": 9671 }, { "epoch": 0.04281730045597415, "grad_norm": 3.184149803830969, "learning_rate": 4.2817300455974145e-06, "loss": 0.9825, "step": 9672 }, { "epoch": 0.042821727389437333, "grad_norm": 2.7424271058382184, "learning_rate": 4.282172738943734e-06, "loss": 0.6968, "step": 9673 }, { "epoch": 0.042826154322900524, "grad_norm": 2.537097827449038, "learning_rate": 4.282615432290053e-06, "loss": 0.6957, "step": 9674 }, { "epoch": 0.042830581256363714, "grad_norm": 2.978938657742035, "learning_rate": 4.283058125636372e-06, "loss": 0.9101, "step": 9675 }, { "epoch": 0.042835008189826905, "grad_norm": 2.543011985091131, "learning_rate": 4.283500818982691e-06, "loss": 0.778, "step": 9676 }, { "epoch": 0.042839435123290096, "grad_norm": 3.661413768845855, "learning_rate": 4.2839435123290105e-06, "loss": 1.0075, "step": 9677 }, { "epoch": 0.042843862056753286, "grad_norm": 2.684499221903825, "learning_rate": 4.284386205675329e-06, "loss": 0.7134, "step": 9678 }, { "epoch": 0.042848288990216477, "grad_norm": 2.568028850038345, "learning_rate": 4.284828899021648e-06, "loss": 0.7179, "step": 9679 }, { "epoch": 0.04285271592367967, "grad_norm": 2.8882851965585954, "learning_rate": 4.285271592367967e-06, "loss": 0.8562, "step": 9680 }, { "epoch": 0.04285714285714286, "grad_norm": 2.7491779606994675, "learning_rate": 4.2857142857142855e-06, "loss": 0.8254, "step": 9681 }, { "epoch": 0.04286156979060605, "grad_norm": 2.8064976057206725, "learning_rate": 4.286156979060605e-06, "loss": 0.7986, "step": 9682 }, { "epoch": 0.04286599672406924, "grad_norm": 2.7268657955910216, "learning_rate": 4.286599672406924e-06, "loss": 0.9718, "step": 9683 }, { "epoch": 0.04287042365753243, "grad_norm": 2.449808118706662, "learning_rate": 4.287042365753243e-06, "loss": 0.4845, "step": 9684 }, { "epoch": 0.04287485059099562, "grad_norm": 2.812164455813729, "learning_rate": 4.287485059099562e-06, "loss": 0.8905, "step": 9685 }, { "epoch": 0.04287927752445881, "grad_norm": 2.2637197605003516, "learning_rate": 4.2879277524458815e-06, "loss": 0.4854, "step": 9686 }, { "epoch": 0.042883704457922, "grad_norm": 2.3365704583090103, "learning_rate": 4.2883704457922e-06, "loss": 0.7345, "step": 9687 }, { "epoch": 0.042888131391385184, "grad_norm": 3.305772176786561, "learning_rate": 4.288813139138519e-06, "loss": 0.6415, "step": 9688 }, { "epoch": 0.042892558324848375, "grad_norm": 3.0409943750741815, "learning_rate": 4.289255832484838e-06, "loss": 0.7838, "step": 9689 }, { "epoch": 0.042896985258311565, "grad_norm": 3.007164531723323, "learning_rate": 4.289698525831157e-06, "loss": 0.6825, "step": 9690 }, { "epoch": 0.042901412191774756, "grad_norm": 4.067361085197001, "learning_rate": 4.290141219177476e-06, "loss": 1.0241, "step": 9691 }, { "epoch": 0.042905839125237946, "grad_norm": 2.9261569789849067, "learning_rate": 4.290583912523795e-06, "loss": 0.5715, "step": 9692 }, { "epoch": 0.04291026605870114, "grad_norm": 3.1619570751406068, "learning_rate": 4.291026605870114e-06, "loss": 0.5442, "step": 9693 }, { "epoch": 0.04291469299216433, "grad_norm": 3.551981225423866, "learning_rate": 4.291469299216433e-06, "loss": 1.1001, "step": 9694 }, { "epoch": 0.04291911992562752, "grad_norm": 2.6415679271383703, "learning_rate": 4.291911992562752e-06, "loss": 0.6534, "step": 9695 }, { "epoch": 0.04292354685909071, "grad_norm": 2.6839029396576244, "learning_rate": 4.292354685909071e-06, "loss": 0.8241, "step": 9696 }, { "epoch": 0.0429279737925539, "grad_norm": 3.0032960218828957, "learning_rate": 4.29279737925539e-06, "loss": 0.6704, "step": 9697 }, { "epoch": 0.04293240072601709, "grad_norm": 3.3003151205424737, "learning_rate": 4.29324007260171e-06, "loss": 0.6455, "step": 9698 }, { "epoch": 0.04293682765948028, "grad_norm": 2.852320453574078, "learning_rate": 4.293682765948028e-06, "loss": 0.5699, "step": 9699 }, { "epoch": 0.04294125459294347, "grad_norm": 2.4082253523140267, "learning_rate": 4.294125459294347e-06, "loss": 0.598, "step": 9700 }, { "epoch": 0.04294568152640666, "grad_norm": 2.4543554784186665, "learning_rate": 4.294568152640666e-06, "loss": 0.7986, "step": 9701 }, { "epoch": 0.04295010845986985, "grad_norm": 2.4337695192376065, "learning_rate": 4.295010845986985e-06, "loss": 0.6304, "step": 9702 }, { "epoch": 0.042954535393333035, "grad_norm": 2.607620165736351, "learning_rate": 4.295453539333304e-06, "loss": 0.8304, "step": 9703 }, { "epoch": 0.042958962326796225, "grad_norm": 3.765227638027119, "learning_rate": 4.295896232679623e-06, "loss": 0.7849, "step": 9704 }, { "epoch": 0.042963389260259416, "grad_norm": 2.714400752517171, "learning_rate": 4.296338926025942e-06, "loss": 0.7751, "step": 9705 }, { "epoch": 0.042967816193722606, "grad_norm": 2.7420137142019354, "learning_rate": 4.296781619372261e-06, "loss": 0.6678, "step": 9706 }, { "epoch": 0.0429722431271858, "grad_norm": 2.4405874335154163, "learning_rate": 4.297224312718581e-06, "loss": 0.6889, "step": 9707 }, { "epoch": 0.04297667006064899, "grad_norm": 2.493855246847291, "learning_rate": 4.297667006064899e-06, "loss": 0.9904, "step": 9708 }, { "epoch": 0.04298109699411218, "grad_norm": 2.6872462087477396, "learning_rate": 4.2981096994112185e-06, "loss": 0.9165, "step": 9709 }, { "epoch": 0.04298552392757537, "grad_norm": 3.070443381277907, "learning_rate": 4.298552392757537e-06, "loss": 0.8347, "step": 9710 }, { "epoch": 0.04298995086103856, "grad_norm": 2.6229107117821244, "learning_rate": 4.2989950861038556e-06, "loss": 0.8336, "step": 9711 }, { "epoch": 0.04299437779450175, "grad_norm": 2.7371021916971316, "learning_rate": 4.299437779450175e-06, "loss": 0.4134, "step": 9712 }, { "epoch": 0.04299880472796494, "grad_norm": 2.4412658923788935, "learning_rate": 4.299880472796494e-06, "loss": 0.6895, "step": 9713 }, { "epoch": 0.04300323166142813, "grad_norm": 3.1891909541494523, "learning_rate": 4.300323166142813e-06, "loss": 0.9765, "step": 9714 }, { "epoch": 0.04300765859489132, "grad_norm": 2.8842697846537027, "learning_rate": 4.300765859489132e-06, "loss": 0.7217, "step": 9715 }, { "epoch": 0.04301208552835451, "grad_norm": 2.5979669192431563, "learning_rate": 4.3012085528354516e-06, "loss": 0.5726, "step": 9716 }, { "epoch": 0.0430165124618177, "grad_norm": 2.6886181419675865, "learning_rate": 4.30165124618177e-06, "loss": 0.7302, "step": 9717 }, { "epoch": 0.043020939395280886, "grad_norm": 2.5839318173324006, "learning_rate": 4.3020939395280895e-06, "loss": 0.7738, "step": 9718 }, { "epoch": 0.043025366328744076, "grad_norm": 4.257572939842907, "learning_rate": 4.302536632874408e-06, "loss": 0.7478, "step": 9719 }, { "epoch": 0.043029793262207267, "grad_norm": 2.8624172452316623, "learning_rate": 4.302979326220727e-06, "loss": 0.7658, "step": 9720 }, { "epoch": 0.04303422019567046, "grad_norm": 2.1399922078329845, "learning_rate": 4.303422019567046e-06, "loss": 0.7186, "step": 9721 }, { "epoch": 0.04303864712913365, "grad_norm": 2.8232981698045667, "learning_rate": 4.303864712913365e-06, "loss": 0.8913, "step": 9722 }, { "epoch": 0.04304307406259684, "grad_norm": 2.7845720754431884, "learning_rate": 4.304307406259684e-06, "loss": 0.8297, "step": 9723 }, { "epoch": 0.04304750099606003, "grad_norm": 2.6246298768239344, "learning_rate": 4.304750099606003e-06, "loss": 0.7364, "step": 9724 }, { "epoch": 0.04305192792952322, "grad_norm": 2.459841177379572, "learning_rate": 4.3051927929523225e-06, "loss": 0.7514, "step": 9725 }, { "epoch": 0.04305635486298641, "grad_norm": 3.6202968640087114, "learning_rate": 4.305635486298641e-06, "loss": 1.0705, "step": 9726 }, { "epoch": 0.0430607817964496, "grad_norm": 2.887633693611503, "learning_rate": 4.30607817964496e-06, "loss": 0.6399, "step": 9727 }, { "epoch": 0.04306520872991279, "grad_norm": 2.6837460192788427, "learning_rate": 4.30652087299128e-06, "loss": 0.5967, "step": 9728 }, { "epoch": 0.04306963566337598, "grad_norm": 5.25139874508228, "learning_rate": 4.306963566337598e-06, "loss": 1.2746, "step": 9729 }, { "epoch": 0.04307406259683917, "grad_norm": 2.769847622448137, "learning_rate": 4.307406259683917e-06, "loss": 0.7208, "step": 9730 }, { "epoch": 0.04307848953030236, "grad_norm": 2.3607050237456724, "learning_rate": 4.307848953030236e-06, "loss": 0.7478, "step": 9731 }, { "epoch": 0.04308291646376555, "grad_norm": 3.0383673046634736, "learning_rate": 4.308291646376555e-06, "loss": 0.7046, "step": 9732 }, { "epoch": 0.043087343397228736, "grad_norm": 4.271134394423067, "learning_rate": 4.308734339722874e-06, "loss": 0.9473, "step": 9733 }, { "epoch": 0.04309177033069193, "grad_norm": 2.881733166525457, "learning_rate": 4.3091770330691935e-06, "loss": 0.9214, "step": 9734 }, { "epoch": 0.04309619726415512, "grad_norm": 2.552670894577333, "learning_rate": 4.309619726415512e-06, "loss": 0.5557, "step": 9735 }, { "epoch": 0.04310062419761831, "grad_norm": 3.037314172203193, "learning_rate": 4.310062419761831e-06, "loss": 0.8853, "step": 9736 }, { "epoch": 0.0431050511310815, "grad_norm": 3.099375310058438, "learning_rate": 4.310505113108151e-06, "loss": 1.0352, "step": 9737 }, { "epoch": 0.04310947806454469, "grad_norm": 2.428334908915358, "learning_rate": 4.310947806454469e-06, "loss": 0.7161, "step": 9738 }, { "epoch": 0.04311390499800788, "grad_norm": 3.328024187380105, "learning_rate": 4.311390499800789e-06, "loss": 1.0308, "step": 9739 }, { "epoch": 0.04311833193147107, "grad_norm": 2.6819165357738264, "learning_rate": 4.311833193147107e-06, "loss": 0.7611, "step": 9740 }, { "epoch": 0.04312275886493426, "grad_norm": 2.3969089604813183, "learning_rate": 4.3122758864934265e-06, "loss": 0.4745, "step": 9741 }, { "epoch": 0.04312718579839745, "grad_norm": 2.798136507401253, "learning_rate": 4.312718579839745e-06, "loss": 0.7359, "step": 9742 }, { "epoch": 0.04313161273186064, "grad_norm": 2.5348412571633117, "learning_rate": 4.313161273186064e-06, "loss": 0.6272, "step": 9743 }, { "epoch": 0.04313603966532383, "grad_norm": 2.3013195578763233, "learning_rate": 4.313603966532383e-06, "loss": 0.5135, "step": 9744 }, { "epoch": 0.04314046659878702, "grad_norm": 2.4020945880370195, "learning_rate": 4.314046659878702e-06, "loss": 0.7188, "step": 9745 }, { "epoch": 0.04314489353225021, "grad_norm": 3.2240622013256948, "learning_rate": 4.314489353225022e-06, "loss": 0.8726, "step": 9746 }, { "epoch": 0.0431493204657134, "grad_norm": 2.7595912104482783, "learning_rate": 4.31493204657134e-06, "loss": 0.8563, "step": 9747 }, { "epoch": 0.043153747399176594, "grad_norm": 3.046606710674723, "learning_rate": 4.3153747399176596e-06, "loss": 0.6734, "step": 9748 }, { "epoch": 0.04315817433263978, "grad_norm": 2.7638789447482535, "learning_rate": 4.315817433263978e-06, "loss": 0.7278, "step": 9749 }, { "epoch": 0.04316260126610297, "grad_norm": 2.8793297125292523, "learning_rate": 4.3162601266102975e-06, "loss": 0.6428, "step": 9750 }, { "epoch": 0.04316702819956616, "grad_norm": 2.3994637635559672, "learning_rate": 4.316702819956616e-06, "loss": 0.8415, "step": 9751 }, { "epoch": 0.04317145513302935, "grad_norm": 2.756075575807028, "learning_rate": 4.317145513302935e-06, "loss": 0.5656, "step": 9752 }, { "epoch": 0.04317588206649254, "grad_norm": 2.5278797538195783, "learning_rate": 4.317588206649254e-06, "loss": 0.7155, "step": 9753 }, { "epoch": 0.04318030899995573, "grad_norm": 2.7253868473278926, "learning_rate": 4.318030899995573e-06, "loss": 0.5977, "step": 9754 }, { "epoch": 0.04318473593341892, "grad_norm": 2.6437073248099257, "learning_rate": 4.318473593341893e-06, "loss": 0.8666, "step": 9755 }, { "epoch": 0.04318916286688211, "grad_norm": 3.4782060284346885, "learning_rate": 4.318916286688211e-06, "loss": 1.0242, "step": 9756 }, { "epoch": 0.0431935898003453, "grad_norm": 3.2365364008147677, "learning_rate": 4.3193589800345305e-06, "loss": 0.7415, "step": 9757 }, { "epoch": 0.04319801673380849, "grad_norm": 2.670970269992796, "learning_rate": 4.31980167338085e-06, "loss": 0.5062, "step": 9758 }, { "epoch": 0.04320244366727168, "grad_norm": 2.839555387861125, "learning_rate": 4.320244366727168e-06, "loss": 0.7884, "step": 9759 }, { "epoch": 0.04320687060073487, "grad_norm": 2.7464832671642117, "learning_rate": 4.320687060073488e-06, "loss": 0.85, "step": 9760 }, { "epoch": 0.04321129753419806, "grad_norm": 2.8301741062264916, "learning_rate": 4.321129753419806e-06, "loss": 0.9207, "step": 9761 }, { "epoch": 0.043215724467661254, "grad_norm": 2.800529011801541, "learning_rate": 4.321572446766125e-06, "loss": 0.8106, "step": 9762 }, { "epoch": 0.043220151401124444, "grad_norm": 2.2135116466186253, "learning_rate": 4.322015140112444e-06, "loss": 0.5052, "step": 9763 }, { "epoch": 0.04322457833458763, "grad_norm": 2.767448249736264, "learning_rate": 4.3224578334587636e-06, "loss": 0.6736, "step": 9764 }, { "epoch": 0.04322900526805082, "grad_norm": 2.8917867877102905, "learning_rate": 4.322900526805082e-06, "loss": 0.7917, "step": 9765 }, { "epoch": 0.04323343220151401, "grad_norm": 2.8954232424564132, "learning_rate": 4.3233432201514015e-06, "loss": 0.721, "step": 9766 }, { "epoch": 0.0432378591349772, "grad_norm": 2.5040557648394075, "learning_rate": 4.323785913497721e-06, "loss": 0.6509, "step": 9767 }, { "epoch": 0.04324228606844039, "grad_norm": 3.908463555754662, "learning_rate": 4.324228606844039e-06, "loss": 1.0625, "step": 9768 }, { "epoch": 0.04324671300190358, "grad_norm": 2.768734754021484, "learning_rate": 4.324671300190359e-06, "loss": 0.7061, "step": 9769 }, { "epoch": 0.04325113993536677, "grad_norm": 3.2650265944108896, "learning_rate": 4.325113993536677e-06, "loss": 1.2591, "step": 9770 }, { "epoch": 0.04325556686882996, "grad_norm": 4.18111656447988, "learning_rate": 4.325556686882997e-06, "loss": 0.9174, "step": 9771 }, { "epoch": 0.04325999380229315, "grad_norm": 3.2567174736308337, "learning_rate": 4.325999380229315e-06, "loss": 0.6278, "step": 9772 }, { "epoch": 0.04326442073575634, "grad_norm": 3.192740336362954, "learning_rate": 4.3264420735756345e-06, "loss": 0.8733, "step": 9773 }, { "epoch": 0.04326884766921953, "grad_norm": 3.1284559633724673, "learning_rate": 4.326884766921953e-06, "loss": 0.9091, "step": 9774 }, { "epoch": 0.043273274602682724, "grad_norm": 2.7082452907339376, "learning_rate": 4.327327460268272e-06, "loss": 0.573, "step": 9775 }, { "epoch": 0.043277701536145914, "grad_norm": 3.0025232821310395, "learning_rate": 4.327770153614592e-06, "loss": 0.8205, "step": 9776 }, { "epoch": 0.043282128469609105, "grad_norm": 3.427568296429945, "learning_rate": 4.32821284696091e-06, "loss": 1.1683, "step": 9777 }, { "epoch": 0.043286555403072295, "grad_norm": 2.872115252468488, "learning_rate": 4.32865554030723e-06, "loss": 0.8663, "step": 9778 }, { "epoch": 0.04329098233653548, "grad_norm": 3.2515299034382346, "learning_rate": 4.329098233653549e-06, "loss": 0.9873, "step": 9779 }, { "epoch": 0.04329540926999867, "grad_norm": 3.238638905145257, "learning_rate": 4.3295409269998676e-06, "loss": 0.72, "step": 9780 }, { "epoch": 0.04329983620346186, "grad_norm": 2.9539223730716144, "learning_rate": 4.329983620346186e-06, "loss": 0.6465, "step": 9781 }, { "epoch": 0.04330426313692505, "grad_norm": 2.8092625009796306, "learning_rate": 4.3304263136925055e-06, "loss": 0.8802, "step": 9782 }, { "epoch": 0.04330869007038824, "grad_norm": 2.662730394280232, "learning_rate": 4.330869007038824e-06, "loss": 0.8737, "step": 9783 }, { "epoch": 0.04331311700385143, "grad_norm": 2.887751860021932, "learning_rate": 4.331311700385143e-06, "loss": 0.857, "step": 9784 }, { "epoch": 0.04331754393731462, "grad_norm": 3.1861139709919164, "learning_rate": 4.331754393731463e-06, "loss": 0.5937, "step": 9785 }, { "epoch": 0.04332197087077781, "grad_norm": 3.564262649416018, "learning_rate": 4.332197087077781e-06, "loss": 1.1986, "step": 9786 }, { "epoch": 0.043326397804241, "grad_norm": 3.5855980640519047, "learning_rate": 4.332639780424101e-06, "loss": 0.8255, "step": 9787 }, { "epoch": 0.04333082473770419, "grad_norm": 2.494352972018616, "learning_rate": 4.33308247377042e-06, "loss": 0.5643, "step": 9788 }, { "epoch": 0.043335251671167384, "grad_norm": 2.3405510039628505, "learning_rate": 4.3335251671167385e-06, "loss": 0.6241, "step": 9789 }, { "epoch": 0.043339678604630574, "grad_norm": 2.717856834665994, "learning_rate": 4.333967860463058e-06, "loss": 0.7281, "step": 9790 }, { "epoch": 0.043344105538093765, "grad_norm": 3.0659660271074034, "learning_rate": 4.334410553809376e-06, "loss": 0.8248, "step": 9791 }, { "epoch": 0.043348532471556955, "grad_norm": 4.03924944570908, "learning_rate": 4.334853247155695e-06, "loss": 0.8462, "step": 9792 }, { "epoch": 0.043352959405020146, "grad_norm": 2.777334897463842, "learning_rate": 4.335295940502014e-06, "loss": 0.8174, "step": 9793 }, { "epoch": 0.04335738633848333, "grad_norm": 2.6870875293101575, "learning_rate": 4.335738633848334e-06, "loss": 0.7634, "step": 9794 }, { "epoch": 0.04336181327194652, "grad_norm": 2.7236749139287135, "learning_rate": 4.336181327194652e-06, "loss": 0.734, "step": 9795 }, { "epoch": 0.04336624020540971, "grad_norm": 3.6113644689118742, "learning_rate": 4.3366240205409716e-06, "loss": 1.3466, "step": 9796 }, { "epoch": 0.0433706671388729, "grad_norm": 2.8249467424179975, "learning_rate": 4.337066713887291e-06, "loss": 0.715, "step": 9797 }, { "epoch": 0.04337509407233609, "grad_norm": 2.728466539802812, "learning_rate": 4.3375094072336095e-06, "loss": 0.4606, "step": 9798 }, { "epoch": 0.04337952100579928, "grad_norm": 3.3466924567686864, "learning_rate": 4.337952100579929e-06, "loss": 0.9309, "step": 9799 }, { "epoch": 0.04338394793926247, "grad_norm": 2.6978137518810645, "learning_rate": 4.338394793926247e-06, "loss": 0.7857, "step": 9800 }, { "epoch": 0.04338837487272566, "grad_norm": 2.854945639526431, "learning_rate": 4.338837487272567e-06, "loss": 0.749, "step": 9801 }, { "epoch": 0.04339280180618885, "grad_norm": 2.625456330924714, "learning_rate": 4.339280180618885e-06, "loss": 0.6675, "step": 9802 }, { "epoch": 0.043397228739652044, "grad_norm": 2.712250474587278, "learning_rate": 4.339722873965205e-06, "loss": 0.7697, "step": 9803 }, { "epoch": 0.043401655673115234, "grad_norm": 2.909209954941832, "learning_rate": 4.340165567311523e-06, "loss": 0.6202, "step": 9804 }, { "epoch": 0.043406082606578425, "grad_norm": 2.606620306339976, "learning_rate": 4.3406082606578425e-06, "loss": 0.611, "step": 9805 }, { "epoch": 0.043410509540041615, "grad_norm": 2.4927833995909694, "learning_rate": 4.341050954004162e-06, "loss": 0.6147, "step": 9806 }, { "epoch": 0.043414936473504806, "grad_norm": 2.72762081999213, "learning_rate": 4.34149364735048e-06, "loss": 0.5894, "step": 9807 }, { "epoch": 0.043419363406967997, "grad_norm": 2.86120447374349, "learning_rate": 4.3419363406968e-06, "loss": 0.7995, "step": 9808 }, { "epoch": 0.04342379034043118, "grad_norm": 3.8637712708895173, "learning_rate": 4.342379034043119e-06, "loss": 1.2443, "step": 9809 }, { "epoch": 0.04342821727389437, "grad_norm": 4.220657078545206, "learning_rate": 4.342821727389438e-06, "loss": 0.9494, "step": 9810 }, { "epoch": 0.04343264420735756, "grad_norm": 2.7677183832695267, "learning_rate": 4.343264420735756e-06, "loss": 0.759, "step": 9811 }, { "epoch": 0.04343707114082075, "grad_norm": 3.145406141945354, "learning_rate": 4.3437071140820756e-06, "loss": 0.801, "step": 9812 }, { "epoch": 0.04344149807428394, "grad_norm": 2.4154391162225592, "learning_rate": 4.344149807428395e-06, "loss": 0.6976, "step": 9813 }, { "epoch": 0.04344592500774713, "grad_norm": 2.6668897942795873, "learning_rate": 4.3445925007747135e-06, "loss": 0.7454, "step": 9814 }, { "epoch": 0.04345035194121032, "grad_norm": 2.4508902815386473, "learning_rate": 4.345035194121033e-06, "loss": 0.5502, "step": 9815 }, { "epoch": 0.043454778874673514, "grad_norm": 2.5647508128738443, "learning_rate": 4.345477887467352e-06, "loss": 0.8615, "step": 9816 }, { "epoch": 0.043459205808136704, "grad_norm": 2.6688766398058963, "learning_rate": 4.345920580813671e-06, "loss": 0.5698, "step": 9817 }, { "epoch": 0.043463632741599895, "grad_norm": 3.7212500471787955, "learning_rate": 4.34636327415999e-06, "loss": 0.759, "step": 9818 }, { "epoch": 0.043468059675063085, "grad_norm": 2.850876687605629, "learning_rate": 4.346805967506309e-06, "loss": 0.7482, "step": 9819 }, { "epoch": 0.043472486608526276, "grad_norm": 3.1131089133588903, "learning_rate": 4.347248660852628e-06, "loss": 0.9218, "step": 9820 }, { "epoch": 0.043476913541989466, "grad_norm": 3.0684981942302842, "learning_rate": 4.3476913541989465e-06, "loss": 0.8205, "step": 9821 }, { "epoch": 0.04348134047545266, "grad_norm": 3.29113399619656, "learning_rate": 4.348134047545266e-06, "loss": 1.0882, "step": 9822 }, { "epoch": 0.04348576740891585, "grad_norm": 2.576218386742523, "learning_rate": 4.348576740891584e-06, "loss": 0.8421, "step": 9823 }, { "epoch": 0.04349019434237903, "grad_norm": 2.4981719592435216, "learning_rate": 4.349019434237904e-06, "loss": 0.7059, "step": 9824 }, { "epoch": 0.04349462127584222, "grad_norm": 4.911750007278497, "learning_rate": 4.349462127584223e-06, "loss": 1.6866, "step": 9825 }, { "epoch": 0.04349904820930541, "grad_norm": 2.785371612714295, "learning_rate": 4.349904820930542e-06, "loss": 0.6137, "step": 9826 }, { "epoch": 0.0435034751427686, "grad_norm": 2.6211489479350374, "learning_rate": 4.350347514276861e-06, "loss": 0.7064, "step": 9827 }, { "epoch": 0.04350790207623179, "grad_norm": 2.8083118183497917, "learning_rate": 4.3507902076231804e-06, "loss": 0.6587, "step": 9828 }, { "epoch": 0.04351232900969498, "grad_norm": 2.905955137743454, "learning_rate": 4.351232900969499e-06, "loss": 0.9178, "step": 9829 }, { "epoch": 0.043516755943158174, "grad_norm": 3.0357049414459127, "learning_rate": 4.3516755943158175e-06, "loss": 0.6147, "step": 9830 }, { "epoch": 0.043521182876621364, "grad_norm": 3.5252974716688477, "learning_rate": 4.352118287662137e-06, "loss": 1.2227, "step": 9831 }, { "epoch": 0.043525609810084555, "grad_norm": 3.21809109392831, "learning_rate": 4.352560981008455e-06, "loss": 1.0213, "step": 9832 }, { "epoch": 0.043530036743547745, "grad_norm": 3.0792168905232864, "learning_rate": 4.353003674354775e-06, "loss": 0.8831, "step": 9833 }, { "epoch": 0.043534463677010936, "grad_norm": 2.9994000065627597, "learning_rate": 4.353446367701094e-06, "loss": 0.8137, "step": 9834 }, { "epoch": 0.043538890610474126, "grad_norm": 2.5575402885042213, "learning_rate": 4.353889061047413e-06, "loss": 0.6585, "step": 9835 }, { "epoch": 0.04354331754393732, "grad_norm": 3.114432746642399, "learning_rate": 4.354331754393732e-06, "loss": 0.9418, "step": 9836 }, { "epoch": 0.04354774447740051, "grad_norm": 3.254062567565067, "learning_rate": 4.354774447740051e-06, "loss": 0.7805, "step": 9837 }, { "epoch": 0.0435521714108637, "grad_norm": 3.0628186045012273, "learning_rate": 4.35521714108637e-06, "loss": 0.8234, "step": 9838 }, { "epoch": 0.04355659834432688, "grad_norm": 3.0090664716060993, "learning_rate": 4.355659834432689e-06, "loss": 0.7306, "step": 9839 }, { "epoch": 0.04356102527779007, "grad_norm": 2.6985917378422024, "learning_rate": 4.356102527779008e-06, "loss": 0.4739, "step": 9840 }, { "epoch": 0.04356545221125326, "grad_norm": 3.204586760441998, "learning_rate": 4.356545221125327e-06, "loss": 0.9956, "step": 9841 }, { "epoch": 0.04356987914471645, "grad_norm": 3.009881222939361, "learning_rate": 4.356987914471646e-06, "loss": 0.8731, "step": 9842 }, { "epoch": 0.04357430607817964, "grad_norm": 2.8845189524107333, "learning_rate": 4.357430607817965e-06, "loss": 0.6203, "step": 9843 }, { "epoch": 0.043578733011642834, "grad_norm": 2.7753659026476765, "learning_rate": 4.357873301164284e-06, "loss": 0.9701, "step": 9844 }, { "epoch": 0.043583159945106024, "grad_norm": 2.9785102800507244, "learning_rate": 4.358315994510603e-06, "loss": 0.6591, "step": 9845 }, { "epoch": 0.043587586878569215, "grad_norm": 2.5839116353452205, "learning_rate": 4.358758687856922e-06, "loss": 0.6421, "step": 9846 }, { "epoch": 0.043592013812032405, "grad_norm": 3.712177881392403, "learning_rate": 4.359201381203241e-06, "loss": 0.6977, "step": 9847 }, { "epoch": 0.043596440745495596, "grad_norm": 3.0328855329576996, "learning_rate": 4.35964407454956e-06, "loss": 0.6725, "step": 9848 }, { "epoch": 0.043600867678958787, "grad_norm": 3.27630042844416, "learning_rate": 4.360086767895879e-06, "loss": 0.8927, "step": 9849 }, { "epoch": 0.04360529461242198, "grad_norm": 3.016655574014259, "learning_rate": 4.360529461242198e-06, "loss": 0.8592, "step": 9850 }, { "epoch": 0.04360972154588517, "grad_norm": 2.393590473774428, "learning_rate": 4.360972154588517e-06, "loss": 0.557, "step": 9851 }, { "epoch": 0.04361414847934836, "grad_norm": 2.7162902527464663, "learning_rate": 4.361414847934836e-06, "loss": 0.8579, "step": 9852 }, { "epoch": 0.04361857541281155, "grad_norm": 2.1512325396365632, "learning_rate": 4.3618575412811545e-06, "loss": 0.4948, "step": 9853 }, { "epoch": 0.04362300234627473, "grad_norm": 3.2997253062699254, "learning_rate": 4.362300234627474e-06, "loss": 0.7887, "step": 9854 }, { "epoch": 0.04362742927973792, "grad_norm": 2.270296088210022, "learning_rate": 4.362742927973793e-06, "loss": 0.4735, "step": 9855 }, { "epoch": 0.04363185621320111, "grad_norm": 2.7984906905725198, "learning_rate": 4.363185621320112e-06, "loss": 0.7954, "step": 9856 }, { "epoch": 0.043636283146664304, "grad_norm": 3.4591636037095785, "learning_rate": 4.363628314666431e-06, "loss": 1.0086, "step": 9857 }, { "epoch": 0.043640710080127494, "grad_norm": 2.6441695480248093, "learning_rate": 4.3640710080127505e-06, "loss": 0.852, "step": 9858 }, { "epoch": 0.043645137013590685, "grad_norm": 3.2930251916350004, "learning_rate": 4.364513701359069e-06, "loss": 0.9448, "step": 9859 }, { "epoch": 0.043649563947053875, "grad_norm": 2.732879369941629, "learning_rate": 4.3649563947053884e-06, "loss": 0.5982, "step": 9860 }, { "epoch": 0.043653990880517066, "grad_norm": 3.095291619518119, "learning_rate": 4.365399088051707e-06, "loss": 0.5932, "step": 9861 }, { "epoch": 0.043658417813980256, "grad_norm": 2.6709054950762106, "learning_rate": 4.3658417813980255e-06, "loss": 0.5697, "step": 9862 }, { "epoch": 0.04366284474744345, "grad_norm": 2.9013975167870574, "learning_rate": 4.366284474744345e-06, "loss": 0.8218, "step": 9863 }, { "epoch": 0.04366727168090664, "grad_norm": 2.621014266078405, "learning_rate": 4.366727168090664e-06, "loss": 0.9499, "step": 9864 }, { "epoch": 0.04367169861436983, "grad_norm": 2.4438274188633295, "learning_rate": 4.367169861436983e-06, "loss": 0.7433, "step": 9865 }, { "epoch": 0.04367612554783302, "grad_norm": 2.9167826960439873, "learning_rate": 4.367612554783302e-06, "loss": 0.8708, "step": 9866 }, { "epoch": 0.04368055248129621, "grad_norm": 3.4896833072409446, "learning_rate": 4.3680552481296215e-06, "loss": 1.0639, "step": 9867 }, { "epoch": 0.0436849794147594, "grad_norm": 3.1314846285199947, "learning_rate": 4.36849794147594e-06, "loss": 0.5828, "step": 9868 }, { "epoch": 0.04368940634822258, "grad_norm": 2.6690525533003266, "learning_rate": 4.368940634822259e-06, "loss": 0.6235, "step": 9869 }, { "epoch": 0.04369383328168577, "grad_norm": 2.6461623055865964, "learning_rate": 4.369383328168578e-06, "loss": 0.5441, "step": 9870 }, { "epoch": 0.043698260215148964, "grad_norm": 2.4618264321153025, "learning_rate": 4.369826021514897e-06, "loss": 0.6693, "step": 9871 }, { "epoch": 0.043702687148612154, "grad_norm": 3.517168131096253, "learning_rate": 4.370268714861216e-06, "loss": 1.1535, "step": 9872 }, { "epoch": 0.043707114082075345, "grad_norm": 3.936760306399816, "learning_rate": 4.370711408207535e-06, "loss": 1.3165, "step": 9873 }, { "epoch": 0.043711541015538535, "grad_norm": 2.810268281476019, "learning_rate": 4.371154101553854e-06, "loss": 0.7168, "step": 9874 }, { "epoch": 0.043715967949001726, "grad_norm": 2.483025969771683, "learning_rate": 4.371596794900173e-06, "loss": 0.8337, "step": 9875 }, { "epoch": 0.043720394882464916, "grad_norm": 2.710842659909372, "learning_rate": 4.3720394882464924e-06, "loss": 0.8285, "step": 9876 }, { "epoch": 0.04372482181592811, "grad_norm": 2.686930471254727, "learning_rate": 4.372482181592811e-06, "loss": 0.596, "step": 9877 }, { "epoch": 0.0437292487493913, "grad_norm": 2.9217426015128094, "learning_rate": 4.37292487493913e-06, "loss": 0.8429, "step": 9878 }, { "epoch": 0.04373367568285449, "grad_norm": 2.8233370895815924, "learning_rate": 4.37336756828545e-06, "loss": 0.6816, "step": 9879 }, { "epoch": 0.04373810261631768, "grad_norm": 3.1599062473923865, "learning_rate": 4.373810261631768e-06, "loss": 0.8702, "step": 9880 }, { "epoch": 0.04374252954978087, "grad_norm": 2.869816245185607, "learning_rate": 4.374252954978087e-06, "loss": 0.6468, "step": 9881 }, { "epoch": 0.04374695648324406, "grad_norm": 3.2612761967258486, "learning_rate": 4.374695648324406e-06, "loss": 0.9512, "step": 9882 }, { "epoch": 0.04375138341670725, "grad_norm": 2.5565355450767857, "learning_rate": 4.375138341670725e-06, "loss": 0.6158, "step": 9883 }, { "epoch": 0.04375581035017044, "grad_norm": 3.2408935627478774, "learning_rate": 4.375581035017044e-06, "loss": 1.4178, "step": 9884 }, { "epoch": 0.043760237283633624, "grad_norm": 3.1390655409476023, "learning_rate": 4.376023728363363e-06, "loss": 0.7543, "step": 9885 }, { "epoch": 0.043764664217096814, "grad_norm": 2.8300767413186025, "learning_rate": 4.376466421709682e-06, "loss": 0.7559, "step": 9886 }, { "epoch": 0.043769091150560005, "grad_norm": 2.4486458546943908, "learning_rate": 4.376909115056001e-06, "loss": 0.7712, "step": 9887 }, { "epoch": 0.043773518084023195, "grad_norm": 2.4623685823516075, "learning_rate": 4.377351808402321e-06, "loss": 0.8334, "step": 9888 }, { "epoch": 0.043777945017486386, "grad_norm": 2.7866203446723365, "learning_rate": 4.377794501748639e-06, "loss": 0.5812, "step": 9889 }, { "epoch": 0.043782371950949577, "grad_norm": 2.3803464347455057, "learning_rate": 4.3782371950949585e-06, "loss": 0.756, "step": 9890 }, { "epoch": 0.04378679888441277, "grad_norm": 2.6723148456034673, "learning_rate": 4.378679888441277e-06, "loss": 0.6833, "step": 9891 }, { "epoch": 0.04379122581787596, "grad_norm": 2.453536889072959, "learning_rate": 4.379122581787596e-06, "loss": 0.7472, "step": 9892 }, { "epoch": 0.04379565275133915, "grad_norm": 2.659314764666217, "learning_rate": 4.379565275133915e-06, "loss": 0.6785, "step": 9893 }, { "epoch": 0.04380007968480234, "grad_norm": 3.916754674058519, "learning_rate": 4.380007968480234e-06, "loss": 1.0946, "step": 9894 }, { "epoch": 0.04380450661826553, "grad_norm": 3.1595549757716688, "learning_rate": 4.380450661826553e-06, "loss": 0.8149, "step": 9895 }, { "epoch": 0.04380893355172872, "grad_norm": 3.0069264684891417, "learning_rate": 4.380893355172872e-06, "loss": 0.8486, "step": 9896 }, { "epoch": 0.04381336048519191, "grad_norm": 4.170577002627626, "learning_rate": 4.381336048519192e-06, "loss": 1.0242, "step": 9897 }, { "epoch": 0.0438177874186551, "grad_norm": 2.8799770221372296, "learning_rate": 4.38177874186551e-06, "loss": 0.8782, "step": 9898 }, { "epoch": 0.04382221435211829, "grad_norm": 2.785925457920732, "learning_rate": 4.3822214352118295e-06, "loss": 0.7378, "step": 9899 }, { "epoch": 0.043826641285581475, "grad_norm": 3.329099742877059, "learning_rate": 4.382664128558148e-06, "loss": 1.04, "step": 9900 }, { "epoch": 0.043831068219044665, "grad_norm": 3.2003512934707534, "learning_rate": 4.383106821904467e-06, "loss": 1.0012, "step": 9901 }, { "epoch": 0.043835495152507856, "grad_norm": 2.878290790251443, "learning_rate": 4.383549515250786e-06, "loss": 0.5802, "step": 9902 }, { "epoch": 0.043839922085971046, "grad_norm": 2.6209947216530654, "learning_rate": 4.383992208597105e-06, "loss": 0.7619, "step": 9903 }, { "epoch": 0.04384434901943424, "grad_norm": 2.5847837154795887, "learning_rate": 4.384434901943424e-06, "loss": 0.9014, "step": 9904 }, { "epoch": 0.04384877595289743, "grad_norm": 3.11006001910067, "learning_rate": 4.384877595289743e-06, "loss": 0.9832, "step": 9905 }, { "epoch": 0.04385320288636062, "grad_norm": 3.117500690838053, "learning_rate": 4.3853202886360625e-06, "loss": 0.8616, "step": 9906 }, { "epoch": 0.04385762981982381, "grad_norm": 2.3616614601233454, "learning_rate": 4.385762981982381e-06, "loss": 0.7525, "step": 9907 }, { "epoch": 0.043862056753287, "grad_norm": 2.7956479771135374, "learning_rate": 4.3862056753287004e-06, "loss": 0.514, "step": 9908 }, { "epoch": 0.04386648368675019, "grad_norm": 4.019918024889634, "learning_rate": 4.38664836867502e-06, "loss": 0.7253, "step": 9909 }, { "epoch": 0.04387091062021338, "grad_norm": 2.805042956150598, "learning_rate": 4.387091062021338e-06, "loss": 0.9999, "step": 9910 }, { "epoch": 0.04387533755367657, "grad_norm": 2.7908697488081367, "learning_rate": 4.387533755367657e-06, "loss": 0.8346, "step": 9911 }, { "epoch": 0.04387976448713976, "grad_norm": 4.037347507693442, "learning_rate": 4.387976448713976e-06, "loss": 1.0132, "step": 9912 }, { "epoch": 0.04388419142060295, "grad_norm": 2.831829136878585, "learning_rate": 4.388419142060295e-06, "loss": 0.5429, "step": 9913 }, { "epoch": 0.04388861835406614, "grad_norm": 3.1595452052493145, "learning_rate": 4.388861835406614e-06, "loss": 0.8819, "step": 9914 }, { "epoch": 0.043893045287529325, "grad_norm": 2.7843896796349843, "learning_rate": 4.3893045287529335e-06, "loss": 0.8901, "step": 9915 }, { "epoch": 0.043897472220992516, "grad_norm": 2.9739251087761955, "learning_rate": 4.389747222099252e-06, "loss": 0.9006, "step": 9916 }, { "epoch": 0.043901899154455706, "grad_norm": 2.8812251938664497, "learning_rate": 4.390189915445571e-06, "loss": 0.9663, "step": 9917 }, { "epoch": 0.0439063260879189, "grad_norm": 2.746175325986761, "learning_rate": 4.390632608791891e-06, "loss": 0.7261, "step": 9918 }, { "epoch": 0.04391075302138209, "grad_norm": 3.1676942762385476, "learning_rate": 4.391075302138209e-06, "loss": 1.0965, "step": 9919 }, { "epoch": 0.04391517995484528, "grad_norm": 2.428733293492339, "learning_rate": 4.391517995484529e-06, "loss": 0.5259, "step": 9920 }, { "epoch": 0.04391960688830847, "grad_norm": 2.7510211485201426, "learning_rate": 4.391960688830847e-06, "loss": 0.755, "step": 9921 }, { "epoch": 0.04392403382177166, "grad_norm": 3.0825442105459646, "learning_rate": 4.392403382177166e-06, "loss": 0.8385, "step": 9922 }, { "epoch": 0.04392846075523485, "grad_norm": 2.9347043050291535, "learning_rate": 4.392846075523485e-06, "loss": 0.8937, "step": 9923 }, { "epoch": 0.04393288768869804, "grad_norm": 3.1794954545280025, "learning_rate": 4.3932887688698044e-06, "loss": 0.904, "step": 9924 }, { "epoch": 0.04393731462216123, "grad_norm": 5.018237040734412, "learning_rate": 4.393731462216123e-06, "loss": 1.1901, "step": 9925 }, { "epoch": 0.04394174155562442, "grad_norm": 4.542989895244345, "learning_rate": 4.394174155562442e-06, "loss": 1.3834, "step": 9926 }, { "epoch": 0.04394616848908761, "grad_norm": 2.534913998326752, "learning_rate": 4.394616848908762e-06, "loss": 0.7256, "step": 9927 }, { "epoch": 0.0439505954225508, "grad_norm": 3.2232540061256865, "learning_rate": 4.39505954225508e-06, "loss": 1.0819, "step": 9928 }, { "epoch": 0.04395502235601399, "grad_norm": 2.8836705787629557, "learning_rate": 4.3955022356014e-06, "loss": 0.5604, "step": 9929 }, { "epoch": 0.043959449289477176, "grad_norm": 3.4710516329246284, "learning_rate": 4.395944928947718e-06, "loss": 1.0144, "step": 9930 }, { "epoch": 0.043963876222940367, "grad_norm": 2.929177014845286, "learning_rate": 4.3963876222940375e-06, "loss": 0.8975, "step": 9931 }, { "epoch": 0.04396830315640356, "grad_norm": 2.0889548582671145, "learning_rate": 4.396830315640356e-06, "loss": 0.4879, "step": 9932 }, { "epoch": 0.04397273008986675, "grad_norm": 3.1471932060119197, "learning_rate": 4.397273008986675e-06, "loss": 1.0576, "step": 9933 }, { "epoch": 0.04397715702332994, "grad_norm": 3.133211089980336, "learning_rate": 4.397715702332994e-06, "loss": 0.7055, "step": 9934 }, { "epoch": 0.04398158395679313, "grad_norm": 3.6849993193604287, "learning_rate": 4.398158395679313e-06, "loss": 1.0897, "step": 9935 }, { "epoch": 0.04398601089025632, "grad_norm": 2.526830893186309, "learning_rate": 4.398601089025633e-06, "loss": 0.7442, "step": 9936 }, { "epoch": 0.04399043782371951, "grad_norm": 3.1397141520980782, "learning_rate": 4.399043782371951e-06, "loss": 1.0247, "step": 9937 }, { "epoch": 0.0439948647571827, "grad_norm": 2.6722949411522445, "learning_rate": 4.3994864757182705e-06, "loss": 0.824, "step": 9938 }, { "epoch": 0.04399929169064589, "grad_norm": 2.3861981895854356, "learning_rate": 4.39992916906459e-06, "loss": 0.6128, "step": 9939 }, { "epoch": 0.04400371862410908, "grad_norm": 2.91544496096591, "learning_rate": 4.4003718624109084e-06, "loss": 0.7651, "step": 9940 }, { "epoch": 0.04400814555757227, "grad_norm": 3.1538833641355253, "learning_rate": 4.400814555757228e-06, "loss": 0.7877, "step": 9941 }, { "epoch": 0.04401257249103546, "grad_norm": 2.8799862676399823, "learning_rate": 4.401257249103546e-06, "loss": 0.8462, "step": 9942 }, { "epoch": 0.04401699942449865, "grad_norm": 3.3058596698130662, "learning_rate": 4.401699942449865e-06, "loss": 1.097, "step": 9943 }, { "epoch": 0.04402142635796184, "grad_norm": 2.6483696576962767, "learning_rate": 4.402142635796184e-06, "loss": 0.4048, "step": 9944 }, { "epoch": 0.04402585329142503, "grad_norm": 2.6795867679193965, "learning_rate": 4.402585329142504e-06, "loss": 0.6693, "step": 9945 }, { "epoch": 0.04403028022488822, "grad_norm": 2.541639658724195, "learning_rate": 4.403028022488822e-06, "loss": 0.5515, "step": 9946 }, { "epoch": 0.04403470715835141, "grad_norm": 2.461191429273548, "learning_rate": 4.4034707158351415e-06, "loss": 0.5602, "step": 9947 }, { "epoch": 0.0440391340918146, "grad_norm": 3.4804672853454224, "learning_rate": 4.403913409181461e-06, "loss": 0.936, "step": 9948 }, { "epoch": 0.04404356102527779, "grad_norm": 3.210410035193985, "learning_rate": 4.404356102527779e-06, "loss": 0.5799, "step": 9949 }, { "epoch": 0.04404798795874098, "grad_norm": 2.964502078653461, "learning_rate": 4.404798795874099e-06, "loss": 0.9383, "step": 9950 }, { "epoch": 0.04405241489220417, "grad_norm": 3.7769395507823122, "learning_rate": 4.405241489220417e-06, "loss": 0.7131, "step": 9951 }, { "epoch": 0.04405684182566736, "grad_norm": 2.8501546005043537, "learning_rate": 4.405684182566737e-06, "loss": 0.73, "step": 9952 }, { "epoch": 0.04406126875913055, "grad_norm": 2.9744915326474475, "learning_rate": 4.406126875913055e-06, "loss": 0.7213, "step": 9953 }, { "epoch": 0.04406569569259374, "grad_norm": 4.013376893681714, "learning_rate": 4.4065695692593745e-06, "loss": 0.9688, "step": 9954 }, { "epoch": 0.04407012262605693, "grad_norm": 3.374961200192125, "learning_rate": 4.407012262605693e-06, "loss": 1.3111, "step": 9955 }, { "epoch": 0.04407454955952012, "grad_norm": 2.5643587019672807, "learning_rate": 4.4074549559520124e-06, "loss": 0.5884, "step": 9956 }, { "epoch": 0.04407897649298331, "grad_norm": 3.0259097820819414, "learning_rate": 4.407897649298332e-06, "loss": 0.9, "step": 9957 }, { "epoch": 0.0440834034264465, "grad_norm": 2.5424030034811214, "learning_rate": 4.40834034264465e-06, "loss": 0.7126, "step": 9958 }, { "epoch": 0.044087830359909694, "grad_norm": 2.989221243891611, "learning_rate": 4.40878303599097e-06, "loss": 0.8101, "step": 9959 }, { "epoch": 0.04409225729337288, "grad_norm": 3.2350127535640945, "learning_rate": 4.409225729337289e-06, "loss": 0.6689, "step": 9960 }, { "epoch": 0.04409668422683607, "grad_norm": 3.209104638517827, "learning_rate": 4.409668422683608e-06, "loss": 0.9069, "step": 9961 }, { "epoch": 0.04410111116029926, "grad_norm": 3.0346942753939903, "learning_rate": 4.410111116029926e-06, "loss": 0.8909, "step": 9962 }, { "epoch": 0.04410553809376245, "grad_norm": 2.6678384185554385, "learning_rate": 4.4105538093762455e-06, "loss": 0.8913, "step": 9963 }, { "epoch": 0.04410996502722564, "grad_norm": 3.940614755822789, "learning_rate": 4.410996502722564e-06, "loss": 1.1316, "step": 9964 }, { "epoch": 0.04411439196068883, "grad_norm": 2.570010987691128, "learning_rate": 4.411439196068883e-06, "loss": 0.821, "step": 9965 }, { "epoch": 0.04411881889415202, "grad_norm": 2.482743678383035, "learning_rate": 4.411881889415203e-06, "loss": 0.7144, "step": 9966 }, { "epoch": 0.04412324582761521, "grad_norm": 2.667229706683703, "learning_rate": 4.412324582761521e-06, "loss": 0.6332, "step": 9967 }, { "epoch": 0.0441276727610784, "grad_norm": 3.5298068240678755, "learning_rate": 4.412767276107841e-06, "loss": 0.9635, "step": 9968 }, { "epoch": 0.04413209969454159, "grad_norm": 3.504320417325748, "learning_rate": 4.41320996945416e-06, "loss": 0.9113, "step": 9969 }, { "epoch": 0.04413652662800478, "grad_norm": 2.4816941657545786, "learning_rate": 4.4136526628004785e-06, "loss": 0.6554, "step": 9970 }, { "epoch": 0.04414095356146797, "grad_norm": 3.346552701024511, "learning_rate": 4.414095356146798e-06, "loss": 1.3201, "step": 9971 }, { "epoch": 0.04414538049493116, "grad_norm": 2.776682572794694, "learning_rate": 4.4145380494931164e-06, "loss": 0.8211, "step": 9972 }, { "epoch": 0.044149807428394354, "grad_norm": 2.766285758312826, "learning_rate": 4.414980742839435e-06, "loss": 0.7143, "step": 9973 }, { "epoch": 0.044154234361857544, "grad_norm": 3.0053230166711877, "learning_rate": 4.415423436185754e-06, "loss": 0.8764, "step": 9974 }, { "epoch": 0.04415866129532073, "grad_norm": 3.8460817081053507, "learning_rate": 4.415866129532074e-06, "loss": 1.3564, "step": 9975 }, { "epoch": 0.04416308822878392, "grad_norm": 2.852417738581353, "learning_rate": 4.416308822878392e-06, "loss": 0.855, "step": 9976 }, { "epoch": 0.04416751516224711, "grad_norm": 3.2382249006179675, "learning_rate": 4.416751516224712e-06, "loss": 0.704, "step": 9977 }, { "epoch": 0.0441719420957103, "grad_norm": 3.088660184764285, "learning_rate": 4.417194209571031e-06, "loss": 0.6331, "step": 9978 }, { "epoch": 0.04417636902917349, "grad_norm": 2.540702044174094, "learning_rate": 4.4176369029173495e-06, "loss": 0.8552, "step": 9979 }, { "epoch": 0.04418079596263668, "grad_norm": 2.7007861607685557, "learning_rate": 4.418079596263669e-06, "loss": 0.7958, "step": 9980 }, { "epoch": 0.04418522289609987, "grad_norm": 4.1587093757019025, "learning_rate": 4.418522289609987e-06, "loss": 1.1603, "step": 9981 }, { "epoch": 0.04418964982956306, "grad_norm": 2.429172837773831, "learning_rate": 4.418964982956307e-06, "loss": 0.6694, "step": 9982 }, { "epoch": 0.04419407676302625, "grad_norm": 2.9975652879501102, "learning_rate": 4.419407676302625e-06, "loss": 0.7226, "step": 9983 }, { "epoch": 0.04419850369648944, "grad_norm": 3.1671194139553402, "learning_rate": 4.419850369648945e-06, "loss": 0.9889, "step": 9984 }, { "epoch": 0.04420293062995263, "grad_norm": 2.566640418250273, "learning_rate": 4.420293062995263e-06, "loss": 0.7242, "step": 9985 }, { "epoch": 0.044207357563415824, "grad_norm": 3.5971430741891663, "learning_rate": 4.4207357563415825e-06, "loss": 0.8589, "step": 9986 }, { "epoch": 0.044211784496879014, "grad_norm": 2.549620371493178, "learning_rate": 4.421178449687902e-06, "loss": 0.6889, "step": 9987 }, { "epoch": 0.044216211430342205, "grad_norm": 2.692419526428354, "learning_rate": 4.4216211430342204e-06, "loss": 0.7128, "step": 9988 }, { "epoch": 0.044220638363805395, "grad_norm": 2.977376740597997, "learning_rate": 4.42206383638054e-06, "loss": 0.9814, "step": 9989 }, { "epoch": 0.04422506529726858, "grad_norm": 2.708326884584399, "learning_rate": 4.422506529726859e-06, "loss": 0.8074, "step": 9990 }, { "epoch": 0.04422949223073177, "grad_norm": 2.5238661839965237, "learning_rate": 4.422949223073178e-06, "loss": 0.5408, "step": 9991 }, { "epoch": 0.04423391916419496, "grad_norm": 2.679813570986384, "learning_rate": 4.423391916419496e-06, "loss": 0.654, "step": 9992 }, { "epoch": 0.04423834609765815, "grad_norm": 2.512703323055746, "learning_rate": 4.423834609765816e-06, "loss": 0.838, "step": 9993 }, { "epoch": 0.04424277303112134, "grad_norm": 2.7491757192931856, "learning_rate": 4.424277303112134e-06, "loss": 0.4241, "step": 9994 }, { "epoch": 0.04424719996458453, "grad_norm": 2.3001928312022337, "learning_rate": 4.4247199964584535e-06, "loss": 0.5658, "step": 9995 }, { "epoch": 0.04425162689804772, "grad_norm": 3.1403054315435313, "learning_rate": 4.425162689804773e-06, "loss": 0.8333, "step": 9996 }, { "epoch": 0.04425605383151091, "grad_norm": 2.6299260812467913, "learning_rate": 4.425605383151091e-06, "loss": 0.5515, "step": 9997 }, { "epoch": 0.0442604807649741, "grad_norm": 2.4917775834484592, "learning_rate": 4.426048076497411e-06, "loss": 0.833, "step": 9998 }, { "epoch": 0.04426490769843729, "grad_norm": 3.0189813754365544, "learning_rate": 4.42649076984373e-06, "loss": 0.9333, "step": 9999 }, { "epoch": 0.044269334631900484, "grad_norm": 2.8267633599581257, "learning_rate": 4.426933463190049e-06, "loss": 0.7211, "step": 10000 }, { "epoch": 0.044273761565363674, "grad_norm": 2.7058125208056483, "learning_rate": 4.427376156536368e-06, "loss": 0.4352, "step": 10001 }, { "epoch": 0.044278188498826865, "grad_norm": 3.144759874003944, "learning_rate": 4.4278188498826865e-06, "loss": 0.7986, "step": 10002 }, { "epoch": 0.044282615432290055, "grad_norm": 3.409083954266057, "learning_rate": 4.428261543229005e-06, "loss": 0.9254, "step": 10003 }, { "epoch": 0.044287042365753246, "grad_norm": 3.225496838731908, "learning_rate": 4.4287042365753244e-06, "loss": 0.7505, "step": 10004 }, { "epoch": 0.04429146929921643, "grad_norm": 2.568327227528144, "learning_rate": 4.429146929921644e-06, "loss": 0.5568, "step": 10005 }, { "epoch": 0.04429589623267962, "grad_norm": 2.3582507396276844, "learning_rate": 4.429589623267962e-06, "loss": 0.7428, "step": 10006 }, { "epoch": 0.04430032316614281, "grad_norm": 2.798503634687116, "learning_rate": 4.430032316614282e-06, "loss": 0.7949, "step": 10007 }, { "epoch": 0.044304750099606, "grad_norm": 2.3364015398069005, "learning_rate": 4.430475009960601e-06, "loss": 0.6271, "step": 10008 }, { "epoch": 0.04430917703306919, "grad_norm": 2.4416676881275197, "learning_rate": 4.43091770330692e-06, "loss": 0.7164, "step": 10009 }, { "epoch": 0.04431360396653238, "grad_norm": 2.443180407198342, "learning_rate": 4.431360396653239e-06, "loss": 0.5974, "step": 10010 }, { "epoch": 0.04431803089999557, "grad_norm": 2.562523347460783, "learning_rate": 4.4318030899995575e-06, "loss": 0.6371, "step": 10011 }, { "epoch": 0.04432245783345876, "grad_norm": 2.5351143920487966, "learning_rate": 4.432245783345877e-06, "loss": 0.7595, "step": 10012 }, { "epoch": 0.04432688476692195, "grad_norm": 2.3178652405256486, "learning_rate": 4.432688476692195e-06, "loss": 0.6864, "step": 10013 }, { "epoch": 0.044331311700385144, "grad_norm": 3.189837925872288, "learning_rate": 4.433131170038515e-06, "loss": 0.896, "step": 10014 }, { "epoch": 0.044335738633848334, "grad_norm": 2.568049882301947, "learning_rate": 4.433573863384833e-06, "loss": 0.8243, "step": 10015 }, { "epoch": 0.044340165567311525, "grad_norm": 2.6987201809808843, "learning_rate": 4.434016556731153e-06, "loss": 0.9479, "step": 10016 }, { "epoch": 0.044344592500774715, "grad_norm": 2.9255156485103626, "learning_rate": 4.434459250077472e-06, "loss": 0.629, "step": 10017 }, { "epoch": 0.044349019434237906, "grad_norm": 2.9044557331329273, "learning_rate": 4.4349019434237905e-06, "loss": 0.9391, "step": 10018 }, { "epoch": 0.044353446367701096, "grad_norm": 2.986263267040625, "learning_rate": 4.43534463677011e-06, "loss": 0.799, "step": 10019 }, { "epoch": 0.04435787330116428, "grad_norm": 2.5967090488948297, "learning_rate": 4.435787330116429e-06, "loss": 0.4288, "step": 10020 }, { "epoch": 0.04436230023462747, "grad_norm": 3.3425921102682272, "learning_rate": 4.436230023462748e-06, "loss": 0.5883, "step": 10021 }, { "epoch": 0.04436672716809066, "grad_norm": 2.7849865653781674, "learning_rate": 4.436672716809067e-06, "loss": 0.8927, "step": 10022 }, { "epoch": 0.04437115410155385, "grad_norm": 2.897839325141822, "learning_rate": 4.437115410155386e-06, "loss": 0.9045, "step": 10023 }, { "epoch": 0.04437558103501704, "grad_norm": 3.405927368150202, "learning_rate": 4.437558103501704e-06, "loss": 1.0889, "step": 10024 }, { "epoch": 0.04438000796848023, "grad_norm": 3.228582694110056, "learning_rate": 4.438000796848024e-06, "loss": 1.0373, "step": 10025 }, { "epoch": 0.04438443490194342, "grad_norm": 2.7465687169375355, "learning_rate": 4.438443490194343e-06, "loss": 0.8616, "step": 10026 }, { "epoch": 0.044388861835406614, "grad_norm": 3.0274087775884113, "learning_rate": 4.4388861835406615e-06, "loss": 0.8324, "step": 10027 }, { "epoch": 0.044393288768869804, "grad_norm": 3.1135716713050483, "learning_rate": 4.439328876886981e-06, "loss": 0.8845, "step": 10028 }, { "epoch": 0.044397715702332995, "grad_norm": 3.0094225727739072, "learning_rate": 4.4397715702333e-06, "loss": 0.8003, "step": 10029 }, { "epoch": 0.044402142635796185, "grad_norm": 3.301792625372557, "learning_rate": 4.440214263579619e-06, "loss": 1.0108, "step": 10030 }, { "epoch": 0.044406569569259376, "grad_norm": 2.4606665425398937, "learning_rate": 4.440656956925938e-06, "loss": 0.816, "step": 10031 }, { "epoch": 0.044410996502722566, "grad_norm": 3.7315989849671745, "learning_rate": 4.441099650272257e-06, "loss": 1.0263, "step": 10032 }, { "epoch": 0.04441542343618576, "grad_norm": 3.130617213985613, "learning_rate": 4.441542343618576e-06, "loss": 0.8467, "step": 10033 }, { "epoch": 0.04441985036964895, "grad_norm": 2.762358137448328, "learning_rate": 4.4419850369648945e-06, "loss": 0.7328, "step": 10034 }, { "epoch": 0.04442427730311214, "grad_norm": 2.8781590733541855, "learning_rate": 4.442427730311214e-06, "loss": 0.8949, "step": 10035 }, { "epoch": 0.04442870423657532, "grad_norm": 2.7710043304359364, "learning_rate": 4.4428704236575324e-06, "loss": 0.7242, "step": 10036 }, { "epoch": 0.04443313117003851, "grad_norm": 2.758021920474533, "learning_rate": 4.443313117003852e-06, "loss": 0.8092, "step": 10037 }, { "epoch": 0.0444375581035017, "grad_norm": 2.282936508676403, "learning_rate": 4.443755810350171e-06, "loss": 0.5033, "step": 10038 }, { "epoch": 0.04444198503696489, "grad_norm": 2.813734647651703, "learning_rate": 4.44419850369649e-06, "loss": 0.7625, "step": 10039 }, { "epoch": 0.04444641197042808, "grad_norm": 2.7705675340172733, "learning_rate": 4.444641197042809e-06, "loss": 0.6238, "step": 10040 }, { "epoch": 0.044450838903891274, "grad_norm": 2.680548594649474, "learning_rate": 4.4450838903891284e-06, "loss": 0.6675, "step": 10041 }, { "epoch": 0.044455265837354464, "grad_norm": 2.701409695910706, "learning_rate": 4.445526583735447e-06, "loss": 0.7265, "step": 10042 }, { "epoch": 0.044459692770817655, "grad_norm": 2.824393096440102, "learning_rate": 4.4459692770817655e-06, "loss": 0.6168, "step": 10043 }, { "epoch": 0.044464119704280845, "grad_norm": 2.617103662928044, "learning_rate": 4.446411970428085e-06, "loss": 0.6168, "step": 10044 }, { "epoch": 0.044468546637744036, "grad_norm": 2.6439943804558275, "learning_rate": 4.446854663774403e-06, "loss": 0.5937, "step": 10045 }, { "epoch": 0.044472973571207226, "grad_norm": 2.8219825099130875, "learning_rate": 4.447297357120723e-06, "loss": 0.6837, "step": 10046 }, { "epoch": 0.04447740050467042, "grad_norm": 2.7998866850653137, "learning_rate": 4.447740050467042e-06, "loss": 0.7965, "step": 10047 }, { "epoch": 0.04448182743813361, "grad_norm": 2.5989092659338136, "learning_rate": 4.448182743813361e-06, "loss": 0.5259, "step": 10048 }, { "epoch": 0.0444862543715968, "grad_norm": 3.056268328584268, "learning_rate": 4.44862543715968e-06, "loss": 0.6407, "step": 10049 }, { "epoch": 0.04449068130505999, "grad_norm": 4.451197716430688, "learning_rate": 4.449068130505999e-06, "loss": 0.9542, "step": 10050 }, { "epoch": 0.04449510823852317, "grad_norm": 3.1999991525463662, "learning_rate": 4.449510823852318e-06, "loss": 0.6967, "step": 10051 }, { "epoch": 0.04449953517198636, "grad_norm": 2.9432217815910375, "learning_rate": 4.449953517198637e-06, "loss": 0.953, "step": 10052 }, { "epoch": 0.04450396210544955, "grad_norm": 2.647386497556423, "learning_rate": 4.450396210544956e-06, "loss": 0.4806, "step": 10053 }, { "epoch": 0.04450838903891274, "grad_norm": 3.137451446395356, "learning_rate": 4.450838903891274e-06, "loss": 0.7953, "step": 10054 }, { "epoch": 0.044512815972375934, "grad_norm": 2.7577016394751723, "learning_rate": 4.451281597237594e-06, "loss": 0.7851, "step": 10055 }, { "epoch": 0.044517242905839124, "grad_norm": 4.684785491273057, "learning_rate": 4.451724290583913e-06, "loss": 1.2234, "step": 10056 }, { "epoch": 0.044521669839302315, "grad_norm": 2.658645599588281, "learning_rate": 4.452166983930232e-06, "loss": 0.7801, "step": 10057 }, { "epoch": 0.044526096772765505, "grad_norm": 3.258403127240718, "learning_rate": 4.452609677276551e-06, "loss": 0.622, "step": 10058 }, { "epoch": 0.044530523706228696, "grad_norm": 2.8444778720865855, "learning_rate": 4.45305237062287e-06, "loss": 0.9594, "step": 10059 }, { "epoch": 0.044534950639691886, "grad_norm": 2.5880075808353284, "learning_rate": 4.453495063969189e-06, "loss": 0.7091, "step": 10060 }, { "epoch": 0.04453937757315508, "grad_norm": 3.544136710170401, "learning_rate": 4.453937757315508e-06, "loss": 1.1977, "step": 10061 }, { "epoch": 0.04454380450661827, "grad_norm": 2.645821324619372, "learning_rate": 4.454380450661827e-06, "loss": 0.561, "step": 10062 }, { "epoch": 0.04454823144008146, "grad_norm": 3.8955433981753718, "learning_rate": 4.454823144008146e-06, "loss": 0.7097, "step": 10063 }, { "epoch": 0.04455265837354465, "grad_norm": 2.9453818971272696, "learning_rate": 4.455265837354465e-06, "loss": 0.9006, "step": 10064 }, { "epoch": 0.04455708530700784, "grad_norm": 4.320846506800154, "learning_rate": 4.455708530700784e-06, "loss": 1.1585, "step": 10065 }, { "epoch": 0.04456151224047102, "grad_norm": 3.5949019447862285, "learning_rate": 4.4561512240471025e-06, "loss": 1.031, "step": 10066 }, { "epoch": 0.04456593917393421, "grad_norm": 2.747518408108719, "learning_rate": 4.456593917393422e-06, "loss": 0.7533, "step": 10067 }, { "epoch": 0.044570366107397404, "grad_norm": 3.0950637795891316, "learning_rate": 4.457036610739741e-06, "loss": 0.8381, "step": 10068 }, { "epoch": 0.044574793040860594, "grad_norm": 2.5325381630890735, "learning_rate": 4.45747930408606e-06, "loss": 0.824, "step": 10069 }, { "epoch": 0.044579219974323785, "grad_norm": 3.2954578268024064, "learning_rate": 4.457921997432379e-06, "loss": 0.8763, "step": 10070 }, { "epoch": 0.044583646907786975, "grad_norm": 2.791839902348126, "learning_rate": 4.4583646907786985e-06, "loss": 0.7089, "step": 10071 }, { "epoch": 0.044588073841250166, "grad_norm": 3.077535880928192, "learning_rate": 4.458807384125017e-06, "loss": 0.6098, "step": 10072 }, { "epoch": 0.044592500774713356, "grad_norm": 3.0434090394212827, "learning_rate": 4.459250077471336e-06, "loss": 0.9103, "step": 10073 }, { "epoch": 0.04459692770817655, "grad_norm": 2.5111995945623966, "learning_rate": 4.459692770817655e-06, "loss": 0.7077, "step": 10074 }, { "epoch": 0.04460135464163974, "grad_norm": 3.5848221764350825, "learning_rate": 4.4601354641639735e-06, "loss": 0.6954, "step": 10075 }, { "epoch": 0.04460578157510293, "grad_norm": 3.1936012790293655, "learning_rate": 4.460578157510293e-06, "loss": 0.8982, "step": 10076 }, { "epoch": 0.04461020850856612, "grad_norm": 2.774811000837947, "learning_rate": 4.461020850856612e-06, "loss": 0.7955, "step": 10077 }, { "epoch": 0.04461463544202931, "grad_norm": 2.6488219832984523, "learning_rate": 4.461463544202931e-06, "loss": 0.7367, "step": 10078 }, { "epoch": 0.0446190623754925, "grad_norm": 2.6307240492308814, "learning_rate": 4.46190623754925e-06, "loss": 0.6823, "step": 10079 }, { "epoch": 0.04462348930895569, "grad_norm": 2.6972242594983804, "learning_rate": 4.4623489308955695e-06, "loss": 0.6125, "step": 10080 }, { "epoch": 0.04462791624241887, "grad_norm": 2.752656550627329, "learning_rate": 4.462791624241888e-06, "loss": 0.9048, "step": 10081 }, { "epoch": 0.044632343175882064, "grad_norm": 3.2647983181328644, "learning_rate": 4.463234317588207e-06, "loss": 0.7615, "step": 10082 }, { "epoch": 0.044636770109345254, "grad_norm": 2.4178544561919706, "learning_rate": 4.463677010934526e-06, "loss": 0.6445, "step": 10083 }, { "epoch": 0.044641197042808445, "grad_norm": 2.9528998682077194, "learning_rate": 4.4641197042808444e-06, "loss": 0.5912, "step": 10084 }, { "epoch": 0.044645623976271635, "grad_norm": 3.7434192947721616, "learning_rate": 4.464562397627164e-06, "loss": 1.1378, "step": 10085 }, { "epoch": 0.044650050909734826, "grad_norm": 2.6993555062312313, "learning_rate": 4.465005090973483e-06, "loss": 0.7004, "step": 10086 }, { "epoch": 0.044654477843198016, "grad_norm": 2.6987255049568777, "learning_rate": 4.465447784319802e-06, "loss": 0.7979, "step": 10087 }, { "epoch": 0.04465890477666121, "grad_norm": 2.5120948353034143, "learning_rate": 4.465890477666121e-06, "loss": 0.4335, "step": 10088 }, { "epoch": 0.0446633317101244, "grad_norm": 2.937747363361106, "learning_rate": 4.4663331710124404e-06, "loss": 0.8481, "step": 10089 }, { "epoch": 0.04466775864358759, "grad_norm": 3.0232560731714697, "learning_rate": 4.466775864358759e-06, "loss": 0.7381, "step": 10090 }, { "epoch": 0.04467218557705078, "grad_norm": 3.3747887143521096, "learning_rate": 4.467218557705078e-06, "loss": 0.9273, "step": 10091 }, { "epoch": 0.04467661251051397, "grad_norm": 2.711051793112923, "learning_rate": 4.467661251051397e-06, "loss": 0.7329, "step": 10092 }, { "epoch": 0.04468103944397716, "grad_norm": 3.171821727183137, "learning_rate": 4.468103944397716e-06, "loss": 0.7271, "step": 10093 }, { "epoch": 0.04468546637744035, "grad_norm": 2.736191403057802, "learning_rate": 4.468546637744035e-06, "loss": 0.691, "step": 10094 }, { "epoch": 0.04468989331090354, "grad_norm": 2.665396406021263, "learning_rate": 4.468989331090354e-06, "loss": 0.6785, "step": 10095 }, { "epoch": 0.044694320244366724, "grad_norm": 2.7118432768307787, "learning_rate": 4.469432024436673e-06, "loss": 0.742, "step": 10096 }, { "epoch": 0.044698747177829914, "grad_norm": 2.6070258947120957, "learning_rate": 4.469874717782992e-06, "loss": 0.8023, "step": 10097 }, { "epoch": 0.044703174111293105, "grad_norm": 2.9912387651484798, "learning_rate": 4.470317411129311e-06, "loss": 0.6753, "step": 10098 }, { "epoch": 0.044707601044756295, "grad_norm": 2.3981328135105873, "learning_rate": 4.47076010447563e-06, "loss": 0.8597, "step": 10099 }, { "epoch": 0.044712027978219486, "grad_norm": 2.8904889728091083, "learning_rate": 4.471202797821949e-06, "loss": 0.7993, "step": 10100 }, { "epoch": 0.044716454911682676, "grad_norm": 3.1091485826039387, "learning_rate": 4.471645491168269e-06, "loss": 1.0702, "step": 10101 }, { "epoch": 0.04472088184514587, "grad_norm": 3.2271999968218013, "learning_rate": 4.472088184514587e-06, "loss": 0.7429, "step": 10102 }, { "epoch": 0.04472530877860906, "grad_norm": 2.6982567308503205, "learning_rate": 4.472530877860906e-06, "loss": 0.8129, "step": 10103 }, { "epoch": 0.04472973571207225, "grad_norm": 2.9707188225470467, "learning_rate": 4.472973571207225e-06, "loss": 0.7625, "step": 10104 }, { "epoch": 0.04473416264553544, "grad_norm": 2.568307777110316, "learning_rate": 4.473416264553544e-06, "loss": 0.6281, "step": 10105 }, { "epoch": 0.04473858957899863, "grad_norm": 2.7845719319932374, "learning_rate": 4.473858957899863e-06, "loss": 0.8685, "step": 10106 }, { "epoch": 0.04474301651246182, "grad_norm": 2.749483825416175, "learning_rate": 4.474301651246182e-06, "loss": 0.9876, "step": 10107 }, { "epoch": 0.04474744344592501, "grad_norm": 2.52263239985432, "learning_rate": 4.474744344592501e-06, "loss": 0.8636, "step": 10108 }, { "epoch": 0.0447518703793882, "grad_norm": 2.809408572281076, "learning_rate": 4.47518703793882e-06, "loss": 0.4921, "step": 10109 }, { "epoch": 0.04475629731285139, "grad_norm": 2.244974022364858, "learning_rate": 4.47562973128514e-06, "loss": 0.6338, "step": 10110 }, { "epoch": 0.044760724246314575, "grad_norm": 3.3638576976838874, "learning_rate": 4.476072424631458e-06, "loss": 1.2271, "step": 10111 }, { "epoch": 0.044765151179777765, "grad_norm": 2.889870421537811, "learning_rate": 4.4765151179777775e-06, "loss": 0.7337, "step": 10112 }, { "epoch": 0.044769578113240956, "grad_norm": 2.72105584430994, "learning_rate": 4.476957811324096e-06, "loss": 0.7574, "step": 10113 }, { "epoch": 0.044774005046704146, "grad_norm": 2.9856188431466286, "learning_rate": 4.477400504670415e-06, "loss": 0.811, "step": 10114 }, { "epoch": 0.04477843198016734, "grad_norm": 2.8038072715646916, "learning_rate": 4.477843198016734e-06, "loss": 0.8168, "step": 10115 }, { "epoch": 0.04478285891363053, "grad_norm": 2.819364537096475, "learning_rate": 4.478285891363053e-06, "loss": 0.9226, "step": 10116 }, { "epoch": 0.04478728584709372, "grad_norm": 3.4381444700563377, "learning_rate": 4.478728584709372e-06, "loss": 1.1919, "step": 10117 }, { "epoch": 0.04479171278055691, "grad_norm": 2.7317720729354504, "learning_rate": 4.479171278055691e-06, "loss": 0.6709, "step": 10118 }, { "epoch": 0.0447961397140201, "grad_norm": 3.2366279722791016, "learning_rate": 4.4796139714020105e-06, "loss": 1.0056, "step": 10119 }, { "epoch": 0.04480056664748329, "grad_norm": 2.267141633211776, "learning_rate": 4.480056664748329e-06, "loss": 0.3576, "step": 10120 }, { "epoch": 0.04480499358094648, "grad_norm": 2.675674506740411, "learning_rate": 4.4804993580946484e-06, "loss": 0.8515, "step": 10121 }, { "epoch": 0.04480942051440967, "grad_norm": 4.0103498592692475, "learning_rate": 4.480942051440968e-06, "loss": 0.9583, "step": 10122 }, { "epoch": 0.04481384744787286, "grad_norm": 3.0495431725804707, "learning_rate": 4.481384744787286e-06, "loss": 0.7618, "step": 10123 }, { "epoch": 0.04481827438133605, "grad_norm": 3.087869078475341, "learning_rate": 4.481827438133605e-06, "loss": 0.8509, "step": 10124 }, { "epoch": 0.04482270131479924, "grad_norm": 3.074025835644347, "learning_rate": 4.482270131479924e-06, "loss": 0.82, "step": 10125 }, { "epoch": 0.044827128248262425, "grad_norm": 2.7740550840764193, "learning_rate": 4.482712824826243e-06, "loss": 0.6357, "step": 10126 }, { "epoch": 0.044831555181725616, "grad_norm": 2.7023222489248035, "learning_rate": 4.483155518172562e-06, "loss": 0.7548, "step": 10127 }, { "epoch": 0.044835982115188806, "grad_norm": 3.3873393416209825, "learning_rate": 4.4835982115188815e-06, "loss": 0.7659, "step": 10128 }, { "epoch": 0.044840409048652, "grad_norm": 2.617373489520458, "learning_rate": 4.4840409048652e-06, "loss": 0.4166, "step": 10129 }, { "epoch": 0.04484483598211519, "grad_norm": 2.191855972939191, "learning_rate": 4.484483598211519e-06, "loss": 0.5556, "step": 10130 }, { "epoch": 0.04484926291557838, "grad_norm": 2.809194941586707, "learning_rate": 4.484926291557839e-06, "loss": 0.9512, "step": 10131 }, { "epoch": 0.04485368984904157, "grad_norm": 2.6376359325451286, "learning_rate": 4.485368984904157e-06, "loss": 0.5319, "step": 10132 }, { "epoch": 0.04485811678250476, "grad_norm": 3.3288015365070027, "learning_rate": 4.485811678250477e-06, "loss": 0.8244, "step": 10133 }, { "epoch": 0.04486254371596795, "grad_norm": 2.4940603529585883, "learning_rate": 4.486254371596795e-06, "loss": 0.6309, "step": 10134 }, { "epoch": 0.04486697064943114, "grad_norm": 2.7507994590933524, "learning_rate": 4.486697064943114e-06, "loss": 0.7754, "step": 10135 }, { "epoch": 0.04487139758289433, "grad_norm": 3.6973455233203563, "learning_rate": 4.487139758289433e-06, "loss": 1.0465, "step": 10136 }, { "epoch": 0.04487582451635752, "grad_norm": 2.4897770546913005, "learning_rate": 4.4875824516357524e-06, "loss": 0.5967, "step": 10137 }, { "epoch": 0.04488025144982071, "grad_norm": 2.8677028635524304, "learning_rate": 4.488025144982071e-06, "loss": 0.718, "step": 10138 }, { "epoch": 0.0448846783832839, "grad_norm": 2.655526830621669, "learning_rate": 4.48846783832839e-06, "loss": 0.8288, "step": 10139 }, { "epoch": 0.04488910531674709, "grad_norm": 2.664831402215093, "learning_rate": 4.48891053167471e-06, "loss": 0.8817, "step": 10140 }, { "epoch": 0.044893532250210276, "grad_norm": 2.5133114621219392, "learning_rate": 4.489353225021028e-06, "loss": 0.6221, "step": 10141 }, { "epoch": 0.044897959183673466, "grad_norm": 2.904333812540691, "learning_rate": 4.489795918367348e-06, "loss": 0.9168, "step": 10142 }, { "epoch": 0.04490238611713666, "grad_norm": 2.838614860497503, "learning_rate": 4.490238611713666e-06, "loss": 0.4885, "step": 10143 }, { "epoch": 0.04490681305059985, "grad_norm": 2.6023563798522913, "learning_rate": 4.4906813050599855e-06, "loss": 0.5707, "step": 10144 }, { "epoch": 0.04491123998406304, "grad_norm": 2.753494147334431, "learning_rate": 4.491123998406304e-06, "loss": 0.6852, "step": 10145 }, { "epoch": 0.04491566691752623, "grad_norm": 2.8245297511904113, "learning_rate": 4.491566691752623e-06, "loss": 0.7271, "step": 10146 }, { "epoch": 0.04492009385098942, "grad_norm": 2.820140965655158, "learning_rate": 4.492009385098942e-06, "loss": 0.6246, "step": 10147 }, { "epoch": 0.04492452078445261, "grad_norm": 3.0480822106035044, "learning_rate": 4.492452078445261e-06, "loss": 0.7323, "step": 10148 }, { "epoch": 0.0449289477179158, "grad_norm": 3.6019681748588965, "learning_rate": 4.492894771791581e-06, "loss": 1.209, "step": 10149 }, { "epoch": 0.04493337465137899, "grad_norm": 3.195367878536205, "learning_rate": 4.493337465137899e-06, "loss": 0.7479, "step": 10150 }, { "epoch": 0.04493780158484218, "grad_norm": 2.686925543080492, "learning_rate": 4.4937801584842185e-06, "loss": 0.5697, "step": 10151 }, { "epoch": 0.04494222851830537, "grad_norm": 2.645002881512087, "learning_rate": 4.494222851830538e-06, "loss": 0.9384, "step": 10152 }, { "epoch": 0.04494665545176856, "grad_norm": 2.8676347717944255, "learning_rate": 4.4946655451768564e-06, "loss": 0.7455, "step": 10153 }, { "epoch": 0.04495108238523175, "grad_norm": 2.9098909939742774, "learning_rate": 4.495108238523175e-06, "loss": 0.9308, "step": 10154 }, { "epoch": 0.04495550931869494, "grad_norm": 2.6828188146859655, "learning_rate": 4.495550931869494e-06, "loss": 0.7366, "step": 10155 }, { "epoch": 0.04495993625215813, "grad_norm": 3.48566044792952, "learning_rate": 4.495993625215813e-06, "loss": 1.15, "step": 10156 }, { "epoch": 0.04496436318562132, "grad_norm": 3.0499227959683983, "learning_rate": 4.496436318562132e-06, "loss": 0.5411, "step": 10157 }, { "epoch": 0.04496879011908451, "grad_norm": 2.934896876498955, "learning_rate": 4.496879011908452e-06, "loss": 0.6647, "step": 10158 }, { "epoch": 0.0449732170525477, "grad_norm": 2.630479515259947, "learning_rate": 4.49732170525477e-06, "loss": 0.6339, "step": 10159 }, { "epoch": 0.04497764398601089, "grad_norm": 2.7030221827742418, "learning_rate": 4.4977643986010895e-06, "loss": 0.5865, "step": 10160 }, { "epoch": 0.04498207091947408, "grad_norm": 2.413208969640891, "learning_rate": 4.498207091947409e-06, "loss": 0.5976, "step": 10161 }, { "epoch": 0.04498649785293727, "grad_norm": 2.700046469478153, "learning_rate": 4.498649785293727e-06, "loss": 0.5814, "step": 10162 }, { "epoch": 0.04499092478640046, "grad_norm": 2.8393924095237293, "learning_rate": 4.499092478640047e-06, "loss": 0.847, "step": 10163 }, { "epoch": 0.04499535171986365, "grad_norm": 3.4146338075032827, "learning_rate": 4.499535171986365e-06, "loss": 1.174, "step": 10164 }, { "epoch": 0.04499977865332684, "grad_norm": 2.4798589037849257, "learning_rate": 4.499977865332684e-06, "loss": 0.5897, "step": 10165 }, { "epoch": 0.04500420558679003, "grad_norm": 3.1606621789638494, "learning_rate": 4.500420558679003e-06, "loss": 0.8294, "step": 10166 }, { "epoch": 0.04500863252025322, "grad_norm": 3.369962702501079, "learning_rate": 4.5008632520253225e-06, "loss": 0.9899, "step": 10167 }, { "epoch": 0.04501305945371641, "grad_norm": 2.8350965995463717, "learning_rate": 4.501305945371641e-06, "loss": 0.4958, "step": 10168 }, { "epoch": 0.0450174863871796, "grad_norm": 3.166094420751935, "learning_rate": 4.5017486387179604e-06, "loss": 1.1709, "step": 10169 }, { "epoch": 0.045021913320642794, "grad_norm": 3.170815782673613, "learning_rate": 4.50219133206428e-06, "loss": 0.9017, "step": 10170 }, { "epoch": 0.04502634025410598, "grad_norm": 2.2858939239091436, "learning_rate": 4.502634025410598e-06, "loss": 0.6241, "step": 10171 }, { "epoch": 0.04503076718756917, "grad_norm": 3.217645717005464, "learning_rate": 4.503076718756918e-06, "loss": 0.5741, "step": 10172 }, { "epoch": 0.04503519412103236, "grad_norm": 3.5716890768958662, "learning_rate": 4.503519412103236e-06, "loss": 0.76, "step": 10173 }, { "epoch": 0.04503962105449555, "grad_norm": 3.8326451310032863, "learning_rate": 4.503962105449556e-06, "loss": 0.9961, "step": 10174 }, { "epoch": 0.04504404798795874, "grad_norm": 2.6247415759468447, "learning_rate": 4.504404798795874e-06, "loss": 0.8148, "step": 10175 }, { "epoch": 0.04504847492142193, "grad_norm": 3.350478491786204, "learning_rate": 4.5048474921421935e-06, "loss": 1.0167, "step": 10176 }, { "epoch": 0.04505290185488512, "grad_norm": 2.696533335708503, "learning_rate": 4.505290185488512e-06, "loss": 0.7484, "step": 10177 }, { "epoch": 0.04505732878834831, "grad_norm": 4.4539607025936885, "learning_rate": 4.505732878834831e-06, "loss": 1.0738, "step": 10178 }, { "epoch": 0.0450617557218115, "grad_norm": 2.3252338681831644, "learning_rate": 4.506175572181151e-06, "loss": 0.478, "step": 10179 }, { "epoch": 0.04506618265527469, "grad_norm": 3.1695140652876086, "learning_rate": 4.506618265527469e-06, "loss": 0.7848, "step": 10180 }, { "epoch": 0.04507060958873788, "grad_norm": 2.431667646964256, "learning_rate": 4.507060958873789e-06, "loss": 0.808, "step": 10181 }, { "epoch": 0.04507503652220107, "grad_norm": 2.5717572103830784, "learning_rate": 4.507503652220108e-06, "loss": 0.6972, "step": 10182 }, { "epoch": 0.04507946345566426, "grad_norm": 3.201542452400996, "learning_rate": 4.5079463455664265e-06, "loss": 0.8881, "step": 10183 }, { "epoch": 0.045083890389127454, "grad_norm": 2.9075736122701246, "learning_rate": 4.508389038912745e-06, "loss": 0.8121, "step": 10184 }, { "epoch": 0.045088317322590644, "grad_norm": 2.874388155194478, "learning_rate": 4.5088317322590644e-06, "loss": 0.7013, "step": 10185 }, { "epoch": 0.045092744256053835, "grad_norm": 2.869708115832358, "learning_rate": 4.509274425605383e-06, "loss": 0.9869, "step": 10186 }, { "epoch": 0.04509717118951702, "grad_norm": 2.4535603763464655, "learning_rate": 4.509717118951702e-06, "loss": 0.6414, "step": 10187 }, { "epoch": 0.04510159812298021, "grad_norm": 2.4593703453711226, "learning_rate": 4.510159812298022e-06, "loss": 0.4345, "step": 10188 }, { "epoch": 0.0451060250564434, "grad_norm": 2.281491432452515, "learning_rate": 4.51060250564434e-06, "loss": 0.608, "step": 10189 }, { "epoch": 0.04511045198990659, "grad_norm": 3.2043524889924906, "learning_rate": 4.51104519899066e-06, "loss": 0.8098, "step": 10190 }, { "epoch": 0.04511487892336978, "grad_norm": 2.7947541076360145, "learning_rate": 4.511487892336979e-06, "loss": 0.8617, "step": 10191 }, { "epoch": 0.04511930585683297, "grad_norm": 2.6710313848117764, "learning_rate": 4.5119305856832975e-06, "loss": 1.011, "step": 10192 }, { "epoch": 0.04512373279029616, "grad_norm": 2.7365876395388518, "learning_rate": 4.512373279029617e-06, "loss": 0.7245, "step": 10193 }, { "epoch": 0.04512815972375935, "grad_norm": 2.394958019212632, "learning_rate": 4.512815972375935e-06, "loss": 0.6024, "step": 10194 }, { "epoch": 0.04513258665722254, "grad_norm": 3.9887913792639567, "learning_rate": 4.513258665722255e-06, "loss": 0.8109, "step": 10195 }, { "epoch": 0.04513701359068573, "grad_norm": 2.7567297481511197, "learning_rate": 4.513701359068573e-06, "loss": 0.7382, "step": 10196 }, { "epoch": 0.045141440524148924, "grad_norm": 3.136889033375355, "learning_rate": 4.514144052414893e-06, "loss": 0.8426, "step": 10197 }, { "epoch": 0.045145867457612114, "grad_norm": 2.6296977171211764, "learning_rate": 4.514586745761211e-06, "loss": 0.6537, "step": 10198 }, { "epoch": 0.045150294391075305, "grad_norm": 2.6326732026110395, "learning_rate": 4.5150294391075305e-06, "loss": 0.7522, "step": 10199 }, { "epoch": 0.045154721324538495, "grad_norm": 3.1854858286575727, "learning_rate": 4.51547213245385e-06, "loss": 0.7708, "step": 10200 }, { "epoch": 0.045159148258001686, "grad_norm": 2.786697433868505, "learning_rate": 4.5159148258001684e-06, "loss": 0.9064, "step": 10201 }, { "epoch": 0.04516357519146487, "grad_norm": 2.762575283039302, "learning_rate": 4.516357519146488e-06, "loss": 0.8923, "step": 10202 }, { "epoch": 0.04516800212492806, "grad_norm": 2.71326785159355, "learning_rate": 4.516800212492806e-06, "loss": 0.81, "step": 10203 }, { "epoch": 0.04517242905839125, "grad_norm": 3.6100063938451186, "learning_rate": 4.517242905839126e-06, "loss": 0.9952, "step": 10204 }, { "epoch": 0.04517685599185444, "grad_norm": 2.7156297498456055, "learning_rate": 4.517685599185444e-06, "loss": 0.8506, "step": 10205 }, { "epoch": 0.04518128292531763, "grad_norm": 2.589572114430718, "learning_rate": 4.518128292531764e-06, "loss": 0.6127, "step": 10206 }, { "epoch": 0.04518570985878082, "grad_norm": 2.784513006084128, "learning_rate": 4.518570985878082e-06, "loss": 0.5661, "step": 10207 }, { "epoch": 0.04519013679224401, "grad_norm": 3.2377806943939063, "learning_rate": 4.5190136792244015e-06, "loss": 0.7644, "step": 10208 }, { "epoch": 0.0451945637257072, "grad_norm": 4.461230269423387, "learning_rate": 4.519456372570721e-06, "loss": 1.2012, "step": 10209 }, { "epoch": 0.04519899065917039, "grad_norm": 2.271651789318134, "learning_rate": 4.519899065917039e-06, "loss": 0.5739, "step": 10210 }, { "epoch": 0.045203417592633584, "grad_norm": 3.025302078566527, "learning_rate": 4.520341759263359e-06, "loss": 0.7577, "step": 10211 }, { "epoch": 0.045207844526096774, "grad_norm": 2.2071510023685543, "learning_rate": 4.520784452609678e-06, "loss": 0.6876, "step": 10212 }, { "epoch": 0.045212271459559965, "grad_norm": 2.4334151260484886, "learning_rate": 4.521227145955997e-06, "loss": 0.611, "step": 10213 }, { "epoch": 0.045216698393023155, "grad_norm": 2.8730392044757282, "learning_rate": 4.521669839302316e-06, "loss": 0.7774, "step": 10214 }, { "epoch": 0.045221125326486346, "grad_norm": 2.8934224889267224, "learning_rate": 4.5221125326486345e-06, "loss": 0.812, "step": 10215 }, { "epoch": 0.045225552259949536, "grad_norm": 2.402423241400355, "learning_rate": 4.522555225994953e-06, "loss": 0.7543, "step": 10216 }, { "epoch": 0.04522997919341272, "grad_norm": 2.7881357834370246, "learning_rate": 4.5229979193412724e-06, "loss": 0.784, "step": 10217 }, { "epoch": 0.04523440612687591, "grad_norm": 2.9123375631692054, "learning_rate": 4.523440612687592e-06, "loss": 0.9956, "step": 10218 }, { "epoch": 0.0452388330603391, "grad_norm": 3.2773595753067686, "learning_rate": 4.52388330603391e-06, "loss": 1.2587, "step": 10219 }, { "epoch": 0.04524325999380229, "grad_norm": 3.7017201152662946, "learning_rate": 4.52432599938023e-06, "loss": 0.9948, "step": 10220 }, { "epoch": 0.04524768692726548, "grad_norm": 2.982936012615196, "learning_rate": 4.524768692726549e-06, "loss": 0.8407, "step": 10221 }, { "epoch": 0.04525211386072867, "grad_norm": 3.5225030736964302, "learning_rate": 4.525211386072868e-06, "loss": 0.7376, "step": 10222 }, { "epoch": 0.04525654079419186, "grad_norm": 3.897454075113524, "learning_rate": 4.525654079419187e-06, "loss": 0.884, "step": 10223 }, { "epoch": 0.04526096772765505, "grad_norm": 2.735914451569994, "learning_rate": 4.5260967727655055e-06, "loss": 0.8335, "step": 10224 }, { "epoch": 0.045265394661118244, "grad_norm": 3.7774127368959936, "learning_rate": 4.526539466111825e-06, "loss": 1.3291, "step": 10225 }, { "epoch": 0.045269821594581434, "grad_norm": 2.624741418294732, "learning_rate": 4.526982159458143e-06, "loss": 0.5908, "step": 10226 }, { "epoch": 0.045274248528044625, "grad_norm": 2.418853748528757, "learning_rate": 4.527424852804463e-06, "loss": 0.7755, "step": 10227 }, { "epoch": 0.045278675461507815, "grad_norm": 2.9421068846570155, "learning_rate": 4.527867546150781e-06, "loss": 0.7301, "step": 10228 }, { "epoch": 0.045283102394971006, "grad_norm": 3.157303577999515, "learning_rate": 4.528310239497101e-06, "loss": 0.66, "step": 10229 }, { "epoch": 0.045287529328434196, "grad_norm": 2.605007887037007, "learning_rate": 4.52875293284342e-06, "loss": 0.7917, "step": 10230 }, { "epoch": 0.04529195626189739, "grad_norm": 3.578628041961411, "learning_rate": 4.5291956261897386e-06, "loss": 1.0484, "step": 10231 }, { "epoch": 0.04529638319536057, "grad_norm": 2.970520496613286, "learning_rate": 4.529638319536058e-06, "loss": 0.902, "step": 10232 }, { "epoch": 0.04530081012882376, "grad_norm": 3.178861462254572, "learning_rate": 4.530081012882377e-06, "loss": 0.8318, "step": 10233 }, { "epoch": 0.04530523706228695, "grad_norm": 2.961304756866971, "learning_rate": 4.530523706228696e-06, "loss": 0.8611, "step": 10234 }, { "epoch": 0.04530966399575014, "grad_norm": 3.0434253654542363, "learning_rate": 4.530966399575014e-06, "loss": 0.9188, "step": 10235 }, { "epoch": 0.04531409092921333, "grad_norm": 2.918009931447039, "learning_rate": 4.531409092921334e-06, "loss": 0.741, "step": 10236 }, { "epoch": 0.04531851786267652, "grad_norm": 2.980099320201289, "learning_rate": 4.531851786267652e-06, "loss": 0.7207, "step": 10237 }, { "epoch": 0.045322944796139714, "grad_norm": 4.392249633536189, "learning_rate": 4.532294479613972e-06, "loss": 1.131, "step": 10238 }, { "epoch": 0.045327371729602904, "grad_norm": 2.432209929988559, "learning_rate": 4.532737172960291e-06, "loss": 0.7577, "step": 10239 }, { "epoch": 0.045331798663066095, "grad_norm": 2.5828835117778133, "learning_rate": 4.5331798663066095e-06, "loss": 0.466, "step": 10240 }, { "epoch": 0.045336225596529285, "grad_norm": 2.561611733113231, "learning_rate": 4.533622559652929e-06, "loss": 0.6767, "step": 10241 }, { "epoch": 0.045340652529992476, "grad_norm": 3.576265024559058, "learning_rate": 4.534065252999248e-06, "loss": 1.2914, "step": 10242 }, { "epoch": 0.045345079463455666, "grad_norm": 2.890256381076611, "learning_rate": 4.534507946345567e-06, "loss": 0.7911, "step": 10243 }, { "epoch": 0.04534950639691886, "grad_norm": 3.1629203272120954, "learning_rate": 4.534950639691886e-06, "loss": 0.6735, "step": 10244 }, { "epoch": 0.04535393333038205, "grad_norm": 2.861992596403183, "learning_rate": 4.535393333038205e-06, "loss": 1.032, "step": 10245 }, { "epoch": 0.04535836026384524, "grad_norm": 2.4404611642958534, "learning_rate": 4.535836026384523e-06, "loss": 0.5853, "step": 10246 }, { "epoch": 0.04536278719730842, "grad_norm": 2.8558244689302104, "learning_rate": 4.5362787197308426e-06, "loss": 0.7261, "step": 10247 }, { "epoch": 0.04536721413077161, "grad_norm": 2.3905793962669732, "learning_rate": 4.536721413077162e-06, "loss": 0.5734, "step": 10248 }, { "epoch": 0.0453716410642348, "grad_norm": 2.9924568039060206, "learning_rate": 4.5371641064234804e-06, "loss": 0.6756, "step": 10249 }, { "epoch": 0.04537606799769799, "grad_norm": 2.6957282782709924, "learning_rate": 4.5376067997698e-06, "loss": 0.6844, "step": 10250 }, { "epoch": 0.04538049493116118, "grad_norm": 2.4759392002362786, "learning_rate": 4.538049493116119e-06, "loss": 0.733, "step": 10251 }, { "epoch": 0.045384921864624374, "grad_norm": 2.771184671434079, "learning_rate": 4.538492186462438e-06, "loss": 0.8181, "step": 10252 }, { "epoch": 0.045389348798087564, "grad_norm": 2.841733190943662, "learning_rate": 4.538934879808757e-06, "loss": 0.8251, "step": 10253 }, { "epoch": 0.045393775731550755, "grad_norm": 3.0407632113731675, "learning_rate": 4.539377573155076e-06, "loss": 0.5238, "step": 10254 }, { "epoch": 0.045398202665013945, "grad_norm": 3.006342927025704, "learning_rate": 4.539820266501395e-06, "loss": 0.9545, "step": 10255 }, { "epoch": 0.045402629598477136, "grad_norm": 2.802574969218792, "learning_rate": 4.5402629598477135e-06, "loss": 0.7585, "step": 10256 }, { "epoch": 0.045407056531940326, "grad_norm": 2.484015236005169, "learning_rate": 4.540705653194033e-06, "loss": 0.792, "step": 10257 }, { "epoch": 0.04541148346540352, "grad_norm": 2.2165270365847154, "learning_rate": 4.541148346540351e-06, "loss": 0.6051, "step": 10258 }, { "epoch": 0.04541591039886671, "grad_norm": 2.8582347495357303, "learning_rate": 4.541591039886671e-06, "loss": 0.8372, "step": 10259 }, { "epoch": 0.0454203373323299, "grad_norm": 2.6024568795020464, "learning_rate": 4.54203373323299e-06, "loss": 0.8082, "step": 10260 }, { "epoch": 0.04542476426579309, "grad_norm": 3.559694579406514, "learning_rate": 4.542476426579309e-06, "loss": 0.9598, "step": 10261 }, { "epoch": 0.04542919119925627, "grad_norm": 2.712593475698205, "learning_rate": 4.542919119925628e-06, "loss": 0.732, "step": 10262 }, { "epoch": 0.04543361813271946, "grad_norm": 2.6213473268098304, "learning_rate": 4.543361813271947e-06, "loss": 0.6129, "step": 10263 }, { "epoch": 0.04543804506618265, "grad_norm": 2.735092714991669, "learning_rate": 4.543804506618266e-06, "loss": 0.688, "step": 10264 }, { "epoch": 0.04544247199964584, "grad_norm": 2.6087055247025632, "learning_rate": 4.5442471999645844e-06, "loss": 0.7778, "step": 10265 }, { "epoch": 0.045446898933109034, "grad_norm": 2.645513405776756, "learning_rate": 4.544689893310904e-06, "loss": 0.7887, "step": 10266 }, { "epoch": 0.045451325866572224, "grad_norm": 2.5673984531923746, "learning_rate": 4.545132586657222e-06, "loss": 0.7095, "step": 10267 }, { "epoch": 0.045455752800035415, "grad_norm": 3.473135027505062, "learning_rate": 4.545575280003542e-06, "loss": 0.9651, "step": 10268 }, { "epoch": 0.045460179733498605, "grad_norm": 3.252101426238296, "learning_rate": 4.546017973349861e-06, "loss": 0.895, "step": 10269 }, { "epoch": 0.045464606666961796, "grad_norm": 3.3633592971424147, "learning_rate": 4.54646066669618e-06, "loss": 0.8876, "step": 10270 }, { "epoch": 0.045469033600424986, "grad_norm": 3.261870963295539, "learning_rate": 4.546903360042499e-06, "loss": 0.7739, "step": 10271 }, { "epoch": 0.04547346053388818, "grad_norm": 2.508968035755125, "learning_rate": 4.547346053388818e-06, "loss": 0.543, "step": 10272 }, { "epoch": 0.04547788746735137, "grad_norm": 3.359881282585049, "learning_rate": 4.547788746735137e-06, "loss": 0.8314, "step": 10273 }, { "epoch": 0.04548231440081456, "grad_norm": 3.127147276726631, "learning_rate": 4.548231440081456e-06, "loss": 0.8577, "step": 10274 }, { "epoch": 0.04548674133427775, "grad_norm": 2.4530660939241637, "learning_rate": 4.548674133427775e-06, "loss": 0.8314, "step": 10275 }, { "epoch": 0.04549116826774094, "grad_norm": 2.534324161969274, "learning_rate": 4.549116826774094e-06, "loss": 0.6636, "step": 10276 }, { "epoch": 0.04549559520120412, "grad_norm": 3.4258537546470738, "learning_rate": 4.549559520120413e-06, "loss": 1.0619, "step": 10277 }, { "epoch": 0.04550002213466731, "grad_norm": 2.6224731319712524, "learning_rate": 4.550002213466732e-06, "loss": 0.7567, "step": 10278 }, { "epoch": 0.045504449068130504, "grad_norm": 3.380398147912237, "learning_rate": 4.5504449068130506e-06, "loss": 1.036, "step": 10279 }, { "epoch": 0.045508876001593694, "grad_norm": 2.439650287160746, "learning_rate": 4.55088760015937e-06, "loss": 0.7517, "step": 10280 }, { "epoch": 0.045513302935056885, "grad_norm": 2.5700890523737154, "learning_rate": 4.551330293505689e-06, "loss": 0.7873, "step": 10281 }, { "epoch": 0.045517729868520075, "grad_norm": 2.8408041603316176, "learning_rate": 4.551772986852008e-06, "loss": 0.8407, "step": 10282 }, { "epoch": 0.045522156801983266, "grad_norm": 3.5440650671990626, "learning_rate": 4.552215680198327e-06, "loss": 0.793, "step": 10283 }, { "epoch": 0.045526583735446456, "grad_norm": 2.7508177178112128, "learning_rate": 4.552658373544646e-06, "loss": 0.9168, "step": 10284 }, { "epoch": 0.04553101066890965, "grad_norm": 2.767908193685147, "learning_rate": 4.553101066890965e-06, "loss": 0.6312, "step": 10285 }, { "epoch": 0.04553543760237284, "grad_norm": 3.0184713451940692, "learning_rate": 4.553543760237284e-06, "loss": 0.8453, "step": 10286 }, { "epoch": 0.04553986453583603, "grad_norm": 2.872060138971649, "learning_rate": 4.553986453583603e-06, "loss": 0.7376, "step": 10287 }, { "epoch": 0.04554429146929922, "grad_norm": 3.1527218260774768, "learning_rate": 4.5544291469299215e-06, "loss": 1.0977, "step": 10288 }, { "epoch": 0.04554871840276241, "grad_norm": 2.681191068360508, "learning_rate": 4.554871840276241e-06, "loss": 0.5594, "step": 10289 }, { "epoch": 0.0455531453362256, "grad_norm": 2.994125685105999, "learning_rate": 4.55531453362256e-06, "loss": 0.9358, "step": 10290 }, { "epoch": 0.04555757226968879, "grad_norm": 2.1774577058131945, "learning_rate": 4.555757226968879e-06, "loss": 0.6832, "step": 10291 }, { "epoch": 0.04556199920315197, "grad_norm": 2.213435926190421, "learning_rate": 4.556199920315198e-06, "loss": 0.5813, "step": 10292 }, { "epoch": 0.045566426136615164, "grad_norm": 3.5479652090044245, "learning_rate": 4.5566426136615175e-06, "loss": 1.0998, "step": 10293 }, { "epoch": 0.045570853070078354, "grad_norm": 2.907742012031895, "learning_rate": 4.557085307007836e-06, "loss": 0.8357, "step": 10294 }, { "epoch": 0.045575280003541545, "grad_norm": 2.2680016134572063, "learning_rate": 4.557528000354155e-06, "loss": 0.5298, "step": 10295 }, { "epoch": 0.045579706937004735, "grad_norm": 2.7843701709215996, "learning_rate": 4.557970693700474e-06, "loss": 0.7871, "step": 10296 }, { "epoch": 0.045584133870467926, "grad_norm": 3.385423681610045, "learning_rate": 4.5584133870467924e-06, "loss": 0.7895, "step": 10297 }, { "epoch": 0.045588560803931116, "grad_norm": 2.7237914718181733, "learning_rate": 4.558856080393112e-06, "loss": 0.855, "step": 10298 }, { "epoch": 0.04559298773739431, "grad_norm": 2.8069881939743544, "learning_rate": 4.559298773739431e-06, "loss": 0.7097, "step": 10299 }, { "epoch": 0.0455974146708575, "grad_norm": 3.6827631692002982, "learning_rate": 4.55974146708575e-06, "loss": 0.7325, "step": 10300 }, { "epoch": 0.04560184160432069, "grad_norm": 2.3932277579050756, "learning_rate": 4.560184160432069e-06, "loss": 0.6122, "step": 10301 }, { "epoch": 0.04560626853778388, "grad_norm": 4.633493995966582, "learning_rate": 4.5606268537783885e-06, "loss": 1.5901, "step": 10302 }, { "epoch": 0.04561069547124707, "grad_norm": 2.9893324626982793, "learning_rate": 4.561069547124707e-06, "loss": 0.7872, "step": 10303 }, { "epoch": 0.04561512240471026, "grad_norm": 3.1263007742819964, "learning_rate": 4.561512240471026e-06, "loss": 0.9046, "step": 10304 }, { "epoch": 0.04561954933817345, "grad_norm": 3.1905851966167944, "learning_rate": 4.561954933817345e-06, "loss": 1.2404, "step": 10305 }, { "epoch": 0.04562397627163664, "grad_norm": 3.556803361714402, "learning_rate": 4.562397627163664e-06, "loss": 0.8882, "step": 10306 }, { "epoch": 0.045628403205099824, "grad_norm": 2.7161760823129497, "learning_rate": 4.562840320509983e-06, "loss": 0.6084, "step": 10307 }, { "epoch": 0.045632830138563014, "grad_norm": 3.459942130519973, "learning_rate": 4.563283013856302e-06, "loss": 1.0264, "step": 10308 }, { "epoch": 0.045637257072026205, "grad_norm": 2.660667537599769, "learning_rate": 4.563725707202621e-06, "loss": 0.6179, "step": 10309 }, { "epoch": 0.045641684005489395, "grad_norm": 3.067787004010932, "learning_rate": 4.56416840054894e-06, "loss": 0.8197, "step": 10310 }, { "epoch": 0.045646110938952586, "grad_norm": 3.0905740533399446, "learning_rate": 4.564611093895259e-06, "loss": 1.0324, "step": 10311 }, { "epoch": 0.045650537872415776, "grad_norm": 2.971254469432953, "learning_rate": 4.565053787241578e-06, "loss": 0.8202, "step": 10312 }, { "epoch": 0.04565496480587897, "grad_norm": 3.5882688418794326, "learning_rate": 4.565496480587897e-06, "loss": 0.7038, "step": 10313 }, { "epoch": 0.04565939173934216, "grad_norm": 2.405789718485748, "learning_rate": 4.565939173934217e-06, "loss": 0.5952, "step": 10314 }, { "epoch": 0.04566381867280535, "grad_norm": 2.6654781925292976, "learning_rate": 4.566381867280535e-06, "loss": 1.0127, "step": 10315 }, { "epoch": 0.04566824560626854, "grad_norm": 2.7198267732599333, "learning_rate": 4.566824560626854e-06, "loss": 0.921, "step": 10316 }, { "epoch": 0.04567267253973173, "grad_norm": 3.0446522417853323, "learning_rate": 4.567267253973173e-06, "loss": 0.897, "step": 10317 }, { "epoch": 0.04567709947319492, "grad_norm": 3.5918991480289875, "learning_rate": 4.567709947319492e-06, "loss": 0.9598, "step": 10318 }, { "epoch": 0.04568152640665811, "grad_norm": 2.752276997414532, "learning_rate": 4.568152640665811e-06, "loss": 0.8988, "step": 10319 }, { "epoch": 0.0456859533401213, "grad_norm": 4.0785505099290065, "learning_rate": 4.56859533401213e-06, "loss": 1.0881, "step": 10320 }, { "epoch": 0.04569038027358449, "grad_norm": 2.7461399845337677, "learning_rate": 4.569038027358449e-06, "loss": 0.5238, "step": 10321 }, { "epoch": 0.045694807207047675, "grad_norm": 2.9570328570505118, "learning_rate": 4.569480720704768e-06, "loss": 0.9349, "step": 10322 }, { "epoch": 0.045699234140510865, "grad_norm": 2.122472473402696, "learning_rate": 4.569923414051088e-06, "loss": 0.5298, "step": 10323 }, { "epoch": 0.045703661073974056, "grad_norm": 2.58224766597385, "learning_rate": 4.570366107397406e-06, "loss": 0.8691, "step": 10324 }, { "epoch": 0.045708088007437246, "grad_norm": 3.3530710170211386, "learning_rate": 4.5708088007437255e-06, "loss": 0.865, "step": 10325 }, { "epoch": 0.04571251494090044, "grad_norm": 2.805430012838949, "learning_rate": 4.571251494090044e-06, "loss": 0.5955, "step": 10326 }, { "epoch": 0.04571694187436363, "grad_norm": 2.817547967015529, "learning_rate": 4.5716941874363626e-06, "loss": 0.7776, "step": 10327 }, { "epoch": 0.04572136880782682, "grad_norm": 3.801651656501066, "learning_rate": 4.572136880782682e-06, "loss": 0.9381, "step": 10328 }, { "epoch": 0.04572579574129001, "grad_norm": 2.9754049748857447, "learning_rate": 4.572579574129001e-06, "loss": 0.751, "step": 10329 }, { "epoch": 0.0457302226747532, "grad_norm": 3.311858350743936, "learning_rate": 4.57302226747532e-06, "loss": 0.8665, "step": 10330 }, { "epoch": 0.04573464960821639, "grad_norm": 4.025788932351772, "learning_rate": 4.573464960821639e-06, "loss": 1.2457, "step": 10331 }, { "epoch": 0.04573907654167958, "grad_norm": 3.4491321141432687, "learning_rate": 4.5739076541679586e-06, "loss": 0.9898, "step": 10332 }, { "epoch": 0.04574350347514277, "grad_norm": 2.4999648690607494, "learning_rate": 4.574350347514277e-06, "loss": 0.5324, "step": 10333 }, { "epoch": 0.04574793040860596, "grad_norm": 2.4043628162508432, "learning_rate": 4.5747930408605965e-06, "loss": 0.5025, "step": 10334 }, { "epoch": 0.04575235734206915, "grad_norm": 3.7717077024827175, "learning_rate": 4.575235734206915e-06, "loss": 1.4147, "step": 10335 }, { "epoch": 0.04575678427553234, "grad_norm": 3.514288293683011, "learning_rate": 4.575678427553234e-06, "loss": 0.6468, "step": 10336 }, { "epoch": 0.04576121120899553, "grad_norm": 3.6644712285513985, "learning_rate": 4.576121120899553e-06, "loss": 0.7799, "step": 10337 }, { "epoch": 0.045765638142458716, "grad_norm": 2.340195018900402, "learning_rate": 4.576563814245872e-06, "loss": 0.4778, "step": 10338 }, { "epoch": 0.045770065075921906, "grad_norm": 2.454434854874993, "learning_rate": 4.577006507592191e-06, "loss": 0.6262, "step": 10339 }, { "epoch": 0.0457744920093851, "grad_norm": 2.9991495345712607, "learning_rate": 4.57744920093851e-06, "loss": 1.0436, "step": 10340 }, { "epoch": 0.04577891894284829, "grad_norm": 2.9406209659845364, "learning_rate": 4.5778918942848295e-06, "loss": 0.9021, "step": 10341 }, { "epoch": 0.04578334587631148, "grad_norm": 2.7293421759904364, "learning_rate": 4.578334587631148e-06, "loss": 0.8047, "step": 10342 }, { "epoch": 0.04578777280977467, "grad_norm": 3.2683673073943273, "learning_rate": 4.578777280977467e-06, "loss": 0.898, "step": 10343 }, { "epoch": 0.04579219974323786, "grad_norm": 2.603885750374513, "learning_rate": 4.579219974323787e-06, "loss": 0.4826, "step": 10344 }, { "epoch": 0.04579662667670105, "grad_norm": 2.4412895618705903, "learning_rate": 4.579662667670105e-06, "loss": 0.5977, "step": 10345 }, { "epoch": 0.04580105361016424, "grad_norm": 3.4785279862810965, "learning_rate": 4.580105361016424e-06, "loss": 0.9492, "step": 10346 }, { "epoch": 0.04580548054362743, "grad_norm": 2.875677430519294, "learning_rate": 4.580548054362743e-06, "loss": 0.8803, "step": 10347 }, { "epoch": 0.04580990747709062, "grad_norm": 2.765678966204325, "learning_rate": 4.580990747709062e-06, "loss": 0.781, "step": 10348 }, { "epoch": 0.04581433441055381, "grad_norm": 3.870678287232875, "learning_rate": 4.581433441055381e-06, "loss": 1.0567, "step": 10349 }, { "epoch": 0.045818761344017, "grad_norm": 3.386123681465108, "learning_rate": 4.5818761344017005e-06, "loss": 0.4361, "step": 10350 }, { "epoch": 0.04582318827748019, "grad_norm": 2.896998525002669, "learning_rate": 4.582318827748019e-06, "loss": 0.8118, "step": 10351 }, { "epoch": 0.04582761521094338, "grad_norm": 2.9804498141596216, "learning_rate": 4.582761521094338e-06, "loss": 1.0074, "step": 10352 }, { "epoch": 0.045832042144406566, "grad_norm": 2.577687306783325, "learning_rate": 4.583204214440658e-06, "loss": 0.6702, "step": 10353 }, { "epoch": 0.04583646907786976, "grad_norm": 2.674340591075979, "learning_rate": 4.583646907786976e-06, "loss": 0.9239, "step": 10354 }, { "epoch": 0.04584089601133295, "grad_norm": 2.3293998635026103, "learning_rate": 4.584089601133296e-06, "loss": 0.4917, "step": 10355 }, { "epoch": 0.04584532294479614, "grad_norm": 3.5136960957077976, "learning_rate": 4.584532294479614e-06, "loss": 0.8812, "step": 10356 }, { "epoch": 0.04584974987825933, "grad_norm": 2.7533055829620476, "learning_rate": 4.5849749878259335e-06, "loss": 0.9458, "step": 10357 }, { "epoch": 0.04585417681172252, "grad_norm": 3.0846978431927137, "learning_rate": 4.585417681172252e-06, "loss": 0.9116, "step": 10358 }, { "epoch": 0.04585860374518571, "grad_norm": 2.4903548183291058, "learning_rate": 4.585860374518571e-06, "loss": 0.737, "step": 10359 }, { "epoch": 0.0458630306786489, "grad_norm": 2.720660156097887, "learning_rate": 4.58630306786489e-06, "loss": 0.8461, "step": 10360 }, { "epoch": 0.04586745761211209, "grad_norm": 2.531536924721202, "learning_rate": 4.586745761211209e-06, "loss": 0.7883, "step": 10361 }, { "epoch": 0.04587188454557528, "grad_norm": 2.3678244160503388, "learning_rate": 4.587188454557529e-06, "loss": 0.864, "step": 10362 }, { "epoch": 0.04587631147903847, "grad_norm": 3.579208493713728, "learning_rate": 4.587631147903847e-06, "loss": 1.3588, "step": 10363 }, { "epoch": 0.04588073841250166, "grad_norm": 2.8700281554423652, "learning_rate": 4.5880738412501666e-06, "loss": 0.8132, "step": 10364 }, { "epoch": 0.04588516534596485, "grad_norm": 3.631973664317127, "learning_rate": 4.588516534596485e-06, "loss": 1.0081, "step": 10365 }, { "epoch": 0.04588959227942804, "grad_norm": 2.461669147627926, "learning_rate": 4.5889592279428045e-06, "loss": 0.6309, "step": 10366 }, { "epoch": 0.045894019212891234, "grad_norm": 2.395633655728377, "learning_rate": 4.589401921289123e-06, "loss": 0.4771, "step": 10367 }, { "epoch": 0.04589844614635442, "grad_norm": 4.6044416473412175, "learning_rate": 4.589844614635442e-06, "loss": 1.3418, "step": 10368 }, { "epoch": 0.04590287307981761, "grad_norm": 3.606316528577519, "learning_rate": 4.590287307981761e-06, "loss": 1.3549, "step": 10369 }, { "epoch": 0.0459073000132808, "grad_norm": 2.7672900814013484, "learning_rate": 4.59073000132808e-06, "loss": 0.6879, "step": 10370 }, { "epoch": 0.04591172694674399, "grad_norm": 3.177134782156985, "learning_rate": 4.5911726946744e-06, "loss": 0.8795, "step": 10371 }, { "epoch": 0.04591615388020718, "grad_norm": 2.131632049226491, "learning_rate": 4.591615388020718e-06, "loss": 0.3798, "step": 10372 }, { "epoch": 0.04592058081367037, "grad_norm": 3.974875598542832, "learning_rate": 4.5920580813670375e-06, "loss": 0.7993, "step": 10373 }, { "epoch": 0.04592500774713356, "grad_norm": 3.0128979267442464, "learning_rate": 4.592500774713357e-06, "loss": 0.5866, "step": 10374 }, { "epoch": 0.04592943468059675, "grad_norm": 2.83283351221148, "learning_rate": 4.592943468059675e-06, "loss": 1.0071, "step": 10375 }, { "epoch": 0.04593386161405994, "grad_norm": 2.9041408361966417, "learning_rate": 4.593386161405995e-06, "loss": 0.8909, "step": 10376 }, { "epoch": 0.04593828854752313, "grad_norm": 2.942156878050815, "learning_rate": 4.593828854752313e-06, "loss": 0.7642, "step": 10377 }, { "epoch": 0.04594271548098632, "grad_norm": 2.613030945729954, "learning_rate": 4.594271548098632e-06, "loss": 0.7787, "step": 10378 }, { "epoch": 0.04594714241444951, "grad_norm": 3.1352277254962555, "learning_rate": 4.594714241444951e-06, "loss": 1.1103, "step": 10379 }, { "epoch": 0.0459515693479127, "grad_norm": 3.0273669147629714, "learning_rate": 4.5951569347912706e-06, "loss": 0.6088, "step": 10380 }, { "epoch": 0.045955996281375894, "grad_norm": 3.0985000685066395, "learning_rate": 4.595599628137589e-06, "loss": 1.1545, "step": 10381 }, { "epoch": 0.045960423214839084, "grad_norm": 2.8638323202995064, "learning_rate": 4.5960423214839085e-06, "loss": 0.6755, "step": 10382 }, { "epoch": 0.04596485014830227, "grad_norm": 3.1518875611953847, "learning_rate": 4.596485014830228e-06, "loss": 0.953, "step": 10383 }, { "epoch": 0.04596927708176546, "grad_norm": 2.958872512610601, "learning_rate": 4.596927708176546e-06, "loss": 0.7981, "step": 10384 }, { "epoch": 0.04597370401522865, "grad_norm": 3.0319533340920963, "learning_rate": 4.597370401522866e-06, "loss": 0.9159, "step": 10385 }, { "epoch": 0.04597813094869184, "grad_norm": 3.6321880362425683, "learning_rate": 4.597813094869184e-06, "loss": 0.9343, "step": 10386 }, { "epoch": 0.04598255788215503, "grad_norm": 2.4406143488906804, "learning_rate": 4.598255788215504e-06, "loss": 0.5898, "step": 10387 }, { "epoch": 0.04598698481561822, "grad_norm": 3.318470475451679, "learning_rate": 4.598698481561822e-06, "loss": 0.8333, "step": 10388 }, { "epoch": 0.04599141174908141, "grad_norm": 3.5660459935307043, "learning_rate": 4.5991411749081415e-06, "loss": 0.6215, "step": 10389 }, { "epoch": 0.0459958386825446, "grad_norm": 2.6892994723939716, "learning_rate": 4.59958386825446e-06, "loss": 0.8689, "step": 10390 }, { "epoch": 0.04600026561600779, "grad_norm": 3.590276108723635, "learning_rate": 4.600026561600779e-06, "loss": 0.7009, "step": 10391 }, { "epoch": 0.04600469254947098, "grad_norm": 2.498670512369978, "learning_rate": 4.600469254947099e-06, "loss": 0.7463, "step": 10392 }, { "epoch": 0.04600911948293417, "grad_norm": 3.0078868175431297, "learning_rate": 4.600911948293417e-06, "loss": 0.6502, "step": 10393 }, { "epoch": 0.04601354641639736, "grad_norm": 2.3461858703971026, "learning_rate": 4.601354641639737e-06, "loss": 0.6796, "step": 10394 }, { "epoch": 0.046017973349860554, "grad_norm": 3.1735129181495014, "learning_rate": 4.601797334986056e-06, "loss": 0.8977, "step": 10395 }, { "epoch": 0.046022400283323744, "grad_norm": 2.8658011286089207, "learning_rate": 4.6022400283323746e-06, "loss": 0.4539, "step": 10396 }, { "epoch": 0.046026827216786935, "grad_norm": 2.3967113606788697, "learning_rate": 4.602682721678693e-06, "loss": 0.6965, "step": 10397 }, { "epoch": 0.04603125415025012, "grad_norm": 2.8758083414885647, "learning_rate": 4.6031254150250125e-06, "loss": 0.8177, "step": 10398 }, { "epoch": 0.04603568108371331, "grad_norm": 2.909992177057548, "learning_rate": 4.603568108371331e-06, "loss": 0.9684, "step": 10399 }, { "epoch": 0.0460401080171765, "grad_norm": 3.132728466699148, "learning_rate": 4.60401080171765e-06, "loss": 0.9375, "step": 10400 }, { "epoch": 0.04604453495063969, "grad_norm": 2.745773360584363, "learning_rate": 4.60445349506397e-06, "loss": 0.8571, "step": 10401 }, { "epoch": 0.04604896188410288, "grad_norm": 3.581238589515626, "learning_rate": 4.604896188410288e-06, "loss": 1.1245, "step": 10402 }, { "epoch": 0.04605338881756607, "grad_norm": 3.0617122466522324, "learning_rate": 4.605338881756608e-06, "loss": 0.8557, "step": 10403 }, { "epoch": 0.04605781575102926, "grad_norm": 2.827012015659977, "learning_rate": 4.605781575102927e-06, "loss": 0.5577, "step": 10404 }, { "epoch": 0.04606224268449245, "grad_norm": 2.5886395313902506, "learning_rate": 4.6062242684492455e-06, "loss": 0.6494, "step": 10405 }, { "epoch": 0.04606666961795564, "grad_norm": 2.8820176565614966, "learning_rate": 4.606666961795565e-06, "loss": 0.7259, "step": 10406 }, { "epoch": 0.04607109655141883, "grad_norm": 3.419136435116232, "learning_rate": 4.607109655141883e-06, "loss": 1.0792, "step": 10407 }, { "epoch": 0.046075523484882024, "grad_norm": 2.827565804101672, "learning_rate": 4.607552348488203e-06, "loss": 1.0675, "step": 10408 }, { "epoch": 0.046079950418345214, "grad_norm": 2.8353563334012413, "learning_rate": 4.607995041834521e-06, "loss": 0.8051, "step": 10409 }, { "epoch": 0.046084377351808405, "grad_norm": 2.6949260010421288, "learning_rate": 4.608437735180841e-06, "loss": 0.6429, "step": 10410 }, { "epoch": 0.046088804285271595, "grad_norm": 3.150310097420824, "learning_rate": 4.60888042852716e-06, "loss": 0.875, "step": 10411 }, { "epoch": 0.046093231218734786, "grad_norm": 3.083874982266309, "learning_rate": 4.6093231218734786e-06, "loss": 1.0728, "step": 10412 }, { "epoch": 0.04609765815219797, "grad_norm": 3.6621739696511226, "learning_rate": 4.609765815219798e-06, "loss": 1.1454, "step": 10413 }, { "epoch": 0.04610208508566116, "grad_norm": 2.602508364540795, "learning_rate": 4.610208508566117e-06, "loss": 0.7212, "step": 10414 }, { "epoch": 0.04610651201912435, "grad_norm": 2.89625048239605, "learning_rate": 4.610651201912436e-06, "loss": 0.8026, "step": 10415 }, { "epoch": 0.04611093895258754, "grad_norm": 2.526986310192155, "learning_rate": 4.611093895258754e-06, "loss": 0.8562, "step": 10416 }, { "epoch": 0.04611536588605073, "grad_norm": 2.2621821991434667, "learning_rate": 4.611536588605074e-06, "loss": 0.6069, "step": 10417 }, { "epoch": 0.04611979281951392, "grad_norm": 2.3130410366371814, "learning_rate": 4.611979281951392e-06, "loss": 0.6072, "step": 10418 }, { "epoch": 0.04612421975297711, "grad_norm": 2.5124806489643077, "learning_rate": 4.612421975297712e-06, "loss": 0.5215, "step": 10419 }, { "epoch": 0.0461286466864403, "grad_norm": 3.1972366956252323, "learning_rate": 4.612864668644031e-06, "loss": 0.7895, "step": 10420 }, { "epoch": 0.04613307361990349, "grad_norm": 2.822409815941926, "learning_rate": 4.6133073619903495e-06, "loss": 0.4906, "step": 10421 }, { "epoch": 0.046137500553366684, "grad_norm": 2.7463349126251906, "learning_rate": 4.613750055336669e-06, "loss": 0.6106, "step": 10422 }, { "epoch": 0.046141927486829874, "grad_norm": 2.6186931834308145, "learning_rate": 4.614192748682988e-06, "loss": 0.7242, "step": 10423 }, { "epoch": 0.046146354420293065, "grad_norm": 2.444208361175726, "learning_rate": 4.614635442029307e-06, "loss": 0.563, "step": 10424 }, { "epoch": 0.046150781353756255, "grad_norm": 4.013701543213014, "learning_rate": 4.615078135375626e-06, "loss": 1.0206, "step": 10425 }, { "epoch": 0.046155208287219446, "grad_norm": 2.5121518862290086, "learning_rate": 4.615520828721945e-06, "loss": 0.7713, "step": 10426 }, { "epoch": 0.046159635220682636, "grad_norm": 3.139486554489169, "learning_rate": 4.615963522068263e-06, "loss": 0.599, "step": 10427 }, { "epoch": 0.04616406215414582, "grad_norm": 3.703539561884901, "learning_rate": 4.6164062154145826e-06, "loss": 1.0492, "step": 10428 }, { "epoch": 0.04616848908760901, "grad_norm": 2.67309717122353, "learning_rate": 4.616848908760902e-06, "loss": 0.9556, "step": 10429 }, { "epoch": 0.0461729160210722, "grad_norm": 3.1684437831315795, "learning_rate": 4.6172916021072205e-06, "loss": 0.8478, "step": 10430 }, { "epoch": 0.04617734295453539, "grad_norm": 2.954210238109145, "learning_rate": 4.61773429545354e-06, "loss": 0.642, "step": 10431 }, { "epoch": 0.04618176988799858, "grad_norm": 3.467507037734996, "learning_rate": 4.618176988799859e-06, "loss": 0.6749, "step": 10432 }, { "epoch": 0.04618619682146177, "grad_norm": 3.3558966856969823, "learning_rate": 4.618619682146178e-06, "loss": 0.8922, "step": 10433 }, { "epoch": 0.04619062375492496, "grad_norm": 2.258405924994668, "learning_rate": 4.619062375492497e-06, "loss": 0.7277, "step": 10434 }, { "epoch": 0.04619505068838815, "grad_norm": 3.052083398742592, "learning_rate": 4.619505068838816e-06, "loss": 0.8599, "step": 10435 }, { "epoch": 0.046199477621851344, "grad_norm": 3.0550470709699624, "learning_rate": 4.619947762185135e-06, "loss": 0.868, "step": 10436 }, { "epoch": 0.046203904555314534, "grad_norm": 2.665283169952253, "learning_rate": 4.6203904555314535e-06, "loss": 0.8287, "step": 10437 }, { "epoch": 0.046208331488777725, "grad_norm": 2.714752609959764, "learning_rate": 4.620833148877773e-06, "loss": 0.7157, "step": 10438 }, { "epoch": 0.046212758422240915, "grad_norm": 2.792081073558724, "learning_rate": 4.621275842224091e-06, "loss": 0.8101, "step": 10439 }, { "epoch": 0.046217185355704106, "grad_norm": 2.51364083386322, "learning_rate": 4.621718535570411e-06, "loss": 0.7245, "step": 10440 }, { "epoch": 0.046221612289167296, "grad_norm": 2.6170128726270847, "learning_rate": 4.62216122891673e-06, "loss": 0.6805, "step": 10441 }, { "epoch": 0.04622603922263049, "grad_norm": 2.914556638791692, "learning_rate": 4.622603922263049e-06, "loss": 0.7464, "step": 10442 }, { "epoch": 0.04623046615609367, "grad_norm": 3.087073536727201, "learning_rate": 4.623046615609368e-06, "loss": 0.8255, "step": 10443 }, { "epoch": 0.04623489308955686, "grad_norm": 3.4847788459121793, "learning_rate": 4.623489308955687e-06, "loss": 0.8206, "step": 10444 }, { "epoch": 0.04623932002302005, "grad_norm": 2.897399011130901, "learning_rate": 4.623932002302006e-06, "loss": 0.7181, "step": 10445 }, { "epoch": 0.04624374695648324, "grad_norm": 2.8535931581311575, "learning_rate": 4.6243746956483245e-06, "loss": 1.047, "step": 10446 }, { "epoch": 0.04624817388994643, "grad_norm": 2.583924686136677, "learning_rate": 4.624817388994644e-06, "loss": 0.9502, "step": 10447 }, { "epoch": 0.04625260082340962, "grad_norm": 2.722283757527444, "learning_rate": 4.625260082340962e-06, "loss": 0.9165, "step": 10448 }, { "epoch": 0.046257027756872814, "grad_norm": 3.002644985876082, "learning_rate": 4.625702775687282e-06, "loss": 0.8927, "step": 10449 }, { "epoch": 0.046261454690336004, "grad_norm": 2.930280604872722, "learning_rate": 4.626145469033601e-06, "loss": 0.9499, "step": 10450 }, { "epoch": 0.046265881623799195, "grad_norm": 3.262422865354215, "learning_rate": 4.62658816237992e-06, "loss": 0.7912, "step": 10451 }, { "epoch": 0.046270308557262385, "grad_norm": 3.6587573839594127, "learning_rate": 4.627030855726239e-06, "loss": 0.4451, "step": 10452 }, { "epoch": 0.046274735490725576, "grad_norm": 2.929605364296936, "learning_rate": 4.627473549072558e-06, "loss": 0.9546, "step": 10453 }, { "epoch": 0.046279162424188766, "grad_norm": 3.061106267210175, "learning_rate": 4.627916242418877e-06, "loss": 0.6668, "step": 10454 }, { "epoch": 0.04628358935765196, "grad_norm": 2.604943020394864, "learning_rate": 4.628358935765196e-06, "loss": 0.7197, "step": 10455 }, { "epoch": 0.04628801629111515, "grad_norm": 2.6781606330941883, "learning_rate": 4.628801629111515e-06, "loss": 0.8137, "step": 10456 }, { "epoch": 0.04629244322457834, "grad_norm": 2.9368958653893538, "learning_rate": 4.629244322457834e-06, "loss": 0.8922, "step": 10457 }, { "epoch": 0.04629687015804152, "grad_norm": 2.5455489323450324, "learning_rate": 4.629687015804153e-06, "loss": 0.7362, "step": 10458 }, { "epoch": 0.04630129709150471, "grad_norm": 2.4553745336026096, "learning_rate": 4.630129709150472e-06, "loss": 0.7511, "step": 10459 }, { "epoch": 0.0463057240249679, "grad_norm": 2.955952646330303, "learning_rate": 4.6305724024967906e-06, "loss": 0.806, "step": 10460 }, { "epoch": 0.04631015095843109, "grad_norm": 3.2484047165076, "learning_rate": 4.63101509584311e-06, "loss": 0.8058, "step": 10461 }, { "epoch": 0.04631457789189428, "grad_norm": 3.233929901774407, "learning_rate": 4.631457789189429e-06, "loss": 0.889, "step": 10462 }, { "epoch": 0.046319004825357474, "grad_norm": 3.61514469524622, "learning_rate": 4.631900482535748e-06, "loss": 0.8726, "step": 10463 }, { "epoch": 0.046323431758820664, "grad_norm": 3.1037739913382594, "learning_rate": 4.632343175882067e-06, "loss": 0.941, "step": 10464 }, { "epoch": 0.046327858692283855, "grad_norm": 2.31459616869569, "learning_rate": 4.632785869228386e-06, "loss": 0.679, "step": 10465 }, { "epoch": 0.046332285625747045, "grad_norm": 2.6937002783921775, "learning_rate": 4.633228562574705e-06, "loss": 0.9633, "step": 10466 }, { "epoch": 0.046336712559210236, "grad_norm": 2.5047185143921307, "learning_rate": 4.633671255921024e-06, "loss": 0.5034, "step": 10467 }, { "epoch": 0.046341139492673426, "grad_norm": 3.01956149465523, "learning_rate": 4.634113949267343e-06, "loss": 0.809, "step": 10468 }, { "epoch": 0.04634556642613662, "grad_norm": 2.587126015888038, "learning_rate": 4.6345566426136615e-06, "loss": 0.5239, "step": 10469 }, { "epoch": 0.04634999335959981, "grad_norm": 3.1667201444293127, "learning_rate": 4.634999335959981e-06, "loss": 1.0071, "step": 10470 }, { "epoch": 0.046354420293063, "grad_norm": 2.6894453691868505, "learning_rate": 4.6354420293063e-06, "loss": 0.9555, "step": 10471 }, { "epoch": 0.04635884722652619, "grad_norm": 2.7387025981838238, "learning_rate": 4.635884722652619e-06, "loss": 0.8785, "step": 10472 }, { "epoch": 0.04636327415998937, "grad_norm": 3.0249644349824676, "learning_rate": 4.636327415998938e-06, "loss": 0.8976, "step": 10473 }, { "epoch": 0.04636770109345256, "grad_norm": 3.2194887322370254, "learning_rate": 4.6367701093452575e-06, "loss": 0.7227, "step": 10474 }, { "epoch": 0.04637212802691575, "grad_norm": 2.4194828689912544, "learning_rate": 4.637212802691576e-06, "loss": 0.7503, "step": 10475 }, { "epoch": 0.04637655496037894, "grad_norm": 2.275462833718963, "learning_rate": 4.637655496037895e-06, "loss": 0.4778, "step": 10476 }, { "epoch": 0.046380981893842134, "grad_norm": 3.065734570453531, "learning_rate": 4.638098189384214e-06, "loss": 0.6873, "step": 10477 }, { "epoch": 0.046385408827305324, "grad_norm": 3.0779974301390722, "learning_rate": 4.6385408827305325e-06, "loss": 1.096, "step": 10478 }, { "epoch": 0.046389835760768515, "grad_norm": 2.772903670746354, "learning_rate": 4.638983576076852e-06, "loss": 0.6695, "step": 10479 }, { "epoch": 0.046394262694231705, "grad_norm": 3.188204666787631, "learning_rate": 4.639426269423171e-06, "loss": 0.7887, "step": 10480 }, { "epoch": 0.046398689627694896, "grad_norm": 2.437524971571712, "learning_rate": 4.63986896276949e-06, "loss": 0.4544, "step": 10481 }, { "epoch": 0.046403116561158086, "grad_norm": 2.286750480654979, "learning_rate": 4.640311656115809e-06, "loss": 0.7486, "step": 10482 }, { "epoch": 0.04640754349462128, "grad_norm": 2.8105004674772216, "learning_rate": 4.6407543494621285e-06, "loss": 0.7253, "step": 10483 }, { "epoch": 0.04641197042808447, "grad_norm": 2.618912328277542, "learning_rate": 4.641197042808447e-06, "loss": 0.8175, "step": 10484 }, { "epoch": 0.04641639736154766, "grad_norm": 2.6437483968859388, "learning_rate": 4.641639736154766e-06, "loss": 0.7263, "step": 10485 }, { "epoch": 0.04642082429501085, "grad_norm": 2.491086230941326, "learning_rate": 4.642082429501085e-06, "loss": 0.6812, "step": 10486 }, { "epoch": 0.04642525122847404, "grad_norm": 2.919210518777924, "learning_rate": 4.642525122847404e-06, "loss": 0.8874, "step": 10487 }, { "epoch": 0.04642967816193723, "grad_norm": 3.1512701320661214, "learning_rate": 4.642967816193723e-06, "loss": 0.9051, "step": 10488 }, { "epoch": 0.04643410509540041, "grad_norm": 2.3107262097466883, "learning_rate": 4.643410509540042e-06, "loss": 0.6058, "step": 10489 }, { "epoch": 0.046438532028863604, "grad_norm": 3.247211206171968, "learning_rate": 4.643853202886361e-06, "loss": 1.0297, "step": 10490 }, { "epoch": 0.046442958962326794, "grad_norm": 3.2822789294001495, "learning_rate": 4.64429589623268e-06, "loss": 0.9133, "step": 10491 }, { "epoch": 0.046447385895789985, "grad_norm": 3.3370480753447986, "learning_rate": 4.644738589578999e-06, "loss": 0.5569, "step": 10492 }, { "epoch": 0.046451812829253175, "grad_norm": 3.0820378140700573, "learning_rate": 4.645181282925318e-06, "loss": 0.9355, "step": 10493 }, { "epoch": 0.046456239762716366, "grad_norm": 2.5449153636958908, "learning_rate": 4.645623976271637e-06, "loss": 0.7896, "step": 10494 }, { "epoch": 0.046460666696179556, "grad_norm": 2.8532591656053583, "learning_rate": 4.646066669617957e-06, "loss": 0.8976, "step": 10495 }, { "epoch": 0.04646509362964275, "grad_norm": 2.521364030131563, "learning_rate": 4.646509362964275e-06, "loss": 0.6336, "step": 10496 }, { "epoch": 0.04646952056310594, "grad_norm": 2.4479756750786827, "learning_rate": 4.646952056310594e-06, "loss": 0.7094, "step": 10497 }, { "epoch": 0.04647394749656913, "grad_norm": 2.2770630474772506, "learning_rate": 4.647394749656913e-06, "loss": 0.7065, "step": 10498 }, { "epoch": 0.04647837443003232, "grad_norm": 2.3133668932283005, "learning_rate": 4.647837443003232e-06, "loss": 0.5618, "step": 10499 }, { "epoch": 0.04648280136349551, "grad_norm": 3.273875385579524, "learning_rate": 4.648280136349551e-06, "loss": 1.0589, "step": 10500 }, { "epoch": 0.0464872282969587, "grad_norm": 2.3968473212578623, "learning_rate": 4.64872282969587e-06, "loss": 0.4176, "step": 10501 }, { "epoch": 0.04649165523042189, "grad_norm": 2.780429638398772, "learning_rate": 4.649165523042189e-06, "loss": 0.7623, "step": 10502 }, { "epoch": 0.04649608216388508, "grad_norm": 2.5590118959604995, "learning_rate": 4.649608216388508e-06, "loss": 0.7178, "step": 10503 }, { "epoch": 0.046500509097348264, "grad_norm": 2.7198954375714646, "learning_rate": 4.650050909734828e-06, "loss": 0.7453, "step": 10504 }, { "epoch": 0.046504936030811454, "grad_norm": 2.754056864317689, "learning_rate": 4.650493603081146e-06, "loss": 0.52, "step": 10505 }, { "epoch": 0.046509362964274645, "grad_norm": 3.169417397457322, "learning_rate": 4.6509362964274655e-06, "loss": 0.7803, "step": 10506 }, { "epoch": 0.046513789897737835, "grad_norm": 2.617076557754266, "learning_rate": 4.651378989773784e-06, "loss": 0.801, "step": 10507 }, { "epoch": 0.046518216831201026, "grad_norm": 2.791911774342596, "learning_rate": 4.6518216831201026e-06, "loss": 0.8007, "step": 10508 }, { "epoch": 0.046522643764664216, "grad_norm": 2.710183703718222, "learning_rate": 4.652264376466422e-06, "loss": 0.8389, "step": 10509 }, { "epoch": 0.04652707069812741, "grad_norm": 2.6605902544389384, "learning_rate": 4.652707069812741e-06, "loss": 0.8865, "step": 10510 }, { "epoch": 0.0465314976315906, "grad_norm": 2.885374298985396, "learning_rate": 4.65314976315906e-06, "loss": 0.6592, "step": 10511 }, { "epoch": 0.04653592456505379, "grad_norm": 2.9656721874622236, "learning_rate": 4.653592456505379e-06, "loss": 0.9561, "step": 10512 }, { "epoch": 0.04654035149851698, "grad_norm": 2.7979829233672575, "learning_rate": 4.6540351498516986e-06, "loss": 0.601, "step": 10513 }, { "epoch": 0.04654477843198017, "grad_norm": 2.844036862032888, "learning_rate": 4.654477843198017e-06, "loss": 0.8577, "step": 10514 }, { "epoch": 0.04654920536544336, "grad_norm": 2.7879976516030744, "learning_rate": 4.6549205365443365e-06, "loss": 0.9534, "step": 10515 }, { "epoch": 0.04655363229890655, "grad_norm": 2.536301130013438, "learning_rate": 4.655363229890655e-06, "loss": 0.5346, "step": 10516 }, { "epoch": 0.04655805923236974, "grad_norm": 3.28971570709657, "learning_rate": 4.655805923236974e-06, "loss": 0.8359, "step": 10517 }, { "epoch": 0.04656248616583293, "grad_norm": 3.2312146564489197, "learning_rate": 4.656248616583293e-06, "loss": 0.9016, "step": 10518 }, { "epoch": 0.046566913099296114, "grad_norm": 2.9611557363627026, "learning_rate": 4.656691309929612e-06, "loss": 0.6214, "step": 10519 }, { "epoch": 0.046571340032759305, "grad_norm": 3.167924819474369, "learning_rate": 4.657134003275931e-06, "loss": 0.9887, "step": 10520 }, { "epoch": 0.046575766966222495, "grad_norm": 2.4972797999175063, "learning_rate": 4.65757669662225e-06, "loss": 0.6476, "step": 10521 }, { "epoch": 0.046580193899685686, "grad_norm": 2.815710044569264, "learning_rate": 4.6580193899685695e-06, "loss": 0.4829, "step": 10522 }, { "epoch": 0.046584620833148876, "grad_norm": 2.580096639908644, "learning_rate": 4.658462083314888e-06, "loss": 0.5448, "step": 10523 }, { "epoch": 0.04658904776661207, "grad_norm": 3.1786486095253617, "learning_rate": 4.658904776661207e-06, "loss": 0.652, "step": 10524 }, { "epoch": 0.04659347470007526, "grad_norm": 2.603596611476523, "learning_rate": 4.659347470007527e-06, "loss": 0.6564, "step": 10525 }, { "epoch": 0.04659790163353845, "grad_norm": 2.7367872583131896, "learning_rate": 4.659790163353845e-06, "loss": 0.7524, "step": 10526 }, { "epoch": 0.04660232856700164, "grad_norm": 2.646635997033692, "learning_rate": 4.660232856700164e-06, "loss": 0.8601, "step": 10527 }, { "epoch": 0.04660675550046483, "grad_norm": 2.6025230225269995, "learning_rate": 4.660675550046483e-06, "loss": 0.6308, "step": 10528 }, { "epoch": 0.04661118243392802, "grad_norm": 2.927963174994765, "learning_rate": 4.661118243392802e-06, "loss": 0.4316, "step": 10529 }, { "epoch": 0.04661560936739121, "grad_norm": 2.752971001397724, "learning_rate": 4.661560936739121e-06, "loss": 0.8752, "step": 10530 }, { "epoch": 0.0466200363008544, "grad_norm": 2.907817510668801, "learning_rate": 4.6620036300854405e-06, "loss": 0.6891, "step": 10531 }, { "epoch": 0.04662446323431759, "grad_norm": 2.7952660034024635, "learning_rate": 4.662446323431759e-06, "loss": 0.8943, "step": 10532 }, { "epoch": 0.04662889016778078, "grad_norm": 2.5680962480732994, "learning_rate": 4.662889016778078e-06, "loss": 0.6225, "step": 10533 }, { "epoch": 0.046633317101243965, "grad_norm": 2.7729218214582896, "learning_rate": 4.663331710124398e-06, "loss": 0.4728, "step": 10534 }, { "epoch": 0.046637744034707156, "grad_norm": 2.571764605340448, "learning_rate": 4.663774403470716e-06, "loss": 0.8369, "step": 10535 }, { "epoch": 0.046642170968170346, "grad_norm": 2.6948747919275577, "learning_rate": 4.664217096817036e-06, "loss": 0.883, "step": 10536 }, { "epoch": 0.04664659790163354, "grad_norm": 2.3473257606027063, "learning_rate": 4.664659790163354e-06, "loss": 0.5015, "step": 10537 }, { "epoch": 0.04665102483509673, "grad_norm": 3.3338143174747636, "learning_rate": 4.6651024835096735e-06, "loss": 0.8953, "step": 10538 }, { "epoch": 0.04665545176855992, "grad_norm": 2.445644268609302, "learning_rate": 4.665545176855992e-06, "loss": 0.6731, "step": 10539 }, { "epoch": 0.04665987870202311, "grad_norm": 2.998096502005436, "learning_rate": 4.665987870202311e-06, "loss": 0.9607, "step": 10540 }, { "epoch": 0.0466643056354863, "grad_norm": 3.2704523521892557, "learning_rate": 4.66643056354863e-06, "loss": 0.8709, "step": 10541 }, { "epoch": 0.04666873256894949, "grad_norm": 2.7378861790903244, "learning_rate": 4.666873256894949e-06, "loss": 0.764, "step": 10542 }, { "epoch": 0.04667315950241268, "grad_norm": 3.091352697056467, "learning_rate": 4.667315950241269e-06, "loss": 0.7869, "step": 10543 }, { "epoch": 0.04667758643587587, "grad_norm": 2.6134067124831533, "learning_rate": 4.667758643587587e-06, "loss": 0.4123, "step": 10544 }, { "epoch": 0.04668201336933906, "grad_norm": 2.4529510110671118, "learning_rate": 4.6682013369339066e-06, "loss": 0.7223, "step": 10545 }, { "epoch": 0.04668644030280225, "grad_norm": 2.197183471879821, "learning_rate": 4.668644030280225e-06, "loss": 0.7147, "step": 10546 }, { "epoch": 0.04669086723626544, "grad_norm": 3.064693052185515, "learning_rate": 4.6690867236265445e-06, "loss": 0.8168, "step": 10547 }, { "epoch": 0.04669529416972863, "grad_norm": 3.0985334669059137, "learning_rate": 4.669529416972863e-06, "loss": 0.8847, "step": 10548 }, { "epoch": 0.046699721103191816, "grad_norm": 4.019885628426529, "learning_rate": 4.669972110319182e-06, "loss": 1.4367, "step": 10549 }, { "epoch": 0.046704148036655006, "grad_norm": 2.4997958048018933, "learning_rate": 4.670414803665501e-06, "loss": 0.7894, "step": 10550 }, { "epoch": 0.0467085749701182, "grad_norm": 3.200295542289402, "learning_rate": 4.67085749701182e-06, "loss": 1.069, "step": 10551 }, { "epoch": 0.04671300190358139, "grad_norm": 2.5318029559838617, "learning_rate": 4.67130019035814e-06, "loss": 0.747, "step": 10552 }, { "epoch": 0.04671742883704458, "grad_norm": 2.5825979914550232, "learning_rate": 4.671742883704458e-06, "loss": 0.8009, "step": 10553 }, { "epoch": 0.04672185577050777, "grad_norm": 2.670615710295612, "learning_rate": 4.6721855770507775e-06, "loss": 0.5323, "step": 10554 }, { "epoch": 0.04672628270397096, "grad_norm": 3.645489618755263, "learning_rate": 4.672628270397097e-06, "loss": 0.9025, "step": 10555 }, { "epoch": 0.04673070963743415, "grad_norm": 3.534764785028177, "learning_rate": 4.673070963743415e-06, "loss": 0.8453, "step": 10556 }, { "epoch": 0.04673513657089734, "grad_norm": 2.4644491348701485, "learning_rate": 4.673513657089735e-06, "loss": 0.7768, "step": 10557 }, { "epoch": 0.04673956350436053, "grad_norm": 2.410720014404511, "learning_rate": 4.673956350436053e-06, "loss": 0.6287, "step": 10558 }, { "epoch": 0.04674399043782372, "grad_norm": 3.1156244333708445, "learning_rate": 4.674399043782372e-06, "loss": 0.7702, "step": 10559 }, { "epoch": 0.04674841737128691, "grad_norm": 3.0127220565194275, "learning_rate": 4.674841737128691e-06, "loss": 0.6788, "step": 10560 }, { "epoch": 0.0467528443047501, "grad_norm": 2.690684059823656, "learning_rate": 4.6752844304750106e-06, "loss": 0.7055, "step": 10561 }, { "epoch": 0.04675727123821329, "grad_norm": 2.7801596425878996, "learning_rate": 4.675727123821329e-06, "loss": 0.795, "step": 10562 }, { "epoch": 0.04676169817167648, "grad_norm": 2.902847266268778, "learning_rate": 4.6761698171676485e-06, "loss": 0.8643, "step": 10563 }, { "epoch": 0.046766125105139666, "grad_norm": 2.2994441645232335, "learning_rate": 4.676612510513968e-06, "loss": 0.6362, "step": 10564 }, { "epoch": 0.04677055203860286, "grad_norm": 2.8178389734651215, "learning_rate": 4.677055203860286e-06, "loss": 0.6656, "step": 10565 }, { "epoch": 0.04677497897206605, "grad_norm": 2.309423180488068, "learning_rate": 4.677497897206606e-06, "loss": 0.8192, "step": 10566 }, { "epoch": 0.04677940590552924, "grad_norm": 3.321246420629543, "learning_rate": 4.677940590552924e-06, "loss": 0.941, "step": 10567 }, { "epoch": 0.04678383283899243, "grad_norm": 3.7407569512283803, "learning_rate": 4.678383283899244e-06, "loss": 1.0276, "step": 10568 }, { "epoch": 0.04678825977245562, "grad_norm": 2.9294692138843774, "learning_rate": 4.678825977245562e-06, "loss": 0.4798, "step": 10569 }, { "epoch": 0.04679268670591881, "grad_norm": 2.9603557121358377, "learning_rate": 4.6792686705918815e-06, "loss": 1.004, "step": 10570 }, { "epoch": 0.046797113639382, "grad_norm": 2.5071872867081457, "learning_rate": 4.6797113639382e-06, "loss": 0.5513, "step": 10571 }, { "epoch": 0.04680154057284519, "grad_norm": 3.1322555676433272, "learning_rate": 4.680154057284519e-06, "loss": 1.2286, "step": 10572 }, { "epoch": 0.04680596750630838, "grad_norm": 3.377779239055122, "learning_rate": 4.680596750630839e-06, "loss": 0.9925, "step": 10573 }, { "epoch": 0.04681039443977157, "grad_norm": 3.44174001752323, "learning_rate": 4.681039443977157e-06, "loss": 1.0359, "step": 10574 }, { "epoch": 0.04681482137323476, "grad_norm": 2.3969546030655002, "learning_rate": 4.681482137323477e-06, "loss": 0.5386, "step": 10575 }, { "epoch": 0.04681924830669795, "grad_norm": 2.643496428066281, "learning_rate": 4.681924830669796e-06, "loss": 0.5989, "step": 10576 }, { "epoch": 0.04682367524016114, "grad_norm": 2.4234192633327996, "learning_rate": 4.6823675240161146e-06, "loss": 0.8531, "step": 10577 }, { "epoch": 0.046828102173624334, "grad_norm": 2.816106556914596, "learning_rate": 4.682810217362433e-06, "loss": 0.8395, "step": 10578 }, { "epoch": 0.04683252910708752, "grad_norm": 2.763985254297581, "learning_rate": 4.6832529107087525e-06, "loss": 0.7735, "step": 10579 }, { "epoch": 0.04683695604055071, "grad_norm": 2.458790663115832, "learning_rate": 4.683695604055071e-06, "loss": 0.5254, "step": 10580 }, { "epoch": 0.0468413829740139, "grad_norm": 3.148763242293294, "learning_rate": 4.68413829740139e-06, "loss": 0.9985, "step": 10581 }, { "epoch": 0.04684580990747709, "grad_norm": 2.2761916477271305, "learning_rate": 4.68458099074771e-06, "loss": 0.5359, "step": 10582 }, { "epoch": 0.04685023684094028, "grad_norm": 3.257109582351943, "learning_rate": 4.685023684094028e-06, "loss": 0.9323, "step": 10583 }, { "epoch": 0.04685466377440347, "grad_norm": 3.0758319409518475, "learning_rate": 4.685466377440348e-06, "loss": 0.7083, "step": 10584 }, { "epoch": 0.04685909070786666, "grad_norm": 3.3251359596529344, "learning_rate": 4.685909070786667e-06, "loss": 0.9902, "step": 10585 }, { "epoch": 0.04686351764132985, "grad_norm": 2.456653735558779, "learning_rate": 4.6863517641329855e-06, "loss": 0.7427, "step": 10586 }, { "epoch": 0.04686794457479304, "grad_norm": 3.1785326405920986, "learning_rate": 4.686794457479305e-06, "loss": 0.7138, "step": 10587 }, { "epoch": 0.04687237150825623, "grad_norm": 2.7356034926105095, "learning_rate": 4.687237150825623e-06, "loss": 0.7106, "step": 10588 }, { "epoch": 0.04687679844171942, "grad_norm": 2.6270893888877436, "learning_rate": 4.687679844171942e-06, "loss": 0.7113, "step": 10589 }, { "epoch": 0.04688122537518261, "grad_norm": 3.034696233053828, "learning_rate": 4.688122537518261e-06, "loss": 0.6856, "step": 10590 }, { "epoch": 0.0468856523086458, "grad_norm": 3.768301762811679, "learning_rate": 4.688565230864581e-06, "loss": 1.1741, "step": 10591 }, { "epoch": 0.046890079242108994, "grad_norm": 3.6849849827938566, "learning_rate": 4.689007924210899e-06, "loss": 1.0361, "step": 10592 }, { "epoch": 0.046894506175572184, "grad_norm": 2.705998469412303, "learning_rate": 4.6894506175572186e-06, "loss": 0.761, "step": 10593 }, { "epoch": 0.04689893310903537, "grad_norm": 3.1550616363067894, "learning_rate": 4.689893310903538e-06, "loss": 1.0055, "step": 10594 }, { "epoch": 0.04690336004249856, "grad_norm": 2.7988637720138643, "learning_rate": 4.6903360042498565e-06, "loss": 0.6906, "step": 10595 }, { "epoch": 0.04690778697596175, "grad_norm": 2.604166618240494, "learning_rate": 4.690778697596176e-06, "loss": 0.5441, "step": 10596 }, { "epoch": 0.04691221390942494, "grad_norm": 2.579378337952487, "learning_rate": 4.691221390942494e-06, "loss": 0.6887, "step": 10597 }, { "epoch": 0.04691664084288813, "grad_norm": 3.807370469605514, "learning_rate": 4.691664084288814e-06, "loss": 1.0781, "step": 10598 }, { "epoch": 0.04692106777635132, "grad_norm": 3.007716086787052, "learning_rate": 4.692106777635132e-06, "loss": 0.8962, "step": 10599 }, { "epoch": 0.04692549470981451, "grad_norm": 2.6523309734244123, "learning_rate": 4.692549470981452e-06, "loss": 0.7659, "step": 10600 }, { "epoch": 0.0469299216432777, "grad_norm": 2.6743647826711903, "learning_rate": 4.69299216432777e-06, "loss": 0.9321, "step": 10601 }, { "epoch": 0.04693434857674089, "grad_norm": 3.633744683838811, "learning_rate": 4.6934348576740895e-06, "loss": 1.2549, "step": 10602 }, { "epoch": 0.04693877551020408, "grad_norm": 3.209203233565065, "learning_rate": 4.693877551020409e-06, "loss": 0.8305, "step": 10603 }, { "epoch": 0.04694320244366727, "grad_norm": 2.7292378714088774, "learning_rate": 4.694320244366727e-06, "loss": 0.6112, "step": 10604 }, { "epoch": 0.04694762937713046, "grad_norm": 2.681853096276862, "learning_rate": 4.694762937713047e-06, "loss": 0.5952, "step": 10605 }, { "epoch": 0.046952056310593654, "grad_norm": 3.411118269564917, "learning_rate": 4.695205631059366e-06, "loss": 0.9012, "step": 10606 }, { "epoch": 0.046956483244056844, "grad_norm": 2.6858631703040894, "learning_rate": 4.695648324405685e-06, "loss": 0.7546, "step": 10607 }, { "epoch": 0.046960910177520035, "grad_norm": 3.1538713781588186, "learning_rate": 4.696091017752003e-06, "loss": 0.6912, "step": 10608 }, { "epoch": 0.04696533711098322, "grad_norm": 2.8765444273535263, "learning_rate": 4.6965337110983226e-06, "loss": 0.5808, "step": 10609 }, { "epoch": 0.04696976404444641, "grad_norm": 2.626054593026773, "learning_rate": 4.696976404444641e-06, "loss": 0.7838, "step": 10610 }, { "epoch": 0.0469741909779096, "grad_norm": 2.681094758062079, "learning_rate": 4.6974190977909605e-06, "loss": 0.8079, "step": 10611 }, { "epoch": 0.04697861791137279, "grad_norm": 2.7192956283121457, "learning_rate": 4.69786179113728e-06, "loss": 0.7618, "step": 10612 }, { "epoch": 0.04698304484483598, "grad_norm": 2.9748297610612027, "learning_rate": 4.698304484483598e-06, "loss": 0.5805, "step": 10613 }, { "epoch": 0.04698747177829917, "grad_norm": 2.681230590264709, "learning_rate": 4.698747177829918e-06, "loss": 0.5999, "step": 10614 }, { "epoch": 0.04699189871176236, "grad_norm": 2.644443418497742, "learning_rate": 4.699189871176237e-06, "loss": 0.5823, "step": 10615 }, { "epoch": 0.04699632564522555, "grad_norm": 2.602333080551974, "learning_rate": 4.699632564522556e-06, "loss": 0.5689, "step": 10616 }, { "epoch": 0.04700075257868874, "grad_norm": 2.491534378567853, "learning_rate": 4.700075257868875e-06, "loss": 0.6558, "step": 10617 }, { "epoch": 0.04700517951215193, "grad_norm": 2.3967286322879113, "learning_rate": 4.7005179512151935e-06, "loss": 0.5955, "step": 10618 }, { "epoch": 0.047009606445615124, "grad_norm": 2.8208071931732657, "learning_rate": 4.700960644561512e-06, "loss": 0.9177, "step": 10619 }, { "epoch": 0.047014033379078314, "grad_norm": 3.2091567616640764, "learning_rate": 4.701403337907831e-06, "loss": 0.8242, "step": 10620 }, { "epoch": 0.047018460312541505, "grad_norm": 2.5964789340455083, "learning_rate": 4.701846031254151e-06, "loss": 0.5067, "step": 10621 }, { "epoch": 0.047022887246004695, "grad_norm": 2.5482749344091813, "learning_rate": 4.702288724600469e-06, "loss": 0.6292, "step": 10622 }, { "epoch": 0.047027314179467886, "grad_norm": 3.135839398341716, "learning_rate": 4.702731417946789e-06, "loss": 0.6327, "step": 10623 }, { "epoch": 0.04703174111293107, "grad_norm": 2.684184395048242, "learning_rate": 4.703174111293108e-06, "loss": 0.7677, "step": 10624 }, { "epoch": 0.04703616804639426, "grad_norm": 2.890395955428943, "learning_rate": 4.7036168046394266e-06, "loss": 0.8878, "step": 10625 }, { "epoch": 0.04704059497985745, "grad_norm": 2.6798125577906755, "learning_rate": 4.704059497985746e-06, "loss": 0.762, "step": 10626 }, { "epoch": 0.04704502191332064, "grad_norm": 3.150931551101219, "learning_rate": 4.7045021913320645e-06, "loss": 1.0893, "step": 10627 }, { "epoch": 0.04704944884678383, "grad_norm": 2.751827593070759, "learning_rate": 4.704944884678384e-06, "loss": 0.7536, "step": 10628 }, { "epoch": 0.04705387578024702, "grad_norm": 2.550406695913521, "learning_rate": 4.705387578024702e-06, "loss": 0.6334, "step": 10629 }, { "epoch": 0.04705830271371021, "grad_norm": 3.2360007701432387, "learning_rate": 4.705830271371022e-06, "loss": 0.5272, "step": 10630 }, { "epoch": 0.0470627296471734, "grad_norm": 2.4347688055036447, "learning_rate": 4.70627296471734e-06, "loss": 0.5284, "step": 10631 }, { "epoch": 0.04706715658063659, "grad_norm": 3.4478091837121507, "learning_rate": 4.70671565806366e-06, "loss": 0.9137, "step": 10632 }, { "epoch": 0.047071583514099784, "grad_norm": 2.1972478289506565, "learning_rate": 4.707158351409979e-06, "loss": 0.6873, "step": 10633 }, { "epoch": 0.047076010447562974, "grad_norm": 4.16852227662001, "learning_rate": 4.7076010447562975e-06, "loss": 0.9154, "step": 10634 }, { "epoch": 0.047080437381026165, "grad_norm": 3.072068846095946, "learning_rate": 4.708043738102617e-06, "loss": 0.8091, "step": 10635 }, { "epoch": 0.047084864314489355, "grad_norm": 2.9769228200367226, "learning_rate": 4.708486431448936e-06, "loss": 1.1082, "step": 10636 }, { "epoch": 0.047089291247952546, "grad_norm": 3.1173200344689196, "learning_rate": 4.708929124795255e-06, "loss": 0.5933, "step": 10637 }, { "epoch": 0.047093718181415736, "grad_norm": 2.3232503408315877, "learning_rate": 4.709371818141574e-06, "loss": 0.3198, "step": 10638 }, { "epoch": 0.04709814511487893, "grad_norm": 2.446703297702766, "learning_rate": 4.709814511487893e-06, "loss": 0.7066, "step": 10639 }, { "epoch": 0.04710257204834211, "grad_norm": 2.371168215187315, "learning_rate": 4.710257204834211e-06, "loss": 0.4849, "step": 10640 }, { "epoch": 0.0471069989818053, "grad_norm": 3.061517469733117, "learning_rate": 4.710699898180531e-06, "loss": 0.7502, "step": 10641 }, { "epoch": 0.04711142591526849, "grad_norm": 2.8681500019614803, "learning_rate": 4.71114259152685e-06, "loss": 0.8331, "step": 10642 }, { "epoch": 0.04711585284873168, "grad_norm": 3.1383715159845798, "learning_rate": 4.7115852848731685e-06, "loss": 0.606, "step": 10643 }, { "epoch": 0.04712027978219487, "grad_norm": 2.561414450578371, "learning_rate": 4.712027978219488e-06, "loss": 0.8029, "step": 10644 }, { "epoch": 0.04712470671565806, "grad_norm": 3.134578249894656, "learning_rate": 4.712470671565807e-06, "loss": 0.782, "step": 10645 }, { "epoch": 0.04712913364912125, "grad_norm": 2.5290700599758003, "learning_rate": 4.712913364912126e-06, "loss": 0.7272, "step": 10646 }, { "epoch": 0.047133560582584444, "grad_norm": 3.060380135595679, "learning_rate": 4.713356058258445e-06, "loss": 0.8518, "step": 10647 }, { "epoch": 0.047137987516047634, "grad_norm": 3.2730982315123867, "learning_rate": 4.713798751604764e-06, "loss": 0.8982, "step": 10648 }, { "epoch": 0.047142414449510825, "grad_norm": 2.7180833832650526, "learning_rate": 4.714241444951083e-06, "loss": 0.5537, "step": 10649 }, { "epoch": 0.047146841382974015, "grad_norm": 2.9884340771568625, "learning_rate": 4.7146841382974015e-06, "loss": 0.7421, "step": 10650 }, { "epoch": 0.047151268316437206, "grad_norm": 3.743989481059748, "learning_rate": 4.715126831643721e-06, "loss": 1.0835, "step": 10651 }, { "epoch": 0.047155695249900396, "grad_norm": 3.288576065021153, "learning_rate": 4.715569524990039e-06, "loss": 0.9866, "step": 10652 }, { "epoch": 0.04716012218336359, "grad_norm": 3.4538447039176106, "learning_rate": 4.716012218336359e-06, "loss": 1.2214, "step": 10653 }, { "epoch": 0.04716454911682678, "grad_norm": 3.254816691328511, "learning_rate": 4.716454911682678e-06, "loss": 0.8938, "step": 10654 }, { "epoch": 0.04716897605028996, "grad_norm": 2.621677491844701, "learning_rate": 4.716897605028997e-06, "loss": 0.5472, "step": 10655 }, { "epoch": 0.04717340298375315, "grad_norm": 3.025555712220872, "learning_rate": 4.717340298375316e-06, "loss": 0.9253, "step": 10656 }, { "epoch": 0.04717782991721634, "grad_norm": 3.0492250615997794, "learning_rate": 4.7177829917216354e-06, "loss": 0.9905, "step": 10657 }, { "epoch": 0.04718225685067953, "grad_norm": 2.475424399582281, "learning_rate": 4.718225685067954e-06, "loss": 0.4274, "step": 10658 }, { "epoch": 0.04718668378414272, "grad_norm": 3.224529116090136, "learning_rate": 4.7186683784142725e-06, "loss": 0.9651, "step": 10659 }, { "epoch": 0.047191110717605914, "grad_norm": 2.425419833353762, "learning_rate": 4.719111071760592e-06, "loss": 0.7361, "step": 10660 }, { "epoch": 0.047195537651069104, "grad_norm": 3.749783284675321, "learning_rate": 4.71955376510691e-06, "loss": 0.6863, "step": 10661 }, { "epoch": 0.047199964584532295, "grad_norm": 3.0109575347801685, "learning_rate": 4.71999645845323e-06, "loss": 0.9129, "step": 10662 }, { "epoch": 0.047204391517995485, "grad_norm": 2.6932804913308854, "learning_rate": 4.720439151799549e-06, "loss": 0.5883, "step": 10663 }, { "epoch": 0.047208818451458676, "grad_norm": 2.7133372684168644, "learning_rate": 4.720881845145868e-06, "loss": 0.5884, "step": 10664 }, { "epoch": 0.047213245384921866, "grad_norm": 2.973574126469368, "learning_rate": 4.721324538492187e-06, "loss": 0.9255, "step": 10665 }, { "epoch": 0.04721767231838506, "grad_norm": 3.4194755464048385, "learning_rate": 4.721767231838506e-06, "loss": 0.5597, "step": 10666 }, { "epoch": 0.04722209925184825, "grad_norm": 3.1940957296586947, "learning_rate": 4.722209925184825e-06, "loss": 1.0135, "step": 10667 }, { "epoch": 0.04722652618531144, "grad_norm": 2.7860488610704914, "learning_rate": 4.722652618531144e-06, "loss": 0.7342, "step": 10668 }, { "epoch": 0.04723095311877463, "grad_norm": 3.837830981897927, "learning_rate": 4.723095311877463e-06, "loss": 1.0053, "step": 10669 }, { "epoch": 0.04723538005223781, "grad_norm": 2.550280443735348, "learning_rate": 4.723538005223781e-06, "loss": 0.4588, "step": 10670 }, { "epoch": 0.047239806985701, "grad_norm": 2.305456793062778, "learning_rate": 4.723980698570101e-06, "loss": 0.4706, "step": 10671 }, { "epoch": 0.04724423391916419, "grad_norm": 2.9862721601003774, "learning_rate": 4.72442339191642e-06, "loss": 1.0871, "step": 10672 }, { "epoch": 0.04724866085262738, "grad_norm": 2.563433408505084, "learning_rate": 4.724866085262739e-06, "loss": 0.8825, "step": 10673 }, { "epoch": 0.047253087786090574, "grad_norm": 2.8327715779411524, "learning_rate": 4.725308778609058e-06, "loss": 0.9029, "step": 10674 }, { "epoch": 0.047257514719553764, "grad_norm": 2.573850135463267, "learning_rate": 4.725751471955377e-06, "loss": 0.5819, "step": 10675 }, { "epoch": 0.047261941653016955, "grad_norm": 2.4713860142105277, "learning_rate": 4.726194165301696e-06, "loss": 0.586, "step": 10676 }, { "epoch": 0.047266368586480145, "grad_norm": 3.349471509713494, "learning_rate": 4.726636858648015e-06, "loss": 0.6751, "step": 10677 }, { "epoch": 0.047270795519943336, "grad_norm": 2.2718992388521415, "learning_rate": 4.727079551994334e-06, "loss": 0.617, "step": 10678 }, { "epoch": 0.047275222453406526, "grad_norm": 2.450751256492074, "learning_rate": 4.727522245340653e-06, "loss": 0.7513, "step": 10679 }, { "epoch": 0.04727964938686972, "grad_norm": 2.73520307923462, "learning_rate": 4.727964938686972e-06, "loss": 0.7256, "step": 10680 }, { "epoch": 0.04728407632033291, "grad_norm": 2.599110849284114, "learning_rate": 4.728407632033291e-06, "loss": 0.7001, "step": 10681 }, { "epoch": 0.0472885032537961, "grad_norm": 2.714377012877291, "learning_rate": 4.7288503253796095e-06, "loss": 0.7492, "step": 10682 }, { "epoch": 0.04729293018725929, "grad_norm": 2.911126851586058, "learning_rate": 4.729293018725929e-06, "loss": 0.8036, "step": 10683 }, { "epoch": 0.04729735712072248, "grad_norm": 2.6620204769833573, "learning_rate": 4.729735712072248e-06, "loss": 0.7602, "step": 10684 }, { "epoch": 0.04730178405418566, "grad_norm": 2.9313419641816614, "learning_rate": 4.730178405418567e-06, "loss": 0.8252, "step": 10685 }, { "epoch": 0.04730621098764885, "grad_norm": 3.0352699541655253, "learning_rate": 4.730621098764886e-06, "loss": 0.9118, "step": 10686 }, { "epoch": 0.04731063792111204, "grad_norm": 3.7073121500735033, "learning_rate": 4.7310637921112055e-06, "loss": 0.9349, "step": 10687 }, { "epoch": 0.047315064854575234, "grad_norm": 3.2080765840377374, "learning_rate": 4.731506485457524e-06, "loss": 0.577, "step": 10688 }, { "epoch": 0.047319491788038424, "grad_norm": 3.4958722825992052, "learning_rate": 4.731949178803843e-06, "loss": 0.5754, "step": 10689 }, { "epoch": 0.047323918721501615, "grad_norm": 2.3187402257580123, "learning_rate": 4.732391872150162e-06, "loss": 0.5115, "step": 10690 }, { "epoch": 0.047328345654964805, "grad_norm": 4.20495897389575, "learning_rate": 4.7328345654964805e-06, "loss": 0.7784, "step": 10691 }, { "epoch": 0.047332772588427996, "grad_norm": 3.1109399348511757, "learning_rate": 4.7332772588428e-06, "loss": 0.9711, "step": 10692 }, { "epoch": 0.047337199521891186, "grad_norm": 3.596099420452753, "learning_rate": 4.733719952189119e-06, "loss": 1.029, "step": 10693 }, { "epoch": 0.04734162645535438, "grad_norm": 2.689065413170569, "learning_rate": 4.734162645535438e-06, "loss": 0.6463, "step": 10694 }, { "epoch": 0.04734605338881757, "grad_norm": 2.339298460244897, "learning_rate": 4.734605338881757e-06, "loss": 0.6652, "step": 10695 }, { "epoch": 0.04735048032228076, "grad_norm": 3.728121080652313, "learning_rate": 4.7350480322280765e-06, "loss": 0.9756, "step": 10696 }, { "epoch": 0.04735490725574395, "grad_norm": 2.722569739690717, "learning_rate": 4.735490725574395e-06, "loss": 0.7496, "step": 10697 }, { "epoch": 0.04735933418920714, "grad_norm": 3.0053079689417097, "learning_rate": 4.735933418920714e-06, "loss": 0.7862, "step": 10698 }, { "epoch": 0.04736376112267033, "grad_norm": 2.5126712565612914, "learning_rate": 4.736376112267033e-06, "loss": 0.7269, "step": 10699 }, { "epoch": 0.04736818805613351, "grad_norm": 2.699077850691252, "learning_rate": 4.736818805613351e-06, "loss": 0.605, "step": 10700 }, { "epoch": 0.047372614989596704, "grad_norm": 2.8236819255130006, "learning_rate": 4.737261498959671e-06, "loss": 0.8264, "step": 10701 }, { "epoch": 0.047377041923059894, "grad_norm": 2.73025360799625, "learning_rate": 4.73770419230599e-06, "loss": 0.6318, "step": 10702 }, { "epoch": 0.047381468856523085, "grad_norm": 3.6695504601234417, "learning_rate": 4.738146885652309e-06, "loss": 0.9919, "step": 10703 }, { "epoch": 0.047385895789986275, "grad_norm": 2.6207442642424224, "learning_rate": 4.738589578998628e-06, "loss": 0.6404, "step": 10704 }, { "epoch": 0.047390322723449466, "grad_norm": 2.4518344221538144, "learning_rate": 4.7390322723449474e-06, "loss": 0.6654, "step": 10705 }, { "epoch": 0.047394749656912656, "grad_norm": 2.982017045081718, "learning_rate": 4.739474965691266e-06, "loss": 0.711, "step": 10706 }, { "epoch": 0.04739917659037585, "grad_norm": 3.4526808087774428, "learning_rate": 4.739917659037585e-06, "loss": 0.8792, "step": 10707 }, { "epoch": 0.04740360352383904, "grad_norm": 2.6419750578975116, "learning_rate": 4.740360352383904e-06, "loss": 0.9675, "step": 10708 }, { "epoch": 0.04740803045730223, "grad_norm": 2.654471099620153, "learning_rate": 4.740803045730223e-06, "loss": 0.8198, "step": 10709 }, { "epoch": 0.04741245739076542, "grad_norm": 3.553370115044947, "learning_rate": 4.741245739076542e-06, "loss": 0.8691, "step": 10710 }, { "epoch": 0.04741688432422861, "grad_norm": 3.7297896734610214, "learning_rate": 4.741688432422861e-06, "loss": 1.0514, "step": 10711 }, { "epoch": 0.0474213112576918, "grad_norm": 3.423792639359363, "learning_rate": 4.74213112576918e-06, "loss": 0.9163, "step": 10712 }, { "epoch": 0.04742573819115499, "grad_norm": 3.0629280688815137, "learning_rate": 4.742573819115499e-06, "loss": 0.9833, "step": 10713 }, { "epoch": 0.04743016512461818, "grad_norm": 3.749348408594911, "learning_rate": 4.743016512461818e-06, "loss": 0.901, "step": 10714 }, { "epoch": 0.047434592058081364, "grad_norm": 2.8777708991152955, "learning_rate": 4.743459205808137e-06, "loss": 0.694, "step": 10715 }, { "epoch": 0.047439018991544554, "grad_norm": 2.9933545302570637, "learning_rate": 4.743901899154456e-06, "loss": 0.7302, "step": 10716 }, { "epoch": 0.047443445925007745, "grad_norm": 3.046093739091809, "learning_rate": 4.744344592500776e-06, "loss": 0.7719, "step": 10717 }, { "epoch": 0.047447872858470935, "grad_norm": 3.2601253410938993, "learning_rate": 4.744787285847094e-06, "loss": 1.1211, "step": 10718 }, { "epoch": 0.047452299791934126, "grad_norm": 3.4134927424858974, "learning_rate": 4.745229979193413e-06, "loss": 0.9526, "step": 10719 }, { "epoch": 0.047456726725397316, "grad_norm": 3.2072410096030115, "learning_rate": 4.745672672539732e-06, "loss": 0.9109, "step": 10720 }, { "epoch": 0.04746115365886051, "grad_norm": 3.3115123885110496, "learning_rate": 4.746115365886051e-06, "loss": 1.1809, "step": 10721 }, { "epoch": 0.0474655805923237, "grad_norm": 3.1955386855511607, "learning_rate": 4.74655805923237e-06, "loss": 0.9665, "step": 10722 }, { "epoch": 0.04747000752578689, "grad_norm": 3.1695937908266965, "learning_rate": 4.747000752578689e-06, "loss": 0.6077, "step": 10723 }, { "epoch": 0.04747443445925008, "grad_norm": 2.5658098630082886, "learning_rate": 4.747443445925008e-06, "loss": 0.6292, "step": 10724 }, { "epoch": 0.04747886139271327, "grad_norm": 3.0692984294249004, "learning_rate": 4.747886139271327e-06, "loss": 1.0102, "step": 10725 }, { "epoch": 0.04748328832617646, "grad_norm": 2.4789798360285857, "learning_rate": 4.748328832617647e-06, "loss": 0.8013, "step": 10726 }, { "epoch": 0.04748771525963965, "grad_norm": 2.448111718418214, "learning_rate": 4.748771525963965e-06, "loss": 0.7168, "step": 10727 }, { "epoch": 0.04749214219310284, "grad_norm": 2.9378963626570407, "learning_rate": 4.7492142193102845e-06, "loss": 0.6091, "step": 10728 }, { "epoch": 0.04749656912656603, "grad_norm": 2.1841952516151317, "learning_rate": 4.749656912656603e-06, "loss": 0.4944, "step": 10729 }, { "epoch": 0.047500996060029214, "grad_norm": 2.407760819110563, "learning_rate": 4.750099606002922e-06, "loss": 0.5765, "step": 10730 }, { "epoch": 0.047505422993492405, "grad_norm": 3.2507186588075747, "learning_rate": 4.750542299349241e-06, "loss": 1.1431, "step": 10731 }, { "epoch": 0.047509849926955595, "grad_norm": 3.275146975406704, "learning_rate": 4.75098499269556e-06, "loss": 0.6966, "step": 10732 }, { "epoch": 0.047514276860418786, "grad_norm": 2.830795128023966, "learning_rate": 4.751427686041879e-06, "loss": 0.6872, "step": 10733 }, { "epoch": 0.047518703793881976, "grad_norm": 2.8730795350763296, "learning_rate": 4.751870379388198e-06, "loss": 0.5558, "step": 10734 }, { "epoch": 0.04752313072734517, "grad_norm": 2.848915539787392, "learning_rate": 4.7523130727345175e-06, "loss": 0.7062, "step": 10735 }, { "epoch": 0.04752755766080836, "grad_norm": 3.1141499780927555, "learning_rate": 4.752755766080836e-06, "loss": 0.964, "step": 10736 }, { "epoch": 0.04753198459427155, "grad_norm": 2.4095441804069364, "learning_rate": 4.7531984594271554e-06, "loss": 0.515, "step": 10737 }, { "epoch": 0.04753641152773474, "grad_norm": 3.07656842744496, "learning_rate": 4.753641152773475e-06, "loss": 0.7645, "step": 10738 }, { "epoch": 0.04754083846119793, "grad_norm": 2.5272640648289078, "learning_rate": 4.754083846119793e-06, "loss": 0.3734, "step": 10739 }, { "epoch": 0.04754526539466112, "grad_norm": 2.4179181335022237, "learning_rate": 4.754526539466112e-06, "loss": 0.5823, "step": 10740 }, { "epoch": 0.04754969232812431, "grad_norm": 2.864267902699965, "learning_rate": 4.754969232812431e-06, "loss": 0.9492, "step": 10741 }, { "epoch": 0.0475541192615875, "grad_norm": 2.3721230783901133, "learning_rate": 4.75541192615875e-06, "loss": 0.6244, "step": 10742 }, { "epoch": 0.04755854619505069, "grad_norm": 2.836372693480424, "learning_rate": 4.755854619505069e-06, "loss": 0.6602, "step": 10743 }, { "epoch": 0.04756297312851388, "grad_norm": 3.047612876567751, "learning_rate": 4.7562973128513885e-06, "loss": 1.0814, "step": 10744 }, { "epoch": 0.047567400061977065, "grad_norm": 2.386274620143975, "learning_rate": 4.756740006197707e-06, "loss": 0.7118, "step": 10745 }, { "epoch": 0.047571826995440256, "grad_norm": 3.018342122993206, "learning_rate": 4.757182699544026e-06, "loss": 0.9868, "step": 10746 }, { "epoch": 0.047576253928903446, "grad_norm": 2.843498965064588, "learning_rate": 4.757625392890346e-06, "loss": 0.6963, "step": 10747 }, { "epoch": 0.04758068086236664, "grad_norm": 2.6872691124845947, "learning_rate": 4.758068086236664e-06, "loss": 0.8003, "step": 10748 }, { "epoch": 0.04758510779582983, "grad_norm": 2.798357352340837, "learning_rate": 4.758510779582984e-06, "loss": 0.9084, "step": 10749 }, { "epoch": 0.04758953472929302, "grad_norm": 2.472649776914711, "learning_rate": 4.758953472929302e-06, "loss": 0.5553, "step": 10750 }, { "epoch": 0.04759396166275621, "grad_norm": 3.04997259758949, "learning_rate": 4.759396166275621e-06, "loss": 0.8887, "step": 10751 }, { "epoch": 0.0475983885962194, "grad_norm": 2.2244961825690455, "learning_rate": 4.75983885962194e-06, "loss": 0.6365, "step": 10752 }, { "epoch": 0.04760281552968259, "grad_norm": 2.8031861471344435, "learning_rate": 4.7602815529682594e-06, "loss": 0.8544, "step": 10753 }, { "epoch": 0.04760724246314578, "grad_norm": 2.759655341459307, "learning_rate": 4.760724246314578e-06, "loss": 1.0988, "step": 10754 }, { "epoch": 0.04761166939660897, "grad_norm": 2.423358939180133, "learning_rate": 4.761166939660897e-06, "loss": 0.4322, "step": 10755 }, { "epoch": 0.04761609633007216, "grad_norm": 2.479715038176018, "learning_rate": 4.761609633007217e-06, "loss": 0.8912, "step": 10756 }, { "epoch": 0.04762052326353535, "grad_norm": 2.846042825724154, "learning_rate": 4.762052326353535e-06, "loss": 0.6939, "step": 10757 }, { "epoch": 0.04762495019699854, "grad_norm": 2.523622569044762, "learning_rate": 4.762495019699855e-06, "loss": 0.4347, "step": 10758 }, { "epoch": 0.04762937713046173, "grad_norm": 2.5791146063707533, "learning_rate": 4.762937713046173e-06, "loss": 0.8522, "step": 10759 }, { "epoch": 0.047633804063924916, "grad_norm": 3.225375062936052, "learning_rate": 4.7633804063924925e-06, "loss": 1.1081, "step": 10760 }, { "epoch": 0.047638230997388106, "grad_norm": 2.5163118979993704, "learning_rate": 4.763823099738811e-06, "loss": 0.7878, "step": 10761 }, { "epoch": 0.0476426579308513, "grad_norm": 3.196137339084351, "learning_rate": 4.76426579308513e-06, "loss": 0.784, "step": 10762 }, { "epoch": 0.04764708486431449, "grad_norm": 2.896964186957735, "learning_rate": 4.764708486431449e-06, "loss": 0.7893, "step": 10763 }, { "epoch": 0.04765151179777768, "grad_norm": 2.7930430525855527, "learning_rate": 4.765151179777768e-06, "loss": 0.6742, "step": 10764 }, { "epoch": 0.04765593873124087, "grad_norm": 3.1250256293770313, "learning_rate": 4.765593873124088e-06, "loss": 0.7889, "step": 10765 }, { "epoch": 0.04766036566470406, "grad_norm": 2.5316724808787368, "learning_rate": 4.766036566470406e-06, "loss": 0.5235, "step": 10766 }, { "epoch": 0.04766479259816725, "grad_norm": 3.066032091456602, "learning_rate": 4.7664792598167255e-06, "loss": 0.8222, "step": 10767 }, { "epoch": 0.04766921953163044, "grad_norm": 3.1966096682437732, "learning_rate": 4.766921953163045e-06, "loss": 1.0218, "step": 10768 }, { "epoch": 0.04767364646509363, "grad_norm": 2.648175481597672, "learning_rate": 4.7673646465093634e-06, "loss": 0.7113, "step": 10769 }, { "epoch": 0.04767807339855682, "grad_norm": 3.7025445249674926, "learning_rate": 4.767807339855682e-06, "loss": 1.216, "step": 10770 }, { "epoch": 0.04768250033202001, "grad_norm": 2.968840683968681, "learning_rate": 4.768250033202001e-06, "loss": 0.6915, "step": 10771 }, { "epoch": 0.0476869272654832, "grad_norm": 2.2987525031973433, "learning_rate": 4.76869272654832e-06, "loss": 0.6645, "step": 10772 }, { "epoch": 0.04769135419894639, "grad_norm": 3.4662906913392444, "learning_rate": 4.769135419894639e-06, "loss": 0.5753, "step": 10773 }, { "epoch": 0.04769578113240958, "grad_norm": 2.8383317501591927, "learning_rate": 4.769578113240959e-06, "loss": 0.8509, "step": 10774 }, { "epoch": 0.04770020806587277, "grad_norm": 3.464157589525222, "learning_rate": 4.770020806587277e-06, "loss": 0.8983, "step": 10775 }, { "epoch": 0.04770463499933596, "grad_norm": 2.6924401525318515, "learning_rate": 4.7704634999335965e-06, "loss": 0.6416, "step": 10776 }, { "epoch": 0.04770906193279915, "grad_norm": 2.3907788495275124, "learning_rate": 4.770906193279916e-06, "loss": 0.5952, "step": 10777 }, { "epoch": 0.04771348886626234, "grad_norm": 2.847902382412748, "learning_rate": 4.771348886626234e-06, "loss": 0.6894, "step": 10778 }, { "epoch": 0.04771791579972553, "grad_norm": 2.6867009545970744, "learning_rate": 4.771791579972554e-06, "loss": 1.0581, "step": 10779 }, { "epoch": 0.04772234273318872, "grad_norm": 2.550460488935775, "learning_rate": 4.772234273318872e-06, "loss": 0.6028, "step": 10780 }, { "epoch": 0.04772676966665191, "grad_norm": 2.41595817676016, "learning_rate": 4.772676966665191e-06, "loss": 0.6105, "step": 10781 }, { "epoch": 0.0477311966001151, "grad_norm": 3.020428857928604, "learning_rate": 4.77311966001151e-06, "loss": 1.2466, "step": 10782 }, { "epoch": 0.04773562353357829, "grad_norm": 2.851173679578781, "learning_rate": 4.7735623533578295e-06, "loss": 0.7246, "step": 10783 }, { "epoch": 0.04774005046704148, "grad_norm": 2.627306114167237, "learning_rate": 4.774005046704148e-06, "loss": 0.7423, "step": 10784 }, { "epoch": 0.04774447740050467, "grad_norm": 3.3128164500632002, "learning_rate": 4.7744477400504674e-06, "loss": 1.363, "step": 10785 }, { "epoch": 0.04774890433396786, "grad_norm": 2.7182247239489548, "learning_rate": 4.774890433396787e-06, "loss": 0.6245, "step": 10786 }, { "epoch": 0.04775333126743105, "grad_norm": 3.0702999604349355, "learning_rate": 4.775333126743105e-06, "loss": 0.9464, "step": 10787 }, { "epoch": 0.04775775820089424, "grad_norm": 2.7394605004413664, "learning_rate": 4.775775820089425e-06, "loss": 0.7424, "step": 10788 }, { "epoch": 0.047762185134357434, "grad_norm": 2.9200303332829134, "learning_rate": 4.776218513435743e-06, "loss": 0.6779, "step": 10789 }, { "epoch": 0.047766612067820624, "grad_norm": 2.9555334152384534, "learning_rate": 4.776661206782063e-06, "loss": 0.9308, "step": 10790 }, { "epoch": 0.04777103900128381, "grad_norm": 3.1944878447435388, "learning_rate": 4.777103900128381e-06, "loss": 0.793, "step": 10791 }, { "epoch": 0.047775465934747, "grad_norm": 3.1750994572993894, "learning_rate": 4.7775465934747005e-06, "loss": 0.7329, "step": 10792 }, { "epoch": 0.04777989286821019, "grad_norm": 2.4696964332391564, "learning_rate": 4.777989286821019e-06, "loss": 0.7637, "step": 10793 }, { "epoch": 0.04778431980167338, "grad_norm": 2.3484527066352796, "learning_rate": 4.778431980167338e-06, "loss": 0.5964, "step": 10794 }, { "epoch": 0.04778874673513657, "grad_norm": 2.726557847098067, "learning_rate": 4.778874673513658e-06, "loss": 0.9084, "step": 10795 }, { "epoch": 0.04779317366859976, "grad_norm": 3.3246541763463506, "learning_rate": 4.779317366859976e-06, "loss": 1.0762, "step": 10796 }, { "epoch": 0.04779760060206295, "grad_norm": 3.126797015745481, "learning_rate": 4.779760060206296e-06, "loss": 0.6244, "step": 10797 }, { "epoch": 0.04780202753552614, "grad_norm": 3.5446810250324794, "learning_rate": 4.780202753552615e-06, "loss": 0.759, "step": 10798 }, { "epoch": 0.04780645446898933, "grad_norm": 2.935354059449646, "learning_rate": 4.7806454468989335e-06, "loss": 0.9341, "step": 10799 }, { "epoch": 0.04781088140245252, "grad_norm": 2.5997386630498722, "learning_rate": 4.781088140245252e-06, "loss": 0.8318, "step": 10800 }, { "epoch": 0.04781530833591571, "grad_norm": 2.7285775149668026, "learning_rate": 4.7815308335915714e-06, "loss": 0.9846, "step": 10801 }, { "epoch": 0.0478197352693789, "grad_norm": 2.7952132788620228, "learning_rate": 4.78197352693789e-06, "loss": 0.8007, "step": 10802 }, { "epoch": 0.047824162202842094, "grad_norm": 2.375353867902433, "learning_rate": 4.782416220284209e-06, "loss": 0.6999, "step": 10803 }, { "epoch": 0.047828589136305284, "grad_norm": 2.6103971898369003, "learning_rate": 4.782858913630529e-06, "loss": 0.575, "step": 10804 }, { "epoch": 0.047833016069768475, "grad_norm": 2.306727099976083, "learning_rate": 4.783301606976847e-06, "loss": 0.485, "step": 10805 }, { "epoch": 0.04783744300323166, "grad_norm": 2.7682628545065238, "learning_rate": 4.783744300323167e-06, "loss": 0.709, "step": 10806 }, { "epoch": 0.04784186993669485, "grad_norm": 4.588064706302093, "learning_rate": 4.784186993669486e-06, "loss": 0.8507, "step": 10807 }, { "epoch": 0.04784629687015804, "grad_norm": 3.733113908383307, "learning_rate": 4.7846296870158045e-06, "loss": 0.9029, "step": 10808 }, { "epoch": 0.04785072380362123, "grad_norm": 2.954167347586811, "learning_rate": 4.785072380362124e-06, "loss": 0.7023, "step": 10809 }, { "epoch": 0.04785515073708442, "grad_norm": 2.7230809870408326, "learning_rate": 4.785515073708442e-06, "loss": 0.6922, "step": 10810 }, { "epoch": 0.04785957767054761, "grad_norm": 2.3265676356918528, "learning_rate": 4.785957767054762e-06, "loss": 0.6181, "step": 10811 }, { "epoch": 0.0478640046040108, "grad_norm": 4.017662166670868, "learning_rate": 4.78640046040108e-06, "loss": 1.4284, "step": 10812 }, { "epoch": 0.04786843153747399, "grad_norm": 3.9617274182084525, "learning_rate": 4.7868431537474e-06, "loss": 1.1492, "step": 10813 }, { "epoch": 0.04787285847093718, "grad_norm": 3.562149457615251, "learning_rate": 4.787285847093718e-06, "loss": 0.6697, "step": 10814 }, { "epoch": 0.04787728540440037, "grad_norm": 2.755821094332676, "learning_rate": 4.7877285404400375e-06, "loss": 0.7325, "step": 10815 }, { "epoch": 0.04788171233786356, "grad_norm": 3.4221059103025526, "learning_rate": 4.788171233786357e-06, "loss": 1.089, "step": 10816 }, { "epoch": 0.047886139271326754, "grad_norm": 2.575377058186538, "learning_rate": 4.7886139271326754e-06, "loss": 0.6685, "step": 10817 }, { "epoch": 0.047890566204789944, "grad_norm": 3.287908634377632, "learning_rate": 4.789056620478995e-06, "loss": 0.9464, "step": 10818 }, { "epoch": 0.047894993138253135, "grad_norm": 4.072737811152527, "learning_rate": 4.789499313825313e-06, "loss": 1.659, "step": 10819 }, { "epoch": 0.047899420071716325, "grad_norm": 2.900408000461578, "learning_rate": 4.789942007171633e-06, "loss": 0.7937, "step": 10820 }, { "epoch": 0.04790384700517951, "grad_norm": 3.049348631926966, "learning_rate": 4.790384700517951e-06, "loss": 0.8919, "step": 10821 }, { "epoch": 0.0479082739386427, "grad_norm": 2.571574691656683, "learning_rate": 4.790827393864271e-06, "loss": 0.6746, "step": 10822 }, { "epoch": 0.04791270087210589, "grad_norm": 2.680471712403376, "learning_rate": 4.791270087210589e-06, "loss": 0.8229, "step": 10823 }, { "epoch": 0.04791712780556908, "grad_norm": 3.1374201164521613, "learning_rate": 4.7917127805569085e-06, "loss": 0.8374, "step": 10824 }, { "epoch": 0.04792155473903227, "grad_norm": 2.641017825569034, "learning_rate": 4.792155473903228e-06, "loss": 0.661, "step": 10825 }, { "epoch": 0.04792598167249546, "grad_norm": 3.531709670153657, "learning_rate": 4.792598167249546e-06, "loss": 0.9988, "step": 10826 }, { "epoch": 0.04793040860595865, "grad_norm": 3.2682650322048676, "learning_rate": 4.793040860595866e-06, "loss": 0.9581, "step": 10827 }, { "epoch": 0.04793483553942184, "grad_norm": 2.4037456836442406, "learning_rate": 4.793483553942185e-06, "loss": 0.706, "step": 10828 }, { "epoch": 0.04793926247288503, "grad_norm": 2.6128463642581408, "learning_rate": 4.793926247288504e-06, "loss": 0.8053, "step": 10829 }, { "epoch": 0.047943689406348224, "grad_norm": 3.2923849868101467, "learning_rate": 4.794368940634823e-06, "loss": 1.1139, "step": 10830 }, { "epoch": 0.047948116339811414, "grad_norm": 2.3711048934007106, "learning_rate": 4.7948116339811415e-06, "loss": 0.5638, "step": 10831 }, { "epoch": 0.047952543273274605, "grad_norm": 2.587022767172095, "learning_rate": 4.79525432732746e-06, "loss": 1.0572, "step": 10832 }, { "epoch": 0.047956970206737795, "grad_norm": 2.591636251435305, "learning_rate": 4.7956970206737794e-06, "loss": 0.6541, "step": 10833 }, { "epoch": 0.047961397140200986, "grad_norm": 3.1714087986989647, "learning_rate": 4.796139714020099e-06, "loss": 0.7849, "step": 10834 }, { "epoch": 0.047965824073664176, "grad_norm": 2.4829777157671438, "learning_rate": 4.796582407366417e-06, "loss": 0.5848, "step": 10835 }, { "epoch": 0.04797025100712736, "grad_norm": 2.992298390537329, "learning_rate": 4.797025100712737e-06, "loss": 0.9271, "step": 10836 }, { "epoch": 0.04797467794059055, "grad_norm": 3.0002345802319574, "learning_rate": 4.797467794059056e-06, "loss": 0.8121, "step": 10837 }, { "epoch": 0.04797910487405374, "grad_norm": 2.9429639318329053, "learning_rate": 4.797910487405375e-06, "loss": 0.8193, "step": 10838 }, { "epoch": 0.04798353180751693, "grad_norm": 3.7924554352763424, "learning_rate": 4.798353180751694e-06, "loss": 1.1412, "step": 10839 }, { "epoch": 0.04798795874098012, "grad_norm": 2.75406928767989, "learning_rate": 4.7987958740980125e-06, "loss": 0.899, "step": 10840 }, { "epoch": 0.04799238567444331, "grad_norm": 2.647570120406004, "learning_rate": 4.799238567444332e-06, "loss": 0.8313, "step": 10841 }, { "epoch": 0.0479968126079065, "grad_norm": 3.130457553902887, "learning_rate": 4.79968126079065e-06, "loss": 0.8944, "step": 10842 }, { "epoch": 0.04800123954136969, "grad_norm": 2.560380173822854, "learning_rate": 4.80012395413697e-06, "loss": 0.6228, "step": 10843 }, { "epoch": 0.048005666474832884, "grad_norm": 2.7916570754206327, "learning_rate": 4.800566647483288e-06, "loss": 0.9582, "step": 10844 }, { "epoch": 0.048010093408296074, "grad_norm": 2.7519329548567826, "learning_rate": 4.801009340829608e-06, "loss": 0.6667, "step": 10845 }, { "epoch": 0.048014520341759265, "grad_norm": 2.877353130041026, "learning_rate": 4.801452034175927e-06, "loss": 0.7501, "step": 10846 }, { "epoch": 0.048018947275222455, "grad_norm": 2.829243278114393, "learning_rate": 4.8018947275222455e-06, "loss": 0.6008, "step": 10847 }, { "epoch": 0.048023374208685646, "grad_norm": 2.961701987251831, "learning_rate": 4.802337420868565e-06, "loss": 0.8161, "step": 10848 }, { "epoch": 0.048027801142148836, "grad_norm": 2.675517286594874, "learning_rate": 4.802780114214884e-06, "loss": 0.7051, "step": 10849 }, { "epoch": 0.04803222807561203, "grad_norm": 3.435839744815912, "learning_rate": 4.803222807561203e-06, "loss": 0.9986, "step": 10850 }, { "epoch": 0.04803665500907521, "grad_norm": 2.352103558491243, "learning_rate": 4.803665500907521e-06, "loss": 0.7125, "step": 10851 }, { "epoch": 0.0480410819425384, "grad_norm": 2.8210296816583766, "learning_rate": 4.804108194253841e-06, "loss": 0.5538, "step": 10852 }, { "epoch": 0.04804550887600159, "grad_norm": 2.435057251837193, "learning_rate": 4.804550887600159e-06, "loss": 0.6252, "step": 10853 }, { "epoch": 0.04804993580946478, "grad_norm": 2.7993689022375783, "learning_rate": 4.804993580946479e-06, "loss": 0.8534, "step": 10854 }, { "epoch": 0.04805436274292797, "grad_norm": 2.529351192990267, "learning_rate": 4.805436274292798e-06, "loss": 0.7382, "step": 10855 }, { "epoch": 0.04805878967639116, "grad_norm": 2.239192768281857, "learning_rate": 4.8058789676391165e-06, "loss": 0.5538, "step": 10856 }, { "epoch": 0.04806321660985435, "grad_norm": 3.30864273991822, "learning_rate": 4.806321660985436e-06, "loss": 1.0299, "step": 10857 }, { "epoch": 0.048067643543317544, "grad_norm": 2.518637357988106, "learning_rate": 4.806764354331755e-06, "loss": 0.8189, "step": 10858 }, { "epoch": 0.048072070476780734, "grad_norm": 2.6127624334946167, "learning_rate": 4.807207047678074e-06, "loss": 0.8418, "step": 10859 }, { "epoch": 0.048076497410243925, "grad_norm": 2.7092419132443486, "learning_rate": 4.807649741024393e-06, "loss": 0.89, "step": 10860 }, { "epoch": 0.048080924343707115, "grad_norm": 2.7232216742745408, "learning_rate": 4.808092434370712e-06, "loss": 0.7419, "step": 10861 }, { "epoch": 0.048085351277170306, "grad_norm": 2.672266437945467, "learning_rate": 4.80853512771703e-06, "loss": 0.6949, "step": 10862 }, { "epoch": 0.048089778210633496, "grad_norm": 3.2060025647174815, "learning_rate": 4.8089778210633495e-06, "loss": 0.8787, "step": 10863 }, { "epoch": 0.04809420514409669, "grad_norm": 3.816522841712619, "learning_rate": 4.809420514409669e-06, "loss": 1.2374, "step": 10864 }, { "epoch": 0.04809863207755988, "grad_norm": 2.129824947801934, "learning_rate": 4.8098632077559874e-06, "loss": 0.6097, "step": 10865 }, { "epoch": 0.04810305901102306, "grad_norm": 2.4761346183931776, "learning_rate": 4.810305901102307e-06, "loss": 0.8584, "step": 10866 }, { "epoch": 0.04810748594448625, "grad_norm": 3.133186740687506, "learning_rate": 4.810748594448626e-06, "loss": 0.7736, "step": 10867 }, { "epoch": 0.04811191287794944, "grad_norm": 3.639538015231635, "learning_rate": 4.811191287794945e-06, "loss": 1.0039, "step": 10868 }, { "epoch": 0.04811633981141263, "grad_norm": 2.4786197392220313, "learning_rate": 4.811633981141264e-06, "loss": 0.7376, "step": 10869 }, { "epoch": 0.04812076674487582, "grad_norm": 2.898920087862737, "learning_rate": 4.812076674487583e-06, "loss": 0.6507, "step": 10870 }, { "epoch": 0.048125193678339014, "grad_norm": 3.055866154195869, "learning_rate": 4.812519367833902e-06, "loss": 0.4207, "step": 10871 }, { "epoch": 0.048129620611802204, "grad_norm": 2.776118359913558, "learning_rate": 4.8129620611802205e-06, "loss": 0.6895, "step": 10872 }, { "epoch": 0.048134047545265395, "grad_norm": 2.284068739486909, "learning_rate": 4.81340475452654e-06, "loss": 0.6158, "step": 10873 }, { "epoch": 0.048138474478728585, "grad_norm": 3.2676374741579854, "learning_rate": 4.813847447872858e-06, "loss": 0.9267, "step": 10874 }, { "epoch": 0.048142901412191776, "grad_norm": 4.008071126194864, "learning_rate": 4.814290141219178e-06, "loss": 1.0842, "step": 10875 }, { "epoch": 0.048147328345654966, "grad_norm": 2.7121621508934246, "learning_rate": 4.814732834565497e-06, "loss": 0.6772, "step": 10876 }, { "epoch": 0.04815175527911816, "grad_norm": 3.1311413365979894, "learning_rate": 4.815175527911816e-06, "loss": 0.8544, "step": 10877 }, { "epoch": 0.04815618221258135, "grad_norm": 3.6714967631303237, "learning_rate": 4.815618221258135e-06, "loss": 0.958, "step": 10878 }, { "epoch": 0.04816060914604454, "grad_norm": 2.893113604293035, "learning_rate": 4.816060914604454e-06, "loss": 0.8677, "step": 10879 }, { "epoch": 0.04816503607950773, "grad_norm": 2.5985843762914067, "learning_rate": 4.816503607950773e-06, "loss": 0.5875, "step": 10880 }, { "epoch": 0.04816946301297091, "grad_norm": 2.609955847819849, "learning_rate": 4.8169463012970914e-06, "loss": 0.729, "step": 10881 }, { "epoch": 0.0481738899464341, "grad_norm": 3.1289997810232593, "learning_rate": 4.817388994643411e-06, "loss": 0.7136, "step": 10882 }, { "epoch": 0.04817831687989729, "grad_norm": 3.716133515666016, "learning_rate": 4.817831687989729e-06, "loss": 0.745, "step": 10883 }, { "epoch": 0.04818274381336048, "grad_norm": 2.6878400129551316, "learning_rate": 4.818274381336049e-06, "loss": 0.5526, "step": 10884 }, { "epoch": 0.048187170746823674, "grad_norm": 2.5494179310365856, "learning_rate": 4.818717074682368e-06, "loss": 0.7487, "step": 10885 }, { "epoch": 0.048191597680286864, "grad_norm": 2.515635026909762, "learning_rate": 4.819159768028687e-06, "loss": 0.4617, "step": 10886 }, { "epoch": 0.048196024613750055, "grad_norm": 2.946854201918775, "learning_rate": 4.819602461375006e-06, "loss": 0.8422, "step": 10887 }, { "epoch": 0.048200451547213245, "grad_norm": 3.2860272808027537, "learning_rate": 4.820045154721325e-06, "loss": 1.0848, "step": 10888 }, { "epoch": 0.048204878480676436, "grad_norm": 2.652293666535205, "learning_rate": 4.820487848067644e-06, "loss": 0.8845, "step": 10889 }, { "epoch": 0.048209305414139626, "grad_norm": 4.286928179160693, "learning_rate": 4.820930541413963e-06, "loss": 0.7613, "step": 10890 }, { "epoch": 0.04821373234760282, "grad_norm": 3.0277901371266402, "learning_rate": 4.821373234760282e-06, "loss": 0.9455, "step": 10891 }, { "epoch": 0.04821815928106601, "grad_norm": 2.982955063345548, "learning_rate": 4.821815928106601e-06, "loss": 0.8442, "step": 10892 }, { "epoch": 0.0482225862145292, "grad_norm": 3.5547292465073004, "learning_rate": 4.82225862145292e-06, "loss": 1.1729, "step": 10893 }, { "epoch": 0.04822701314799239, "grad_norm": 3.3439436996980128, "learning_rate": 4.822701314799239e-06, "loss": 0.7834, "step": 10894 }, { "epoch": 0.04823144008145558, "grad_norm": 2.7552938565549185, "learning_rate": 4.8231440081455575e-06, "loss": 0.6259, "step": 10895 }, { "epoch": 0.04823586701491876, "grad_norm": 3.0652790220256074, "learning_rate": 4.823586701491877e-06, "loss": 0.8848, "step": 10896 }, { "epoch": 0.04824029394838195, "grad_norm": 3.705215709022287, "learning_rate": 4.824029394838196e-06, "loss": 1.0099, "step": 10897 }, { "epoch": 0.04824472088184514, "grad_norm": 2.3684016474197556, "learning_rate": 4.824472088184515e-06, "loss": 0.6147, "step": 10898 }, { "epoch": 0.048249147815308334, "grad_norm": 2.7618391598441034, "learning_rate": 4.824914781530834e-06, "loss": 0.6771, "step": 10899 }, { "epoch": 0.048253574748771524, "grad_norm": 2.6122301934676733, "learning_rate": 4.825357474877153e-06, "loss": 0.754, "step": 10900 }, { "epoch": 0.048258001682234715, "grad_norm": 2.5954265549964246, "learning_rate": 4.825800168223472e-06, "loss": 0.5834, "step": 10901 }, { "epoch": 0.048262428615697905, "grad_norm": 2.463797110072766, "learning_rate": 4.826242861569791e-06, "loss": 0.3421, "step": 10902 }, { "epoch": 0.048266855549161096, "grad_norm": 2.431435592280216, "learning_rate": 4.82668555491611e-06, "loss": 0.6315, "step": 10903 }, { "epoch": 0.048271282482624286, "grad_norm": 3.8180258016144837, "learning_rate": 4.8271282482624285e-06, "loss": 1.0107, "step": 10904 }, { "epoch": 0.04827570941608748, "grad_norm": 2.698132474741114, "learning_rate": 4.827570941608748e-06, "loss": 0.8054, "step": 10905 }, { "epoch": 0.04828013634955067, "grad_norm": 3.012084145195767, "learning_rate": 4.828013634955067e-06, "loss": 0.7294, "step": 10906 }, { "epoch": 0.04828456328301386, "grad_norm": 3.6150240547138837, "learning_rate": 4.828456328301386e-06, "loss": 0.9361, "step": 10907 }, { "epoch": 0.04828899021647705, "grad_norm": 2.9795392940041667, "learning_rate": 4.828899021647705e-06, "loss": 0.6826, "step": 10908 }, { "epoch": 0.04829341714994024, "grad_norm": 2.8120419370275735, "learning_rate": 4.8293417149940245e-06, "loss": 1.0513, "step": 10909 }, { "epoch": 0.04829784408340343, "grad_norm": 2.5504284036678975, "learning_rate": 4.829784408340343e-06, "loss": 0.757, "step": 10910 }, { "epoch": 0.04830227101686661, "grad_norm": 3.3390773272680816, "learning_rate": 4.830227101686662e-06, "loss": 0.8723, "step": 10911 }, { "epoch": 0.048306697950329804, "grad_norm": 3.0460243516813725, "learning_rate": 4.830669795032981e-06, "loss": 0.6578, "step": 10912 }, { "epoch": 0.048311124883792994, "grad_norm": 3.583245155101011, "learning_rate": 4.8311124883792994e-06, "loss": 0.9481, "step": 10913 }, { "epoch": 0.048315551817256185, "grad_norm": 2.858211326760492, "learning_rate": 4.831555181725619e-06, "loss": 0.7894, "step": 10914 }, { "epoch": 0.048319978750719375, "grad_norm": 2.6315246571272928, "learning_rate": 4.831997875071938e-06, "loss": 0.767, "step": 10915 }, { "epoch": 0.048324405684182566, "grad_norm": 2.922221688293995, "learning_rate": 4.832440568418257e-06, "loss": 0.8468, "step": 10916 }, { "epoch": 0.048328832617645756, "grad_norm": 2.310057643723757, "learning_rate": 4.832883261764576e-06, "loss": 0.639, "step": 10917 }, { "epoch": 0.04833325955110895, "grad_norm": 3.456306055288403, "learning_rate": 4.8333259551108954e-06, "loss": 1.0517, "step": 10918 }, { "epoch": 0.04833768648457214, "grad_norm": 3.064192913477443, "learning_rate": 4.833768648457214e-06, "loss": 0.8557, "step": 10919 }, { "epoch": 0.04834211341803533, "grad_norm": 3.559865654193175, "learning_rate": 4.834211341803533e-06, "loss": 1.0352, "step": 10920 }, { "epoch": 0.04834654035149852, "grad_norm": 3.148657238379398, "learning_rate": 4.834654035149852e-06, "loss": 0.9313, "step": 10921 }, { "epoch": 0.04835096728496171, "grad_norm": 2.908546110295818, "learning_rate": 4.835096728496171e-06, "loss": 1.1163, "step": 10922 }, { "epoch": 0.0483553942184249, "grad_norm": 2.2711178266529672, "learning_rate": 4.83553942184249e-06, "loss": 0.5927, "step": 10923 }, { "epoch": 0.04835982115188809, "grad_norm": 2.5831306468620387, "learning_rate": 4.835982115188809e-06, "loss": 0.7002, "step": 10924 }, { "epoch": 0.04836424808535128, "grad_norm": 2.6545664900492683, "learning_rate": 4.836424808535128e-06, "loss": 0.7643, "step": 10925 }, { "epoch": 0.04836867501881447, "grad_norm": 2.7007838617002893, "learning_rate": 4.836867501881447e-06, "loss": 0.7203, "step": 10926 }, { "epoch": 0.048373101952277654, "grad_norm": 2.7864343678203927, "learning_rate": 4.837310195227766e-06, "loss": 0.7412, "step": 10927 }, { "epoch": 0.048377528885740845, "grad_norm": 2.744547924806541, "learning_rate": 4.837752888574085e-06, "loss": 0.6316, "step": 10928 }, { "epoch": 0.048381955819204035, "grad_norm": 3.462979899787591, "learning_rate": 4.838195581920404e-06, "loss": 0.6782, "step": 10929 }, { "epoch": 0.048386382752667226, "grad_norm": 2.5091505414420663, "learning_rate": 4.838638275266724e-06, "loss": 0.5467, "step": 10930 }, { "epoch": 0.048390809686130416, "grad_norm": 2.951822204207016, "learning_rate": 4.839080968613042e-06, "loss": 0.6403, "step": 10931 }, { "epoch": 0.04839523661959361, "grad_norm": 2.819467515640695, "learning_rate": 4.839523661959361e-06, "loss": 0.919, "step": 10932 }, { "epoch": 0.0483996635530568, "grad_norm": 2.598314256159706, "learning_rate": 4.83996635530568e-06, "loss": 0.8976, "step": 10933 }, { "epoch": 0.04840409048651999, "grad_norm": 2.5233847421276225, "learning_rate": 4.840409048651999e-06, "loss": 0.6329, "step": 10934 }, { "epoch": 0.04840851741998318, "grad_norm": 2.5629207358890453, "learning_rate": 4.840851741998318e-06, "loss": 0.4541, "step": 10935 }, { "epoch": 0.04841294435344637, "grad_norm": 3.4273731384358106, "learning_rate": 4.841294435344637e-06, "loss": 1.3459, "step": 10936 }, { "epoch": 0.04841737128690956, "grad_norm": 2.8996116748906737, "learning_rate": 4.841737128690956e-06, "loss": 0.7702, "step": 10937 }, { "epoch": 0.04842179822037275, "grad_norm": 3.3175061837842623, "learning_rate": 4.842179822037275e-06, "loss": 1.1839, "step": 10938 }, { "epoch": 0.04842622515383594, "grad_norm": 2.199776575742537, "learning_rate": 4.842622515383595e-06, "loss": 0.5422, "step": 10939 }, { "epoch": 0.04843065208729913, "grad_norm": 3.996135774303295, "learning_rate": 4.843065208729913e-06, "loss": 1.2138, "step": 10940 }, { "epoch": 0.04843507902076232, "grad_norm": 2.470857997646495, "learning_rate": 4.8435079020762325e-06, "loss": 0.7389, "step": 10941 }, { "epoch": 0.048439505954225505, "grad_norm": 2.586115861642406, "learning_rate": 4.843950595422551e-06, "loss": 0.6343, "step": 10942 }, { "epoch": 0.048443932887688695, "grad_norm": 2.7571002949522647, "learning_rate": 4.8443932887688695e-06, "loss": 0.9074, "step": 10943 }, { "epoch": 0.048448359821151886, "grad_norm": 2.2832663330520115, "learning_rate": 4.844835982115189e-06, "loss": 0.5869, "step": 10944 }, { "epoch": 0.048452786754615076, "grad_norm": 3.2255531599004743, "learning_rate": 4.845278675461508e-06, "loss": 1.0204, "step": 10945 }, { "epoch": 0.04845721368807827, "grad_norm": 3.105634901087519, "learning_rate": 4.845721368807827e-06, "loss": 0.8754, "step": 10946 }, { "epoch": 0.04846164062154146, "grad_norm": 3.8609246769164267, "learning_rate": 4.846164062154146e-06, "loss": 1.146, "step": 10947 }, { "epoch": 0.04846606755500465, "grad_norm": 3.4354750241628973, "learning_rate": 4.8466067555004655e-06, "loss": 1.0465, "step": 10948 }, { "epoch": 0.04847049448846784, "grad_norm": 3.3989280581399752, "learning_rate": 4.847049448846784e-06, "loss": 0.6758, "step": 10949 }, { "epoch": 0.04847492142193103, "grad_norm": 2.828065411575756, "learning_rate": 4.8474921421931034e-06, "loss": 0.8254, "step": 10950 }, { "epoch": 0.04847934835539422, "grad_norm": 3.558424514110231, "learning_rate": 4.847934835539422e-06, "loss": 0.9042, "step": 10951 }, { "epoch": 0.04848377528885741, "grad_norm": 2.558162156174663, "learning_rate": 4.848377528885741e-06, "loss": 0.6897, "step": 10952 }, { "epoch": 0.0484882022223206, "grad_norm": 2.670048715068986, "learning_rate": 4.84882022223206e-06, "loss": 0.6739, "step": 10953 }, { "epoch": 0.04849262915578379, "grad_norm": 2.691403050453392, "learning_rate": 4.849262915578379e-06, "loss": 1.0123, "step": 10954 }, { "epoch": 0.04849705608924698, "grad_norm": 2.8832590993224803, "learning_rate": 4.849705608924698e-06, "loss": 0.9297, "step": 10955 }, { "epoch": 0.04850148302271017, "grad_norm": 2.750044895841248, "learning_rate": 4.850148302271017e-06, "loss": 0.9258, "step": 10956 }, { "epoch": 0.048505909956173356, "grad_norm": 3.1691907824840175, "learning_rate": 4.8505909956173365e-06, "loss": 1.3861, "step": 10957 }, { "epoch": 0.048510336889636546, "grad_norm": 2.575337358209772, "learning_rate": 4.851033688963655e-06, "loss": 0.622, "step": 10958 }, { "epoch": 0.04851476382309974, "grad_norm": 4.0225215783269315, "learning_rate": 4.851476382309974e-06, "loss": 1.2967, "step": 10959 }, { "epoch": 0.04851919075656293, "grad_norm": 2.7942046482815304, "learning_rate": 4.851919075656294e-06, "loss": 0.9691, "step": 10960 }, { "epoch": 0.04852361769002612, "grad_norm": 2.9083284392102087, "learning_rate": 4.852361769002612e-06, "loss": 0.8293, "step": 10961 }, { "epoch": 0.04852804462348931, "grad_norm": 2.1552603829797423, "learning_rate": 4.852804462348931e-06, "loss": 0.5742, "step": 10962 }, { "epoch": 0.0485324715569525, "grad_norm": 3.809556477363199, "learning_rate": 4.85324715569525e-06, "loss": 0.9328, "step": 10963 }, { "epoch": 0.04853689849041569, "grad_norm": 3.1166360740262244, "learning_rate": 4.853689849041569e-06, "loss": 0.8199, "step": 10964 }, { "epoch": 0.04854132542387888, "grad_norm": 2.6757544717244683, "learning_rate": 4.854132542387888e-06, "loss": 0.9633, "step": 10965 }, { "epoch": 0.04854575235734207, "grad_norm": 3.487409503968886, "learning_rate": 4.8545752357342074e-06, "loss": 1.0085, "step": 10966 }, { "epoch": 0.04855017929080526, "grad_norm": 3.1159607851888573, "learning_rate": 4.855017929080526e-06, "loss": 0.6743, "step": 10967 }, { "epoch": 0.04855460622426845, "grad_norm": 3.0596978264947707, "learning_rate": 4.855460622426845e-06, "loss": 1.0206, "step": 10968 }, { "epoch": 0.04855903315773164, "grad_norm": 2.055120750669005, "learning_rate": 4.855903315773165e-06, "loss": 0.4824, "step": 10969 }, { "epoch": 0.04856346009119483, "grad_norm": 4.1637877217389, "learning_rate": 4.856346009119483e-06, "loss": 0.9728, "step": 10970 }, { "epoch": 0.04856788702465802, "grad_norm": 2.4770123642653172, "learning_rate": 4.856788702465803e-06, "loss": 0.5472, "step": 10971 }, { "epoch": 0.048572313958121206, "grad_norm": 2.67232243045412, "learning_rate": 4.857231395812121e-06, "loss": 0.8795, "step": 10972 }, { "epoch": 0.0485767408915844, "grad_norm": 2.693253538664008, "learning_rate": 4.8576740891584405e-06, "loss": 0.6723, "step": 10973 }, { "epoch": 0.04858116782504759, "grad_norm": 2.670394056167099, "learning_rate": 4.858116782504759e-06, "loss": 0.8982, "step": 10974 }, { "epoch": 0.04858559475851078, "grad_norm": 3.016482866894097, "learning_rate": 4.858559475851078e-06, "loss": 0.8271, "step": 10975 }, { "epoch": 0.04859002169197397, "grad_norm": 3.0266756959064356, "learning_rate": 4.859002169197397e-06, "loss": 0.8665, "step": 10976 }, { "epoch": 0.04859444862543716, "grad_norm": 2.8545671683206773, "learning_rate": 4.859444862543716e-06, "loss": 0.6428, "step": 10977 }, { "epoch": 0.04859887555890035, "grad_norm": 2.8448874683963767, "learning_rate": 4.859887555890036e-06, "loss": 0.5535, "step": 10978 }, { "epoch": 0.04860330249236354, "grad_norm": 2.8441404069724405, "learning_rate": 4.860330249236354e-06, "loss": 0.7738, "step": 10979 }, { "epoch": 0.04860772942582673, "grad_norm": 3.1187726396057958, "learning_rate": 4.8607729425826735e-06, "loss": 0.981, "step": 10980 }, { "epoch": 0.04861215635928992, "grad_norm": 3.134304877100993, "learning_rate": 4.861215635928992e-06, "loss": 0.8859, "step": 10981 }, { "epoch": 0.04861658329275311, "grad_norm": 3.8706698463504647, "learning_rate": 4.8616583292753114e-06, "loss": 0.9569, "step": 10982 }, { "epoch": 0.0486210102262163, "grad_norm": 2.8309118716917103, "learning_rate": 4.86210102262163e-06, "loss": 0.8553, "step": 10983 }, { "epoch": 0.04862543715967949, "grad_norm": 2.765448487637958, "learning_rate": 4.862543715967949e-06, "loss": 0.7286, "step": 10984 }, { "epoch": 0.04862986409314268, "grad_norm": 2.4513285316086724, "learning_rate": 4.862986409314268e-06, "loss": 0.6002, "step": 10985 }, { "epoch": 0.04863429102660587, "grad_norm": 2.478885430240255, "learning_rate": 4.863429102660587e-06, "loss": 0.6522, "step": 10986 }, { "epoch": 0.04863871796006906, "grad_norm": 2.5102999760374045, "learning_rate": 4.863871796006907e-06, "loss": 0.5799, "step": 10987 }, { "epoch": 0.04864314489353225, "grad_norm": 3.147383585729555, "learning_rate": 4.864314489353225e-06, "loss": 1.1802, "step": 10988 }, { "epoch": 0.04864757182699544, "grad_norm": 2.680446376322298, "learning_rate": 4.8647571826995445e-06, "loss": 0.6334, "step": 10989 }, { "epoch": 0.04865199876045863, "grad_norm": 2.9351062797591805, "learning_rate": 4.865199876045864e-06, "loss": 0.7306, "step": 10990 }, { "epoch": 0.04865642569392182, "grad_norm": 3.1769374961346055, "learning_rate": 4.865642569392182e-06, "loss": 0.8316, "step": 10991 }, { "epoch": 0.04866085262738501, "grad_norm": 2.732872547933729, "learning_rate": 4.866085262738502e-06, "loss": 0.6948, "step": 10992 }, { "epoch": 0.0486652795608482, "grad_norm": 3.8621558230126993, "learning_rate": 4.86652795608482e-06, "loss": 0.9231, "step": 10993 }, { "epoch": 0.04866970649431139, "grad_norm": 4.27247011519808, "learning_rate": 4.866970649431139e-06, "loss": 1.251, "step": 10994 }, { "epoch": 0.04867413342777458, "grad_norm": 2.583810114563445, "learning_rate": 4.867413342777458e-06, "loss": 0.5009, "step": 10995 }, { "epoch": 0.04867856036123777, "grad_norm": 2.4545250080227548, "learning_rate": 4.8678560361237775e-06, "loss": 0.7689, "step": 10996 }, { "epoch": 0.04868298729470096, "grad_norm": 2.6104344838880307, "learning_rate": 4.868298729470096e-06, "loss": 0.6689, "step": 10997 }, { "epoch": 0.04868741422816415, "grad_norm": 2.7091626940852676, "learning_rate": 4.8687414228164154e-06, "loss": 0.4329, "step": 10998 }, { "epoch": 0.04869184116162734, "grad_norm": 2.948985768832743, "learning_rate": 4.869184116162735e-06, "loss": 0.7622, "step": 10999 }, { "epoch": 0.048696268095090534, "grad_norm": 2.996361153183582, "learning_rate": 4.869626809509053e-06, "loss": 0.8808, "step": 11000 }, { "epoch": 0.048700695028553724, "grad_norm": 2.764516012952326, "learning_rate": 4.870069502855373e-06, "loss": 0.6608, "step": 11001 }, { "epoch": 0.04870512196201691, "grad_norm": 2.7987287950826807, "learning_rate": 4.870512196201691e-06, "loss": 0.8372, "step": 11002 }, { "epoch": 0.0487095488954801, "grad_norm": 2.5939066689652286, "learning_rate": 4.870954889548011e-06, "loss": 0.7152, "step": 11003 }, { "epoch": 0.04871397582894329, "grad_norm": 2.2785261620791197, "learning_rate": 4.871397582894329e-06, "loss": 0.5918, "step": 11004 }, { "epoch": 0.04871840276240648, "grad_norm": 2.8484961062369756, "learning_rate": 4.8718402762406485e-06, "loss": 0.7559, "step": 11005 }, { "epoch": 0.04872282969586967, "grad_norm": 2.7374420928094008, "learning_rate": 4.872282969586968e-06, "loss": 0.8521, "step": 11006 }, { "epoch": 0.04872725662933286, "grad_norm": 3.0864901377519867, "learning_rate": 4.872725662933286e-06, "loss": 1.1392, "step": 11007 }, { "epoch": 0.04873168356279605, "grad_norm": 2.5547546122546034, "learning_rate": 4.873168356279606e-06, "loss": 0.7078, "step": 11008 }, { "epoch": 0.04873611049625924, "grad_norm": 3.277673114628005, "learning_rate": 4.873611049625925e-06, "loss": 0.7501, "step": 11009 }, { "epoch": 0.04874053742972243, "grad_norm": 3.93766591503926, "learning_rate": 4.874053742972244e-06, "loss": 0.9596, "step": 11010 }, { "epoch": 0.04874496436318562, "grad_norm": 2.9492658534644556, "learning_rate": 4.874496436318563e-06, "loss": 0.6118, "step": 11011 }, { "epoch": 0.04874939129664881, "grad_norm": 2.2725668789723463, "learning_rate": 4.8749391296648815e-06, "loss": 0.5551, "step": 11012 }, { "epoch": 0.048753818230112, "grad_norm": 2.564142193575067, "learning_rate": 4.8753818230112e-06, "loss": 0.8423, "step": 11013 }, { "epoch": 0.048758245163575194, "grad_norm": 2.5352924734122833, "learning_rate": 4.8758245163575194e-06, "loss": 0.8071, "step": 11014 }, { "epoch": 0.048762672097038384, "grad_norm": 3.242089687973812, "learning_rate": 4.876267209703839e-06, "loss": 0.9219, "step": 11015 }, { "epoch": 0.048767099030501575, "grad_norm": 3.3393642348132597, "learning_rate": 4.876709903050157e-06, "loss": 0.7281, "step": 11016 }, { "epoch": 0.04877152596396476, "grad_norm": 3.3818771179981204, "learning_rate": 4.877152596396477e-06, "loss": 0.9842, "step": 11017 }, { "epoch": 0.04877595289742795, "grad_norm": 2.358869961562946, "learning_rate": 4.877595289742796e-06, "loss": 0.7293, "step": 11018 }, { "epoch": 0.04878037983089114, "grad_norm": 2.2838245679454103, "learning_rate": 4.878037983089115e-06, "loss": 0.716, "step": 11019 }, { "epoch": 0.04878480676435433, "grad_norm": 2.827405478172065, "learning_rate": 4.878480676435434e-06, "loss": 0.6779, "step": 11020 }, { "epoch": 0.04878923369781752, "grad_norm": 2.9074546014877716, "learning_rate": 4.8789233697817525e-06, "loss": 0.7956, "step": 11021 }, { "epoch": 0.04879366063128071, "grad_norm": 2.938856696222919, "learning_rate": 4.879366063128072e-06, "loss": 0.6851, "step": 11022 }, { "epoch": 0.0487980875647439, "grad_norm": 2.928872359646289, "learning_rate": 4.87980875647439e-06, "loss": 0.6848, "step": 11023 }, { "epoch": 0.04880251449820709, "grad_norm": 2.6635189401537507, "learning_rate": 4.88025144982071e-06, "loss": 0.6677, "step": 11024 }, { "epoch": 0.04880694143167028, "grad_norm": 3.132714688168088, "learning_rate": 4.880694143167028e-06, "loss": 0.9259, "step": 11025 }, { "epoch": 0.04881136836513347, "grad_norm": 3.7485576049489526, "learning_rate": 4.881136836513348e-06, "loss": 1.2076, "step": 11026 }, { "epoch": 0.04881579529859666, "grad_norm": 4.270935850236914, "learning_rate": 4.881579529859667e-06, "loss": 1.1193, "step": 11027 }, { "epoch": 0.048820222232059854, "grad_norm": 2.510175389668229, "learning_rate": 4.8820222232059855e-06, "loss": 0.6291, "step": 11028 }, { "epoch": 0.048824649165523044, "grad_norm": 2.596420318283869, "learning_rate": 4.882464916552305e-06, "loss": 0.8203, "step": 11029 }, { "epoch": 0.048829076098986235, "grad_norm": 3.1110713060419055, "learning_rate": 4.882907609898624e-06, "loss": 0.7933, "step": 11030 }, { "epoch": 0.048833503032449425, "grad_norm": 2.7787504041739277, "learning_rate": 4.883350303244943e-06, "loss": 0.8508, "step": 11031 }, { "epoch": 0.04883792996591261, "grad_norm": 2.8964431225872627, "learning_rate": 4.883792996591261e-06, "loss": 0.7633, "step": 11032 }, { "epoch": 0.0488423568993758, "grad_norm": 2.8093968750700684, "learning_rate": 4.884235689937581e-06, "loss": 0.9698, "step": 11033 }, { "epoch": 0.04884678383283899, "grad_norm": 3.450015162395997, "learning_rate": 4.884678383283899e-06, "loss": 1.0376, "step": 11034 }, { "epoch": 0.04885121076630218, "grad_norm": 2.994558300802945, "learning_rate": 4.885121076630219e-06, "loss": 1.0391, "step": 11035 }, { "epoch": 0.04885563769976537, "grad_norm": 3.046118902072553, "learning_rate": 4.885563769976538e-06, "loss": 0.5535, "step": 11036 }, { "epoch": 0.04886006463322856, "grad_norm": 2.7634224418176783, "learning_rate": 4.8860064633228565e-06, "loss": 0.7563, "step": 11037 }, { "epoch": 0.04886449156669175, "grad_norm": 2.4786843696864453, "learning_rate": 4.886449156669176e-06, "loss": 0.6036, "step": 11038 }, { "epoch": 0.04886891850015494, "grad_norm": 2.759970113715877, "learning_rate": 4.886891850015495e-06, "loss": 0.6583, "step": 11039 }, { "epoch": 0.04887334543361813, "grad_norm": 3.0637746732925826, "learning_rate": 4.887334543361814e-06, "loss": 0.8618, "step": 11040 }, { "epoch": 0.048877772367081324, "grad_norm": 3.3271397058729892, "learning_rate": 4.887777236708133e-06, "loss": 0.9951, "step": 11041 }, { "epoch": 0.048882199300544514, "grad_norm": 3.5328680328896844, "learning_rate": 4.888219930054452e-06, "loss": 1.1397, "step": 11042 }, { "epoch": 0.048886626234007705, "grad_norm": 3.426536206686326, "learning_rate": 4.88866262340077e-06, "loss": 0.5273, "step": 11043 }, { "epoch": 0.048891053167470895, "grad_norm": 2.626424417908399, "learning_rate": 4.8891053167470896e-06, "loss": 0.4055, "step": 11044 }, { "epoch": 0.048895480100934086, "grad_norm": 2.3728268866751945, "learning_rate": 4.889548010093409e-06, "loss": 0.6496, "step": 11045 }, { "epoch": 0.048899907034397276, "grad_norm": 2.4070516406340827, "learning_rate": 4.8899907034397274e-06, "loss": 0.6224, "step": 11046 }, { "epoch": 0.04890433396786046, "grad_norm": 3.5519101438597853, "learning_rate": 4.890433396786047e-06, "loss": 1.0868, "step": 11047 }, { "epoch": 0.04890876090132365, "grad_norm": 3.070214523648362, "learning_rate": 4.890876090132366e-06, "loss": 0.8784, "step": 11048 }, { "epoch": 0.04891318783478684, "grad_norm": 2.526099827759701, "learning_rate": 4.891318783478685e-06, "loss": 0.6926, "step": 11049 }, { "epoch": 0.04891761476825003, "grad_norm": 2.3363672407016667, "learning_rate": 4.891761476825004e-06, "loss": 0.5533, "step": 11050 }, { "epoch": 0.04892204170171322, "grad_norm": 2.46583016428477, "learning_rate": 4.892204170171323e-06, "loss": 0.9034, "step": 11051 }, { "epoch": 0.04892646863517641, "grad_norm": 2.6574837673389995, "learning_rate": 4.892646863517642e-06, "loss": 0.626, "step": 11052 }, { "epoch": 0.0489308955686396, "grad_norm": 2.531439407632224, "learning_rate": 4.8930895568639605e-06, "loss": 0.6066, "step": 11053 }, { "epoch": 0.04893532250210279, "grad_norm": 2.48198587547995, "learning_rate": 4.89353225021028e-06, "loss": 0.7074, "step": 11054 }, { "epoch": 0.048939749435565984, "grad_norm": 3.8302134743767073, "learning_rate": 4.893974943556598e-06, "loss": 1.225, "step": 11055 }, { "epoch": 0.048944176369029174, "grad_norm": 3.4982412448990328, "learning_rate": 4.894417636902918e-06, "loss": 1.0922, "step": 11056 }, { "epoch": 0.048948603302492365, "grad_norm": 3.2594089655836953, "learning_rate": 4.894860330249237e-06, "loss": 0.9397, "step": 11057 }, { "epoch": 0.048953030235955555, "grad_norm": 3.1266427700563417, "learning_rate": 4.895303023595556e-06, "loss": 0.7088, "step": 11058 }, { "epoch": 0.048957457169418746, "grad_norm": 2.8399758492726526, "learning_rate": 4.895745716941875e-06, "loss": 0.7102, "step": 11059 }, { "epoch": 0.048961884102881936, "grad_norm": 2.4649699842237203, "learning_rate": 4.896188410288194e-06, "loss": 0.6423, "step": 11060 }, { "epoch": 0.04896631103634513, "grad_norm": 3.03338967084373, "learning_rate": 4.896631103634513e-06, "loss": 1.0179, "step": 11061 }, { "epoch": 0.04897073796980831, "grad_norm": 2.739769934286622, "learning_rate": 4.8970737969808314e-06, "loss": 0.6058, "step": 11062 }, { "epoch": 0.0489751649032715, "grad_norm": 2.798415257021175, "learning_rate": 4.897516490327151e-06, "loss": 1.0595, "step": 11063 }, { "epoch": 0.04897959183673469, "grad_norm": 2.7928663881019014, "learning_rate": 4.897959183673469e-06, "loss": 0.9152, "step": 11064 }, { "epoch": 0.04898401877019788, "grad_norm": 2.4992464214787704, "learning_rate": 4.898401877019789e-06, "loss": 0.8637, "step": 11065 }, { "epoch": 0.04898844570366107, "grad_norm": 2.6454879662155757, "learning_rate": 4.898844570366108e-06, "loss": 0.6535, "step": 11066 }, { "epoch": 0.04899287263712426, "grad_norm": 2.8448208810308024, "learning_rate": 4.899287263712427e-06, "loss": 0.9004, "step": 11067 }, { "epoch": 0.04899729957058745, "grad_norm": 2.431015913670879, "learning_rate": 4.899729957058746e-06, "loss": 0.6685, "step": 11068 }, { "epoch": 0.049001726504050644, "grad_norm": 2.8965272186519755, "learning_rate": 4.900172650405065e-06, "loss": 0.5202, "step": 11069 }, { "epoch": 0.049006153437513834, "grad_norm": 3.1790976360671284, "learning_rate": 4.900615343751384e-06, "loss": 1.0384, "step": 11070 }, { "epoch": 0.049010580370977025, "grad_norm": 3.517169351516753, "learning_rate": 4.901058037097703e-06, "loss": 0.7694, "step": 11071 }, { "epoch": 0.049015007304440215, "grad_norm": 3.4149032118502043, "learning_rate": 4.901500730444022e-06, "loss": 1.291, "step": 11072 }, { "epoch": 0.049019434237903406, "grad_norm": 2.819922163638415, "learning_rate": 4.901943423790341e-06, "loss": 0.6133, "step": 11073 }, { "epoch": 0.049023861171366596, "grad_norm": 2.884092654192528, "learning_rate": 4.90238611713666e-06, "loss": 0.9107, "step": 11074 }, { "epoch": 0.04902828810482979, "grad_norm": 3.655365535528981, "learning_rate": 4.902828810482979e-06, "loss": 1.2505, "step": 11075 }, { "epoch": 0.04903271503829298, "grad_norm": 2.880146166200775, "learning_rate": 4.9032715038292976e-06, "loss": 0.9101, "step": 11076 }, { "epoch": 0.04903714197175617, "grad_norm": 3.3391955200143695, "learning_rate": 4.903714197175617e-06, "loss": 0.9453, "step": 11077 }, { "epoch": 0.04904156890521935, "grad_norm": 3.0395191468173106, "learning_rate": 4.904156890521936e-06, "loss": 1.1556, "step": 11078 }, { "epoch": 0.04904599583868254, "grad_norm": 2.840424666755446, "learning_rate": 4.904599583868255e-06, "loss": 0.7498, "step": 11079 }, { "epoch": 0.04905042277214573, "grad_norm": 2.734193386775348, "learning_rate": 4.905042277214574e-06, "loss": 0.6851, "step": 11080 }, { "epoch": 0.04905484970560892, "grad_norm": 3.029891840418872, "learning_rate": 4.905484970560893e-06, "loss": 0.8811, "step": 11081 }, { "epoch": 0.049059276639072114, "grad_norm": 2.685385272366579, "learning_rate": 4.905927663907212e-06, "loss": 0.7276, "step": 11082 }, { "epoch": 0.049063703572535304, "grad_norm": 2.6197897220243336, "learning_rate": 4.906370357253531e-06, "loss": 0.5846, "step": 11083 }, { "epoch": 0.049068130505998495, "grad_norm": 2.6433949467949733, "learning_rate": 4.90681305059985e-06, "loss": 0.7985, "step": 11084 }, { "epoch": 0.049072557439461685, "grad_norm": 3.2478527167476607, "learning_rate": 4.9072557439461685e-06, "loss": 0.9037, "step": 11085 }, { "epoch": 0.049076984372924876, "grad_norm": 2.544324175451458, "learning_rate": 4.907698437292488e-06, "loss": 0.9069, "step": 11086 }, { "epoch": 0.049081411306388066, "grad_norm": 3.0151991386810733, "learning_rate": 4.908141130638807e-06, "loss": 0.9138, "step": 11087 }, { "epoch": 0.04908583823985126, "grad_norm": 2.4790059403168114, "learning_rate": 4.908583823985126e-06, "loss": 0.7086, "step": 11088 }, { "epoch": 0.04909026517331445, "grad_norm": 3.0788454544559483, "learning_rate": 4.909026517331445e-06, "loss": 1.0051, "step": 11089 }, { "epoch": 0.04909469210677764, "grad_norm": 2.2853110210015153, "learning_rate": 4.9094692106777645e-06, "loss": 0.5393, "step": 11090 }, { "epoch": 0.04909911904024083, "grad_norm": 2.3676230583042392, "learning_rate": 4.909911904024083e-06, "loss": 0.5223, "step": 11091 }, { "epoch": 0.04910354597370402, "grad_norm": 3.569470448191669, "learning_rate": 4.910354597370402e-06, "loss": 0.9509, "step": 11092 }, { "epoch": 0.0491079729071672, "grad_norm": 3.043393995323511, "learning_rate": 4.910797290716721e-06, "loss": 0.7812, "step": 11093 }, { "epoch": 0.04911239984063039, "grad_norm": 3.477979754810281, "learning_rate": 4.9112399840630394e-06, "loss": 0.986, "step": 11094 }, { "epoch": 0.04911682677409358, "grad_norm": 2.5955450702912604, "learning_rate": 4.911682677409359e-06, "loss": 0.7422, "step": 11095 }, { "epoch": 0.049121253707556774, "grad_norm": 2.796923831255576, "learning_rate": 4.912125370755678e-06, "loss": 0.6316, "step": 11096 }, { "epoch": 0.049125680641019964, "grad_norm": 3.716090113266114, "learning_rate": 4.912568064101997e-06, "loss": 1.1137, "step": 11097 }, { "epoch": 0.049130107574483155, "grad_norm": 3.447076031635451, "learning_rate": 4.913010757448316e-06, "loss": 1.0408, "step": 11098 }, { "epoch": 0.049134534507946345, "grad_norm": 2.381819478759862, "learning_rate": 4.9134534507946355e-06, "loss": 0.6744, "step": 11099 }, { "epoch": 0.049138961441409536, "grad_norm": 2.7610142604612906, "learning_rate": 4.913896144140954e-06, "loss": 0.7943, "step": 11100 }, { "epoch": 0.049143388374872726, "grad_norm": 2.7491568897980594, "learning_rate": 4.914338837487273e-06, "loss": 0.6118, "step": 11101 }, { "epoch": 0.04914781530833592, "grad_norm": 2.70313926900022, "learning_rate": 4.914781530833592e-06, "loss": 0.5355, "step": 11102 }, { "epoch": 0.04915224224179911, "grad_norm": 2.311280221176253, "learning_rate": 4.915224224179911e-06, "loss": 0.6596, "step": 11103 }, { "epoch": 0.0491566691752623, "grad_norm": 3.010229444733013, "learning_rate": 4.91566691752623e-06, "loss": 0.7645, "step": 11104 }, { "epoch": 0.04916109610872549, "grad_norm": 2.409212224079061, "learning_rate": 4.916109610872549e-06, "loss": 0.8446, "step": 11105 }, { "epoch": 0.04916552304218868, "grad_norm": 3.9941953801184478, "learning_rate": 4.916552304218868e-06, "loss": 1.1049, "step": 11106 }, { "epoch": 0.04916994997565187, "grad_norm": 3.280156294864459, "learning_rate": 4.916994997565187e-06, "loss": 0.7915, "step": 11107 }, { "epoch": 0.04917437690911505, "grad_norm": 2.5479190441099284, "learning_rate": 4.917437690911506e-06, "loss": 0.6229, "step": 11108 }, { "epoch": 0.04917880384257824, "grad_norm": 2.592629084852161, "learning_rate": 4.917880384257825e-06, "loss": 0.5262, "step": 11109 }, { "epoch": 0.049183230776041434, "grad_norm": 2.4380151203764697, "learning_rate": 4.918323077604144e-06, "loss": 0.8412, "step": 11110 }, { "epoch": 0.049187657709504624, "grad_norm": 3.1167655204814997, "learning_rate": 4.918765770950464e-06, "loss": 1.1189, "step": 11111 }, { "epoch": 0.049192084642967815, "grad_norm": 2.6830256873907157, "learning_rate": 4.919208464296782e-06, "loss": 0.765, "step": 11112 }, { "epoch": 0.049196511576431005, "grad_norm": 2.34073768262829, "learning_rate": 4.919651157643101e-06, "loss": 0.4654, "step": 11113 }, { "epoch": 0.049200938509894196, "grad_norm": 3.3660579186745414, "learning_rate": 4.92009385098942e-06, "loss": 0.9737, "step": 11114 }, { "epoch": 0.049205365443357386, "grad_norm": 2.871733364424213, "learning_rate": 4.920536544335739e-06, "loss": 0.9354, "step": 11115 }, { "epoch": 0.04920979237682058, "grad_norm": 3.883435838154957, "learning_rate": 4.920979237682058e-06, "loss": 1.0151, "step": 11116 }, { "epoch": 0.04921421931028377, "grad_norm": 3.2964489151530016, "learning_rate": 4.921421931028377e-06, "loss": 1.0405, "step": 11117 }, { "epoch": 0.04921864624374696, "grad_norm": 4.063117136818909, "learning_rate": 4.921864624374696e-06, "loss": 1.0231, "step": 11118 }, { "epoch": 0.04922307317721015, "grad_norm": 3.246680707680576, "learning_rate": 4.922307317721015e-06, "loss": 0.863, "step": 11119 }, { "epoch": 0.04922750011067334, "grad_norm": 3.60785450469047, "learning_rate": 4.922750011067335e-06, "loss": 1.0399, "step": 11120 }, { "epoch": 0.04923192704413653, "grad_norm": 2.8701616100466407, "learning_rate": 4.923192704413653e-06, "loss": 0.922, "step": 11121 }, { "epoch": 0.04923635397759972, "grad_norm": 2.6107637566811297, "learning_rate": 4.9236353977599725e-06, "loss": 0.5593, "step": 11122 }, { "epoch": 0.049240780911062904, "grad_norm": 2.6686591958781114, "learning_rate": 4.924078091106291e-06, "loss": 0.7863, "step": 11123 }, { "epoch": 0.049245207844526094, "grad_norm": 3.480617278959051, "learning_rate": 4.9245207844526096e-06, "loss": 0.9962, "step": 11124 }, { "epoch": 0.049249634777989285, "grad_norm": 2.3812146222684762, "learning_rate": 4.924963477798929e-06, "loss": 0.5455, "step": 11125 }, { "epoch": 0.049254061711452475, "grad_norm": 2.640401004762526, "learning_rate": 4.925406171145248e-06, "loss": 0.8083, "step": 11126 }, { "epoch": 0.049258488644915666, "grad_norm": 2.9041415040425216, "learning_rate": 4.925848864491567e-06, "loss": 0.7523, "step": 11127 }, { "epoch": 0.049262915578378856, "grad_norm": 2.4752600310435735, "learning_rate": 4.926291557837886e-06, "loss": 0.6346, "step": 11128 }, { "epoch": 0.04926734251184205, "grad_norm": 3.070367248672931, "learning_rate": 4.9267342511842056e-06, "loss": 0.863, "step": 11129 }, { "epoch": 0.04927176944530524, "grad_norm": 3.063346337359873, "learning_rate": 4.927176944530524e-06, "loss": 0.5959, "step": 11130 }, { "epoch": 0.04927619637876843, "grad_norm": 2.6213480873778945, "learning_rate": 4.9276196378768435e-06, "loss": 0.8169, "step": 11131 }, { "epoch": 0.04928062331223162, "grad_norm": 2.796988138480193, "learning_rate": 4.928062331223162e-06, "loss": 0.9919, "step": 11132 }, { "epoch": 0.04928505024569481, "grad_norm": 2.850483828020036, "learning_rate": 4.928505024569481e-06, "loss": 0.7932, "step": 11133 }, { "epoch": 0.049289477179158, "grad_norm": 2.7783260547188426, "learning_rate": 4.9289477179158e-06, "loss": 0.5886, "step": 11134 }, { "epoch": 0.04929390411262119, "grad_norm": 3.2539459230057948, "learning_rate": 4.929390411262119e-06, "loss": 0.8757, "step": 11135 }, { "epoch": 0.04929833104608438, "grad_norm": 2.483343246693433, "learning_rate": 4.929833104608438e-06, "loss": 0.7113, "step": 11136 }, { "epoch": 0.04930275797954757, "grad_norm": 2.8714890171582295, "learning_rate": 4.930275797954757e-06, "loss": 0.8027, "step": 11137 }, { "epoch": 0.049307184913010754, "grad_norm": 2.413791393622361, "learning_rate": 4.9307184913010765e-06, "loss": 0.6822, "step": 11138 }, { "epoch": 0.049311611846473945, "grad_norm": 2.365559368689667, "learning_rate": 4.931161184647395e-06, "loss": 0.439, "step": 11139 }, { "epoch": 0.049316038779937135, "grad_norm": 2.437892762490036, "learning_rate": 4.931603877993714e-06, "loss": 0.6728, "step": 11140 }, { "epoch": 0.049320465713400326, "grad_norm": 2.665149636184111, "learning_rate": 4.932046571340034e-06, "loss": 0.7415, "step": 11141 }, { "epoch": 0.049324892646863516, "grad_norm": 2.594369316286098, "learning_rate": 4.932489264686352e-06, "loss": 0.763, "step": 11142 }, { "epoch": 0.04932931958032671, "grad_norm": 4.096131647570814, "learning_rate": 4.932931958032671e-06, "loss": 1.0561, "step": 11143 }, { "epoch": 0.0493337465137899, "grad_norm": 2.4172073853985836, "learning_rate": 4.93337465137899e-06, "loss": 0.6992, "step": 11144 }, { "epoch": 0.04933817344725309, "grad_norm": 2.6899818608023605, "learning_rate": 4.933817344725309e-06, "loss": 0.6824, "step": 11145 }, { "epoch": 0.04934260038071628, "grad_norm": 3.4844370331830836, "learning_rate": 4.934260038071628e-06, "loss": 1.2679, "step": 11146 }, { "epoch": 0.04934702731417947, "grad_norm": 3.039064917548516, "learning_rate": 4.9347027314179475e-06, "loss": 1.0062, "step": 11147 }, { "epoch": 0.04935145424764266, "grad_norm": 2.3533962878742285, "learning_rate": 4.935145424764266e-06, "loss": 0.5771, "step": 11148 }, { "epoch": 0.04935588118110585, "grad_norm": 2.54605951970229, "learning_rate": 4.935588118110585e-06, "loss": 0.5205, "step": 11149 }, { "epoch": 0.04936030811456904, "grad_norm": 3.3799033067494046, "learning_rate": 4.936030811456905e-06, "loss": 0.5229, "step": 11150 }, { "epoch": 0.04936473504803223, "grad_norm": 2.6280744265385656, "learning_rate": 4.936473504803223e-06, "loss": 0.7827, "step": 11151 }, { "epoch": 0.04936916198149542, "grad_norm": 2.9629035963852317, "learning_rate": 4.936916198149543e-06, "loss": 0.9327, "step": 11152 }, { "epoch": 0.049373588914958605, "grad_norm": 2.9283442406201705, "learning_rate": 4.937358891495861e-06, "loss": 0.8472, "step": 11153 }, { "epoch": 0.049378015848421795, "grad_norm": 2.838811916218129, "learning_rate": 4.9378015848421805e-06, "loss": 0.5365, "step": 11154 }, { "epoch": 0.049382442781884986, "grad_norm": 2.834308424664868, "learning_rate": 4.938244278188499e-06, "loss": 0.7569, "step": 11155 }, { "epoch": 0.049386869715348176, "grad_norm": 2.7131648497134706, "learning_rate": 4.938686971534818e-06, "loss": 0.8823, "step": 11156 }, { "epoch": 0.04939129664881137, "grad_norm": 2.5967192528267886, "learning_rate": 4.939129664881137e-06, "loss": 0.5748, "step": 11157 }, { "epoch": 0.04939572358227456, "grad_norm": 3.3020747309070546, "learning_rate": 4.939572358227456e-06, "loss": 0.9735, "step": 11158 }, { "epoch": 0.04940015051573775, "grad_norm": 3.3183360530029886, "learning_rate": 4.940015051573776e-06, "loss": 0.7412, "step": 11159 }, { "epoch": 0.04940457744920094, "grad_norm": 3.162986793331181, "learning_rate": 4.940457744920094e-06, "loss": 0.7023, "step": 11160 }, { "epoch": 0.04940900438266413, "grad_norm": 2.2683484903678512, "learning_rate": 4.9409004382664136e-06, "loss": 0.5051, "step": 11161 }, { "epoch": 0.04941343131612732, "grad_norm": 2.938527167267245, "learning_rate": 4.941343131612732e-06, "loss": 0.6933, "step": 11162 }, { "epoch": 0.04941785824959051, "grad_norm": 2.9032797912319173, "learning_rate": 4.9417858249590515e-06, "loss": 0.9699, "step": 11163 }, { "epoch": 0.0494222851830537, "grad_norm": 2.7882490448712773, "learning_rate": 4.94222851830537e-06, "loss": 0.7812, "step": 11164 }, { "epoch": 0.04942671211651689, "grad_norm": 2.151374230003869, "learning_rate": 4.942671211651689e-06, "loss": 0.5739, "step": 11165 }, { "epoch": 0.04943113904998008, "grad_norm": 2.632790774496727, "learning_rate": 4.943113904998008e-06, "loss": 0.5897, "step": 11166 }, { "epoch": 0.04943556598344327, "grad_norm": 2.8372515559095155, "learning_rate": 4.943556598344327e-06, "loss": 0.8898, "step": 11167 }, { "epoch": 0.049439992916906456, "grad_norm": 3.3308836345510926, "learning_rate": 4.943999291690647e-06, "loss": 0.944, "step": 11168 }, { "epoch": 0.049444419850369646, "grad_norm": 3.010117494190742, "learning_rate": 4.944441985036965e-06, "loss": 0.8995, "step": 11169 }, { "epoch": 0.04944884678383284, "grad_norm": 2.588656521216461, "learning_rate": 4.9448846783832845e-06, "loss": 0.846, "step": 11170 }, { "epoch": 0.04945327371729603, "grad_norm": 2.739445027019113, "learning_rate": 4.945327371729604e-06, "loss": 0.6224, "step": 11171 }, { "epoch": 0.04945770065075922, "grad_norm": 3.0572235832793826, "learning_rate": 4.945770065075922e-06, "loss": 0.7003, "step": 11172 }, { "epoch": 0.04946212758422241, "grad_norm": 2.7564717995438137, "learning_rate": 4.946212758422242e-06, "loss": 0.9428, "step": 11173 }, { "epoch": 0.0494665545176856, "grad_norm": 2.5638150736176217, "learning_rate": 4.94665545176856e-06, "loss": 0.7702, "step": 11174 }, { "epoch": 0.04947098145114879, "grad_norm": 2.9144662902774714, "learning_rate": 4.947098145114879e-06, "loss": 0.7762, "step": 11175 }, { "epoch": 0.04947540838461198, "grad_norm": 2.2605687369734673, "learning_rate": 4.947540838461198e-06, "loss": 0.474, "step": 11176 }, { "epoch": 0.04947983531807517, "grad_norm": 3.2520772812337384, "learning_rate": 4.9479835318075176e-06, "loss": 0.8587, "step": 11177 }, { "epoch": 0.04948426225153836, "grad_norm": 2.4217376439210305, "learning_rate": 4.948426225153836e-06, "loss": 0.6362, "step": 11178 }, { "epoch": 0.04948868918500155, "grad_norm": 2.3933206006840284, "learning_rate": 4.9488689185001555e-06, "loss": 0.5823, "step": 11179 }, { "epoch": 0.04949311611846474, "grad_norm": 3.083850959802848, "learning_rate": 4.949311611846475e-06, "loss": 0.8259, "step": 11180 }, { "epoch": 0.04949754305192793, "grad_norm": 2.830164075018993, "learning_rate": 4.949754305192793e-06, "loss": 0.5356, "step": 11181 }, { "epoch": 0.04950196998539112, "grad_norm": 3.1342715467916067, "learning_rate": 4.950196998539113e-06, "loss": 0.8774, "step": 11182 }, { "epoch": 0.049506396918854306, "grad_norm": 2.740452201576925, "learning_rate": 4.950639691885431e-06, "loss": 0.9564, "step": 11183 }, { "epoch": 0.0495108238523175, "grad_norm": 3.105722868254574, "learning_rate": 4.951082385231751e-06, "loss": 0.7965, "step": 11184 }, { "epoch": 0.04951525078578069, "grad_norm": 2.521569558942251, "learning_rate": 4.951525078578069e-06, "loss": 0.5813, "step": 11185 }, { "epoch": 0.04951967771924388, "grad_norm": 2.2759121913451184, "learning_rate": 4.9519677719243885e-06, "loss": 0.6868, "step": 11186 }, { "epoch": 0.04952410465270707, "grad_norm": 2.6301950742316436, "learning_rate": 4.952410465270707e-06, "loss": 0.4846, "step": 11187 }, { "epoch": 0.04952853158617026, "grad_norm": 2.999666916712967, "learning_rate": 4.952853158617026e-06, "loss": 0.7929, "step": 11188 }, { "epoch": 0.04953295851963345, "grad_norm": 2.9679165512044, "learning_rate": 4.953295851963346e-06, "loss": 0.6737, "step": 11189 }, { "epoch": 0.04953738545309664, "grad_norm": 2.563524092517303, "learning_rate": 4.953738545309664e-06, "loss": 0.8453, "step": 11190 }, { "epoch": 0.04954181238655983, "grad_norm": 3.367461982707745, "learning_rate": 4.954181238655984e-06, "loss": 0.618, "step": 11191 }, { "epoch": 0.04954623932002302, "grad_norm": 2.8877110631264427, "learning_rate": 4.954623932002303e-06, "loss": 0.8495, "step": 11192 }, { "epoch": 0.04955066625348621, "grad_norm": 3.8911461064627804, "learning_rate": 4.9550666253486216e-06, "loss": 0.4992, "step": 11193 }, { "epoch": 0.0495550931869494, "grad_norm": 2.6253628213839395, "learning_rate": 4.95550931869494e-06, "loss": 0.6968, "step": 11194 }, { "epoch": 0.04955952012041259, "grad_norm": 2.552689414736015, "learning_rate": 4.9559520120412595e-06, "loss": 0.6251, "step": 11195 }, { "epoch": 0.04956394705387578, "grad_norm": 2.7019276359056263, "learning_rate": 4.956394705387578e-06, "loss": 0.5253, "step": 11196 }, { "epoch": 0.04956837398733897, "grad_norm": 2.945025432366945, "learning_rate": 4.956837398733897e-06, "loss": 0.3343, "step": 11197 }, { "epoch": 0.04957280092080216, "grad_norm": 3.4413242096185854, "learning_rate": 4.957280092080217e-06, "loss": 0.7517, "step": 11198 }, { "epoch": 0.04957722785426535, "grad_norm": 3.805873214399527, "learning_rate": 4.957722785426535e-06, "loss": 1.0244, "step": 11199 }, { "epoch": 0.04958165478772854, "grad_norm": 2.986281784957187, "learning_rate": 4.958165478772855e-06, "loss": 0.5794, "step": 11200 }, { "epoch": 0.04958608172119173, "grad_norm": 3.0338192904765506, "learning_rate": 4.958608172119174e-06, "loss": 0.664, "step": 11201 }, { "epoch": 0.04959050865465492, "grad_norm": 2.868939035326061, "learning_rate": 4.9590508654654925e-06, "loss": 0.7113, "step": 11202 }, { "epoch": 0.04959493558811811, "grad_norm": 3.1951929726205037, "learning_rate": 4.959493558811812e-06, "loss": 0.737, "step": 11203 }, { "epoch": 0.0495993625215813, "grad_norm": 2.7716998942363507, "learning_rate": 4.95993625215813e-06, "loss": 0.4717, "step": 11204 }, { "epoch": 0.04960378945504449, "grad_norm": 2.676916420656235, "learning_rate": 4.960378945504449e-06, "loss": 0.5739, "step": 11205 }, { "epoch": 0.04960821638850768, "grad_norm": 2.857422171422857, "learning_rate": 4.960821638850768e-06, "loss": 0.5676, "step": 11206 }, { "epoch": 0.04961264332197087, "grad_norm": 2.7864191042697724, "learning_rate": 4.961264332197088e-06, "loss": 0.4042, "step": 11207 }, { "epoch": 0.04961707025543406, "grad_norm": 2.478028714159487, "learning_rate": 4.961707025543406e-06, "loss": 0.4595, "step": 11208 }, { "epoch": 0.04962149718889725, "grad_norm": 3.293375516435335, "learning_rate": 4.9621497188897256e-06, "loss": 1.0097, "step": 11209 }, { "epoch": 0.04962592412236044, "grad_norm": 3.471080830876587, "learning_rate": 4.962592412236045e-06, "loss": 1.0575, "step": 11210 }, { "epoch": 0.049630351055823634, "grad_norm": 2.9595587822846583, "learning_rate": 4.9630351055823635e-06, "loss": 0.9121, "step": 11211 }, { "epoch": 0.049634777989286824, "grad_norm": 3.5624275110516854, "learning_rate": 4.963477798928683e-06, "loss": 0.9928, "step": 11212 }, { "epoch": 0.04963920492275001, "grad_norm": 2.5318294422506686, "learning_rate": 4.963920492275001e-06, "loss": 0.6602, "step": 11213 }, { "epoch": 0.0496436318562132, "grad_norm": 3.7582689070861446, "learning_rate": 4.964363185621321e-06, "loss": 0.8401, "step": 11214 }, { "epoch": 0.04964805878967639, "grad_norm": 3.460323490500186, "learning_rate": 4.964805878967639e-06, "loss": 1.0961, "step": 11215 }, { "epoch": 0.04965248572313958, "grad_norm": 2.739968207496882, "learning_rate": 4.965248572313959e-06, "loss": 0.7013, "step": 11216 }, { "epoch": 0.04965691265660277, "grad_norm": 3.256478357403827, "learning_rate": 4.965691265660277e-06, "loss": 0.9164, "step": 11217 }, { "epoch": 0.04966133959006596, "grad_norm": 4.134857935957547, "learning_rate": 4.9661339590065965e-06, "loss": 1.3858, "step": 11218 }, { "epoch": 0.04966576652352915, "grad_norm": 3.0247296402821466, "learning_rate": 4.966576652352916e-06, "loss": 1.1122, "step": 11219 }, { "epoch": 0.04967019345699234, "grad_norm": 2.653867831288881, "learning_rate": 4.967019345699234e-06, "loss": 0.7283, "step": 11220 }, { "epoch": 0.04967462039045553, "grad_norm": 3.255434895177829, "learning_rate": 4.967462039045554e-06, "loss": 1.1609, "step": 11221 }, { "epoch": 0.04967904732391872, "grad_norm": 3.0248684886463275, "learning_rate": 4.967904732391873e-06, "loss": 1.0466, "step": 11222 }, { "epoch": 0.04968347425738191, "grad_norm": 2.3552085848328326, "learning_rate": 4.968347425738192e-06, "loss": 0.5316, "step": 11223 }, { "epoch": 0.0496879011908451, "grad_norm": 3.2551309818663454, "learning_rate": 4.96879011908451e-06, "loss": 0.738, "step": 11224 }, { "epoch": 0.049692328124308294, "grad_norm": 3.151884188891624, "learning_rate": 4.9692328124308296e-06, "loss": 0.8343, "step": 11225 }, { "epoch": 0.049696755057771484, "grad_norm": 2.5424355729091324, "learning_rate": 4.969675505777148e-06, "loss": 0.6983, "step": 11226 }, { "epoch": 0.049701181991234675, "grad_norm": 2.6293743837990227, "learning_rate": 4.9701181991234675e-06, "loss": 0.8789, "step": 11227 }, { "epoch": 0.049705608924697865, "grad_norm": 2.861609865494645, "learning_rate": 4.970560892469787e-06, "loss": 0.7226, "step": 11228 }, { "epoch": 0.04971003585816105, "grad_norm": 3.097842593858523, "learning_rate": 4.971003585816105e-06, "loss": 0.6959, "step": 11229 }, { "epoch": 0.04971446279162424, "grad_norm": 3.3110835319385723, "learning_rate": 4.971446279162425e-06, "loss": 0.8035, "step": 11230 }, { "epoch": 0.04971888972508743, "grad_norm": 3.0101161919018007, "learning_rate": 4.971888972508744e-06, "loss": 0.872, "step": 11231 }, { "epoch": 0.04972331665855062, "grad_norm": 3.1708604386813173, "learning_rate": 4.972331665855063e-06, "loss": 0.9283, "step": 11232 }, { "epoch": 0.04972774359201381, "grad_norm": 3.0088524016037046, "learning_rate": 4.972774359201382e-06, "loss": 0.7143, "step": 11233 }, { "epoch": 0.049732170525477, "grad_norm": 3.70871249815585, "learning_rate": 4.9732170525477005e-06, "loss": 1.0037, "step": 11234 }, { "epoch": 0.04973659745894019, "grad_norm": 3.1139138129006665, "learning_rate": 4.973659745894019e-06, "loss": 1.0978, "step": 11235 }, { "epoch": 0.04974102439240338, "grad_norm": 3.1157159761556623, "learning_rate": 4.974102439240338e-06, "loss": 0.9992, "step": 11236 }, { "epoch": 0.04974545132586657, "grad_norm": 3.3861743734400482, "learning_rate": 4.974545132586658e-06, "loss": 1.0314, "step": 11237 }, { "epoch": 0.04974987825932976, "grad_norm": 3.3835678787690187, "learning_rate": 4.974987825932976e-06, "loss": 0.8697, "step": 11238 }, { "epoch": 0.049754305192792954, "grad_norm": 3.1538952057789604, "learning_rate": 4.975430519279296e-06, "loss": 1.0522, "step": 11239 }, { "epoch": 0.049758732126256144, "grad_norm": 2.17414597792864, "learning_rate": 4.975873212625615e-06, "loss": 0.4172, "step": 11240 }, { "epoch": 0.049763159059719335, "grad_norm": 3.792749522138572, "learning_rate": 4.9763159059719336e-06, "loss": 1.1665, "step": 11241 }, { "epoch": 0.049767585993182525, "grad_norm": 3.3512095449837895, "learning_rate": 4.976758599318253e-06, "loss": 0.9028, "step": 11242 }, { "epoch": 0.049772012926645716, "grad_norm": 2.4426207139468947, "learning_rate": 4.9772012926645715e-06, "loss": 0.8363, "step": 11243 }, { "epoch": 0.0497764398601089, "grad_norm": 2.8007130659726722, "learning_rate": 4.977643986010891e-06, "loss": 0.6599, "step": 11244 }, { "epoch": 0.04978086679357209, "grad_norm": 2.7828503638541187, "learning_rate": 4.978086679357209e-06, "loss": 0.486, "step": 11245 }, { "epoch": 0.04978529372703528, "grad_norm": 3.2507206068744905, "learning_rate": 4.978529372703529e-06, "loss": 0.4506, "step": 11246 }, { "epoch": 0.04978972066049847, "grad_norm": 2.850944768173115, "learning_rate": 4.978972066049847e-06, "loss": 1.0811, "step": 11247 }, { "epoch": 0.04979414759396166, "grad_norm": 2.4178594282838834, "learning_rate": 4.979414759396167e-06, "loss": 0.7098, "step": 11248 }, { "epoch": 0.04979857452742485, "grad_norm": 2.5297314226506873, "learning_rate": 4.979857452742486e-06, "loss": 0.7142, "step": 11249 }, { "epoch": 0.04980300146088804, "grad_norm": 2.548098000342212, "learning_rate": 4.9803001460888045e-06, "loss": 0.6192, "step": 11250 }, { "epoch": 0.04980742839435123, "grad_norm": 2.843278801605036, "learning_rate": 4.980742839435124e-06, "loss": 0.811, "step": 11251 }, { "epoch": 0.049811855327814424, "grad_norm": 3.1079036284564543, "learning_rate": 4.981185532781443e-06, "loss": 0.8324, "step": 11252 }, { "epoch": 0.049816282261277614, "grad_norm": 3.404120137422491, "learning_rate": 4.981628226127762e-06, "loss": 1.0771, "step": 11253 }, { "epoch": 0.049820709194740805, "grad_norm": 2.9557572087118382, "learning_rate": 4.982070919474081e-06, "loss": 0.6566, "step": 11254 }, { "epoch": 0.049825136128203995, "grad_norm": 2.5346255746738513, "learning_rate": 4.9825136128204e-06, "loss": 0.8513, "step": 11255 }, { "epoch": 0.049829563061667186, "grad_norm": 2.4939100319773275, "learning_rate": 4.982956306166718e-06, "loss": 0.5703, "step": 11256 }, { "epoch": 0.049833989995130376, "grad_norm": 2.791104810210776, "learning_rate": 4.9833989995130376e-06, "loss": 0.8956, "step": 11257 }, { "epoch": 0.04983841692859357, "grad_norm": 2.5778109773980544, "learning_rate": 4.983841692859357e-06, "loss": 0.7754, "step": 11258 }, { "epoch": 0.04984284386205675, "grad_norm": 2.8790610457552916, "learning_rate": 4.9842843862056755e-06, "loss": 0.9656, "step": 11259 }, { "epoch": 0.04984727079551994, "grad_norm": 2.6992422273552297, "learning_rate": 4.984727079551995e-06, "loss": 0.6644, "step": 11260 }, { "epoch": 0.04985169772898313, "grad_norm": 2.8348981445479597, "learning_rate": 4.985169772898314e-06, "loss": 0.9554, "step": 11261 }, { "epoch": 0.04985612466244632, "grad_norm": 2.5406477331983934, "learning_rate": 4.985612466244633e-06, "loss": 0.4285, "step": 11262 }, { "epoch": 0.04986055159590951, "grad_norm": 2.7720969165361833, "learning_rate": 4.986055159590952e-06, "loss": 0.7241, "step": 11263 }, { "epoch": 0.0498649785293727, "grad_norm": 2.9578265206144407, "learning_rate": 4.986497852937271e-06, "loss": 0.6506, "step": 11264 }, { "epoch": 0.04986940546283589, "grad_norm": 2.515282630389034, "learning_rate": 4.98694054628359e-06, "loss": 0.6048, "step": 11265 }, { "epoch": 0.049873832396299084, "grad_norm": 3.120838777684792, "learning_rate": 4.9873832396299085e-06, "loss": 0.8651, "step": 11266 }, { "epoch": 0.049878259329762274, "grad_norm": 2.8274648067427806, "learning_rate": 4.987825932976228e-06, "loss": 0.8199, "step": 11267 }, { "epoch": 0.049882686263225465, "grad_norm": 3.207021329740282, "learning_rate": 4.988268626322546e-06, "loss": 0.9448, "step": 11268 }, { "epoch": 0.049887113196688655, "grad_norm": 2.7441956473093416, "learning_rate": 4.988711319668866e-06, "loss": 0.6704, "step": 11269 }, { "epoch": 0.049891540130151846, "grad_norm": 2.5245108837186505, "learning_rate": 4.989154013015185e-06, "loss": 0.855, "step": 11270 }, { "epoch": 0.049895967063615036, "grad_norm": 3.11366486190085, "learning_rate": 4.989596706361504e-06, "loss": 0.8644, "step": 11271 }, { "epoch": 0.04990039399707823, "grad_norm": 3.1033923110137187, "learning_rate": 4.990039399707823e-06, "loss": 0.8892, "step": 11272 }, { "epoch": 0.04990482093054142, "grad_norm": 3.150960899840874, "learning_rate": 4.990482093054142e-06, "loss": 0.6289, "step": 11273 }, { "epoch": 0.0499092478640046, "grad_norm": 2.8828223686409236, "learning_rate": 4.990924786400461e-06, "loss": 0.6991, "step": 11274 }, { "epoch": 0.04991367479746779, "grad_norm": 3.1971794342909794, "learning_rate": 4.9913674797467795e-06, "loss": 1.0346, "step": 11275 }, { "epoch": 0.04991810173093098, "grad_norm": 2.746365329539874, "learning_rate": 4.991810173093099e-06, "loss": 0.8591, "step": 11276 }, { "epoch": 0.04992252866439417, "grad_norm": 3.2916354346929078, "learning_rate": 4.992252866439417e-06, "loss": 1.0375, "step": 11277 }, { "epoch": 0.04992695559785736, "grad_norm": 2.677028408499517, "learning_rate": 4.992695559785737e-06, "loss": 0.8143, "step": 11278 }, { "epoch": 0.04993138253132055, "grad_norm": 2.7023342733198668, "learning_rate": 4.993138253132056e-06, "loss": 0.5838, "step": 11279 }, { "epoch": 0.049935809464783744, "grad_norm": 3.4387675527441215, "learning_rate": 4.993580946478375e-06, "loss": 0.8247, "step": 11280 }, { "epoch": 0.049940236398246934, "grad_norm": 3.3109917628399264, "learning_rate": 4.994023639824694e-06, "loss": 0.578, "step": 11281 }, { "epoch": 0.049944663331710125, "grad_norm": 3.7123280544566226, "learning_rate": 4.994466333171013e-06, "loss": 1.2009, "step": 11282 }, { "epoch": 0.049949090265173315, "grad_norm": 2.5156368190589125, "learning_rate": 4.994909026517332e-06, "loss": 0.5583, "step": 11283 }, { "epoch": 0.049953517198636506, "grad_norm": 2.8639799740520218, "learning_rate": 4.995351719863651e-06, "loss": 0.6303, "step": 11284 }, { "epoch": 0.049957944132099696, "grad_norm": 2.8107807204341655, "learning_rate": 4.99579441320997e-06, "loss": 0.7703, "step": 11285 }, { "epoch": 0.04996237106556289, "grad_norm": 2.6233793081387446, "learning_rate": 4.996237106556288e-06, "loss": 0.7844, "step": 11286 }, { "epoch": 0.04996679799902608, "grad_norm": 3.567860202944493, "learning_rate": 4.996679799902608e-06, "loss": 1.2504, "step": 11287 }, { "epoch": 0.04997122493248927, "grad_norm": 2.996453137933468, "learning_rate": 4.997122493248927e-06, "loss": 0.3966, "step": 11288 }, { "epoch": 0.04997565186595245, "grad_norm": 2.6161576206615504, "learning_rate": 4.9975651865952456e-06, "loss": 0.5001, "step": 11289 }, { "epoch": 0.04998007879941564, "grad_norm": 3.2309561306226904, "learning_rate": 4.998007879941565e-06, "loss": 0.8896, "step": 11290 }, { "epoch": 0.04998450573287883, "grad_norm": 2.6678152686196097, "learning_rate": 4.998450573287884e-06, "loss": 0.5031, "step": 11291 }, { "epoch": 0.04998893266634202, "grad_norm": 2.3148500487944443, "learning_rate": 4.998893266634203e-06, "loss": 0.6119, "step": 11292 }, { "epoch": 0.049993359599805214, "grad_norm": 3.162929112164135, "learning_rate": 4.999335959980522e-06, "loss": 0.6688, "step": 11293 }, { "epoch": 0.049997786533268404, "grad_norm": 3.235715266854422, "learning_rate": 4.999778653326841e-06, "loss": 1.1399, "step": 11294 }, { "epoch": 0.050002213466731595, "grad_norm": 2.9068705294405053, "learning_rate": 5.00022134667316e-06, "loss": 0.6541, "step": 11295 }, { "epoch": 0.050006640400194785, "grad_norm": 3.022065570562521, "learning_rate": 5.0006640400194795e-06, "loss": 0.9331, "step": 11296 }, { "epoch": 0.050011067333657976, "grad_norm": 3.8917265912988688, "learning_rate": 5.001106733365798e-06, "loss": 1.1116, "step": 11297 }, { "epoch": 0.050015494267121166, "grad_norm": 2.6031493665371723, "learning_rate": 5.0015494267121165e-06, "loss": 0.6591, "step": 11298 }, { "epoch": 0.05001992120058436, "grad_norm": 3.1454129219667393, "learning_rate": 5.001992120058437e-06, "loss": 0.886, "step": 11299 }, { "epoch": 0.05002434813404755, "grad_norm": 2.937382121930635, "learning_rate": 5.002434813404755e-06, "loss": 1.0158, "step": 11300 }, { "epoch": 0.05002877506751074, "grad_norm": 3.4620100458343996, "learning_rate": 5.002877506751074e-06, "loss": 1.1879, "step": 11301 }, { "epoch": 0.05003320200097393, "grad_norm": 2.688886563184597, "learning_rate": 5.003320200097393e-06, "loss": 0.7176, "step": 11302 }, { "epoch": 0.05003762893443712, "grad_norm": 2.861595437040289, "learning_rate": 5.0037628934437125e-06, "loss": 0.6461, "step": 11303 }, { "epoch": 0.0500420558679003, "grad_norm": 2.7295945819531897, "learning_rate": 5.004205586790031e-06, "loss": 0.753, "step": 11304 }, { "epoch": 0.05004648280136349, "grad_norm": 2.0670716403020166, "learning_rate": 5.00464828013635e-06, "loss": 0.5421, "step": 11305 }, { "epoch": 0.05005090973482668, "grad_norm": 2.475936893677632, "learning_rate": 5.005090973482669e-06, "loss": 0.5255, "step": 11306 }, { "epoch": 0.050055336668289874, "grad_norm": 2.966518671749878, "learning_rate": 5.0055336668289875e-06, "loss": 0.9594, "step": 11307 }, { "epoch": 0.050059763601753064, "grad_norm": 2.6784155374801983, "learning_rate": 5.005976360175308e-06, "loss": 0.5221, "step": 11308 }, { "epoch": 0.050064190535216255, "grad_norm": 2.6892275817836966, "learning_rate": 5.006419053521626e-06, "loss": 0.7043, "step": 11309 }, { "epoch": 0.050068617468679445, "grad_norm": 3.257214959569668, "learning_rate": 5.006861746867945e-06, "loss": 0.8445, "step": 11310 }, { "epoch": 0.050073044402142636, "grad_norm": 2.636599052598379, "learning_rate": 5.007304440214265e-06, "loss": 0.7093, "step": 11311 }, { "epoch": 0.050077471335605826, "grad_norm": 3.2984221227886144, "learning_rate": 5.0077471335605835e-06, "loss": 1.1162, "step": 11312 }, { "epoch": 0.05008189826906902, "grad_norm": 3.1653736279160176, "learning_rate": 5.008189826906902e-06, "loss": 0.6654, "step": 11313 }, { "epoch": 0.05008632520253221, "grad_norm": 3.035278789156446, "learning_rate": 5.008632520253221e-06, "loss": 1.1986, "step": 11314 }, { "epoch": 0.0500907521359954, "grad_norm": 2.7816260026890274, "learning_rate": 5.00907521359954e-06, "loss": 0.8175, "step": 11315 }, { "epoch": 0.05009517906945859, "grad_norm": 3.4806902363099144, "learning_rate": 5.009517906945858e-06, "loss": 0.614, "step": 11316 }, { "epoch": 0.05009960600292178, "grad_norm": 3.7940542255321428, "learning_rate": 5.009960600292179e-06, "loss": 1.0383, "step": 11317 }, { "epoch": 0.05010403293638497, "grad_norm": 2.6851835240349793, "learning_rate": 5.010403293638497e-06, "loss": 0.5467, "step": 11318 }, { "epoch": 0.05010845986984815, "grad_norm": 2.685324310842195, "learning_rate": 5.010845986984816e-06, "loss": 0.7346, "step": 11319 }, { "epoch": 0.05011288680331134, "grad_norm": 3.182762544538337, "learning_rate": 5.011288680331136e-06, "loss": 0.8861, "step": 11320 }, { "epoch": 0.050117313736774534, "grad_norm": 3.0512784892026588, "learning_rate": 5.011731373677454e-06, "loss": 0.9289, "step": 11321 }, { "epoch": 0.050121740670237724, "grad_norm": 2.656889140278457, "learning_rate": 5.012174067023773e-06, "loss": 0.8066, "step": 11322 }, { "epoch": 0.050126167603700915, "grad_norm": 2.589696893326569, "learning_rate": 5.012616760370092e-06, "loss": 0.4684, "step": 11323 }, { "epoch": 0.050130594537164105, "grad_norm": 2.626317671251272, "learning_rate": 5.013059453716411e-06, "loss": 0.7286, "step": 11324 }, { "epoch": 0.050135021470627296, "grad_norm": 3.026697286825789, "learning_rate": 5.01350214706273e-06, "loss": 1.0354, "step": 11325 }, { "epoch": 0.050139448404090486, "grad_norm": 3.0455534389576466, "learning_rate": 5.0139448404090496e-06, "loss": 0.8425, "step": 11326 }, { "epoch": 0.05014387533755368, "grad_norm": 2.4776240616450043, "learning_rate": 5.014387533755368e-06, "loss": 0.4551, "step": 11327 }, { "epoch": 0.05014830227101687, "grad_norm": 2.832944084944314, "learning_rate": 5.014830227101687e-06, "loss": 0.7276, "step": 11328 }, { "epoch": 0.05015272920448006, "grad_norm": 2.3690730211833815, "learning_rate": 5.015272920448007e-06, "loss": 0.5265, "step": 11329 }, { "epoch": 0.05015715613794325, "grad_norm": 2.604626579634701, "learning_rate": 5.015715613794325e-06, "loss": 0.5607, "step": 11330 }, { "epoch": 0.05016158307140644, "grad_norm": 2.9671218468891856, "learning_rate": 5.016158307140644e-06, "loss": 0.9236, "step": 11331 }, { "epoch": 0.05016601000486963, "grad_norm": 2.541046693964618, "learning_rate": 5.016601000486963e-06, "loss": 0.6744, "step": 11332 }, { "epoch": 0.05017043693833282, "grad_norm": 2.8661090257309825, "learning_rate": 5.017043693833283e-06, "loss": 0.6148, "step": 11333 }, { "epoch": 0.050174863871796004, "grad_norm": 2.741241807965176, "learning_rate": 5.017486387179601e-06, "loss": 0.8265, "step": 11334 }, { "epoch": 0.050179290805259194, "grad_norm": 2.972557672306051, "learning_rate": 5.0179290805259205e-06, "loss": 0.9949, "step": 11335 }, { "epoch": 0.050183717738722385, "grad_norm": 2.6394877446236507, "learning_rate": 5.018371773872239e-06, "loss": 0.7578, "step": 11336 }, { "epoch": 0.050188144672185575, "grad_norm": 3.4277773640755873, "learning_rate": 5.0188144672185576e-06, "loss": 1.0848, "step": 11337 }, { "epoch": 0.050192571605648766, "grad_norm": 2.4578622919909514, "learning_rate": 5.019257160564878e-06, "loss": 0.5958, "step": 11338 }, { "epoch": 0.050196998539111956, "grad_norm": 2.966160205973009, "learning_rate": 5.019699853911196e-06, "loss": 1.0452, "step": 11339 }, { "epoch": 0.05020142547257515, "grad_norm": 3.32982560300554, "learning_rate": 5.020142547257515e-06, "loss": 0.8519, "step": 11340 }, { "epoch": 0.05020585240603834, "grad_norm": 3.258008565293778, "learning_rate": 5.020585240603835e-06, "loss": 1.1445, "step": 11341 }, { "epoch": 0.05021027933950153, "grad_norm": 2.2611208665990885, "learning_rate": 5.0210279339501536e-06, "loss": 0.5816, "step": 11342 }, { "epoch": 0.05021470627296472, "grad_norm": 2.8873525513052996, "learning_rate": 5.021470627296472e-06, "loss": 0.7121, "step": 11343 }, { "epoch": 0.05021913320642791, "grad_norm": 2.7891529115129603, "learning_rate": 5.0219133206427915e-06, "loss": 0.8392, "step": 11344 }, { "epoch": 0.0502235601398911, "grad_norm": 2.837110986044062, "learning_rate": 5.02235601398911e-06, "loss": 0.5611, "step": 11345 }, { "epoch": 0.05022798707335429, "grad_norm": 2.5214930117158865, "learning_rate": 5.022798707335429e-06, "loss": 0.8541, "step": 11346 }, { "epoch": 0.05023241400681748, "grad_norm": 3.4956109873175714, "learning_rate": 5.023241400681749e-06, "loss": 0.9962, "step": 11347 }, { "epoch": 0.05023684094028067, "grad_norm": 2.7585849871965658, "learning_rate": 5.023684094028067e-06, "loss": 0.8537, "step": 11348 }, { "epoch": 0.050241267873743854, "grad_norm": 2.5095431305186806, "learning_rate": 5.024126787374386e-06, "loss": 0.6167, "step": 11349 }, { "epoch": 0.050245694807207045, "grad_norm": 2.732850517741104, "learning_rate": 5.024569480720706e-06, "loss": 0.787, "step": 11350 }, { "epoch": 0.050250121740670235, "grad_norm": 3.056350572844501, "learning_rate": 5.0250121740670245e-06, "loss": 0.8694, "step": 11351 }, { "epoch": 0.050254548674133426, "grad_norm": 3.1378630332276756, "learning_rate": 5.025454867413343e-06, "loss": 0.626, "step": 11352 }, { "epoch": 0.050258975607596616, "grad_norm": 2.906103379234896, "learning_rate": 5.025897560759662e-06, "loss": 0.9488, "step": 11353 }, { "epoch": 0.05026340254105981, "grad_norm": 2.9449611343592634, "learning_rate": 5.026340254105982e-06, "loss": 0.8065, "step": 11354 }, { "epoch": 0.050267829474523, "grad_norm": 4.377599999742001, "learning_rate": 5.0267829474523e-06, "loss": 1.2017, "step": 11355 }, { "epoch": 0.05027225640798619, "grad_norm": 3.0046586783526292, "learning_rate": 5.02722564079862e-06, "loss": 1.147, "step": 11356 }, { "epoch": 0.05027668334144938, "grad_norm": 2.883371895738176, "learning_rate": 5.027668334144938e-06, "loss": 0.8203, "step": 11357 }, { "epoch": 0.05028111027491257, "grad_norm": 4.415280719703257, "learning_rate": 5.028111027491257e-06, "loss": 0.9975, "step": 11358 }, { "epoch": 0.05028553720837576, "grad_norm": 2.769434127672722, "learning_rate": 5.028553720837577e-06, "loss": 0.8066, "step": 11359 }, { "epoch": 0.05028996414183895, "grad_norm": 4.305585719787274, "learning_rate": 5.0289964141838955e-06, "loss": 1.5519, "step": 11360 }, { "epoch": 0.05029439107530214, "grad_norm": 2.551533505882354, "learning_rate": 5.029439107530214e-06, "loss": 0.7966, "step": 11361 }, { "epoch": 0.05029881800876533, "grad_norm": 2.542692983220412, "learning_rate": 5.029881800876533e-06, "loss": 0.666, "step": 11362 }, { "epoch": 0.05030324494222852, "grad_norm": 2.4646139569654215, "learning_rate": 5.030324494222853e-06, "loss": 0.5994, "step": 11363 }, { "epoch": 0.050307671875691705, "grad_norm": 2.3694040726786576, "learning_rate": 5.030767187569171e-06, "loss": 0.5689, "step": 11364 }, { "epoch": 0.050312098809154895, "grad_norm": 2.7356631938999594, "learning_rate": 5.031209880915491e-06, "loss": 0.9341, "step": 11365 }, { "epoch": 0.050316525742618086, "grad_norm": 2.2881557794164937, "learning_rate": 5.031652574261809e-06, "loss": 0.719, "step": 11366 }, { "epoch": 0.050320952676081276, "grad_norm": 2.7131976264717155, "learning_rate": 5.032095267608128e-06, "loss": 0.6733, "step": 11367 }, { "epoch": 0.05032537960954447, "grad_norm": 2.519700076715816, "learning_rate": 5.032537960954448e-06, "loss": 0.8744, "step": 11368 }, { "epoch": 0.05032980654300766, "grad_norm": 3.1807640296169404, "learning_rate": 5.032980654300766e-06, "loss": 1.1902, "step": 11369 }, { "epoch": 0.05033423347647085, "grad_norm": 2.4448760427950456, "learning_rate": 5.033423347647085e-06, "loss": 0.6074, "step": 11370 }, { "epoch": 0.05033866040993404, "grad_norm": 2.5032914445358343, "learning_rate": 5.033866040993405e-06, "loss": 0.729, "step": 11371 }, { "epoch": 0.05034308734339723, "grad_norm": 3.0173258374799508, "learning_rate": 5.034308734339724e-06, "loss": 1.2412, "step": 11372 }, { "epoch": 0.05034751427686042, "grad_norm": 2.1090781496164417, "learning_rate": 5.034751427686042e-06, "loss": 0.4304, "step": 11373 }, { "epoch": 0.05035194121032361, "grad_norm": 2.141992385364429, "learning_rate": 5.0351941210323616e-06, "loss": 0.2972, "step": 11374 }, { "epoch": 0.0503563681437868, "grad_norm": 3.4121204158066756, "learning_rate": 5.03563681437868e-06, "loss": 1.1216, "step": 11375 }, { "epoch": 0.05036079507724999, "grad_norm": 2.475940167258005, "learning_rate": 5.0360795077249995e-06, "loss": 0.6813, "step": 11376 }, { "epoch": 0.05036522201071318, "grad_norm": 3.8340363582773844, "learning_rate": 5.036522201071319e-06, "loss": 1.1129, "step": 11377 }, { "epoch": 0.05036964894417637, "grad_norm": 2.8529513735422123, "learning_rate": 5.036964894417637e-06, "loss": 0.8004, "step": 11378 }, { "epoch": 0.05037407587763956, "grad_norm": 2.823264321572083, "learning_rate": 5.037407587763956e-06, "loss": 0.696, "step": 11379 }, { "epoch": 0.050378502811102746, "grad_norm": 2.768211909755512, "learning_rate": 5.037850281110276e-06, "loss": 0.8301, "step": 11380 }, { "epoch": 0.05038292974456594, "grad_norm": 2.717544223892683, "learning_rate": 5.038292974456595e-06, "loss": 0.7425, "step": 11381 }, { "epoch": 0.05038735667802913, "grad_norm": 2.3364617851317133, "learning_rate": 5.038735667802913e-06, "loss": 0.4748, "step": 11382 }, { "epoch": 0.05039178361149232, "grad_norm": 4.181824016097141, "learning_rate": 5.0391783611492325e-06, "loss": 0.8542, "step": 11383 }, { "epoch": 0.05039621054495551, "grad_norm": 2.9021613261377364, "learning_rate": 5.039621054495552e-06, "loss": 0.8071, "step": 11384 }, { "epoch": 0.0504006374784187, "grad_norm": 3.3203420238565693, "learning_rate": 5.04006374784187e-06, "loss": 1.2523, "step": 11385 }, { "epoch": 0.05040506441188189, "grad_norm": 3.7352601634422493, "learning_rate": 5.04050644118819e-06, "loss": 0.9852, "step": 11386 }, { "epoch": 0.05040949134534508, "grad_norm": 3.3040664056563807, "learning_rate": 5.040949134534508e-06, "loss": 0.7027, "step": 11387 }, { "epoch": 0.05041391827880827, "grad_norm": 3.166217449730791, "learning_rate": 5.041391827880827e-06, "loss": 0.7988, "step": 11388 }, { "epoch": 0.05041834521227146, "grad_norm": 2.917044088576053, "learning_rate": 5.041834521227147e-06, "loss": 0.7061, "step": 11389 }, { "epoch": 0.05042277214573465, "grad_norm": 3.249407317455532, "learning_rate": 5.0422772145734656e-06, "loss": 0.8422, "step": 11390 }, { "epoch": 0.05042719907919784, "grad_norm": 2.7160024589312535, "learning_rate": 5.042719907919784e-06, "loss": 0.7373, "step": 11391 }, { "epoch": 0.05043162601266103, "grad_norm": 2.802481251168178, "learning_rate": 5.043162601266104e-06, "loss": 0.7499, "step": 11392 }, { "epoch": 0.05043605294612422, "grad_norm": 3.1902864700256335, "learning_rate": 5.043605294612423e-06, "loss": 1.0649, "step": 11393 }, { "epoch": 0.05044047987958741, "grad_norm": 3.1882403614302395, "learning_rate": 5.044047987958741e-06, "loss": 0.8947, "step": 11394 }, { "epoch": 0.0504449068130506, "grad_norm": 2.6421252055716917, "learning_rate": 5.044490681305061e-06, "loss": 0.626, "step": 11395 }, { "epoch": 0.05044933374651379, "grad_norm": 4.173287932217488, "learning_rate": 5.044933374651379e-06, "loss": 1.3037, "step": 11396 }, { "epoch": 0.05045376067997698, "grad_norm": 2.608815780386164, "learning_rate": 5.045376067997698e-06, "loss": 0.7052, "step": 11397 }, { "epoch": 0.05045818761344017, "grad_norm": 2.750158974527218, "learning_rate": 5.045818761344018e-06, "loss": 0.8677, "step": 11398 }, { "epoch": 0.05046261454690336, "grad_norm": 3.4409186769094124, "learning_rate": 5.0462614546903365e-06, "loss": 1.0388, "step": 11399 }, { "epoch": 0.05046704148036655, "grad_norm": 3.1011444908790855, "learning_rate": 5.046704148036655e-06, "loss": 0.8257, "step": 11400 }, { "epoch": 0.05047146841382974, "grad_norm": 2.4402424452163634, "learning_rate": 5.047146841382975e-06, "loss": 0.6812, "step": 11401 }, { "epoch": 0.05047589534729293, "grad_norm": 2.4941732246118735, "learning_rate": 5.047589534729294e-06, "loss": 0.6103, "step": 11402 }, { "epoch": 0.05048032228075612, "grad_norm": 2.721481477797574, "learning_rate": 5.048032228075612e-06, "loss": 0.6483, "step": 11403 }, { "epoch": 0.05048474921421931, "grad_norm": 2.7123905260261765, "learning_rate": 5.048474921421932e-06, "loss": 0.6647, "step": 11404 }, { "epoch": 0.0504891761476825, "grad_norm": 2.6941646273122433, "learning_rate": 5.04891761476825e-06, "loss": 0.4566, "step": 11405 }, { "epoch": 0.05049360308114569, "grad_norm": 3.462683135642404, "learning_rate": 5.0493603081145696e-06, "loss": 0.9699, "step": 11406 }, { "epoch": 0.05049803001460888, "grad_norm": 2.6940570783623525, "learning_rate": 5.049803001460889e-06, "loss": 0.7912, "step": 11407 }, { "epoch": 0.05050245694807207, "grad_norm": 3.111509093596069, "learning_rate": 5.0502456948072075e-06, "loss": 1.1459, "step": 11408 }, { "epoch": 0.050506883881535264, "grad_norm": 3.4619810738918337, "learning_rate": 5.050688388153526e-06, "loss": 0.743, "step": 11409 }, { "epoch": 0.05051131081499845, "grad_norm": 2.434868023766151, "learning_rate": 5.051131081499846e-06, "loss": 0.7973, "step": 11410 }, { "epoch": 0.05051573774846164, "grad_norm": 2.5597181392519697, "learning_rate": 5.051573774846165e-06, "loss": 0.5001, "step": 11411 }, { "epoch": 0.05052016468192483, "grad_norm": 2.749661300023615, "learning_rate": 5.052016468192483e-06, "loss": 0.7652, "step": 11412 }, { "epoch": 0.05052459161538802, "grad_norm": 2.8876804597417034, "learning_rate": 5.052459161538803e-06, "loss": 0.9178, "step": 11413 }, { "epoch": 0.05052901854885121, "grad_norm": 2.4581788090362604, "learning_rate": 5.052901854885122e-06, "loss": 0.6283, "step": 11414 }, { "epoch": 0.0505334454823144, "grad_norm": 3.648805790557, "learning_rate": 5.0533445482314405e-06, "loss": 1.1092, "step": 11415 }, { "epoch": 0.05053787241577759, "grad_norm": 2.5565997721074596, "learning_rate": 5.05378724157776e-06, "loss": 0.4377, "step": 11416 }, { "epoch": 0.05054229934924078, "grad_norm": 3.039422203060991, "learning_rate": 5.054229934924078e-06, "loss": 0.7987, "step": 11417 }, { "epoch": 0.05054672628270397, "grad_norm": 2.566889987742537, "learning_rate": 5.054672628270397e-06, "loss": 0.5327, "step": 11418 }, { "epoch": 0.05055115321616716, "grad_norm": 2.5593830964846824, "learning_rate": 5.055115321616717e-06, "loss": 0.7978, "step": 11419 }, { "epoch": 0.05055558014963035, "grad_norm": 3.2495274965840144, "learning_rate": 5.055558014963036e-06, "loss": 0.8347, "step": 11420 }, { "epoch": 0.05056000708309354, "grad_norm": 2.4325783581772513, "learning_rate": 5.056000708309354e-06, "loss": 0.796, "step": 11421 }, { "epoch": 0.050564434016556734, "grad_norm": 2.4870825161253567, "learning_rate": 5.056443401655674e-06, "loss": 0.5031, "step": 11422 }, { "epoch": 0.050568860950019924, "grad_norm": 3.401779105335035, "learning_rate": 5.056886095001993e-06, "loss": 0.753, "step": 11423 }, { "epoch": 0.050573287883483115, "grad_norm": 2.9354885607324164, "learning_rate": 5.0573287883483115e-06, "loss": 0.7451, "step": 11424 }, { "epoch": 0.0505777148169463, "grad_norm": 2.49785477168225, "learning_rate": 5.057771481694631e-06, "loss": 0.746, "step": 11425 }, { "epoch": 0.05058214175040949, "grad_norm": 2.8641307924115895, "learning_rate": 5.058214175040949e-06, "loss": 0.9679, "step": 11426 }, { "epoch": 0.05058656868387268, "grad_norm": 2.8358500140469767, "learning_rate": 5.058656868387269e-06, "loss": 0.8044, "step": 11427 }, { "epoch": 0.05059099561733587, "grad_norm": 2.49442531388718, "learning_rate": 5.059099561733588e-06, "loss": 0.6377, "step": 11428 }, { "epoch": 0.05059542255079906, "grad_norm": 2.4538474829563772, "learning_rate": 5.059542255079907e-06, "loss": 0.6333, "step": 11429 }, { "epoch": 0.05059984948426225, "grad_norm": 3.1113633206440428, "learning_rate": 5.059984948426225e-06, "loss": 1.1029, "step": 11430 }, { "epoch": 0.05060427641772544, "grad_norm": 3.7306934222159174, "learning_rate": 5.060427641772545e-06, "loss": 1.0212, "step": 11431 }, { "epoch": 0.05060870335118863, "grad_norm": 4.313343866452206, "learning_rate": 5.060870335118864e-06, "loss": 1.1979, "step": 11432 }, { "epoch": 0.05061313028465182, "grad_norm": 2.6358686478880617, "learning_rate": 5.061313028465182e-06, "loss": 0.6463, "step": 11433 }, { "epoch": 0.05061755721811501, "grad_norm": 2.778897024970202, "learning_rate": 5.061755721811502e-06, "loss": 0.6074, "step": 11434 }, { "epoch": 0.0506219841515782, "grad_norm": 2.3740953956666457, "learning_rate": 5.06219841515782e-06, "loss": 0.4947, "step": 11435 }, { "epoch": 0.050626411085041394, "grad_norm": 3.1503036501658777, "learning_rate": 5.06264110850414e-06, "loss": 0.7003, "step": 11436 }, { "epoch": 0.050630838018504584, "grad_norm": 2.381382837895245, "learning_rate": 5.063083801850459e-06, "loss": 0.7998, "step": 11437 }, { "epoch": 0.050635264951967775, "grad_norm": 2.486032729149704, "learning_rate": 5.0635264951967776e-06, "loss": 0.7597, "step": 11438 }, { "epoch": 0.050639691885430965, "grad_norm": 2.9783380538617688, "learning_rate": 5.063969188543096e-06, "loss": 0.9371, "step": 11439 }, { "epoch": 0.05064411881889415, "grad_norm": 3.1795197364751195, "learning_rate": 5.064411881889416e-06, "loss": 0.9532, "step": 11440 }, { "epoch": 0.05064854575235734, "grad_norm": 2.646879170774324, "learning_rate": 5.064854575235735e-06, "loss": 0.7627, "step": 11441 }, { "epoch": 0.05065297268582053, "grad_norm": 2.7734952636399566, "learning_rate": 5.065297268582053e-06, "loss": 0.8464, "step": 11442 }, { "epoch": 0.05065739961928372, "grad_norm": 2.747645963840237, "learning_rate": 5.065739961928373e-06, "loss": 1.0532, "step": 11443 }, { "epoch": 0.05066182655274691, "grad_norm": 4.6419944121795655, "learning_rate": 5.066182655274692e-06, "loss": 1.3206, "step": 11444 }, { "epoch": 0.0506662534862101, "grad_norm": 2.931706725255092, "learning_rate": 5.066625348621011e-06, "loss": 0.92, "step": 11445 }, { "epoch": 0.05067068041967329, "grad_norm": 2.9075884692992005, "learning_rate": 5.06706804196733e-06, "loss": 0.8525, "step": 11446 }, { "epoch": 0.05067510735313648, "grad_norm": 3.2482818319862954, "learning_rate": 5.0675107353136485e-06, "loss": 0.9306, "step": 11447 }, { "epoch": 0.05067953428659967, "grad_norm": 2.725907535168959, "learning_rate": 5.067953428659967e-06, "loss": 0.9034, "step": 11448 }, { "epoch": 0.05068396122006286, "grad_norm": 2.6012627759266285, "learning_rate": 5.068396122006287e-06, "loss": 0.5874, "step": 11449 }, { "epoch": 0.050688388153526054, "grad_norm": 2.7826807819533537, "learning_rate": 5.068838815352606e-06, "loss": 0.74, "step": 11450 }, { "epoch": 0.050692815086989244, "grad_norm": 2.5550371462489974, "learning_rate": 5.069281508698924e-06, "loss": 0.599, "step": 11451 }, { "epoch": 0.050697242020452435, "grad_norm": 2.977405387343383, "learning_rate": 5.0697242020452445e-06, "loss": 0.9334, "step": 11452 }, { "epoch": 0.050701668953915625, "grad_norm": 3.3719676757426256, "learning_rate": 5.070166895391563e-06, "loss": 0.9929, "step": 11453 }, { "epoch": 0.050706095887378816, "grad_norm": 2.8532144788246585, "learning_rate": 5.070609588737882e-06, "loss": 0.893, "step": 11454 }, { "epoch": 0.050710522820842, "grad_norm": 2.6844694919553245, "learning_rate": 5.071052282084201e-06, "loss": 0.7898, "step": 11455 }, { "epoch": 0.05071494975430519, "grad_norm": 2.8684353408187233, "learning_rate": 5.0714949754305195e-06, "loss": 0.6763, "step": 11456 }, { "epoch": 0.05071937668776838, "grad_norm": 2.396330363294545, "learning_rate": 5.071937668776839e-06, "loss": 0.6504, "step": 11457 }, { "epoch": 0.05072380362123157, "grad_norm": 2.6795324266591236, "learning_rate": 5.072380362123158e-06, "loss": 0.9397, "step": 11458 }, { "epoch": 0.05072823055469476, "grad_norm": 3.063749432049282, "learning_rate": 5.072823055469477e-06, "loss": 0.9627, "step": 11459 }, { "epoch": 0.05073265748815795, "grad_norm": 3.264847537449206, "learning_rate": 5.073265748815795e-06, "loss": 1.0132, "step": 11460 }, { "epoch": 0.05073708442162114, "grad_norm": 2.8953620391455313, "learning_rate": 5.0737084421621155e-06, "loss": 0.8218, "step": 11461 }, { "epoch": 0.05074151135508433, "grad_norm": 2.3394107520951213, "learning_rate": 5.074151135508434e-06, "loss": 0.7559, "step": 11462 }, { "epoch": 0.050745938288547524, "grad_norm": 2.863743384234696, "learning_rate": 5.0745938288547525e-06, "loss": 0.8814, "step": 11463 }, { "epoch": 0.050750365222010714, "grad_norm": 3.042844401964791, "learning_rate": 5.075036522201072e-06, "loss": 0.6276, "step": 11464 }, { "epoch": 0.050754792155473905, "grad_norm": 2.492004685223071, "learning_rate": 5.075479215547391e-06, "loss": 0.6749, "step": 11465 }, { "epoch": 0.050759219088937095, "grad_norm": 3.055411711968678, "learning_rate": 5.07592190889371e-06, "loss": 0.8967, "step": 11466 }, { "epoch": 0.050763646022400286, "grad_norm": 2.9172628408737435, "learning_rate": 5.076364602240029e-06, "loss": 0.8996, "step": 11467 }, { "epoch": 0.050768072955863476, "grad_norm": 2.9928289746866756, "learning_rate": 5.076807295586348e-06, "loss": 1.0011, "step": 11468 }, { "epoch": 0.05077249988932667, "grad_norm": 4.190654158675893, "learning_rate": 5.077249988932666e-06, "loss": 1.369, "step": 11469 }, { "epoch": 0.05077692682278985, "grad_norm": 2.7322803238615103, "learning_rate": 5.0776926822789864e-06, "loss": 0.8767, "step": 11470 }, { "epoch": 0.05078135375625304, "grad_norm": 2.359750076598398, "learning_rate": 5.078135375625305e-06, "loss": 0.6327, "step": 11471 }, { "epoch": 0.05078578068971623, "grad_norm": 3.2966524095987553, "learning_rate": 5.0785780689716235e-06, "loss": 0.8463, "step": 11472 }, { "epoch": 0.05079020762317942, "grad_norm": 2.2280105217115076, "learning_rate": 5.079020762317944e-06, "loss": 0.5742, "step": 11473 }, { "epoch": 0.05079463455664261, "grad_norm": 2.767430220917334, "learning_rate": 5.079463455664262e-06, "loss": 0.8969, "step": 11474 }, { "epoch": 0.0507990614901058, "grad_norm": 2.768097547803056, "learning_rate": 5.079906149010581e-06, "loss": 0.8735, "step": 11475 }, { "epoch": 0.05080348842356899, "grad_norm": 2.6400805107106855, "learning_rate": 5.0803488423569e-06, "loss": 0.6336, "step": 11476 }, { "epoch": 0.050807915357032184, "grad_norm": 2.6833543014697203, "learning_rate": 5.080791535703219e-06, "loss": 0.76, "step": 11477 }, { "epoch": 0.050812342290495374, "grad_norm": 2.2087701541863405, "learning_rate": 5.081234229049537e-06, "loss": 0.5619, "step": 11478 }, { "epoch": 0.050816769223958565, "grad_norm": 2.6440499599993257, "learning_rate": 5.081676922395857e-06, "loss": 0.498, "step": 11479 }, { "epoch": 0.050821196157421755, "grad_norm": 2.35285429818992, "learning_rate": 5.082119615742176e-06, "loss": 0.5687, "step": 11480 }, { "epoch": 0.050825623090884946, "grad_norm": 2.794694182751921, "learning_rate": 5.082562309088494e-06, "loss": 0.7294, "step": 11481 }, { "epoch": 0.050830050024348136, "grad_norm": 2.96630665705217, "learning_rate": 5.083005002434815e-06, "loss": 0.7032, "step": 11482 }, { "epoch": 0.05083447695781133, "grad_norm": 2.224828596074935, "learning_rate": 5.083447695781133e-06, "loss": 0.6275, "step": 11483 }, { "epoch": 0.05083890389127452, "grad_norm": 2.46404938892216, "learning_rate": 5.083890389127452e-06, "loss": 0.697, "step": 11484 }, { "epoch": 0.0508433308247377, "grad_norm": 2.549180400391735, "learning_rate": 5.084333082473771e-06, "loss": 0.7606, "step": 11485 }, { "epoch": 0.05084775775820089, "grad_norm": 2.6935298312503364, "learning_rate": 5.08477577582009e-06, "loss": 0.676, "step": 11486 }, { "epoch": 0.05085218469166408, "grad_norm": 2.6850357072259587, "learning_rate": 5.085218469166409e-06, "loss": 0.8283, "step": 11487 }, { "epoch": 0.05085661162512727, "grad_norm": 3.1862989388035485, "learning_rate": 5.085661162512728e-06, "loss": 0.9112, "step": 11488 }, { "epoch": 0.05086103855859046, "grad_norm": 3.488302856986392, "learning_rate": 5.086103855859047e-06, "loss": 1.197, "step": 11489 }, { "epoch": 0.05086546549205365, "grad_norm": 2.4443968987610813, "learning_rate": 5.086546549205365e-06, "loss": 0.5258, "step": 11490 }, { "epoch": 0.050869892425516844, "grad_norm": 2.6667018720493783, "learning_rate": 5.086989242551686e-06, "loss": 0.7307, "step": 11491 }, { "epoch": 0.050874319358980034, "grad_norm": 2.464078473060962, "learning_rate": 5.087431935898004e-06, "loss": 0.7778, "step": 11492 }, { "epoch": 0.050878746292443225, "grad_norm": 2.5397851967664824, "learning_rate": 5.087874629244323e-06, "loss": 0.6373, "step": 11493 }, { "epoch": 0.050883173225906415, "grad_norm": 2.5350956175650996, "learning_rate": 5.088317322590642e-06, "loss": 0.6982, "step": 11494 }, { "epoch": 0.050887600159369606, "grad_norm": 2.528042612453685, "learning_rate": 5.088760015936961e-06, "loss": 0.8117, "step": 11495 }, { "epoch": 0.050892027092832796, "grad_norm": 2.883095081127302, "learning_rate": 5.08920270928328e-06, "loss": 1.0496, "step": 11496 }, { "epoch": 0.05089645402629599, "grad_norm": 2.729217223566744, "learning_rate": 5.089645402629599e-06, "loss": 0.6904, "step": 11497 }, { "epoch": 0.05090088095975918, "grad_norm": 2.3986945967910343, "learning_rate": 5.090088095975918e-06, "loss": 0.5808, "step": 11498 }, { "epoch": 0.05090530789322237, "grad_norm": 3.0610897654059825, "learning_rate": 5.090530789322236e-06, "loss": 0.8199, "step": 11499 }, { "epoch": 0.05090973482668555, "grad_norm": 2.7420828556646426, "learning_rate": 5.0909734826685565e-06, "loss": 0.734, "step": 11500 }, { "epoch": 0.05091416176014874, "grad_norm": 2.5357326332113774, "learning_rate": 5.091416176014875e-06, "loss": 0.6727, "step": 11501 }, { "epoch": 0.05091858869361193, "grad_norm": 3.1399729345708014, "learning_rate": 5.091858869361194e-06, "loss": 0.9613, "step": 11502 }, { "epoch": 0.05092301562707512, "grad_norm": 2.5341880932550804, "learning_rate": 5.092301562707514e-06, "loss": 0.6956, "step": 11503 }, { "epoch": 0.050927442560538314, "grad_norm": 3.0552334212906773, "learning_rate": 5.092744256053832e-06, "loss": 0.6544, "step": 11504 }, { "epoch": 0.050931869494001504, "grad_norm": 3.1214149507999385, "learning_rate": 5.093186949400151e-06, "loss": 0.8485, "step": 11505 }, { "epoch": 0.050936296427464695, "grad_norm": 2.734643720216407, "learning_rate": 5.09362964274647e-06, "loss": 0.7501, "step": 11506 }, { "epoch": 0.050940723360927885, "grad_norm": 2.4373632582878177, "learning_rate": 5.094072336092789e-06, "loss": 0.7363, "step": 11507 }, { "epoch": 0.050945150294391076, "grad_norm": 2.6088431676409694, "learning_rate": 5.094515029439108e-06, "loss": 0.653, "step": 11508 }, { "epoch": 0.050949577227854266, "grad_norm": 2.6936254634605836, "learning_rate": 5.0949577227854275e-06, "loss": 0.5776, "step": 11509 }, { "epoch": 0.05095400416131746, "grad_norm": 2.4889944004909874, "learning_rate": 5.095400416131746e-06, "loss": 0.8088, "step": 11510 }, { "epoch": 0.05095843109478065, "grad_norm": 2.987239048242748, "learning_rate": 5.0958431094780645e-06, "loss": 0.6172, "step": 11511 }, { "epoch": 0.05096285802824384, "grad_norm": 2.6858195702135292, "learning_rate": 5.096285802824385e-06, "loss": 0.8201, "step": 11512 }, { "epoch": 0.05096728496170703, "grad_norm": 2.4204426338153655, "learning_rate": 5.096728496170703e-06, "loss": 0.5571, "step": 11513 }, { "epoch": 0.05097171189517022, "grad_norm": 2.661226687162348, "learning_rate": 5.097171189517022e-06, "loss": 0.7974, "step": 11514 }, { "epoch": 0.0509761388286334, "grad_norm": 2.477704634732666, "learning_rate": 5.097613882863341e-06, "loss": 0.692, "step": 11515 }, { "epoch": 0.05098056576209659, "grad_norm": 2.864592869388918, "learning_rate": 5.09805657620966e-06, "loss": 0.9193, "step": 11516 }, { "epoch": 0.05098499269555978, "grad_norm": 2.511452008420368, "learning_rate": 5.098499269555979e-06, "loss": 0.5963, "step": 11517 }, { "epoch": 0.050989419629022974, "grad_norm": 2.8738841051922774, "learning_rate": 5.0989419629022984e-06, "loss": 0.7042, "step": 11518 }, { "epoch": 0.050993846562486164, "grad_norm": 2.746474179862919, "learning_rate": 5.099384656248617e-06, "loss": 1.0241, "step": 11519 }, { "epoch": 0.050998273495949355, "grad_norm": 3.226261474776566, "learning_rate": 5.0998273495949355e-06, "loss": 1.1407, "step": 11520 }, { "epoch": 0.051002700429412545, "grad_norm": 2.7757910502553953, "learning_rate": 5.100270042941256e-06, "loss": 0.7873, "step": 11521 }, { "epoch": 0.051007127362875736, "grad_norm": 2.8962912596694323, "learning_rate": 5.100712736287574e-06, "loss": 0.639, "step": 11522 }, { "epoch": 0.051011554296338926, "grad_norm": 3.6886273891182753, "learning_rate": 5.101155429633893e-06, "loss": 0.8776, "step": 11523 }, { "epoch": 0.05101598122980212, "grad_norm": 2.866752641823026, "learning_rate": 5.101598122980212e-06, "loss": 0.5484, "step": 11524 }, { "epoch": 0.05102040816326531, "grad_norm": 2.7049824784730303, "learning_rate": 5.1020408163265315e-06, "loss": 0.7109, "step": 11525 }, { "epoch": 0.0510248350967285, "grad_norm": 3.221713799900294, "learning_rate": 5.10248350967285e-06, "loss": 0.9835, "step": 11526 }, { "epoch": 0.05102926203019169, "grad_norm": 4.35391785816513, "learning_rate": 5.102926203019169e-06, "loss": 1.4647, "step": 11527 }, { "epoch": 0.05103368896365488, "grad_norm": 2.812655785032146, "learning_rate": 5.103368896365488e-06, "loss": 0.9276, "step": 11528 }, { "epoch": 0.05103811589711807, "grad_norm": 3.1242714298154217, "learning_rate": 5.103811589711806e-06, "loss": 0.9876, "step": 11529 }, { "epoch": 0.05104254283058126, "grad_norm": 2.3691506679953, "learning_rate": 5.104254283058127e-06, "loss": 0.6476, "step": 11530 }, { "epoch": 0.05104696976404444, "grad_norm": 3.190530198379894, "learning_rate": 5.104696976404445e-06, "loss": 0.8843, "step": 11531 }, { "epoch": 0.051051396697507634, "grad_norm": 2.480957404758907, "learning_rate": 5.105139669750764e-06, "loss": 0.6678, "step": 11532 }, { "epoch": 0.051055823630970824, "grad_norm": 2.942766484561735, "learning_rate": 5.105582363097084e-06, "loss": 0.6277, "step": 11533 }, { "epoch": 0.051060250564434015, "grad_norm": 3.2628681070452563, "learning_rate": 5.1060250564434024e-06, "loss": 1.0246, "step": 11534 }, { "epoch": 0.051064677497897205, "grad_norm": 2.202248217415353, "learning_rate": 5.106467749789721e-06, "loss": 0.4479, "step": 11535 }, { "epoch": 0.051069104431360396, "grad_norm": 2.6149955696807585, "learning_rate": 5.10691044313604e-06, "loss": 0.8104, "step": 11536 }, { "epoch": 0.051073531364823586, "grad_norm": 2.7609754290491733, "learning_rate": 5.107353136482359e-06, "loss": 0.7391, "step": 11537 }, { "epoch": 0.05107795829828678, "grad_norm": 2.7569377156185713, "learning_rate": 5.107795829828678e-06, "loss": 0.7828, "step": 11538 }, { "epoch": 0.05108238523174997, "grad_norm": 2.482731252717213, "learning_rate": 5.108238523174998e-06, "loss": 0.7494, "step": 11539 }, { "epoch": 0.05108681216521316, "grad_norm": 2.6514854249878983, "learning_rate": 5.108681216521316e-06, "loss": 0.6351, "step": 11540 }, { "epoch": 0.05109123909867635, "grad_norm": 2.6195577358133297, "learning_rate": 5.109123909867635e-06, "loss": 0.8025, "step": 11541 }, { "epoch": 0.05109566603213954, "grad_norm": 3.592907676235706, "learning_rate": 5.109566603213955e-06, "loss": 0.5281, "step": 11542 }, { "epoch": 0.05110009296560273, "grad_norm": 2.843229426516209, "learning_rate": 5.110009296560273e-06, "loss": 0.8593, "step": 11543 }, { "epoch": 0.05110451989906592, "grad_norm": 3.4227435587981394, "learning_rate": 5.110451989906592e-06, "loss": 1.0606, "step": 11544 }, { "epoch": 0.05110894683252911, "grad_norm": 2.704063467588918, "learning_rate": 5.110894683252911e-06, "loss": 0.6154, "step": 11545 }, { "epoch": 0.051113373765992294, "grad_norm": 2.4729449224629025, "learning_rate": 5.111337376599231e-06, "loss": 0.7572, "step": 11546 }, { "epoch": 0.051117800699455485, "grad_norm": 2.9260798044047234, "learning_rate": 5.111780069945549e-06, "loss": 0.8538, "step": 11547 }, { "epoch": 0.051122227632918675, "grad_norm": 2.743777149663684, "learning_rate": 5.1122227632918685e-06, "loss": 0.9995, "step": 11548 }, { "epoch": 0.051126654566381866, "grad_norm": 2.7389244381424356, "learning_rate": 5.112665456638187e-06, "loss": 0.812, "step": 11549 }, { "epoch": 0.051131081499845056, "grad_norm": 3.0131483244842205, "learning_rate": 5.113108149984506e-06, "loss": 1.1093, "step": 11550 }, { "epoch": 0.05113550843330825, "grad_norm": 3.212633677864741, "learning_rate": 5.113550843330826e-06, "loss": 0.9494, "step": 11551 }, { "epoch": 0.05113993536677144, "grad_norm": 2.4040588196649373, "learning_rate": 5.113993536677144e-06, "loss": 0.6231, "step": 11552 }, { "epoch": 0.05114436230023463, "grad_norm": 3.2626923389879288, "learning_rate": 5.114436230023463e-06, "loss": 0.9565, "step": 11553 }, { "epoch": 0.05114878923369782, "grad_norm": 2.709195204274965, "learning_rate": 5.114878923369783e-06, "loss": 1.0163, "step": 11554 }, { "epoch": 0.05115321616716101, "grad_norm": 3.103340717007328, "learning_rate": 5.115321616716102e-06, "loss": 0.7423, "step": 11555 }, { "epoch": 0.0511576431006242, "grad_norm": 2.8617820648267998, "learning_rate": 5.11576431006242e-06, "loss": 0.7257, "step": 11556 }, { "epoch": 0.05116207003408739, "grad_norm": 2.5932741029809194, "learning_rate": 5.1162070034087395e-06, "loss": 0.6926, "step": 11557 }, { "epoch": 0.05116649696755058, "grad_norm": 2.568296494289571, "learning_rate": 5.116649696755058e-06, "loss": 0.6481, "step": 11558 }, { "epoch": 0.05117092390101377, "grad_norm": 2.849893896793278, "learning_rate": 5.1170923901013765e-06, "loss": 0.7717, "step": 11559 }, { "epoch": 0.05117535083447696, "grad_norm": 2.497968543220263, "learning_rate": 5.117535083447697e-06, "loss": 0.6903, "step": 11560 }, { "epoch": 0.051179777767940145, "grad_norm": 2.9355539247529165, "learning_rate": 5.117977776794015e-06, "loss": 0.8016, "step": 11561 }, { "epoch": 0.051184204701403335, "grad_norm": 3.8013830796527843, "learning_rate": 5.118420470140334e-06, "loss": 1.0553, "step": 11562 }, { "epoch": 0.051188631634866526, "grad_norm": 2.9988268938630727, "learning_rate": 5.118863163486654e-06, "loss": 0.7841, "step": 11563 }, { "epoch": 0.051193058568329716, "grad_norm": 2.4077990312314683, "learning_rate": 5.1193058568329725e-06, "loss": 0.5781, "step": 11564 }, { "epoch": 0.05119748550179291, "grad_norm": 2.605422554046542, "learning_rate": 5.119748550179291e-06, "loss": 0.7716, "step": 11565 }, { "epoch": 0.0512019124352561, "grad_norm": 4.707055439661322, "learning_rate": 5.1201912435256104e-06, "loss": 1.2923, "step": 11566 }, { "epoch": 0.05120633936871929, "grad_norm": 3.7608107282139076, "learning_rate": 5.120633936871929e-06, "loss": 1.019, "step": 11567 }, { "epoch": 0.05121076630218248, "grad_norm": 2.5173622013208017, "learning_rate": 5.121076630218248e-06, "loss": 0.8915, "step": 11568 }, { "epoch": 0.05121519323564567, "grad_norm": 3.5929950412275127, "learning_rate": 5.121519323564568e-06, "loss": 0.8645, "step": 11569 }, { "epoch": 0.05121962016910886, "grad_norm": 2.902447115906059, "learning_rate": 5.121962016910886e-06, "loss": 0.5311, "step": 11570 }, { "epoch": 0.05122404710257205, "grad_norm": 2.703709398653743, "learning_rate": 5.122404710257205e-06, "loss": 0.6081, "step": 11571 }, { "epoch": 0.05122847403603524, "grad_norm": 2.935260734690298, "learning_rate": 5.122847403603525e-06, "loss": 0.595, "step": 11572 }, { "epoch": 0.05123290096949843, "grad_norm": 3.1401746702887348, "learning_rate": 5.1232900969498435e-06, "loss": 1.0598, "step": 11573 }, { "epoch": 0.05123732790296162, "grad_norm": 3.0190155188551735, "learning_rate": 5.123732790296162e-06, "loss": 0.817, "step": 11574 }, { "epoch": 0.05124175483642481, "grad_norm": 2.994413226699352, "learning_rate": 5.124175483642481e-06, "loss": 0.6402, "step": 11575 }, { "epoch": 0.051246181769887995, "grad_norm": 3.2599585841244934, "learning_rate": 5.124618176988801e-06, "loss": 0.6898, "step": 11576 }, { "epoch": 0.051250608703351186, "grad_norm": 2.521420912519557, "learning_rate": 5.125060870335119e-06, "loss": 0.7023, "step": 11577 }, { "epoch": 0.051255035636814376, "grad_norm": 2.844200570872092, "learning_rate": 5.125503563681439e-06, "loss": 0.6681, "step": 11578 }, { "epoch": 0.05125946257027757, "grad_norm": 3.1444274236837337, "learning_rate": 5.125946257027757e-06, "loss": 0.8965, "step": 11579 }, { "epoch": 0.05126388950374076, "grad_norm": 3.999278928544807, "learning_rate": 5.126388950374076e-06, "loss": 0.852, "step": 11580 }, { "epoch": 0.05126831643720395, "grad_norm": 3.158886282238056, "learning_rate": 5.126831643720396e-06, "loss": 0.9608, "step": 11581 }, { "epoch": 0.05127274337066714, "grad_norm": 3.0721899040329923, "learning_rate": 5.1272743370667144e-06, "loss": 0.682, "step": 11582 }, { "epoch": 0.05127717030413033, "grad_norm": 2.7979647373453203, "learning_rate": 5.127717030413033e-06, "loss": 0.9123, "step": 11583 }, { "epoch": 0.05128159723759352, "grad_norm": 3.225569904174649, "learning_rate": 5.128159723759353e-06, "loss": 0.957, "step": 11584 }, { "epoch": 0.05128602417105671, "grad_norm": 2.7961599203118674, "learning_rate": 5.128602417105672e-06, "loss": 0.6684, "step": 11585 }, { "epoch": 0.0512904511045199, "grad_norm": 3.3214415351152287, "learning_rate": 5.12904511045199e-06, "loss": 1.0202, "step": 11586 }, { "epoch": 0.05129487803798309, "grad_norm": 3.056307497009434, "learning_rate": 5.12948780379831e-06, "loss": 1.0584, "step": 11587 }, { "epoch": 0.05129930497144628, "grad_norm": 2.8924641614713664, "learning_rate": 5.129930497144628e-06, "loss": 0.9784, "step": 11588 }, { "epoch": 0.05130373190490947, "grad_norm": 2.561376444609232, "learning_rate": 5.1303731904909475e-06, "loss": 0.7516, "step": 11589 }, { "epoch": 0.05130815883837266, "grad_norm": 3.068760090046208, "learning_rate": 5.130815883837267e-06, "loss": 0.7218, "step": 11590 }, { "epoch": 0.051312585771835846, "grad_norm": 3.8058867320765803, "learning_rate": 5.131258577183585e-06, "loss": 0.9981, "step": 11591 }, { "epoch": 0.05131701270529904, "grad_norm": 2.512566253708792, "learning_rate": 5.131701270529904e-06, "loss": 0.6169, "step": 11592 }, { "epoch": 0.05132143963876223, "grad_norm": 2.9957770039082985, "learning_rate": 5.132143963876224e-06, "loss": 0.509, "step": 11593 }, { "epoch": 0.05132586657222542, "grad_norm": 2.5458585091421715, "learning_rate": 5.132586657222543e-06, "loss": 0.6511, "step": 11594 }, { "epoch": 0.05133029350568861, "grad_norm": 2.867935257833667, "learning_rate": 5.133029350568861e-06, "loss": 0.6854, "step": 11595 }, { "epoch": 0.0513347204391518, "grad_norm": 2.7167271332974137, "learning_rate": 5.1334720439151805e-06, "loss": 0.6221, "step": 11596 }, { "epoch": 0.05133914737261499, "grad_norm": 2.8546609048954936, "learning_rate": 5.133914737261499e-06, "loss": 0.6423, "step": 11597 }, { "epoch": 0.05134357430607818, "grad_norm": 2.9441237371507762, "learning_rate": 5.1343574306078184e-06, "loss": 0.8937, "step": 11598 }, { "epoch": 0.05134800123954137, "grad_norm": 2.865120033402153, "learning_rate": 5.134800123954138e-06, "loss": 0.8926, "step": 11599 }, { "epoch": 0.05135242817300456, "grad_norm": 2.708135516066071, "learning_rate": 5.135242817300456e-06, "loss": 0.7582, "step": 11600 }, { "epoch": 0.05135685510646775, "grad_norm": 2.810507568451517, "learning_rate": 5.135685510646775e-06, "loss": 0.7727, "step": 11601 }, { "epoch": 0.05136128203993094, "grad_norm": 2.552899894611991, "learning_rate": 5.136128203993095e-06, "loss": 0.6867, "step": 11602 }, { "epoch": 0.05136570897339413, "grad_norm": 3.079725309256889, "learning_rate": 5.136570897339414e-06, "loss": 0.9921, "step": 11603 }, { "epoch": 0.05137013590685732, "grad_norm": 2.9326996061245434, "learning_rate": 5.137013590685732e-06, "loss": 0.6611, "step": 11604 }, { "epoch": 0.05137456284032051, "grad_norm": 2.242039694975125, "learning_rate": 5.1374562840320515e-06, "loss": 0.7428, "step": 11605 }, { "epoch": 0.0513789897737837, "grad_norm": 2.6794696950920267, "learning_rate": 5.137898977378371e-06, "loss": 0.727, "step": 11606 }, { "epoch": 0.05138341670724689, "grad_norm": 2.4743079176938068, "learning_rate": 5.138341670724689e-06, "loss": 0.489, "step": 11607 }, { "epoch": 0.05138784364071008, "grad_norm": 3.087930883011254, "learning_rate": 5.138784364071009e-06, "loss": 0.6727, "step": 11608 }, { "epoch": 0.05139227057417327, "grad_norm": 2.685207007896818, "learning_rate": 5.139227057417327e-06, "loss": 0.7469, "step": 11609 }, { "epoch": 0.05139669750763646, "grad_norm": 2.753744276283196, "learning_rate": 5.139669750763646e-06, "loss": 0.4954, "step": 11610 }, { "epoch": 0.05140112444109965, "grad_norm": 2.7705718185238846, "learning_rate": 5.140112444109966e-06, "loss": 0.94, "step": 11611 }, { "epoch": 0.05140555137456284, "grad_norm": 3.224697360543522, "learning_rate": 5.1405551374562845e-06, "loss": 0.6279, "step": 11612 }, { "epoch": 0.05140997830802603, "grad_norm": 3.0785230653084388, "learning_rate": 5.140997830802603e-06, "loss": 0.7998, "step": 11613 }, { "epoch": 0.05141440524148922, "grad_norm": 2.8459686774119235, "learning_rate": 5.141440524148923e-06, "loss": 0.7632, "step": 11614 }, { "epoch": 0.05141883217495241, "grad_norm": 2.603804076548856, "learning_rate": 5.141883217495242e-06, "loss": 0.4996, "step": 11615 }, { "epoch": 0.0514232591084156, "grad_norm": 3.135341692279784, "learning_rate": 5.14232591084156e-06, "loss": 0.7859, "step": 11616 }, { "epoch": 0.05142768604187879, "grad_norm": 3.8527149266453105, "learning_rate": 5.14276860418788e-06, "loss": 0.8889, "step": 11617 }, { "epoch": 0.05143211297534198, "grad_norm": 3.241509400162642, "learning_rate": 5.143211297534198e-06, "loss": 0.8957, "step": 11618 }, { "epoch": 0.05143653990880517, "grad_norm": 2.9724586411804927, "learning_rate": 5.143653990880518e-06, "loss": 0.8026, "step": 11619 }, { "epoch": 0.051440966842268364, "grad_norm": 2.5792657734558384, "learning_rate": 5.144096684226837e-06, "loss": 0.6314, "step": 11620 }, { "epoch": 0.05144539377573155, "grad_norm": 2.542573240444493, "learning_rate": 5.1445393775731555e-06, "loss": 0.6829, "step": 11621 }, { "epoch": 0.05144982070919474, "grad_norm": 2.5341562138858986, "learning_rate": 5.144982070919474e-06, "loss": 0.9445, "step": 11622 }, { "epoch": 0.05145424764265793, "grad_norm": 2.5217622878329014, "learning_rate": 5.145424764265794e-06, "loss": 0.567, "step": 11623 }, { "epoch": 0.05145867457612112, "grad_norm": 2.97605770283237, "learning_rate": 5.145867457612113e-06, "loss": 0.8742, "step": 11624 }, { "epoch": 0.05146310150958431, "grad_norm": 2.845034816791309, "learning_rate": 5.146310150958431e-06, "loss": 0.5178, "step": 11625 }, { "epoch": 0.0514675284430475, "grad_norm": 3.0620290862103645, "learning_rate": 5.146752844304751e-06, "loss": 1.1577, "step": 11626 }, { "epoch": 0.05147195537651069, "grad_norm": 3.516539993373513, "learning_rate": 5.14719553765107e-06, "loss": 0.9432, "step": 11627 }, { "epoch": 0.05147638230997388, "grad_norm": 3.1203158188514584, "learning_rate": 5.1476382309973885e-06, "loss": 0.9567, "step": 11628 }, { "epoch": 0.05148080924343707, "grad_norm": 2.3670856803693137, "learning_rate": 5.148080924343708e-06, "loss": 0.7439, "step": 11629 }, { "epoch": 0.05148523617690026, "grad_norm": 2.4319870731287563, "learning_rate": 5.1485236176900264e-06, "loss": 0.6011, "step": 11630 }, { "epoch": 0.05148966311036345, "grad_norm": 2.410084864065849, "learning_rate": 5.148966311036345e-06, "loss": 0.5592, "step": 11631 }, { "epoch": 0.05149409004382664, "grad_norm": 2.986954483072847, "learning_rate": 5.149409004382665e-06, "loss": 0.6649, "step": 11632 }, { "epoch": 0.051498516977289834, "grad_norm": 2.634047570359728, "learning_rate": 5.149851697728984e-06, "loss": 0.8525, "step": 11633 }, { "epoch": 0.051502943910753024, "grad_norm": 2.928914874120459, "learning_rate": 5.150294391075302e-06, "loss": 0.9503, "step": 11634 }, { "epoch": 0.051507370844216215, "grad_norm": 3.126344794197182, "learning_rate": 5.1507370844216224e-06, "loss": 0.739, "step": 11635 }, { "epoch": 0.0515117977776794, "grad_norm": 3.4327400499329106, "learning_rate": 5.151179777767941e-06, "loss": 0.8737, "step": 11636 }, { "epoch": 0.05151622471114259, "grad_norm": 2.977838817163769, "learning_rate": 5.1516224711142595e-06, "loss": 0.7716, "step": 11637 }, { "epoch": 0.05152065164460578, "grad_norm": 3.0978247704413677, "learning_rate": 5.152065164460579e-06, "loss": 0.6022, "step": 11638 }, { "epoch": 0.05152507857806897, "grad_norm": 2.4663832441639, "learning_rate": 5.152507857806897e-06, "loss": 0.7828, "step": 11639 }, { "epoch": 0.05152950551153216, "grad_norm": 3.7900467898590127, "learning_rate": 5.152950551153216e-06, "loss": 0.6818, "step": 11640 }, { "epoch": 0.05153393244499535, "grad_norm": 2.875752766513733, "learning_rate": 5.153393244499536e-06, "loss": 0.7912, "step": 11641 }, { "epoch": 0.05153835937845854, "grad_norm": 2.251467846189517, "learning_rate": 5.153835937845855e-06, "loss": 0.5772, "step": 11642 }, { "epoch": 0.05154278631192173, "grad_norm": 2.8373353427325547, "learning_rate": 5.154278631192173e-06, "loss": 0.6396, "step": 11643 }, { "epoch": 0.05154721324538492, "grad_norm": 2.2196697288357905, "learning_rate": 5.154721324538493e-06, "loss": 0.5949, "step": 11644 }, { "epoch": 0.05155164017884811, "grad_norm": 2.6200444982804614, "learning_rate": 5.155164017884812e-06, "loss": 0.7431, "step": 11645 }, { "epoch": 0.0515560671123113, "grad_norm": 3.195547775231757, "learning_rate": 5.1556067112311304e-06, "loss": 0.8782, "step": 11646 }, { "epoch": 0.051560494045774494, "grad_norm": 2.27471583725989, "learning_rate": 5.15604940457745e-06, "loss": 0.5646, "step": 11647 }, { "epoch": 0.051564920979237684, "grad_norm": 3.988219716227388, "learning_rate": 5.156492097923768e-06, "loss": 1.0974, "step": 11648 }, { "epoch": 0.051569347912700875, "grad_norm": 3.8526215202346696, "learning_rate": 5.156934791270088e-06, "loss": 0.9618, "step": 11649 }, { "epoch": 0.051573774846164065, "grad_norm": 3.212002927165973, "learning_rate": 5.157377484616407e-06, "loss": 0.7962, "step": 11650 }, { "epoch": 0.05157820177962725, "grad_norm": 2.7913024372547013, "learning_rate": 5.157820177962726e-06, "loss": 0.5962, "step": 11651 }, { "epoch": 0.05158262871309044, "grad_norm": 2.612664234757639, "learning_rate": 5.158262871309044e-06, "loss": 0.7282, "step": 11652 }, { "epoch": 0.05158705564655363, "grad_norm": 2.633034270926791, "learning_rate": 5.158705564655364e-06, "loss": 0.5654, "step": 11653 }, { "epoch": 0.05159148258001682, "grad_norm": 2.884021468861686, "learning_rate": 5.159148258001683e-06, "loss": 0.9506, "step": 11654 }, { "epoch": 0.05159590951348001, "grad_norm": 2.5284110354332747, "learning_rate": 5.159590951348001e-06, "loss": 0.5367, "step": 11655 }, { "epoch": 0.0516003364469432, "grad_norm": 2.49190194601769, "learning_rate": 5.160033644694321e-06, "loss": 0.5993, "step": 11656 }, { "epoch": 0.05160476338040639, "grad_norm": 2.5496803563807857, "learning_rate": 5.16047633804064e-06, "loss": 0.4513, "step": 11657 }, { "epoch": 0.05160919031386958, "grad_norm": 3.1228720333940796, "learning_rate": 5.160919031386959e-06, "loss": 0.6705, "step": 11658 }, { "epoch": 0.05161361724733277, "grad_norm": 2.542432062130335, "learning_rate": 5.161361724733278e-06, "loss": 0.7501, "step": 11659 }, { "epoch": 0.05161804418079596, "grad_norm": 2.5883637426539305, "learning_rate": 5.1618044180795965e-06, "loss": 1.0305, "step": 11660 }, { "epoch": 0.051622471114259154, "grad_norm": 2.6354733472940355, "learning_rate": 5.162247111425915e-06, "loss": 0.9795, "step": 11661 }, { "epoch": 0.051626898047722344, "grad_norm": 3.495762089481727, "learning_rate": 5.162689804772235e-06, "loss": 1.0389, "step": 11662 }, { "epoch": 0.051631324981185535, "grad_norm": 2.8471095280850367, "learning_rate": 5.163132498118554e-06, "loss": 0.6832, "step": 11663 }, { "epoch": 0.051635751914648725, "grad_norm": 2.8128667625171304, "learning_rate": 5.163575191464872e-06, "loss": 0.6659, "step": 11664 }, { "epoch": 0.051640178848111916, "grad_norm": 2.840307275527949, "learning_rate": 5.1640178848111925e-06, "loss": 0.5893, "step": 11665 }, { "epoch": 0.0516446057815751, "grad_norm": 2.50425110434218, "learning_rate": 5.164460578157511e-06, "loss": 0.9348, "step": 11666 }, { "epoch": 0.05164903271503829, "grad_norm": 2.8038882123948956, "learning_rate": 5.16490327150383e-06, "loss": 0.5353, "step": 11667 }, { "epoch": 0.05165345964850148, "grad_norm": 2.7319231230507017, "learning_rate": 5.165345964850149e-06, "loss": 0.9702, "step": 11668 }, { "epoch": 0.05165788658196467, "grad_norm": 2.675319211897344, "learning_rate": 5.1657886581964675e-06, "loss": 0.7812, "step": 11669 }, { "epoch": 0.05166231351542786, "grad_norm": 2.3956955664304846, "learning_rate": 5.166231351542787e-06, "loss": 0.4637, "step": 11670 }, { "epoch": 0.05166674044889105, "grad_norm": 3.555068067407212, "learning_rate": 5.166674044889106e-06, "loss": 0.6943, "step": 11671 }, { "epoch": 0.05167116738235424, "grad_norm": 3.709991944828996, "learning_rate": 5.167116738235425e-06, "loss": 0.5867, "step": 11672 }, { "epoch": 0.05167559431581743, "grad_norm": 3.1547150884793904, "learning_rate": 5.167559431581743e-06, "loss": 0.5009, "step": 11673 }, { "epoch": 0.051680021249280624, "grad_norm": 2.866322689175752, "learning_rate": 5.1680021249280635e-06, "loss": 0.8343, "step": 11674 }, { "epoch": 0.051684448182743814, "grad_norm": 3.530775148998443, "learning_rate": 5.168444818274382e-06, "loss": 1.0349, "step": 11675 }, { "epoch": 0.051688875116207005, "grad_norm": 2.620815056548534, "learning_rate": 5.1688875116207005e-06, "loss": 0.7163, "step": 11676 }, { "epoch": 0.051693302049670195, "grad_norm": 2.6802178384402495, "learning_rate": 5.16933020496702e-06, "loss": 0.7371, "step": 11677 }, { "epoch": 0.051697728983133386, "grad_norm": 3.9218374045433326, "learning_rate": 5.1697728983133384e-06, "loss": 1.0063, "step": 11678 }, { "epoch": 0.051702155916596576, "grad_norm": 4.448027802111534, "learning_rate": 5.170215591659658e-06, "loss": 1.212, "step": 11679 }, { "epoch": 0.05170658285005977, "grad_norm": 3.2776217991985095, "learning_rate": 5.170658285005977e-06, "loss": 0.9863, "step": 11680 }, { "epoch": 0.05171100978352296, "grad_norm": 2.3780875982470917, "learning_rate": 5.171100978352296e-06, "loss": 0.7863, "step": 11681 }, { "epoch": 0.05171543671698614, "grad_norm": 3.4090975886012322, "learning_rate": 5.171543671698614e-06, "loss": 1.1399, "step": 11682 }, { "epoch": 0.05171986365044933, "grad_norm": 2.603009174855068, "learning_rate": 5.1719863650449344e-06, "loss": 0.6288, "step": 11683 }, { "epoch": 0.05172429058391252, "grad_norm": 2.8978877381678525, "learning_rate": 5.172429058391253e-06, "loss": 0.7883, "step": 11684 }, { "epoch": 0.05172871751737571, "grad_norm": 2.879776076539564, "learning_rate": 5.1728717517375715e-06, "loss": 0.815, "step": 11685 }, { "epoch": 0.0517331444508389, "grad_norm": 2.84104929178817, "learning_rate": 5.173314445083891e-06, "loss": 0.9506, "step": 11686 }, { "epoch": 0.05173757138430209, "grad_norm": 2.593082617634159, "learning_rate": 5.17375713843021e-06, "loss": 0.6813, "step": 11687 }, { "epoch": 0.051741998317765284, "grad_norm": 3.5962768423393388, "learning_rate": 5.174199831776529e-06, "loss": 1.0149, "step": 11688 }, { "epoch": 0.051746425251228474, "grad_norm": 2.6771932048338583, "learning_rate": 5.174642525122848e-06, "loss": 0.9189, "step": 11689 }, { "epoch": 0.051750852184691665, "grad_norm": 2.6543050083407405, "learning_rate": 5.175085218469167e-06, "loss": 0.6575, "step": 11690 }, { "epoch": 0.051755279118154855, "grad_norm": 2.581023296659134, "learning_rate": 5.175527911815485e-06, "loss": 0.5326, "step": 11691 }, { "epoch": 0.051759706051618046, "grad_norm": 2.665855990952967, "learning_rate": 5.175970605161805e-06, "loss": 0.8925, "step": 11692 }, { "epoch": 0.051764132985081236, "grad_norm": 3.198816934926563, "learning_rate": 5.176413298508124e-06, "loss": 1.0664, "step": 11693 }, { "epoch": 0.05176855991854443, "grad_norm": 3.5204508011831708, "learning_rate": 5.1768559918544424e-06, "loss": 1.395, "step": 11694 }, { "epoch": 0.05177298685200762, "grad_norm": 3.394897315414233, "learning_rate": 5.177298685200763e-06, "loss": 1.1719, "step": 11695 }, { "epoch": 0.05177741378547081, "grad_norm": 3.434496667144594, "learning_rate": 5.177741378547081e-06, "loss": 1.0243, "step": 11696 }, { "epoch": 0.05178184071893399, "grad_norm": 2.8138029344277964, "learning_rate": 5.1781840718934e-06, "loss": 0.9323, "step": 11697 }, { "epoch": 0.05178626765239718, "grad_norm": 3.2398355122469873, "learning_rate": 5.178626765239719e-06, "loss": 0.7164, "step": 11698 }, { "epoch": 0.05179069458586037, "grad_norm": 3.2671058616840654, "learning_rate": 5.179069458586038e-06, "loss": 1.0115, "step": 11699 }, { "epoch": 0.05179512151932356, "grad_norm": 2.5490061245776587, "learning_rate": 5.179512151932357e-06, "loss": 0.7037, "step": 11700 }, { "epoch": 0.05179954845278675, "grad_norm": 2.421011990913213, "learning_rate": 5.179954845278676e-06, "loss": 0.6908, "step": 11701 }, { "epoch": 0.051803975386249944, "grad_norm": 2.2951817463393294, "learning_rate": 5.180397538624995e-06, "loss": 0.5366, "step": 11702 }, { "epoch": 0.051808402319713134, "grad_norm": 2.5324482667702135, "learning_rate": 5.180840231971313e-06, "loss": 0.6076, "step": 11703 }, { "epoch": 0.051812829253176325, "grad_norm": 3.2135135509693717, "learning_rate": 5.181282925317634e-06, "loss": 0.9692, "step": 11704 }, { "epoch": 0.051817256186639515, "grad_norm": 2.933052161806312, "learning_rate": 5.181725618663952e-06, "loss": 0.484, "step": 11705 }, { "epoch": 0.051821683120102706, "grad_norm": 2.53533138339609, "learning_rate": 5.182168312010271e-06, "loss": 0.7508, "step": 11706 }, { "epoch": 0.051826110053565896, "grad_norm": 2.614976835802405, "learning_rate": 5.18261100535659e-06, "loss": 0.4864, "step": 11707 }, { "epoch": 0.05183053698702909, "grad_norm": 2.53408704576542, "learning_rate": 5.183053698702909e-06, "loss": 0.5324, "step": 11708 }, { "epoch": 0.05183496392049228, "grad_norm": 3.070185585012273, "learning_rate": 5.183496392049228e-06, "loss": 0.8353, "step": 11709 }, { "epoch": 0.05183939085395547, "grad_norm": 2.7324919552166147, "learning_rate": 5.183939085395547e-06, "loss": 0.624, "step": 11710 }, { "epoch": 0.05184381778741866, "grad_norm": 2.891974442649948, "learning_rate": 5.184381778741866e-06, "loss": 0.8388, "step": 11711 }, { "epoch": 0.05184824472088184, "grad_norm": 2.4332147304019336, "learning_rate": 5.184824472088184e-06, "loss": 0.7028, "step": 11712 }, { "epoch": 0.05185267165434503, "grad_norm": 2.339990750026931, "learning_rate": 5.1852671654345045e-06, "loss": 0.7006, "step": 11713 }, { "epoch": 0.05185709858780822, "grad_norm": 2.3724573828467816, "learning_rate": 5.185709858780823e-06, "loss": 0.706, "step": 11714 }, { "epoch": 0.051861525521271414, "grad_norm": 2.363977436907463, "learning_rate": 5.186152552127142e-06, "loss": 0.7672, "step": 11715 }, { "epoch": 0.051865952454734604, "grad_norm": 3.1387472057526424, "learning_rate": 5.186595245473461e-06, "loss": 0.7105, "step": 11716 }, { "epoch": 0.051870379388197795, "grad_norm": 3.2877004883080736, "learning_rate": 5.18703793881978e-06, "loss": 0.8625, "step": 11717 }, { "epoch": 0.051874806321660985, "grad_norm": 3.5339839489788267, "learning_rate": 5.187480632166099e-06, "loss": 1.1596, "step": 11718 }, { "epoch": 0.051879233255124176, "grad_norm": 2.6280559067230316, "learning_rate": 5.187923325512418e-06, "loss": 0.6405, "step": 11719 }, { "epoch": 0.051883660188587366, "grad_norm": 2.4226532834472256, "learning_rate": 5.188366018858737e-06, "loss": 0.6236, "step": 11720 }, { "epoch": 0.05188808712205056, "grad_norm": 2.8508723509939804, "learning_rate": 5.188808712205055e-06, "loss": 0.8275, "step": 11721 }, { "epoch": 0.05189251405551375, "grad_norm": 3.5074267930527077, "learning_rate": 5.1892514055513755e-06, "loss": 1.3151, "step": 11722 }, { "epoch": 0.05189694098897694, "grad_norm": 2.3980908085600867, "learning_rate": 5.189694098897694e-06, "loss": 0.8201, "step": 11723 }, { "epoch": 0.05190136792244013, "grad_norm": 2.843326396796174, "learning_rate": 5.1901367922440125e-06, "loss": 0.9729, "step": 11724 }, { "epoch": 0.05190579485590332, "grad_norm": 2.6708835314941837, "learning_rate": 5.190579485590333e-06, "loss": 1.0113, "step": 11725 }, { "epoch": 0.05191022178936651, "grad_norm": 2.8318623152898814, "learning_rate": 5.191022178936651e-06, "loss": 0.4492, "step": 11726 }, { "epoch": 0.05191464872282969, "grad_norm": 3.1670793334104435, "learning_rate": 5.19146487228297e-06, "loss": 0.7651, "step": 11727 }, { "epoch": 0.05191907565629288, "grad_norm": 2.571068149557372, "learning_rate": 5.191907565629289e-06, "loss": 0.8563, "step": 11728 }, { "epoch": 0.051923502589756074, "grad_norm": 2.5938668857042315, "learning_rate": 5.192350258975608e-06, "loss": 0.4922, "step": 11729 }, { "epoch": 0.051927929523219264, "grad_norm": 2.226909290010762, "learning_rate": 5.192792952321927e-06, "loss": 0.6018, "step": 11730 }, { "epoch": 0.051932356456682455, "grad_norm": 3.059998899959647, "learning_rate": 5.1932356456682464e-06, "loss": 0.87, "step": 11731 }, { "epoch": 0.051936783390145645, "grad_norm": 2.3254556599198914, "learning_rate": 5.193678339014565e-06, "loss": 0.6323, "step": 11732 }, { "epoch": 0.051941210323608836, "grad_norm": 3.030334663112407, "learning_rate": 5.1941210323608835e-06, "loss": 0.622, "step": 11733 }, { "epoch": 0.051945637257072026, "grad_norm": 2.5983784656509568, "learning_rate": 5.194563725707204e-06, "loss": 0.6123, "step": 11734 }, { "epoch": 0.05195006419053522, "grad_norm": 2.650110813299678, "learning_rate": 5.195006419053522e-06, "loss": 0.7947, "step": 11735 }, { "epoch": 0.05195449112399841, "grad_norm": 2.7808592303373, "learning_rate": 5.195449112399841e-06, "loss": 0.8371, "step": 11736 }, { "epoch": 0.0519589180574616, "grad_norm": 2.6979814643238806, "learning_rate": 5.19589180574616e-06, "loss": 0.8022, "step": 11737 }, { "epoch": 0.05196334499092479, "grad_norm": 2.8210475861474875, "learning_rate": 5.1963344990924795e-06, "loss": 0.3882, "step": 11738 }, { "epoch": 0.05196777192438798, "grad_norm": 2.9550377758845134, "learning_rate": 5.196777192438798e-06, "loss": 1.1648, "step": 11739 }, { "epoch": 0.05197219885785117, "grad_norm": 2.768905674848694, "learning_rate": 5.197219885785117e-06, "loss": 0.7616, "step": 11740 }, { "epoch": 0.05197662579131436, "grad_norm": 3.5729906870998898, "learning_rate": 5.197662579131436e-06, "loss": 1.1373, "step": 11741 }, { "epoch": 0.05198105272477754, "grad_norm": 2.3085246593485103, "learning_rate": 5.1981052724777544e-06, "loss": 0.6504, "step": 11742 }, { "epoch": 0.051985479658240734, "grad_norm": 4.5813147380092305, "learning_rate": 5.198547965824075e-06, "loss": 1.2144, "step": 11743 }, { "epoch": 0.051989906591703924, "grad_norm": 2.259747196050949, "learning_rate": 5.198990659170393e-06, "loss": 0.6746, "step": 11744 }, { "epoch": 0.051994333525167115, "grad_norm": 2.442351705784799, "learning_rate": 5.199433352516712e-06, "loss": 0.592, "step": 11745 }, { "epoch": 0.051998760458630305, "grad_norm": 3.370920267754721, "learning_rate": 5.199876045863032e-06, "loss": 0.7861, "step": 11746 }, { "epoch": 0.052003187392093496, "grad_norm": 2.9630744813724945, "learning_rate": 5.2003187392093504e-06, "loss": 0.7178, "step": 11747 }, { "epoch": 0.052007614325556686, "grad_norm": 3.0424513087909943, "learning_rate": 5.200761432555669e-06, "loss": 0.7051, "step": 11748 }, { "epoch": 0.05201204125901988, "grad_norm": 2.842954682131802, "learning_rate": 5.201204125901988e-06, "loss": 0.7967, "step": 11749 }, { "epoch": 0.05201646819248307, "grad_norm": 2.4097073365636423, "learning_rate": 5.201646819248307e-06, "loss": 0.5976, "step": 11750 }, { "epoch": 0.05202089512594626, "grad_norm": 3.0710647939360336, "learning_rate": 5.202089512594625e-06, "loss": 0.7759, "step": 11751 }, { "epoch": 0.05202532205940945, "grad_norm": 3.225891815108576, "learning_rate": 5.202532205940946e-06, "loss": 0.7011, "step": 11752 }, { "epoch": 0.05202974899287264, "grad_norm": 2.791483732009247, "learning_rate": 5.202974899287264e-06, "loss": 0.7539, "step": 11753 }, { "epoch": 0.05203417592633583, "grad_norm": 2.5361501006924603, "learning_rate": 5.203417592633583e-06, "loss": 0.8122, "step": 11754 }, { "epoch": 0.05203860285979902, "grad_norm": 3.3027161584700666, "learning_rate": 5.203860285979903e-06, "loss": 0.9031, "step": 11755 }, { "epoch": 0.05204302979326221, "grad_norm": 2.7549702889503735, "learning_rate": 5.204302979326221e-06, "loss": 0.8478, "step": 11756 }, { "epoch": 0.052047456726725394, "grad_norm": 3.3280565622556075, "learning_rate": 5.20474567267254e-06, "loss": 0.793, "step": 11757 }, { "epoch": 0.052051883660188585, "grad_norm": 2.612287295234874, "learning_rate": 5.205188366018859e-06, "loss": 0.864, "step": 11758 }, { "epoch": 0.052056310593651775, "grad_norm": 2.9549237813302107, "learning_rate": 5.205631059365178e-06, "loss": 0.8803, "step": 11759 }, { "epoch": 0.052060737527114966, "grad_norm": 2.48342146659049, "learning_rate": 5.206073752711497e-06, "loss": 0.6289, "step": 11760 }, { "epoch": 0.052065164460578156, "grad_norm": 3.340504755564278, "learning_rate": 5.2065164460578165e-06, "loss": 0.9868, "step": 11761 }, { "epoch": 0.05206959139404135, "grad_norm": 3.673263874845966, "learning_rate": 5.206959139404135e-06, "loss": 1.1219, "step": 11762 }, { "epoch": 0.05207401832750454, "grad_norm": 3.0068305573891436, "learning_rate": 5.207401832750454e-06, "loss": 0.5147, "step": 11763 }, { "epoch": 0.05207844526096773, "grad_norm": 2.5375053147047657, "learning_rate": 5.207844526096774e-06, "loss": 0.6658, "step": 11764 }, { "epoch": 0.05208287219443092, "grad_norm": 3.8182698682672105, "learning_rate": 5.208287219443092e-06, "loss": 0.9299, "step": 11765 }, { "epoch": 0.05208729912789411, "grad_norm": 2.4724446208078446, "learning_rate": 5.208729912789411e-06, "loss": 0.6203, "step": 11766 }, { "epoch": 0.0520917260613573, "grad_norm": 2.5748652704312245, "learning_rate": 5.20917260613573e-06, "loss": 0.4535, "step": 11767 }, { "epoch": 0.05209615299482049, "grad_norm": 2.418934155046871, "learning_rate": 5.20961529948205e-06, "loss": 0.4933, "step": 11768 }, { "epoch": 0.05210057992828368, "grad_norm": 3.254238942363299, "learning_rate": 5.210057992828368e-06, "loss": 0.7161, "step": 11769 }, { "epoch": 0.05210500686174687, "grad_norm": 2.7076358815659014, "learning_rate": 5.2105006861746875e-06, "loss": 0.7499, "step": 11770 }, { "epoch": 0.05210943379521006, "grad_norm": 2.6167515493815716, "learning_rate": 5.210943379521006e-06, "loss": 0.7861, "step": 11771 }, { "epoch": 0.052113860728673245, "grad_norm": 2.8054440339314484, "learning_rate": 5.2113860728673245e-06, "loss": 0.6258, "step": 11772 }, { "epoch": 0.052118287662136435, "grad_norm": 2.385139928224018, "learning_rate": 5.211828766213645e-06, "loss": 0.6942, "step": 11773 }, { "epoch": 0.052122714595599626, "grad_norm": 2.7488231734847424, "learning_rate": 5.212271459559963e-06, "loss": 0.6318, "step": 11774 }, { "epoch": 0.052127141529062816, "grad_norm": 3.4852936495861546, "learning_rate": 5.212714152906282e-06, "loss": 0.9288, "step": 11775 }, { "epoch": 0.05213156846252601, "grad_norm": 3.861817683668743, "learning_rate": 5.213156846252602e-06, "loss": 1.1317, "step": 11776 }, { "epoch": 0.0521359953959892, "grad_norm": 2.5364687186794903, "learning_rate": 5.2135995395989205e-06, "loss": 0.6788, "step": 11777 }, { "epoch": 0.05214042232945239, "grad_norm": 2.860218704567318, "learning_rate": 5.214042232945239e-06, "loss": 0.6942, "step": 11778 }, { "epoch": 0.05214484926291558, "grad_norm": 2.4695897225538825, "learning_rate": 5.2144849262915584e-06, "loss": 0.6502, "step": 11779 }, { "epoch": 0.05214927619637877, "grad_norm": 2.3287653182436894, "learning_rate": 5.214927619637877e-06, "loss": 0.6903, "step": 11780 }, { "epoch": 0.05215370312984196, "grad_norm": 2.803090239918128, "learning_rate": 5.215370312984196e-06, "loss": 0.7323, "step": 11781 }, { "epoch": 0.05215813006330515, "grad_norm": 2.369429232598381, "learning_rate": 5.215813006330516e-06, "loss": 0.6966, "step": 11782 }, { "epoch": 0.05216255699676834, "grad_norm": 2.928986719819407, "learning_rate": 5.216255699676834e-06, "loss": 0.905, "step": 11783 }, { "epoch": 0.05216698393023153, "grad_norm": 2.983462323102964, "learning_rate": 5.216698393023153e-06, "loss": 0.8553, "step": 11784 }, { "epoch": 0.05217141086369472, "grad_norm": 3.281722186346889, "learning_rate": 5.217141086369473e-06, "loss": 0.9389, "step": 11785 }, { "epoch": 0.05217583779715791, "grad_norm": 2.589231284094048, "learning_rate": 5.2175837797157915e-06, "loss": 0.5497, "step": 11786 }, { "epoch": 0.052180264730621095, "grad_norm": 2.7062936983910033, "learning_rate": 5.21802647306211e-06, "loss": 0.6942, "step": 11787 }, { "epoch": 0.052184691664084286, "grad_norm": 3.001245741334648, "learning_rate": 5.218469166408429e-06, "loss": 1.0433, "step": 11788 }, { "epoch": 0.052189118597547476, "grad_norm": 2.5875705734285113, "learning_rate": 5.218911859754749e-06, "loss": 0.8586, "step": 11789 }, { "epoch": 0.05219354553101067, "grad_norm": 2.340120954536691, "learning_rate": 5.219354553101067e-06, "loss": 0.6365, "step": 11790 }, { "epoch": 0.05219797246447386, "grad_norm": 2.9868056175948365, "learning_rate": 5.219797246447387e-06, "loss": 0.9379, "step": 11791 }, { "epoch": 0.05220239939793705, "grad_norm": 2.652668056558666, "learning_rate": 5.220239939793705e-06, "loss": 0.7578, "step": 11792 }, { "epoch": 0.05220682633140024, "grad_norm": 2.5159695594085925, "learning_rate": 5.220682633140024e-06, "loss": 0.6203, "step": 11793 }, { "epoch": 0.05221125326486343, "grad_norm": 3.3444792251910114, "learning_rate": 5.221125326486344e-06, "loss": 0.8373, "step": 11794 }, { "epoch": 0.05221568019832662, "grad_norm": 2.243232681893831, "learning_rate": 5.2215680198326624e-06, "loss": 0.6515, "step": 11795 }, { "epoch": 0.05222010713178981, "grad_norm": 2.9019906071578596, "learning_rate": 5.222010713178981e-06, "loss": 0.6782, "step": 11796 }, { "epoch": 0.052224534065253, "grad_norm": 3.367536553380884, "learning_rate": 5.2224534065253e-06, "loss": 1.1054, "step": 11797 }, { "epoch": 0.05222896099871619, "grad_norm": 3.559962564583817, "learning_rate": 5.22289609987162e-06, "loss": 1.0211, "step": 11798 }, { "epoch": 0.05223338793217938, "grad_norm": 3.1670112704187017, "learning_rate": 5.223338793217938e-06, "loss": 1.1436, "step": 11799 }, { "epoch": 0.05223781486564257, "grad_norm": 2.5985237635920786, "learning_rate": 5.223781486564258e-06, "loss": 0.864, "step": 11800 }, { "epoch": 0.05224224179910576, "grad_norm": 2.5471327565954476, "learning_rate": 5.224224179910576e-06, "loss": 0.7593, "step": 11801 }, { "epoch": 0.052246668732568946, "grad_norm": 3.0510334438180737, "learning_rate": 5.224666873256895e-06, "loss": 0.8588, "step": 11802 }, { "epoch": 0.05225109566603214, "grad_norm": 2.9441035260196777, "learning_rate": 5.225109566603215e-06, "loss": 0.8098, "step": 11803 }, { "epoch": 0.05225552259949533, "grad_norm": 3.085408493080398, "learning_rate": 5.225552259949533e-06, "loss": 1.0531, "step": 11804 }, { "epoch": 0.05225994953295852, "grad_norm": 3.325102126465471, "learning_rate": 5.225994953295852e-06, "loss": 0.9712, "step": 11805 }, { "epoch": 0.05226437646642171, "grad_norm": 2.505027181429183, "learning_rate": 5.226437646642172e-06, "loss": 0.7307, "step": 11806 }, { "epoch": 0.0522688033998849, "grad_norm": 2.6682114568580486, "learning_rate": 5.226880339988491e-06, "loss": 0.8102, "step": 11807 }, { "epoch": 0.05227323033334809, "grad_norm": 3.0304474423577323, "learning_rate": 5.227323033334809e-06, "loss": 0.7094, "step": 11808 }, { "epoch": 0.05227765726681128, "grad_norm": 2.4794527130337856, "learning_rate": 5.2277657266811285e-06, "loss": 0.6697, "step": 11809 }, { "epoch": 0.05228208420027447, "grad_norm": 2.743166167291714, "learning_rate": 5.228208420027447e-06, "loss": 0.7579, "step": 11810 }, { "epoch": 0.05228651113373766, "grad_norm": 2.870056539664101, "learning_rate": 5.2286511133737664e-06, "loss": 0.7472, "step": 11811 }, { "epoch": 0.05229093806720085, "grad_norm": 2.460809208372, "learning_rate": 5.229093806720086e-06, "loss": 0.5652, "step": 11812 }, { "epoch": 0.05229536500066404, "grad_norm": 2.3600380531392697, "learning_rate": 5.229536500066404e-06, "loss": 0.771, "step": 11813 }, { "epoch": 0.05229979193412723, "grad_norm": 2.5408633575129302, "learning_rate": 5.229979193412723e-06, "loss": 0.6926, "step": 11814 }, { "epoch": 0.05230421886759042, "grad_norm": 2.7593680689530165, "learning_rate": 5.230421886759043e-06, "loss": 0.8783, "step": 11815 }, { "epoch": 0.05230864580105361, "grad_norm": 2.937800542756098, "learning_rate": 5.230864580105362e-06, "loss": 0.7775, "step": 11816 }, { "epoch": 0.052313072734516804, "grad_norm": 3.207113512937954, "learning_rate": 5.23130727345168e-06, "loss": 0.6115, "step": 11817 }, { "epoch": 0.05231749966797999, "grad_norm": 3.1112569903113716, "learning_rate": 5.2317499667979995e-06, "loss": 0.8059, "step": 11818 }, { "epoch": 0.05232192660144318, "grad_norm": 4.257232979775352, "learning_rate": 5.232192660144319e-06, "loss": 0.9265, "step": 11819 }, { "epoch": 0.05232635353490637, "grad_norm": 3.8358868592733844, "learning_rate": 5.232635353490637e-06, "loss": 0.8945, "step": 11820 }, { "epoch": 0.05233078046836956, "grad_norm": 2.6657170752532346, "learning_rate": 5.233078046836957e-06, "loss": 0.8183, "step": 11821 }, { "epoch": 0.05233520740183275, "grad_norm": 2.732719572800811, "learning_rate": 5.233520740183275e-06, "loss": 0.7896, "step": 11822 }, { "epoch": 0.05233963433529594, "grad_norm": 2.8127362505160116, "learning_rate": 5.233963433529594e-06, "loss": 0.8292, "step": 11823 }, { "epoch": 0.05234406126875913, "grad_norm": 2.9147232972610326, "learning_rate": 5.234406126875914e-06, "loss": 0.6766, "step": 11824 }, { "epoch": 0.05234848820222232, "grad_norm": 2.594683148359508, "learning_rate": 5.2348488202222325e-06, "loss": 0.4105, "step": 11825 }, { "epoch": 0.05235291513568551, "grad_norm": 2.885416686852856, "learning_rate": 5.235291513568551e-06, "loss": 0.8742, "step": 11826 }, { "epoch": 0.0523573420691487, "grad_norm": 2.420176216117343, "learning_rate": 5.235734206914871e-06, "loss": 0.6641, "step": 11827 }, { "epoch": 0.05236176900261189, "grad_norm": 2.561633743024815, "learning_rate": 5.23617690026119e-06, "loss": 0.5114, "step": 11828 }, { "epoch": 0.05236619593607508, "grad_norm": 3.8484211354988687, "learning_rate": 5.236619593607508e-06, "loss": 1.1681, "step": 11829 }, { "epoch": 0.05237062286953827, "grad_norm": 2.7298690202788505, "learning_rate": 5.237062286953828e-06, "loss": 0.9321, "step": 11830 }, { "epoch": 0.052375049803001464, "grad_norm": 2.566638706761149, "learning_rate": 5.237504980300146e-06, "loss": 0.6411, "step": 11831 }, { "epoch": 0.052379476736464654, "grad_norm": 2.4817660834102364, "learning_rate": 5.237947673646465e-06, "loss": 0.8325, "step": 11832 }, { "epoch": 0.05238390366992784, "grad_norm": 3.16662267353461, "learning_rate": 5.238390366992785e-06, "loss": 1.0432, "step": 11833 }, { "epoch": 0.05238833060339103, "grad_norm": 3.1111926160595624, "learning_rate": 5.2388330603391035e-06, "loss": 0.9787, "step": 11834 }, { "epoch": 0.05239275753685422, "grad_norm": 2.6202484442826197, "learning_rate": 5.239275753685422e-06, "loss": 0.7512, "step": 11835 }, { "epoch": 0.05239718447031741, "grad_norm": 2.6358195888202895, "learning_rate": 5.239718447031742e-06, "loss": 0.8037, "step": 11836 }, { "epoch": 0.0524016114037806, "grad_norm": 3.3635724803331115, "learning_rate": 5.240161140378061e-06, "loss": 1.0378, "step": 11837 }, { "epoch": 0.05240603833724379, "grad_norm": 2.785952505521263, "learning_rate": 5.240603833724379e-06, "loss": 1.004, "step": 11838 }, { "epoch": 0.05241046527070698, "grad_norm": 3.5185271037776653, "learning_rate": 5.241046527070699e-06, "loss": 0.8564, "step": 11839 }, { "epoch": 0.05241489220417017, "grad_norm": 3.293888499871768, "learning_rate": 5.241489220417017e-06, "loss": 0.6373, "step": 11840 }, { "epoch": 0.05241931913763336, "grad_norm": 2.7342248023519624, "learning_rate": 5.2419319137633365e-06, "loss": 0.8375, "step": 11841 }, { "epoch": 0.05242374607109655, "grad_norm": 2.8104240524649717, "learning_rate": 5.242374607109656e-06, "loss": 0.4489, "step": 11842 }, { "epoch": 0.05242817300455974, "grad_norm": 2.5712532939810666, "learning_rate": 5.2428173004559744e-06, "loss": 0.5769, "step": 11843 }, { "epoch": 0.052432599938022934, "grad_norm": 4.349532144671061, "learning_rate": 5.243259993802293e-06, "loss": 0.9335, "step": 11844 }, { "epoch": 0.052437026871486124, "grad_norm": 2.3442385665960406, "learning_rate": 5.243702687148613e-06, "loss": 0.6099, "step": 11845 }, { "epoch": 0.052441453804949315, "grad_norm": 2.6268492860035804, "learning_rate": 5.244145380494932e-06, "loss": 0.5034, "step": 11846 }, { "epoch": 0.052445880738412505, "grad_norm": 2.3006460734694087, "learning_rate": 5.24458807384125e-06, "loss": 0.804, "step": 11847 }, { "epoch": 0.05245030767187569, "grad_norm": 2.3472159610740366, "learning_rate": 5.24503076718757e-06, "loss": 0.7948, "step": 11848 }, { "epoch": 0.05245473460533888, "grad_norm": 2.2689635472247907, "learning_rate": 5.245473460533889e-06, "loss": 0.6421, "step": 11849 }, { "epoch": 0.05245916153880207, "grad_norm": 4.246559039864255, "learning_rate": 5.2459161538802075e-06, "loss": 1.0258, "step": 11850 }, { "epoch": 0.05246358847226526, "grad_norm": 2.379133548942415, "learning_rate": 5.246358847226527e-06, "loss": 0.6956, "step": 11851 }, { "epoch": 0.05246801540572845, "grad_norm": 2.2091496064126734, "learning_rate": 5.246801540572845e-06, "loss": 0.4108, "step": 11852 }, { "epoch": 0.05247244233919164, "grad_norm": 3.3672475237958635, "learning_rate": 5.247244233919164e-06, "loss": 0.8827, "step": 11853 }, { "epoch": 0.05247686927265483, "grad_norm": 2.74428583608354, "learning_rate": 5.247686927265484e-06, "loss": 0.735, "step": 11854 }, { "epoch": 0.05248129620611802, "grad_norm": 2.578393467361586, "learning_rate": 5.248129620611803e-06, "loss": 0.5218, "step": 11855 }, { "epoch": 0.05248572313958121, "grad_norm": 2.945332489449144, "learning_rate": 5.248572313958121e-06, "loss": 1.0501, "step": 11856 }, { "epoch": 0.0524901500730444, "grad_norm": 2.423919159421531, "learning_rate": 5.249015007304441e-06, "loss": 0.645, "step": 11857 }, { "epoch": 0.052494577006507594, "grad_norm": 2.6360938697193936, "learning_rate": 5.24945770065076e-06, "loss": 0.7659, "step": 11858 }, { "epoch": 0.052499003939970784, "grad_norm": 3.537367594602461, "learning_rate": 5.2499003939970784e-06, "loss": 0.8027, "step": 11859 }, { "epoch": 0.052503430873433975, "grad_norm": 2.5267467169621716, "learning_rate": 5.250343087343398e-06, "loss": 0.588, "step": 11860 }, { "epoch": 0.052507857806897165, "grad_norm": 2.4422490527567353, "learning_rate": 5.250785780689716e-06, "loss": 0.6103, "step": 11861 }, { "epoch": 0.052512284740360356, "grad_norm": 3.0647231076582306, "learning_rate": 5.251228474036036e-06, "loss": 0.938, "step": 11862 }, { "epoch": 0.05251671167382354, "grad_norm": 3.1230058089159436, "learning_rate": 5.251671167382355e-06, "loss": 0.9503, "step": 11863 }, { "epoch": 0.05252113860728673, "grad_norm": 3.184060259713615, "learning_rate": 5.252113860728674e-06, "loss": 0.486, "step": 11864 }, { "epoch": 0.05252556554074992, "grad_norm": 2.4574274690468867, "learning_rate": 5.252556554074992e-06, "loss": 0.6805, "step": 11865 }, { "epoch": 0.05252999247421311, "grad_norm": 3.6547570897626174, "learning_rate": 5.252999247421312e-06, "loss": 0.8275, "step": 11866 }, { "epoch": 0.0525344194076763, "grad_norm": 2.904590761573751, "learning_rate": 5.253441940767631e-06, "loss": 0.582, "step": 11867 }, { "epoch": 0.05253884634113949, "grad_norm": 3.2110389925622043, "learning_rate": 5.253884634113949e-06, "loss": 0.9442, "step": 11868 }, { "epoch": 0.05254327327460268, "grad_norm": 3.525886736682557, "learning_rate": 5.254327327460269e-06, "loss": 0.964, "step": 11869 }, { "epoch": 0.05254770020806587, "grad_norm": 2.562596561198923, "learning_rate": 5.254770020806588e-06, "loss": 0.7896, "step": 11870 }, { "epoch": 0.05255212714152906, "grad_norm": 3.628684429447893, "learning_rate": 5.255212714152907e-06, "loss": 0.7614, "step": 11871 }, { "epoch": 0.052556554074992254, "grad_norm": 2.6838590652459975, "learning_rate": 5.255655407499226e-06, "loss": 0.624, "step": 11872 }, { "epoch": 0.052560981008455444, "grad_norm": 2.5446730879571096, "learning_rate": 5.2560981008455446e-06, "loss": 0.6943, "step": 11873 }, { "epoch": 0.052565407941918635, "grad_norm": 2.563296222040742, "learning_rate": 5.256540794191863e-06, "loss": 0.7332, "step": 11874 }, { "epoch": 0.052569834875381825, "grad_norm": 3.1075478921533324, "learning_rate": 5.256983487538183e-06, "loss": 0.6696, "step": 11875 }, { "epoch": 0.052574261808845016, "grad_norm": 3.1358505115998274, "learning_rate": 5.257426180884502e-06, "loss": 0.9995, "step": 11876 }, { "epoch": 0.052578688742308206, "grad_norm": 2.6041117860035166, "learning_rate": 5.25786887423082e-06, "loss": 0.6484, "step": 11877 }, { "epoch": 0.05258311567577139, "grad_norm": 3.23994574877528, "learning_rate": 5.25831156757714e-06, "loss": 0.8024, "step": 11878 }, { "epoch": 0.05258754260923458, "grad_norm": 2.7980561604091614, "learning_rate": 5.258754260923459e-06, "loss": 0.7148, "step": 11879 }, { "epoch": 0.05259196954269777, "grad_norm": 2.590811059156728, "learning_rate": 5.259196954269778e-06, "loss": 0.6931, "step": 11880 }, { "epoch": 0.05259639647616096, "grad_norm": 3.5912801537509287, "learning_rate": 5.259639647616097e-06, "loss": 0.9077, "step": 11881 }, { "epoch": 0.05260082340962415, "grad_norm": 2.314162359972003, "learning_rate": 5.2600823409624155e-06, "loss": 0.4163, "step": 11882 }, { "epoch": 0.05260525034308734, "grad_norm": 2.988802847117656, "learning_rate": 5.260525034308734e-06, "loss": 1.0587, "step": 11883 }, { "epoch": 0.05260967727655053, "grad_norm": 3.7095429950744006, "learning_rate": 5.260967727655054e-06, "loss": 1.0955, "step": 11884 }, { "epoch": 0.052614104210013724, "grad_norm": 3.5034922724599067, "learning_rate": 5.261410421001373e-06, "loss": 1.3922, "step": 11885 }, { "epoch": 0.052618531143476914, "grad_norm": 3.3764636303016204, "learning_rate": 5.261853114347691e-06, "loss": 0.8488, "step": 11886 }, { "epoch": 0.052622958076940105, "grad_norm": 3.4293066658845803, "learning_rate": 5.2622958076940115e-06, "loss": 0.7335, "step": 11887 }, { "epoch": 0.052627385010403295, "grad_norm": 4.348455511897072, "learning_rate": 5.26273850104033e-06, "loss": 0.7188, "step": 11888 }, { "epoch": 0.052631811943866486, "grad_norm": 5.124565147900972, "learning_rate": 5.2631811943866486e-06, "loss": 0.6125, "step": 11889 }, { "epoch": 0.052636238877329676, "grad_norm": 3.4524002911870406, "learning_rate": 5.263623887732968e-06, "loss": 0.9455, "step": 11890 }, { "epoch": 0.05264066581079287, "grad_norm": 2.63251910857836, "learning_rate": 5.2640665810792864e-06, "loss": 0.6866, "step": 11891 }, { "epoch": 0.05264509274425606, "grad_norm": 2.7462000759582934, "learning_rate": 5.264509274425606e-06, "loss": 1.0843, "step": 11892 }, { "epoch": 0.05264951967771924, "grad_norm": 2.9884592673638224, "learning_rate": 5.264951967771925e-06, "loss": 0.9492, "step": 11893 }, { "epoch": 0.05265394661118243, "grad_norm": 3.3610884437380664, "learning_rate": 5.265394661118244e-06, "loss": 0.8675, "step": 11894 }, { "epoch": 0.05265837354464562, "grad_norm": 2.035486867804516, "learning_rate": 5.265837354464562e-06, "loss": 0.365, "step": 11895 }, { "epoch": 0.05266280047810881, "grad_norm": 2.3598405030775806, "learning_rate": 5.2662800478108825e-06, "loss": 0.5844, "step": 11896 }, { "epoch": 0.052667227411572, "grad_norm": 2.9903843640862253, "learning_rate": 5.266722741157201e-06, "loss": 1.1397, "step": 11897 }, { "epoch": 0.05267165434503519, "grad_norm": 3.715736537221041, "learning_rate": 5.2671654345035195e-06, "loss": 1.3357, "step": 11898 }, { "epoch": 0.052676081278498384, "grad_norm": 2.9598064798650365, "learning_rate": 5.267608127849839e-06, "loss": 0.9405, "step": 11899 }, { "epoch": 0.052680508211961574, "grad_norm": 3.4259345045374565, "learning_rate": 5.268050821196158e-06, "loss": 0.8176, "step": 11900 }, { "epoch": 0.052684935145424765, "grad_norm": 3.7598223602298346, "learning_rate": 5.268493514542477e-06, "loss": 0.9388, "step": 11901 }, { "epoch": 0.052689362078887955, "grad_norm": 3.2786914361096704, "learning_rate": 5.268936207888796e-06, "loss": 1.3325, "step": 11902 }, { "epoch": 0.052693789012351146, "grad_norm": 2.709198311496357, "learning_rate": 5.269378901235115e-06, "loss": 0.8181, "step": 11903 }, { "epoch": 0.052698215945814336, "grad_norm": 2.9627364948529102, "learning_rate": 5.269821594581433e-06, "loss": 0.5852, "step": 11904 }, { "epoch": 0.05270264287927753, "grad_norm": 2.7250443311223376, "learning_rate": 5.270264287927753e-06, "loss": 0.8665, "step": 11905 }, { "epoch": 0.05270706981274072, "grad_norm": 2.915722123192865, "learning_rate": 5.270706981274072e-06, "loss": 0.554, "step": 11906 }, { "epoch": 0.05271149674620391, "grad_norm": 3.1569465890096873, "learning_rate": 5.2711496746203904e-06, "loss": 0.8446, "step": 11907 }, { "epoch": 0.05271592367966709, "grad_norm": 5.165453580645674, "learning_rate": 5.271592367966711e-06, "loss": 1.1808, "step": 11908 }, { "epoch": 0.05272035061313028, "grad_norm": 3.3713445105149082, "learning_rate": 5.272035061313029e-06, "loss": 1.1178, "step": 11909 }, { "epoch": 0.05272477754659347, "grad_norm": 2.601945506109281, "learning_rate": 5.272477754659348e-06, "loss": 0.6189, "step": 11910 }, { "epoch": 0.05272920448005666, "grad_norm": 2.200683546576962, "learning_rate": 5.272920448005667e-06, "loss": 0.5677, "step": 11911 }, { "epoch": 0.05273363141351985, "grad_norm": 2.3855261626004034, "learning_rate": 5.273363141351986e-06, "loss": 0.759, "step": 11912 }, { "epoch": 0.052738058346983044, "grad_norm": 2.519055693333908, "learning_rate": 5.273805834698304e-06, "loss": 0.6735, "step": 11913 }, { "epoch": 0.052742485280446234, "grad_norm": 3.1727533734802194, "learning_rate": 5.274248528044624e-06, "loss": 0.9556, "step": 11914 }, { "epoch": 0.052746912213909425, "grad_norm": 2.885988987831934, "learning_rate": 5.274691221390943e-06, "loss": 0.6706, "step": 11915 }, { "epoch": 0.052751339147372615, "grad_norm": 2.9626844802940164, "learning_rate": 5.275133914737261e-06, "loss": 0.989, "step": 11916 }, { "epoch": 0.052755766080835806, "grad_norm": 2.3967028459660753, "learning_rate": 5.275576608083582e-06, "loss": 0.7084, "step": 11917 }, { "epoch": 0.052760193014298996, "grad_norm": 2.3178923587920326, "learning_rate": 5.2760193014299e-06, "loss": 0.5775, "step": 11918 }, { "epoch": 0.05276461994776219, "grad_norm": 2.555741956387478, "learning_rate": 5.276461994776219e-06, "loss": 0.9657, "step": 11919 }, { "epoch": 0.05276904688122538, "grad_norm": 2.2068292691511684, "learning_rate": 5.276904688122538e-06, "loss": 0.5076, "step": 11920 }, { "epoch": 0.05277347381468857, "grad_norm": 2.9213069734325168, "learning_rate": 5.2773473814688566e-06, "loss": 0.8385, "step": 11921 }, { "epoch": 0.05277790074815176, "grad_norm": 3.207155241769396, "learning_rate": 5.277790074815176e-06, "loss": 0.8596, "step": 11922 }, { "epoch": 0.05278232768161494, "grad_norm": 2.532503095778962, "learning_rate": 5.278232768161495e-06, "loss": 0.5558, "step": 11923 }, { "epoch": 0.05278675461507813, "grad_norm": 2.9961061123877784, "learning_rate": 5.278675461507814e-06, "loss": 0.6697, "step": 11924 }, { "epoch": 0.05279118154854132, "grad_norm": 2.757186644536699, "learning_rate": 5.279118154854132e-06, "loss": 0.7787, "step": 11925 }, { "epoch": 0.052795608482004514, "grad_norm": 3.1764012774690915, "learning_rate": 5.2795608482004526e-06, "loss": 0.7566, "step": 11926 }, { "epoch": 0.052800035415467704, "grad_norm": 3.173137299934183, "learning_rate": 5.280003541546771e-06, "loss": 1.027, "step": 11927 }, { "epoch": 0.052804462348930895, "grad_norm": 2.4407846501643102, "learning_rate": 5.28044623489309e-06, "loss": 0.729, "step": 11928 }, { "epoch": 0.052808889282394085, "grad_norm": 2.303912465228866, "learning_rate": 5.280888928239409e-06, "loss": 0.6573, "step": 11929 }, { "epoch": 0.052813316215857276, "grad_norm": 2.9605647169364406, "learning_rate": 5.281331621585728e-06, "loss": 0.7036, "step": 11930 }, { "epoch": 0.052817743149320466, "grad_norm": 3.1054848334754874, "learning_rate": 5.281774314932047e-06, "loss": 0.7594, "step": 11931 }, { "epoch": 0.05282217008278366, "grad_norm": 3.1120860563721737, "learning_rate": 5.282217008278366e-06, "loss": 0.7513, "step": 11932 }, { "epoch": 0.05282659701624685, "grad_norm": 2.621092477082336, "learning_rate": 5.282659701624685e-06, "loss": 0.7129, "step": 11933 }, { "epoch": 0.05283102394971004, "grad_norm": 3.4070684986821957, "learning_rate": 5.283102394971003e-06, "loss": 0.9155, "step": 11934 }, { "epoch": 0.05283545088317323, "grad_norm": 2.584687398232177, "learning_rate": 5.2835450883173235e-06, "loss": 0.6501, "step": 11935 }, { "epoch": 0.05283987781663642, "grad_norm": 2.688375644291221, "learning_rate": 5.283987781663642e-06, "loss": 0.5368, "step": 11936 }, { "epoch": 0.05284430475009961, "grad_norm": 3.2037869720100214, "learning_rate": 5.2844304750099606e-06, "loss": 0.7295, "step": 11937 }, { "epoch": 0.05284873168356279, "grad_norm": 3.749338393308329, "learning_rate": 5.284873168356281e-06, "loss": 1.2844, "step": 11938 }, { "epoch": 0.05285315861702598, "grad_norm": 3.6664221793327854, "learning_rate": 5.285315861702599e-06, "loss": 0.5989, "step": 11939 }, { "epoch": 0.052857585550489174, "grad_norm": 2.745495928048196, "learning_rate": 5.285758555048918e-06, "loss": 0.8126, "step": 11940 }, { "epoch": 0.052862012483952364, "grad_norm": 2.8808302411325415, "learning_rate": 5.286201248395237e-06, "loss": 0.9383, "step": 11941 }, { "epoch": 0.052866439417415555, "grad_norm": 2.615291520488549, "learning_rate": 5.286643941741556e-06, "loss": 0.8609, "step": 11942 }, { "epoch": 0.052870866350878745, "grad_norm": 2.1967977508127716, "learning_rate": 5.287086635087875e-06, "loss": 0.6739, "step": 11943 }, { "epoch": 0.052875293284341936, "grad_norm": 2.596623399254749, "learning_rate": 5.2875293284341945e-06, "loss": 0.8654, "step": 11944 }, { "epoch": 0.052879720217805126, "grad_norm": 2.752534303959437, "learning_rate": 5.287972021780513e-06, "loss": 0.8571, "step": 11945 }, { "epoch": 0.05288414715126832, "grad_norm": 2.811259245457843, "learning_rate": 5.2884147151268315e-06, "loss": 0.7984, "step": 11946 }, { "epoch": 0.05288857408473151, "grad_norm": 2.4782569218318686, "learning_rate": 5.288857408473152e-06, "loss": 0.7891, "step": 11947 }, { "epoch": 0.0528930010181947, "grad_norm": 2.401843856198412, "learning_rate": 5.28930010181947e-06, "loss": 0.8224, "step": 11948 }, { "epoch": 0.05289742795165789, "grad_norm": 2.62880171416178, "learning_rate": 5.289742795165789e-06, "loss": 0.5402, "step": 11949 }, { "epoch": 0.05290185488512108, "grad_norm": 4.640062894532445, "learning_rate": 5.290185488512108e-06, "loss": 1.2302, "step": 11950 }, { "epoch": 0.05290628181858427, "grad_norm": 3.1346953376178264, "learning_rate": 5.290628181858427e-06, "loss": 1.0316, "step": 11951 }, { "epoch": 0.05291070875204746, "grad_norm": 2.376790850237769, "learning_rate": 5.291070875204746e-06, "loss": 0.2925, "step": 11952 }, { "epoch": 0.05291513568551064, "grad_norm": 3.2852051528142403, "learning_rate": 5.291513568551065e-06, "loss": 0.6823, "step": 11953 }, { "epoch": 0.052919562618973834, "grad_norm": 3.14640961874955, "learning_rate": 5.291956261897384e-06, "loss": 1.0058, "step": 11954 }, { "epoch": 0.052923989552437024, "grad_norm": 3.042306054796361, "learning_rate": 5.2923989552437024e-06, "loss": 0.7737, "step": 11955 }, { "epoch": 0.052928416485900215, "grad_norm": 2.749083097793256, "learning_rate": 5.292841648590023e-06, "loss": 0.6238, "step": 11956 }, { "epoch": 0.052932843419363405, "grad_norm": 2.8334148313954874, "learning_rate": 5.293284341936341e-06, "loss": 0.5173, "step": 11957 }, { "epoch": 0.052937270352826596, "grad_norm": 3.0275155506079594, "learning_rate": 5.29372703528266e-06, "loss": 1.235, "step": 11958 }, { "epoch": 0.052941697286289786, "grad_norm": 3.1285323885853815, "learning_rate": 5.294169728628979e-06, "loss": 0.4187, "step": 11959 }, { "epoch": 0.05294612421975298, "grad_norm": 2.7445922149018265, "learning_rate": 5.2946124219752985e-06, "loss": 0.8272, "step": 11960 }, { "epoch": 0.05295055115321617, "grad_norm": 2.924190170487583, "learning_rate": 5.295055115321617e-06, "loss": 0.822, "step": 11961 }, { "epoch": 0.05295497808667936, "grad_norm": 3.0510563429507176, "learning_rate": 5.295497808667936e-06, "loss": 0.9369, "step": 11962 }, { "epoch": 0.05295940502014255, "grad_norm": 3.337415676206325, "learning_rate": 5.295940502014255e-06, "loss": 1.1107, "step": 11963 }, { "epoch": 0.05296383195360574, "grad_norm": 2.7884782659668788, "learning_rate": 5.296383195360573e-06, "loss": 0.7409, "step": 11964 }, { "epoch": 0.05296825888706893, "grad_norm": 3.2181546547265527, "learning_rate": 5.296825888706894e-06, "loss": 0.7959, "step": 11965 }, { "epoch": 0.05297268582053212, "grad_norm": 2.256938897191384, "learning_rate": 5.297268582053212e-06, "loss": 0.6976, "step": 11966 }, { "epoch": 0.05297711275399531, "grad_norm": 2.544594546217666, "learning_rate": 5.297711275399531e-06, "loss": 0.6473, "step": 11967 }, { "epoch": 0.0529815396874585, "grad_norm": 2.4557090539891258, "learning_rate": 5.298153968745851e-06, "loss": 0.6225, "step": 11968 }, { "epoch": 0.052985966620921685, "grad_norm": 3.5308703102260743, "learning_rate": 5.298596662092169e-06, "loss": 0.8545, "step": 11969 }, { "epoch": 0.052990393554384875, "grad_norm": 3.3045745778674434, "learning_rate": 5.299039355438488e-06, "loss": 1.159, "step": 11970 }, { "epoch": 0.052994820487848066, "grad_norm": 2.8216004231193623, "learning_rate": 5.299482048784807e-06, "loss": 0.6455, "step": 11971 }, { "epoch": 0.052999247421311256, "grad_norm": 2.321460426975574, "learning_rate": 5.299924742131126e-06, "loss": 0.5829, "step": 11972 }, { "epoch": 0.05300367435477445, "grad_norm": 3.6729097656093357, "learning_rate": 5.300367435477445e-06, "loss": 1.0472, "step": 11973 }, { "epoch": 0.05300810128823764, "grad_norm": 2.4453041620941494, "learning_rate": 5.3008101288237646e-06, "loss": 0.8583, "step": 11974 }, { "epoch": 0.05301252822170083, "grad_norm": 2.8510945634450584, "learning_rate": 5.301252822170083e-06, "loss": 0.8118, "step": 11975 }, { "epoch": 0.05301695515516402, "grad_norm": 3.3716026599659368, "learning_rate": 5.301695515516402e-06, "loss": 0.9068, "step": 11976 }, { "epoch": 0.05302138208862721, "grad_norm": 2.4326021609655832, "learning_rate": 5.302138208862722e-06, "loss": 0.6333, "step": 11977 }, { "epoch": 0.0530258090220904, "grad_norm": 2.613216769680889, "learning_rate": 5.30258090220904e-06, "loss": 0.7788, "step": 11978 }, { "epoch": 0.05303023595555359, "grad_norm": 3.018378042619975, "learning_rate": 5.303023595555359e-06, "loss": 0.8893, "step": 11979 }, { "epoch": 0.05303466288901678, "grad_norm": 3.3972325780529316, "learning_rate": 5.303466288901678e-06, "loss": 0.9297, "step": 11980 }, { "epoch": 0.05303908982247997, "grad_norm": 2.500097387487858, "learning_rate": 5.303908982247998e-06, "loss": 0.767, "step": 11981 }, { "epoch": 0.05304351675594316, "grad_norm": 2.6177511269053912, "learning_rate": 5.304351675594316e-06, "loss": 0.6587, "step": 11982 }, { "epoch": 0.05304794368940635, "grad_norm": 2.559329373604137, "learning_rate": 5.3047943689406355e-06, "loss": 0.9059, "step": 11983 }, { "epoch": 0.053052370622869535, "grad_norm": 3.0677741488166994, "learning_rate": 5.305237062286954e-06, "loss": 1.0159, "step": 11984 }, { "epoch": 0.053056797556332726, "grad_norm": 2.873857284032191, "learning_rate": 5.3056797556332726e-06, "loss": 0.6268, "step": 11985 }, { "epoch": 0.053061224489795916, "grad_norm": 2.518916089363091, "learning_rate": 5.306122448979593e-06, "loss": 0.7499, "step": 11986 }, { "epoch": 0.05306565142325911, "grad_norm": 2.8524301856785175, "learning_rate": 5.306565142325911e-06, "loss": 0.8026, "step": 11987 }, { "epoch": 0.0530700783567223, "grad_norm": 2.8113803359129137, "learning_rate": 5.30700783567223e-06, "loss": 0.7865, "step": 11988 }, { "epoch": 0.05307450529018549, "grad_norm": 3.7743943791240215, "learning_rate": 5.30745052901855e-06, "loss": 1.3061, "step": 11989 }, { "epoch": 0.05307893222364868, "grad_norm": 2.471728476158732, "learning_rate": 5.3078932223648686e-06, "loss": 0.5774, "step": 11990 }, { "epoch": 0.05308335915711187, "grad_norm": 2.493232894757681, "learning_rate": 5.308335915711187e-06, "loss": 0.6695, "step": 11991 }, { "epoch": 0.05308778609057506, "grad_norm": 2.8597197760515827, "learning_rate": 5.3087786090575065e-06, "loss": 0.5823, "step": 11992 }, { "epoch": 0.05309221302403825, "grad_norm": 2.7983117545216296, "learning_rate": 5.309221302403825e-06, "loss": 0.5784, "step": 11993 }, { "epoch": 0.05309663995750144, "grad_norm": 2.571494657055821, "learning_rate": 5.3096639957501435e-06, "loss": 0.6088, "step": 11994 }, { "epoch": 0.05310106689096463, "grad_norm": 2.631033096636965, "learning_rate": 5.310106689096464e-06, "loss": 0.7827, "step": 11995 }, { "epoch": 0.05310549382442782, "grad_norm": 2.9968705829827393, "learning_rate": 5.310549382442782e-06, "loss": 0.8656, "step": 11996 }, { "epoch": 0.05310992075789101, "grad_norm": 2.5476639449129355, "learning_rate": 5.310992075789101e-06, "loss": 0.6939, "step": 11997 }, { "epoch": 0.0531143476913542, "grad_norm": 2.4594242163435753, "learning_rate": 5.311434769135421e-06, "loss": 0.6113, "step": 11998 }, { "epoch": 0.053118774624817386, "grad_norm": 3.537658140526639, "learning_rate": 5.3118774624817395e-06, "loss": 1.0112, "step": 11999 }, { "epoch": 0.053123201558280576, "grad_norm": 3.1226958675102505, "learning_rate": 5.312320155828058e-06, "loss": 0.7276, "step": 12000 }, { "epoch": 0.05312762849174377, "grad_norm": 2.531269771540635, "learning_rate": 5.312762849174377e-06, "loss": 0.6362, "step": 12001 }, { "epoch": 0.05313205542520696, "grad_norm": 2.6107088445535385, "learning_rate": 5.313205542520696e-06, "loss": 0.5199, "step": 12002 }, { "epoch": 0.05313648235867015, "grad_norm": 3.089603301708643, "learning_rate": 5.313648235867015e-06, "loss": 0.8655, "step": 12003 }, { "epoch": 0.05314090929213334, "grad_norm": 2.8769270658051584, "learning_rate": 5.314090929213335e-06, "loss": 0.6822, "step": 12004 }, { "epoch": 0.05314533622559653, "grad_norm": 3.590058716564174, "learning_rate": 5.314533622559653e-06, "loss": 0.7831, "step": 12005 }, { "epoch": 0.05314976315905972, "grad_norm": 2.5694646511932326, "learning_rate": 5.314976315905972e-06, "loss": 0.5575, "step": 12006 }, { "epoch": 0.05315419009252291, "grad_norm": 2.6649522363014007, "learning_rate": 5.315419009252292e-06, "loss": 0.8537, "step": 12007 }, { "epoch": 0.0531586170259861, "grad_norm": 2.4080734205934213, "learning_rate": 5.3158617025986105e-06, "loss": 0.7116, "step": 12008 }, { "epoch": 0.05316304395944929, "grad_norm": 2.350791670230334, "learning_rate": 5.316304395944929e-06, "loss": 0.5631, "step": 12009 }, { "epoch": 0.05316747089291248, "grad_norm": 3.3114581098485663, "learning_rate": 5.316747089291248e-06, "loss": 0.7028, "step": 12010 }, { "epoch": 0.05317189782637567, "grad_norm": 2.8668264246868262, "learning_rate": 5.317189782637568e-06, "loss": 0.712, "step": 12011 }, { "epoch": 0.05317632475983886, "grad_norm": 3.753456770003201, "learning_rate": 5.317632475983886e-06, "loss": 0.9874, "step": 12012 }, { "epoch": 0.05318075169330205, "grad_norm": 2.6341455691001254, "learning_rate": 5.318075169330206e-06, "loss": 0.8251, "step": 12013 }, { "epoch": 0.05318517862676524, "grad_norm": 3.7971307485763313, "learning_rate": 5.318517862676524e-06, "loss": 1.357, "step": 12014 }, { "epoch": 0.05318960556022843, "grad_norm": 2.9470314910681408, "learning_rate": 5.318960556022843e-06, "loss": 0.75, "step": 12015 }, { "epoch": 0.05319403249369162, "grad_norm": 2.3680138497442234, "learning_rate": 5.319403249369163e-06, "loss": 0.8605, "step": 12016 }, { "epoch": 0.05319845942715481, "grad_norm": 2.7483863626926914, "learning_rate": 5.319845942715481e-06, "loss": 0.4762, "step": 12017 }, { "epoch": 0.053202886360618, "grad_norm": 2.3570446469202633, "learning_rate": 5.3202886360618e-06, "loss": 0.7893, "step": 12018 }, { "epoch": 0.05320731329408119, "grad_norm": 5.173720458129722, "learning_rate": 5.32073132940812e-06, "loss": 1.5928, "step": 12019 }, { "epoch": 0.05321174022754438, "grad_norm": 3.036661339456625, "learning_rate": 5.321174022754439e-06, "loss": 0.9382, "step": 12020 }, { "epoch": 0.05321616716100757, "grad_norm": 2.5928221048350792, "learning_rate": 5.321616716100757e-06, "loss": 0.5832, "step": 12021 }, { "epoch": 0.05322059409447076, "grad_norm": 3.5976732436505, "learning_rate": 5.3220594094470766e-06, "loss": 1.161, "step": 12022 }, { "epoch": 0.05322502102793395, "grad_norm": 3.9485820961669997, "learning_rate": 5.322502102793395e-06, "loss": 0.8506, "step": 12023 }, { "epoch": 0.05322944796139714, "grad_norm": 2.667175749998842, "learning_rate": 5.3229447961397145e-06, "loss": 0.5578, "step": 12024 }, { "epoch": 0.05323387489486033, "grad_norm": 3.1126407830743577, "learning_rate": 5.323387489486034e-06, "loss": 0.8826, "step": 12025 }, { "epoch": 0.05323830182832352, "grad_norm": 2.752512683616193, "learning_rate": 5.323830182832352e-06, "loss": 0.713, "step": 12026 }, { "epoch": 0.05324272876178671, "grad_norm": 4.023764078300075, "learning_rate": 5.324272876178671e-06, "loss": 0.9793, "step": 12027 }, { "epoch": 0.053247155695249904, "grad_norm": 2.680191213198474, "learning_rate": 5.324715569524991e-06, "loss": 1.043, "step": 12028 }, { "epoch": 0.05325158262871309, "grad_norm": 2.344048357572443, "learning_rate": 5.32515826287131e-06, "loss": 0.6897, "step": 12029 }, { "epoch": 0.05325600956217628, "grad_norm": 3.22228798069043, "learning_rate": 5.325600956217628e-06, "loss": 1.2353, "step": 12030 }, { "epoch": 0.05326043649563947, "grad_norm": 2.9891882042520628, "learning_rate": 5.3260436495639475e-06, "loss": 0.7278, "step": 12031 }, { "epoch": 0.05326486342910266, "grad_norm": 3.6783472421554015, "learning_rate": 5.326486342910266e-06, "loss": 1.1055, "step": 12032 }, { "epoch": 0.05326929036256585, "grad_norm": 2.491990593346075, "learning_rate": 5.326929036256585e-06, "loss": 0.5247, "step": 12033 }, { "epoch": 0.05327371729602904, "grad_norm": 3.52616616901158, "learning_rate": 5.327371729602905e-06, "loss": 1.0413, "step": 12034 }, { "epoch": 0.05327814422949223, "grad_norm": 3.041448833081498, "learning_rate": 5.327814422949223e-06, "loss": 0.9302, "step": 12035 }, { "epoch": 0.05328257116295542, "grad_norm": 2.909340311960178, "learning_rate": 5.328257116295542e-06, "loss": 1.0141, "step": 12036 }, { "epoch": 0.05328699809641861, "grad_norm": 3.360618187310934, "learning_rate": 5.328699809641862e-06, "loss": 0.8084, "step": 12037 }, { "epoch": 0.0532914250298818, "grad_norm": 2.252887148665321, "learning_rate": 5.3291425029881806e-06, "loss": 0.7115, "step": 12038 }, { "epoch": 0.05329585196334499, "grad_norm": 2.416738604622843, "learning_rate": 5.329585196334499e-06, "loss": 0.6007, "step": 12039 }, { "epoch": 0.05330027889680818, "grad_norm": 2.896465332201529, "learning_rate": 5.3300278896808185e-06, "loss": 0.7511, "step": 12040 }, { "epoch": 0.05330470583027137, "grad_norm": 3.6858571460744827, "learning_rate": 5.330470583027138e-06, "loss": 1.3619, "step": 12041 }, { "epoch": 0.053309132763734564, "grad_norm": 3.2304280587638288, "learning_rate": 5.330913276373456e-06, "loss": 0.8299, "step": 12042 }, { "epoch": 0.053313559697197754, "grad_norm": 2.527442937656409, "learning_rate": 5.331355969719776e-06, "loss": 0.794, "step": 12043 }, { "epoch": 0.05331798663066094, "grad_norm": 2.6070927175682486, "learning_rate": 5.331798663066094e-06, "loss": 0.6886, "step": 12044 }, { "epoch": 0.05332241356412413, "grad_norm": 2.7344379774807686, "learning_rate": 5.332241356412413e-06, "loss": 0.8024, "step": 12045 }, { "epoch": 0.05332684049758732, "grad_norm": 2.768420710796614, "learning_rate": 5.332684049758733e-06, "loss": 0.6491, "step": 12046 }, { "epoch": 0.05333126743105051, "grad_norm": 2.6179014121364843, "learning_rate": 5.3331267431050515e-06, "loss": 0.6332, "step": 12047 }, { "epoch": 0.0533356943645137, "grad_norm": 2.2880509008464, "learning_rate": 5.33356943645137e-06, "loss": 0.5949, "step": 12048 }, { "epoch": 0.05334012129797689, "grad_norm": 2.339517985937468, "learning_rate": 5.33401212979769e-06, "loss": 0.8564, "step": 12049 }, { "epoch": 0.05334454823144008, "grad_norm": 2.12769396727546, "learning_rate": 5.334454823144009e-06, "loss": 0.6078, "step": 12050 }, { "epoch": 0.05334897516490327, "grad_norm": 2.267484297895081, "learning_rate": 5.334897516490327e-06, "loss": 0.677, "step": 12051 }, { "epoch": 0.05335340209836646, "grad_norm": 3.083350735035973, "learning_rate": 5.335340209836647e-06, "loss": 0.8435, "step": 12052 }, { "epoch": 0.05335782903182965, "grad_norm": 2.7485731485200606, "learning_rate": 5.335782903182965e-06, "loss": 0.4048, "step": 12053 }, { "epoch": 0.05336225596529284, "grad_norm": 2.711294497042461, "learning_rate": 5.3362255965292846e-06, "loss": 0.6654, "step": 12054 }, { "epoch": 0.053366682898756033, "grad_norm": 2.455505631631277, "learning_rate": 5.336668289875604e-06, "loss": 0.7502, "step": 12055 }, { "epoch": 0.053371109832219224, "grad_norm": 2.7257697632869573, "learning_rate": 5.3371109832219225e-06, "loss": 0.7302, "step": 12056 }, { "epoch": 0.053375536765682415, "grad_norm": 2.693746649353641, "learning_rate": 5.337553676568241e-06, "loss": 1.004, "step": 12057 }, { "epoch": 0.053379963699145605, "grad_norm": 2.9586041853679723, "learning_rate": 5.337996369914561e-06, "loss": 0.9111, "step": 12058 }, { "epoch": 0.05338439063260879, "grad_norm": 2.9386256971186646, "learning_rate": 5.33843906326088e-06, "loss": 0.6488, "step": 12059 }, { "epoch": 0.05338881756607198, "grad_norm": 3.4815589690125117, "learning_rate": 5.338881756607198e-06, "loss": 1.1508, "step": 12060 }, { "epoch": 0.05339324449953517, "grad_norm": 3.1942462903419715, "learning_rate": 5.339324449953518e-06, "loss": 0.5668, "step": 12061 }, { "epoch": 0.05339767143299836, "grad_norm": 2.4147342082166543, "learning_rate": 5.339767143299837e-06, "loss": 0.7726, "step": 12062 }, { "epoch": 0.05340209836646155, "grad_norm": 2.938374671124519, "learning_rate": 5.3402098366461555e-06, "loss": 0.7238, "step": 12063 }, { "epoch": 0.05340652529992474, "grad_norm": 2.5290280142704478, "learning_rate": 5.340652529992475e-06, "loss": 0.4651, "step": 12064 }, { "epoch": 0.05341095223338793, "grad_norm": 3.3222683360568066, "learning_rate": 5.341095223338793e-06, "loss": 0.9886, "step": 12065 }, { "epoch": 0.05341537916685112, "grad_norm": 2.4530331305163107, "learning_rate": 5.341537916685112e-06, "loss": 0.6652, "step": 12066 }, { "epoch": 0.05341980610031431, "grad_norm": 2.423502500441624, "learning_rate": 5.341980610031432e-06, "loss": 0.7323, "step": 12067 }, { "epoch": 0.0534242330337775, "grad_norm": 3.4255765154326667, "learning_rate": 5.342423303377751e-06, "loss": 0.9115, "step": 12068 }, { "epoch": 0.053428659967240694, "grad_norm": 3.3175254854910774, "learning_rate": 5.342865996724069e-06, "loss": 1.1407, "step": 12069 }, { "epoch": 0.053433086900703884, "grad_norm": 2.890877093284282, "learning_rate": 5.343308690070389e-06, "loss": 0.8262, "step": 12070 }, { "epoch": 0.053437513834167075, "grad_norm": 2.6652264649146473, "learning_rate": 5.343751383416708e-06, "loss": 0.9228, "step": 12071 }, { "epoch": 0.053441940767630265, "grad_norm": 3.1618764972535853, "learning_rate": 5.3441940767630265e-06, "loss": 0.8108, "step": 12072 }, { "epoch": 0.053446367701093456, "grad_norm": 2.9205445348173034, "learning_rate": 5.344636770109346e-06, "loss": 0.8419, "step": 12073 }, { "epoch": 0.05345079463455664, "grad_norm": 2.6894767350649578, "learning_rate": 5.345079463455664e-06, "loss": 0.8965, "step": 12074 }, { "epoch": 0.05345522156801983, "grad_norm": 2.6597557704209605, "learning_rate": 5.345522156801983e-06, "loss": 0.5902, "step": 12075 }, { "epoch": 0.05345964850148302, "grad_norm": 2.5504603834527604, "learning_rate": 5.345964850148303e-06, "loss": 0.8001, "step": 12076 }, { "epoch": 0.05346407543494621, "grad_norm": 2.513085825932345, "learning_rate": 5.346407543494622e-06, "loss": 0.7845, "step": 12077 }, { "epoch": 0.0534685023684094, "grad_norm": 2.730774695927702, "learning_rate": 5.34685023684094e-06, "loss": 0.5348, "step": 12078 }, { "epoch": 0.05347292930187259, "grad_norm": 2.100797908307141, "learning_rate": 5.34729293018726e-06, "loss": 0.5631, "step": 12079 }, { "epoch": 0.05347735623533578, "grad_norm": 3.2398776977624006, "learning_rate": 5.347735623533579e-06, "loss": 0.5261, "step": 12080 }, { "epoch": 0.05348178316879897, "grad_norm": 2.858230702085895, "learning_rate": 5.348178316879897e-06, "loss": 0.9004, "step": 12081 }, { "epoch": 0.05348621010226216, "grad_norm": 3.1756989524118273, "learning_rate": 5.348621010226217e-06, "loss": 1.1636, "step": 12082 }, { "epoch": 0.053490637035725354, "grad_norm": 3.1461730150304397, "learning_rate": 5.349063703572535e-06, "loss": 1.0709, "step": 12083 }, { "epoch": 0.053495063969188544, "grad_norm": 2.787887231066551, "learning_rate": 5.349506396918855e-06, "loss": 0.733, "step": 12084 }, { "epoch": 0.053499490902651735, "grad_norm": 3.5037743480344186, "learning_rate": 5.349949090265174e-06, "loss": 1.0021, "step": 12085 }, { "epoch": 0.053503917836114925, "grad_norm": 3.0008320227596017, "learning_rate": 5.3503917836114926e-06, "loss": 0.9024, "step": 12086 }, { "epoch": 0.053508344769578116, "grad_norm": 2.892565979347209, "learning_rate": 5.350834476957811e-06, "loss": 0.8067, "step": 12087 }, { "epoch": 0.053512771703041306, "grad_norm": 3.3856534746106743, "learning_rate": 5.351277170304131e-06, "loss": 0.9294, "step": 12088 }, { "epoch": 0.05351719863650449, "grad_norm": 2.8668068227245187, "learning_rate": 5.35171986365045e-06, "loss": 0.5358, "step": 12089 }, { "epoch": 0.05352162556996768, "grad_norm": 2.327738774193696, "learning_rate": 5.352162556996768e-06, "loss": 0.5354, "step": 12090 }, { "epoch": 0.05352605250343087, "grad_norm": 3.3740258272435915, "learning_rate": 5.352605250343088e-06, "loss": 0.7824, "step": 12091 }, { "epoch": 0.05353047943689406, "grad_norm": 2.8665551239674767, "learning_rate": 5.353047943689407e-06, "loss": 0.9759, "step": 12092 }, { "epoch": 0.05353490637035725, "grad_norm": 2.654667058483447, "learning_rate": 5.353490637035726e-06, "loss": 0.7083, "step": 12093 }, { "epoch": 0.05353933330382044, "grad_norm": 2.7327910477603323, "learning_rate": 5.353933330382045e-06, "loss": 0.7454, "step": 12094 }, { "epoch": 0.05354376023728363, "grad_norm": 2.749544607120653, "learning_rate": 5.3543760237283635e-06, "loss": 0.6031, "step": 12095 }, { "epoch": 0.053548187170746823, "grad_norm": 2.3340782079128157, "learning_rate": 5.354818717074682e-06, "loss": 0.7183, "step": 12096 }, { "epoch": 0.053552614104210014, "grad_norm": 2.960475555352543, "learning_rate": 5.355261410421002e-06, "loss": 1.0028, "step": 12097 }, { "epoch": 0.053557041037673205, "grad_norm": 2.4421872178955892, "learning_rate": 5.355704103767321e-06, "loss": 0.6793, "step": 12098 }, { "epoch": 0.053561467971136395, "grad_norm": 2.565240116121603, "learning_rate": 5.356146797113639e-06, "loss": 0.7775, "step": 12099 }, { "epoch": 0.053565894904599586, "grad_norm": 3.0607123721947893, "learning_rate": 5.3565894904599595e-06, "loss": 0.9048, "step": 12100 }, { "epoch": 0.053570321838062776, "grad_norm": 3.2034931937485367, "learning_rate": 5.357032183806278e-06, "loss": 1.094, "step": 12101 }, { "epoch": 0.05357474877152597, "grad_norm": 3.771896909732856, "learning_rate": 5.3574748771525966e-06, "loss": 1.1877, "step": 12102 }, { "epoch": 0.05357917570498916, "grad_norm": 2.7672583133959123, "learning_rate": 5.357917570498916e-06, "loss": 0.7066, "step": 12103 }, { "epoch": 0.05358360263845234, "grad_norm": 2.666216392308866, "learning_rate": 5.3583602638452345e-06, "loss": 0.6286, "step": 12104 }, { "epoch": 0.05358802957191553, "grad_norm": 2.3569046644445373, "learning_rate": 5.358802957191554e-06, "loss": 0.5044, "step": 12105 }, { "epoch": 0.05359245650537872, "grad_norm": 2.6152699852956665, "learning_rate": 5.359245650537873e-06, "loss": 0.8863, "step": 12106 }, { "epoch": 0.05359688343884191, "grad_norm": 2.5032669171378927, "learning_rate": 5.359688343884192e-06, "loss": 0.7207, "step": 12107 }, { "epoch": 0.0536013103723051, "grad_norm": 2.964158582654239, "learning_rate": 5.36013103723051e-06, "loss": 0.7211, "step": 12108 }, { "epoch": 0.05360573730576829, "grad_norm": 3.263658954954849, "learning_rate": 5.3605737305768305e-06, "loss": 1.1986, "step": 12109 }, { "epoch": 0.053610164239231484, "grad_norm": 2.8630625302728494, "learning_rate": 5.361016423923149e-06, "loss": 0.8664, "step": 12110 }, { "epoch": 0.053614591172694674, "grad_norm": 2.3478603122000776, "learning_rate": 5.3614591172694675e-06, "loss": 0.7157, "step": 12111 }, { "epoch": 0.053619018106157865, "grad_norm": 2.9407140240332574, "learning_rate": 5.361901810615787e-06, "loss": 0.9057, "step": 12112 }, { "epoch": 0.053623445039621055, "grad_norm": 2.2635617349751422, "learning_rate": 5.362344503962105e-06, "loss": 0.5973, "step": 12113 }, { "epoch": 0.053627871973084246, "grad_norm": 2.778212305254437, "learning_rate": 5.362787197308425e-06, "loss": 0.9046, "step": 12114 }, { "epoch": 0.053632298906547436, "grad_norm": 2.4492662873658277, "learning_rate": 5.363229890654744e-06, "loss": 0.7219, "step": 12115 }, { "epoch": 0.05363672584001063, "grad_norm": 2.820333195619603, "learning_rate": 5.363672584001063e-06, "loss": 0.8372, "step": 12116 }, { "epoch": 0.05364115277347382, "grad_norm": 3.04119510574818, "learning_rate": 5.364115277347381e-06, "loss": 0.5743, "step": 12117 }, { "epoch": 0.05364557970693701, "grad_norm": 2.8384215806406883, "learning_rate": 5.364557970693701e-06, "loss": 0.719, "step": 12118 }, { "epoch": 0.0536500066404002, "grad_norm": 2.615263307933786, "learning_rate": 5.36500066404002e-06, "loss": 0.6207, "step": 12119 }, { "epoch": 0.05365443357386338, "grad_norm": 3.0627162979093994, "learning_rate": 5.3654433573863385e-06, "loss": 0.7147, "step": 12120 }, { "epoch": 0.05365886050732657, "grad_norm": 2.7045062171957275, "learning_rate": 5.365886050732658e-06, "loss": 0.86, "step": 12121 }, { "epoch": 0.05366328744078976, "grad_norm": 4.010849464787424, "learning_rate": 5.366328744078977e-06, "loss": 1.1382, "step": 12122 }, { "epoch": 0.05366771437425295, "grad_norm": 3.2148895322064575, "learning_rate": 5.366771437425296e-06, "loss": 0.8846, "step": 12123 }, { "epoch": 0.053672141307716144, "grad_norm": 3.3722770526634744, "learning_rate": 5.367214130771615e-06, "loss": 0.964, "step": 12124 }, { "epoch": 0.053676568241179334, "grad_norm": 2.7979892297051943, "learning_rate": 5.367656824117934e-06, "loss": 0.8093, "step": 12125 }, { "epoch": 0.053680995174642525, "grad_norm": 3.2579526870442956, "learning_rate": 5.368099517464252e-06, "loss": 0.767, "step": 12126 }, { "epoch": 0.053685422108105715, "grad_norm": 2.6961788334114942, "learning_rate": 5.368542210810572e-06, "loss": 0.5023, "step": 12127 }, { "epoch": 0.053689849041568906, "grad_norm": 3.001747421855141, "learning_rate": 5.368984904156891e-06, "loss": 0.8906, "step": 12128 }, { "epoch": 0.053694275975032096, "grad_norm": 2.323496006766384, "learning_rate": 5.369427597503209e-06, "loss": 0.5404, "step": 12129 }, { "epoch": 0.05369870290849529, "grad_norm": 2.7136954258042234, "learning_rate": 5.36987029084953e-06, "loss": 0.666, "step": 12130 }, { "epoch": 0.05370312984195848, "grad_norm": 3.1076166820058977, "learning_rate": 5.370312984195848e-06, "loss": 0.9057, "step": 12131 }, { "epoch": 0.05370755677542167, "grad_norm": 2.904762275652607, "learning_rate": 5.370755677542167e-06, "loss": 0.9067, "step": 12132 }, { "epoch": 0.05371198370888486, "grad_norm": 2.7081867017543613, "learning_rate": 5.371198370888486e-06, "loss": 0.8018, "step": 12133 }, { "epoch": 0.05371641064234805, "grad_norm": 2.3578893205001425, "learning_rate": 5.3716410642348046e-06, "loss": 0.6064, "step": 12134 }, { "epoch": 0.05372083757581123, "grad_norm": 2.728614850904072, "learning_rate": 5.372083757581124e-06, "loss": 0.9321, "step": 12135 }, { "epoch": 0.05372526450927442, "grad_norm": 3.0515107575454206, "learning_rate": 5.372526450927443e-06, "loss": 0.8316, "step": 12136 }, { "epoch": 0.053729691442737613, "grad_norm": 3.065707991553659, "learning_rate": 5.372969144273762e-06, "loss": 0.6294, "step": 12137 }, { "epoch": 0.053734118376200804, "grad_norm": 2.6662354099768573, "learning_rate": 5.37341183762008e-06, "loss": 0.5276, "step": 12138 }, { "epoch": 0.053738545309663995, "grad_norm": 2.93132499213247, "learning_rate": 5.3738545309664006e-06, "loss": 1.01, "step": 12139 }, { "epoch": 0.053742972243127185, "grad_norm": 3.3142866821687833, "learning_rate": 5.374297224312719e-06, "loss": 0.7875, "step": 12140 }, { "epoch": 0.053747399176590376, "grad_norm": 3.427297601511178, "learning_rate": 5.374739917659038e-06, "loss": 0.799, "step": 12141 }, { "epoch": 0.053751826110053566, "grad_norm": 3.9559516841374336, "learning_rate": 5.375182611005357e-06, "loss": 1.1453, "step": 12142 }, { "epoch": 0.05375625304351676, "grad_norm": 2.94464608998248, "learning_rate": 5.375625304351676e-06, "loss": 1.1592, "step": 12143 }, { "epoch": 0.05376067997697995, "grad_norm": 3.129846758361315, "learning_rate": 5.376067997697995e-06, "loss": 0.7013, "step": 12144 }, { "epoch": 0.05376510691044314, "grad_norm": 3.7426425339622122, "learning_rate": 5.376510691044314e-06, "loss": 0.9027, "step": 12145 }, { "epoch": 0.05376953384390633, "grad_norm": 3.4179892316512985, "learning_rate": 5.376953384390633e-06, "loss": 0.991, "step": 12146 }, { "epoch": 0.05377396077736952, "grad_norm": 3.05790624082081, "learning_rate": 5.377396077736951e-06, "loss": 0.7102, "step": 12147 }, { "epoch": 0.05377838771083271, "grad_norm": 3.027152399749305, "learning_rate": 5.3778387710832715e-06, "loss": 1.0521, "step": 12148 }, { "epoch": 0.0537828146442959, "grad_norm": 2.6385143929267616, "learning_rate": 5.37828146442959e-06, "loss": 0.7414, "step": 12149 }, { "epoch": 0.05378724157775908, "grad_norm": 3.8981154599764416, "learning_rate": 5.3787241577759086e-06, "loss": 1.1071, "step": 12150 }, { "epoch": 0.053791668511222274, "grad_norm": 3.7228732088753613, "learning_rate": 5.379166851122229e-06, "loss": 0.9062, "step": 12151 }, { "epoch": 0.053796095444685464, "grad_norm": 3.733227264022133, "learning_rate": 5.379609544468547e-06, "loss": 0.9364, "step": 12152 }, { "epoch": 0.053800522378148655, "grad_norm": 2.693332817167292, "learning_rate": 5.380052237814866e-06, "loss": 0.6461, "step": 12153 }, { "epoch": 0.053804949311611845, "grad_norm": 2.6305949330504466, "learning_rate": 5.380494931161185e-06, "loss": 0.6913, "step": 12154 }, { "epoch": 0.053809376245075036, "grad_norm": 3.0353711707492885, "learning_rate": 5.380937624507504e-06, "loss": 0.4599, "step": 12155 }, { "epoch": 0.053813803178538226, "grad_norm": 3.152058496381341, "learning_rate": 5.381380317853822e-06, "loss": 0.7863, "step": 12156 }, { "epoch": 0.05381823011200142, "grad_norm": 2.463245908836327, "learning_rate": 5.3818230112001425e-06, "loss": 0.7828, "step": 12157 }, { "epoch": 0.05382265704546461, "grad_norm": 2.8320547839116084, "learning_rate": 5.382265704546461e-06, "loss": 0.9806, "step": 12158 }, { "epoch": 0.0538270839789278, "grad_norm": 2.5956534807767038, "learning_rate": 5.3827083978927795e-06, "loss": 0.6453, "step": 12159 }, { "epoch": 0.05383151091239099, "grad_norm": 3.884812169023004, "learning_rate": 5.3831510912391e-06, "loss": 0.9772, "step": 12160 }, { "epoch": 0.05383593784585418, "grad_norm": 2.5010870328365518, "learning_rate": 5.383593784585418e-06, "loss": 0.7982, "step": 12161 }, { "epoch": 0.05384036477931737, "grad_norm": 2.18652385765568, "learning_rate": 5.384036477931737e-06, "loss": 0.4813, "step": 12162 }, { "epoch": 0.05384479171278056, "grad_norm": 3.6047127207293226, "learning_rate": 5.384479171278056e-06, "loss": 1.0057, "step": 12163 }, { "epoch": 0.05384921864624375, "grad_norm": 4.599924593186278, "learning_rate": 5.384921864624375e-06, "loss": 1.2374, "step": 12164 }, { "epoch": 0.053853645579706934, "grad_norm": 2.1473990688720486, "learning_rate": 5.385364557970694e-06, "loss": 0.4812, "step": 12165 }, { "epoch": 0.053858072513170124, "grad_norm": 2.8704219741740413, "learning_rate": 5.385807251317013e-06, "loss": 0.7469, "step": 12166 }, { "epoch": 0.053862499446633315, "grad_norm": 3.0862251269755356, "learning_rate": 5.386249944663332e-06, "loss": 1.1402, "step": 12167 }, { "epoch": 0.053866926380096505, "grad_norm": 3.034763189849327, "learning_rate": 5.3866926380096505e-06, "loss": 0.7214, "step": 12168 }, { "epoch": 0.053871353313559696, "grad_norm": 2.589513428390512, "learning_rate": 5.387135331355971e-06, "loss": 0.8066, "step": 12169 }, { "epoch": 0.053875780247022886, "grad_norm": 3.451371444492238, "learning_rate": 5.387578024702289e-06, "loss": 1.1348, "step": 12170 }, { "epoch": 0.05388020718048608, "grad_norm": 2.8143187809712558, "learning_rate": 5.388020718048608e-06, "loss": 0.7884, "step": 12171 }, { "epoch": 0.05388463411394927, "grad_norm": 2.8171716575398236, "learning_rate": 5.388463411394927e-06, "loss": 0.6635, "step": 12172 }, { "epoch": 0.05388906104741246, "grad_norm": 2.9581240851730053, "learning_rate": 5.3889061047412465e-06, "loss": 0.9247, "step": 12173 }, { "epoch": 0.05389348798087565, "grad_norm": 2.7189684390212365, "learning_rate": 5.389348798087565e-06, "loss": 0.6386, "step": 12174 }, { "epoch": 0.05389791491433884, "grad_norm": 3.2436102442776136, "learning_rate": 5.389791491433884e-06, "loss": 0.8339, "step": 12175 }, { "epoch": 0.05390234184780203, "grad_norm": 2.9900524163883486, "learning_rate": 5.390234184780203e-06, "loss": 0.6463, "step": 12176 }, { "epoch": 0.05390676878126522, "grad_norm": 2.4053205286373176, "learning_rate": 5.390676878126521e-06, "loss": 0.675, "step": 12177 }, { "epoch": 0.05391119571472841, "grad_norm": 2.8774226260196984, "learning_rate": 5.391119571472842e-06, "loss": 0.6083, "step": 12178 }, { "epoch": 0.0539156226481916, "grad_norm": 2.583105640166552, "learning_rate": 5.39156226481916e-06, "loss": 0.999, "step": 12179 }, { "epoch": 0.053920049581654785, "grad_norm": 4.06227961044905, "learning_rate": 5.392004958165479e-06, "loss": 0.7669, "step": 12180 }, { "epoch": 0.053924476515117975, "grad_norm": 2.733384435504967, "learning_rate": 5.392447651511799e-06, "loss": 0.75, "step": 12181 }, { "epoch": 0.053928903448581166, "grad_norm": 2.5311942790412965, "learning_rate": 5.392890344858117e-06, "loss": 0.6937, "step": 12182 }, { "epoch": 0.053933330382044356, "grad_norm": 2.623819049761299, "learning_rate": 5.393333038204436e-06, "loss": 0.7349, "step": 12183 }, { "epoch": 0.05393775731550755, "grad_norm": 2.5055585723148037, "learning_rate": 5.393775731550755e-06, "loss": 0.8745, "step": 12184 }, { "epoch": 0.05394218424897074, "grad_norm": 2.7745281479263064, "learning_rate": 5.394218424897074e-06, "loss": 0.7079, "step": 12185 }, { "epoch": 0.05394661118243393, "grad_norm": 2.5871041241796346, "learning_rate": 5.394661118243393e-06, "loss": 0.9227, "step": 12186 }, { "epoch": 0.05395103811589712, "grad_norm": 2.8941056756648442, "learning_rate": 5.3951038115897126e-06, "loss": 0.797, "step": 12187 }, { "epoch": 0.05395546504936031, "grad_norm": 3.005361392235811, "learning_rate": 5.395546504936031e-06, "loss": 0.6379, "step": 12188 }, { "epoch": 0.0539598919828235, "grad_norm": 2.8273988588908234, "learning_rate": 5.39598919828235e-06, "loss": 0.5363, "step": 12189 }, { "epoch": 0.05396431891628669, "grad_norm": 2.693213594859305, "learning_rate": 5.39643189162867e-06, "loss": 0.9162, "step": 12190 }, { "epoch": 0.05396874584974988, "grad_norm": 3.9649272258158903, "learning_rate": 5.396874584974988e-06, "loss": 1.4473, "step": 12191 }, { "epoch": 0.05397317278321307, "grad_norm": 3.030229033864752, "learning_rate": 5.397317278321307e-06, "loss": 0.8581, "step": 12192 }, { "epoch": 0.05397759971667626, "grad_norm": 3.4075029674123103, "learning_rate": 5.397759971667626e-06, "loss": 1.0527, "step": 12193 }, { "epoch": 0.05398202665013945, "grad_norm": 3.3579044818478843, "learning_rate": 5.398202665013945e-06, "loss": 1.4231, "step": 12194 }, { "epoch": 0.053986453583602635, "grad_norm": 2.331020146115037, "learning_rate": 5.398645358360264e-06, "loss": 0.6736, "step": 12195 }, { "epoch": 0.053990880517065826, "grad_norm": 2.9127574819200412, "learning_rate": 5.3990880517065835e-06, "loss": 0.8634, "step": 12196 }, { "epoch": 0.053995307450529016, "grad_norm": 3.051434524731643, "learning_rate": 5.399530745052902e-06, "loss": 0.8774, "step": 12197 }, { "epoch": 0.05399973438399221, "grad_norm": 2.3906505185941813, "learning_rate": 5.3999734383992206e-06, "loss": 0.627, "step": 12198 }, { "epoch": 0.0540041613174554, "grad_norm": 2.8061800847715417, "learning_rate": 5.400416131745541e-06, "loss": 0.7159, "step": 12199 }, { "epoch": 0.05400858825091859, "grad_norm": 2.9222542114333327, "learning_rate": 5.400858825091859e-06, "loss": 1.1561, "step": 12200 }, { "epoch": 0.05401301518438178, "grad_norm": 3.0463557591430406, "learning_rate": 5.401301518438178e-06, "loss": 0.8775, "step": 12201 }, { "epoch": 0.05401744211784497, "grad_norm": 2.3675432829856278, "learning_rate": 5.401744211784497e-06, "loss": 0.8765, "step": 12202 }, { "epoch": 0.05402186905130816, "grad_norm": 2.7516399527462334, "learning_rate": 5.4021869051308166e-06, "loss": 0.8185, "step": 12203 }, { "epoch": 0.05402629598477135, "grad_norm": 2.572125733113456, "learning_rate": 5.402629598477135e-06, "loss": 0.7358, "step": 12204 }, { "epoch": 0.05403072291823454, "grad_norm": 4.247018066301856, "learning_rate": 5.4030722918234545e-06, "loss": 1.2619, "step": 12205 }, { "epoch": 0.05403514985169773, "grad_norm": 3.600157827190591, "learning_rate": 5.403514985169773e-06, "loss": 1.0332, "step": 12206 }, { "epoch": 0.05403957678516092, "grad_norm": 2.9156299995924617, "learning_rate": 5.4039576785160915e-06, "loss": 0.7138, "step": 12207 }, { "epoch": 0.05404400371862411, "grad_norm": 3.3234766065999826, "learning_rate": 5.404400371862412e-06, "loss": 0.7724, "step": 12208 }, { "epoch": 0.0540484306520873, "grad_norm": 2.1205915093840293, "learning_rate": 5.40484306520873e-06, "loss": 0.4397, "step": 12209 }, { "epoch": 0.054052857585550486, "grad_norm": 2.2966974570440883, "learning_rate": 5.405285758555049e-06, "loss": 0.5753, "step": 12210 }, { "epoch": 0.054057284519013676, "grad_norm": 2.6172100450393545, "learning_rate": 5.405728451901369e-06, "loss": 0.8101, "step": 12211 }, { "epoch": 0.05406171145247687, "grad_norm": 2.954905413982817, "learning_rate": 5.4061711452476875e-06, "loss": 0.6709, "step": 12212 }, { "epoch": 0.05406613838594006, "grad_norm": 3.027873291498684, "learning_rate": 5.406613838594006e-06, "loss": 0.9009, "step": 12213 }, { "epoch": 0.05407056531940325, "grad_norm": 2.6095805221453547, "learning_rate": 5.407056531940325e-06, "loss": 0.9001, "step": 12214 }, { "epoch": 0.05407499225286644, "grad_norm": 2.7469016176092285, "learning_rate": 5.407499225286644e-06, "loss": 0.5366, "step": 12215 }, { "epoch": 0.05407941918632963, "grad_norm": 2.3516371310637165, "learning_rate": 5.407941918632963e-06, "loss": 0.5187, "step": 12216 }, { "epoch": 0.05408384611979282, "grad_norm": 2.873904950294337, "learning_rate": 5.408384611979283e-06, "loss": 1.0204, "step": 12217 }, { "epoch": 0.05408827305325601, "grad_norm": 2.1804299438790093, "learning_rate": 5.408827305325601e-06, "loss": 0.4663, "step": 12218 }, { "epoch": 0.0540926999867192, "grad_norm": 3.7909036581311017, "learning_rate": 5.40926999867192e-06, "loss": 1.0302, "step": 12219 }, { "epoch": 0.05409712692018239, "grad_norm": 2.6535163525322587, "learning_rate": 5.40971269201824e-06, "loss": 0.8533, "step": 12220 }, { "epoch": 0.05410155385364558, "grad_norm": 2.4885955067210457, "learning_rate": 5.4101553853645585e-06, "loss": 0.5414, "step": 12221 }, { "epoch": 0.05410598078710877, "grad_norm": 2.687184256437891, "learning_rate": 5.410598078710877e-06, "loss": 0.7064, "step": 12222 }, { "epoch": 0.05411040772057196, "grad_norm": 2.6861157404588196, "learning_rate": 5.411040772057196e-06, "loss": 0.3214, "step": 12223 }, { "epoch": 0.05411483465403515, "grad_norm": 2.3858027159916917, "learning_rate": 5.411483465403516e-06, "loss": 0.6878, "step": 12224 }, { "epoch": 0.05411926158749834, "grad_norm": 2.9804110737942944, "learning_rate": 5.411926158749834e-06, "loss": 0.7017, "step": 12225 }, { "epoch": 0.05412368852096153, "grad_norm": 3.6745279884675948, "learning_rate": 5.412368852096154e-06, "loss": 0.9929, "step": 12226 }, { "epoch": 0.05412811545442472, "grad_norm": 2.5512125325550525, "learning_rate": 5.412811545442472e-06, "loss": 0.841, "step": 12227 }, { "epoch": 0.05413254238788791, "grad_norm": 3.1729189456494877, "learning_rate": 5.413254238788791e-06, "loss": 0.7712, "step": 12228 }, { "epoch": 0.0541369693213511, "grad_norm": 2.7192558345607085, "learning_rate": 5.413696932135111e-06, "loss": 0.6274, "step": 12229 }, { "epoch": 0.05414139625481429, "grad_norm": 2.737213407521391, "learning_rate": 5.414139625481429e-06, "loss": 0.5888, "step": 12230 }, { "epoch": 0.05414582318827748, "grad_norm": 2.797028863110929, "learning_rate": 5.414582318827748e-06, "loss": 0.7249, "step": 12231 }, { "epoch": 0.05415025012174067, "grad_norm": 3.5259157053306063, "learning_rate": 5.415025012174067e-06, "loss": 1.0211, "step": 12232 }, { "epoch": 0.05415467705520386, "grad_norm": 2.6409897260497144, "learning_rate": 5.415467705520387e-06, "loss": 0.7276, "step": 12233 }, { "epoch": 0.05415910398866705, "grad_norm": 2.4353581044627273, "learning_rate": 5.415910398866705e-06, "loss": 0.5955, "step": 12234 }, { "epoch": 0.05416353092213024, "grad_norm": 2.8363144311610573, "learning_rate": 5.4163530922130246e-06, "loss": 0.7737, "step": 12235 }, { "epoch": 0.05416795785559343, "grad_norm": 2.7249087727554087, "learning_rate": 5.416795785559343e-06, "loss": 0.9376, "step": 12236 }, { "epoch": 0.05417238478905662, "grad_norm": 4.11875230458611, "learning_rate": 5.417238478905662e-06, "loss": 1.1029, "step": 12237 }, { "epoch": 0.05417681172251981, "grad_norm": 2.342971408018996, "learning_rate": 5.417681172251982e-06, "loss": 0.7193, "step": 12238 }, { "epoch": 0.054181238655983004, "grad_norm": 3.3138307804306053, "learning_rate": 5.4181238655983e-06, "loss": 0.8776, "step": 12239 }, { "epoch": 0.05418566558944619, "grad_norm": 2.6417275184908293, "learning_rate": 5.418566558944619e-06, "loss": 0.6437, "step": 12240 }, { "epoch": 0.05419009252290938, "grad_norm": 3.4610175157883223, "learning_rate": 5.419009252290939e-06, "loss": 1.2044, "step": 12241 }, { "epoch": 0.05419451945637257, "grad_norm": 2.631986053004791, "learning_rate": 5.419451945637258e-06, "loss": 0.6484, "step": 12242 }, { "epoch": 0.05419894638983576, "grad_norm": 2.8324014230537338, "learning_rate": 5.419894638983576e-06, "loss": 0.8332, "step": 12243 }, { "epoch": 0.05420337332329895, "grad_norm": 2.688345398013522, "learning_rate": 5.4203373323298955e-06, "loss": 0.8417, "step": 12244 }, { "epoch": 0.05420780025676214, "grad_norm": 2.8260590073721987, "learning_rate": 5.420780025676214e-06, "loss": 0.7691, "step": 12245 }, { "epoch": 0.05421222719022533, "grad_norm": 2.5277288866004395, "learning_rate": 5.421222719022533e-06, "loss": 0.6446, "step": 12246 }, { "epoch": 0.05421665412368852, "grad_norm": 2.6845040494695116, "learning_rate": 5.421665412368853e-06, "loss": 0.8145, "step": 12247 }, { "epoch": 0.05422108105715171, "grad_norm": 2.291049525336749, "learning_rate": 5.422108105715171e-06, "loss": 0.5817, "step": 12248 }, { "epoch": 0.0542255079906149, "grad_norm": 2.553943003749313, "learning_rate": 5.42255079906149e-06, "loss": 0.4933, "step": 12249 }, { "epoch": 0.05422993492407809, "grad_norm": 2.122437546202301, "learning_rate": 5.42299349240781e-06, "loss": 0.5051, "step": 12250 }, { "epoch": 0.05423436185754128, "grad_norm": 2.6366154610492, "learning_rate": 5.4234361857541286e-06, "loss": 0.6998, "step": 12251 }, { "epoch": 0.05423878879100447, "grad_norm": 2.937058187192175, "learning_rate": 5.423878879100447e-06, "loss": 0.6197, "step": 12252 }, { "epoch": 0.054243215724467664, "grad_norm": 2.9591127922859997, "learning_rate": 5.4243215724467665e-06, "loss": 0.7653, "step": 12253 }, { "epoch": 0.054247642657930854, "grad_norm": 2.7045297166705513, "learning_rate": 5.424764265793086e-06, "loss": 0.5694, "step": 12254 }, { "epoch": 0.05425206959139404, "grad_norm": 2.558684446403205, "learning_rate": 5.425206959139404e-06, "loss": 0.7829, "step": 12255 }, { "epoch": 0.05425649652485723, "grad_norm": 3.1687057131625744, "learning_rate": 5.425649652485724e-06, "loss": 1.0279, "step": 12256 }, { "epoch": 0.05426092345832042, "grad_norm": 2.783017712908329, "learning_rate": 5.426092345832042e-06, "loss": 0.9077, "step": 12257 }, { "epoch": 0.05426535039178361, "grad_norm": 2.6112667693195855, "learning_rate": 5.426535039178361e-06, "loss": 0.8549, "step": 12258 }, { "epoch": 0.0542697773252468, "grad_norm": 2.7373710803695217, "learning_rate": 5.426977732524681e-06, "loss": 0.566, "step": 12259 }, { "epoch": 0.05427420425870999, "grad_norm": 2.80278740117898, "learning_rate": 5.4274204258709995e-06, "loss": 0.7762, "step": 12260 }, { "epoch": 0.05427863119217318, "grad_norm": 3.511583674450295, "learning_rate": 5.427863119217318e-06, "loss": 1.0518, "step": 12261 }, { "epoch": 0.05428305812563637, "grad_norm": 3.488883743155687, "learning_rate": 5.428305812563638e-06, "loss": 0.9201, "step": 12262 }, { "epoch": 0.05428748505909956, "grad_norm": 3.5658173120109673, "learning_rate": 5.428748505909957e-06, "loss": 1.2551, "step": 12263 }, { "epoch": 0.05429191199256275, "grad_norm": 3.285460089080331, "learning_rate": 5.429191199256275e-06, "loss": 0.877, "step": 12264 }, { "epoch": 0.05429633892602594, "grad_norm": 2.6384871211209355, "learning_rate": 5.429633892602595e-06, "loss": 0.7055, "step": 12265 }, { "epoch": 0.054300765859489133, "grad_norm": 3.079620533931255, "learning_rate": 5.430076585948913e-06, "loss": 0.5706, "step": 12266 }, { "epoch": 0.054305192792952324, "grad_norm": 2.7083227788103, "learning_rate": 5.430519279295232e-06, "loss": 0.5589, "step": 12267 }, { "epoch": 0.054309619726415514, "grad_norm": 3.963081555463831, "learning_rate": 5.430961972641552e-06, "loss": 1.0425, "step": 12268 }, { "epoch": 0.054314046659878705, "grad_norm": 2.6183395030907133, "learning_rate": 5.4314046659878705e-06, "loss": 0.8581, "step": 12269 }, { "epoch": 0.054318473593341896, "grad_norm": 2.723314873974549, "learning_rate": 5.431847359334189e-06, "loss": 0.6741, "step": 12270 }, { "epoch": 0.05432290052680508, "grad_norm": 2.6019164864651882, "learning_rate": 5.432290052680509e-06, "loss": 0.8416, "step": 12271 }, { "epoch": 0.05432732746026827, "grad_norm": 2.6251009238557805, "learning_rate": 5.432732746026828e-06, "loss": 0.7519, "step": 12272 }, { "epoch": 0.05433175439373146, "grad_norm": 2.4316684979013803, "learning_rate": 5.433175439373146e-06, "loss": 0.715, "step": 12273 }, { "epoch": 0.05433618132719465, "grad_norm": 2.8910821711962473, "learning_rate": 5.433618132719466e-06, "loss": 0.6821, "step": 12274 }, { "epoch": 0.05434060826065784, "grad_norm": 3.7981629110075104, "learning_rate": 5.434060826065784e-06, "loss": 0.6996, "step": 12275 }, { "epoch": 0.05434503519412103, "grad_norm": 3.0253228142382165, "learning_rate": 5.4345035194121035e-06, "loss": 0.6308, "step": 12276 }, { "epoch": 0.05434946212758422, "grad_norm": 2.518101373266399, "learning_rate": 5.434946212758423e-06, "loss": 0.6694, "step": 12277 }, { "epoch": 0.05435388906104741, "grad_norm": 2.8790462107980965, "learning_rate": 5.435388906104741e-06, "loss": 0.8736, "step": 12278 }, { "epoch": 0.0543583159945106, "grad_norm": 2.6038098886631142, "learning_rate": 5.43583159945106e-06, "loss": 0.656, "step": 12279 }, { "epoch": 0.054362742927973794, "grad_norm": 2.275120129723318, "learning_rate": 5.43627429279738e-06, "loss": 0.4408, "step": 12280 }, { "epoch": 0.054367169861436984, "grad_norm": 2.551156207882083, "learning_rate": 5.436716986143699e-06, "loss": 0.6149, "step": 12281 }, { "epoch": 0.054371596794900175, "grad_norm": 2.92508403182987, "learning_rate": 5.437159679490017e-06, "loss": 0.8318, "step": 12282 }, { "epoch": 0.054376023728363365, "grad_norm": 3.425086279977978, "learning_rate": 5.437602372836337e-06, "loss": 0.9741, "step": 12283 }, { "epoch": 0.054380450661826556, "grad_norm": 2.2335629080574337, "learning_rate": 5.438045066182656e-06, "loss": 0.6797, "step": 12284 }, { "epoch": 0.054384877595289746, "grad_norm": 2.355935023323204, "learning_rate": 5.4384877595289745e-06, "loss": 0.5318, "step": 12285 }, { "epoch": 0.05438930452875293, "grad_norm": 4.001650515728277, "learning_rate": 5.438930452875294e-06, "loss": 0.9169, "step": 12286 }, { "epoch": 0.05439373146221612, "grad_norm": 3.5654998707576593, "learning_rate": 5.439373146221612e-06, "loss": 1.2091, "step": 12287 }, { "epoch": 0.05439815839567931, "grad_norm": 2.2678398876289863, "learning_rate": 5.439815839567931e-06, "loss": 0.6788, "step": 12288 }, { "epoch": 0.0544025853291425, "grad_norm": 2.8966585087698005, "learning_rate": 5.440258532914251e-06, "loss": 0.7122, "step": 12289 }, { "epoch": 0.05440701226260569, "grad_norm": 2.629863124510741, "learning_rate": 5.44070122626057e-06, "loss": 0.6484, "step": 12290 }, { "epoch": 0.05441143919606888, "grad_norm": 2.3117843033948984, "learning_rate": 5.441143919606888e-06, "loss": 0.4681, "step": 12291 }, { "epoch": 0.05441586612953207, "grad_norm": 2.6896714419894217, "learning_rate": 5.441586612953208e-06, "loss": 0.5899, "step": 12292 }, { "epoch": 0.05442029306299526, "grad_norm": 2.782756006350279, "learning_rate": 5.442029306299527e-06, "loss": 0.7614, "step": 12293 }, { "epoch": 0.054424719996458454, "grad_norm": 2.6411032366743386, "learning_rate": 5.442471999645845e-06, "loss": 0.7866, "step": 12294 }, { "epoch": 0.054429146929921644, "grad_norm": 2.58549495336926, "learning_rate": 5.442914692992165e-06, "loss": 0.5744, "step": 12295 }, { "epoch": 0.054433573863384835, "grad_norm": 2.3702951189225496, "learning_rate": 5.443357386338483e-06, "loss": 0.5761, "step": 12296 }, { "epoch": 0.054438000796848025, "grad_norm": 2.829091879656924, "learning_rate": 5.443800079684803e-06, "loss": 0.8806, "step": 12297 }, { "epoch": 0.054442427730311216, "grad_norm": 2.8821688628643756, "learning_rate": 5.444242773031122e-06, "loss": 0.6457, "step": 12298 }, { "epoch": 0.054446854663774406, "grad_norm": 2.196760774078533, "learning_rate": 5.444685466377441e-06, "loss": 0.4425, "step": 12299 }, { "epoch": 0.0544512815972376, "grad_norm": 2.6417967713958825, "learning_rate": 5.445128159723759e-06, "loss": 0.615, "step": 12300 }, { "epoch": 0.05445570853070078, "grad_norm": 2.3535960800639564, "learning_rate": 5.445570853070079e-06, "loss": 0.6842, "step": 12301 }, { "epoch": 0.05446013546416397, "grad_norm": 3.063329139638316, "learning_rate": 5.446013546416398e-06, "loss": 0.6998, "step": 12302 }, { "epoch": 0.05446456239762716, "grad_norm": 2.6495371672822183, "learning_rate": 5.446456239762716e-06, "loss": 0.6962, "step": 12303 }, { "epoch": 0.05446898933109035, "grad_norm": 3.4566375277614414, "learning_rate": 5.446898933109036e-06, "loss": 0.9171, "step": 12304 }, { "epoch": 0.05447341626455354, "grad_norm": 2.7985342903575394, "learning_rate": 5.447341626455355e-06, "loss": 0.7194, "step": 12305 }, { "epoch": 0.05447784319801673, "grad_norm": 2.839167146139015, "learning_rate": 5.447784319801674e-06, "loss": 0.8375, "step": 12306 }, { "epoch": 0.054482270131479923, "grad_norm": 2.7397969989023947, "learning_rate": 5.448227013147993e-06, "loss": 0.868, "step": 12307 }, { "epoch": 0.054486697064943114, "grad_norm": 2.8205120899340113, "learning_rate": 5.4486697064943115e-06, "loss": 0.789, "step": 12308 }, { "epoch": 0.054491123998406304, "grad_norm": 2.5843624714861493, "learning_rate": 5.44911239984063e-06, "loss": 0.7621, "step": 12309 }, { "epoch": 0.054495550931869495, "grad_norm": 2.8045243553846704, "learning_rate": 5.44955509318695e-06, "loss": 0.7308, "step": 12310 }, { "epoch": 0.054499977865332686, "grad_norm": 2.4592649333071224, "learning_rate": 5.449997786533269e-06, "loss": 0.8041, "step": 12311 }, { "epoch": 0.054504404798795876, "grad_norm": 3.157077105765905, "learning_rate": 5.450440479879587e-06, "loss": 0.8042, "step": 12312 }, { "epoch": 0.054508831732259067, "grad_norm": 2.6273022787027416, "learning_rate": 5.450883173225907e-06, "loss": 0.8476, "step": 12313 }, { "epoch": 0.05451325866572226, "grad_norm": 2.8260336669412087, "learning_rate": 5.451325866572226e-06, "loss": 0.6682, "step": 12314 }, { "epoch": 0.05451768559918545, "grad_norm": 3.0731355516525563, "learning_rate": 5.451768559918545e-06, "loss": 1.0404, "step": 12315 }, { "epoch": 0.05452211253264863, "grad_norm": 2.9238332230950483, "learning_rate": 5.452211253264864e-06, "loss": 0.8989, "step": 12316 }, { "epoch": 0.05452653946611182, "grad_norm": 2.8444456914274276, "learning_rate": 5.4526539466111825e-06, "loss": 0.6462, "step": 12317 }, { "epoch": 0.05453096639957501, "grad_norm": 2.728305958486592, "learning_rate": 5.453096639957501e-06, "loss": 0.7313, "step": 12318 }, { "epoch": 0.0545353933330382, "grad_norm": 2.759209935060526, "learning_rate": 5.453539333303821e-06, "loss": 0.75, "step": 12319 }, { "epoch": 0.05453982026650139, "grad_norm": 2.8778661371150367, "learning_rate": 5.45398202665014e-06, "loss": 0.6771, "step": 12320 }, { "epoch": 0.054544247199964584, "grad_norm": 2.7677524463528944, "learning_rate": 5.454424719996458e-06, "loss": 0.6996, "step": 12321 }, { "epoch": 0.054548674133427774, "grad_norm": 2.329466020582346, "learning_rate": 5.4548674133427785e-06, "loss": 0.631, "step": 12322 }, { "epoch": 0.054553101066890965, "grad_norm": 2.967209403729816, "learning_rate": 5.455310106689097e-06, "loss": 0.5303, "step": 12323 }, { "epoch": 0.054557528000354155, "grad_norm": 2.388407598940164, "learning_rate": 5.4557528000354155e-06, "loss": 0.6362, "step": 12324 }, { "epoch": 0.054561954933817346, "grad_norm": 2.5845602410713338, "learning_rate": 5.456195493381735e-06, "loss": 0.5051, "step": 12325 }, { "epoch": 0.054566381867280536, "grad_norm": 2.9068036592185282, "learning_rate": 5.456638186728053e-06, "loss": 0.8939, "step": 12326 }, { "epoch": 0.05457080880074373, "grad_norm": 3.0704513864444594, "learning_rate": 5.457080880074373e-06, "loss": 0.7521, "step": 12327 }, { "epoch": 0.05457523573420692, "grad_norm": 2.7224789497505912, "learning_rate": 5.457523573420692e-06, "loss": 0.7319, "step": 12328 }, { "epoch": 0.05457966266767011, "grad_norm": 2.864113893088101, "learning_rate": 5.457966266767011e-06, "loss": 0.7765, "step": 12329 }, { "epoch": 0.0545840896011333, "grad_norm": 2.351727280557303, "learning_rate": 5.458408960113329e-06, "loss": 0.6373, "step": 12330 }, { "epoch": 0.05458851653459648, "grad_norm": 2.7076637278622946, "learning_rate": 5.4588516534596494e-06, "loss": 0.7149, "step": 12331 }, { "epoch": 0.05459294346805967, "grad_norm": 2.919064362760932, "learning_rate": 5.459294346805968e-06, "loss": 0.9126, "step": 12332 }, { "epoch": 0.05459737040152286, "grad_norm": 2.7925765291896334, "learning_rate": 5.4597370401522865e-06, "loss": 0.6032, "step": 12333 }, { "epoch": 0.05460179733498605, "grad_norm": 3.4252934902683805, "learning_rate": 5.460179733498606e-06, "loss": 0.9579, "step": 12334 }, { "epoch": 0.054606224268449244, "grad_norm": 3.7489798520711717, "learning_rate": 5.460622426844925e-06, "loss": 1.0264, "step": 12335 }, { "epoch": 0.054610651201912434, "grad_norm": 2.5764438877142286, "learning_rate": 5.461065120191244e-06, "loss": 0.7976, "step": 12336 }, { "epoch": 0.054615078135375625, "grad_norm": 3.0626574472065875, "learning_rate": 5.461507813537563e-06, "loss": 0.8289, "step": 12337 }, { "epoch": 0.054619505068838815, "grad_norm": 3.2278541407942027, "learning_rate": 5.461950506883882e-06, "loss": 1.0731, "step": 12338 }, { "epoch": 0.054623932002302006, "grad_norm": 3.329057431188217, "learning_rate": 5.4623932002302e-06, "loss": 1.033, "step": 12339 }, { "epoch": 0.054628358935765196, "grad_norm": 2.9620919818396354, "learning_rate": 5.46283589357652e-06, "loss": 0.6681, "step": 12340 }, { "epoch": 0.05463278586922839, "grad_norm": 3.443526121393158, "learning_rate": 5.463278586922839e-06, "loss": 1.0423, "step": 12341 }, { "epoch": 0.05463721280269158, "grad_norm": 2.4954882883767002, "learning_rate": 5.463721280269157e-06, "loss": 0.7666, "step": 12342 }, { "epoch": 0.05464163973615477, "grad_norm": 2.689254846968287, "learning_rate": 5.464163973615478e-06, "loss": 0.7192, "step": 12343 }, { "epoch": 0.05464606666961796, "grad_norm": 2.543869388099555, "learning_rate": 5.464606666961796e-06, "loss": 0.6205, "step": 12344 }, { "epoch": 0.05465049360308115, "grad_norm": 2.7671718011826205, "learning_rate": 5.465049360308115e-06, "loss": 0.6481, "step": 12345 }, { "epoch": 0.05465492053654433, "grad_norm": 2.4566653416316733, "learning_rate": 5.465492053654434e-06, "loss": 0.4049, "step": 12346 }, { "epoch": 0.05465934747000752, "grad_norm": 2.349784974673587, "learning_rate": 5.465934747000753e-06, "loss": 0.8152, "step": 12347 }, { "epoch": 0.054663774403470713, "grad_norm": 3.3534838925848125, "learning_rate": 5.466377440347071e-06, "loss": 1.0768, "step": 12348 }, { "epoch": 0.054668201336933904, "grad_norm": 2.7768578933326022, "learning_rate": 5.466820133693391e-06, "loss": 0.7327, "step": 12349 }, { "epoch": 0.054672628270397094, "grad_norm": 2.7928179195386273, "learning_rate": 5.46726282703971e-06, "loss": 0.8435, "step": 12350 }, { "epoch": 0.054677055203860285, "grad_norm": 2.4367097300501444, "learning_rate": 5.467705520386028e-06, "loss": 0.6398, "step": 12351 }, { "epoch": 0.054681482137323476, "grad_norm": 3.0717230958605235, "learning_rate": 5.468148213732349e-06, "loss": 0.8085, "step": 12352 }, { "epoch": 0.054685909070786666, "grad_norm": 3.0579511764086456, "learning_rate": 5.468590907078667e-06, "loss": 0.9533, "step": 12353 }, { "epoch": 0.054690336004249857, "grad_norm": 2.7709121854242684, "learning_rate": 5.469033600424986e-06, "loss": 0.8455, "step": 12354 }, { "epoch": 0.05469476293771305, "grad_norm": 3.364730176911698, "learning_rate": 5.469476293771305e-06, "loss": 0.9422, "step": 12355 }, { "epoch": 0.05469918987117624, "grad_norm": 3.0081645713512786, "learning_rate": 5.4699189871176235e-06, "loss": 0.8644, "step": 12356 }, { "epoch": 0.05470361680463943, "grad_norm": 2.671134056279262, "learning_rate": 5.470361680463943e-06, "loss": 0.8252, "step": 12357 }, { "epoch": 0.05470804373810262, "grad_norm": 2.384345980588711, "learning_rate": 5.470804373810262e-06, "loss": 0.5532, "step": 12358 }, { "epoch": 0.05471247067156581, "grad_norm": 2.7863888686035407, "learning_rate": 5.471247067156581e-06, "loss": 0.7747, "step": 12359 }, { "epoch": 0.054716897605029, "grad_norm": 2.905025240414013, "learning_rate": 5.471689760502899e-06, "loss": 0.7177, "step": 12360 }, { "epoch": 0.05472132453849218, "grad_norm": 2.2248875366394487, "learning_rate": 5.4721324538492195e-06, "loss": 0.6187, "step": 12361 }, { "epoch": 0.054725751471955374, "grad_norm": 2.897500573531283, "learning_rate": 5.472575147195538e-06, "loss": 0.9796, "step": 12362 }, { "epoch": 0.054730178405418564, "grad_norm": 2.5049337623651415, "learning_rate": 5.473017840541857e-06, "loss": 0.6916, "step": 12363 }, { "epoch": 0.054734605338881755, "grad_norm": 2.7672720071000776, "learning_rate": 5.473460533888176e-06, "loss": 0.7825, "step": 12364 }, { "epoch": 0.054739032272344945, "grad_norm": 3.4440312798019126, "learning_rate": 5.473903227234495e-06, "loss": 1.1491, "step": 12365 }, { "epoch": 0.054743459205808136, "grad_norm": 3.7767232838181277, "learning_rate": 5.474345920580814e-06, "loss": 0.9893, "step": 12366 }, { "epoch": 0.054747886139271326, "grad_norm": 2.9452744299973306, "learning_rate": 5.474788613927133e-06, "loss": 0.6081, "step": 12367 }, { "epoch": 0.05475231307273452, "grad_norm": 3.2506153152114394, "learning_rate": 5.475231307273452e-06, "loss": 0.7344, "step": 12368 }, { "epoch": 0.05475674000619771, "grad_norm": 2.598273891676868, "learning_rate": 5.47567400061977e-06, "loss": 0.6466, "step": 12369 }, { "epoch": 0.0547611669396609, "grad_norm": 2.142282481816237, "learning_rate": 5.4761166939660905e-06, "loss": 0.6191, "step": 12370 }, { "epoch": 0.05476559387312409, "grad_norm": 2.4320727630839563, "learning_rate": 5.476559387312409e-06, "loss": 0.6071, "step": 12371 }, { "epoch": 0.05477002080658728, "grad_norm": 3.3244200569988123, "learning_rate": 5.4770020806587275e-06, "loss": 0.8192, "step": 12372 }, { "epoch": 0.05477444774005047, "grad_norm": 3.10537416503778, "learning_rate": 5.477444774005048e-06, "loss": 0.7756, "step": 12373 }, { "epoch": 0.05477887467351366, "grad_norm": 2.86252077096107, "learning_rate": 5.477887467351366e-06, "loss": 0.6526, "step": 12374 }, { "epoch": 0.05478330160697685, "grad_norm": 3.7375906618011885, "learning_rate": 5.478330160697685e-06, "loss": 0.9009, "step": 12375 }, { "epoch": 0.054787728540440034, "grad_norm": 2.5490762661013466, "learning_rate": 5.478772854044004e-06, "loss": 0.7316, "step": 12376 }, { "epoch": 0.054792155473903224, "grad_norm": 3.2027333257735795, "learning_rate": 5.479215547390323e-06, "loss": 0.788, "step": 12377 }, { "epoch": 0.054796582407366415, "grad_norm": 3.0591697526743875, "learning_rate": 5.479658240736642e-06, "loss": 0.8866, "step": 12378 }, { "epoch": 0.054801009340829605, "grad_norm": 2.387252253245947, "learning_rate": 5.4801009340829614e-06, "loss": 0.4826, "step": 12379 }, { "epoch": 0.054805436274292796, "grad_norm": 3.1142364911705704, "learning_rate": 5.48054362742928e-06, "loss": 0.7357, "step": 12380 }, { "epoch": 0.054809863207755986, "grad_norm": 2.6204201765869786, "learning_rate": 5.4809863207755985e-06, "loss": 0.784, "step": 12381 }, { "epoch": 0.05481429014121918, "grad_norm": 2.318634989633717, "learning_rate": 5.481429014121919e-06, "loss": 0.5285, "step": 12382 }, { "epoch": 0.05481871707468237, "grad_norm": 4.217571208495168, "learning_rate": 5.481871707468237e-06, "loss": 1.1423, "step": 12383 }, { "epoch": 0.05482314400814556, "grad_norm": 3.063555476125744, "learning_rate": 5.482314400814556e-06, "loss": 0.8231, "step": 12384 }, { "epoch": 0.05482757094160875, "grad_norm": 2.7595157262828076, "learning_rate": 5.482757094160875e-06, "loss": 0.8585, "step": 12385 }, { "epoch": 0.05483199787507194, "grad_norm": 2.9869402284676414, "learning_rate": 5.4831997875071945e-06, "loss": 0.8581, "step": 12386 }, { "epoch": 0.05483642480853513, "grad_norm": 3.774366108025418, "learning_rate": 5.483642480853513e-06, "loss": 0.9695, "step": 12387 }, { "epoch": 0.05484085174199832, "grad_norm": 2.8676721311722497, "learning_rate": 5.484085174199832e-06, "loss": 0.7853, "step": 12388 }, { "epoch": 0.05484527867546151, "grad_norm": 2.4661368130536134, "learning_rate": 5.484527867546151e-06, "loss": 0.6426, "step": 12389 }, { "epoch": 0.0548497056089247, "grad_norm": 2.4017111868109935, "learning_rate": 5.484970560892469e-06, "loss": 0.6962, "step": 12390 }, { "epoch": 0.054854132542387884, "grad_norm": 2.071088700187351, "learning_rate": 5.48541325423879e-06, "loss": 0.4833, "step": 12391 }, { "epoch": 0.054858559475851075, "grad_norm": 2.706801838024018, "learning_rate": 5.485855947585108e-06, "loss": 0.7628, "step": 12392 }, { "epoch": 0.054862986409314266, "grad_norm": 2.3248292880634347, "learning_rate": 5.486298640931427e-06, "loss": 0.5826, "step": 12393 }, { "epoch": 0.054867413342777456, "grad_norm": 2.568033332256766, "learning_rate": 5.486741334277746e-06, "loss": 0.673, "step": 12394 }, { "epoch": 0.054871840276240647, "grad_norm": 4.020375443361622, "learning_rate": 5.4871840276240654e-06, "loss": 0.8646, "step": 12395 }, { "epoch": 0.05487626720970384, "grad_norm": 2.729640235120251, "learning_rate": 5.487626720970384e-06, "loss": 0.9437, "step": 12396 }, { "epoch": 0.05488069414316703, "grad_norm": 3.0791591124067508, "learning_rate": 5.488069414316703e-06, "loss": 0.6386, "step": 12397 }, { "epoch": 0.05488512107663022, "grad_norm": 3.45126122829599, "learning_rate": 5.488512107663022e-06, "loss": 1.0459, "step": 12398 }, { "epoch": 0.05488954801009341, "grad_norm": 3.57252622823114, "learning_rate": 5.48895480100934e-06, "loss": 1.1163, "step": 12399 }, { "epoch": 0.0548939749435566, "grad_norm": 2.934646815455251, "learning_rate": 5.489397494355661e-06, "loss": 0.9451, "step": 12400 }, { "epoch": 0.05489840187701979, "grad_norm": 2.4352326457101876, "learning_rate": 5.489840187701979e-06, "loss": 0.6608, "step": 12401 }, { "epoch": 0.05490282881048298, "grad_norm": 2.51428155598244, "learning_rate": 5.490282881048298e-06, "loss": 0.7986, "step": 12402 }, { "epoch": 0.05490725574394617, "grad_norm": 2.81554106051704, "learning_rate": 5.490725574394618e-06, "loss": 0.506, "step": 12403 }, { "epoch": 0.05491168267740936, "grad_norm": 3.275223062098117, "learning_rate": 5.491168267740936e-06, "loss": 1.2081, "step": 12404 }, { "epoch": 0.05491610961087255, "grad_norm": 3.297272888768266, "learning_rate": 5.491610961087255e-06, "loss": 1.1469, "step": 12405 }, { "epoch": 0.054920536544335735, "grad_norm": 3.2122782172139726, "learning_rate": 5.492053654433574e-06, "loss": 0.7894, "step": 12406 }, { "epoch": 0.054924963477798926, "grad_norm": 2.3781624223882107, "learning_rate": 5.492496347779893e-06, "loss": 0.5362, "step": 12407 }, { "epoch": 0.054929390411262116, "grad_norm": 2.958876777686281, "learning_rate": 5.492939041126212e-06, "loss": 0.8974, "step": 12408 }, { "epoch": 0.05493381734472531, "grad_norm": 2.546759673961847, "learning_rate": 5.4933817344725315e-06, "loss": 0.5745, "step": 12409 }, { "epoch": 0.0549382442781885, "grad_norm": 3.013890920102999, "learning_rate": 5.49382442781885e-06, "loss": 0.8228, "step": 12410 }, { "epoch": 0.05494267121165169, "grad_norm": 2.5382046337217368, "learning_rate": 5.494267121165169e-06, "loss": 0.8604, "step": 12411 }, { "epoch": 0.05494709814511488, "grad_norm": 2.581639867649408, "learning_rate": 5.494709814511489e-06, "loss": 0.6932, "step": 12412 }, { "epoch": 0.05495152507857807, "grad_norm": 2.4371319912882066, "learning_rate": 5.495152507857807e-06, "loss": 0.6419, "step": 12413 }, { "epoch": 0.05495595201204126, "grad_norm": 3.044511671287017, "learning_rate": 5.495595201204126e-06, "loss": 0.9301, "step": 12414 }, { "epoch": 0.05496037894550445, "grad_norm": 3.220094362226217, "learning_rate": 5.496037894550445e-06, "loss": 0.9876, "step": 12415 }, { "epoch": 0.05496480587896764, "grad_norm": 2.579906057595913, "learning_rate": 5.496480587896765e-06, "loss": 0.6037, "step": 12416 }, { "epoch": 0.05496923281243083, "grad_norm": 2.7192569203066173, "learning_rate": 5.496923281243083e-06, "loss": 0.6366, "step": 12417 }, { "epoch": 0.05497365974589402, "grad_norm": 2.935857494293627, "learning_rate": 5.4973659745894025e-06, "loss": 0.8793, "step": 12418 }, { "epoch": 0.05497808667935721, "grad_norm": 3.0744823599969515, "learning_rate": 5.497808667935721e-06, "loss": 0.979, "step": 12419 }, { "epoch": 0.0549825136128204, "grad_norm": 2.575088242177772, "learning_rate": 5.4982513612820395e-06, "loss": 0.9012, "step": 12420 }, { "epoch": 0.05498694054628359, "grad_norm": 2.430598776515806, "learning_rate": 5.49869405462836e-06, "loss": 0.5803, "step": 12421 }, { "epoch": 0.054991367479746776, "grad_norm": 2.6084690557085826, "learning_rate": 5.499136747974678e-06, "loss": 0.8683, "step": 12422 }, { "epoch": 0.05499579441320997, "grad_norm": 2.881562007796433, "learning_rate": 5.499579441320997e-06, "loss": 0.8547, "step": 12423 }, { "epoch": 0.05500022134667316, "grad_norm": 2.938084586544687, "learning_rate": 5.500022134667317e-06, "loss": 0.7131, "step": 12424 }, { "epoch": 0.05500464828013635, "grad_norm": 3.05329918431559, "learning_rate": 5.5004648280136355e-06, "loss": 0.8597, "step": 12425 }, { "epoch": 0.05500907521359954, "grad_norm": 2.9292232041355053, "learning_rate": 5.500907521359954e-06, "loss": 0.6857, "step": 12426 }, { "epoch": 0.05501350214706273, "grad_norm": 2.8070847461900743, "learning_rate": 5.5013502147062734e-06, "loss": 0.8708, "step": 12427 }, { "epoch": 0.05501792908052592, "grad_norm": 2.2702853483121337, "learning_rate": 5.501792908052592e-06, "loss": 0.4803, "step": 12428 }, { "epoch": 0.05502235601398911, "grad_norm": 2.6780904251503808, "learning_rate": 5.5022356013989105e-06, "loss": 0.8562, "step": 12429 }, { "epoch": 0.0550267829474523, "grad_norm": 2.5974223708896504, "learning_rate": 5.502678294745231e-06, "loss": 0.8746, "step": 12430 }, { "epoch": 0.05503120988091549, "grad_norm": 3.01111426920088, "learning_rate": 5.503120988091549e-06, "loss": 1.0604, "step": 12431 }, { "epoch": 0.05503563681437868, "grad_norm": 2.866078033434099, "learning_rate": 5.503563681437868e-06, "loss": 0.7326, "step": 12432 }, { "epoch": 0.05504006374784187, "grad_norm": 2.470237292662092, "learning_rate": 5.504006374784188e-06, "loss": 0.6608, "step": 12433 }, { "epoch": 0.05504449068130506, "grad_norm": 2.526654630514341, "learning_rate": 5.5044490681305065e-06, "loss": 0.8947, "step": 12434 }, { "epoch": 0.05504891761476825, "grad_norm": 3.8320150306082206, "learning_rate": 5.504891761476825e-06, "loss": 1.3212, "step": 12435 }, { "epoch": 0.05505334454823144, "grad_norm": 3.7246725668344993, "learning_rate": 5.505334454823144e-06, "loss": 1.1573, "step": 12436 }, { "epoch": 0.05505777148169463, "grad_norm": 2.2965399542437326, "learning_rate": 5.505777148169463e-06, "loss": 0.8203, "step": 12437 }, { "epoch": 0.05506219841515782, "grad_norm": 2.615349744858598, "learning_rate": 5.506219841515782e-06, "loss": 0.797, "step": 12438 }, { "epoch": 0.05506662534862101, "grad_norm": 2.8832649241721224, "learning_rate": 5.506662534862102e-06, "loss": 0.6123, "step": 12439 }, { "epoch": 0.0550710522820842, "grad_norm": 2.595016006912686, "learning_rate": 5.50710522820842e-06, "loss": 0.6933, "step": 12440 }, { "epoch": 0.05507547921554739, "grad_norm": 2.465886676533122, "learning_rate": 5.507547921554739e-06, "loss": 0.6605, "step": 12441 }, { "epoch": 0.05507990614901058, "grad_norm": 2.6573516791345644, "learning_rate": 5.507990614901059e-06, "loss": 0.6059, "step": 12442 }, { "epoch": 0.05508433308247377, "grad_norm": 2.9255051892977852, "learning_rate": 5.5084333082473774e-06, "loss": 0.8962, "step": 12443 }, { "epoch": 0.05508876001593696, "grad_norm": 2.5846367937644685, "learning_rate": 5.508876001593696e-06, "loss": 0.6031, "step": 12444 }, { "epoch": 0.05509318694940015, "grad_norm": 2.5583274666919635, "learning_rate": 5.509318694940015e-06, "loss": 0.7707, "step": 12445 }, { "epoch": 0.05509761388286334, "grad_norm": 2.224557758163294, "learning_rate": 5.509761388286335e-06, "loss": 0.6947, "step": 12446 }, { "epoch": 0.05510204081632653, "grad_norm": 2.6772503631371314, "learning_rate": 5.510204081632653e-06, "loss": 0.548, "step": 12447 }, { "epoch": 0.05510646774978972, "grad_norm": 3.0742620657733584, "learning_rate": 5.510646774978973e-06, "loss": 0.5876, "step": 12448 }, { "epoch": 0.05511089468325291, "grad_norm": 2.4623658354197024, "learning_rate": 5.511089468325291e-06, "loss": 0.6453, "step": 12449 }, { "epoch": 0.055115321616716104, "grad_norm": 3.583432598833663, "learning_rate": 5.51153216167161e-06, "loss": 1.0901, "step": 12450 }, { "epoch": 0.055119748550179294, "grad_norm": 2.885814347191396, "learning_rate": 5.51197485501793e-06, "loss": 0.8225, "step": 12451 }, { "epoch": 0.05512417548364248, "grad_norm": 2.7903525161896097, "learning_rate": 5.512417548364248e-06, "loss": 0.726, "step": 12452 }, { "epoch": 0.05512860241710567, "grad_norm": 2.849627406602967, "learning_rate": 5.512860241710567e-06, "loss": 0.7577, "step": 12453 }, { "epoch": 0.05513302935056886, "grad_norm": 2.7794359515929075, "learning_rate": 5.513302935056887e-06, "loss": 0.7333, "step": 12454 }, { "epoch": 0.05513745628403205, "grad_norm": 2.668521345946399, "learning_rate": 5.513745628403206e-06, "loss": 0.6534, "step": 12455 }, { "epoch": 0.05514188321749524, "grad_norm": 2.515369421705514, "learning_rate": 5.514188321749524e-06, "loss": 0.5202, "step": 12456 }, { "epoch": 0.05514631015095843, "grad_norm": 3.440565194515327, "learning_rate": 5.5146310150958435e-06, "loss": 0.9065, "step": 12457 }, { "epoch": 0.05515073708442162, "grad_norm": 3.201512127546486, "learning_rate": 5.515073708442162e-06, "loss": 1.0846, "step": 12458 }, { "epoch": 0.05515516401788481, "grad_norm": 2.8709957135394024, "learning_rate": 5.5155164017884814e-06, "loss": 0.9271, "step": 12459 }, { "epoch": 0.055159590951348, "grad_norm": 3.150459719097427, "learning_rate": 5.515959095134801e-06, "loss": 0.9253, "step": 12460 }, { "epoch": 0.05516401788481119, "grad_norm": 2.9481176041247092, "learning_rate": 5.516401788481119e-06, "loss": 0.6581, "step": 12461 }, { "epoch": 0.05516844481827438, "grad_norm": 2.6754665189038036, "learning_rate": 5.516844481827438e-06, "loss": 0.6378, "step": 12462 }, { "epoch": 0.05517287175173757, "grad_norm": 2.626209923294506, "learning_rate": 5.517287175173758e-06, "loss": 0.7125, "step": 12463 }, { "epoch": 0.055177298685200764, "grad_norm": 3.6729829386234916, "learning_rate": 5.517729868520077e-06, "loss": 1.0687, "step": 12464 }, { "epoch": 0.055181725618663954, "grad_norm": 3.0494232927258267, "learning_rate": 5.518172561866395e-06, "loss": 0.9817, "step": 12465 }, { "epoch": 0.055186152552127145, "grad_norm": 2.6136330152149263, "learning_rate": 5.5186152552127145e-06, "loss": 0.7754, "step": 12466 }, { "epoch": 0.05519057948559033, "grad_norm": 2.7163742330159644, "learning_rate": 5.519057948559033e-06, "loss": 0.8315, "step": 12467 }, { "epoch": 0.05519500641905352, "grad_norm": 2.93919460725683, "learning_rate": 5.519500641905352e-06, "loss": 0.9458, "step": 12468 }, { "epoch": 0.05519943335251671, "grad_norm": 2.483861578511763, "learning_rate": 5.519943335251672e-06, "loss": 0.6046, "step": 12469 }, { "epoch": 0.0552038602859799, "grad_norm": 2.2615199437791293, "learning_rate": 5.52038602859799e-06, "loss": 0.5645, "step": 12470 }, { "epoch": 0.05520828721944309, "grad_norm": 2.5874971811614143, "learning_rate": 5.520828721944309e-06, "loss": 0.8871, "step": 12471 }, { "epoch": 0.05521271415290628, "grad_norm": 2.319827566907397, "learning_rate": 5.521271415290629e-06, "loss": 0.559, "step": 12472 }, { "epoch": 0.05521714108636947, "grad_norm": 2.3387179171998937, "learning_rate": 5.5217141086369475e-06, "loss": 0.7253, "step": 12473 }, { "epoch": 0.05522156801983266, "grad_norm": 2.9774539301258796, "learning_rate": 5.522156801983266e-06, "loss": 0.8419, "step": 12474 }, { "epoch": 0.05522599495329585, "grad_norm": 2.406090890431854, "learning_rate": 5.5225994953295854e-06, "loss": 0.6012, "step": 12475 }, { "epoch": 0.05523042188675904, "grad_norm": 2.475006179434825, "learning_rate": 5.523042188675905e-06, "loss": 0.8299, "step": 12476 }, { "epoch": 0.05523484882022223, "grad_norm": 2.8446660001141706, "learning_rate": 5.523484882022223e-06, "loss": 0.7064, "step": 12477 }, { "epoch": 0.055239275753685424, "grad_norm": 2.9702172914539857, "learning_rate": 5.523927575368543e-06, "loss": 0.9022, "step": 12478 }, { "epoch": 0.055243702687148614, "grad_norm": 2.720748989712894, "learning_rate": 5.524370268714861e-06, "loss": 0.8726, "step": 12479 }, { "epoch": 0.055248129620611805, "grad_norm": 2.412360550141145, "learning_rate": 5.52481296206118e-06, "loss": 0.6392, "step": 12480 }, { "epoch": 0.055252556554074995, "grad_norm": 2.370950404235627, "learning_rate": 5.5252556554075e-06, "loss": 0.8218, "step": 12481 }, { "epoch": 0.05525698348753818, "grad_norm": 2.7050576619333984, "learning_rate": 5.5256983487538185e-06, "loss": 0.7592, "step": 12482 }, { "epoch": 0.05526141042100137, "grad_norm": 2.889939680565837, "learning_rate": 5.526141042100137e-06, "loss": 0.7453, "step": 12483 }, { "epoch": 0.05526583735446456, "grad_norm": 2.4967273167464206, "learning_rate": 5.526583735446457e-06, "loss": 0.6686, "step": 12484 }, { "epoch": 0.05527026428792775, "grad_norm": 2.5667509023126946, "learning_rate": 5.527026428792776e-06, "loss": 0.6251, "step": 12485 }, { "epoch": 0.05527469122139094, "grad_norm": 4.257630826990459, "learning_rate": 5.527469122139095e-06, "loss": 1.0917, "step": 12486 }, { "epoch": 0.05527911815485413, "grad_norm": 2.3207517106064652, "learning_rate": 5.527911815485414e-06, "loss": 0.69, "step": 12487 }, { "epoch": 0.05528354508831732, "grad_norm": 3.3987697193649535, "learning_rate": 5.528354508831732e-06, "loss": 1.0882, "step": 12488 }, { "epoch": 0.05528797202178051, "grad_norm": 3.2616875679662596, "learning_rate": 5.528797202178052e-06, "loss": 0.8451, "step": 12489 }, { "epoch": 0.0552923989552437, "grad_norm": 3.2488162988379856, "learning_rate": 5.529239895524371e-06, "loss": 0.5667, "step": 12490 }, { "epoch": 0.055296825888706894, "grad_norm": 2.8623845697609354, "learning_rate": 5.5296825888706894e-06, "loss": 0.8266, "step": 12491 }, { "epoch": 0.055301252822170084, "grad_norm": 2.5526842752495726, "learning_rate": 5.53012528221701e-06, "loss": 0.7115, "step": 12492 }, { "epoch": 0.055305679755633275, "grad_norm": 3.0812260204747153, "learning_rate": 5.530567975563328e-06, "loss": 0.7836, "step": 12493 }, { "epoch": 0.055310106689096465, "grad_norm": 3.0545820861983444, "learning_rate": 5.531010668909647e-06, "loss": 1.0722, "step": 12494 }, { "epoch": 0.055314533622559656, "grad_norm": 2.3012516676270423, "learning_rate": 5.531453362255966e-06, "loss": 0.5306, "step": 12495 }, { "epoch": 0.055318960556022846, "grad_norm": 2.379355103955851, "learning_rate": 5.531896055602285e-06, "loss": 0.5057, "step": 12496 }, { "epoch": 0.05532338748948603, "grad_norm": 3.025088193385321, "learning_rate": 5.532338748948604e-06, "loss": 0.6757, "step": 12497 }, { "epoch": 0.05532781442294922, "grad_norm": 3.0196516218768865, "learning_rate": 5.532781442294923e-06, "loss": 0.5705, "step": 12498 }, { "epoch": 0.05533224135641241, "grad_norm": 3.6020199687611822, "learning_rate": 5.533224135641242e-06, "loss": 0.912, "step": 12499 }, { "epoch": 0.0553366682898756, "grad_norm": 2.7465720195978296, "learning_rate": 5.53366682898756e-06, "loss": 0.7563, "step": 12500 }, { "epoch": 0.05534109522333879, "grad_norm": 2.5863847052975193, "learning_rate": 5.534109522333881e-06, "loss": 0.5188, "step": 12501 }, { "epoch": 0.05534552215680198, "grad_norm": 3.203181345738889, "learning_rate": 5.534552215680199e-06, "loss": 0.9463, "step": 12502 }, { "epoch": 0.05534994909026517, "grad_norm": 3.0196290460401864, "learning_rate": 5.534994909026518e-06, "loss": 0.8946, "step": 12503 }, { "epoch": 0.05535437602372836, "grad_norm": 2.4118397953048047, "learning_rate": 5.535437602372837e-06, "loss": 0.5352, "step": 12504 }, { "epoch": 0.055358802957191554, "grad_norm": 2.5274044927916273, "learning_rate": 5.535880295719156e-06, "loss": 0.6443, "step": 12505 }, { "epoch": 0.055363229890654744, "grad_norm": 4.84081467768269, "learning_rate": 5.536322989065475e-06, "loss": 1.8977, "step": 12506 }, { "epoch": 0.055367656824117935, "grad_norm": 3.2212412178000847, "learning_rate": 5.536765682411794e-06, "loss": 0.9127, "step": 12507 }, { "epoch": 0.055372083757581125, "grad_norm": 2.550979790759869, "learning_rate": 5.537208375758113e-06, "loss": 0.7511, "step": 12508 }, { "epoch": 0.055376510691044316, "grad_norm": 3.8401130730997077, "learning_rate": 5.537651069104431e-06, "loss": 1.0742, "step": 12509 }, { "epoch": 0.055380937624507506, "grad_norm": 2.3025436480527355, "learning_rate": 5.5380937624507515e-06, "loss": 0.7196, "step": 12510 }, { "epoch": 0.0553853645579707, "grad_norm": 4.1732570112294605, "learning_rate": 5.53853645579707e-06, "loss": 0.9011, "step": 12511 }, { "epoch": 0.05538979149143388, "grad_norm": 2.763792634210354, "learning_rate": 5.538979149143389e-06, "loss": 0.7326, "step": 12512 }, { "epoch": 0.05539421842489707, "grad_norm": 2.495280596605575, "learning_rate": 5.539421842489708e-06, "loss": 0.553, "step": 12513 }, { "epoch": 0.05539864535836026, "grad_norm": 3.068402401961782, "learning_rate": 5.539864535836027e-06, "loss": 0.8006, "step": 12514 }, { "epoch": 0.05540307229182345, "grad_norm": 2.6897709776988408, "learning_rate": 5.540307229182346e-06, "loss": 0.8561, "step": 12515 }, { "epoch": 0.05540749922528664, "grad_norm": 2.510557968992406, "learning_rate": 5.540749922528665e-06, "loss": 0.6815, "step": 12516 }, { "epoch": 0.05541192615874983, "grad_norm": 3.4055651437259074, "learning_rate": 5.541192615874984e-06, "loss": 0.886, "step": 12517 }, { "epoch": 0.05541635309221302, "grad_norm": 3.248172163699315, "learning_rate": 5.541635309221302e-06, "loss": 1.032, "step": 12518 }, { "epoch": 0.055420780025676214, "grad_norm": 2.8061693213537002, "learning_rate": 5.5420780025676225e-06, "loss": 0.896, "step": 12519 }, { "epoch": 0.055425206959139404, "grad_norm": 3.114397564969059, "learning_rate": 5.542520695913941e-06, "loss": 1.1267, "step": 12520 }, { "epoch": 0.055429633892602595, "grad_norm": 2.3186516522011664, "learning_rate": 5.5429633892602595e-06, "loss": 0.8135, "step": 12521 }, { "epoch": 0.055434060826065785, "grad_norm": 3.0770803554530506, "learning_rate": 5.54340608260658e-06, "loss": 0.6893, "step": 12522 }, { "epoch": 0.055438487759528976, "grad_norm": 2.6759965321932024, "learning_rate": 5.543848775952898e-06, "loss": 0.9325, "step": 12523 }, { "epoch": 0.055442914692992167, "grad_norm": 3.933806803477155, "learning_rate": 5.544291469299217e-06, "loss": 1.2914, "step": 12524 }, { "epoch": 0.05544734162645536, "grad_norm": 5.045342039778513, "learning_rate": 5.544734162645536e-06, "loss": 1.2486, "step": 12525 }, { "epoch": 0.05545176855991855, "grad_norm": 2.8151042547312226, "learning_rate": 5.545176855991855e-06, "loss": 0.8018, "step": 12526 }, { "epoch": 0.05545619549338173, "grad_norm": 2.817372471811045, "learning_rate": 5.545619549338174e-06, "loss": 0.7643, "step": 12527 }, { "epoch": 0.05546062242684492, "grad_norm": 3.27587327344442, "learning_rate": 5.5460622426844934e-06, "loss": 0.9554, "step": 12528 }, { "epoch": 0.05546504936030811, "grad_norm": 2.4940182157621273, "learning_rate": 5.546504936030812e-06, "loss": 0.6323, "step": 12529 }, { "epoch": 0.0554694762937713, "grad_norm": 3.0493956724900806, "learning_rate": 5.5469476293771305e-06, "loss": 0.675, "step": 12530 }, { "epoch": 0.05547390322723449, "grad_norm": 2.3233065789750644, "learning_rate": 5.547390322723451e-06, "loss": 0.5127, "step": 12531 }, { "epoch": 0.055478330160697684, "grad_norm": 2.719740810513622, "learning_rate": 5.547833016069769e-06, "loss": 0.6032, "step": 12532 }, { "epoch": 0.055482757094160874, "grad_norm": 3.272893555490125, "learning_rate": 5.548275709416088e-06, "loss": 0.8708, "step": 12533 }, { "epoch": 0.055487184027624065, "grad_norm": 2.8966286750898336, "learning_rate": 5.548718402762407e-06, "loss": 0.6593, "step": 12534 }, { "epoch": 0.055491610961087255, "grad_norm": 2.489146120662655, "learning_rate": 5.5491610961087265e-06, "loss": 0.6327, "step": 12535 }, { "epoch": 0.055496037894550446, "grad_norm": 2.638329123385926, "learning_rate": 5.549603789455045e-06, "loss": 0.8563, "step": 12536 }, { "epoch": 0.055500464828013636, "grad_norm": 2.741592587580418, "learning_rate": 5.550046482801364e-06, "loss": 0.8477, "step": 12537 }, { "epoch": 0.05550489176147683, "grad_norm": 2.9260723511457045, "learning_rate": 5.550489176147683e-06, "loss": 0.8231, "step": 12538 }, { "epoch": 0.05550931869494002, "grad_norm": 3.4882736477858236, "learning_rate": 5.5509318694940014e-06, "loss": 0.8401, "step": 12539 }, { "epoch": 0.05551374562840321, "grad_norm": 2.8510297293915015, "learning_rate": 5.551374562840322e-06, "loss": 0.7105, "step": 12540 }, { "epoch": 0.0555181725618664, "grad_norm": 2.5617897081790915, "learning_rate": 5.55181725618664e-06, "loss": 0.5948, "step": 12541 }, { "epoch": 0.05552259949532958, "grad_norm": 2.3591692850663404, "learning_rate": 5.552259949532959e-06, "loss": 0.8839, "step": 12542 }, { "epoch": 0.05552702642879277, "grad_norm": 2.378850535368469, "learning_rate": 5.552702642879279e-06, "loss": 0.6393, "step": 12543 }, { "epoch": 0.05553145336225596, "grad_norm": 2.617247609663841, "learning_rate": 5.5531453362255974e-06, "loss": 0.8412, "step": 12544 }, { "epoch": 0.05553588029571915, "grad_norm": 4.59057345126757, "learning_rate": 5.553588029571916e-06, "loss": 0.9753, "step": 12545 }, { "epoch": 0.055540307229182344, "grad_norm": 3.041605553515162, "learning_rate": 5.554030722918235e-06, "loss": 0.9036, "step": 12546 }, { "epoch": 0.055544734162645534, "grad_norm": 3.037961210250499, "learning_rate": 5.554473416264554e-06, "loss": 0.9455, "step": 12547 }, { "epoch": 0.055549161096108725, "grad_norm": 3.0868866551709977, "learning_rate": 5.554916109610872e-06, "loss": 0.972, "step": 12548 }, { "epoch": 0.055553588029571915, "grad_norm": 3.525390263998482, "learning_rate": 5.555358802957193e-06, "loss": 1.0317, "step": 12549 }, { "epoch": 0.055558014963035106, "grad_norm": 3.0330568598263086, "learning_rate": 5.555801496303511e-06, "loss": 0.7703, "step": 12550 }, { "epoch": 0.055562441896498296, "grad_norm": 3.3385459390885246, "learning_rate": 5.55624418964983e-06, "loss": 0.7758, "step": 12551 }, { "epoch": 0.05556686882996149, "grad_norm": 2.668480595944504, "learning_rate": 5.55668688299615e-06, "loss": 0.8318, "step": 12552 }, { "epoch": 0.05557129576342468, "grad_norm": 2.554720299529684, "learning_rate": 5.557129576342468e-06, "loss": 0.6827, "step": 12553 }, { "epoch": 0.05557572269688787, "grad_norm": 2.653874084848879, "learning_rate": 5.557572269688787e-06, "loss": 0.7904, "step": 12554 }, { "epoch": 0.05558014963035106, "grad_norm": 2.7008692807654677, "learning_rate": 5.558014963035106e-06, "loss": 0.9618, "step": 12555 }, { "epoch": 0.05558457656381425, "grad_norm": 2.975600028163099, "learning_rate": 5.558457656381425e-06, "loss": 0.8587, "step": 12556 }, { "epoch": 0.05558900349727743, "grad_norm": 2.5635944272607607, "learning_rate": 5.558900349727744e-06, "loss": 1.0005, "step": 12557 }, { "epoch": 0.05559343043074062, "grad_norm": 4.2759467218893965, "learning_rate": 5.5593430430740635e-06, "loss": 1.0016, "step": 12558 }, { "epoch": 0.05559785736420381, "grad_norm": 2.4335058752686742, "learning_rate": 5.559785736420382e-06, "loss": 0.7273, "step": 12559 }, { "epoch": 0.055602284297667004, "grad_norm": 2.5360687742749994, "learning_rate": 5.560228429766701e-06, "loss": 0.7873, "step": 12560 }, { "epoch": 0.055606711231130194, "grad_norm": 2.956911952031485, "learning_rate": 5.560671123113021e-06, "loss": 1.0888, "step": 12561 }, { "epoch": 0.055611138164593385, "grad_norm": 2.2702218483085477, "learning_rate": 5.561113816459339e-06, "loss": 0.752, "step": 12562 }, { "epoch": 0.055615565098056575, "grad_norm": 2.66243569207056, "learning_rate": 5.561556509805658e-06, "loss": 0.7827, "step": 12563 }, { "epoch": 0.055619992031519766, "grad_norm": 2.4560311690826238, "learning_rate": 5.561999203151977e-06, "loss": 0.7425, "step": 12564 }, { "epoch": 0.055624418964982957, "grad_norm": 2.617488536821733, "learning_rate": 5.562441896498297e-06, "loss": 0.7672, "step": 12565 }, { "epoch": 0.05562884589844615, "grad_norm": 2.442461913925162, "learning_rate": 5.562884589844615e-06, "loss": 0.72, "step": 12566 }, { "epoch": 0.05563327283190934, "grad_norm": 3.2651747242632374, "learning_rate": 5.5633272831909345e-06, "loss": 0.7783, "step": 12567 }, { "epoch": 0.05563769976537253, "grad_norm": 2.9408090661285526, "learning_rate": 5.563769976537253e-06, "loss": 0.8499, "step": 12568 }, { "epoch": 0.05564212669883572, "grad_norm": 2.7449588599802803, "learning_rate": 5.5642126698835715e-06, "loss": 0.5371, "step": 12569 }, { "epoch": 0.05564655363229891, "grad_norm": 3.1156498328716955, "learning_rate": 5.564655363229892e-06, "loss": 0.7957, "step": 12570 }, { "epoch": 0.0556509805657621, "grad_norm": 2.244436131526012, "learning_rate": 5.56509805657621e-06, "loss": 0.7011, "step": 12571 }, { "epoch": 0.05565540749922529, "grad_norm": 2.6742446145282757, "learning_rate": 5.565540749922529e-06, "loss": 0.7495, "step": 12572 }, { "epoch": 0.055659834432688474, "grad_norm": 2.667302883268351, "learning_rate": 5.565983443268849e-06, "loss": 0.7672, "step": 12573 }, { "epoch": 0.055664261366151664, "grad_norm": 3.081635115264869, "learning_rate": 5.5664261366151675e-06, "loss": 0.9529, "step": 12574 }, { "epoch": 0.055668688299614855, "grad_norm": 2.853141836966318, "learning_rate": 5.566868829961486e-06, "loss": 0.7475, "step": 12575 }, { "epoch": 0.055673115233078045, "grad_norm": 3.618170955213872, "learning_rate": 5.5673115233078054e-06, "loss": 1.1726, "step": 12576 }, { "epoch": 0.055677542166541236, "grad_norm": 2.7863552573347556, "learning_rate": 5.567754216654124e-06, "loss": 0.6823, "step": 12577 }, { "epoch": 0.055681969100004426, "grad_norm": 2.7737030515251493, "learning_rate": 5.568196910000443e-06, "loss": 0.9542, "step": 12578 }, { "epoch": 0.05568639603346762, "grad_norm": 2.7582031226393857, "learning_rate": 5.568639603346763e-06, "loss": 0.7159, "step": 12579 }, { "epoch": 0.05569082296693081, "grad_norm": 2.7192313245683137, "learning_rate": 5.569082296693081e-06, "loss": 0.7673, "step": 12580 }, { "epoch": 0.055695249900394, "grad_norm": 2.5089207259341637, "learning_rate": 5.5695249900394e-06, "loss": 0.6653, "step": 12581 }, { "epoch": 0.05569967683385719, "grad_norm": 2.259534038391501, "learning_rate": 5.56996768338572e-06, "loss": 0.5299, "step": 12582 }, { "epoch": 0.05570410376732038, "grad_norm": 2.9780148735889775, "learning_rate": 5.5704103767320385e-06, "loss": 1.0779, "step": 12583 }, { "epoch": 0.05570853070078357, "grad_norm": 3.108368480086188, "learning_rate": 5.570853070078357e-06, "loss": 0.7517, "step": 12584 }, { "epoch": 0.05571295763424676, "grad_norm": 3.1156940094814902, "learning_rate": 5.571295763424676e-06, "loss": 0.8381, "step": 12585 }, { "epoch": 0.05571738456770995, "grad_norm": 3.3909192421966354, "learning_rate": 5.571738456770996e-06, "loss": 1.0742, "step": 12586 }, { "epoch": 0.05572181150117314, "grad_norm": 3.3658840133594476, "learning_rate": 5.572181150117314e-06, "loss": 1.1974, "step": 12587 }, { "epoch": 0.055726238434636324, "grad_norm": 3.375422930186836, "learning_rate": 5.572623843463634e-06, "loss": 0.5862, "step": 12588 }, { "epoch": 0.055730665368099515, "grad_norm": 2.574137284160726, "learning_rate": 5.573066536809952e-06, "loss": 0.6995, "step": 12589 }, { "epoch": 0.055735092301562705, "grad_norm": 4.732265532725971, "learning_rate": 5.573509230156271e-06, "loss": 1.2636, "step": 12590 }, { "epoch": 0.055739519235025896, "grad_norm": 2.8289492064454183, "learning_rate": 5.573951923502591e-06, "loss": 0.965, "step": 12591 }, { "epoch": 0.055743946168489086, "grad_norm": 2.9351543688176913, "learning_rate": 5.5743946168489094e-06, "loss": 0.9257, "step": 12592 }, { "epoch": 0.05574837310195228, "grad_norm": 3.823477400154222, "learning_rate": 5.574837310195228e-06, "loss": 1.3789, "step": 12593 }, { "epoch": 0.05575280003541547, "grad_norm": 2.9664247162837896, "learning_rate": 5.575280003541547e-06, "loss": 0.9297, "step": 12594 }, { "epoch": 0.05575722696887866, "grad_norm": 2.759942199915092, "learning_rate": 5.575722696887867e-06, "loss": 0.6339, "step": 12595 }, { "epoch": 0.05576165390234185, "grad_norm": 2.834357459067095, "learning_rate": 5.576165390234185e-06, "loss": 0.6415, "step": 12596 }, { "epoch": 0.05576608083580504, "grad_norm": 2.7450159116959716, "learning_rate": 5.576608083580505e-06, "loss": 0.5779, "step": 12597 }, { "epoch": 0.05577050776926823, "grad_norm": 2.275372017464915, "learning_rate": 5.577050776926823e-06, "loss": 0.6643, "step": 12598 }, { "epoch": 0.05577493470273142, "grad_norm": 2.877495645375284, "learning_rate": 5.577493470273142e-06, "loss": 0.6861, "step": 12599 }, { "epoch": 0.05577936163619461, "grad_norm": 2.897838973376851, "learning_rate": 5.577936163619462e-06, "loss": 0.6223, "step": 12600 }, { "epoch": 0.0557837885696578, "grad_norm": 3.2516990501424194, "learning_rate": 5.57837885696578e-06, "loss": 1.0627, "step": 12601 }, { "epoch": 0.05578821550312099, "grad_norm": 3.4675006730373488, "learning_rate": 5.578821550312099e-06, "loss": 1.0608, "step": 12602 }, { "epoch": 0.055792642436584175, "grad_norm": 3.2673522298516153, "learning_rate": 5.579264243658419e-06, "loss": 1.1789, "step": 12603 }, { "epoch": 0.055797069370047365, "grad_norm": 2.7321279411429584, "learning_rate": 5.579706937004738e-06, "loss": 0.71, "step": 12604 }, { "epoch": 0.055801496303510556, "grad_norm": 2.9206748455863747, "learning_rate": 5.580149630351056e-06, "loss": 0.6661, "step": 12605 }, { "epoch": 0.055805923236973747, "grad_norm": 2.66617878282991, "learning_rate": 5.5805923236973755e-06, "loss": 0.7066, "step": 12606 }, { "epoch": 0.05581035017043694, "grad_norm": 2.2811154316694706, "learning_rate": 5.581035017043694e-06, "loss": 0.4551, "step": 12607 }, { "epoch": 0.05581477710390013, "grad_norm": 3.5344002979225357, "learning_rate": 5.5814777103900134e-06, "loss": 0.9454, "step": 12608 }, { "epoch": 0.05581920403736332, "grad_norm": 2.723294804512373, "learning_rate": 5.581920403736333e-06, "loss": 0.6484, "step": 12609 }, { "epoch": 0.05582363097082651, "grad_norm": 2.4406845060441134, "learning_rate": 5.582363097082651e-06, "loss": 0.5813, "step": 12610 }, { "epoch": 0.0558280579042897, "grad_norm": 3.32342132930769, "learning_rate": 5.58280579042897e-06, "loss": 0.9497, "step": 12611 }, { "epoch": 0.05583248483775289, "grad_norm": 2.8090317504452345, "learning_rate": 5.58324848377529e-06, "loss": 0.8007, "step": 12612 }, { "epoch": 0.05583691177121608, "grad_norm": 3.4189199077274273, "learning_rate": 5.583691177121609e-06, "loss": 1.0354, "step": 12613 }, { "epoch": 0.05584133870467927, "grad_norm": 3.318149899114852, "learning_rate": 5.584133870467927e-06, "loss": 0.6691, "step": 12614 }, { "epoch": 0.05584576563814246, "grad_norm": 2.4921428596056563, "learning_rate": 5.5845765638142465e-06, "loss": 0.6177, "step": 12615 }, { "epoch": 0.05585019257160565, "grad_norm": 2.3092790054578516, "learning_rate": 5.585019257160566e-06, "loss": 0.5936, "step": 12616 }, { "epoch": 0.05585461950506884, "grad_norm": 3.2065382480209372, "learning_rate": 5.585461950506884e-06, "loss": 0.6528, "step": 12617 }, { "epoch": 0.055859046438532026, "grad_norm": 2.469982534415972, "learning_rate": 5.585904643853204e-06, "loss": 0.6955, "step": 12618 }, { "epoch": 0.055863473371995216, "grad_norm": 3.433573592514333, "learning_rate": 5.586347337199522e-06, "loss": 0.6688, "step": 12619 }, { "epoch": 0.05586790030545841, "grad_norm": 2.6794885983324708, "learning_rate": 5.586790030545841e-06, "loss": 0.5686, "step": 12620 }, { "epoch": 0.0558723272389216, "grad_norm": 2.739759573647129, "learning_rate": 5.587232723892161e-06, "loss": 0.8212, "step": 12621 }, { "epoch": 0.05587675417238479, "grad_norm": 4.021960973937598, "learning_rate": 5.5876754172384795e-06, "loss": 0.9343, "step": 12622 }, { "epoch": 0.05588118110584798, "grad_norm": 3.1590169019529433, "learning_rate": 5.588118110584798e-06, "loss": 0.9259, "step": 12623 }, { "epoch": 0.05588560803931117, "grad_norm": 3.95928613761353, "learning_rate": 5.588560803931118e-06, "loss": 1.1077, "step": 12624 }, { "epoch": 0.05589003497277436, "grad_norm": 2.75383448828235, "learning_rate": 5.589003497277437e-06, "loss": 0.9681, "step": 12625 }, { "epoch": 0.05589446190623755, "grad_norm": 3.481543169400312, "learning_rate": 5.589446190623755e-06, "loss": 1.0353, "step": 12626 }, { "epoch": 0.05589888883970074, "grad_norm": 3.0658368148260102, "learning_rate": 5.589888883970075e-06, "loss": 0.9576, "step": 12627 }, { "epoch": 0.05590331577316393, "grad_norm": 2.20912543898903, "learning_rate": 5.590331577316393e-06, "loss": 0.5247, "step": 12628 }, { "epoch": 0.05590774270662712, "grad_norm": 2.92878658942785, "learning_rate": 5.590774270662712e-06, "loss": 0.9042, "step": 12629 }, { "epoch": 0.05591216964009031, "grad_norm": 3.354099068088181, "learning_rate": 5.591216964009032e-06, "loss": 0.9222, "step": 12630 }, { "epoch": 0.0559165965735535, "grad_norm": 2.454999292654805, "learning_rate": 5.5916596573553505e-06, "loss": 0.7304, "step": 12631 }, { "epoch": 0.05592102350701669, "grad_norm": 2.688443039938436, "learning_rate": 5.592102350701669e-06, "loss": 0.9097, "step": 12632 }, { "epoch": 0.055925450440479876, "grad_norm": 3.5882884233738848, "learning_rate": 5.592545044047989e-06, "loss": 0.7039, "step": 12633 }, { "epoch": 0.05592987737394307, "grad_norm": 2.965098161938289, "learning_rate": 5.592987737394308e-06, "loss": 0.7445, "step": 12634 }, { "epoch": 0.05593430430740626, "grad_norm": 2.6207064350572207, "learning_rate": 5.593430430740626e-06, "loss": 0.943, "step": 12635 }, { "epoch": 0.05593873124086945, "grad_norm": 2.4802546404986203, "learning_rate": 5.593873124086946e-06, "loss": 0.4031, "step": 12636 }, { "epoch": 0.05594315817433264, "grad_norm": 3.045796445089369, "learning_rate": 5.594315817433264e-06, "loss": 0.9171, "step": 12637 }, { "epoch": 0.05594758510779583, "grad_norm": 3.02366863499297, "learning_rate": 5.5947585107795835e-06, "loss": 0.5866, "step": 12638 }, { "epoch": 0.05595201204125902, "grad_norm": 3.1776922322321552, "learning_rate": 5.595201204125903e-06, "loss": 0.9484, "step": 12639 }, { "epoch": 0.05595643897472221, "grad_norm": 2.738517132658167, "learning_rate": 5.5956438974722214e-06, "loss": 0.8591, "step": 12640 }, { "epoch": 0.0559608659081854, "grad_norm": 2.8909268325909547, "learning_rate": 5.59608659081854e-06, "loss": 0.7572, "step": 12641 }, { "epoch": 0.05596529284164859, "grad_norm": 2.4036113601416695, "learning_rate": 5.59652928416486e-06, "loss": 0.483, "step": 12642 }, { "epoch": 0.05596971977511178, "grad_norm": 2.2566292076623293, "learning_rate": 5.596971977511179e-06, "loss": 0.7027, "step": 12643 }, { "epoch": 0.05597414670857497, "grad_norm": 2.8030580488602257, "learning_rate": 5.597414670857497e-06, "loss": 0.8858, "step": 12644 }, { "epoch": 0.05597857364203816, "grad_norm": 3.627694631509369, "learning_rate": 5.597857364203817e-06, "loss": 1.4129, "step": 12645 }, { "epoch": 0.05598300057550135, "grad_norm": 2.508221160554609, "learning_rate": 5.598300057550136e-06, "loss": 0.6795, "step": 12646 }, { "epoch": 0.05598742750896454, "grad_norm": 3.9086760057670427, "learning_rate": 5.5987427508964545e-06, "loss": 1.3178, "step": 12647 }, { "epoch": 0.05599185444242773, "grad_norm": 2.9052656348808275, "learning_rate": 5.599185444242774e-06, "loss": 0.953, "step": 12648 }, { "epoch": 0.05599628137589092, "grad_norm": 3.148614229484967, "learning_rate": 5.599628137589092e-06, "loss": 0.8404, "step": 12649 }, { "epoch": 0.05600070830935411, "grad_norm": 2.6918450935146416, "learning_rate": 5.600070830935411e-06, "loss": 0.5869, "step": 12650 }, { "epoch": 0.0560051352428173, "grad_norm": 2.812927141988041, "learning_rate": 5.600513524281731e-06, "loss": 0.6384, "step": 12651 }, { "epoch": 0.05600956217628049, "grad_norm": 3.056172274880304, "learning_rate": 5.60095621762805e-06, "loss": 0.8515, "step": 12652 }, { "epoch": 0.05601398910974368, "grad_norm": 3.7720739334980555, "learning_rate": 5.601398910974368e-06, "loss": 0.9313, "step": 12653 }, { "epoch": 0.05601841604320687, "grad_norm": 2.7197762666613015, "learning_rate": 5.601841604320688e-06, "loss": 0.8655, "step": 12654 }, { "epoch": 0.05602284297667006, "grad_norm": 2.4065822640491064, "learning_rate": 5.602284297667007e-06, "loss": 0.6709, "step": 12655 }, { "epoch": 0.05602726991013325, "grad_norm": 3.1503786164707823, "learning_rate": 5.6027269910133254e-06, "loss": 0.8617, "step": 12656 }, { "epoch": 0.05603169684359644, "grad_norm": 2.4987442844032195, "learning_rate": 5.603169684359645e-06, "loss": 0.5792, "step": 12657 }, { "epoch": 0.05603612377705963, "grad_norm": 3.5048097450489677, "learning_rate": 5.603612377705963e-06, "loss": 0.6193, "step": 12658 }, { "epoch": 0.05604055071052282, "grad_norm": 2.4748885795787423, "learning_rate": 5.604055071052283e-06, "loss": 0.5423, "step": 12659 }, { "epoch": 0.05604497764398601, "grad_norm": 2.58602755911632, "learning_rate": 5.604497764398602e-06, "loss": 0.7368, "step": 12660 }, { "epoch": 0.056049404577449204, "grad_norm": 2.507250829977117, "learning_rate": 5.604940457744921e-06, "loss": 0.6994, "step": 12661 }, { "epoch": 0.056053831510912394, "grad_norm": 2.4231841190486763, "learning_rate": 5.605383151091239e-06, "loss": 0.6405, "step": 12662 }, { "epoch": 0.05605825844437558, "grad_norm": 2.3608517592698384, "learning_rate": 5.605825844437559e-06, "loss": 0.7169, "step": 12663 }, { "epoch": 0.05606268537783877, "grad_norm": 2.7244591381466186, "learning_rate": 5.606268537783878e-06, "loss": 0.7256, "step": 12664 }, { "epoch": 0.05606711231130196, "grad_norm": 2.3762939133862897, "learning_rate": 5.606711231130196e-06, "loss": 0.5543, "step": 12665 }, { "epoch": 0.05607153924476515, "grad_norm": 3.012460435908944, "learning_rate": 5.607153924476516e-06, "loss": 0.9359, "step": 12666 }, { "epoch": 0.05607596617822834, "grad_norm": 2.8082680796014263, "learning_rate": 5.607596617822835e-06, "loss": 0.8339, "step": 12667 }, { "epoch": 0.05608039311169153, "grad_norm": 2.5801562416706343, "learning_rate": 5.608039311169154e-06, "loss": 0.6899, "step": 12668 }, { "epoch": 0.05608482004515472, "grad_norm": 2.8229076653962686, "learning_rate": 5.608482004515473e-06, "loss": 1.0371, "step": 12669 }, { "epoch": 0.05608924697861791, "grad_norm": 3.5620981528552655, "learning_rate": 5.6089246978617915e-06, "loss": 1.2531, "step": 12670 }, { "epoch": 0.0560936739120811, "grad_norm": 2.9738775841242147, "learning_rate": 5.60936739120811e-06, "loss": 0.9317, "step": 12671 }, { "epoch": 0.05609810084554429, "grad_norm": 2.350891895360913, "learning_rate": 5.60981008455443e-06, "loss": 0.514, "step": 12672 }, { "epoch": 0.05610252777900748, "grad_norm": 2.9853560579798777, "learning_rate": 5.610252777900749e-06, "loss": 0.6111, "step": 12673 }, { "epoch": 0.05610695471247067, "grad_norm": 2.7164783544500373, "learning_rate": 5.610695471247067e-06, "loss": 0.8181, "step": 12674 }, { "epoch": 0.056111381645933864, "grad_norm": 3.5888871903560315, "learning_rate": 5.611138164593387e-06, "loss": 1.4695, "step": 12675 }, { "epoch": 0.056115808579397054, "grad_norm": 2.5213202843833384, "learning_rate": 5.611580857939706e-06, "loss": 0.5814, "step": 12676 }, { "epoch": 0.056120235512860245, "grad_norm": 2.838650026233252, "learning_rate": 5.612023551286025e-06, "loss": 0.5787, "step": 12677 }, { "epoch": 0.05612466244632343, "grad_norm": 3.5815205501491025, "learning_rate": 5.612466244632344e-06, "loss": 1.2942, "step": 12678 }, { "epoch": 0.05612908937978662, "grad_norm": 2.5028240860142468, "learning_rate": 5.6129089379786625e-06, "loss": 0.8891, "step": 12679 }, { "epoch": 0.05613351631324981, "grad_norm": 2.4040778430301475, "learning_rate": 5.613351631324981e-06, "loss": 0.6998, "step": 12680 }, { "epoch": 0.056137943246713, "grad_norm": 2.3182839460005913, "learning_rate": 5.613794324671301e-06, "loss": 0.669, "step": 12681 }, { "epoch": 0.05614237018017619, "grad_norm": 3.7922836298617733, "learning_rate": 5.61423701801762e-06, "loss": 0.9796, "step": 12682 }, { "epoch": 0.05614679711363938, "grad_norm": 3.647212649307795, "learning_rate": 5.614679711363938e-06, "loss": 0.9587, "step": 12683 }, { "epoch": 0.05615122404710257, "grad_norm": 2.4069894914822654, "learning_rate": 5.6151224047102585e-06, "loss": 0.5016, "step": 12684 }, { "epoch": 0.05615565098056576, "grad_norm": 3.6853556138168257, "learning_rate": 5.615565098056577e-06, "loss": 1.0489, "step": 12685 }, { "epoch": 0.05616007791402895, "grad_norm": 2.499735814304758, "learning_rate": 5.6160077914028956e-06, "loss": 0.714, "step": 12686 }, { "epoch": 0.05616450484749214, "grad_norm": 2.8306494708403362, "learning_rate": 5.616450484749215e-06, "loss": 0.8059, "step": 12687 }, { "epoch": 0.05616893178095533, "grad_norm": 2.9371680522574137, "learning_rate": 5.6168931780955334e-06, "loss": 0.9057, "step": 12688 }, { "epoch": 0.056173358714418524, "grad_norm": 2.3179621462014643, "learning_rate": 5.617335871441853e-06, "loss": 0.5884, "step": 12689 }, { "epoch": 0.056177785647881714, "grad_norm": 2.877319384847699, "learning_rate": 5.617778564788172e-06, "loss": 0.5918, "step": 12690 }, { "epoch": 0.056182212581344905, "grad_norm": 3.544584468481919, "learning_rate": 5.618221258134491e-06, "loss": 1.2264, "step": 12691 }, { "epoch": 0.056186639514808095, "grad_norm": 2.3316871011587983, "learning_rate": 5.618663951480809e-06, "loss": 0.433, "step": 12692 }, { "epoch": 0.05619106644827128, "grad_norm": 2.2809953862168753, "learning_rate": 5.6191066448271295e-06, "loss": 0.6914, "step": 12693 }, { "epoch": 0.05619549338173447, "grad_norm": 2.328699965988361, "learning_rate": 5.619549338173448e-06, "loss": 0.551, "step": 12694 }, { "epoch": 0.05619992031519766, "grad_norm": 2.686089090379698, "learning_rate": 5.6199920315197665e-06, "loss": 0.7186, "step": 12695 }, { "epoch": 0.05620434724866085, "grad_norm": 2.767145581840891, "learning_rate": 5.620434724866086e-06, "loss": 0.7026, "step": 12696 }, { "epoch": 0.05620877418212404, "grad_norm": 2.878118706209996, "learning_rate": 5.620877418212405e-06, "loss": 0.9216, "step": 12697 }, { "epoch": 0.05621320111558723, "grad_norm": 2.5856585460239656, "learning_rate": 5.621320111558724e-06, "loss": 0.5795, "step": 12698 }, { "epoch": 0.05621762804905042, "grad_norm": 2.6342311811573125, "learning_rate": 5.621762804905043e-06, "loss": 0.9493, "step": 12699 }, { "epoch": 0.05622205498251361, "grad_norm": 2.313045819060829, "learning_rate": 5.622205498251362e-06, "loss": 0.5078, "step": 12700 }, { "epoch": 0.0562264819159768, "grad_norm": 2.876829336817355, "learning_rate": 5.62264819159768e-06, "loss": 0.6802, "step": 12701 }, { "epoch": 0.056230908849439994, "grad_norm": 3.2975607886553266, "learning_rate": 5.623090884944e-06, "loss": 0.8603, "step": 12702 }, { "epoch": 0.056235335782903184, "grad_norm": 3.576579099358898, "learning_rate": 5.623533578290319e-06, "loss": 0.8889, "step": 12703 }, { "epoch": 0.056239762716366375, "grad_norm": 2.524360552260706, "learning_rate": 5.6239762716366374e-06, "loss": 0.8216, "step": 12704 }, { "epoch": 0.056244189649829565, "grad_norm": 2.5526197715586285, "learning_rate": 5.624418964982958e-06, "loss": 0.7371, "step": 12705 }, { "epoch": 0.056248616583292756, "grad_norm": 2.388903646699062, "learning_rate": 5.624861658329276e-06, "loss": 0.7504, "step": 12706 }, { "epoch": 0.056253043516755946, "grad_norm": 3.412090689878527, "learning_rate": 5.625304351675595e-06, "loss": 1.115, "step": 12707 }, { "epoch": 0.05625747045021914, "grad_norm": 2.381641905815464, "learning_rate": 5.625747045021914e-06, "loss": 0.6193, "step": 12708 }, { "epoch": 0.05626189738368232, "grad_norm": 2.801985536045929, "learning_rate": 5.626189738368233e-06, "loss": 0.7676, "step": 12709 }, { "epoch": 0.05626632431714551, "grad_norm": 3.225684933867891, "learning_rate": 5.626632431714551e-06, "loss": 0.6417, "step": 12710 }, { "epoch": 0.0562707512506087, "grad_norm": 2.7206840467171376, "learning_rate": 5.627075125060871e-06, "loss": 0.926, "step": 12711 }, { "epoch": 0.05627517818407189, "grad_norm": 3.138542861791627, "learning_rate": 5.62751781840719e-06, "loss": 0.8749, "step": 12712 }, { "epoch": 0.05627960511753508, "grad_norm": 3.709702827495791, "learning_rate": 5.627960511753508e-06, "loss": 1.2151, "step": 12713 }, { "epoch": 0.05628403205099827, "grad_norm": 2.990415849542426, "learning_rate": 5.628403205099829e-06, "loss": 0.6467, "step": 12714 }, { "epoch": 0.05628845898446146, "grad_norm": 2.361592118475199, "learning_rate": 5.628845898446147e-06, "loss": 0.6227, "step": 12715 }, { "epoch": 0.056292885917924654, "grad_norm": 2.3710321397228613, "learning_rate": 5.629288591792466e-06, "loss": 0.5774, "step": 12716 }, { "epoch": 0.056297312851387844, "grad_norm": 2.662439254129421, "learning_rate": 5.629731285138785e-06, "loss": 0.6059, "step": 12717 }, { "epoch": 0.056301739784851035, "grad_norm": 2.606996832686422, "learning_rate": 5.6301739784851036e-06, "loss": 0.5441, "step": 12718 }, { "epoch": 0.056306166718314225, "grad_norm": 2.749202376265435, "learning_rate": 5.630616671831423e-06, "loss": 0.7873, "step": 12719 }, { "epoch": 0.056310593651777416, "grad_norm": 2.690656209859811, "learning_rate": 5.631059365177742e-06, "loss": 0.9088, "step": 12720 }, { "epoch": 0.056315020585240606, "grad_norm": 2.648688187441011, "learning_rate": 5.631502058524061e-06, "loss": 0.8578, "step": 12721 }, { "epoch": 0.0563194475187038, "grad_norm": 3.6579261297589967, "learning_rate": 5.631944751870379e-06, "loss": 0.7849, "step": 12722 }, { "epoch": 0.05632387445216699, "grad_norm": 2.6443673750553907, "learning_rate": 5.6323874452166996e-06, "loss": 0.674, "step": 12723 }, { "epoch": 0.05632830138563017, "grad_norm": 3.7785967905835247, "learning_rate": 5.632830138563018e-06, "loss": 1.1938, "step": 12724 }, { "epoch": 0.05633272831909336, "grad_norm": 2.6213286926769106, "learning_rate": 5.633272831909337e-06, "loss": 0.7706, "step": 12725 }, { "epoch": 0.05633715525255655, "grad_norm": 2.7242284333246434, "learning_rate": 5.633715525255656e-06, "loss": 0.9089, "step": 12726 }, { "epoch": 0.05634158218601974, "grad_norm": 2.8926269705890095, "learning_rate": 5.634158218601975e-06, "loss": 0.9362, "step": 12727 }, { "epoch": 0.05634600911948293, "grad_norm": 2.6363707487982624, "learning_rate": 5.634600911948294e-06, "loss": 0.8331, "step": 12728 }, { "epoch": 0.05635043605294612, "grad_norm": 2.721816095594589, "learning_rate": 5.635043605294613e-06, "loss": 0.8487, "step": 12729 }, { "epoch": 0.056354862986409314, "grad_norm": 2.789474269652183, "learning_rate": 5.635486298640932e-06, "loss": 0.7894, "step": 12730 }, { "epoch": 0.056359289919872504, "grad_norm": 2.7992661802967373, "learning_rate": 5.63592899198725e-06, "loss": 0.8678, "step": 12731 }, { "epoch": 0.056363716853335695, "grad_norm": 3.5290925970504357, "learning_rate": 5.6363716853335705e-06, "loss": 0.8743, "step": 12732 }, { "epoch": 0.056368143786798885, "grad_norm": 2.556008901455821, "learning_rate": 5.636814378679889e-06, "loss": 0.5194, "step": 12733 }, { "epoch": 0.056372570720262076, "grad_norm": 2.465815895048034, "learning_rate": 5.6372570720262076e-06, "loss": 0.7089, "step": 12734 }, { "epoch": 0.056376997653725267, "grad_norm": 3.120712163423186, "learning_rate": 5.637699765372528e-06, "loss": 1.0643, "step": 12735 }, { "epoch": 0.05638142458718846, "grad_norm": 2.7368369213662347, "learning_rate": 5.638142458718846e-06, "loss": 0.8359, "step": 12736 }, { "epoch": 0.05638585152065165, "grad_norm": 3.2273775273614116, "learning_rate": 5.638585152065165e-06, "loss": 0.9236, "step": 12737 }, { "epoch": 0.05639027845411484, "grad_norm": 2.5802307981987695, "learning_rate": 5.639027845411484e-06, "loss": 0.5447, "step": 12738 }, { "epoch": 0.05639470538757802, "grad_norm": 2.537318479644064, "learning_rate": 5.639470538757803e-06, "loss": 0.8257, "step": 12739 }, { "epoch": 0.05639913232104121, "grad_norm": 2.913543153602509, "learning_rate": 5.639913232104122e-06, "loss": 0.8163, "step": 12740 }, { "epoch": 0.0564035592545044, "grad_norm": 3.519539323910075, "learning_rate": 5.6403559254504415e-06, "loss": 0.9589, "step": 12741 }, { "epoch": 0.05640798618796759, "grad_norm": 2.7282226517329433, "learning_rate": 5.64079861879676e-06, "loss": 0.9046, "step": 12742 }, { "epoch": 0.056412413121430784, "grad_norm": 3.2271802765832227, "learning_rate": 5.6412413121430785e-06, "loss": 0.7726, "step": 12743 }, { "epoch": 0.056416840054893974, "grad_norm": 2.7747199109568608, "learning_rate": 5.641684005489399e-06, "loss": 0.6085, "step": 12744 }, { "epoch": 0.056421266988357165, "grad_norm": 2.6029728330679895, "learning_rate": 5.642126698835717e-06, "loss": 0.8192, "step": 12745 }, { "epoch": 0.056425693921820355, "grad_norm": 3.8713705289971547, "learning_rate": 5.642569392182036e-06, "loss": 1.2282, "step": 12746 }, { "epoch": 0.056430120855283546, "grad_norm": 2.358036805853737, "learning_rate": 5.643012085528355e-06, "loss": 0.5895, "step": 12747 }, { "epoch": 0.056434547788746736, "grad_norm": 2.564523096421463, "learning_rate": 5.643454778874674e-06, "loss": 0.7249, "step": 12748 }, { "epoch": 0.05643897472220993, "grad_norm": 3.1607746785899056, "learning_rate": 5.643897472220993e-06, "loss": 1.0239, "step": 12749 }, { "epoch": 0.05644340165567312, "grad_norm": 3.0280658981573785, "learning_rate": 5.644340165567312e-06, "loss": 0.8102, "step": 12750 }, { "epoch": 0.05644782858913631, "grad_norm": 3.023301843435148, "learning_rate": 5.644782858913631e-06, "loss": 0.8075, "step": 12751 }, { "epoch": 0.0564522555225995, "grad_norm": 3.1467056715404382, "learning_rate": 5.6452255522599494e-06, "loss": 0.8317, "step": 12752 }, { "epoch": 0.05645668245606269, "grad_norm": 2.668857146467996, "learning_rate": 5.64566824560627e-06, "loss": 0.8497, "step": 12753 }, { "epoch": 0.05646110938952587, "grad_norm": 2.9230642403189013, "learning_rate": 5.646110938952588e-06, "loss": 0.6591, "step": 12754 }, { "epoch": 0.05646553632298906, "grad_norm": 2.776510561418933, "learning_rate": 5.646553632298907e-06, "loss": 0.837, "step": 12755 }, { "epoch": 0.05646996325645225, "grad_norm": 2.7122598910538835, "learning_rate": 5.646996325645226e-06, "loss": 0.9162, "step": 12756 }, { "epoch": 0.056474390189915444, "grad_norm": 2.811791826354376, "learning_rate": 5.6474390189915455e-06, "loss": 0.5346, "step": 12757 }, { "epoch": 0.056478817123378634, "grad_norm": 2.846825112955604, "learning_rate": 5.647881712337864e-06, "loss": 0.6506, "step": 12758 }, { "epoch": 0.056483244056841825, "grad_norm": 2.3530192211322354, "learning_rate": 5.648324405684183e-06, "loss": 0.6357, "step": 12759 }, { "epoch": 0.056487670990305015, "grad_norm": 2.384215241092728, "learning_rate": 5.648767099030502e-06, "loss": 0.5478, "step": 12760 }, { "epoch": 0.056492097923768206, "grad_norm": 2.5668928873783954, "learning_rate": 5.64920979237682e-06, "loss": 0.8093, "step": 12761 }, { "epoch": 0.056496524857231396, "grad_norm": 2.9973678544016726, "learning_rate": 5.649652485723141e-06, "loss": 0.9598, "step": 12762 }, { "epoch": 0.05650095179069459, "grad_norm": 3.0941254992196727, "learning_rate": 5.650095179069459e-06, "loss": 1.188, "step": 12763 }, { "epoch": 0.05650537872415778, "grad_norm": 2.4955848543844863, "learning_rate": 5.650537872415778e-06, "loss": 0.7936, "step": 12764 }, { "epoch": 0.05650980565762097, "grad_norm": 3.491093582279411, "learning_rate": 5.650980565762098e-06, "loss": 0.8024, "step": 12765 }, { "epoch": 0.05651423259108416, "grad_norm": 3.0173719199763145, "learning_rate": 5.651423259108416e-06, "loss": 0.6744, "step": 12766 }, { "epoch": 0.05651865952454735, "grad_norm": 2.4324892798975934, "learning_rate": 5.651865952454735e-06, "loss": 0.6402, "step": 12767 }, { "epoch": 0.05652308645801054, "grad_norm": 3.378709699552106, "learning_rate": 5.652308645801054e-06, "loss": 1.0324, "step": 12768 }, { "epoch": 0.05652751339147372, "grad_norm": 2.728216119854636, "learning_rate": 5.652751339147373e-06, "loss": 0.6279, "step": 12769 }, { "epoch": 0.05653194032493691, "grad_norm": 2.7872559213946313, "learning_rate": 5.653194032493692e-06, "loss": 0.7223, "step": 12770 }, { "epoch": 0.056536367258400104, "grad_norm": 2.9933309832736597, "learning_rate": 5.6536367258400116e-06, "loss": 0.658, "step": 12771 }, { "epoch": 0.056540794191863294, "grad_norm": 2.2054371667744856, "learning_rate": 5.65407941918633e-06, "loss": 0.7194, "step": 12772 }, { "epoch": 0.056545221125326485, "grad_norm": 2.704367111708631, "learning_rate": 5.654522112532649e-06, "loss": 0.9531, "step": 12773 }, { "epoch": 0.056549648058789675, "grad_norm": 2.6884564310159553, "learning_rate": 5.654964805878969e-06, "loss": 0.673, "step": 12774 }, { "epoch": 0.056554074992252866, "grad_norm": 2.195246193408476, "learning_rate": 5.655407499225287e-06, "loss": 0.6237, "step": 12775 }, { "epoch": 0.056558501925716057, "grad_norm": 2.860379758210765, "learning_rate": 5.655850192571606e-06, "loss": 0.5092, "step": 12776 }, { "epoch": 0.05656292885917925, "grad_norm": 3.6036373016062524, "learning_rate": 5.656292885917925e-06, "loss": 1.2095, "step": 12777 }, { "epoch": 0.05656735579264244, "grad_norm": 2.8083589399966886, "learning_rate": 5.656735579264245e-06, "loss": 0.587, "step": 12778 }, { "epoch": 0.05657178272610563, "grad_norm": 3.062035531221916, "learning_rate": 5.657178272610563e-06, "loss": 1.0013, "step": 12779 }, { "epoch": 0.05657620965956882, "grad_norm": 3.3657193922289186, "learning_rate": 5.6576209659568825e-06, "loss": 1.2382, "step": 12780 }, { "epoch": 0.05658063659303201, "grad_norm": 2.4584084106600854, "learning_rate": 5.658063659303201e-06, "loss": 0.6933, "step": 12781 }, { "epoch": 0.0565850635264952, "grad_norm": 2.925446015728382, "learning_rate": 5.6585063526495196e-06, "loss": 0.7151, "step": 12782 }, { "epoch": 0.05658949045995839, "grad_norm": 2.9901535319035153, "learning_rate": 5.65894904599584e-06, "loss": 0.8045, "step": 12783 }, { "epoch": 0.056593917393421574, "grad_norm": 2.317812881856027, "learning_rate": 5.659391739342158e-06, "loss": 0.5455, "step": 12784 }, { "epoch": 0.056598344326884764, "grad_norm": 2.485991697892347, "learning_rate": 5.659834432688477e-06, "loss": 0.535, "step": 12785 }, { "epoch": 0.056602771260347955, "grad_norm": 3.1389314479997132, "learning_rate": 5.660277126034797e-06, "loss": 1.0759, "step": 12786 }, { "epoch": 0.056607198193811145, "grad_norm": 4.453343262646873, "learning_rate": 5.6607198193811156e-06, "loss": 0.9486, "step": 12787 }, { "epoch": 0.056611625127274336, "grad_norm": 2.846454626905445, "learning_rate": 5.661162512727434e-06, "loss": 0.541, "step": 12788 }, { "epoch": 0.056616052060737526, "grad_norm": 2.5776622707162726, "learning_rate": 5.6616052060737535e-06, "loss": 0.5824, "step": 12789 }, { "epoch": 0.05662047899420072, "grad_norm": 3.000132071440678, "learning_rate": 5.662047899420072e-06, "loss": 0.5286, "step": 12790 }, { "epoch": 0.05662490592766391, "grad_norm": 3.147640752974034, "learning_rate": 5.6624905927663905e-06, "loss": 0.8339, "step": 12791 }, { "epoch": 0.0566293328611271, "grad_norm": 2.6537798854701333, "learning_rate": 5.662933286112711e-06, "loss": 0.7669, "step": 12792 }, { "epoch": 0.05663375979459029, "grad_norm": 3.2434195171576805, "learning_rate": 5.663375979459029e-06, "loss": 0.7169, "step": 12793 }, { "epoch": 0.05663818672805348, "grad_norm": 2.682448706928613, "learning_rate": 5.663818672805348e-06, "loss": 0.6692, "step": 12794 }, { "epoch": 0.05664261366151667, "grad_norm": 2.6321646142756046, "learning_rate": 5.664261366151668e-06, "loss": 0.6726, "step": 12795 }, { "epoch": 0.05664704059497986, "grad_norm": 3.2718305276802897, "learning_rate": 5.6647040594979865e-06, "loss": 0.822, "step": 12796 }, { "epoch": 0.05665146752844305, "grad_norm": 2.443127076642097, "learning_rate": 5.665146752844305e-06, "loss": 0.8982, "step": 12797 }, { "epoch": 0.05665589446190624, "grad_norm": 2.182168020919569, "learning_rate": 5.665589446190624e-06, "loss": 0.828, "step": 12798 }, { "epoch": 0.056660321395369424, "grad_norm": 2.947444074020216, "learning_rate": 5.666032139536943e-06, "loss": 0.835, "step": 12799 }, { "epoch": 0.056664748328832615, "grad_norm": 3.5493732028658727, "learning_rate": 5.666474832883262e-06, "loss": 0.8256, "step": 12800 }, { "epoch": 0.056669175262295805, "grad_norm": 2.772034856404598, "learning_rate": 5.666917526229582e-06, "loss": 0.7193, "step": 12801 }, { "epoch": 0.056673602195758996, "grad_norm": 2.4454484792207847, "learning_rate": 5.6673602195759e-06, "loss": 0.6674, "step": 12802 }, { "epoch": 0.056678029129222186, "grad_norm": 2.9953771031981407, "learning_rate": 5.667802912922219e-06, "loss": 0.6573, "step": 12803 }, { "epoch": 0.05668245606268538, "grad_norm": 2.6099455376462397, "learning_rate": 5.668245606268539e-06, "loss": 0.5929, "step": 12804 }, { "epoch": 0.05668688299614857, "grad_norm": 3.217043433201182, "learning_rate": 5.6686882996148575e-06, "loss": 1.1897, "step": 12805 }, { "epoch": 0.05669130992961176, "grad_norm": 2.855498367693373, "learning_rate": 5.669130992961176e-06, "loss": 0.7677, "step": 12806 }, { "epoch": 0.05669573686307495, "grad_norm": 2.563813331217033, "learning_rate": 5.669573686307495e-06, "loss": 0.5008, "step": 12807 }, { "epoch": 0.05670016379653814, "grad_norm": 3.399212407719867, "learning_rate": 5.670016379653815e-06, "loss": 0.5929, "step": 12808 }, { "epoch": 0.05670459073000133, "grad_norm": 3.711197215086783, "learning_rate": 5.670459073000133e-06, "loss": 1.0804, "step": 12809 }, { "epoch": 0.05670901766346452, "grad_norm": 3.9909816646480722, "learning_rate": 5.670901766346453e-06, "loss": 0.976, "step": 12810 }, { "epoch": 0.05671344459692771, "grad_norm": 2.8015620437028135, "learning_rate": 5.671344459692771e-06, "loss": 0.7896, "step": 12811 }, { "epoch": 0.0567178715303909, "grad_norm": 3.636201220829598, "learning_rate": 5.67178715303909e-06, "loss": 0.9875, "step": 12812 }, { "epoch": 0.05672229846385409, "grad_norm": 2.784759397039019, "learning_rate": 5.67222984638541e-06, "loss": 0.7321, "step": 12813 }, { "epoch": 0.056726725397317275, "grad_norm": 3.100654778291883, "learning_rate": 5.672672539731728e-06, "loss": 0.9316, "step": 12814 }, { "epoch": 0.056731152330780465, "grad_norm": 2.8009521805381743, "learning_rate": 5.673115233078047e-06, "loss": 0.5432, "step": 12815 }, { "epoch": 0.056735579264243656, "grad_norm": 3.116958856623382, "learning_rate": 5.673557926424367e-06, "loss": 0.648, "step": 12816 }, { "epoch": 0.056740006197706847, "grad_norm": 3.0312579471489087, "learning_rate": 5.674000619770686e-06, "loss": 0.8622, "step": 12817 }, { "epoch": 0.05674443313117004, "grad_norm": 3.078708445657981, "learning_rate": 5.674443313117004e-06, "loss": 0.8093, "step": 12818 }, { "epoch": 0.05674886006463323, "grad_norm": 2.8592920050907114, "learning_rate": 5.6748860064633236e-06, "loss": 0.9288, "step": 12819 }, { "epoch": 0.05675328699809642, "grad_norm": 2.54585718285905, "learning_rate": 5.675328699809642e-06, "loss": 0.7814, "step": 12820 }, { "epoch": 0.05675771393155961, "grad_norm": 2.484161968985925, "learning_rate": 5.6757713931559615e-06, "loss": 0.714, "step": 12821 }, { "epoch": 0.0567621408650228, "grad_norm": 2.3218125698403163, "learning_rate": 5.676214086502281e-06, "loss": 0.5521, "step": 12822 }, { "epoch": 0.05676656779848599, "grad_norm": 3.4115076772225117, "learning_rate": 5.676656779848599e-06, "loss": 0.9763, "step": 12823 }, { "epoch": 0.05677099473194918, "grad_norm": 3.446203627184045, "learning_rate": 5.677099473194918e-06, "loss": 1.0514, "step": 12824 }, { "epoch": 0.05677542166541237, "grad_norm": 3.279396921516688, "learning_rate": 5.677542166541238e-06, "loss": 0.8555, "step": 12825 }, { "epoch": 0.05677984859887556, "grad_norm": 2.8073764183299033, "learning_rate": 5.677984859887557e-06, "loss": 0.8051, "step": 12826 }, { "epoch": 0.05678427553233875, "grad_norm": 3.094687020909868, "learning_rate": 5.678427553233875e-06, "loss": 0.9446, "step": 12827 }, { "epoch": 0.05678870246580194, "grad_norm": 2.7263426749413346, "learning_rate": 5.6788702465801945e-06, "loss": 0.7669, "step": 12828 }, { "epoch": 0.056793129399265126, "grad_norm": 3.11458397678075, "learning_rate": 5.679312939926513e-06, "loss": 0.776, "step": 12829 }, { "epoch": 0.056797556332728316, "grad_norm": 2.295465428424413, "learning_rate": 5.679755633272832e-06, "loss": 0.5009, "step": 12830 }, { "epoch": 0.05680198326619151, "grad_norm": 2.781823313738419, "learning_rate": 5.680198326619152e-06, "loss": 0.8766, "step": 12831 }, { "epoch": 0.0568064101996547, "grad_norm": 2.811674211042388, "learning_rate": 5.68064101996547e-06, "loss": 0.6823, "step": 12832 }, { "epoch": 0.05681083713311789, "grad_norm": 2.9764631063390974, "learning_rate": 5.681083713311789e-06, "loss": 0.8799, "step": 12833 }, { "epoch": 0.05681526406658108, "grad_norm": 2.5522506756444017, "learning_rate": 5.681526406658109e-06, "loss": 0.5864, "step": 12834 }, { "epoch": 0.05681969100004427, "grad_norm": 2.4012643209071287, "learning_rate": 5.6819691000044276e-06, "loss": 0.5421, "step": 12835 }, { "epoch": 0.05682411793350746, "grad_norm": 2.6951770468095395, "learning_rate": 5.682411793350746e-06, "loss": 0.8451, "step": 12836 }, { "epoch": 0.05682854486697065, "grad_norm": 2.876830531432811, "learning_rate": 5.6828544866970655e-06, "loss": 0.9016, "step": 12837 }, { "epoch": 0.05683297180043384, "grad_norm": 2.545573838519868, "learning_rate": 5.683297180043385e-06, "loss": 0.6132, "step": 12838 }, { "epoch": 0.05683739873389703, "grad_norm": 3.5164900801913834, "learning_rate": 5.683739873389703e-06, "loss": 0.9726, "step": 12839 }, { "epoch": 0.05684182566736022, "grad_norm": 2.7590941561124054, "learning_rate": 5.684182566736023e-06, "loss": 0.8317, "step": 12840 }, { "epoch": 0.05684625260082341, "grad_norm": 2.7403244195264156, "learning_rate": 5.684625260082341e-06, "loss": 0.589, "step": 12841 }, { "epoch": 0.0568506795342866, "grad_norm": 2.5614261759474175, "learning_rate": 5.68506795342866e-06, "loss": 0.5487, "step": 12842 }, { "epoch": 0.05685510646774979, "grad_norm": 3.3988014306313183, "learning_rate": 5.68551064677498e-06, "loss": 0.8564, "step": 12843 }, { "epoch": 0.056859533401212976, "grad_norm": 2.619443621907859, "learning_rate": 5.6859533401212985e-06, "loss": 0.6419, "step": 12844 }, { "epoch": 0.05686396033467617, "grad_norm": 2.961250192776671, "learning_rate": 5.686396033467617e-06, "loss": 0.8707, "step": 12845 }, { "epoch": 0.05686838726813936, "grad_norm": 3.238188468674149, "learning_rate": 5.686838726813937e-06, "loss": 1.1237, "step": 12846 }, { "epoch": 0.05687281420160255, "grad_norm": 3.336187964357025, "learning_rate": 5.687281420160256e-06, "loss": 0.9072, "step": 12847 }, { "epoch": 0.05687724113506574, "grad_norm": 4.000669146082235, "learning_rate": 5.687724113506574e-06, "loss": 1.3629, "step": 12848 }, { "epoch": 0.05688166806852893, "grad_norm": 2.610683767881878, "learning_rate": 5.688166806852894e-06, "loss": 0.6717, "step": 12849 }, { "epoch": 0.05688609500199212, "grad_norm": 2.6338498679555666, "learning_rate": 5.688609500199212e-06, "loss": 0.6966, "step": 12850 }, { "epoch": 0.05689052193545531, "grad_norm": 2.1455887312818747, "learning_rate": 5.6890521935455316e-06, "loss": 0.7512, "step": 12851 }, { "epoch": 0.0568949488689185, "grad_norm": 2.6782307961586, "learning_rate": 5.689494886891851e-06, "loss": 0.8555, "step": 12852 }, { "epoch": 0.05689937580238169, "grad_norm": 2.973372197278871, "learning_rate": 5.6899375802381695e-06, "loss": 0.9469, "step": 12853 }, { "epoch": 0.05690380273584488, "grad_norm": 3.5167473211695146, "learning_rate": 5.690380273584488e-06, "loss": 0.8813, "step": 12854 }, { "epoch": 0.05690822966930807, "grad_norm": 4.9023763911624565, "learning_rate": 5.690822966930808e-06, "loss": 1.0869, "step": 12855 }, { "epoch": 0.05691265660277126, "grad_norm": 3.5262004886551637, "learning_rate": 5.691265660277127e-06, "loss": 0.7647, "step": 12856 }, { "epoch": 0.05691708353623445, "grad_norm": 2.6892244728629446, "learning_rate": 5.691708353623445e-06, "loss": 0.5535, "step": 12857 }, { "epoch": 0.05692151046969764, "grad_norm": 2.389434590443589, "learning_rate": 5.692151046969765e-06, "loss": 0.4823, "step": 12858 }, { "epoch": 0.056925937403160834, "grad_norm": 2.5394714342569404, "learning_rate": 5.692593740316084e-06, "loss": 0.6915, "step": 12859 }, { "epoch": 0.05693036433662402, "grad_norm": 3.206093485117287, "learning_rate": 5.6930364336624025e-06, "loss": 0.9842, "step": 12860 }, { "epoch": 0.05693479127008721, "grad_norm": 2.6276937321076845, "learning_rate": 5.693479127008722e-06, "loss": 0.6708, "step": 12861 }, { "epoch": 0.0569392182035504, "grad_norm": 2.8353727442855337, "learning_rate": 5.69392182035504e-06, "loss": 0.382, "step": 12862 }, { "epoch": 0.05694364513701359, "grad_norm": 2.1696748922686595, "learning_rate": 5.694364513701359e-06, "loss": 0.6021, "step": 12863 }, { "epoch": 0.05694807207047678, "grad_norm": 2.84193172962437, "learning_rate": 5.694807207047679e-06, "loss": 0.7469, "step": 12864 }, { "epoch": 0.05695249900393997, "grad_norm": 2.1066109979861722, "learning_rate": 5.695249900393998e-06, "loss": 0.5188, "step": 12865 }, { "epoch": 0.05695692593740316, "grad_norm": 2.5512310698490928, "learning_rate": 5.695692593740316e-06, "loss": 0.7922, "step": 12866 }, { "epoch": 0.05696135287086635, "grad_norm": 2.9023429064430157, "learning_rate": 5.696135287086636e-06, "loss": 0.5161, "step": 12867 }, { "epoch": 0.05696577980432954, "grad_norm": 3.287032111899315, "learning_rate": 5.696577980432955e-06, "loss": 1.1882, "step": 12868 }, { "epoch": 0.05697020673779273, "grad_norm": 3.112837981301723, "learning_rate": 5.6970206737792735e-06, "loss": 0.9027, "step": 12869 }, { "epoch": 0.05697463367125592, "grad_norm": 2.8196045033031996, "learning_rate": 5.697463367125593e-06, "loss": 0.7877, "step": 12870 }, { "epoch": 0.05697906060471911, "grad_norm": 2.421293948482111, "learning_rate": 5.697906060471911e-06, "loss": 0.595, "step": 12871 }, { "epoch": 0.056983487538182304, "grad_norm": 2.5439411031139865, "learning_rate": 5.69834875381823e-06, "loss": 0.5308, "step": 12872 }, { "epoch": 0.056987914471645494, "grad_norm": 2.600569740438978, "learning_rate": 5.69879144716455e-06, "loss": 0.6292, "step": 12873 }, { "epoch": 0.056992341405108685, "grad_norm": 2.9666975338366335, "learning_rate": 5.699234140510869e-06, "loss": 0.7433, "step": 12874 }, { "epoch": 0.05699676833857187, "grad_norm": 2.994454535410798, "learning_rate": 5.699676833857187e-06, "loss": 0.8841, "step": 12875 }, { "epoch": 0.05700119527203506, "grad_norm": 2.6740463913170474, "learning_rate": 5.700119527203507e-06, "loss": 0.4587, "step": 12876 }, { "epoch": 0.05700562220549825, "grad_norm": 2.734553947532189, "learning_rate": 5.700562220549826e-06, "loss": 0.8838, "step": 12877 }, { "epoch": 0.05701004913896144, "grad_norm": 3.2147672765227346, "learning_rate": 5.701004913896144e-06, "loss": 1.0733, "step": 12878 }, { "epoch": 0.05701447607242463, "grad_norm": 3.125395339092706, "learning_rate": 5.701447607242464e-06, "loss": 1.0698, "step": 12879 }, { "epoch": 0.05701890300588782, "grad_norm": 3.427197809771548, "learning_rate": 5.701890300588782e-06, "loss": 0.9118, "step": 12880 }, { "epoch": 0.05702332993935101, "grad_norm": 2.8749415889333525, "learning_rate": 5.702332993935102e-06, "loss": 0.7805, "step": 12881 }, { "epoch": 0.0570277568728142, "grad_norm": 2.933969882048323, "learning_rate": 5.702775687281421e-06, "loss": 0.7841, "step": 12882 }, { "epoch": 0.05703218380627739, "grad_norm": 2.6960200831708154, "learning_rate": 5.7032183806277396e-06, "loss": 0.7119, "step": 12883 }, { "epoch": 0.05703661073974058, "grad_norm": 2.1955809470015484, "learning_rate": 5.703661073974058e-06, "loss": 0.665, "step": 12884 }, { "epoch": 0.05704103767320377, "grad_norm": 3.018421764724794, "learning_rate": 5.704103767320378e-06, "loss": 0.6512, "step": 12885 }, { "epoch": 0.057045464606666964, "grad_norm": 4.753691188019132, "learning_rate": 5.704546460666697e-06, "loss": 1.3767, "step": 12886 }, { "epoch": 0.057049891540130154, "grad_norm": 2.564872076243674, "learning_rate": 5.704989154013015e-06, "loss": 0.8775, "step": 12887 }, { "epoch": 0.057054318473593345, "grad_norm": 2.556527998953916, "learning_rate": 5.705431847359335e-06, "loss": 0.4789, "step": 12888 }, { "epoch": 0.057058745407056535, "grad_norm": 2.782892353016458, "learning_rate": 5.705874540705654e-06, "loss": 0.6902, "step": 12889 }, { "epoch": 0.05706317234051972, "grad_norm": 3.010921088210831, "learning_rate": 5.706317234051973e-06, "loss": 0.7213, "step": 12890 }, { "epoch": 0.05706759927398291, "grad_norm": 2.578912896786648, "learning_rate": 5.706759927398292e-06, "loss": 0.6279, "step": 12891 }, { "epoch": 0.0570720262074461, "grad_norm": 2.724792032075633, "learning_rate": 5.7072026207446105e-06, "loss": 0.6092, "step": 12892 }, { "epoch": 0.05707645314090929, "grad_norm": 2.898358590775503, "learning_rate": 5.707645314090929e-06, "loss": 0.9187, "step": 12893 }, { "epoch": 0.05708088007437248, "grad_norm": 2.816800892533258, "learning_rate": 5.708088007437249e-06, "loss": 0.6413, "step": 12894 }, { "epoch": 0.05708530700783567, "grad_norm": 2.7883251144137615, "learning_rate": 5.708530700783568e-06, "loss": 0.7432, "step": 12895 }, { "epoch": 0.05708973394129886, "grad_norm": 2.766165381655992, "learning_rate": 5.708973394129886e-06, "loss": 0.7041, "step": 12896 }, { "epoch": 0.05709416087476205, "grad_norm": 3.2521795481096856, "learning_rate": 5.7094160874762065e-06, "loss": 1.0255, "step": 12897 }, { "epoch": 0.05709858780822524, "grad_norm": 2.34502654270118, "learning_rate": 5.709858780822525e-06, "loss": 0.7654, "step": 12898 }, { "epoch": 0.05710301474168843, "grad_norm": 3.0323191694285527, "learning_rate": 5.7103014741688436e-06, "loss": 1.0166, "step": 12899 }, { "epoch": 0.057107441675151624, "grad_norm": 2.80025199619233, "learning_rate": 5.710744167515163e-06, "loss": 0.5701, "step": 12900 }, { "epoch": 0.057111868608614814, "grad_norm": 2.4045571135734116, "learning_rate": 5.7111868608614815e-06, "loss": 0.5777, "step": 12901 }, { "epoch": 0.057116295542078005, "grad_norm": 2.5594239933934575, "learning_rate": 5.711629554207801e-06, "loss": 0.5314, "step": 12902 }, { "epoch": 0.057120722475541195, "grad_norm": 2.2878712337959857, "learning_rate": 5.71207224755412e-06, "loss": 0.5581, "step": 12903 }, { "epoch": 0.057125149409004386, "grad_norm": 2.919460739802819, "learning_rate": 5.712514940900439e-06, "loss": 0.799, "step": 12904 }, { "epoch": 0.05712957634246757, "grad_norm": 2.4057292639121552, "learning_rate": 5.712957634246757e-06, "loss": 0.6223, "step": 12905 }, { "epoch": 0.05713400327593076, "grad_norm": 2.603632210704119, "learning_rate": 5.7134003275930775e-06, "loss": 0.6588, "step": 12906 }, { "epoch": 0.05713843020939395, "grad_norm": 2.9146682963923705, "learning_rate": 5.713843020939396e-06, "loss": 0.6333, "step": 12907 }, { "epoch": 0.05714285714285714, "grad_norm": 3.034430359958342, "learning_rate": 5.7142857142857145e-06, "loss": 0.7125, "step": 12908 }, { "epoch": 0.05714728407632033, "grad_norm": 2.5439715186033522, "learning_rate": 5.714728407632034e-06, "loss": 0.4151, "step": 12909 }, { "epoch": 0.05715171100978352, "grad_norm": 2.5315250270157272, "learning_rate": 5.715171100978352e-06, "loss": 0.8237, "step": 12910 }, { "epoch": 0.05715613794324671, "grad_norm": 2.770056631162645, "learning_rate": 5.715613794324672e-06, "loss": 0.6783, "step": 12911 }, { "epoch": 0.0571605648767099, "grad_norm": 2.909683432439725, "learning_rate": 5.716056487670991e-06, "loss": 0.8392, "step": 12912 }, { "epoch": 0.057164991810173094, "grad_norm": 2.240069747310539, "learning_rate": 5.71649918101731e-06, "loss": 0.6582, "step": 12913 }, { "epoch": 0.057169418743636284, "grad_norm": 2.921578993366881, "learning_rate": 5.716941874363628e-06, "loss": 0.9354, "step": 12914 }, { "epoch": 0.057173845677099475, "grad_norm": 2.6271199142008523, "learning_rate": 5.717384567709948e-06, "loss": 0.5925, "step": 12915 }, { "epoch": 0.057178272610562665, "grad_norm": 2.5470575821230463, "learning_rate": 5.717827261056267e-06, "loss": 0.7367, "step": 12916 }, { "epoch": 0.057182699544025856, "grad_norm": 2.879036946404977, "learning_rate": 5.7182699544025855e-06, "loss": 0.5349, "step": 12917 }, { "epoch": 0.057187126477489046, "grad_norm": 3.2335714980392396, "learning_rate": 5.718712647748905e-06, "loss": 1.0956, "step": 12918 }, { "epoch": 0.05719155341095224, "grad_norm": 3.505310112072952, "learning_rate": 5.719155341095224e-06, "loss": 0.8378, "step": 12919 }, { "epoch": 0.05719598034441542, "grad_norm": 2.6496289532633273, "learning_rate": 5.719598034441543e-06, "loss": 0.8162, "step": 12920 }, { "epoch": 0.05720040727787861, "grad_norm": 3.228627556511757, "learning_rate": 5.720040727787862e-06, "loss": 0.743, "step": 12921 }, { "epoch": 0.0572048342113418, "grad_norm": 3.0351293716446905, "learning_rate": 5.720483421134181e-06, "loss": 0.8455, "step": 12922 }, { "epoch": 0.05720926114480499, "grad_norm": 4.012287562210327, "learning_rate": 5.720926114480499e-06, "loss": 1.2389, "step": 12923 }, { "epoch": 0.05721368807826818, "grad_norm": 2.687559368668109, "learning_rate": 5.721368807826819e-06, "loss": 0.7124, "step": 12924 }, { "epoch": 0.05721811501173137, "grad_norm": 2.953831327870765, "learning_rate": 5.721811501173138e-06, "loss": 1.0689, "step": 12925 }, { "epoch": 0.05722254194519456, "grad_norm": 2.6365035721764998, "learning_rate": 5.722254194519456e-06, "loss": 0.5773, "step": 12926 }, { "epoch": 0.057226968878657754, "grad_norm": 2.870269497211719, "learning_rate": 5.722696887865777e-06, "loss": 0.6702, "step": 12927 }, { "epoch": 0.057231395812120944, "grad_norm": 3.8300709988461064, "learning_rate": 5.723139581212095e-06, "loss": 1.1121, "step": 12928 }, { "epoch": 0.057235822745584135, "grad_norm": 2.30513578079034, "learning_rate": 5.723582274558414e-06, "loss": 0.5535, "step": 12929 }, { "epoch": 0.057240249679047325, "grad_norm": 2.6031016462950904, "learning_rate": 5.724024967904733e-06, "loss": 0.5562, "step": 12930 }, { "epoch": 0.057244676612510516, "grad_norm": 2.5553989034966365, "learning_rate": 5.7244676612510516e-06, "loss": 0.4699, "step": 12931 }, { "epoch": 0.057249103545973706, "grad_norm": 3.597230289583753, "learning_rate": 5.724910354597371e-06, "loss": 1.0484, "step": 12932 }, { "epoch": 0.0572535304794369, "grad_norm": 2.7090679952237773, "learning_rate": 5.72535304794369e-06, "loss": 0.5781, "step": 12933 }, { "epoch": 0.05725795741290009, "grad_norm": 2.3981950441377875, "learning_rate": 5.725795741290009e-06, "loss": 0.595, "step": 12934 }, { "epoch": 0.05726238434636327, "grad_norm": 4.118203438347983, "learning_rate": 5.726238434636327e-06, "loss": 1.3148, "step": 12935 }, { "epoch": 0.05726681127982646, "grad_norm": 2.9057860164855733, "learning_rate": 5.7266811279826476e-06, "loss": 0.8065, "step": 12936 }, { "epoch": 0.05727123821328965, "grad_norm": 3.293235948968405, "learning_rate": 5.727123821328966e-06, "loss": 1.1114, "step": 12937 }, { "epoch": 0.05727566514675284, "grad_norm": 2.5496693030039883, "learning_rate": 5.727566514675285e-06, "loss": 0.7211, "step": 12938 }, { "epoch": 0.05728009208021603, "grad_norm": 2.606653356089815, "learning_rate": 5.728009208021604e-06, "loss": 0.7387, "step": 12939 }, { "epoch": 0.05728451901367922, "grad_norm": 2.253408322766362, "learning_rate": 5.728451901367923e-06, "loss": 0.8616, "step": 12940 }, { "epoch": 0.057288945947142414, "grad_norm": 3.33364985239026, "learning_rate": 5.728894594714242e-06, "loss": 0.9039, "step": 12941 }, { "epoch": 0.057293372880605604, "grad_norm": 2.6880884392775677, "learning_rate": 5.729337288060561e-06, "loss": 0.6519, "step": 12942 }, { "epoch": 0.057297799814068795, "grad_norm": 3.579316960637455, "learning_rate": 5.72977998140688e-06, "loss": 0.4043, "step": 12943 }, { "epoch": 0.057302226747531985, "grad_norm": 3.7280271457946537, "learning_rate": 5.730222674753198e-06, "loss": 1.3863, "step": 12944 }, { "epoch": 0.057306653680995176, "grad_norm": 2.5566706761443463, "learning_rate": 5.7306653680995185e-06, "loss": 0.5388, "step": 12945 }, { "epoch": 0.057311080614458366, "grad_norm": 2.5192152125484535, "learning_rate": 5.731108061445837e-06, "loss": 0.7752, "step": 12946 }, { "epoch": 0.05731550754792156, "grad_norm": 2.2805096430652645, "learning_rate": 5.7315507547921556e-06, "loss": 0.5485, "step": 12947 }, { "epoch": 0.05731993448138475, "grad_norm": 2.3817929688153616, "learning_rate": 5.731993448138475e-06, "loss": 0.5964, "step": 12948 }, { "epoch": 0.05732436141484794, "grad_norm": 2.7064755011547597, "learning_rate": 5.732436141484794e-06, "loss": 0.6694, "step": 12949 }, { "epoch": 0.05732878834831112, "grad_norm": 2.7253429926833226, "learning_rate": 5.732878834831113e-06, "loss": 0.8924, "step": 12950 }, { "epoch": 0.05733321528177431, "grad_norm": 2.5745288107648348, "learning_rate": 5.733321528177432e-06, "loss": 0.7686, "step": 12951 }, { "epoch": 0.0573376422152375, "grad_norm": 2.745921349886025, "learning_rate": 5.733764221523751e-06, "loss": 0.7875, "step": 12952 }, { "epoch": 0.05734206914870069, "grad_norm": 3.0114266630815787, "learning_rate": 5.734206914870069e-06, "loss": 0.8985, "step": 12953 }, { "epoch": 0.057346496082163884, "grad_norm": 2.811729426748471, "learning_rate": 5.7346496082163895e-06, "loss": 0.688, "step": 12954 }, { "epoch": 0.057350923015627074, "grad_norm": 2.9710537040824816, "learning_rate": 5.735092301562708e-06, "loss": 0.8273, "step": 12955 }, { "epoch": 0.057355349949090265, "grad_norm": 2.4455868092948747, "learning_rate": 5.7355349949090265e-06, "loss": 0.637, "step": 12956 }, { "epoch": 0.057359776882553455, "grad_norm": 2.61326857336536, "learning_rate": 5.735977688255347e-06, "loss": 0.8516, "step": 12957 }, { "epoch": 0.057364203816016646, "grad_norm": 2.78159248564923, "learning_rate": 5.736420381601665e-06, "loss": 1.0032, "step": 12958 }, { "epoch": 0.057368630749479836, "grad_norm": 2.956802075609746, "learning_rate": 5.736863074947984e-06, "loss": 0.8722, "step": 12959 }, { "epoch": 0.05737305768294303, "grad_norm": 2.6903119985677924, "learning_rate": 5.737305768294303e-06, "loss": 0.8198, "step": 12960 }, { "epoch": 0.05737748461640622, "grad_norm": 2.7778196085561953, "learning_rate": 5.737748461640622e-06, "loss": 0.785, "step": 12961 }, { "epoch": 0.05738191154986941, "grad_norm": 2.6346128425622726, "learning_rate": 5.738191154986941e-06, "loss": 0.497, "step": 12962 }, { "epoch": 0.0573863384833326, "grad_norm": 2.6529307402908313, "learning_rate": 5.73863384833326e-06, "loss": 0.992, "step": 12963 }, { "epoch": 0.05739076541679579, "grad_norm": 3.1657041966334822, "learning_rate": 5.739076541679579e-06, "loss": 0.5067, "step": 12964 }, { "epoch": 0.05739519235025897, "grad_norm": 2.381361009964075, "learning_rate": 5.7395192350258975e-06, "loss": 0.7102, "step": 12965 }, { "epoch": 0.05739961928372216, "grad_norm": 4.053636062174656, "learning_rate": 5.739961928372218e-06, "loss": 1.0611, "step": 12966 }, { "epoch": 0.05740404621718535, "grad_norm": 3.688354275754619, "learning_rate": 5.740404621718536e-06, "loss": 0.8955, "step": 12967 }, { "epoch": 0.057408473150648544, "grad_norm": 2.562921118815205, "learning_rate": 5.740847315064855e-06, "loss": 0.5007, "step": 12968 }, { "epoch": 0.057412900084111734, "grad_norm": 2.412884592807621, "learning_rate": 5.741290008411174e-06, "loss": 0.5324, "step": 12969 }, { "epoch": 0.057417327017574925, "grad_norm": 3.61938190973343, "learning_rate": 5.7417327017574935e-06, "loss": 0.863, "step": 12970 }, { "epoch": 0.057421753951038115, "grad_norm": 2.7448648657019237, "learning_rate": 5.742175395103812e-06, "loss": 0.4989, "step": 12971 }, { "epoch": 0.057426180884501306, "grad_norm": 2.5374000333025672, "learning_rate": 5.742618088450131e-06, "loss": 0.9321, "step": 12972 }, { "epoch": 0.057430607817964496, "grad_norm": 3.2252976962953985, "learning_rate": 5.74306078179645e-06, "loss": 0.9364, "step": 12973 }, { "epoch": 0.05743503475142769, "grad_norm": 2.6980335588816198, "learning_rate": 5.743503475142768e-06, "loss": 0.8185, "step": 12974 }, { "epoch": 0.05743946168489088, "grad_norm": 2.413608980388874, "learning_rate": 5.743946168489089e-06, "loss": 0.501, "step": 12975 }, { "epoch": 0.05744388861835407, "grad_norm": 2.4741968580979927, "learning_rate": 5.744388861835407e-06, "loss": 0.7331, "step": 12976 }, { "epoch": 0.05744831555181726, "grad_norm": 2.9245660828949522, "learning_rate": 5.744831555181726e-06, "loss": 0.8502, "step": 12977 }, { "epoch": 0.05745274248528045, "grad_norm": 2.360582319992004, "learning_rate": 5.745274248528046e-06, "loss": 0.782, "step": 12978 }, { "epoch": 0.05745716941874364, "grad_norm": 2.881186995437956, "learning_rate": 5.745716941874364e-06, "loss": 0.7359, "step": 12979 }, { "epoch": 0.05746159635220682, "grad_norm": 2.486497882730505, "learning_rate": 5.746159635220683e-06, "loss": 0.7148, "step": 12980 }, { "epoch": 0.05746602328567001, "grad_norm": 2.7281768700631166, "learning_rate": 5.746602328567002e-06, "loss": 0.8061, "step": 12981 }, { "epoch": 0.057470450219133204, "grad_norm": 2.386525847478805, "learning_rate": 5.747045021913321e-06, "loss": 0.4656, "step": 12982 }, { "epoch": 0.057474877152596394, "grad_norm": 2.5333609776079027, "learning_rate": 5.747487715259639e-06, "loss": 0.7307, "step": 12983 }, { "epoch": 0.057479304086059585, "grad_norm": 2.309215029322327, "learning_rate": 5.7479304086059596e-06, "loss": 0.712, "step": 12984 }, { "epoch": 0.057483731019522775, "grad_norm": 3.3151253334092803, "learning_rate": 5.748373101952278e-06, "loss": 0.5525, "step": 12985 }, { "epoch": 0.057488157952985966, "grad_norm": 2.47160052624363, "learning_rate": 5.748815795298597e-06, "loss": 0.9193, "step": 12986 }, { "epoch": 0.057492584886449156, "grad_norm": 2.5256170411905448, "learning_rate": 5.749258488644917e-06, "loss": 0.8532, "step": 12987 }, { "epoch": 0.05749701181991235, "grad_norm": 2.3415452432832278, "learning_rate": 5.749701181991235e-06, "loss": 0.8788, "step": 12988 }, { "epoch": 0.05750143875337554, "grad_norm": 3.122140556948871, "learning_rate": 5.750143875337554e-06, "loss": 0.757, "step": 12989 }, { "epoch": 0.05750586568683873, "grad_norm": 2.6965304023553283, "learning_rate": 5.750586568683873e-06, "loss": 0.7373, "step": 12990 }, { "epoch": 0.05751029262030192, "grad_norm": 3.105026242311915, "learning_rate": 5.751029262030192e-06, "loss": 0.7234, "step": 12991 }, { "epoch": 0.05751471955376511, "grad_norm": 3.142002375070656, "learning_rate": 5.751471955376511e-06, "loss": 0.8972, "step": 12992 }, { "epoch": 0.0575191464872283, "grad_norm": 2.7246168133536255, "learning_rate": 5.7519146487228305e-06, "loss": 0.5977, "step": 12993 }, { "epoch": 0.05752357342069149, "grad_norm": 2.829623202524833, "learning_rate": 5.752357342069149e-06, "loss": 0.7822, "step": 12994 }, { "epoch": 0.057528000354154674, "grad_norm": 2.4541584110084425, "learning_rate": 5.7528000354154676e-06, "loss": 0.8338, "step": 12995 }, { "epoch": 0.057532427287617864, "grad_norm": 2.569584671881908, "learning_rate": 5.753242728761788e-06, "loss": 0.6744, "step": 12996 }, { "epoch": 0.057536854221081055, "grad_norm": 2.206012708618393, "learning_rate": 5.753685422108106e-06, "loss": 0.5873, "step": 12997 }, { "epoch": 0.057541281154544245, "grad_norm": 2.696539515974129, "learning_rate": 5.754128115454425e-06, "loss": 1.0393, "step": 12998 }, { "epoch": 0.057545708088007436, "grad_norm": 2.6918762709853077, "learning_rate": 5.754570808800744e-06, "loss": 0.5454, "step": 12999 }, { "epoch": 0.057550135021470626, "grad_norm": 3.065057418898947, "learning_rate": 5.7550135021470636e-06, "loss": 0.9862, "step": 13000 }, { "epoch": 0.05755456195493382, "grad_norm": 3.199925335167863, "learning_rate": 5.755456195493382e-06, "loss": 0.6369, "step": 13001 }, { "epoch": 0.05755898888839701, "grad_norm": 2.7754114676659296, "learning_rate": 5.7558988888397015e-06, "loss": 0.5888, "step": 13002 }, { "epoch": 0.0575634158218602, "grad_norm": 2.6063618782040496, "learning_rate": 5.75634158218602e-06, "loss": 0.4912, "step": 13003 }, { "epoch": 0.05756784275532339, "grad_norm": 3.6343966500325493, "learning_rate": 5.7567842755323385e-06, "loss": 0.8173, "step": 13004 }, { "epoch": 0.05757226968878658, "grad_norm": 2.4221541496578043, "learning_rate": 5.757226968878659e-06, "loss": 0.5283, "step": 13005 }, { "epoch": 0.05757669662224977, "grad_norm": 2.9851656705161305, "learning_rate": 5.757669662224977e-06, "loss": 0.8121, "step": 13006 }, { "epoch": 0.05758112355571296, "grad_norm": 3.360583829830676, "learning_rate": 5.758112355571296e-06, "loss": 1.4392, "step": 13007 }, { "epoch": 0.05758555048917615, "grad_norm": 3.614786438867073, "learning_rate": 5.758555048917616e-06, "loss": 0.6985, "step": 13008 }, { "epoch": 0.05758997742263934, "grad_norm": 3.1373513620986033, "learning_rate": 5.7589977422639345e-06, "loss": 0.7062, "step": 13009 }, { "epoch": 0.05759440435610253, "grad_norm": 2.8886410531751983, "learning_rate": 5.759440435610253e-06, "loss": 0.7269, "step": 13010 }, { "epoch": 0.057598831289565715, "grad_norm": 3.1624468710548017, "learning_rate": 5.759883128956572e-06, "loss": 0.6884, "step": 13011 }, { "epoch": 0.057603258223028905, "grad_norm": 3.1687603861996294, "learning_rate": 5.760325822302891e-06, "loss": 0.7128, "step": 13012 }, { "epoch": 0.057607685156492096, "grad_norm": 2.9489938993281393, "learning_rate": 5.76076851564921e-06, "loss": 0.8019, "step": 13013 }, { "epoch": 0.057612112089955286, "grad_norm": 2.689848308925606, "learning_rate": 5.76121120899553e-06, "loss": 0.8076, "step": 13014 }, { "epoch": 0.05761653902341848, "grad_norm": 3.59330589405338, "learning_rate": 5.761653902341848e-06, "loss": 0.9301, "step": 13015 }, { "epoch": 0.05762096595688167, "grad_norm": 3.8147640141747075, "learning_rate": 5.762096595688167e-06, "loss": 1.1959, "step": 13016 }, { "epoch": 0.05762539289034486, "grad_norm": 3.8268574753322477, "learning_rate": 5.762539289034487e-06, "loss": 1.1557, "step": 13017 }, { "epoch": 0.05762981982380805, "grad_norm": 2.5664791458356913, "learning_rate": 5.7629819823808055e-06, "loss": 0.8452, "step": 13018 }, { "epoch": 0.05763424675727124, "grad_norm": 2.498255816992544, "learning_rate": 5.763424675727124e-06, "loss": 0.5931, "step": 13019 }, { "epoch": 0.05763867369073443, "grad_norm": 2.605628927109661, "learning_rate": 5.763867369073443e-06, "loss": 0.609, "step": 13020 }, { "epoch": 0.05764310062419762, "grad_norm": 2.6993222052213377, "learning_rate": 5.764310062419763e-06, "loss": 0.741, "step": 13021 }, { "epoch": 0.05764752755766081, "grad_norm": 3.0618818109378414, "learning_rate": 5.764752755766081e-06, "loss": 0.9842, "step": 13022 }, { "epoch": 0.057651954491124, "grad_norm": 2.9561473885507494, "learning_rate": 5.765195449112401e-06, "loss": 0.8576, "step": 13023 }, { "epoch": 0.05765638142458719, "grad_norm": 2.4124957085652823, "learning_rate": 5.765638142458719e-06, "loss": 0.8103, "step": 13024 }, { "epoch": 0.05766080835805038, "grad_norm": 2.7578745554078443, "learning_rate": 5.766080835805038e-06, "loss": 0.6051, "step": 13025 }, { "epoch": 0.057665235291513565, "grad_norm": 2.239142665585178, "learning_rate": 5.766523529151358e-06, "loss": 0.4111, "step": 13026 }, { "epoch": 0.057669662224976756, "grad_norm": 2.697941988704752, "learning_rate": 5.766966222497676e-06, "loss": 0.4683, "step": 13027 }, { "epoch": 0.057674089158439946, "grad_norm": 2.8596227665507508, "learning_rate": 5.767408915843995e-06, "loss": 0.6929, "step": 13028 }, { "epoch": 0.05767851609190314, "grad_norm": 2.44912767573849, "learning_rate": 5.767851609190314e-06, "loss": 0.6343, "step": 13029 }, { "epoch": 0.05768294302536633, "grad_norm": 3.049079293777952, "learning_rate": 5.768294302536634e-06, "loss": 0.6494, "step": 13030 }, { "epoch": 0.05768736995882952, "grad_norm": 2.693027386955935, "learning_rate": 5.768736995882952e-06, "loss": 0.7075, "step": 13031 }, { "epoch": 0.05769179689229271, "grad_norm": 2.963003411479208, "learning_rate": 5.7691796892292716e-06, "loss": 0.7987, "step": 13032 }, { "epoch": 0.0576962238257559, "grad_norm": 2.264845619725574, "learning_rate": 5.76962238257559e-06, "loss": 0.6277, "step": 13033 }, { "epoch": 0.05770065075921909, "grad_norm": 2.625209341157001, "learning_rate": 5.770065075921909e-06, "loss": 0.5436, "step": 13034 }, { "epoch": 0.05770507769268228, "grad_norm": 2.664424516335154, "learning_rate": 5.770507769268229e-06, "loss": 0.929, "step": 13035 }, { "epoch": 0.05770950462614547, "grad_norm": 2.406117477012396, "learning_rate": 5.770950462614547e-06, "loss": 0.6804, "step": 13036 }, { "epoch": 0.05771393155960866, "grad_norm": 2.3577929279863956, "learning_rate": 5.771393155960866e-06, "loss": 0.4789, "step": 13037 }, { "epoch": 0.05771835849307185, "grad_norm": 3.1520022391425653, "learning_rate": 5.771835849307186e-06, "loss": 1.1439, "step": 13038 }, { "epoch": 0.05772278542653504, "grad_norm": 3.4609405848612997, "learning_rate": 5.772278542653505e-06, "loss": 1.1217, "step": 13039 }, { "epoch": 0.05772721235999823, "grad_norm": 2.6268165288233245, "learning_rate": 5.772721235999823e-06, "loss": 0.7812, "step": 13040 }, { "epoch": 0.057731639293461416, "grad_norm": 2.436538909437251, "learning_rate": 5.7731639293461425e-06, "loss": 0.7868, "step": 13041 }, { "epoch": 0.05773606622692461, "grad_norm": 3.460147214384379, "learning_rate": 5.773606622692461e-06, "loss": 0.7892, "step": 13042 }, { "epoch": 0.0577404931603878, "grad_norm": 2.4198694885069196, "learning_rate": 5.77404931603878e-06, "loss": 0.792, "step": 13043 }, { "epoch": 0.05774492009385099, "grad_norm": 3.3010848276915516, "learning_rate": 5.7744920093851e-06, "loss": 1.0237, "step": 13044 }, { "epoch": 0.05774934702731418, "grad_norm": 2.4102423768344554, "learning_rate": 5.774934702731418e-06, "loss": 0.5484, "step": 13045 }, { "epoch": 0.05775377396077737, "grad_norm": 2.567934810078546, "learning_rate": 5.775377396077737e-06, "loss": 0.6987, "step": 13046 }, { "epoch": 0.05775820089424056, "grad_norm": 2.4801249595864387, "learning_rate": 5.775820089424057e-06, "loss": 0.7141, "step": 13047 }, { "epoch": 0.05776262782770375, "grad_norm": 3.0888838030654964, "learning_rate": 5.7762627827703756e-06, "loss": 0.6854, "step": 13048 }, { "epoch": 0.05776705476116694, "grad_norm": 2.340878375716231, "learning_rate": 5.776705476116694e-06, "loss": 0.5651, "step": 13049 }, { "epoch": 0.05777148169463013, "grad_norm": 3.5267423529328097, "learning_rate": 5.7771481694630135e-06, "loss": 0.8583, "step": 13050 }, { "epoch": 0.05777590862809332, "grad_norm": 2.594277002004825, "learning_rate": 5.777590862809333e-06, "loss": 0.692, "step": 13051 }, { "epoch": 0.05778033556155651, "grad_norm": 3.118579960767207, "learning_rate": 5.778033556155651e-06, "loss": 0.998, "step": 13052 }, { "epoch": 0.0577847624950197, "grad_norm": 3.1613171225699785, "learning_rate": 5.778476249501971e-06, "loss": 0.6128, "step": 13053 }, { "epoch": 0.05778918942848289, "grad_norm": 2.4681337557185987, "learning_rate": 5.778918942848289e-06, "loss": 0.7909, "step": 13054 }, { "epoch": 0.05779361636194608, "grad_norm": 2.8455978746176536, "learning_rate": 5.779361636194608e-06, "loss": 0.6821, "step": 13055 }, { "epoch": 0.05779804329540927, "grad_norm": 2.571868446759535, "learning_rate": 5.779804329540928e-06, "loss": 0.5966, "step": 13056 }, { "epoch": 0.05780247022887246, "grad_norm": 2.912460640426098, "learning_rate": 5.7802470228872465e-06, "loss": 0.8013, "step": 13057 }, { "epoch": 0.05780689716233565, "grad_norm": 2.790678233055613, "learning_rate": 5.780689716233565e-06, "loss": 0.4031, "step": 13058 }, { "epoch": 0.05781132409579884, "grad_norm": 2.42356920360122, "learning_rate": 5.781132409579885e-06, "loss": 0.5633, "step": 13059 }, { "epoch": 0.05781575102926203, "grad_norm": 3.161571025271822, "learning_rate": 5.781575102926204e-06, "loss": 0.886, "step": 13060 }, { "epoch": 0.05782017796272522, "grad_norm": 2.461254772153736, "learning_rate": 5.782017796272522e-06, "loss": 0.535, "step": 13061 }, { "epoch": 0.05782460489618841, "grad_norm": 2.495143268547849, "learning_rate": 5.782460489618842e-06, "loss": 0.4682, "step": 13062 }, { "epoch": 0.0578290318296516, "grad_norm": 2.2464313522931656, "learning_rate": 5.78290318296516e-06, "loss": 0.5885, "step": 13063 }, { "epoch": 0.05783345876311479, "grad_norm": 2.884353370173723, "learning_rate": 5.783345876311479e-06, "loss": 0.6273, "step": 13064 }, { "epoch": 0.05783788569657798, "grad_norm": 3.0456496871222565, "learning_rate": 5.783788569657799e-06, "loss": 0.6605, "step": 13065 }, { "epoch": 0.05784231263004117, "grad_norm": 2.82346421409084, "learning_rate": 5.7842312630041175e-06, "loss": 0.5247, "step": 13066 }, { "epoch": 0.05784673956350436, "grad_norm": 3.4946221319642943, "learning_rate": 5.784673956350436e-06, "loss": 0.7909, "step": 13067 }, { "epoch": 0.05785116649696755, "grad_norm": 2.819021710283181, "learning_rate": 5.785116649696756e-06, "loss": 0.7613, "step": 13068 }, { "epoch": 0.05785559343043074, "grad_norm": 2.947065852622407, "learning_rate": 5.785559343043075e-06, "loss": 0.7923, "step": 13069 }, { "epoch": 0.057860020363893934, "grad_norm": 3.4075236332114844, "learning_rate": 5.786002036389393e-06, "loss": 1.0007, "step": 13070 }, { "epoch": 0.05786444729735712, "grad_norm": 2.3910538399379604, "learning_rate": 5.786444729735713e-06, "loss": 0.6876, "step": 13071 }, { "epoch": 0.05786887423082031, "grad_norm": 2.541473128489709, "learning_rate": 5.786887423082031e-06, "loss": 0.6481, "step": 13072 }, { "epoch": 0.0578733011642835, "grad_norm": 2.679242128155917, "learning_rate": 5.7873301164283505e-06, "loss": 0.6786, "step": 13073 }, { "epoch": 0.05787772809774669, "grad_norm": 2.9497566760633074, "learning_rate": 5.78777280977467e-06, "loss": 0.8325, "step": 13074 }, { "epoch": 0.05788215503120988, "grad_norm": 2.778833194333301, "learning_rate": 5.788215503120988e-06, "loss": 0.8822, "step": 13075 }, { "epoch": 0.05788658196467307, "grad_norm": 2.5309183271487496, "learning_rate": 5.788658196467307e-06, "loss": 0.824, "step": 13076 }, { "epoch": 0.05789100889813626, "grad_norm": 2.557698030166927, "learning_rate": 5.789100889813627e-06, "loss": 0.8322, "step": 13077 }, { "epoch": 0.05789543583159945, "grad_norm": 2.705531104925278, "learning_rate": 5.789543583159946e-06, "loss": 0.9179, "step": 13078 }, { "epoch": 0.05789986276506264, "grad_norm": 2.574368023023174, "learning_rate": 5.789986276506264e-06, "loss": 0.6448, "step": 13079 }, { "epoch": 0.05790428969852583, "grad_norm": 2.9854088303261137, "learning_rate": 5.7904289698525836e-06, "loss": 0.9104, "step": 13080 }, { "epoch": 0.05790871663198902, "grad_norm": 2.6462604169420674, "learning_rate": 5.790871663198903e-06, "loss": 0.7194, "step": 13081 }, { "epoch": 0.05791314356545221, "grad_norm": 2.709755150385271, "learning_rate": 5.7913143565452215e-06, "loss": 0.8998, "step": 13082 }, { "epoch": 0.057917570498915404, "grad_norm": 2.9475676946355382, "learning_rate": 5.791757049891541e-06, "loss": 0.6581, "step": 13083 }, { "epoch": 0.057921997432378594, "grad_norm": 2.9565263075397374, "learning_rate": 5.792199743237859e-06, "loss": 0.8008, "step": 13084 }, { "epoch": 0.057926424365841785, "grad_norm": 2.5694835855508322, "learning_rate": 5.792642436584178e-06, "loss": 0.9204, "step": 13085 }, { "epoch": 0.05793085129930497, "grad_norm": 2.992508239608337, "learning_rate": 5.793085129930498e-06, "loss": 1.0097, "step": 13086 }, { "epoch": 0.05793527823276816, "grad_norm": 2.684262663275034, "learning_rate": 5.793527823276817e-06, "loss": 0.7256, "step": 13087 }, { "epoch": 0.05793970516623135, "grad_norm": 2.627412075923892, "learning_rate": 5.793970516623135e-06, "loss": 0.6972, "step": 13088 }, { "epoch": 0.05794413209969454, "grad_norm": 2.5835454557916466, "learning_rate": 5.794413209969455e-06, "loss": 0.8407, "step": 13089 }, { "epoch": 0.05794855903315773, "grad_norm": 2.9752804257523864, "learning_rate": 5.794855903315774e-06, "loss": 1.1272, "step": 13090 }, { "epoch": 0.05795298596662092, "grad_norm": 2.2781985518015224, "learning_rate": 5.795298596662092e-06, "loss": 0.6997, "step": 13091 }, { "epoch": 0.05795741290008411, "grad_norm": 2.535617788265814, "learning_rate": 5.795741290008412e-06, "loss": 0.5973, "step": 13092 }, { "epoch": 0.0579618398335473, "grad_norm": 3.2514798759208032, "learning_rate": 5.79618398335473e-06, "loss": 0.7079, "step": 13093 }, { "epoch": 0.05796626676701049, "grad_norm": 2.5262202447694095, "learning_rate": 5.79662667670105e-06, "loss": 0.5428, "step": 13094 }, { "epoch": 0.05797069370047368, "grad_norm": 3.6602392095891356, "learning_rate": 5.797069370047369e-06, "loss": 1.0098, "step": 13095 }, { "epoch": 0.05797512063393687, "grad_norm": 3.049764790476628, "learning_rate": 5.797512063393688e-06, "loss": 1.1158, "step": 13096 }, { "epoch": 0.057979547567400064, "grad_norm": 2.4781024803110534, "learning_rate": 5.797954756740006e-06, "loss": 0.4063, "step": 13097 }, { "epoch": 0.057983974500863254, "grad_norm": 3.2966246037079747, "learning_rate": 5.798397450086326e-06, "loss": 0.8207, "step": 13098 }, { "epoch": 0.057988401434326445, "grad_norm": 3.9970006870088355, "learning_rate": 5.798840143432645e-06, "loss": 1.444, "step": 13099 }, { "epoch": 0.057992828367789635, "grad_norm": 3.6541988768139047, "learning_rate": 5.799282836778963e-06, "loss": 0.9149, "step": 13100 }, { "epoch": 0.05799725530125282, "grad_norm": 3.335833586362901, "learning_rate": 5.799725530125283e-06, "loss": 0.9668, "step": 13101 }, { "epoch": 0.05800168223471601, "grad_norm": 2.7662413558307697, "learning_rate": 5.800168223471602e-06, "loss": 0.87, "step": 13102 }, { "epoch": 0.0580061091681792, "grad_norm": 2.446424216951622, "learning_rate": 5.800610916817921e-06, "loss": 0.5152, "step": 13103 }, { "epoch": 0.05801053610164239, "grad_norm": 2.1971258259298403, "learning_rate": 5.80105361016424e-06, "loss": 0.4461, "step": 13104 }, { "epoch": 0.05801496303510558, "grad_norm": 2.904729110768847, "learning_rate": 5.8014963035105585e-06, "loss": 0.6575, "step": 13105 }, { "epoch": 0.05801938996856877, "grad_norm": 2.875845530142592, "learning_rate": 5.801938996856877e-06, "loss": 0.8382, "step": 13106 }, { "epoch": 0.05802381690203196, "grad_norm": 3.059022705032443, "learning_rate": 5.802381690203197e-06, "loss": 0.7199, "step": 13107 }, { "epoch": 0.05802824383549515, "grad_norm": 3.0246852445122245, "learning_rate": 5.802824383549516e-06, "loss": 0.9488, "step": 13108 }, { "epoch": 0.05803267076895834, "grad_norm": 2.9584467546396156, "learning_rate": 5.803267076895834e-06, "loss": 0.9161, "step": 13109 }, { "epoch": 0.05803709770242153, "grad_norm": 2.5105397031834804, "learning_rate": 5.803709770242154e-06, "loss": 0.7209, "step": 13110 }, { "epoch": 0.058041524635884724, "grad_norm": 3.2491352617323166, "learning_rate": 5.804152463588473e-06, "loss": 0.5542, "step": 13111 }, { "epoch": 0.058045951569347914, "grad_norm": 2.870559841524233, "learning_rate": 5.804595156934792e-06, "loss": 0.6844, "step": 13112 }, { "epoch": 0.058050378502811105, "grad_norm": 2.942650534790013, "learning_rate": 5.805037850281111e-06, "loss": 0.8891, "step": 13113 }, { "epoch": 0.058054805436274295, "grad_norm": 3.5561490392108315, "learning_rate": 5.8054805436274295e-06, "loss": 0.8759, "step": 13114 }, { "epoch": 0.058059232369737486, "grad_norm": 2.619114323336072, "learning_rate": 5.805923236973748e-06, "loss": 0.8516, "step": 13115 }, { "epoch": 0.05806365930320067, "grad_norm": 2.279621568727624, "learning_rate": 5.806365930320068e-06, "loss": 0.8118, "step": 13116 }, { "epoch": 0.05806808623666386, "grad_norm": 2.5960021988441477, "learning_rate": 5.806808623666387e-06, "loss": 0.7017, "step": 13117 }, { "epoch": 0.05807251317012705, "grad_norm": 2.947334561670837, "learning_rate": 5.807251317012705e-06, "loss": 0.6884, "step": 13118 }, { "epoch": 0.05807694010359024, "grad_norm": 2.4016256605994655, "learning_rate": 5.8076940103590255e-06, "loss": 0.6517, "step": 13119 }, { "epoch": 0.05808136703705343, "grad_norm": 3.0914877505540614, "learning_rate": 5.808136703705344e-06, "loss": 0.8099, "step": 13120 }, { "epoch": 0.05808579397051662, "grad_norm": 3.0522286801717042, "learning_rate": 5.8085793970516625e-06, "loss": 0.7318, "step": 13121 }, { "epoch": 0.05809022090397981, "grad_norm": 2.4315063845126277, "learning_rate": 5.809022090397982e-06, "loss": 0.5135, "step": 13122 }, { "epoch": 0.058094647837443, "grad_norm": 2.8927766482977675, "learning_rate": 5.8094647837443e-06, "loss": 0.6912, "step": 13123 }, { "epoch": 0.058099074770906194, "grad_norm": 2.8582780479288217, "learning_rate": 5.80990747709062e-06, "loss": 0.667, "step": 13124 }, { "epoch": 0.058103501704369384, "grad_norm": 2.5932861324130507, "learning_rate": 5.810350170436939e-06, "loss": 0.7134, "step": 13125 }, { "epoch": 0.058107928637832575, "grad_norm": 2.6450625430132604, "learning_rate": 5.810792863783258e-06, "loss": 0.9585, "step": 13126 }, { "epoch": 0.058112355571295765, "grad_norm": 2.8393646736112625, "learning_rate": 5.811235557129576e-06, "loss": 0.6014, "step": 13127 }, { "epoch": 0.058116782504758956, "grad_norm": 2.755070548305799, "learning_rate": 5.8116782504758964e-06, "loss": 0.7644, "step": 13128 }, { "epoch": 0.058121209438222146, "grad_norm": 2.6757378109380823, "learning_rate": 5.812120943822215e-06, "loss": 0.5782, "step": 13129 }, { "epoch": 0.05812563637168534, "grad_norm": 3.687438309076741, "learning_rate": 5.8125636371685335e-06, "loss": 0.8464, "step": 13130 }, { "epoch": 0.05813006330514852, "grad_norm": 2.759634876944061, "learning_rate": 5.813006330514853e-06, "loss": 0.7501, "step": 13131 }, { "epoch": 0.05813449023861171, "grad_norm": 2.530127392629403, "learning_rate": 5.813449023861172e-06, "loss": 0.4756, "step": 13132 }, { "epoch": 0.0581389171720749, "grad_norm": 3.0826998638405656, "learning_rate": 5.813891717207491e-06, "loss": 0.6232, "step": 13133 }, { "epoch": 0.05814334410553809, "grad_norm": 2.6967639693059535, "learning_rate": 5.81433441055381e-06, "loss": 0.8257, "step": 13134 }, { "epoch": 0.05814777103900128, "grad_norm": 2.8700052665496023, "learning_rate": 5.814777103900129e-06, "loss": 0.771, "step": 13135 }, { "epoch": 0.05815219797246447, "grad_norm": 3.779131757679395, "learning_rate": 5.815219797246447e-06, "loss": 1.0932, "step": 13136 }, { "epoch": 0.05815662490592766, "grad_norm": 3.4764677058806357, "learning_rate": 5.815662490592767e-06, "loss": 1.007, "step": 13137 }, { "epoch": 0.058161051839390854, "grad_norm": 2.54302162014272, "learning_rate": 5.816105183939086e-06, "loss": 0.7405, "step": 13138 }, { "epoch": 0.058165478772854044, "grad_norm": 2.632421902922327, "learning_rate": 5.816547877285404e-06, "loss": 0.7185, "step": 13139 }, { "epoch": 0.058169905706317235, "grad_norm": 2.93109157713503, "learning_rate": 5.816990570631725e-06, "loss": 0.893, "step": 13140 }, { "epoch": 0.058174332639780425, "grad_norm": 2.9283864098735166, "learning_rate": 5.817433263978043e-06, "loss": 0.8451, "step": 13141 }, { "epoch": 0.058178759573243616, "grad_norm": 2.7471661891626558, "learning_rate": 5.817875957324362e-06, "loss": 0.7672, "step": 13142 }, { "epoch": 0.058183186506706806, "grad_norm": 3.3082264340507654, "learning_rate": 5.818318650670681e-06, "loss": 0.7554, "step": 13143 }, { "epoch": 0.05818761344017, "grad_norm": 2.665999423623112, "learning_rate": 5.818761344017e-06, "loss": 0.7829, "step": 13144 }, { "epoch": 0.05819204037363319, "grad_norm": 2.3744134414368085, "learning_rate": 5.819204037363318e-06, "loss": 0.6512, "step": 13145 }, { "epoch": 0.05819646730709637, "grad_norm": 2.794213044936661, "learning_rate": 5.819646730709638e-06, "loss": 0.9468, "step": 13146 }, { "epoch": 0.05820089424055956, "grad_norm": 3.133052971641617, "learning_rate": 5.820089424055957e-06, "loss": 0.69, "step": 13147 }, { "epoch": 0.05820532117402275, "grad_norm": 3.3324029322809987, "learning_rate": 5.820532117402275e-06, "loss": 0.9363, "step": 13148 }, { "epoch": 0.05820974810748594, "grad_norm": 2.5986945132380628, "learning_rate": 5.820974810748596e-06, "loss": 0.649, "step": 13149 }, { "epoch": 0.05821417504094913, "grad_norm": 2.899050129445447, "learning_rate": 5.821417504094914e-06, "loss": 0.8301, "step": 13150 }, { "epoch": 0.05821860197441232, "grad_norm": 2.6103596992595373, "learning_rate": 5.821860197441233e-06, "loss": 0.704, "step": 13151 }, { "epoch": 0.058223028907875514, "grad_norm": 2.864359724272903, "learning_rate": 5.822302890787552e-06, "loss": 0.5728, "step": 13152 }, { "epoch": 0.058227455841338704, "grad_norm": 3.4372711557177853, "learning_rate": 5.8227455841338705e-06, "loss": 0.8232, "step": 13153 }, { "epoch": 0.058231882774801895, "grad_norm": 2.3828137163058325, "learning_rate": 5.82318827748019e-06, "loss": 0.6875, "step": 13154 }, { "epoch": 0.058236309708265085, "grad_norm": 2.2347287865359333, "learning_rate": 5.823630970826509e-06, "loss": 0.4444, "step": 13155 }, { "epoch": 0.058240736641728276, "grad_norm": 2.2877663436194813, "learning_rate": 5.824073664172828e-06, "loss": 0.6159, "step": 13156 }, { "epoch": 0.058245163575191466, "grad_norm": 3.9600801680705997, "learning_rate": 5.824516357519146e-06, "loss": 0.9696, "step": 13157 }, { "epoch": 0.05824959050865466, "grad_norm": 2.6561795531862327, "learning_rate": 5.8249590508654665e-06, "loss": 0.617, "step": 13158 }, { "epoch": 0.05825401744211785, "grad_norm": 2.5882259939109287, "learning_rate": 5.825401744211785e-06, "loss": 0.801, "step": 13159 }, { "epoch": 0.05825844437558104, "grad_norm": 3.105919647226945, "learning_rate": 5.825844437558104e-06, "loss": 0.5803, "step": 13160 }, { "epoch": 0.05826287130904423, "grad_norm": 3.0304745468562446, "learning_rate": 5.826287130904423e-06, "loss": 0.7258, "step": 13161 }, { "epoch": 0.05826729824250741, "grad_norm": 3.568760866135189, "learning_rate": 5.826729824250742e-06, "loss": 1.1188, "step": 13162 }, { "epoch": 0.0582717251759706, "grad_norm": 3.194523248572484, "learning_rate": 5.827172517597061e-06, "loss": 1.1082, "step": 13163 }, { "epoch": 0.05827615210943379, "grad_norm": 2.9049164620038863, "learning_rate": 5.82761521094338e-06, "loss": 0.7681, "step": 13164 }, { "epoch": 0.058280579042896984, "grad_norm": 2.9939177459944557, "learning_rate": 5.828057904289699e-06, "loss": 0.5993, "step": 13165 }, { "epoch": 0.058285005976360174, "grad_norm": 2.33547942721492, "learning_rate": 5.828500597636017e-06, "loss": 0.6861, "step": 13166 }, { "epoch": 0.058289432909823365, "grad_norm": 3.388278889290094, "learning_rate": 5.8289432909823375e-06, "loss": 0.7894, "step": 13167 }, { "epoch": 0.058293859843286555, "grad_norm": 2.5968638709380802, "learning_rate": 5.829385984328656e-06, "loss": 0.7404, "step": 13168 }, { "epoch": 0.058298286776749746, "grad_norm": 3.419199727091409, "learning_rate": 5.8298286776749745e-06, "loss": 1.0278, "step": 13169 }, { "epoch": 0.058302713710212936, "grad_norm": 2.5126145448144226, "learning_rate": 5.830271371021295e-06, "loss": 0.6232, "step": 13170 }, { "epoch": 0.05830714064367613, "grad_norm": 2.851076028748683, "learning_rate": 5.830714064367613e-06, "loss": 0.5798, "step": 13171 }, { "epoch": 0.05831156757713932, "grad_norm": 2.778442718701102, "learning_rate": 5.831156757713932e-06, "loss": 0.7493, "step": 13172 }, { "epoch": 0.05831599451060251, "grad_norm": 2.688817336102144, "learning_rate": 5.831599451060251e-06, "loss": 0.7557, "step": 13173 }, { "epoch": 0.0583204214440657, "grad_norm": 3.0574850093959203, "learning_rate": 5.83204214440657e-06, "loss": 0.962, "step": 13174 }, { "epoch": 0.05832484837752889, "grad_norm": 2.457722559726019, "learning_rate": 5.832484837752889e-06, "loss": 0.6239, "step": 13175 }, { "epoch": 0.05832927531099208, "grad_norm": 2.6456125220087676, "learning_rate": 5.8329275310992084e-06, "loss": 0.713, "step": 13176 }, { "epoch": 0.05833370224445526, "grad_norm": 1.9769723348031385, "learning_rate": 5.833370224445527e-06, "loss": 0.3558, "step": 13177 }, { "epoch": 0.05833812917791845, "grad_norm": 3.0535211920275267, "learning_rate": 5.8338129177918455e-06, "loss": 0.8641, "step": 13178 }, { "epoch": 0.058342556111381644, "grad_norm": 2.5485081639941893, "learning_rate": 5.834255611138166e-06, "loss": 0.6983, "step": 13179 }, { "epoch": 0.058346983044844834, "grad_norm": 2.9539229332380192, "learning_rate": 5.834698304484484e-06, "loss": 1.0326, "step": 13180 }, { "epoch": 0.058351409978308025, "grad_norm": 2.3869585535718896, "learning_rate": 5.835140997830803e-06, "loss": 0.5822, "step": 13181 }, { "epoch": 0.058355836911771215, "grad_norm": 2.842382175899351, "learning_rate": 5.835583691177122e-06, "loss": 0.7058, "step": 13182 }, { "epoch": 0.058360263845234406, "grad_norm": 3.4183435490120417, "learning_rate": 5.8360263845234415e-06, "loss": 0.9566, "step": 13183 }, { "epoch": 0.058364690778697596, "grad_norm": 2.7940309860076726, "learning_rate": 5.83646907786976e-06, "loss": 0.8901, "step": 13184 }, { "epoch": 0.05836911771216079, "grad_norm": 2.765722470849182, "learning_rate": 5.836911771216079e-06, "loss": 0.7256, "step": 13185 }, { "epoch": 0.05837354464562398, "grad_norm": 2.30675269051301, "learning_rate": 5.837354464562398e-06, "loss": 0.3977, "step": 13186 }, { "epoch": 0.05837797157908717, "grad_norm": 3.6830363991962267, "learning_rate": 5.837797157908716e-06, "loss": 1.212, "step": 13187 }, { "epoch": 0.05838239851255036, "grad_norm": 2.8070969304126714, "learning_rate": 5.838239851255037e-06, "loss": 0.9728, "step": 13188 }, { "epoch": 0.05838682544601355, "grad_norm": 2.369455427508993, "learning_rate": 5.838682544601355e-06, "loss": 0.4696, "step": 13189 }, { "epoch": 0.05839125237947674, "grad_norm": 2.2748926245174284, "learning_rate": 5.839125237947674e-06, "loss": 0.6167, "step": 13190 }, { "epoch": 0.05839567931293993, "grad_norm": 2.19853952835533, "learning_rate": 5.839567931293993e-06, "loss": 0.5835, "step": 13191 }, { "epoch": 0.05840010624640311, "grad_norm": 2.5565772346566416, "learning_rate": 5.8400106246403124e-06, "loss": 0.6835, "step": 13192 }, { "epoch": 0.058404533179866304, "grad_norm": 2.4186409369584636, "learning_rate": 5.840453317986631e-06, "loss": 0.7569, "step": 13193 }, { "epoch": 0.058408960113329494, "grad_norm": 3.187238077752117, "learning_rate": 5.84089601133295e-06, "loss": 0.9843, "step": 13194 }, { "epoch": 0.058413387046792685, "grad_norm": 2.507706695595924, "learning_rate": 5.841338704679269e-06, "loss": 0.6932, "step": 13195 }, { "epoch": 0.058417813980255875, "grad_norm": 3.55585438447007, "learning_rate": 5.841781398025587e-06, "loss": 1.0363, "step": 13196 }, { "epoch": 0.058422240913719066, "grad_norm": 2.59631864037655, "learning_rate": 5.842224091371908e-06, "loss": 0.7049, "step": 13197 }, { "epoch": 0.058426667847182256, "grad_norm": 2.393844356649971, "learning_rate": 5.842666784718226e-06, "loss": 0.4863, "step": 13198 }, { "epoch": 0.05843109478064545, "grad_norm": 2.2702360522085705, "learning_rate": 5.843109478064545e-06, "loss": 0.8474, "step": 13199 }, { "epoch": 0.05843552171410864, "grad_norm": 4.422041719359774, "learning_rate": 5.843552171410865e-06, "loss": 0.8023, "step": 13200 }, { "epoch": 0.05843994864757183, "grad_norm": 2.67623302205785, "learning_rate": 5.843994864757183e-06, "loss": 0.8105, "step": 13201 }, { "epoch": 0.05844437558103502, "grad_norm": 2.6833547067384043, "learning_rate": 5.844437558103502e-06, "loss": 0.5538, "step": 13202 }, { "epoch": 0.05844880251449821, "grad_norm": 2.355436625186942, "learning_rate": 5.844880251449821e-06, "loss": 0.441, "step": 13203 }, { "epoch": 0.0584532294479614, "grad_norm": 2.274889226249797, "learning_rate": 5.84532294479614e-06, "loss": 0.6108, "step": 13204 }, { "epoch": 0.05845765638142459, "grad_norm": 3.1307502386434334, "learning_rate": 5.845765638142459e-06, "loss": 0.7312, "step": 13205 }, { "epoch": 0.05846208331488778, "grad_norm": 3.7588482381572397, "learning_rate": 5.8462083314887785e-06, "loss": 1.0921, "step": 13206 }, { "epoch": 0.058466510248350964, "grad_norm": 2.6901732378558463, "learning_rate": 5.846651024835097e-06, "loss": 0.7183, "step": 13207 }, { "epoch": 0.058470937181814155, "grad_norm": 2.383136643014825, "learning_rate": 5.847093718181416e-06, "loss": 0.7129, "step": 13208 }, { "epoch": 0.058475364115277345, "grad_norm": 2.60947097738063, "learning_rate": 5.847536411527736e-06, "loss": 0.7681, "step": 13209 }, { "epoch": 0.058479791048740536, "grad_norm": 3.626997844765051, "learning_rate": 5.847979104874054e-06, "loss": 0.8986, "step": 13210 }, { "epoch": 0.058484217982203726, "grad_norm": 3.1621969086106487, "learning_rate": 5.848421798220373e-06, "loss": 1.0467, "step": 13211 }, { "epoch": 0.05848864491566692, "grad_norm": 2.730413303892248, "learning_rate": 5.848864491566692e-06, "loss": 1.1176, "step": 13212 }, { "epoch": 0.05849307184913011, "grad_norm": 3.1122701262267043, "learning_rate": 5.849307184913012e-06, "loss": 0.8257, "step": 13213 }, { "epoch": 0.0584974987825933, "grad_norm": 2.9669205821929707, "learning_rate": 5.84974987825933e-06, "loss": 0.8819, "step": 13214 }, { "epoch": 0.05850192571605649, "grad_norm": 2.9038482104302865, "learning_rate": 5.8501925716056495e-06, "loss": 0.6286, "step": 13215 }, { "epoch": 0.05850635264951968, "grad_norm": 2.8414698807983445, "learning_rate": 5.850635264951968e-06, "loss": 1.0613, "step": 13216 }, { "epoch": 0.05851077958298287, "grad_norm": 2.478482438069084, "learning_rate": 5.8510779582982865e-06, "loss": 0.479, "step": 13217 }, { "epoch": 0.05851520651644606, "grad_norm": 2.9660030417171037, "learning_rate": 5.851520651644607e-06, "loss": 0.8359, "step": 13218 }, { "epoch": 0.05851963344990925, "grad_norm": 2.50530363132635, "learning_rate": 5.851963344990925e-06, "loss": 0.8178, "step": 13219 }, { "epoch": 0.05852406038337244, "grad_norm": 2.9847885254367768, "learning_rate": 5.852406038337244e-06, "loss": 0.892, "step": 13220 }, { "epoch": 0.05852848731683563, "grad_norm": 2.717871465814794, "learning_rate": 5.852848731683564e-06, "loss": 0.9127, "step": 13221 }, { "epoch": 0.058532914250298815, "grad_norm": 2.7586835615308076, "learning_rate": 5.8532914250298825e-06, "loss": 0.8019, "step": 13222 }, { "epoch": 0.058537341183762005, "grad_norm": 3.3463815256474527, "learning_rate": 5.853734118376201e-06, "loss": 0.6789, "step": 13223 }, { "epoch": 0.058541768117225196, "grad_norm": 2.5743518925315723, "learning_rate": 5.8541768117225204e-06, "loss": 0.7811, "step": 13224 }, { "epoch": 0.058546195050688386, "grad_norm": 3.3723683172235117, "learning_rate": 5.854619505068839e-06, "loss": 0.7134, "step": 13225 }, { "epoch": 0.05855062198415158, "grad_norm": 3.176489210205252, "learning_rate": 5.8550621984151575e-06, "loss": 0.7901, "step": 13226 }, { "epoch": 0.05855504891761477, "grad_norm": 2.792910681032592, "learning_rate": 5.855504891761478e-06, "loss": 0.8786, "step": 13227 }, { "epoch": 0.05855947585107796, "grad_norm": 2.619332743251483, "learning_rate": 5.855947585107796e-06, "loss": 0.659, "step": 13228 }, { "epoch": 0.05856390278454115, "grad_norm": 3.6836455929339866, "learning_rate": 5.856390278454115e-06, "loss": 0.9291, "step": 13229 }, { "epoch": 0.05856832971800434, "grad_norm": 2.391553361897007, "learning_rate": 5.856832971800435e-06, "loss": 0.6604, "step": 13230 }, { "epoch": 0.05857275665146753, "grad_norm": 2.4816464262396734, "learning_rate": 5.8572756651467535e-06, "loss": 0.4329, "step": 13231 }, { "epoch": 0.05857718358493072, "grad_norm": 3.337711285585026, "learning_rate": 5.857718358493072e-06, "loss": 0.7505, "step": 13232 }, { "epoch": 0.05858161051839391, "grad_norm": 2.3516107486261477, "learning_rate": 5.858161051839391e-06, "loss": 0.7071, "step": 13233 }, { "epoch": 0.0585860374518571, "grad_norm": 2.7938466542852067, "learning_rate": 5.85860374518571e-06, "loss": 0.5291, "step": 13234 }, { "epoch": 0.05859046438532029, "grad_norm": 2.5213721145081274, "learning_rate": 5.859046438532029e-06, "loss": 0.6535, "step": 13235 }, { "epoch": 0.05859489131878348, "grad_norm": 2.6428086421646038, "learning_rate": 5.859489131878349e-06, "loss": 0.6596, "step": 13236 }, { "epoch": 0.058599318252246665, "grad_norm": 3.4995556794137705, "learning_rate": 5.859931825224667e-06, "loss": 0.9694, "step": 13237 }, { "epoch": 0.058603745185709856, "grad_norm": 2.499522196970572, "learning_rate": 5.860374518570986e-06, "loss": 0.7798, "step": 13238 }, { "epoch": 0.058608172119173046, "grad_norm": 2.7453783284633553, "learning_rate": 5.860817211917306e-06, "loss": 0.6492, "step": 13239 }, { "epoch": 0.05861259905263624, "grad_norm": 2.7595857824297236, "learning_rate": 5.8612599052636244e-06, "loss": 0.8912, "step": 13240 }, { "epoch": 0.05861702598609943, "grad_norm": 2.1628519401322452, "learning_rate": 5.861702598609943e-06, "loss": 0.5052, "step": 13241 }, { "epoch": 0.05862145291956262, "grad_norm": 4.760069585767583, "learning_rate": 5.862145291956262e-06, "loss": 1.0587, "step": 13242 }, { "epoch": 0.05862587985302581, "grad_norm": 2.6979028502351814, "learning_rate": 5.862587985302582e-06, "loss": 0.7378, "step": 13243 }, { "epoch": 0.058630306786489, "grad_norm": 2.6896734900912014, "learning_rate": 5.8630306786489e-06, "loss": 0.6205, "step": 13244 }, { "epoch": 0.05863473371995219, "grad_norm": 2.392820320186749, "learning_rate": 5.86347337199522e-06, "loss": 0.4883, "step": 13245 }, { "epoch": 0.05863916065341538, "grad_norm": 2.445791292693798, "learning_rate": 5.863916065341538e-06, "loss": 0.708, "step": 13246 }, { "epoch": 0.05864358758687857, "grad_norm": 3.0615843099843176, "learning_rate": 5.864358758687857e-06, "loss": 0.8585, "step": 13247 }, { "epoch": 0.05864801452034176, "grad_norm": 2.5316696831622476, "learning_rate": 5.864801452034177e-06, "loss": 0.5909, "step": 13248 }, { "epoch": 0.05865244145380495, "grad_norm": 3.1515429661598473, "learning_rate": 5.865244145380495e-06, "loss": 1.0714, "step": 13249 }, { "epoch": 0.05865686838726814, "grad_norm": 2.514767180654248, "learning_rate": 5.865686838726814e-06, "loss": 0.7217, "step": 13250 }, { "epoch": 0.05866129532073133, "grad_norm": 2.8390836841716194, "learning_rate": 5.866129532073134e-06, "loss": 0.7353, "step": 13251 }, { "epoch": 0.058665722254194516, "grad_norm": 2.8698319863492063, "learning_rate": 5.866572225419453e-06, "loss": 0.6281, "step": 13252 }, { "epoch": 0.05867014918765771, "grad_norm": 2.8189618889153145, "learning_rate": 5.867014918765771e-06, "loss": 0.8411, "step": 13253 }, { "epoch": 0.0586745761211209, "grad_norm": 2.3633902848463397, "learning_rate": 5.8674576121120905e-06, "loss": 0.755, "step": 13254 }, { "epoch": 0.05867900305458409, "grad_norm": 3.175123526622182, "learning_rate": 5.867900305458409e-06, "loss": 0.7773, "step": 13255 }, { "epoch": 0.05868342998804728, "grad_norm": 2.741387991271075, "learning_rate": 5.8683429988047284e-06, "loss": 0.7941, "step": 13256 }, { "epoch": 0.05868785692151047, "grad_norm": 2.5611938948886848, "learning_rate": 5.868785692151048e-06, "loss": 0.5853, "step": 13257 }, { "epoch": 0.05869228385497366, "grad_norm": 3.230934479316934, "learning_rate": 5.869228385497366e-06, "loss": 0.7429, "step": 13258 }, { "epoch": 0.05869671078843685, "grad_norm": 3.709849213897086, "learning_rate": 5.869671078843685e-06, "loss": 0.8742, "step": 13259 }, { "epoch": 0.05870113772190004, "grad_norm": 3.0591864095854393, "learning_rate": 5.870113772190005e-06, "loss": 1.1254, "step": 13260 }, { "epoch": 0.05870556465536323, "grad_norm": 3.0679381429581296, "learning_rate": 5.870556465536324e-06, "loss": 0.8101, "step": 13261 }, { "epoch": 0.05870999158882642, "grad_norm": 2.4157225848931807, "learning_rate": 5.870999158882642e-06, "loss": 0.8641, "step": 13262 }, { "epoch": 0.05871441852228961, "grad_norm": 2.4487382607219366, "learning_rate": 5.8714418522289615e-06, "loss": 0.8293, "step": 13263 }, { "epoch": 0.0587188454557528, "grad_norm": 2.538443082548906, "learning_rate": 5.87188454557528e-06, "loss": 0.8028, "step": 13264 }, { "epoch": 0.05872327238921599, "grad_norm": 2.5127298638031075, "learning_rate": 5.872327238921599e-06, "loss": 0.5885, "step": 13265 }, { "epoch": 0.05872769932267918, "grad_norm": 2.5682424964882054, "learning_rate": 5.872769932267919e-06, "loss": 0.7358, "step": 13266 }, { "epoch": 0.05873212625614237, "grad_norm": 3.0344178105771658, "learning_rate": 5.873212625614237e-06, "loss": 0.8276, "step": 13267 }, { "epoch": 0.05873655318960556, "grad_norm": 2.9670873772543493, "learning_rate": 5.873655318960556e-06, "loss": 0.7184, "step": 13268 }, { "epoch": 0.05874098012306875, "grad_norm": 2.7470810469348756, "learning_rate": 5.874098012306876e-06, "loss": 0.8173, "step": 13269 }, { "epoch": 0.05874540705653194, "grad_norm": 2.521040535480246, "learning_rate": 5.8745407056531945e-06, "loss": 0.7788, "step": 13270 }, { "epoch": 0.05874983398999513, "grad_norm": 2.3387239806381284, "learning_rate": 5.874983398999513e-06, "loss": 0.6318, "step": 13271 }, { "epoch": 0.05875426092345832, "grad_norm": 3.114137953830849, "learning_rate": 5.8754260923458324e-06, "loss": 0.6907, "step": 13272 }, { "epoch": 0.05875868785692151, "grad_norm": 2.362290939763483, "learning_rate": 5.875868785692152e-06, "loss": 0.6102, "step": 13273 }, { "epoch": 0.0587631147903847, "grad_norm": 2.7057543868922433, "learning_rate": 5.87631147903847e-06, "loss": 0.8627, "step": 13274 }, { "epoch": 0.05876754172384789, "grad_norm": 2.7823201854393136, "learning_rate": 5.87675417238479e-06, "loss": 0.7612, "step": 13275 }, { "epoch": 0.05877196865731108, "grad_norm": 2.7959924844239197, "learning_rate": 5.877196865731108e-06, "loss": 0.8095, "step": 13276 }, { "epoch": 0.05877639559077427, "grad_norm": 2.720699253376909, "learning_rate": 5.877639559077427e-06, "loss": 0.8173, "step": 13277 }, { "epoch": 0.05878082252423746, "grad_norm": 2.448655299165554, "learning_rate": 5.878082252423747e-06, "loss": 0.5956, "step": 13278 }, { "epoch": 0.05878524945770065, "grad_norm": 3.0313518179786656, "learning_rate": 5.8785249457700655e-06, "loss": 0.9515, "step": 13279 }, { "epoch": 0.05878967639116384, "grad_norm": 2.7546041059909734, "learning_rate": 5.878967639116384e-06, "loss": 0.9331, "step": 13280 }, { "epoch": 0.058794103324627034, "grad_norm": 2.617880938187823, "learning_rate": 5.879410332462704e-06, "loss": 0.7615, "step": 13281 }, { "epoch": 0.05879853025809022, "grad_norm": 2.9817543727818454, "learning_rate": 5.879853025809023e-06, "loss": 0.6247, "step": 13282 }, { "epoch": 0.05880295719155341, "grad_norm": 2.7439294068237308, "learning_rate": 5.880295719155341e-06, "loss": 0.8425, "step": 13283 }, { "epoch": 0.0588073841250166, "grad_norm": 4.378955715997256, "learning_rate": 5.880738412501661e-06, "loss": 1.4261, "step": 13284 }, { "epoch": 0.05881181105847979, "grad_norm": 2.2816253073482167, "learning_rate": 5.881181105847979e-06, "loss": 0.7516, "step": 13285 }, { "epoch": 0.05881623799194298, "grad_norm": 2.757765946865024, "learning_rate": 5.8816237991942985e-06, "loss": 0.8577, "step": 13286 }, { "epoch": 0.05882066492540617, "grad_norm": 3.154134254094324, "learning_rate": 5.882066492540618e-06, "loss": 0.9105, "step": 13287 }, { "epoch": 0.05882509185886936, "grad_norm": 2.4445022937361562, "learning_rate": 5.8825091858869364e-06, "loss": 0.6872, "step": 13288 }, { "epoch": 0.05882951879233255, "grad_norm": 2.5452552492396983, "learning_rate": 5.882951879233255e-06, "loss": 0.7945, "step": 13289 }, { "epoch": 0.05883394572579574, "grad_norm": 3.5039171806868623, "learning_rate": 5.883394572579575e-06, "loss": 0.9715, "step": 13290 }, { "epoch": 0.05883837265925893, "grad_norm": 2.4351488681917375, "learning_rate": 5.883837265925894e-06, "loss": 0.548, "step": 13291 }, { "epoch": 0.05884279959272212, "grad_norm": 2.781529771506169, "learning_rate": 5.884279959272212e-06, "loss": 0.9216, "step": 13292 }, { "epoch": 0.05884722652618531, "grad_norm": 2.433594739379205, "learning_rate": 5.884722652618532e-06, "loss": 0.6639, "step": 13293 }, { "epoch": 0.058851653459648504, "grad_norm": 2.546455807915599, "learning_rate": 5.885165345964851e-06, "loss": 0.7252, "step": 13294 }, { "epoch": 0.058856080393111694, "grad_norm": 2.2775697365768033, "learning_rate": 5.8856080393111695e-06, "loss": 0.7456, "step": 13295 }, { "epoch": 0.058860507326574885, "grad_norm": 3.5004464350813063, "learning_rate": 5.886050732657489e-06, "loss": 1.2043, "step": 13296 }, { "epoch": 0.05886493426003807, "grad_norm": 2.2917351224841416, "learning_rate": 5.886493426003807e-06, "loss": 0.4009, "step": 13297 }, { "epoch": 0.05886936119350126, "grad_norm": 2.922477991663611, "learning_rate": 5.886936119350126e-06, "loss": 0.8572, "step": 13298 }, { "epoch": 0.05887378812696445, "grad_norm": 2.314504739649327, "learning_rate": 5.887378812696446e-06, "loss": 0.6574, "step": 13299 }, { "epoch": 0.05887821506042764, "grad_norm": 2.5987643271979834, "learning_rate": 5.887821506042765e-06, "loss": 0.8, "step": 13300 }, { "epoch": 0.05888264199389083, "grad_norm": 2.3640016584397365, "learning_rate": 5.888264199389083e-06, "loss": 0.8788, "step": 13301 }, { "epoch": 0.05888706892735402, "grad_norm": 2.9134354447689392, "learning_rate": 5.888706892735403e-06, "loss": 1.1102, "step": 13302 }, { "epoch": 0.05889149586081721, "grad_norm": 2.7237206011854864, "learning_rate": 5.889149586081722e-06, "loss": 0.6704, "step": 13303 }, { "epoch": 0.0588959227942804, "grad_norm": 3.1169148076292625, "learning_rate": 5.8895922794280404e-06, "loss": 0.7306, "step": 13304 }, { "epoch": 0.05890034972774359, "grad_norm": 2.9315415156599895, "learning_rate": 5.89003497277436e-06, "loss": 0.9726, "step": 13305 }, { "epoch": 0.05890477666120678, "grad_norm": 2.606062367654902, "learning_rate": 5.890477666120678e-06, "loss": 0.6602, "step": 13306 }, { "epoch": 0.05890920359466997, "grad_norm": 3.2832954191331956, "learning_rate": 5.890920359466997e-06, "loss": 1.0588, "step": 13307 }, { "epoch": 0.058913630528133164, "grad_norm": 2.78577623702866, "learning_rate": 5.891363052813317e-06, "loss": 0.7883, "step": 13308 }, { "epoch": 0.058918057461596354, "grad_norm": 2.50973792806753, "learning_rate": 5.891805746159636e-06, "loss": 0.4936, "step": 13309 }, { "epoch": 0.058922484395059545, "grad_norm": 2.9525522601919403, "learning_rate": 5.892248439505954e-06, "loss": 0.899, "step": 13310 }, { "epoch": 0.058926911328522735, "grad_norm": 2.467730660960602, "learning_rate": 5.892691132852274e-06, "loss": 0.6847, "step": 13311 }, { "epoch": 0.058931338261985926, "grad_norm": 3.2010922103541857, "learning_rate": 5.893133826198593e-06, "loss": 0.9644, "step": 13312 }, { "epoch": 0.05893576519544911, "grad_norm": 2.7081551149538816, "learning_rate": 5.893576519544911e-06, "loss": 0.7128, "step": 13313 }, { "epoch": 0.0589401921289123, "grad_norm": 2.3417380956745806, "learning_rate": 5.894019212891231e-06, "loss": 0.6305, "step": 13314 }, { "epoch": 0.05894461906237549, "grad_norm": 3.1899371617618675, "learning_rate": 5.894461906237549e-06, "loss": 0.9139, "step": 13315 }, { "epoch": 0.05894904599583868, "grad_norm": 2.489974488894733, "learning_rate": 5.894904599583869e-06, "loss": 0.6861, "step": 13316 }, { "epoch": 0.05895347292930187, "grad_norm": 2.785337852334539, "learning_rate": 5.895347292930188e-06, "loss": 0.8268, "step": 13317 }, { "epoch": 0.05895789986276506, "grad_norm": 2.631409344565849, "learning_rate": 5.8957899862765065e-06, "loss": 0.6491, "step": 13318 }, { "epoch": 0.05896232679622825, "grad_norm": 2.857298541251986, "learning_rate": 5.896232679622825e-06, "loss": 0.7956, "step": 13319 }, { "epoch": 0.05896675372969144, "grad_norm": 2.3696350123572607, "learning_rate": 5.896675372969145e-06, "loss": 0.5068, "step": 13320 }, { "epoch": 0.05897118066315463, "grad_norm": 2.8776132373695744, "learning_rate": 5.897118066315464e-06, "loss": 0.6789, "step": 13321 }, { "epoch": 0.058975607596617824, "grad_norm": 2.508468072676448, "learning_rate": 5.897560759661782e-06, "loss": 0.6656, "step": 13322 }, { "epoch": 0.058980034530081014, "grad_norm": 2.8530215601484312, "learning_rate": 5.898003453008102e-06, "loss": 0.6767, "step": 13323 }, { "epoch": 0.058984461463544205, "grad_norm": 2.4481069946425493, "learning_rate": 5.898446146354421e-06, "loss": 0.8798, "step": 13324 }, { "epoch": 0.058988888397007395, "grad_norm": 4.1812721886434545, "learning_rate": 5.89888883970074e-06, "loss": 1.2468, "step": 13325 }, { "epoch": 0.058993315330470586, "grad_norm": 2.5016712010770656, "learning_rate": 5.899331533047059e-06, "loss": 0.803, "step": 13326 }, { "epoch": 0.058997742263933776, "grad_norm": 3.16526993283879, "learning_rate": 5.8997742263933775e-06, "loss": 1.0017, "step": 13327 }, { "epoch": 0.05900216919739696, "grad_norm": 2.5007257618769687, "learning_rate": 5.900216919739696e-06, "loss": 0.6192, "step": 13328 }, { "epoch": 0.05900659613086015, "grad_norm": 3.4126545637515493, "learning_rate": 5.900659613086016e-06, "loss": 1.0568, "step": 13329 }, { "epoch": 0.05901102306432334, "grad_norm": 2.8132255182314454, "learning_rate": 5.901102306432335e-06, "loss": 0.4542, "step": 13330 }, { "epoch": 0.05901544999778653, "grad_norm": 2.6398347504357913, "learning_rate": 5.901544999778653e-06, "loss": 0.6726, "step": 13331 }, { "epoch": 0.05901987693124972, "grad_norm": 2.5606152474319592, "learning_rate": 5.9019876931249735e-06, "loss": 0.7718, "step": 13332 }, { "epoch": 0.05902430386471291, "grad_norm": 3.392741258019436, "learning_rate": 5.902430386471292e-06, "loss": 0.7819, "step": 13333 }, { "epoch": 0.0590287307981761, "grad_norm": 2.584237224839374, "learning_rate": 5.9028730798176105e-06, "loss": 0.8845, "step": 13334 }, { "epoch": 0.059033157731639294, "grad_norm": 2.78061912648936, "learning_rate": 5.90331577316393e-06, "loss": 0.9112, "step": 13335 }, { "epoch": 0.059037584665102484, "grad_norm": 2.79276221581001, "learning_rate": 5.9037584665102484e-06, "loss": 0.8094, "step": 13336 }, { "epoch": 0.059042011598565675, "grad_norm": 4.0567209207150805, "learning_rate": 5.904201159856568e-06, "loss": 0.7317, "step": 13337 }, { "epoch": 0.059046438532028865, "grad_norm": 2.472450747807238, "learning_rate": 5.904643853202887e-06, "loss": 0.6505, "step": 13338 }, { "epoch": 0.059050865465492056, "grad_norm": 3.809634064288324, "learning_rate": 5.905086546549206e-06, "loss": 0.8637, "step": 13339 }, { "epoch": 0.059055292398955246, "grad_norm": 2.3482644177273566, "learning_rate": 5.905529239895524e-06, "loss": 0.6065, "step": 13340 }, { "epoch": 0.05905971933241844, "grad_norm": 2.744010133075569, "learning_rate": 5.9059719332418444e-06, "loss": 0.8481, "step": 13341 }, { "epoch": 0.05906414626588163, "grad_norm": 2.3586042956353417, "learning_rate": 5.906414626588163e-06, "loss": 0.4807, "step": 13342 }, { "epoch": 0.05906857319934481, "grad_norm": 3.071257332665557, "learning_rate": 5.9068573199344815e-06, "loss": 0.7978, "step": 13343 }, { "epoch": 0.059073000132808, "grad_norm": 2.711442629241422, "learning_rate": 5.907300013280801e-06, "loss": 0.8075, "step": 13344 }, { "epoch": 0.05907742706627119, "grad_norm": 2.460035716226167, "learning_rate": 5.907742706627119e-06, "loss": 0.7174, "step": 13345 }, { "epoch": 0.05908185399973438, "grad_norm": 2.8754877330228603, "learning_rate": 5.908185399973439e-06, "loss": 0.7091, "step": 13346 }, { "epoch": 0.05908628093319757, "grad_norm": 2.5066324827132815, "learning_rate": 5.908628093319758e-06, "loss": 0.701, "step": 13347 }, { "epoch": 0.05909070786666076, "grad_norm": 3.905805579113932, "learning_rate": 5.909070786666077e-06, "loss": 0.9162, "step": 13348 }, { "epoch": 0.059095134800123954, "grad_norm": 3.391418131897299, "learning_rate": 5.909513480012395e-06, "loss": 1.0139, "step": 13349 }, { "epoch": 0.059099561733587144, "grad_norm": 2.5903942244273734, "learning_rate": 5.909956173358715e-06, "loss": 0.6143, "step": 13350 }, { "epoch": 0.059103988667050335, "grad_norm": 2.8866013244878372, "learning_rate": 5.910398866705034e-06, "loss": 0.8823, "step": 13351 }, { "epoch": 0.059108415600513525, "grad_norm": 2.403002022633297, "learning_rate": 5.9108415600513524e-06, "loss": 0.5904, "step": 13352 }, { "epoch": 0.059112842533976716, "grad_norm": 2.88107729009917, "learning_rate": 5.911284253397672e-06, "loss": 1.0563, "step": 13353 }, { "epoch": 0.059117269467439906, "grad_norm": 3.0278771690345274, "learning_rate": 5.911726946743991e-06, "loss": 0.8381, "step": 13354 }, { "epoch": 0.0591216964009031, "grad_norm": 2.7462710749479595, "learning_rate": 5.91216964009031e-06, "loss": 0.8615, "step": 13355 }, { "epoch": 0.05912612333436629, "grad_norm": 2.5479422690491917, "learning_rate": 5.912612333436629e-06, "loss": 0.7378, "step": 13356 }, { "epoch": 0.05913055026782948, "grad_norm": 2.7029564373027837, "learning_rate": 5.913055026782948e-06, "loss": 0.6598, "step": 13357 }, { "epoch": 0.05913497720129266, "grad_norm": 3.0281994716842107, "learning_rate": 5.913497720129266e-06, "loss": 0.9019, "step": 13358 }, { "epoch": 0.05913940413475585, "grad_norm": 2.586326983982685, "learning_rate": 5.913940413475586e-06, "loss": 0.7094, "step": 13359 }, { "epoch": 0.05914383106821904, "grad_norm": 3.062445091694374, "learning_rate": 5.914383106821905e-06, "loss": 0.9156, "step": 13360 }, { "epoch": 0.05914825800168223, "grad_norm": 2.4191851785481493, "learning_rate": 5.914825800168223e-06, "loss": 0.4492, "step": 13361 }, { "epoch": 0.05915268493514542, "grad_norm": 3.102923166623983, "learning_rate": 5.915268493514544e-06, "loss": 0.7953, "step": 13362 }, { "epoch": 0.059157111868608614, "grad_norm": 2.555204617307028, "learning_rate": 5.915711186860862e-06, "loss": 0.7455, "step": 13363 }, { "epoch": 0.059161538802071804, "grad_norm": 2.754665420312097, "learning_rate": 5.916153880207181e-06, "loss": 0.8885, "step": 13364 }, { "epoch": 0.059165965735534995, "grad_norm": 2.6713036718464953, "learning_rate": 5.9165965735535e-06, "loss": 0.8833, "step": 13365 }, { "epoch": 0.059170392668998185, "grad_norm": 3.122512707285682, "learning_rate": 5.9170392668998185e-06, "loss": 0.797, "step": 13366 }, { "epoch": 0.059174819602461376, "grad_norm": 2.512711883024853, "learning_rate": 5.917481960246138e-06, "loss": 0.7449, "step": 13367 }, { "epoch": 0.059179246535924566, "grad_norm": 2.8083072268850557, "learning_rate": 5.917924653592457e-06, "loss": 0.8703, "step": 13368 }, { "epoch": 0.05918367346938776, "grad_norm": 2.7486647157936317, "learning_rate": 5.918367346938776e-06, "loss": 0.7908, "step": 13369 }, { "epoch": 0.05918810040285095, "grad_norm": 3.5127214637697843, "learning_rate": 5.918810040285094e-06, "loss": 1.0759, "step": 13370 }, { "epoch": 0.05919252733631414, "grad_norm": 3.1524509153277633, "learning_rate": 5.9192527336314145e-06, "loss": 0.8216, "step": 13371 }, { "epoch": 0.05919695426977733, "grad_norm": 3.2460318859593373, "learning_rate": 5.919695426977733e-06, "loss": 1.0081, "step": 13372 }, { "epoch": 0.05920138120324051, "grad_norm": 2.1115762773867313, "learning_rate": 5.920138120324052e-06, "loss": 0.4373, "step": 13373 }, { "epoch": 0.0592058081367037, "grad_norm": 3.3567413970522786, "learning_rate": 5.920580813670371e-06, "loss": 0.5835, "step": 13374 }, { "epoch": 0.05921023507016689, "grad_norm": 2.541124032925638, "learning_rate": 5.92102350701669e-06, "loss": 0.6205, "step": 13375 }, { "epoch": 0.059214662003630084, "grad_norm": 2.1405748050809277, "learning_rate": 5.921466200363009e-06, "loss": 0.2986, "step": 13376 }, { "epoch": 0.059219088937093274, "grad_norm": 3.0406749055045617, "learning_rate": 5.921908893709328e-06, "loss": 0.8414, "step": 13377 }, { "epoch": 0.059223515870556465, "grad_norm": 3.3740349270326706, "learning_rate": 5.922351587055647e-06, "loss": 1.2049, "step": 13378 }, { "epoch": 0.059227942804019655, "grad_norm": 3.0440624303082866, "learning_rate": 5.922794280401965e-06, "loss": 0.6347, "step": 13379 }, { "epoch": 0.059232369737482846, "grad_norm": 2.861810628140162, "learning_rate": 5.9232369737482855e-06, "loss": 0.8893, "step": 13380 }, { "epoch": 0.059236796670946036, "grad_norm": 2.7418006784329685, "learning_rate": 5.923679667094604e-06, "loss": 0.77, "step": 13381 }, { "epoch": 0.05924122360440923, "grad_norm": 3.6010270306275274, "learning_rate": 5.9241223604409225e-06, "loss": 0.672, "step": 13382 }, { "epoch": 0.05924565053787242, "grad_norm": 2.295600844716302, "learning_rate": 5.924565053787243e-06, "loss": 0.7127, "step": 13383 }, { "epoch": 0.05925007747133561, "grad_norm": 3.342344204057721, "learning_rate": 5.925007747133561e-06, "loss": 0.7823, "step": 13384 }, { "epoch": 0.0592545044047988, "grad_norm": 2.537756915045048, "learning_rate": 5.92545044047988e-06, "loss": 0.7528, "step": 13385 }, { "epoch": 0.05925893133826199, "grad_norm": 3.3701655239273114, "learning_rate": 5.925893133826199e-06, "loss": 1.076, "step": 13386 }, { "epoch": 0.05926335827172518, "grad_norm": 2.374009555625774, "learning_rate": 5.926335827172518e-06, "loss": 0.7527, "step": 13387 }, { "epoch": 0.05926778520518836, "grad_norm": 2.716079092088397, "learning_rate": 5.926778520518836e-06, "loss": 0.9427, "step": 13388 }, { "epoch": 0.05927221213865155, "grad_norm": 2.6218237717159147, "learning_rate": 5.9272212138651564e-06, "loss": 0.5893, "step": 13389 }, { "epoch": 0.059276639072114744, "grad_norm": 2.6977531060138618, "learning_rate": 5.927663907211475e-06, "loss": 0.6288, "step": 13390 }, { "epoch": 0.059281066005577934, "grad_norm": 2.3080016754418082, "learning_rate": 5.9281066005577935e-06, "loss": 0.7407, "step": 13391 }, { "epoch": 0.059285492939041125, "grad_norm": 2.2710305512212043, "learning_rate": 5.928549293904114e-06, "loss": 0.4771, "step": 13392 }, { "epoch": 0.059289919872504315, "grad_norm": 2.6188498825311974, "learning_rate": 5.928991987250432e-06, "loss": 0.7394, "step": 13393 }, { "epoch": 0.059294346805967506, "grad_norm": 2.695726353387156, "learning_rate": 5.929434680596751e-06, "loss": 0.9281, "step": 13394 }, { "epoch": 0.059298773739430696, "grad_norm": 2.6534264887008345, "learning_rate": 5.92987737394307e-06, "loss": 0.5014, "step": 13395 }, { "epoch": 0.05930320067289389, "grad_norm": 2.801540392538502, "learning_rate": 5.930320067289389e-06, "loss": 0.987, "step": 13396 }, { "epoch": 0.05930762760635708, "grad_norm": 2.6980870027786334, "learning_rate": 5.930762760635708e-06, "loss": 0.6119, "step": 13397 }, { "epoch": 0.05931205453982027, "grad_norm": 3.0089897865319437, "learning_rate": 5.931205453982027e-06, "loss": 0.697, "step": 13398 }, { "epoch": 0.05931648147328346, "grad_norm": 2.707008788776108, "learning_rate": 5.931648147328346e-06, "loss": 0.8719, "step": 13399 }, { "epoch": 0.05932090840674665, "grad_norm": 2.4227484395668912, "learning_rate": 5.9320908406746644e-06, "loss": 0.6594, "step": 13400 }, { "epoch": 0.05932533534020984, "grad_norm": 3.6926585415322686, "learning_rate": 5.932533534020985e-06, "loss": 1.0944, "step": 13401 }, { "epoch": 0.05932976227367303, "grad_norm": 2.5699283375911888, "learning_rate": 5.932976227367303e-06, "loss": 0.6802, "step": 13402 }, { "epoch": 0.05933418920713621, "grad_norm": 2.2119657840253737, "learning_rate": 5.933418920713622e-06, "loss": 0.4329, "step": 13403 }, { "epoch": 0.059338616140599404, "grad_norm": 2.8171166905595606, "learning_rate": 5.933861614059941e-06, "loss": 0.5219, "step": 13404 }, { "epoch": 0.059343043074062594, "grad_norm": 2.660404673111868, "learning_rate": 5.9343043074062604e-06, "loss": 0.5985, "step": 13405 }, { "epoch": 0.059347470007525785, "grad_norm": 2.618944222734004, "learning_rate": 5.934747000752579e-06, "loss": 0.6612, "step": 13406 }, { "epoch": 0.059351896940988975, "grad_norm": 3.0033071762503325, "learning_rate": 5.935189694098898e-06, "loss": 0.7131, "step": 13407 }, { "epoch": 0.059356323874452166, "grad_norm": 2.773646615099749, "learning_rate": 5.935632387445217e-06, "loss": 0.6837, "step": 13408 }, { "epoch": 0.059360750807915356, "grad_norm": 2.367336839262611, "learning_rate": 5.936075080791535e-06, "loss": 0.7426, "step": 13409 }, { "epoch": 0.05936517774137855, "grad_norm": 3.020039875097594, "learning_rate": 5.936517774137856e-06, "loss": 0.5808, "step": 13410 }, { "epoch": 0.05936960467484174, "grad_norm": 2.970091810784572, "learning_rate": 5.936960467484174e-06, "loss": 0.9693, "step": 13411 }, { "epoch": 0.05937403160830493, "grad_norm": 2.4747081695666835, "learning_rate": 5.937403160830493e-06, "loss": 0.6027, "step": 13412 }, { "epoch": 0.05937845854176812, "grad_norm": 2.524498876759879, "learning_rate": 5.937845854176813e-06, "loss": 0.604, "step": 13413 }, { "epoch": 0.05938288547523131, "grad_norm": 2.6894499040757642, "learning_rate": 5.938288547523131e-06, "loss": 0.8087, "step": 13414 }, { "epoch": 0.0593873124086945, "grad_norm": 2.6229018182210235, "learning_rate": 5.93873124086945e-06, "loss": 0.6306, "step": 13415 }, { "epoch": 0.05939173934215769, "grad_norm": 2.709438724645706, "learning_rate": 5.939173934215769e-06, "loss": 0.6934, "step": 13416 }, { "epoch": 0.05939616627562088, "grad_norm": 3.744752009507207, "learning_rate": 5.939616627562088e-06, "loss": 0.8846, "step": 13417 }, { "epoch": 0.059400593209084064, "grad_norm": 2.664450960726663, "learning_rate": 5.940059320908407e-06, "loss": 0.8656, "step": 13418 }, { "epoch": 0.059405020142547255, "grad_norm": 2.82157273128452, "learning_rate": 5.9405020142547265e-06, "loss": 0.4836, "step": 13419 }, { "epoch": 0.059409447076010445, "grad_norm": 2.6447275121911202, "learning_rate": 5.940944707601045e-06, "loss": 0.5926, "step": 13420 }, { "epoch": 0.059413874009473636, "grad_norm": 2.5301162352768243, "learning_rate": 5.941387400947364e-06, "loss": 0.5794, "step": 13421 }, { "epoch": 0.059418300942936826, "grad_norm": 3.0864973080269924, "learning_rate": 5.941830094293684e-06, "loss": 0.981, "step": 13422 }, { "epoch": 0.05942272787640002, "grad_norm": 2.388775654869385, "learning_rate": 5.942272787640002e-06, "loss": 0.6744, "step": 13423 }, { "epoch": 0.05942715480986321, "grad_norm": 2.808862020660432, "learning_rate": 5.942715480986321e-06, "loss": 0.9532, "step": 13424 }, { "epoch": 0.0594315817433264, "grad_norm": 2.6347619013298056, "learning_rate": 5.94315817433264e-06, "loss": 0.5088, "step": 13425 }, { "epoch": 0.05943600867678959, "grad_norm": 4.097198147827391, "learning_rate": 5.943600867678959e-06, "loss": 0.9888, "step": 13426 }, { "epoch": 0.05944043561025278, "grad_norm": 2.3756213857866677, "learning_rate": 5.944043561025278e-06, "loss": 0.6358, "step": 13427 }, { "epoch": 0.05944486254371597, "grad_norm": 2.9391967916030897, "learning_rate": 5.9444862543715975e-06, "loss": 0.8368, "step": 13428 }, { "epoch": 0.05944928947717916, "grad_norm": 2.6238372635319887, "learning_rate": 5.944928947717916e-06, "loss": 0.8271, "step": 13429 }, { "epoch": 0.05945371641064235, "grad_norm": 4.145702674543941, "learning_rate": 5.9453716410642345e-06, "loss": 0.6553, "step": 13430 }, { "epoch": 0.05945814334410554, "grad_norm": 2.8281378971860747, "learning_rate": 5.945814334410555e-06, "loss": 0.8908, "step": 13431 }, { "epoch": 0.05946257027756873, "grad_norm": 2.8136070603624512, "learning_rate": 5.946257027756873e-06, "loss": 0.9691, "step": 13432 }, { "epoch": 0.059466997211031915, "grad_norm": 2.6045842299147717, "learning_rate": 5.946699721103192e-06, "loss": 0.8474, "step": 13433 }, { "epoch": 0.059471424144495105, "grad_norm": 2.4889108615601363, "learning_rate": 5.947142414449511e-06, "loss": 0.741, "step": 13434 }, { "epoch": 0.059475851077958296, "grad_norm": 2.9779621185898666, "learning_rate": 5.9475851077958305e-06, "loss": 0.871, "step": 13435 }, { "epoch": 0.059480278011421486, "grad_norm": 2.840630083916939, "learning_rate": 5.948027801142149e-06, "loss": 0.5842, "step": 13436 }, { "epoch": 0.05948470494488468, "grad_norm": 3.7749825921751685, "learning_rate": 5.9484704944884684e-06, "loss": 1.1401, "step": 13437 }, { "epoch": 0.05948913187834787, "grad_norm": 3.090260770074537, "learning_rate": 5.948913187834787e-06, "loss": 0.7048, "step": 13438 }, { "epoch": 0.05949355881181106, "grad_norm": 2.3953681580373987, "learning_rate": 5.9493558811811055e-06, "loss": 0.4719, "step": 13439 }, { "epoch": 0.05949798574527425, "grad_norm": 3.0122339987292466, "learning_rate": 5.949798574527426e-06, "loss": 1.0022, "step": 13440 }, { "epoch": 0.05950241267873744, "grad_norm": 2.10611437277737, "learning_rate": 5.950241267873744e-06, "loss": 0.5984, "step": 13441 }, { "epoch": 0.05950683961220063, "grad_norm": 2.510182953257277, "learning_rate": 5.950683961220063e-06, "loss": 0.9832, "step": 13442 }, { "epoch": 0.05951126654566382, "grad_norm": 2.6552614592923875, "learning_rate": 5.951126654566383e-06, "loss": 0.4817, "step": 13443 }, { "epoch": 0.05951569347912701, "grad_norm": 2.6211562667756167, "learning_rate": 5.9515693479127015e-06, "loss": 0.5821, "step": 13444 }, { "epoch": 0.0595201204125902, "grad_norm": 2.602564135228965, "learning_rate": 5.95201204125902e-06, "loss": 0.5215, "step": 13445 }, { "epoch": 0.05952454734605339, "grad_norm": 2.8601440401099163, "learning_rate": 5.952454734605339e-06, "loss": 0.8236, "step": 13446 }, { "epoch": 0.05952897427951658, "grad_norm": 2.6493915570635584, "learning_rate": 5.952897427951658e-06, "loss": 0.6387, "step": 13447 }, { "epoch": 0.059533401212979765, "grad_norm": 3.0129175187570483, "learning_rate": 5.953340121297977e-06, "loss": 0.4876, "step": 13448 }, { "epoch": 0.059537828146442956, "grad_norm": 2.6401104894277245, "learning_rate": 5.953782814644297e-06, "loss": 0.764, "step": 13449 }, { "epoch": 0.059542255079906146, "grad_norm": 3.2841031252670723, "learning_rate": 5.954225507990615e-06, "loss": 1.0331, "step": 13450 }, { "epoch": 0.05954668201336934, "grad_norm": 2.5685157833188206, "learning_rate": 5.954668201336934e-06, "loss": 0.7229, "step": 13451 }, { "epoch": 0.05955110894683253, "grad_norm": 2.713237748787142, "learning_rate": 5.955110894683254e-06, "loss": 0.755, "step": 13452 }, { "epoch": 0.05955553588029572, "grad_norm": 2.8530867982017893, "learning_rate": 5.9555535880295724e-06, "loss": 0.981, "step": 13453 }, { "epoch": 0.05955996281375891, "grad_norm": 2.3420135251678853, "learning_rate": 5.955996281375891e-06, "loss": 0.4783, "step": 13454 }, { "epoch": 0.0595643897472221, "grad_norm": 2.6352367391086604, "learning_rate": 5.95643897472221e-06, "loss": 0.7636, "step": 13455 }, { "epoch": 0.05956881668068529, "grad_norm": 2.9426615866398707, "learning_rate": 5.95688166806853e-06, "loss": 0.6136, "step": 13456 }, { "epoch": 0.05957324361414848, "grad_norm": 3.7045577054810863, "learning_rate": 5.957324361414848e-06, "loss": 1.0373, "step": 13457 }, { "epoch": 0.05957767054761167, "grad_norm": 2.441131308791221, "learning_rate": 5.957767054761168e-06, "loss": 0.5446, "step": 13458 }, { "epoch": 0.05958209748107486, "grad_norm": 2.1790429649297733, "learning_rate": 5.958209748107486e-06, "loss": 0.4205, "step": 13459 }, { "epoch": 0.05958652441453805, "grad_norm": 2.6165985397295413, "learning_rate": 5.958652441453805e-06, "loss": 0.8262, "step": 13460 }, { "epoch": 0.05959095134800124, "grad_norm": 3.1048505631779832, "learning_rate": 5.959095134800125e-06, "loss": 0.9777, "step": 13461 }, { "epoch": 0.05959537828146443, "grad_norm": 2.2867851122332103, "learning_rate": 5.959537828146443e-06, "loss": 0.7277, "step": 13462 }, { "epoch": 0.05959980521492762, "grad_norm": 2.4177401968660948, "learning_rate": 5.959980521492762e-06, "loss": 0.5805, "step": 13463 }, { "epoch": 0.05960423214839081, "grad_norm": 2.3169808998926205, "learning_rate": 5.960423214839081e-06, "loss": 0.6693, "step": 13464 }, { "epoch": 0.059608659081854, "grad_norm": 4.050153561410578, "learning_rate": 5.960865908185401e-06, "loss": 0.9493, "step": 13465 }, { "epoch": 0.05961308601531719, "grad_norm": 2.4577833495193375, "learning_rate": 5.961308601531719e-06, "loss": 0.4662, "step": 13466 }, { "epoch": 0.05961751294878038, "grad_norm": 2.974214820173577, "learning_rate": 5.9617512948780385e-06, "loss": 0.9662, "step": 13467 }, { "epoch": 0.05962193988224357, "grad_norm": 2.5631093764550883, "learning_rate": 5.962193988224357e-06, "loss": 0.5992, "step": 13468 }, { "epoch": 0.05962636681570676, "grad_norm": 3.1890684010741537, "learning_rate": 5.962636681570676e-06, "loss": 0.9151, "step": 13469 }, { "epoch": 0.05963079374916995, "grad_norm": 3.598788884654516, "learning_rate": 5.963079374916996e-06, "loss": 1.0909, "step": 13470 }, { "epoch": 0.05963522068263314, "grad_norm": 3.3207248773753837, "learning_rate": 5.963522068263314e-06, "loss": 1.1556, "step": 13471 }, { "epoch": 0.05963964761609633, "grad_norm": 3.2663673406991243, "learning_rate": 5.963964761609633e-06, "loss": 1.2199, "step": 13472 }, { "epoch": 0.05964407454955952, "grad_norm": 2.4999348460983586, "learning_rate": 5.964407454955953e-06, "loss": 0.628, "step": 13473 }, { "epoch": 0.05964850148302271, "grad_norm": 3.006124481417663, "learning_rate": 5.964850148302272e-06, "loss": 0.6011, "step": 13474 }, { "epoch": 0.0596529284164859, "grad_norm": 2.53953165408563, "learning_rate": 5.96529284164859e-06, "loss": 0.796, "step": 13475 }, { "epoch": 0.05965735534994909, "grad_norm": 3.217825446528983, "learning_rate": 5.9657355349949095e-06, "loss": 0.7856, "step": 13476 }, { "epoch": 0.05966178228341228, "grad_norm": 2.837367942570079, "learning_rate": 5.966178228341228e-06, "loss": 0.7271, "step": 13477 }, { "epoch": 0.059666209216875474, "grad_norm": 3.0290275405937153, "learning_rate": 5.966620921687547e-06, "loss": 0.6191, "step": 13478 }, { "epoch": 0.05967063615033866, "grad_norm": 2.67663891874428, "learning_rate": 5.967063615033867e-06, "loss": 0.6759, "step": 13479 }, { "epoch": 0.05967506308380185, "grad_norm": 2.3441793866263474, "learning_rate": 5.967506308380185e-06, "loss": 0.6546, "step": 13480 }, { "epoch": 0.05967949001726504, "grad_norm": 2.597307665959658, "learning_rate": 5.967949001726504e-06, "loss": 0.622, "step": 13481 }, { "epoch": 0.05968391695072823, "grad_norm": 2.214673366015226, "learning_rate": 5.968391695072824e-06, "loss": 0.5101, "step": 13482 }, { "epoch": 0.05968834388419142, "grad_norm": 3.4614572823541185, "learning_rate": 5.9688343884191425e-06, "loss": 1.0736, "step": 13483 }, { "epoch": 0.05969277081765461, "grad_norm": 2.5649626449402176, "learning_rate": 5.969277081765461e-06, "loss": 0.7531, "step": 13484 }, { "epoch": 0.0596971977511178, "grad_norm": 2.496027497660614, "learning_rate": 5.9697197751117804e-06, "loss": 0.5436, "step": 13485 }, { "epoch": 0.05970162468458099, "grad_norm": 2.9038110442997445, "learning_rate": 5.9701624684581e-06, "loss": 0.5677, "step": 13486 }, { "epoch": 0.05970605161804418, "grad_norm": 3.6590011204024635, "learning_rate": 5.970605161804418e-06, "loss": 0.9276, "step": 13487 }, { "epoch": 0.05971047855150737, "grad_norm": 2.1869983168143445, "learning_rate": 5.971047855150738e-06, "loss": 0.5257, "step": 13488 }, { "epoch": 0.05971490548497056, "grad_norm": 3.024255460488428, "learning_rate": 5.971490548497056e-06, "loss": 0.8152, "step": 13489 }, { "epoch": 0.05971933241843375, "grad_norm": 2.58711781318969, "learning_rate": 5.971933241843375e-06, "loss": 0.6737, "step": 13490 }, { "epoch": 0.05972375935189694, "grad_norm": 2.8668165826600225, "learning_rate": 5.972375935189695e-06, "loss": 0.7708, "step": 13491 }, { "epoch": 0.059728186285360134, "grad_norm": 3.4053406805125386, "learning_rate": 5.9728186285360135e-06, "loss": 0.5599, "step": 13492 }, { "epoch": 0.059732613218823324, "grad_norm": 2.807169466421697, "learning_rate": 5.973261321882332e-06, "loss": 0.636, "step": 13493 }, { "epoch": 0.05973704015228651, "grad_norm": 2.6882193452496197, "learning_rate": 5.973704015228652e-06, "loss": 0.8982, "step": 13494 }, { "epoch": 0.0597414670857497, "grad_norm": 3.0962151289713864, "learning_rate": 5.974146708574971e-06, "loss": 0.787, "step": 13495 }, { "epoch": 0.05974589401921289, "grad_norm": 2.856670207363813, "learning_rate": 5.974589401921289e-06, "loss": 0.9995, "step": 13496 }, { "epoch": 0.05975032095267608, "grad_norm": 2.8277439412898104, "learning_rate": 5.975032095267609e-06, "loss": 0.6346, "step": 13497 }, { "epoch": 0.05975474788613927, "grad_norm": 3.2191030041671223, "learning_rate": 5.975474788613927e-06, "loss": 1.0076, "step": 13498 }, { "epoch": 0.05975917481960246, "grad_norm": 2.7192279308660425, "learning_rate": 5.975917481960246e-06, "loss": 0.9009, "step": 13499 }, { "epoch": 0.05976360175306565, "grad_norm": 2.813279658314807, "learning_rate": 5.976360175306566e-06, "loss": 0.9775, "step": 13500 }, { "epoch": 0.05976802868652884, "grad_norm": 2.9546319531298835, "learning_rate": 5.9768028686528844e-06, "loss": 0.7506, "step": 13501 }, { "epoch": 0.05977245561999203, "grad_norm": 2.5225601565968208, "learning_rate": 5.977245561999203e-06, "loss": 0.633, "step": 13502 }, { "epoch": 0.05977688255345522, "grad_norm": 3.628086659669066, "learning_rate": 5.977688255345523e-06, "loss": 0.7753, "step": 13503 }, { "epoch": 0.05978130948691841, "grad_norm": 2.712174383601387, "learning_rate": 5.978130948691842e-06, "loss": 0.6038, "step": 13504 }, { "epoch": 0.059785736420381604, "grad_norm": 2.767876733720094, "learning_rate": 5.97857364203816e-06, "loss": 0.649, "step": 13505 }, { "epoch": 0.059790163353844794, "grad_norm": 2.3494852822743466, "learning_rate": 5.97901633538448e-06, "loss": 0.6809, "step": 13506 }, { "epoch": 0.059794590287307985, "grad_norm": 2.891135219406741, "learning_rate": 5.979459028730798e-06, "loss": 0.4077, "step": 13507 }, { "epoch": 0.059799017220771175, "grad_norm": 3.2756462884617807, "learning_rate": 5.9799017220771175e-06, "loss": 0.777, "step": 13508 }, { "epoch": 0.05980344415423436, "grad_norm": 2.2126601598694164, "learning_rate": 5.980344415423437e-06, "loss": 0.617, "step": 13509 }, { "epoch": 0.05980787108769755, "grad_norm": 3.325030138964809, "learning_rate": 5.980787108769755e-06, "loss": 0.9478, "step": 13510 }, { "epoch": 0.05981229802116074, "grad_norm": 2.741171675521277, "learning_rate": 5.981229802116074e-06, "loss": 0.7072, "step": 13511 }, { "epoch": 0.05981672495462393, "grad_norm": 2.441154528075578, "learning_rate": 5.981672495462394e-06, "loss": 0.8025, "step": 13512 }, { "epoch": 0.05982115188808712, "grad_norm": 3.433355069699887, "learning_rate": 5.982115188808713e-06, "loss": 0.8057, "step": 13513 }, { "epoch": 0.05982557882155031, "grad_norm": 3.0058324436064106, "learning_rate": 5.982557882155031e-06, "loss": 1.2535, "step": 13514 }, { "epoch": 0.0598300057550135, "grad_norm": 3.8428337614517254, "learning_rate": 5.9830005755013506e-06, "loss": 0.9866, "step": 13515 }, { "epoch": 0.05983443268847669, "grad_norm": 2.7577893208180595, "learning_rate": 5.98344326884767e-06, "loss": 0.591, "step": 13516 }, { "epoch": 0.05983885962193988, "grad_norm": 2.648320573321822, "learning_rate": 5.9838859621939884e-06, "loss": 0.8456, "step": 13517 }, { "epoch": 0.05984328655540307, "grad_norm": 2.798521494783693, "learning_rate": 5.984328655540308e-06, "loss": 0.9288, "step": 13518 }, { "epoch": 0.059847713488866264, "grad_norm": 2.720030051245705, "learning_rate": 5.984771348886626e-06, "loss": 0.7045, "step": 13519 }, { "epoch": 0.059852140422329454, "grad_norm": 2.3074128142205783, "learning_rate": 5.985214042232945e-06, "loss": 0.8164, "step": 13520 }, { "epoch": 0.059856567355792645, "grad_norm": 2.158391367826424, "learning_rate": 5.985656735579265e-06, "loss": 0.5694, "step": 13521 }, { "epoch": 0.059860994289255835, "grad_norm": 2.2997136151720503, "learning_rate": 5.986099428925584e-06, "loss": 0.6562, "step": 13522 }, { "epoch": 0.059865421222719026, "grad_norm": 2.67200286943447, "learning_rate": 5.986542122271902e-06, "loss": 0.7742, "step": 13523 }, { "epoch": 0.05986984815618221, "grad_norm": 3.2037233144669623, "learning_rate": 5.986984815618222e-06, "loss": 0.87, "step": 13524 }, { "epoch": 0.0598742750896454, "grad_norm": 2.6434175857097424, "learning_rate": 5.987427508964541e-06, "loss": 0.6955, "step": 13525 }, { "epoch": 0.05987870202310859, "grad_norm": 2.4906441791820666, "learning_rate": 5.987870202310859e-06, "loss": 0.8361, "step": 13526 }, { "epoch": 0.05988312895657178, "grad_norm": 3.0234874829738274, "learning_rate": 5.988312895657179e-06, "loss": 0.7429, "step": 13527 }, { "epoch": 0.05988755589003497, "grad_norm": 2.573060275769742, "learning_rate": 5.988755589003497e-06, "loss": 0.7426, "step": 13528 }, { "epoch": 0.05989198282349816, "grad_norm": 2.7309416607047576, "learning_rate": 5.989198282349817e-06, "loss": 0.5546, "step": 13529 }, { "epoch": 0.05989640975696135, "grad_norm": 4.160653047606005, "learning_rate": 5.989640975696136e-06, "loss": 0.9892, "step": 13530 }, { "epoch": 0.05990083669042454, "grad_norm": 3.1076023150371834, "learning_rate": 5.9900836690424546e-06, "loss": 0.956, "step": 13531 }, { "epoch": 0.05990526362388773, "grad_norm": 3.2170242062725434, "learning_rate": 5.990526362388773e-06, "loss": 0.7965, "step": 13532 }, { "epoch": 0.059909690557350924, "grad_norm": 3.9801281700931823, "learning_rate": 5.990969055735093e-06, "loss": 1.287, "step": 13533 }, { "epoch": 0.059914117490814114, "grad_norm": 2.6380359182733404, "learning_rate": 5.991411749081412e-06, "loss": 0.6224, "step": 13534 }, { "epoch": 0.059918544424277305, "grad_norm": 2.7038137239323135, "learning_rate": 5.99185444242773e-06, "loss": 0.5731, "step": 13535 }, { "epoch": 0.059922971357740495, "grad_norm": 3.207354032051531, "learning_rate": 5.99229713577405e-06, "loss": 0.824, "step": 13536 }, { "epoch": 0.059927398291203686, "grad_norm": 2.882869057424665, "learning_rate": 5.992739829120369e-06, "loss": 0.8833, "step": 13537 }, { "epoch": 0.059931825224666876, "grad_norm": 2.605904782382117, "learning_rate": 5.993182522466688e-06, "loss": 0.7728, "step": 13538 }, { "epoch": 0.05993625215813006, "grad_norm": 2.888239748866629, "learning_rate": 5.993625215813007e-06, "loss": 0.8584, "step": 13539 }, { "epoch": 0.05994067909159325, "grad_norm": 1.9279705389813038, "learning_rate": 5.9940679091593255e-06, "loss": 0.491, "step": 13540 }, { "epoch": 0.05994510602505644, "grad_norm": 2.657386623343238, "learning_rate": 5.994510602505644e-06, "loss": 0.9364, "step": 13541 }, { "epoch": 0.05994953295851963, "grad_norm": 2.3462599654446037, "learning_rate": 5.994953295851964e-06, "loss": 0.669, "step": 13542 }, { "epoch": 0.05995395989198282, "grad_norm": 3.9469170846468056, "learning_rate": 5.995395989198283e-06, "loss": 1.4611, "step": 13543 }, { "epoch": 0.05995838682544601, "grad_norm": 3.4562263166777436, "learning_rate": 5.995838682544601e-06, "loss": 0.9461, "step": 13544 }, { "epoch": 0.0599628137589092, "grad_norm": 2.9027906897338553, "learning_rate": 5.996281375890921e-06, "loss": 0.6857, "step": 13545 }, { "epoch": 0.059967240692372394, "grad_norm": 2.8704137839659793, "learning_rate": 5.99672406923724e-06, "loss": 0.609, "step": 13546 }, { "epoch": 0.059971667625835584, "grad_norm": 2.8174874615040215, "learning_rate": 5.9971667625835586e-06, "loss": 0.5215, "step": 13547 }, { "epoch": 0.059976094559298775, "grad_norm": 2.584952099105565, "learning_rate": 5.997609455929878e-06, "loss": 0.8448, "step": 13548 }, { "epoch": 0.059980521492761965, "grad_norm": 3.1503921044274414, "learning_rate": 5.9980521492761964e-06, "loss": 0.9928, "step": 13549 }, { "epoch": 0.059984948426225156, "grad_norm": 2.2471257344962043, "learning_rate": 5.998494842622515e-06, "loss": 0.4536, "step": 13550 }, { "epoch": 0.059989375359688346, "grad_norm": 2.54102495477695, "learning_rate": 5.998937535968835e-06, "loss": 0.7608, "step": 13551 }, { "epoch": 0.05999380229315154, "grad_norm": 2.898024428256599, "learning_rate": 5.999380229315154e-06, "loss": 0.5509, "step": 13552 }, { "epoch": 0.05999822922661473, "grad_norm": 3.0312095840171325, "learning_rate": 5.999822922661472e-06, "loss": 0.6582, "step": 13553 }, { "epoch": 0.06000265616007791, "grad_norm": 2.7423446528225854, "learning_rate": 6.0002656160077925e-06, "loss": 0.7902, "step": 13554 }, { "epoch": 0.0600070830935411, "grad_norm": 2.888676017451085, "learning_rate": 6.000708309354111e-06, "loss": 0.6965, "step": 13555 }, { "epoch": 0.06001151002700429, "grad_norm": 2.8612675425133034, "learning_rate": 6.0011510027004295e-06, "loss": 0.9984, "step": 13556 }, { "epoch": 0.06001593696046748, "grad_norm": 2.4291520508630335, "learning_rate": 6.001593696046749e-06, "loss": 0.5658, "step": 13557 }, { "epoch": 0.06002036389393067, "grad_norm": 2.680957916110708, "learning_rate": 6.002036389393067e-06, "loss": 0.7382, "step": 13558 }, { "epoch": 0.06002479082739386, "grad_norm": 2.850807365988333, "learning_rate": 6.002479082739387e-06, "loss": 0.6904, "step": 13559 }, { "epoch": 0.060029217760857054, "grad_norm": 2.7802129347588354, "learning_rate": 6.002921776085706e-06, "loss": 0.9835, "step": 13560 }, { "epoch": 0.060033644694320244, "grad_norm": 3.159598619877941, "learning_rate": 6.003364469432025e-06, "loss": 0.9949, "step": 13561 }, { "epoch": 0.060038071627783435, "grad_norm": 2.724955414911723, "learning_rate": 6.003807162778343e-06, "loss": 1.1286, "step": 13562 }, { "epoch": 0.060042498561246625, "grad_norm": 2.537980385042789, "learning_rate": 6.004249856124663e-06, "loss": 0.8098, "step": 13563 }, { "epoch": 0.060046925494709816, "grad_norm": 2.5359580509483908, "learning_rate": 6.004692549470982e-06, "loss": 0.7443, "step": 13564 }, { "epoch": 0.060051352428173006, "grad_norm": 2.248738991436568, "learning_rate": 6.0051352428173004e-06, "loss": 0.5842, "step": 13565 }, { "epoch": 0.0600557793616362, "grad_norm": 2.6014909803488884, "learning_rate": 6.00557793616362e-06, "loss": 0.5844, "step": 13566 }, { "epoch": 0.06006020629509939, "grad_norm": 3.297931231597245, "learning_rate": 6.006020629509939e-06, "loss": 0.8597, "step": 13567 }, { "epoch": 0.06006463322856258, "grad_norm": 3.5639496061406057, "learning_rate": 6.006463322856258e-06, "loss": 0.8254, "step": 13568 }, { "epoch": 0.06006906016202576, "grad_norm": 3.128593915609716, "learning_rate": 6.006906016202577e-06, "loss": 0.8023, "step": 13569 }, { "epoch": 0.06007348709548895, "grad_norm": 2.618191437314359, "learning_rate": 6.007348709548896e-06, "loss": 0.7898, "step": 13570 }, { "epoch": 0.06007791402895214, "grad_norm": 2.507625264423145, "learning_rate": 6.007791402895214e-06, "loss": 0.6178, "step": 13571 }, { "epoch": 0.06008234096241533, "grad_norm": 3.48074370270601, "learning_rate": 6.008234096241534e-06, "loss": 1.0362, "step": 13572 }, { "epoch": 0.06008676789587852, "grad_norm": 3.6998496557884817, "learning_rate": 6.008676789587853e-06, "loss": 0.9667, "step": 13573 }, { "epoch": 0.060091194829341714, "grad_norm": 2.703263597131395, "learning_rate": 6.009119482934171e-06, "loss": 0.881, "step": 13574 }, { "epoch": 0.060095621762804904, "grad_norm": 2.767608677360256, "learning_rate": 6.009562176280492e-06, "loss": 0.7122, "step": 13575 }, { "epoch": 0.060100048696268095, "grad_norm": 2.326949274710389, "learning_rate": 6.01000486962681e-06, "loss": 0.6356, "step": 13576 }, { "epoch": 0.060104475629731285, "grad_norm": 3.005867417990424, "learning_rate": 6.010447562973129e-06, "loss": 0.8957, "step": 13577 }, { "epoch": 0.060108902563194476, "grad_norm": 2.711830374964046, "learning_rate": 6.010890256319448e-06, "loss": 0.6621, "step": 13578 }, { "epoch": 0.060113329496657666, "grad_norm": 2.253262064618646, "learning_rate": 6.0113329496657666e-06, "loss": 0.5226, "step": 13579 }, { "epoch": 0.06011775643012086, "grad_norm": 2.7026881929137647, "learning_rate": 6.011775643012085e-06, "loss": 0.8077, "step": 13580 }, { "epoch": 0.06012218336358405, "grad_norm": 2.1763089190415505, "learning_rate": 6.012218336358405e-06, "loss": 0.4356, "step": 13581 }, { "epoch": 0.06012661029704724, "grad_norm": 3.5692749709452873, "learning_rate": 6.012661029704724e-06, "loss": 0.8087, "step": 13582 }, { "epoch": 0.06013103723051043, "grad_norm": 3.5042090188727646, "learning_rate": 6.013103723051042e-06, "loss": 0.7451, "step": 13583 }, { "epoch": 0.06013546416397361, "grad_norm": 2.6872943918776744, "learning_rate": 6.0135464163973626e-06, "loss": 0.724, "step": 13584 }, { "epoch": 0.0601398910974368, "grad_norm": 2.897501436676855, "learning_rate": 6.013989109743681e-06, "loss": 0.9126, "step": 13585 }, { "epoch": 0.06014431803089999, "grad_norm": 2.564491518347659, "learning_rate": 6.01443180309e-06, "loss": 0.8862, "step": 13586 }, { "epoch": 0.060148744964363184, "grad_norm": 2.651845166154231, "learning_rate": 6.014874496436319e-06, "loss": 0.7478, "step": 13587 }, { "epoch": 0.060153171897826374, "grad_norm": 2.9581470649346318, "learning_rate": 6.0153171897826375e-06, "loss": 0.5572, "step": 13588 }, { "epoch": 0.060157598831289565, "grad_norm": 3.291199487663047, "learning_rate": 6.015759883128957e-06, "loss": 1.1117, "step": 13589 }, { "epoch": 0.060162025764752755, "grad_norm": 3.1359786821340827, "learning_rate": 6.016202576475276e-06, "loss": 0.9131, "step": 13590 }, { "epoch": 0.060166452698215946, "grad_norm": 3.532249335545456, "learning_rate": 6.016645269821595e-06, "loss": 1.045, "step": 13591 }, { "epoch": 0.060170879631679136, "grad_norm": 3.9758994677049992, "learning_rate": 6.017087963167913e-06, "loss": 1.2168, "step": 13592 }, { "epoch": 0.06017530656514233, "grad_norm": 2.497685958260543, "learning_rate": 6.0175306565142335e-06, "loss": 0.804, "step": 13593 }, { "epoch": 0.06017973349860552, "grad_norm": 2.6422600848540716, "learning_rate": 6.017973349860552e-06, "loss": 0.5581, "step": 13594 }, { "epoch": 0.06018416043206871, "grad_norm": 2.204107770447693, "learning_rate": 6.0184160432068706e-06, "loss": 0.503, "step": 13595 }, { "epoch": 0.0601885873655319, "grad_norm": 2.8543326489364484, "learning_rate": 6.01885873655319e-06, "loss": 0.9607, "step": 13596 }, { "epoch": 0.06019301429899509, "grad_norm": 3.520128639606571, "learning_rate": 6.019301429899509e-06, "loss": 1.0259, "step": 13597 }, { "epoch": 0.06019744123245828, "grad_norm": 3.10644775070359, "learning_rate": 6.019744123245828e-06, "loss": 1.1886, "step": 13598 }, { "epoch": 0.06020186816592147, "grad_norm": 2.9543634075092284, "learning_rate": 6.020186816592147e-06, "loss": 0.8543, "step": 13599 }, { "epoch": 0.06020629509938465, "grad_norm": 2.849221506665164, "learning_rate": 6.020629509938466e-06, "loss": 0.897, "step": 13600 }, { "epoch": 0.060210722032847844, "grad_norm": 2.7368352850280875, "learning_rate": 6.021072203284784e-06, "loss": 0.7122, "step": 13601 }, { "epoch": 0.060215148966311034, "grad_norm": 2.931127921945487, "learning_rate": 6.0215148966311045e-06, "loss": 0.9815, "step": 13602 }, { "epoch": 0.060219575899774225, "grad_norm": 2.9876567430635146, "learning_rate": 6.021957589977423e-06, "loss": 0.9144, "step": 13603 }, { "epoch": 0.060224002833237415, "grad_norm": 3.102233010641512, "learning_rate": 6.0224002833237415e-06, "loss": 1.0207, "step": 13604 }, { "epoch": 0.060228429766700606, "grad_norm": 2.8411164712725667, "learning_rate": 6.022842976670062e-06, "loss": 0.9863, "step": 13605 }, { "epoch": 0.060232856700163796, "grad_norm": 3.038875968690137, "learning_rate": 6.02328567001638e-06, "loss": 0.939, "step": 13606 }, { "epoch": 0.06023728363362699, "grad_norm": 3.3360849862118305, "learning_rate": 6.023728363362699e-06, "loss": 0.9321, "step": 13607 }, { "epoch": 0.06024171056709018, "grad_norm": 2.8105247312248958, "learning_rate": 6.024171056709018e-06, "loss": 0.8568, "step": 13608 }, { "epoch": 0.06024613750055337, "grad_norm": 2.451400749527919, "learning_rate": 6.024613750055337e-06, "loss": 0.4434, "step": 13609 }, { "epoch": 0.06025056443401656, "grad_norm": 2.5164766038599815, "learning_rate": 6.025056443401656e-06, "loss": 0.4667, "step": 13610 }, { "epoch": 0.06025499136747975, "grad_norm": 2.284103218881201, "learning_rate": 6.025499136747975e-06, "loss": 0.6766, "step": 13611 }, { "epoch": 0.06025941830094294, "grad_norm": 3.7456155353247773, "learning_rate": 6.025941830094294e-06, "loss": 0.8409, "step": 13612 }, { "epoch": 0.06026384523440613, "grad_norm": 2.514265286973654, "learning_rate": 6.0263845234406124e-06, "loss": 0.7312, "step": 13613 }, { "epoch": 0.06026827216786932, "grad_norm": 2.591989111232129, "learning_rate": 6.026827216786933e-06, "loss": 0.8629, "step": 13614 }, { "epoch": 0.060272699101332504, "grad_norm": 1.9921742775375446, "learning_rate": 6.027269910133251e-06, "loss": 0.4875, "step": 13615 }, { "epoch": 0.060277126034795694, "grad_norm": 2.838655874622026, "learning_rate": 6.02771260347957e-06, "loss": 0.8973, "step": 13616 }, { "epoch": 0.060281552968258885, "grad_norm": 2.56420930522619, "learning_rate": 6.028155296825889e-06, "loss": 0.805, "step": 13617 }, { "epoch": 0.060285979901722075, "grad_norm": 2.8414014622826884, "learning_rate": 6.0285979901722085e-06, "loss": 0.6384, "step": 13618 }, { "epoch": 0.060290406835185266, "grad_norm": 2.757282544676645, "learning_rate": 6.029040683518527e-06, "loss": 0.8728, "step": 13619 }, { "epoch": 0.060294833768648456, "grad_norm": 3.4184210128819976, "learning_rate": 6.029483376864846e-06, "loss": 1.1899, "step": 13620 }, { "epoch": 0.06029926070211165, "grad_norm": 2.9841087729183395, "learning_rate": 6.029926070211165e-06, "loss": 0.8852, "step": 13621 }, { "epoch": 0.06030368763557484, "grad_norm": 3.4526490953835, "learning_rate": 6.030368763557483e-06, "loss": 1.2172, "step": 13622 }, { "epoch": 0.06030811456903803, "grad_norm": 2.9462204554596134, "learning_rate": 6.030811456903804e-06, "loss": 0.9383, "step": 13623 }, { "epoch": 0.06031254150250122, "grad_norm": 2.6006505337880137, "learning_rate": 6.031254150250122e-06, "loss": 0.7289, "step": 13624 }, { "epoch": 0.06031696843596441, "grad_norm": 2.905453635490055, "learning_rate": 6.031696843596441e-06, "loss": 0.6248, "step": 13625 }, { "epoch": 0.0603213953694276, "grad_norm": 3.088319561118161, "learning_rate": 6.03213953694276e-06, "loss": 1.041, "step": 13626 }, { "epoch": 0.06032582230289079, "grad_norm": 2.9019532360104106, "learning_rate": 6.032582230289079e-06, "loss": 0.7317, "step": 13627 }, { "epoch": 0.06033024923635398, "grad_norm": 2.417401075903868, "learning_rate": 6.033024923635398e-06, "loss": 0.5362, "step": 13628 }, { "epoch": 0.06033467616981717, "grad_norm": 3.006536087413727, "learning_rate": 6.033467616981717e-06, "loss": 0.9869, "step": 13629 }, { "epoch": 0.060339103103280355, "grad_norm": 2.4766267148171415, "learning_rate": 6.033910310328036e-06, "loss": 0.5597, "step": 13630 }, { "epoch": 0.060343530036743545, "grad_norm": 3.6524180084698843, "learning_rate": 6.034353003674354e-06, "loss": 1.1136, "step": 13631 }, { "epoch": 0.060347956970206736, "grad_norm": 2.617996830567852, "learning_rate": 6.0347956970206746e-06, "loss": 0.7141, "step": 13632 }, { "epoch": 0.060352383903669926, "grad_norm": 3.187102291572472, "learning_rate": 6.035238390366993e-06, "loss": 0.8711, "step": 13633 }, { "epoch": 0.06035681083713312, "grad_norm": 2.2421775893789833, "learning_rate": 6.035681083713312e-06, "loss": 0.5954, "step": 13634 }, { "epoch": 0.06036123777059631, "grad_norm": 2.721239275035873, "learning_rate": 6.036123777059632e-06, "loss": 0.5593, "step": 13635 }, { "epoch": 0.0603656647040595, "grad_norm": 3.9141949624937085, "learning_rate": 6.03656647040595e-06, "loss": 0.639, "step": 13636 }, { "epoch": 0.06037009163752269, "grad_norm": 2.889228219358191, "learning_rate": 6.037009163752269e-06, "loss": 0.7835, "step": 13637 }, { "epoch": 0.06037451857098588, "grad_norm": 2.5263769713804853, "learning_rate": 6.037451857098588e-06, "loss": 0.7142, "step": 13638 }, { "epoch": 0.06037894550444907, "grad_norm": 2.9162450611771704, "learning_rate": 6.037894550444907e-06, "loss": 1.0465, "step": 13639 }, { "epoch": 0.06038337243791226, "grad_norm": 2.150553982327094, "learning_rate": 6.038337243791226e-06, "loss": 0.582, "step": 13640 }, { "epoch": 0.06038779937137545, "grad_norm": 2.478897835627452, "learning_rate": 6.0387799371375455e-06, "loss": 0.675, "step": 13641 }, { "epoch": 0.06039222630483864, "grad_norm": 2.755324739071405, "learning_rate": 6.039222630483864e-06, "loss": 0.5503, "step": 13642 }, { "epoch": 0.06039665323830183, "grad_norm": 2.606270890560882, "learning_rate": 6.0396653238301826e-06, "loss": 0.6046, "step": 13643 }, { "epoch": 0.06040108017176502, "grad_norm": 3.01024874510133, "learning_rate": 6.040108017176503e-06, "loss": 0.9805, "step": 13644 }, { "epoch": 0.060405507105228205, "grad_norm": 2.59583063162985, "learning_rate": 6.040550710522821e-06, "loss": 0.9351, "step": 13645 }, { "epoch": 0.060409934038691396, "grad_norm": 2.4619134585317006, "learning_rate": 6.04099340386914e-06, "loss": 0.5861, "step": 13646 }, { "epoch": 0.060414360972154586, "grad_norm": 3.2488041870827518, "learning_rate": 6.041436097215459e-06, "loss": 0.7484, "step": 13647 }, { "epoch": 0.06041878790561778, "grad_norm": 2.8752604226793217, "learning_rate": 6.0418787905617786e-06, "loss": 0.7806, "step": 13648 }, { "epoch": 0.06042321483908097, "grad_norm": 3.08234322049147, "learning_rate": 6.042321483908097e-06, "loss": 0.5875, "step": 13649 }, { "epoch": 0.06042764177254416, "grad_norm": 3.170577476724892, "learning_rate": 6.0427641772544165e-06, "loss": 1.0197, "step": 13650 }, { "epoch": 0.06043206870600735, "grad_norm": 3.3002614726524686, "learning_rate": 6.043206870600735e-06, "loss": 1.1618, "step": 13651 }, { "epoch": 0.06043649563947054, "grad_norm": 2.6250420390150353, "learning_rate": 6.0436495639470535e-06, "loss": 0.5608, "step": 13652 }, { "epoch": 0.06044092257293373, "grad_norm": 2.545118135981514, "learning_rate": 6.044092257293374e-06, "loss": 0.7865, "step": 13653 }, { "epoch": 0.06044534950639692, "grad_norm": 2.4985464712686145, "learning_rate": 6.044534950639692e-06, "loss": 0.639, "step": 13654 }, { "epoch": 0.06044977643986011, "grad_norm": 2.43189588386175, "learning_rate": 6.044977643986011e-06, "loss": 0.5944, "step": 13655 }, { "epoch": 0.0604542033733233, "grad_norm": 3.116195792770391, "learning_rate": 6.045420337332331e-06, "loss": 0.8548, "step": 13656 }, { "epoch": 0.06045863030678649, "grad_norm": 2.5611255086973777, "learning_rate": 6.0458630306786495e-06, "loss": 0.6743, "step": 13657 }, { "epoch": 0.06046305724024968, "grad_norm": 2.4736882779853566, "learning_rate": 6.046305724024968e-06, "loss": 0.9624, "step": 13658 }, { "epoch": 0.06046748417371287, "grad_norm": 2.9856320209049247, "learning_rate": 6.046748417371287e-06, "loss": 0.9154, "step": 13659 }, { "epoch": 0.060471911107176056, "grad_norm": 3.9357453350909926, "learning_rate": 6.047191110717606e-06, "loss": 0.6793, "step": 13660 }, { "epoch": 0.060476338040639246, "grad_norm": 2.6924135422690743, "learning_rate": 6.0476338040639245e-06, "loss": 0.3929, "step": 13661 }, { "epoch": 0.06048076497410244, "grad_norm": 2.536714291716383, "learning_rate": 6.048076497410245e-06, "loss": 0.4334, "step": 13662 }, { "epoch": 0.06048519190756563, "grad_norm": 2.495087873667745, "learning_rate": 6.048519190756563e-06, "loss": 0.8785, "step": 13663 }, { "epoch": 0.06048961884102882, "grad_norm": 2.22846988564036, "learning_rate": 6.048961884102882e-06, "loss": 0.5582, "step": 13664 }, { "epoch": 0.06049404577449201, "grad_norm": 3.865162047074021, "learning_rate": 6.049404577449202e-06, "loss": 0.9576, "step": 13665 }, { "epoch": 0.0604984727079552, "grad_norm": 2.7453657451831455, "learning_rate": 6.0498472707955205e-06, "loss": 0.781, "step": 13666 }, { "epoch": 0.06050289964141839, "grad_norm": 2.3133712624226925, "learning_rate": 6.050289964141839e-06, "loss": 0.8817, "step": 13667 }, { "epoch": 0.06050732657488158, "grad_norm": 3.6418199042604407, "learning_rate": 6.050732657488158e-06, "loss": 1.1759, "step": 13668 }, { "epoch": 0.06051175350834477, "grad_norm": 2.446007131913787, "learning_rate": 6.051175350834477e-06, "loss": 0.6067, "step": 13669 }, { "epoch": 0.06051618044180796, "grad_norm": 2.1788051667089716, "learning_rate": 6.051618044180796e-06, "loss": 0.5572, "step": 13670 }, { "epoch": 0.06052060737527115, "grad_norm": 3.3519862237921987, "learning_rate": 6.052060737527116e-06, "loss": 0.7205, "step": 13671 }, { "epoch": 0.06052503430873434, "grad_norm": 3.780513673397495, "learning_rate": 6.052503430873434e-06, "loss": 1.0432, "step": 13672 }, { "epoch": 0.06052946124219753, "grad_norm": 2.333287559114299, "learning_rate": 6.052946124219754e-06, "loss": 0.6256, "step": 13673 }, { "epoch": 0.06053388817566072, "grad_norm": 2.999061705108533, "learning_rate": 6.053388817566073e-06, "loss": 0.9558, "step": 13674 }, { "epoch": 0.06053831510912391, "grad_norm": 2.603740478791544, "learning_rate": 6.053831510912391e-06, "loss": 0.7433, "step": 13675 }, { "epoch": 0.0605427420425871, "grad_norm": 2.095348973274945, "learning_rate": 6.054274204258711e-06, "loss": 0.5454, "step": 13676 }, { "epoch": 0.06054716897605029, "grad_norm": 2.3231705440697064, "learning_rate": 6.054716897605029e-06, "loss": 0.6224, "step": 13677 }, { "epoch": 0.06055159590951348, "grad_norm": 3.1785318141672163, "learning_rate": 6.055159590951349e-06, "loss": 1.0273, "step": 13678 }, { "epoch": 0.06055602284297667, "grad_norm": 2.451851269666504, "learning_rate": 6.055602284297668e-06, "loss": 0.6257, "step": 13679 }, { "epoch": 0.06056044977643986, "grad_norm": 2.843239547692598, "learning_rate": 6.0560449776439866e-06, "loss": 0.7677, "step": 13680 }, { "epoch": 0.06056487670990305, "grad_norm": 3.0664769970407755, "learning_rate": 6.056487670990305e-06, "loss": 0.8351, "step": 13681 }, { "epoch": 0.06056930364336624, "grad_norm": 2.6837020069240185, "learning_rate": 6.056930364336625e-06, "loss": 0.8133, "step": 13682 }, { "epoch": 0.06057373057682943, "grad_norm": 2.684272076226344, "learning_rate": 6.057373057682944e-06, "loss": 0.6355, "step": 13683 }, { "epoch": 0.06057815751029262, "grad_norm": 2.6483593730415116, "learning_rate": 6.057815751029262e-06, "loss": 0.6953, "step": 13684 }, { "epoch": 0.06058258444375581, "grad_norm": 2.2945775868521117, "learning_rate": 6.058258444375582e-06, "loss": 0.406, "step": 13685 }, { "epoch": 0.060587011377219, "grad_norm": 2.987337981131261, "learning_rate": 6.058701137721901e-06, "loss": 0.7274, "step": 13686 }, { "epoch": 0.06059143831068219, "grad_norm": 2.6728745764229913, "learning_rate": 6.05914383106822e-06, "loss": 0.8531, "step": 13687 }, { "epoch": 0.06059586524414538, "grad_norm": 2.991001385401383, "learning_rate": 6.059586524414539e-06, "loss": 0.999, "step": 13688 }, { "epoch": 0.060600292177608574, "grad_norm": 2.243159800355183, "learning_rate": 6.0600292177608575e-06, "loss": 0.7609, "step": 13689 }, { "epoch": 0.06060471911107176, "grad_norm": 2.9626518809753635, "learning_rate": 6.060471911107176e-06, "loss": 0.9182, "step": 13690 }, { "epoch": 0.06060914604453495, "grad_norm": 2.5982610516633793, "learning_rate": 6.060914604453496e-06, "loss": 0.651, "step": 13691 }, { "epoch": 0.06061357297799814, "grad_norm": 2.7326523967435628, "learning_rate": 6.061357297799815e-06, "loss": 0.6472, "step": 13692 }, { "epoch": 0.06061799991146133, "grad_norm": 3.3706960537166037, "learning_rate": 6.061799991146133e-06, "loss": 0.6706, "step": 13693 }, { "epoch": 0.06062242684492452, "grad_norm": 3.557802528791868, "learning_rate": 6.0622426844924535e-06, "loss": 1.2076, "step": 13694 }, { "epoch": 0.06062685377838771, "grad_norm": 2.4816584629746137, "learning_rate": 6.062685377838772e-06, "loss": 0.7252, "step": 13695 }, { "epoch": 0.0606312807118509, "grad_norm": 2.3756509219208874, "learning_rate": 6.0631280711850906e-06, "loss": 0.6577, "step": 13696 }, { "epoch": 0.06063570764531409, "grad_norm": 3.085721773468878, "learning_rate": 6.06357076453141e-06, "loss": 0.9409, "step": 13697 }, { "epoch": 0.06064013457877728, "grad_norm": 2.30913540412834, "learning_rate": 6.0640134578777285e-06, "loss": 0.8959, "step": 13698 }, { "epoch": 0.06064456151224047, "grad_norm": 2.280327369456442, "learning_rate": 6.064456151224048e-06, "loss": 0.8283, "step": 13699 }, { "epoch": 0.06064898844570366, "grad_norm": 3.0582575773819856, "learning_rate": 6.064898844570367e-06, "loss": 0.8852, "step": 13700 }, { "epoch": 0.06065341537916685, "grad_norm": 3.003090891718258, "learning_rate": 6.065341537916686e-06, "loss": 0.532, "step": 13701 }, { "epoch": 0.06065784231263004, "grad_norm": 3.1100909860749386, "learning_rate": 6.065784231263004e-06, "loss": 0.9171, "step": 13702 }, { "epoch": 0.060662269246093234, "grad_norm": 2.8740017229228463, "learning_rate": 6.0662269246093245e-06, "loss": 0.5395, "step": 13703 }, { "epoch": 0.060666696179556424, "grad_norm": 2.855439942941609, "learning_rate": 6.066669617955643e-06, "loss": 0.8499, "step": 13704 }, { "epoch": 0.06067112311301961, "grad_norm": 2.5521732363291556, "learning_rate": 6.0671123113019615e-06, "loss": 0.681, "step": 13705 }, { "epoch": 0.0606755500464828, "grad_norm": 2.6009645821800054, "learning_rate": 6.067555004648281e-06, "loss": 0.8132, "step": 13706 }, { "epoch": 0.06067997697994599, "grad_norm": 2.7588267174756123, "learning_rate": 6.067997697994599e-06, "loss": 0.8805, "step": 13707 }, { "epoch": 0.06068440391340918, "grad_norm": 2.5633607532410254, "learning_rate": 6.068440391340919e-06, "loss": 0.8472, "step": 13708 }, { "epoch": 0.06068883084687237, "grad_norm": 3.2312673339016746, "learning_rate": 6.068883084687238e-06, "loss": 0.9603, "step": 13709 }, { "epoch": 0.06069325778033556, "grad_norm": 3.203101923340247, "learning_rate": 6.069325778033557e-06, "loss": 0.9189, "step": 13710 }, { "epoch": 0.06069768471379875, "grad_norm": 2.998849183654472, "learning_rate": 6.069768471379875e-06, "loss": 1.0554, "step": 13711 }, { "epoch": 0.06070211164726194, "grad_norm": 3.362810536240792, "learning_rate": 6.070211164726195e-06, "loss": 0.9547, "step": 13712 }, { "epoch": 0.06070653858072513, "grad_norm": 2.869746870793277, "learning_rate": 6.070653858072514e-06, "loss": 0.9947, "step": 13713 }, { "epoch": 0.06071096551418832, "grad_norm": 2.3084015786338177, "learning_rate": 6.0710965514188325e-06, "loss": 0.6624, "step": 13714 }, { "epoch": 0.06071539244765151, "grad_norm": 2.6523621224723883, "learning_rate": 6.071539244765152e-06, "loss": 0.8302, "step": 13715 }, { "epoch": 0.060719819381114704, "grad_norm": 2.9064556616904484, "learning_rate": 6.071981938111471e-06, "loss": 0.66, "step": 13716 }, { "epoch": 0.060724246314577894, "grad_norm": 2.2978913492729194, "learning_rate": 6.07242463145779e-06, "loss": 0.6032, "step": 13717 }, { "epoch": 0.060728673248041085, "grad_norm": 2.79337741142072, "learning_rate": 6.072867324804109e-06, "loss": 1.0649, "step": 13718 }, { "epoch": 0.060733100181504275, "grad_norm": 2.4977446650514086, "learning_rate": 6.073310018150428e-06, "loss": 0.9638, "step": 13719 }, { "epoch": 0.06073752711496746, "grad_norm": 2.381390406062521, "learning_rate": 6.073752711496746e-06, "loss": 0.6752, "step": 13720 }, { "epoch": 0.06074195404843065, "grad_norm": 2.8849792563584966, "learning_rate": 6.074195404843066e-06, "loss": 0.9664, "step": 13721 }, { "epoch": 0.06074638098189384, "grad_norm": 2.4569026882325358, "learning_rate": 6.074638098189385e-06, "loss": 0.5758, "step": 13722 }, { "epoch": 0.06075080791535703, "grad_norm": 2.408161122294107, "learning_rate": 6.075080791535703e-06, "loss": 0.597, "step": 13723 }, { "epoch": 0.06075523484882022, "grad_norm": 3.1494820750991477, "learning_rate": 6.075523484882024e-06, "loss": 0.9078, "step": 13724 }, { "epoch": 0.06075966178228341, "grad_norm": 2.6080542072688693, "learning_rate": 6.075966178228342e-06, "loss": 0.8851, "step": 13725 }, { "epoch": 0.0607640887157466, "grad_norm": 2.8211479333895855, "learning_rate": 6.076408871574661e-06, "loss": 0.9382, "step": 13726 }, { "epoch": 0.06076851564920979, "grad_norm": 3.0644503293118865, "learning_rate": 6.07685156492098e-06, "loss": 0.9548, "step": 13727 }, { "epoch": 0.06077294258267298, "grad_norm": 2.6261175028754904, "learning_rate": 6.0772942582672986e-06, "loss": 0.6818, "step": 13728 }, { "epoch": 0.06077736951613617, "grad_norm": 4.301595113293064, "learning_rate": 6.077736951613618e-06, "loss": 1.0711, "step": 13729 }, { "epoch": 0.060781796449599364, "grad_norm": 2.5548011423585772, "learning_rate": 6.078179644959937e-06, "loss": 0.5106, "step": 13730 }, { "epoch": 0.060786223383062554, "grad_norm": 2.6526373060869104, "learning_rate": 6.078622338306256e-06, "loss": 0.7122, "step": 13731 }, { "epoch": 0.060790650316525745, "grad_norm": 3.000127719667222, "learning_rate": 6.079065031652574e-06, "loss": 0.8681, "step": 13732 }, { "epoch": 0.060795077249988935, "grad_norm": 2.7748956695626963, "learning_rate": 6.0795077249988946e-06, "loss": 0.3257, "step": 13733 }, { "epoch": 0.060799504183452126, "grad_norm": 2.571524657948714, "learning_rate": 6.079950418345213e-06, "loss": 0.796, "step": 13734 }, { "epoch": 0.06080393111691531, "grad_norm": 2.4499499183176807, "learning_rate": 6.080393111691532e-06, "loss": 0.5841, "step": 13735 }, { "epoch": 0.0608083580503785, "grad_norm": 2.3737257673891166, "learning_rate": 6.080835805037851e-06, "loss": 0.6608, "step": 13736 }, { "epoch": 0.06081278498384169, "grad_norm": 2.4990085334729875, "learning_rate": 6.08127849838417e-06, "loss": 0.6385, "step": 13737 }, { "epoch": 0.06081721191730488, "grad_norm": 3.190703343358153, "learning_rate": 6.081721191730489e-06, "loss": 1.024, "step": 13738 }, { "epoch": 0.06082163885076807, "grad_norm": 2.1892835585775225, "learning_rate": 6.082163885076808e-06, "loss": 0.6162, "step": 13739 }, { "epoch": 0.06082606578423126, "grad_norm": 2.616419554944316, "learning_rate": 6.082606578423127e-06, "loss": 0.6517, "step": 13740 }, { "epoch": 0.06083049271769445, "grad_norm": 3.1528983364570475, "learning_rate": 6.083049271769445e-06, "loss": 0.7048, "step": 13741 }, { "epoch": 0.06083491965115764, "grad_norm": 2.862714090606835, "learning_rate": 6.0834919651157655e-06, "loss": 1.0195, "step": 13742 }, { "epoch": 0.06083934658462083, "grad_norm": 2.658213906233695, "learning_rate": 6.083934658462084e-06, "loss": 0.8077, "step": 13743 }, { "epoch": 0.060843773518084024, "grad_norm": 2.8491795476052046, "learning_rate": 6.0843773518084026e-06, "loss": 0.981, "step": 13744 }, { "epoch": 0.060848200451547214, "grad_norm": 2.6794063564762864, "learning_rate": 6.084820045154722e-06, "loss": 0.5203, "step": 13745 }, { "epoch": 0.060852627385010405, "grad_norm": 3.239138167624363, "learning_rate": 6.085262738501041e-06, "loss": 0.9821, "step": 13746 }, { "epoch": 0.060857054318473595, "grad_norm": 2.429739072359684, "learning_rate": 6.08570543184736e-06, "loss": 0.7103, "step": 13747 }, { "epoch": 0.060861481251936786, "grad_norm": 2.79042131378733, "learning_rate": 6.086148125193679e-06, "loss": 0.8564, "step": 13748 }, { "epoch": 0.060865908185399976, "grad_norm": 2.595625727205623, "learning_rate": 6.086590818539998e-06, "loss": 0.6252, "step": 13749 }, { "epoch": 0.06087033511886317, "grad_norm": 3.5950493166742294, "learning_rate": 6.087033511886316e-06, "loss": 0.657, "step": 13750 }, { "epoch": 0.06087476205232635, "grad_norm": 2.4678056421599646, "learning_rate": 6.0874762052326365e-06, "loss": 0.8193, "step": 13751 }, { "epoch": 0.06087918898578954, "grad_norm": 3.2829633725515963, "learning_rate": 6.087918898578955e-06, "loss": 0.456, "step": 13752 }, { "epoch": 0.06088361591925273, "grad_norm": 2.305204519150689, "learning_rate": 6.0883615919252735e-06, "loss": 0.7132, "step": 13753 }, { "epoch": 0.06088804285271592, "grad_norm": 3.1266487865912485, "learning_rate": 6.088804285271594e-06, "loss": 0.7723, "step": 13754 }, { "epoch": 0.06089246978617911, "grad_norm": 2.681254135907456, "learning_rate": 6.089246978617912e-06, "loss": 0.9336, "step": 13755 }, { "epoch": 0.0608968967196423, "grad_norm": 2.858237447958404, "learning_rate": 6.089689671964231e-06, "loss": 0.9741, "step": 13756 }, { "epoch": 0.060901323653105494, "grad_norm": 2.989214381659104, "learning_rate": 6.09013236531055e-06, "loss": 0.6585, "step": 13757 }, { "epoch": 0.060905750586568684, "grad_norm": 3.2660569413324407, "learning_rate": 6.090575058656869e-06, "loss": 0.6743, "step": 13758 }, { "epoch": 0.060910177520031875, "grad_norm": 2.92035427332162, "learning_rate": 6.091017752003188e-06, "loss": 1.0249, "step": 13759 }, { "epoch": 0.060914604453495065, "grad_norm": 2.539104291241242, "learning_rate": 6.091460445349507e-06, "loss": 0.7848, "step": 13760 }, { "epoch": 0.060919031386958256, "grad_norm": 2.231175901071565, "learning_rate": 6.091903138695826e-06, "loss": 0.6398, "step": 13761 }, { "epoch": 0.060923458320421446, "grad_norm": 2.6428756047635047, "learning_rate": 6.0923458320421445e-06, "loss": 0.4523, "step": 13762 }, { "epoch": 0.06092788525388464, "grad_norm": 3.4167909442094095, "learning_rate": 6.092788525388465e-06, "loss": 1.0028, "step": 13763 }, { "epoch": 0.06093231218734783, "grad_norm": 3.0688472650646235, "learning_rate": 6.093231218734783e-06, "loss": 0.9976, "step": 13764 }, { "epoch": 0.06093673912081102, "grad_norm": 2.589167238134635, "learning_rate": 6.093673912081102e-06, "loss": 0.7607, "step": 13765 }, { "epoch": 0.0609411660542742, "grad_norm": 2.616636478006594, "learning_rate": 6.094116605427421e-06, "loss": 0.8065, "step": 13766 }, { "epoch": 0.06094559298773739, "grad_norm": 3.331378851028696, "learning_rate": 6.0945592987737405e-06, "loss": 0.7953, "step": 13767 }, { "epoch": 0.06095001992120058, "grad_norm": 2.463478861207089, "learning_rate": 6.095001992120059e-06, "loss": 0.7062, "step": 13768 }, { "epoch": 0.06095444685466377, "grad_norm": 3.1619630799699827, "learning_rate": 6.095444685466378e-06, "loss": 0.4333, "step": 13769 }, { "epoch": 0.06095887378812696, "grad_norm": 2.7467446185547244, "learning_rate": 6.095887378812697e-06, "loss": 0.7085, "step": 13770 }, { "epoch": 0.060963300721590154, "grad_norm": 3.1249065995880474, "learning_rate": 6.096330072159015e-06, "loss": 0.9046, "step": 13771 }, { "epoch": 0.060967727655053344, "grad_norm": 2.7925123368102764, "learning_rate": 6.096772765505336e-06, "loss": 1.0731, "step": 13772 }, { "epoch": 0.060972154588516535, "grad_norm": 2.7916549757226057, "learning_rate": 6.097215458851654e-06, "loss": 0.8792, "step": 13773 }, { "epoch": 0.060976581521979725, "grad_norm": 2.816269825138992, "learning_rate": 6.097658152197973e-06, "loss": 0.654, "step": 13774 }, { "epoch": 0.060981008455442916, "grad_norm": 3.3192403630498437, "learning_rate": 6.098100845544293e-06, "loss": 1.1277, "step": 13775 }, { "epoch": 0.060985435388906106, "grad_norm": 2.47939904970827, "learning_rate": 6.098543538890611e-06, "loss": 0.6851, "step": 13776 }, { "epoch": 0.0609898623223693, "grad_norm": 2.560874967080901, "learning_rate": 6.09898623223693e-06, "loss": 0.7444, "step": 13777 }, { "epoch": 0.06099428925583249, "grad_norm": 3.532466181070089, "learning_rate": 6.099428925583249e-06, "loss": 1.0304, "step": 13778 }, { "epoch": 0.06099871618929568, "grad_norm": 3.0753495958732002, "learning_rate": 6.099871618929568e-06, "loss": 1.3193, "step": 13779 }, { "epoch": 0.06100314312275887, "grad_norm": 2.664478427727271, "learning_rate": 6.100314312275886e-06, "loss": 0.7243, "step": 13780 }, { "epoch": 0.06100757005622205, "grad_norm": 2.7474507731579925, "learning_rate": 6.1007570056222066e-06, "loss": 0.6923, "step": 13781 }, { "epoch": 0.06101199698968524, "grad_norm": 2.368212793342741, "learning_rate": 6.101199698968525e-06, "loss": 0.4708, "step": 13782 }, { "epoch": 0.06101642392314843, "grad_norm": 3.6024253325313964, "learning_rate": 6.101642392314844e-06, "loss": 1.2244, "step": 13783 }, { "epoch": 0.06102085085661162, "grad_norm": 2.8001627702578435, "learning_rate": 6.102085085661164e-06, "loss": 1.0197, "step": 13784 }, { "epoch": 0.061025277790074814, "grad_norm": 2.1450840978530152, "learning_rate": 6.102527779007482e-06, "loss": 0.6223, "step": 13785 }, { "epoch": 0.061029704723538004, "grad_norm": 3.42426886344861, "learning_rate": 6.102970472353801e-06, "loss": 1.2415, "step": 13786 }, { "epoch": 0.061034131657001195, "grad_norm": 3.295528248639207, "learning_rate": 6.10341316570012e-06, "loss": 1.1593, "step": 13787 }, { "epoch": 0.061038558590464385, "grad_norm": 3.1398118271917554, "learning_rate": 6.103855859046439e-06, "loss": 0.9867, "step": 13788 }, { "epoch": 0.061042985523927576, "grad_norm": 2.73806291749898, "learning_rate": 6.104298552392758e-06, "loss": 0.6943, "step": 13789 }, { "epoch": 0.061047412457390766, "grad_norm": 2.2473135907213413, "learning_rate": 6.1047412457390775e-06, "loss": 0.5393, "step": 13790 }, { "epoch": 0.06105183939085396, "grad_norm": 2.914352761931931, "learning_rate": 6.105183939085396e-06, "loss": 1.028, "step": 13791 }, { "epoch": 0.06105626632431715, "grad_norm": 2.662377950185078, "learning_rate": 6.1056266324317146e-06, "loss": 0.7335, "step": 13792 }, { "epoch": 0.06106069325778034, "grad_norm": 2.349095219792017, "learning_rate": 6.106069325778035e-06, "loss": 0.4115, "step": 13793 }, { "epoch": 0.06106512019124353, "grad_norm": 3.3781152858293915, "learning_rate": 6.106512019124353e-06, "loss": 0.8456, "step": 13794 }, { "epoch": 0.06106954712470672, "grad_norm": 2.852235493496869, "learning_rate": 6.106954712470672e-06, "loss": 0.8164, "step": 13795 }, { "epoch": 0.0610739740581699, "grad_norm": 3.2599296551304735, "learning_rate": 6.107397405816991e-06, "loss": 0.6764, "step": 13796 }, { "epoch": 0.06107840099163309, "grad_norm": 3.5394719310078244, "learning_rate": 6.1078400991633106e-06, "loss": 1.0365, "step": 13797 }, { "epoch": 0.061082827925096284, "grad_norm": 3.1206331096708517, "learning_rate": 6.108282792509629e-06, "loss": 0.9745, "step": 13798 }, { "epoch": 0.061087254858559474, "grad_norm": 3.3025677636768527, "learning_rate": 6.1087254858559485e-06, "loss": 0.9735, "step": 13799 }, { "epoch": 0.061091681792022665, "grad_norm": 4.188823893885516, "learning_rate": 6.109168179202267e-06, "loss": 1.2809, "step": 13800 }, { "epoch": 0.061096108725485855, "grad_norm": 2.7818515442752756, "learning_rate": 6.1096108725485855e-06, "loss": 0.7704, "step": 13801 }, { "epoch": 0.061100535658949046, "grad_norm": 2.9191897215790426, "learning_rate": 6.110053565894906e-06, "loss": 0.9582, "step": 13802 }, { "epoch": 0.061104962592412236, "grad_norm": 2.4368490857304805, "learning_rate": 6.110496259241224e-06, "loss": 0.7018, "step": 13803 }, { "epoch": 0.06110938952587543, "grad_norm": 2.866079572373361, "learning_rate": 6.110938952587543e-06, "loss": 0.566, "step": 13804 }, { "epoch": 0.06111381645933862, "grad_norm": 3.4336134748672467, "learning_rate": 6.111381645933863e-06, "loss": 0.9187, "step": 13805 }, { "epoch": 0.06111824339280181, "grad_norm": 2.4051879213491585, "learning_rate": 6.1118243392801815e-06, "loss": 0.6646, "step": 13806 }, { "epoch": 0.061122670326265, "grad_norm": 4.311478486612752, "learning_rate": 6.1122670326265e-06, "loss": 1.7574, "step": 13807 }, { "epoch": 0.06112709725972819, "grad_norm": 3.0578148158610867, "learning_rate": 6.112709725972819e-06, "loss": 0.8704, "step": 13808 }, { "epoch": 0.06113152419319138, "grad_norm": 2.639431754756552, "learning_rate": 6.113152419319138e-06, "loss": 0.6579, "step": 13809 }, { "epoch": 0.06113595112665457, "grad_norm": 3.2243084710650916, "learning_rate": 6.113595112665457e-06, "loss": 0.8404, "step": 13810 }, { "epoch": 0.06114037806011775, "grad_norm": 3.4115022678701385, "learning_rate": 6.114037806011777e-06, "loss": 1.0688, "step": 13811 }, { "epoch": 0.061144804993580944, "grad_norm": 2.4802546481470698, "learning_rate": 6.114480499358095e-06, "loss": 0.8298, "step": 13812 }, { "epoch": 0.061149231927044134, "grad_norm": 2.6084370246580098, "learning_rate": 6.114923192704414e-06, "loss": 0.6705, "step": 13813 }, { "epoch": 0.061153658860507325, "grad_norm": 3.7431107549279634, "learning_rate": 6.115365886050734e-06, "loss": 1.1432, "step": 13814 }, { "epoch": 0.061158085793970515, "grad_norm": 3.20343832246272, "learning_rate": 6.1158085793970525e-06, "loss": 0.7904, "step": 13815 }, { "epoch": 0.061162512727433706, "grad_norm": 2.405036689341502, "learning_rate": 6.116251272743371e-06, "loss": 0.7443, "step": 13816 }, { "epoch": 0.061166939660896896, "grad_norm": 2.3668772820678727, "learning_rate": 6.11669396608969e-06, "loss": 0.7353, "step": 13817 }, { "epoch": 0.06117136659436009, "grad_norm": 2.7108479046149783, "learning_rate": 6.11713665943601e-06, "loss": 0.6574, "step": 13818 }, { "epoch": 0.06117579352782328, "grad_norm": 2.7902882250117456, "learning_rate": 6.117579352782328e-06, "loss": 0.809, "step": 13819 }, { "epoch": 0.06118022046128647, "grad_norm": 3.332613207854987, "learning_rate": 6.118022046128648e-06, "loss": 1.0576, "step": 13820 }, { "epoch": 0.06118464739474966, "grad_norm": 2.678160023406945, "learning_rate": 6.118464739474966e-06, "loss": 0.769, "step": 13821 }, { "epoch": 0.06118907432821285, "grad_norm": 3.6456117914471795, "learning_rate": 6.118907432821285e-06, "loss": 1.0574, "step": 13822 }, { "epoch": 0.06119350126167604, "grad_norm": 2.2356730539910954, "learning_rate": 6.119350126167605e-06, "loss": 0.5369, "step": 13823 }, { "epoch": 0.06119792819513923, "grad_norm": 2.69930789493312, "learning_rate": 6.119792819513923e-06, "loss": 0.7613, "step": 13824 }, { "epoch": 0.06120235512860242, "grad_norm": 2.7771030265865213, "learning_rate": 6.120235512860242e-06, "loss": 1.1296, "step": 13825 }, { "epoch": 0.061206782062065604, "grad_norm": 2.696050047586553, "learning_rate": 6.120678206206561e-06, "loss": 0.6578, "step": 13826 }, { "epoch": 0.061211208995528794, "grad_norm": 2.8459056895467385, "learning_rate": 6.121120899552881e-06, "loss": 0.6798, "step": 13827 }, { "epoch": 0.061215635928991985, "grad_norm": 2.0521227747885984, "learning_rate": 6.121563592899199e-06, "loss": 0.447, "step": 13828 }, { "epoch": 0.061220062862455175, "grad_norm": 2.394460284427727, "learning_rate": 6.1220062862455186e-06, "loss": 0.587, "step": 13829 }, { "epoch": 0.061224489795918366, "grad_norm": 2.495934597026243, "learning_rate": 6.122448979591837e-06, "loss": 0.721, "step": 13830 }, { "epoch": 0.061228916729381556, "grad_norm": 2.99134112726127, "learning_rate": 6.122891672938156e-06, "loss": 0.6441, "step": 13831 }, { "epoch": 0.06123334366284475, "grad_norm": 2.729249326088893, "learning_rate": 6.123334366284476e-06, "loss": 0.9493, "step": 13832 }, { "epoch": 0.06123777059630794, "grad_norm": 2.4976614435821842, "learning_rate": 6.123777059630794e-06, "loss": 0.6008, "step": 13833 }, { "epoch": 0.06124219752977113, "grad_norm": 2.4775281218593537, "learning_rate": 6.124219752977113e-06, "loss": 0.6796, "step": 13834 }, { "epoch": 0.06124662446323432, "grad_norm": 2.8939374488739658, "learning_rate": 6.124662446323433e-06, "loss": 0.7002, "step": 13835 }, { "epoch": 0.06125105139669751, "grad_norm": 2.2468940779238253, "learning_rate": 6.125105139669752e-06, "loss": 0.6221, "step": 13836 }, { "epoch": 0.0612554783301607, "grad_norm": 2.334479352163224, "learning_rate": 6.12554783301607e-06, "loss": 0.6675, "step": 13837 }, { "epoch": 0.06125990526362389, "grad_norm": 2.477178590888304, "learning_rate": 6.1259905263623895e-06, "loss": 0.851, "step": 13838 }, { "epoch": 0.06126433219708708, "grad_norm": 2.221836656409033, "learning_rate": 6.126433219708708e-06, "loss": 0.6409, "step": 13839 }, { "epoch": 0.06126875913055027, "grad_norm": 3.230553765945912, "learning_rate": 6.126875913055027e-06, "loss": 0.7874, "step": 13840 }, { "epoch": 0.061273186064013455, "grad_norm": 2.560498309059243, "learning_rate": 6.127318606401347e-06, "loss": 0.8201, "step": 13841 }, { "epoch": 0.061277612997476645, "grad_norm": 2.755736347033953, "learning_rate": 6.127761299747665e-06, "loss": 0.7013, "step": 13842 }, { "epoch": 0.061282039930939836, "grad_norm": 2.8863725553376502, "learning_rate": 6.128203993093984e-06, "loss": 0.8906, "step": 13843 }, { "epoch": 0.061286466864403026, "grad_norm": 2.525864299435895, "learning_rate": 6.128646686440304e-06, "loss": 0.7668, "step": 13844 }, { "epoch": 0.06129089379786622, "grad_norm": 2.521310588701491, "learning_rate": 6.1290893797866226e-06, "loss": 0.4413, "step": 13845 }, { "epoch": 0.06129532073132941, "grad_norm": 2.6142502724527867, "learning_rate": 6.129532073132941e-06, "loss": 0.7631, "step": 13846 }, { "epoch": 0.0612997476647926, "grad_norm": 2.676331990607876, "learning_rate": 6.1299747664792605e-06, "loss": 0.4814, "step": 13847 }, { "epoch": 0.06130417459825579, "grad_norm": 2.5656750826970645, "learning_rate": 6.13041745982558e-06, "loss": 0.655, "step": 13848 }, { "epoch": 0.06130860153171898, "grad_norm": 2.684401636295326, "learning_rate": 6.130860153171898e-06, "loss": 0.9077, "step": 13849 }, { "epoch": 0.06131302846518217, "grad_norm": 2.651711757498343, "learning_rate": 6.131302846518218e-06, "loss": 0.8157, "step": 13850 }, { "epoch": 0.06131745539864536, "grad_norm": 2.617382414180828, "learning_rate": 6.131745539864536e-06, "loss": 0.8456, "step": 13851 }, { "epoch": 0.06132188233210855, "grad_norm": 2.6606821114186845, "learning_rate": 6.132188233210855e-06, "loss": 0.4612, "step": 13852 }, { "epoch": 0.06132630926557174, "grad_norm": 3.043777013139737, "learning_rate": 6.132630926557175e-06, "loss": 0.7102, "step": 13853 }, { "epoch": 0.06133073619903493, "grad_norm": 2.300302090415983, "learning_rate": 6.1330736199034935e-06, "loss": 0.6912, "step": 13854 }, { "epoch": 0.06133516313249812, "grad_norm": 3.4889477769655897, "learning_rate": 6.133516313249812e-06, "loss": 1.1804, "step": 13855 }, { "epoch": 0.061339590065961305, "grad_norm": 2.7511496548220755, "learning_rate": 6.133959006596132e-06, "loss": 0.8137, "step": 13856 }, { "epoch": 0.061344016999424496, "grad_norm": 3.0612816562004137, "learning_rate": 6.134401699942451e-06, "loss": 0.8239, "step": 13857 }, { "epoch": 0.061348443932887686, "grad_norm": 3.023203114102135, "learning_rate": 6.134844393288769e-06, "loss": 0.6496, "step": 13858 }, { "epoch": 0.06135287086635088, "grad_norm": 2.708930661762861, "learning_rate": 6.135287086635089e-06, "loss": 0.6409, "step": 13859 }, { "epoch": 0.06135729779981407, "grad_norm": 2.5797417225592345, "learning_rate": 6.135729779981407e-06, "loss": 0.6274, "step": 13860 }, { "epoch": 0.06136172473327726, "grad_norm": 2.654793713967705, "learning_rate": 6.136172473327726e-06, "loss": 1.0161, "step": 13861 }, { "epoch": 0.06136615166674045, "grad_norm": 2.8579156312620784, "learning_rate": 6.136615166674046e-06, "loss": 0.942, "step": 13862 }, { "epoch": 0.06137057860020364, "grad_norm": 2.8862952862806552, "learning_rate": 6.1370578600203645e-06, "loss": 0.6652, "step": 13863 }, { "epoch": 0.06137500553366683, "grad_norm": 2.529074354409144, "learning_rate": 6.137500553366683e-06, "loss": 0.8903, "step": 13864 }, { "epoch": 0.06137943246713002, "grad_norm": 3.313570270924707, "learning_rate": 6.137943246713003e-06, "loss": 1.179, "step": 13865 }, { "epoch": 0.06138385940059321, "grad_norm": 2.4777735990415906, "learning_rate": 6.138385940059322e-06, "loss": 0.7273, "step": 13866 }, { "epoch": 0.0613882863340564, "grad_norm": 3.235859036573009, "learning_rate": 6.13882863340564e-06, "loss": 0.8124, "step": 13867 }, { "epoch": 0.06139271326751959, "grad_norm": 2.3650099105227818, "learning_rate": 6.13927132675196e-06, "loss": 0.5618, "step": 13868 }, { "epoch": 0.06139714020098278, "grad_norm": 3.06206487191708, "learning_rate": 6.139714020098278e-06, "loss": 0.7305, "step": 13869 }, { "epoch": 0.06140156713444597, "grad_norm": 2.0441475160964817, "learning_rate": 6.1401567134445975e-06, "loss": 0.5249, "step": 13870 }, { "epoch": 0.061405994067909156, "grad_norm": 3.6388010201783336, "learning_rate": 6.140599406790917e-06, "loss": 1.3601, "step": 13871 }, { "epoch": 0.061410421001372346, "grad_norm": 3.109279689558647, "learning_rate": 6.141042100137235e-06, "loss": 0.9261, "step": 13872 }, { "epoch": 0.06141484793483554, "grad_norm": 2.3449165996227337, "learning_rate": 6.141484793483554e-06, "loss": 0.5392, "step": 13873 }, { "epoch": 0.06141927486829873, "grad_norm": 2.600784433840032, "learning_rate": 6.141927486829874e-06, "loss": 0.5745, "step": 13874 }, { "epoch": 0.06142370180176192, "grad_norm": 2.4639773495695114, "learning_rate": 6.142370180176193e-06, "loss": 0.7024, "step": 13875 }, { "epoch": 0.06142812873522511, "grad_norm": 2.7125338251602327, "learning_rate": 6.142812873522511e-06, "loss": 0.9023, "step": 13876 }, { "epoch": 0.0614325556686883, "grad_norm": 2.5537299440306014, "learning_rate": 6.1432555668688306e-06, "loss": 0.721, "step": 13877 }, { "epoch": 0.06143698260215149, "grad_norm": 3.262589382762048, "learning_rate": 6.14369826021515e-06, "loss": 0.4324, "step": 13878 }, { "epoch": 0.06144140953561468, "grad_norm": 2.989398520163288, "learning_rate": 6.1441409535614685e-06, "loss": 0.8856, "step": 13879 }, { "epoch": 0.06144583646907787, "grad_norm": 4.877393712218512, "learning_rate": 6.144583646907788e-06, "loss": 1.289, "step": 13880 }, { "epoch": 0.06145026340254106, "grad_norm": 2.527885816294625, "learning_rate": 6.145026340254106e-06, "loss": 0.4948, "step": 13881 }, { "epoch": 0.06145469033600425, "grad_norm": 3.322665979174657, "learning_rate": 6.145469033600425e-06, "loss": 0.8382, "step": 13882 }, { "epoch": 0.06145911726946744, "grad_norm": 2.6567110196709702, "learning_rate": 6.145911726946745e-06, "loss": 0.6839, "step": 13883 }, { "epoch": 0.06146354420293063, "grad_norm": 3.001356064397014, "learning_rate": 6.146354420293064e-06, "loss": 1.0129, "step": 13884 }, { "epoch": 0.06146797113639382, "grad_norm": 3.6326616114932677, "learning_rate": 6.146797113639382e-06, "loss": 0.7648, "step": 13885 }, { "epoch": 0.06147239806985701, "grad_norm": 2.659680404813483, "learning_rate": 6.147239806985702e-06, "loss": 0.598, "step": 13886 }, { "epoch": 0.0614768250033202, "grad_norm": 3.210373546254097, "learning_rate": 6.147682500332021e-06, "loss": 0.9578, "step": 13887 }, { "epoch": 0.06148125193678339, "grad_norm": 2.9433540539085317, "learning_rate": 6.148125193678339e-06, "loss": 0.6152, "step": 13888 }, { "epoch": 0.06148567887024658, "grad_norm": 2.453103746678099, "learning_rate": 6.148567887024659e-06, "loss": 0.7507, "step": 13889 }, { "epoch": 0.06149010580370977, "grad_norm": 2.881069588110604, "learning_rate": 6.149010580370977e-06, "loss": 0.8836, "step": 13890 }, { "epoch": 0.06149453273717296, "grad_norm": 2.924709188043285, "learning_rate": 6.149453273717297e-06, "loss": 0.7752, "step": 13891 }, { "epoch": 0.06149895967063615, "grad_norm": 2.5001545037410047, "learning_rate": 6.149895967063616e-06, "loss": 0.8748, "step": 13892 }, { "epoch": 0.06150338660409934, "grad_norm": 2.7497475270300864, "learning_rate": 6.1503386604099346e-06, "loss": 0.6561, "step": 13893 }, { "epoch": 0.06150781353756253, "grad_norm": 2.3149812523534568, "learning_rate": 6.150781353756253e-06, "loss": 0.8443, "step": 13894 }, { "epoch": 0.06151224047102572, "grad_norm": 2.7324876180674074, "learning_rate": 6.151224047102573e-06, "loss": 0.9138, "step": 13895 }, { "epoch": 0.06151666740448891, "grad_norm": 3.238881051018464, "learning_rate": 6.151666740448892e-06, "loss": 0.6641, "step": 13896 }, { "epoch": 0.0615210943379521, "grad_norm": 2.673229348800214, "learning_rate": 6.15210943379521e-06, "loss": 0.6896, "step": 13897 }, { "epoch": 0.06152552127141529, "grad_norm": 2.9904203846867388, "learning_rate": 6.15255212714153e-06, "loss": 0.736, "step": 13898 }, { "epoch": 0.06152994820487848, "grad_norm": 2.4809058172046403, "learning_rate": 6.152994820487849e-06, "loss": 0.6291, "step": 13899 }, { "epoch": 0.061534375138341674, "grad_norm": 3.0744818945211407, "learning_rate": 6.153437513834168e-06, "loss": 0.8801, "step": 13900 }, { "epoch": 0.061538802071804864, "grad_norm": 2.4668804077325914, "learning_rate": 6.153880207180487e-06, "loss": 0.8385, "step": 13901 }, { "epoch": 0.06154322900526805, "grad_norm": 3.1080517517538397, "learning_rate": 6.1543229005268055e-06, "loss": 0.7155, "step": 13902 }, { "epoch": 0.06154765593873124, "grad_norm": 3.9271162209760373, "learning_rate": 6.154765593873124e-06, "loss": 0.9537, "step": 13903 }, { "epoch": 0.06155208287219443, "grad_norm": 3.4099810995047237, "learning_rate": 6.155208287219444e-06, "loss": 0.9481, "step": 13904 }, { "epoch": 0.06155650980565762, "grad_norm": 2.7231041753953082, "learning_rate": 6.155650980565763e-06, "loss": 0.6837, "step": 13905 }, { "epoch": 0.06156093673912081, "grad_norm": 3.7109700281542977, "learning_rate": 6.156093673912081e-06, "loss": 1.1785, "step": 13906 }, { "epoch": 0.061565363672584, "grad_norm": 3.1395928097920187, "learning_rate": 6.156536367258401e-06, "loss": 1.0975, "step": 13907 }, { "epoch": 0.06156979060604719, "grad_norm": 2.6190486142412777, "learning_rate": 6.15697906060472e-06, "loss": 0.8417, "step": 13908 }, { "epoch": 0.06157421753951038, "grad_norm": 2.6185918791538345, "learning_rate": 6.157421753951039e-06, "loss": 0.8444, "step": 13909 }, { "epoch": 0.06157864447297357, "grad_norm": 3.0620936229008433, "learning_rate": 6.157864447297358e-06, "loss": 0.7442, "step": 13910 }, { "epoch": 0.06158307140643676, "grad_norm": 2.4517751233376344, "learning_rate": 6.1583071406436765e-06, "loss": 0.898, "step": 13911 }, { "epoch": 0.06158749833989995, "grad_norm": 3.3147731016831905, "learning_rate": 6.158749833989995e-06, "loss": 1.3232, "step": 13912 }, { "epoch": 0.06159192527336314, "grad_norm": 3.0070314751684952, "learning_rate": 6.159192527336315e-06, "loss": 0.7365, "step": 13913 }, { "epoch": 0.061596352206826334, "grad_norm": 2.6726180699114694, "learning_rate": 6.159635220682634e-06, "loss": 0.712, "step": 13914 }, { "epoch": 0.061600779140289524, "grad_norm": 3.1357334113380144, "learning_rate": 6.160077914028952e-06, "loss": 0.8262, "step": 13915 }, { "epoch": 0.061605206073752715, "grad_norm": 3.193097547024809, "learning_rate": 6.1605206073752725e-06, "loss": 0.7047, "step": 13916 }, { "epoch": 0.0616096330072159, "grad_norm": 2.582695187397945, "learning_rate": 6.160963300721591e-06, "loss": 0.7072, "step": 13917 }, { "epoch": 0.06161405994067909, "grad_norm": 2.5803443393918006, "learning_rate": 6.1614059940679095e-06, "loss": 0.7374, "step": 13918 }, { "epoch": 0.06161848687414228, "grad_norm": 2.876995519787692, "learning_rate": 6.161848687414229e-06, "loss": 0.9312, "step": 13919 }, { "epoch": 0.06162291380760547, "grad_norm": 2.6057374326947005, "learning_rate": 6.162291380760547e-06, "loss": 0.8777, "step": 13920 }, { "epoch": 0.06162734074106866, "grad_norm": 2.8649674553382196, "learning_rate": 6.162734074106867e-06, "loss": 0.43, "step": 13921 }, { "epoch": 0.06163176767453185, "grad_norm": 2.567616071462901, "learning_rate": 6.163176767453186e-06, "loss": 0.802, "step": 13922 }, { "epoch": 0.06163619460799504, "grad_norm": 2.9679946893544242, "learning_rate": 6.163619460799505e-06, "loss": 0.8618, "step": 13923 }, { "epoch": 0.06164062154145823, "grad_norm": 2.714992319115213, "learning_rate": 6.164062154145823e-06, "loss": 0.8161, "step": 13924 }, { "epoch": 0.06164504847492142, "grad_norm": 2.835742842560332, "learning_rate": 6.1645048474921434e-06, "loss": 0.7499, "step": 13925 }, { "epoch": 0.06164947540838461, "grad_norm": 2.8881134947055536, "learning_rate": 6.164947540838462e-06, "loss": 0.7415, "step": 13926 }, { "epoch": 0.061653902341847804, "grad_norm": 3.7644494800924915, "learning_rate": 6.1653902341847805e-06, "loss": 1.0299, "step": 13927 }, { "epoch": 0.061658329275310994, "grad_norm": 2.847899883958943, "learning_rate": 6.1658329275311e-06, "loss": 0.4485, "step": 13928 }, { "epoch": 0.061662756208774185, "grad_norm": 2.628721505735787, "learning_rate": 6.166275620877419e-06, "loss": 0.7446, "step": 13929 }, { "epoch": 0.061667183142237375, "grad_norm": 3.215797295038056, "learning_rate": 6.166718314223738e-06, "loss": 0.8495, "step": 13930 }, { "epoch": 0.061671610075700566, "grad_norm": 2.948940642758207, "learning_rate": 6.167161007570057e-06, "loss": 0.7908, "step": 13931 }, { "epoch": 0.06167603700916375, "grad_norm": 2.634601664302895, "learning_rate": 6.167603700916376e-06, "loss": 0.7401, "step": 13932 }, { "epoch": 0.06168046394262694, "grad_norm": 3.2553836339302133, "learning_rate": 6.168046394262694e-06, "loss": 0.9761, "step": 13933 }, { "epoch": 0.06168489087609013, "grad_norm": 2.5853482059668416, "learning_rate": 6.168489087609014e-06, "loss": 0.6679, "step": 13934 }, { "epoch": 0.06168931780955332, "grad_norm": 3.5099197588669977, "learning_rate": 6.168931780955333e-06, "loss": 0.4676, "step": 13935 }, { "epoch": 0.06169374474301651, "grad_norm": 2.554498726818545, "learning_rate": 6.169374474301651e-06, "loss": 0.5879, "step": 13936 }, { "epoch": 0.0616981716764797, "grad_norm": 2.766803852017835, "learning_rate": 6.169817167647972e-06, "loss": 0.8864, "step": 13937 }, { "epoch": 0.06170259860994289, "grad_norm": 2.694260942654523, "learning_rate": 6.17025986099429e-06, "loss": 0.5376, "step": 13938 }, { "epoch": 0.06170702554340608, "grad_norm": 3.0215318070305326, "learning_rate": 6.170702554340609e-06, "loss": 0.8295, "step": 13939 }, { "epoch": 0.06171145247686927, "grad_norm": 2.8115581036893498, "learning_rate": 6.171145247686928e-06, "loss": 0.9651, "step": 13940 }, { "epoch": 0.061715879410332464, "grad_norm": 2.6701536434265436, "learning_rate": 6.171587941033247e-06, "loss": 0.7771, "step": 13941 }, { "epoch": 0.061720306343795654, "grad_norm": 2.488243857105552, "learning_rate": 6.172030634379565e-06, "loss": 0.5685, "step": 13942 }, { "epoch": 0.061724733277258845, "grad_norm": 3.2097040495446847, "learning_rate": 6.172473327725885e-06, "loss": 0.7898, "step": 13943 }, { "epoch": 0.061729160210722035, "grad_norm": 2.682546949835054, "learning_rate": 6.172916021072204e-06, "loss": 0.5398, "step": 13944 }, { "epoch": 0.061733587144185226, "grad_norm": 3.690937199550354, "learning_rate": 6.173358714418522e-06, "loss": 0.9534, "step": 13945 }, { "epoch": 0.061738014077648416, "grad_norm": 3.07842745297943, "learning_rate": 6.173801407764843e-06, "loss": 0.8336, "step": 13946 }, { "epoch": 0.0617424410111116, "grad_norm": 3.6621597640334134, "learning_rate": 6.174244101111161e-06, "loss": 0.7072, "step": 13947 }, { "epoch": 0.06174686794457479, "grad_norm": 3.9702396186162594, "learning_rate": 6.17468679445748e-06, "loss": 1.248, "step": 13948 }, { "epoch": 0.06175129487803798, "grad_norm": 2.6983889191204184, "learning_rate": 6.175129487803799e-06, "loss": 0.9063, "step": 13949 }, { "epoch": 0.06175572181150117, "grad_norm": 3.005644630026732, "learning_rate": 6.1755721811501175e-06, "loss": 0.7853, "step": 13950 }, { "epoch": 0.06176014874496436, "grad_norm": 3.7611537021346955, "learning_rate": 6.176014874496437e-06, "loss": 1.2656, "step": 13951 }, { "epoch": 0.06176457567842755, "grad_norm": 2.6716328811519197, "learning_rate": 6.176457567842756e-06, "loss": 0.8493, "step": 13952 }, { "epoch": 0.06176900261189074, "grad_norm": 3.0484158887514514, "learning_rate": 6.176900261189075e-06, "loss": 0.5246, "step": 13953 }, { "epoch": 0.06177342954535393, "grad_norm": 3.854921433582756, "learning_rate": 6.177342954535393e-06, "loss": 0.9024, "step": 13954 }, { "epoch": 0.061777856478817124, "grad_norm": 2.3633544327535123, "learning_rate": 6.1777856478817135e-06, "loss": 0.8555, "step": 13955 }, { "epoch": 0.061782283412280314, "grad_norm": 2.5482525734897505, "learning_rate": 6.178228341228032e-06, "loss": 0.706, "step": 13956 }, { "epoch": 0.061786710345743505, "grad_norm": 2.9617709937816192, "learning_rate": 6.178671034574351e-06, "loss": 0.7928, "step": 13957 }, { "epoch": 0.061791137279206695, "grad_norm": 3.7454267583443657, "learning_rate": 6.17911372792067e-06, "loss": 1.0413, "step": 13958 }, { "epoch": 0.061795564212669886, "grad_norm": 3.07480804464943, "learning_rate": 6.179556421266989e-06, "loss": 1.0176, "step": 13959 }, { "epoch": 0.061799991146133076, "grad_norm": 2.6855663109977255, "learning_rate": 6.179999114613308e-06, "loss": 0.966, "step": 13960 }, { "epoch": 0.06180441807959627, "grad_norm": 2.355723543327527, "learning_rate": 6.180441807959627e-06, "loss": 0.6375, "step": 13961 }, { "epoch": 0.06180884501305945, "grad_norm": 3.0542966805934757, "learning_rate": 6.180884501305946e-06, "loss": 0.9815, "step": 13962 }, { "epoch": 0.06181327194652264, "grad_norm": 2.4830188988298514, "learning_rate": 6.181327194652264e-06, "loss": 0.7497, "step": 13963 }, { "epoch": 0.06181769887998583, "grad_norm": 2.3906763381242397, "learning_rate": 6.1817698879985845e-06, "loss": 0.5409, "step": 13964 }, { "epoch": 0.06182212581344902, "grad_norm": 2.5485796394007636, "learning_rate": 6.182212581344903e-06, "loss": 0.9299, "step": 13965 }, { "epoch": 0.06182655274691221, "grad_norm": 2.7684227222198303, "learning_rate": 6.1826552746912215e-06, "loss": 0.7586, "step": 13966 }, { "epoch": 0.0618309796803754, "grad_norm": 2.3501983028894475, "learning_rate": 6.183097968037542e-06, "loss": 0.6195, "step": 13967 }, { "epoch": 0.061835406613838594, "grad_norm": 3.032405625541217, "learning_rate": 6.18354066138386e-06, "loss": 1.0276, "step": 13968 }, { "epoch": 0.061839833547301784, "grad_norm": 2.8392083149967418, "learning_rate": 6.183983354730179e-06, "loss": 0.6257, "step": 13969 }, { "epoch": 0.061844260480764975, "grad_norm": 2.3466953030809896, "learning_rate": 6.184426048076498e-06, "loss": 0.7684, "step": 13970 }, { "epoch": 0.061848687414228165, "grad_norm": 3.3623240157550214, "learning_rate": 6.184868741422817e-06, "loss": 0.9685, "step": 13971 }, { "epoch": 0.061853114347691356, "grad_norm": 2.706045600049378, "learning_rate": 6.185311434769136e-06, "loss": 0.8129, "step": 13972 }, { "epoch": 0.061857541281154546, "grad_norm": 2.4091515611931342, "learning_rate": 6.1857541281154554e-06, "loss": 0.5508, "step": 13973 }, { "epoch": 0.06186196821461774, "grad_norm": 2.8007612654109444, "learning_rate": 6.186196821461774e-06, "loss": 0.5663, "step": 13974 }, { "epoch": 0.06186639514808093, "grad_norm": 2.803184617616208, "learning_rate": 6.1866395148080925e-06, "loss": 0.8734, "step": 13975 }, { "epoch": 0.06187082208154412, "grad_norm": 2.62609901248877, "learning_rate": 6.187082208154413e-06, "loss": 0.4876, "step": 13976 }, { "epoch": 0.0618752490150073, "grad_norm": 2.952971844292531, "learning_rate": 6.187524901500731e-06, "loss": 0.6775, "step": 13977 }, { "epoch": 0.06187967594847049, "grad_norm": 2.980469012155038, "learning_rate": 6.18796759484705e-06, "loss": 0.8957, "step": 13978 }, { "epoch": 0.06188410288193368, "grad_norm": 4.125325401017998, "learning_rate": 6.188410288193369e-06, "loss": 1.2577, "step": 13979 }, { "epoch": 0.06188852981539687, "grad_norm": 2.5509346515258993, "learning_rate": 6.188852981539688e-06, "loss": 0.8866, "step": 13980 }, { "epoch": 0.06189295674886006, "grad_norm": 2.6428403796984195, "learning_rate": 6.189295674886007e-06, "loss": 0.563, "step": 13981 }, { "epoch": 0.061897383682323254, "grad_norm": 2.9754296303943324, "learning_rate": 6.189738368232326e-06, "loss": 0.5761, "step": 13982 }, { "epoch": 0.061901810615786444, "grad_norm": 3.7912665780184303, "learning_rate": 6.190181061578645e-06, "loss": 0.8797, "step": 13983 }, { "epoch": 0.061906237549249635, "grad_norm": 2.3391991688855738, "learning_rate": 6.190623754924963e-06, "loss": 0.6528, "step": 13984 }, { "epoch": 0.061910664482712825, "grad_norm": 3.115667787894782, "learning_rate": 6.191066448271284e-06, "loss": 0.7829, "step": 13985 }, { "epoch": 0.061915091416176016, "grad_norm": 2.2992436666486853, "learning_rate": 6.191509141617602e-06, "loss": 0.5961, "step": 13986 }, { "epoch": 0.061919518349639206, "grad_norm": 2.9682665882810326, "learning_rate": 6.191951834963921e-06, "loss": 0.894, "step": 13987 }, { "epoch": 0.0619239452831024, "grad_norm": 2.8894759798101495, "learning_rate": 6.19239452831024e-06, "loss": 0.9218, "step": 13988 }, { "epoch": 0.06192837221656559, "grad_norm": 2.470191451906966, "learning_rate": 6.1928372216565594e-06, "loss": 0.7282, "step": 13989 }, { "epoch": 0.06193279915002878, "grad_norm": 2.4867519312651654, "learning_rate": 6.193279915002878e-06, "loss": 0.7044, "step": 13990 }, { "epoch": 0.06193722608349197, "grad_norm": 3.2054088308749225, "learning_rate": 6.193722608349197e-06, "loss": 1.0932, "step": 13991 }, { "epoch": 0.06194165301695515, "grad_norm": 2.7802068584752573, "learning_rate": 6.194165301695516e-06, "loss": 0.6253, "step": 13992 }, { "epoch": 0.06194607995041834, "grad_norm": 3.0046872485835445, "learning_rate": 6.194607995041834e-06, "loss": 0.9159, "step": 13993 }, { "epoch": 0.06195050688388153, "grad_norm": 2.9812265784396317, "learning_rate": 6.195050688388155e-06, "loss": 0.8766, "step": 13994 }, { "epoch": 0.06195493381734472, "grad_norm": 2.951371802508567, "learning_rate": 6.195493381734473e-06, "loss": 0.9737, "step": 13995 }, { "epoch": 0.061959360750807914, "grad_norm": 2.8336455514006147, "learning_rate": 6.195936075080792e-06, "loss": 0.6903, "step": 13996 }, { "epoch": 0.061963787684271104, "grad_norm": 2.4572103715969993, "learning_rate": 6.196378768427112e-06, "loss": 0.8241, "step": 13997 }, { "epoch": 0.061968214617734295, "grad_norm": 2.415583530526315, "learning_rate": 6.19682146177343e-06, "loss": 0.7165, "step": 13998 }, { "epoch": 0.061972641551197485, "grad_norm": 2.5435358209368024, "learning_rate": 6.197264155119749e-06, "loss": 0.5969, "step": 13999 }, { "epoch": 0.061977068484660676, "grad_norm": 2.328280975459141, "learning_rate": 6.197706848466068e-06, "loss": 0.6103, "step": 14000 }, { "epoch": 0.061981495418123866, "grad_norm": 2.767388979971041, "learning_rate": 6.198149541812387e-06, "loss": 0.7235, "step": 14001 }, { "epoch": 0.06198592235158706, "grad_norm": 2.714588463483862, "learning_rate": 6.198592235158706e-06, "loss": 0.8099, "step": 14002 }, { "epoch": 0.06199034928505025, "grad_norm": 2.95135259439065, "learning_rate": 6.1990349285050255e-06, "loss": 0.5432, "step": 14003 }, { "epoch": 0.06199477621851344, "grad_norm": 2.3088664380964703, "learning_rate": 6.199477621851344e-06, "loss": 0.628, "step": 14004 }, { "epoch": 0.06199920315197663, "grad_norm": 4.030677024817774, "learning_rate": 6.199920315197663e-06, "loss": 0.8518, "step": 14005 }, { "epoch": 0.06200363008543982, "grad_norm": 2.7976669970103645, "learning_rate": 6.200363008543983e-06, "loss": 0.9615, "step": 14006 }, { "epoch": 0.062008057018903, "grad_norm": 2.446519912204384, "learning_rate": 6.200805701890301e-06, "loss": 0.6209, "step": 14007 }, { "epoch": 0.06201248395236619, "grad_norm": 2.748095411382277, "learning_rate": 6.20124839523662e-06, "loss": 0.7658, "step": 14008 }, { "epoch": 0.062016910885829384, "grad_norm": 2.9214885472844156, "learning_rate": 6.201691088582939e-06, "loss": 0.8596, "step": 14009 }, { "epoch": 0.062021337819292574, "grad_norm": 2.5715338076760617, "learning_rate": 6.202133781929259e-06, "loss": 0.7676, "step": 14010 }, { "epoch": 0.062025764752755765, "grad_norm": 3.129138110352358, "learning_rate": 6.202576475275577e-06, "loss": 0.7914, "step": 14011 }, { "epoch": 0.062030191686218955, "grad_norm": 2.4450606649989335, "learning_rate": 6.2030191686218965e-06, "loss": 0.5754, "step": 14012 }, { "epoch": 0.062034618619682146, "grad_norm": 2.2570688054684065, "learning_rate": 6.203461861968215e-06, "loss": 0.6275, "step": 14013 }, { "epoch": 0.062039045553145336, "grad_norm": 2.764373937279571, "learning_rate": 6.2039045553145335e-06, "loss": 0.5844, "step": 14014 }, { "epoch": 0.06204347248660853, "grad_norm": 2.4377690817525868, "learning_rate": 6.204347248660854e-06, "loss": 0.6823, "step": 14015 }, { "epoch": 0.06204789942007172, "grad_norm": 2.6320140015339692, "learning_rate": 6.204789942007172e-06, "loss": 0.785, "step": 14016 }, { "epoch": 0.06205232635353491, "grad_norm": 2.2472735688342382, "learning_rate": 6.205232635353491e-06, "loss": 0.659, "step": 14017 }, { "epoch": 0.0620567532869981, "grad_norm": 2.6604134286163457, "learning_rate": 6.205675328699811e-06, "loss": 0.5315, "step": 14018 }, { "epoch": 0.06206118022046129, "grad_norm": 2.8274948458371507, "learning_rate": 6.2061180220461295e-06, "loss": 0.8231, "step": 14019 }, { "epoch": 0.06206560715392448, "grad_norm": 2.729688494138188, "learning_rate": 6.206560715392448e-06, "loss": 0.9136, "step": 14020 }, { "epoch": 0.06207003408738767, "grad_norm": 2.1649409498989303, "learning_rate": 6.2070034087387674e-06, "loss": 0.4687, "step": 14021 }, { "epoch": 0.06207446102085085, "grad_norm": 2.7596664461946716, "learning_rate": 6.207446102085086e-06, "loss": 0.7885, "step": 14022 }, { "epoch": 0.062078887954314044, "grad_norm": 2.916397651717068, "learning_rate": 6.2078887954314045e-06, "loss": 0.8046, "step": 14023 }, { "epoch": 0.062083314887777234, "grad_norm": 3.123830646871466, "learning_rate": 6.208331488777725e-06, "loss": 0.9916, "step": 14024 }, { "epoch": 0.062087741821240425, "grad_norm": 3.2146490126032687, "learning_rate": 6.208774182124043e-06, "loss": 0.9171, "step": 14025 }, { "epoch": 0.062092168754703615, "grad_norm": 2.390358287359966, "learning_rate": 6.209216875470362e-06, "loss": 0.4734, "step": 14026 }, { "epoch": 0.062096595688166806, "grad_norm": 2.424095989937991, "learning_rate": 6.209659568816682e-06, "loss": 0.7737, "step": 14027 }, { "epoch": 0.062101022621629996, "grad_norm": 2.873248055910205, "learning_rate": 6.2101022621630005e-06, "loss": 0.4848, "step": 14028 }, { "epoch": 0.06210544955509319, "grad_norm": 2.369098898027907, "learning_rate": 6.210544955509319e-06, "loss": 0.5929, "step": 14029 }, { "epoch": 0.06210987648855638, "grad_norm": 3.1846687155569686, "learning_rate": 6.210987648855638e-06, "loss": 0.7099, "step": 14030 }, { "epoch": 0.06211430342201957, "grad_norm": 2.5835504773962024, "learning_rate": 6.211430342201957e-06, "loss": 0.8557, "step": 14031 }, { "epoch": 0.06211873035548276, "grad_norm": 2.2740345168934186, "learning_rate": 6.211873035548276e-06, "loss": 0.5906, "step": 14032 }, { "epoch": 0.06212315728894595, "grad_norm": 2.4672995984941952, "learning_rate": 6.212315728894596e-06, "loss": 0.821, "step": 14033 }, { "epoch": 0.06212758422240914, "grad_norm": 3.095150966754808, "learning_rate": 6.212758422240914e-06, "loss": 0.6609, "step": 14034 }, { "epoch": 0.06213201115587233, "grad_norm": 3.3963958570960955, "learning_rate": 6.213201115587233e-06, "loss": 1.1271, "step": 14035 }, { "epoch": 0.06213643808933552, "grad_norm": 3.1962763945272306, "learning_rate": 6.213643808933553e-06, "loss": 0.7804, "step": 14036 }, { "epoch": 0.062140865022798704, "grad_norm": 2.3702036212839825, "learning_rate": 6.2140865022798714e-06, "loss": 0.6236, "step": 14037 }, { "epoch": 0.062145291956261894, "grad_norm": 3.0442238815927634, "learning_rate": 6.21452919562619e-06, "loss": 0.4788, "step": 14038 }, { "epoch": 0.062149718889725085, "grad_norm": 2.827024679239491, "learning_rate": 6.214971888972509e-06, "loss": 0.6285, "step": 14039 }, { "epoch": 0.062154145823188275, "grad_norm": 2.7069886972502504, "learning_rate": 6.215414582318829e-06, "loss": 0.7355, "step": 14040 }, { "epoch": 0.062158572756651466, "grad_norm": 2.868585494471468, "learning_rate": 6.215857275665147e-06, "loss": 0.9314, "step": 14041 }, { "epoch": 0.062162999690114656, "grad_norm": 2.4865054973634093, "learning_rate": 6.216299969011467e-06, "loss": 0.7184, "step": 14042 }, { "epoch": 0.06216742662357785, "grad_norm": 2.592692204931318, "learning_rate": 6.216742662357785e-06, "loss": 0.5419, "step": 14043 }, { "epoch": 0.06217185355704104, "grad_norm": 2.4965636279611982, "learning_rate": 6.217185355704104e-06, "loss": 0.6865, "step": 14044 }, { "epoch": 0.06217628049050423, "grad_norm": 2.5823979115641595, "learning_rate": 6.217628049050424e-06, "loss": 0.6306, "step": 14045 }, { "epoch": 0.06218070742396742, "grad_norm": 2.402423809710155, "learning_rate": 6.218070742396742e-06, "loss": 0.7322, "step": 14046 }, { "epoch": 0.06218513435743061, "grad_norm": 2.370875969391694, "learning_rate": 6.218513435743061e-06, "loss": 0.6673, "step": 14047 }, { "epoch": 0.0621895612908938, "grad_norm": 4.1429629472911955, "learning_rate": 6.218956129089381e-06, "loss": 1.0166, "step": 14048 }, { "epoch": 0.06219398822435699, "grad_norm": 2.3979012344209227, "learning_rate": 6.2193988224357e-06, "loss": 0.6624, "step": 14049 }, { "epoch": 0.06219841515782018, "grad_norm": 2.7493578866035424, "learning_rate": 6.219841515782018e-06, "loss": 0.8745, "step": 14050 }, { "epoch": 0.06220284209128337, "grad_norm": 3.0888676131401316, "learning_rate": 6.2202842091283375e-06, "loss": 0.5871, "step": 14051 }, { "epoch": 0.06220726902474656, "grad_norm": 2.332339465770151, "learning_rate": 6.220726902474656e-06, "loss": 0.4059, "step": 14052 }, { "epoch": 0.062211695958209745, "grad_norm": 2.3263172153878253, "learning_rate": 6.2211695958209754e-06, "loss": 0.7351, "step": 14053 }, { "epoch": 0.062216122891672936, "grad_norm": 2.4005986079633574, "learning_rate": 6.221612289167295e-06, "loss": 0.7394, "step": 14054 }, { "epoch": 0.062220549825136126, "grad_norm": 2.751915783167234, "learning_rate": 6.222054982513613e-06, "loss": 0.7863, "step": 14055 }, { "epoch": 0.06222497675859932, "grad_norm": 2.755136374254148, "learning_rate": 6.222497675859932e-06, "loss": 0.6047, "step": 14056 }, { "epoch": 0.06222940369206251, "grad_norm": 3.734866517223695, "learning_rate": 6.222940369206252e-06, "loss": 1.2425, "step": 14057 }, { "epoch": 0.0622338306255257, "grad_norm": 2.374866740197204, "learning_rate": 6.223383062552571e-06, "loss": 0.5283, "step": 14058 }, { "epoch": 0.06223825755898889, "grad_norm": 2.6206994166375166, "learning_rate": 6.223825755898889e-06, "loss": 0.8382, "step": 14059 }, { "epoch": 0.06224268449245208, "grad_norm": 2.881862357595767, "learning_rate": 6.2242684492452085e-06, "loss": 0.8949, "step": 14060 }, { "epoch": 0.06224711142591527, "grad_norm": 2.5873820046426808, "learning_rate": 6.224711142591527e-06, "loss": 0.7029, "step": 14061 }, { "epoch": 0.06225153835937846, "grad_norm": 2.554659622878517, "learning_rate": 6.225153835937846e-06, "loss": 0.6222, "step": 14062 }, { "epoch": 0.06225596529284165, "grad_norm": 2.3161025439494747, "learning_rate": 6.225596529284166e-06, "loss": 0.7139, "step": 14063 }, { "epoch": 0.06226039222630484, "grad_norm": 2.552179448784709, "learning_rate": 6.226039222630484e-06, "loss": 0.8192, "step": 14064 }, { "epoch": 0.06226481915976803, "grad_norm": 2.6630102852350443, "learning_rate": 6.226481915976803e-06, "loss": 0.6882, "step": 14065 }, { "epoch": 0.06226924609323122, "grad_norm": 3.8565479376061003, "learning_rate": 6.226924609323123e-06, "loss": 0.9811, "step": 14066 }, { "epoch": 0.06227367302669441, "grad_norm": 3.4226644222137765, "learning_rate": 6.2273673026694415e-06, "loss": 0.9439, "step": 14067 }, { "epoch": 0.062278099960157596, "grad_norm": 2.470538788787228, "learning_rate": 6.22780999601576e-06, "loss": 0.7398, "step": 14068 }, { "epoch": 0.062282526893620786, "grad_norm": 2.533995462707552, "learning_rate": 6.2282526893620794e-06, "loss": 0.7359, "step": 14069 }, { "epoch": 0.06228695382708398, "grad_norm": 3.134998065599029, "learning_rate": 6.228695382708399e-06, "loss": 0.8802, "step": 14070 }, { "epoch": 0.06229138076054717, "grad_norm": 2.362872764343054, "learning_rate": 6.229138076054717e-06, "loss": 0.6389, "step": 14071 }, { "epoch": 0.06229580769401036, "grad_norm": 2.7567843126053773, "learning_rate": 6.229580769401037e-06, "loss": 0.9797, "step": 14072 }, { "epoch": 0.06230023462747355, "grad_norm": 3.606109813307412, "learning_rate": 6.230023462747355e-06, "loss": 1.255, "step": 14073 }, { "epoch": 0.06230466156093674, "grad_norm": 2.3998439397716282, "learning_rate": 6.230466156093674e-06, "loss": 0.6903, "step": 14074 }, { "epoch": 0.06230908849439993, "grad_norm": 2.8781672790026755, "learning_rate": 6.230908849439994e-06, "loss": 0.7878, "step": 14075 }, { "epoch": 0.06231351542786312, "grad_norm": 2.5387476011096526, "learning_rate": 6.2313515427863125e-06, "loss": 0.561, "step": 14076 }, { "epoch": 0.06231794236132631, "grad_norm": 2.521740676795625, "learning_rate": 6.231794236132631e-06, "loss": 0.5628, "step": 14077 }, { "epoch": 0.0623223692947895, "grad_norm": 2.373783741053776, "learning_rate": 6.232236929478951e-06, "loss": 0.5155, "step": 14078 }, { "epoch": 0.06232679622825269, "grad_norm": 3.1051577155955403, "learning_rate": 6.23267962282527e-06, "loss": 0.6369, "step": 14079 }, { "epoch": 0.06233122316171588, "grad_norm": 2.6812169889350317, "learning_rate": 6.233122316171588e-06, "loss": 0.8566, "step": 14080 }, { "epoch": 0.06233565009517907, "grad_norm": 2.53162887455845, "learning_rate": 6.233565009517908e-06, "loss": 0.9039, "step": 14081 }, { "epoch": 0.06234007702864226, "grad_norm": 3.420682532993694, "learning_rate": 6.234007702864226e-06, "loss": 1.0281, "step": 14082 }, { "epoch": 0.062344503962105446, "grad_norm": 2.717646807117781, "learning_rate": 6.2344503962105455e-06, "loss": 0.7694, "step": 14083 }, { "epoch": 0.06234893089556864, "grad_norm": 2.4027263705577604, "learning_rate": 6.234893089556865e-06, "loss": 0.9678, "step": 14084 }, { "epoch": 0.06235335782903183, "grad_norm": 2.92711856138377, "learning_rate": 6.2353357829031834e-06, "loss": 0.9645, "step": 14085 }, { "epoch": 0.06235778476249502, "grad_norm": 2.8112401517194283, "learning_rate": 6.235778476249502e-06, "loss": 0.8543, "step": 14086 }, { "epoch": 0.06236221169595821, "grad_norm": 3.0168459288523355, "learning_rate": 6.236221169595822e-06, "loss": 0.9233, "step": 14087 }, { "epoch": 0.0623666386294214, "grad_norm": 3.233931933505042, "learning_rate": 6.236663862942141e-06, "loss": 0.9815, "step": 14088 }, { "epoch": 0.06237106556288459, "grad_norm": 2.632787175608604, "learning_rate": 6.237106556288459e-06, "loss": 0.5787, "step": 14089 }, { "epoch": 0.06237549249634778, "grad_norm": 2.4270164489881156, "learning_rate": 6.237549249634779e-06, "loss": 0.7471, "step": 14090 }, { "epoch": 0.06237991942981097, "grad_norm": 2.5057226344552577, "learning_rate": 6.237991942981098e-06, "loss": 0.6699, "step": 14091 }, { "epoch": 0.06238434636327416, "grad_norm": 2.4088220048443865, "learning_rate": 6.2384346363274165e-06, "loss": 0.7809, "step": 14092 }, { "epoch": 0.06238877329673735, "grad_norm": 3.1871048176728607, "learning_rate": 6.238877329673736e-06, "loss": 0.7394, "step": 14093 }, { "epoch": 0.06239320023020054, "grad_norm": 3.0083728492269555, "learning_rate": 6.239320023020054e-06, "loss": 0.9114, "step": 14094 }, { "epoch": 0.06239762716366373, "grad_norm": 2.848024340893319, "learning_rate": 6.239762716366373e-06, "loss": 0.6122, "step": 14095 }, { "epoch": 0.06240205409712692, "grad_norm": 3.0813620890730387, "learning_rate": 6.240205409712693e-06, "loss": 0.79, "step": 14096 }, { "epoch": 0.062406481030590114, "grad_norm": 3.8504557304963964, "learning_rate": 6.240648103059012e-06, "loss": 1.2215, "step": 14097 }, { "epoch": 0.0624109079640533, "grad_norm": 2.2433659477255627, "learning_rate": 6.24109079640533e-06, "loss": 0.6406, "step": 14098 }, { "epoch": 0.06241533489751649, "grad_norm": 3.267373946512666, "learning_rate": 6.24153348975165e-06, "loss": 1.0181, "step": 14099 }, { "epoch": 0.06241976183097968, "grad_norm": 2.5558918647109734, "learning_rate": 6.241976183097969e-06, "loss": 0.8081, "step": 14100 }, { "epoch": 0.06242418876444287, "grad_norm": 2.4397125013963867, "learning_rate": 6.2424188764442874e-06, "loss": 0.6404, "step": 14101 }, { "epoch": 0.06242861569790606, "grad_norm": 2.313259250866978, "learning_rate": 6.242861569790607e-06, "loss": 0.4503, "step": 14102 }, { "epoch": 0.06243304263136925, "grad_norm": 2.695421116315612, "learning_rate": 6.243304263136925e-06, "loss": 0.8462, "step": 14103 }, { "epoch": 0.06243746956483244, "grad_norm": 2.20979590049296, "learning_rate": 6.243746956483244e-06, "loss": 0.6742, "step": 14104 }, { "epoch": 0.06244189649829563, "grad_norm": 2.724957984454666, "learning_rate": 6.244189649829564e-06, "loss": 0.7884, "step": 14105 }, { "epoch": 0.06244632343175882, "grad_norm": 2.7162715704261937, "learning_rate": 6.244632343175883e-06, "loss": 0.8031, "step": 14106 }, { "epoch": 0.06245075036522201, "grad_norm": 2.6660391431229833, "learning_rate": 6.245075036522201e-06, "loss": 0.7896, "step": 14107 }, { "epoch": 0.0624551772986852, "grad_norm": 2.9644667345493034, "learning_rate": 6.245517729868521e-06, "loss": 0.808, "step": 14108 }, { "epoch": 0.06245960423214839, "grad_norm": 2.5689294439840586, "learning_rate": 6.24596042321484e-06, "loss": 0.7569, "step": 14109 }, { "epoch": 0.06246403116561158, "grad_norm": 2.8680146628956527, "learning_rate": 6.246403116561158e-06, "loss": 0.9036, "step": 14110 }, { "epoch": 0.062468458099074774, "grad_norm": 3.2499898804517247, "learning_rate": 6.246845809907478e-06, "loss": 0.9649, "step": 14111 }, { "epoch": 0.062472885032537964, "grad_norm": 2.492053823223272, "learning_rate": 6.247288503253796e-06, "loss": 0.6899, "step": 14112 }, { "epoch": 0.06247731196600115, "grad_norm": 2.13044477106532, "learning_rate": 6.247731196600116e-06, "loss": 0.5393, "step": 14113 }, { "epoch": 0.06248173889946434, "grad_norm": 2.291645762567151, "learning_rate": 6.248173889946435e-06, "loss": 0.6214, "step": 14114 }, { "epoch": 0.06248616583292753, "grad_norm": 2.5359896044439805, "learning_rate": 6.2486165832927535e-06, "loss": 0.7549, "step": 14115 }, { "epoch": 0.06249059276639072, "grad_norm": 2.285658632948928, "learning_rate": 6.249059276639072e-06, "loss": 0.5866, "step": 14116 }, { "epoch": 0.06249501969985391, "grad_norm": 2.3307118352268863, "learning_rate": 6.249501969985392e-06, "loss": 0.6715, "step": 14117 }, { "epoch": 0.0624994466333171, "grad_norm": 2.7814316146993385, "learning_rate": 6.249944663331711e-06, "loss": 0.7349, "step": 14118 }, { "epoch": 0.06250387356678029, "grad_norm": 2.9809792286575605, "learning_rate": 6.250387356678029e-06, "loss": 0.6888, "step": 14119 }, { "epoch": 0.06250830050024349, "grad_norm": 2.6141311789928774, "learning_rate": 6.250830050024349e-06, "loss": 0.8826, "step": 14120 }, { "epoch": 0.06251272743370667, "grad_norm": 2.952867748745803, "learning_rate": 6.251272743370668e-06, "loss": 0.6517, "step": 14121 }, { "epoch": 0.06251715436716986, "grad_norm": 2.600379415789009, "learning_rate": 6.251715436716987e-06, "loss": 0.6693, "step": 14122 }, { "epoch": 0.06252158130063305, "grad_norm": 2.5431389318179747, "learning_rate": 6.252158130063306e-06, "loss": 0.6539, "step": 14123 }, { "epoch": 0.06252600823409624, "grad_norm": 2.9468267300267725, "learning_rate": 6.2526008234096245e-06, "loss": 0.9365, "step": 14124 }, { "epoch": 0.06253043516755943, "grad_norm": 3.538436287646793, "learning_rate": 6.253043516755943e-06, "loss": 0.8942, "step": 14125 }, { "epoch": 0.06253486210102262, "grad_norm": 2.661109368723797, "learning_rate": 6.253486210102263e-06, "loss": 0.8725, "step": 14126 }, { "epoch": 0.06253928903448581, "grad_norm": 2.2797410831586338, "learning_rate": 6.253928903448582e-06, "loss": 0.5787, "step": 14127 }, { "epoch": 0.062543715967949, "grad_norm": 3.495786206160044, "learning_rate": 6.2543715967949e-06, "loss": 1.1042, "step": 14128 }, { "epoch": 0.0625481429014122, "grad_norm": 2.379438016200158, "learning_rate": 6.2548142901412205e-06, "loss": 0.7395, "step": 14129 }, { "epoch": 0.06255256983487538, "grad_norm": 2.7472095998833077, "learning_rate": 6.255256983487539e-06, "loss": 0.842, "step": 14130 }, { "epoch": 0.06255699676833858, "grad_norm": 3.2390261457740444, "learning_rate": 6.2556996768338575e-06, "loss": 1.0873, "step": 14131 }, { "epoch": 0.06256142370180176, "grad_norm": 3.5311833613440657, "learning_rate": 6.256142370180177e-06, "loss": 1.0276, "step": 14132 }, { "epoch": 0.06256585063526496, "grad_norm": 2.4446651564565336, "learning_rate": 6.2565850635264954e-06, "loss": 0.6162, "step": 14133 }, { "epoch": 0.06257027756872814, "grad_norm": 2.854913052672476, "learning_rate": 6.257027756872815e-06, "loss": 0.9272, "step": 14134 }, { "epoch": 0.06257470450219134, "grad_norm": 2.9094062985706706, "learning_rate": 6.257470450219134e-06, "loss": 0.9504, "step": 14135 }, { "epoch": 0.06257913143565452, "grad_norm": 2.9660822112049314, "learning_rate": 6.257913143565453e-06, "loss": 0.81, "step": 14136 }, { "epoch": 0.0625835583691177, "grad_norm": 2.4646160728863675, "learning_rate": 6.258355836911771e-06, "loss": 0.786, "step": 14137 }, { "epoch": 0.0625879853025809, "grad_norm": 2.5526774749087893, "learning_rate": 6.2587985302580914e-06, "loss": 0.6624, "step": 14138 }, { "epoch": 0.06259241223604409, "grad_norm": 2.7106018990026612, "learning_rate": 6.25924122360441e-06, "loss": 0.7704, "step": 14139 }, { "epoch": 0.06259683916950728, "grad_norm": 2.690484968558263, "learning_rate": 6.2596839169507285e-06, "loss": 0.9104, "step": 14140 }, { "epoch": 0.06260126610297047, "grad_norm": 2.4700394958083893, "learning_rate": 6.260126610297048e-06, "loss": 0.5308, "step": 14141 }, { "epoch": 0.06260569303643367, "grad_norm": 3.4190903939013437, "learning_rate": 6.260569303643366e-06, "loss": 0.8777, "step": 14142 }, { "epoch": 0.06261011996989685, "grad_norm": 2.328035422435149, "learning_rate": 6.261011996989686e-06, "loss": 0.5893, "step": 14143 }, { "epoch": 0.06261454690336005, "grad_norm": 3.1989475486335572, "learning_rate": 6.261454690336005e-06, "loss": 0.9027, "step": 14144 }, { "epoch": 0.06261897383682323, "grad_norm": 2.72391291224602, "learning_rate": 6.261897383682324e-06, "loss": 0.6927, "step": 14145 }, { "epoch": 0.06262340077028643, "grad_norm": 3.388697724429754, "learning_rate": 6.262340077028642e-06, "loss": 1.0358, "step": 14146 }, { "epoch": 0.06262782770374961, "grad_norm": 3.0877773271604263, "learning_rate": 6.262782770374962e-06, "loss": 0.8529, "step": 14147 }, { "epoch": 0.06263225463721281, "grad_norm": 3.2164953705246253, "learning_rate": 6.263225463721281e-06, "loss": 0.8058, "step": 14148 }, { "epoch": 0.06263668157067599, "grad_norm": 2.8496475020208494, "learning_rate": 6.2636681570675994e-06, "loss": 0.8608, "step": 14149 }, { "epoch": 0.06264110850413919, "grad_norm": 3.8998540648443742, "learning_rate": 6.264110850413919e-06, "loss": 1.1275, "step": 14150 }, { "epoch": 0.06264553543760237, "grad_norm": 2.6724280103203726, "learning_rate": 6.264553543760238e-06, "loss": 0.5453, "step": 14151 }, { "epoch": 0.06264996237106556, "grad_norm": 2.501704211478448, "learning_rate": 6.264996237106557e-06, "loss": 0.5988, "step": 14152 }, { "epoch": 0.06265438930452875, "grad_norm": 2.7170121453185865, "learning_rate": 6.265438930452876e-06, "loss": 0.5696, "step": 14153 }, { "epoch": 0.06265881623799194, "grad_norm": 2.801271176189139, "learning_rate": 6.265881623799195e-06, "loss": 0.7982, "step": 14154 }, { "epoch": 0.06266324317145514, "grad_norm": 3.8068528928611953, "learning_rate": 6.266324317145513e-06, "loss": 0.9167, "step": 14155 }, { "epoch": 0.06266767010491832, "grad_norm": 2.870948903353325, "learning_rate": 6.266767010491833e-06, "loss": 0.6426, "step": 14156 }, { "epoch": 0.06267209703838152, "grad_norm": 2.456286152686275, "learning_rate": 6.267209703838152e-06, "loss": 0.7048, "step": 14157 }, { "epoch": 0.0626765239718447, "grad_norm": 2.5584061188586436, "learning_rate": 6.26765239718447e-06, "loss": 0.7153, "step": 14158 }, { "epoch": 0.0626809509053079, "grad_norm": 2.833752257043871, "learning_rate": 6.268095090530791e-06, "loss": 0.771, "step": 14159 }, { "epoch": 0.06268537783877108, "grad_norm": 3.6016927012596223, "learning_rate": 6.268537783877109e-06, "loss": 0.8768, "step": 14160 }, { "epoch": 0.06268980477223428, "grad_norm": 2.547434944791729, "learning_rate": 6.268980477223428e-06, "loss": 0.4956, "step": 14161 }, { "epoch": 0.06269423170569746, "grad_norm": 2.796777130007948, "learning_rate": 6.269423170569747e-06, "loss": 0.7531, "step": 14162 }, { "epoch": 0.06269865863916066, "grad_norm": 3.9035500705897186, "learning_rate": 6.2698658639160655e-06, "loss": 1.3696, "step": 14163 }, { "epoch": 0.06270308557262384, "grad_norm": 2.552222209612584, "learning_rate": 6.270308557262385e-06, "loss": 0.7471, "step": 14164 }, { "epoch": 0.06270751250608704, "grad_norm": 2.7000782002320936, "learning_rate": 6.270751250608704e-06, "loss": 0.7893, "step": 14165 }, { "epoch": 0.06271193943955022, "grad_norm": 2.23195669458854, "learning_rate": 6.271193943955023e-06, "loss": 0.5776, "step": 14166 }, { "epoch": 0.06271636637301341, "grad_norm": 3.2082462736409805, "learning_rate": 6.271636637301341e-06, "loss": 0.7669, "step": 14167 }, { "epoch": 0.0627207933064766, "grad_norm": 2.6985176743416313, "learning_rate": 6.2720793306476615e-06, "loss": 0.6356, "step": 14168 }, { "epoch": 0.06272522023993979, "grad_norm": 2.809550325922852, "learning_rate": 6.27252202399398e-06, "loss": 0.6661, "step": 14169 }, { "epoch": 0.06272964717340299, "grad_norm": 3.2937040891559595, "learning_rate": 6.272964717340299e-06, "loss": 0.7106, "step": 14170 }, { "epoch": 0.06273407410686617, "grad_norm": 3.8456088969376756, "learning_rate": 6.273407410686618e-06, "loss": 1.3042, "step": 14171 }, { "epoch": 0.06273850104032937, "grad_norm": 2.7185012504675288, "learning_rate": 6.273850104032937e-06, "loss": 0.844, "step": 14172 }, { "epoch": 0.06274292797379255, "grad_norm": 3.7878646844982735, "learning_rate": 6.274292797379256e-06, "loss": 0.8777, "step": 14173 }, { "epoch": 0.06274735490725575, "grad_norm": 3.464124449234576, "learning_rate": 6.274735490725575e-06, "loss": 1.2346, "step": 14174 }, { "epoch": 0.06275178184071893, "grad_norm": 2.8089877777556858, "learning_rate": 6.275178184071894e-06, "loss": 0.8914, "step": 14175 }, { "epoch": 0.06275620877418213, "grad_norm": 2.5279072982587714, "learning_rate": 6.275620877418212e-06, "loss": 0.6869, "step": 14176 }, { "epoch": 0.06276063570764531, "grad_norm": 3.0384424210380847, "learning_rate": 6.2760635707645325e-06, "loss": 0.96, "step": 14177 }, { "epoch": 0.06276506264110851, "grad_norm": 2.20765487045066, "learning_rate": 6.276506264110851e-06, "loss": 0.5138, "step": 14178 }, { "epoch": 0.0627694895745717, "grad_norm": 3.161233877135923, "learning_rate": 6.2769489574571695e-06, "loss": 0.5272, "step": 14179 }, { "epoch": 0.06277391650803489, "grad_norm": 2.6601735965230775, "learning_rate": 6.27739165080349e-06, "loss": 0.6993, "step": 14180 }, { "epoch": 0.06277834344149807, "grad_norm": 2.8599260293348827, "learning_rate": 6.277834344149808e-06, "loss": 0.9705, "step": 14181 }, { "epoch": 0.06278277037496126, "grad_norm": 2.1709909362340523, "learning_rate": 6.278277037496127e-06, "loss": 0.6781, "step": 14182 }, { "epoch": 0.06278719730842446, "grad_norm": 2.8311147720237924, "learning_rate": 6.278719730842446e-06, "loss": 0.7322, "step": 14183 }, { "epoch": 0.06279162424188764, "grad_norm": 2.5879276887192892, "learning_rate": 6.279162424188765e-06, "loss": 0.8149, "step": 14184 }, { "epoch": 0.06279605117535084, "grad_norm": 2.6706851647185244, "learning_rate": 6.279605117535083e-06, "loss": 0.6132, "step": 14185 }, { "epoch": 0.06280047810881402, "grad_norm": 2.255238902324706, "learning_rate": 6.2800478108814034e-06, "loss": 0.6024, "step": 14186 }, { "epoch": 0.06280490504227722, "grad_norm": 3.4331899342863212, "learning_rate": 6.280490504227722e-06, "loss": 0.8947, "step": 14187 }, { "epoch": 0.0628093319757404, "grad_norm": 2.7703411495657306, "learning_rate": 6.2809331975740405e-06, "loss": 0.6031, "step": 14188 }, { "epoch": 0.0628137589092036, "grad_norm": 2.0070283827407955, "learning_rate": 6.281375890920361e-06, "loss": 0.6431, "step": 14189 }, { "epoch": 0.06281818584266678, "grad_norm": 2.7910308569734203, "learning_rate": 6.281818584266679e-06, "loss": 0.8941, "step": 14190 }, { "epoch": 0.06282261277612998, "grad_norm": 2.848141991573175, "learning_rate": 6.282261277612998e-06, "loss": 0.6626, "step": 14191 }, { "epoch": 0.06282703970959316, "grad_norm": 2.3719238533411757, "learning_rate": 6.282703970959317e-06, "loss": 0.6235, "step": 14192 }, { "epoch": 0.06283146664305636, "grad_norm": 2.8672599269349597, "learning_rate": 6.283146664305636e-06, "loss": 0.6621, "step": 14193 }, { "epoch": 0.06283589357651954, "grad_norm": 2.844475711288314, "learning_rate": 6.283589357651955e-06, "loss": 0.6166, "step": 14194 }, { "epoch": 0.06284032050998274, "grad_norm": 3.383406743593768, "learning_rate": 6.284032050998274e-06, "loss": 1.008, "step": 14195 }, { "epoch": 0.06284474744344593, "grad_norm": 2.2488731108023328, "learning_rate": 6.284474744344593e-06, "loss": 0.7543, "step": 14196 }, { "epoch": 0.06284917437690911, "grad_norm": 3.2924753211558007, "learning_rate": 6.2849174376909114e-06, "loss": 0.373, "step": 14197 }, { "epoch": 0.0628536013103723, "grad_norm": 2.8788457509283996, "learning_rate": 6.285360131037232e-06, "loss": 0.962, "step": 14198 }, { "epoch": 0.06285802824383549, "grad_norm": 2.7985806998988676, "learning_rate": 6.28580282438355e-06, "loss": 0.6876, "step": 14199 }, { "epoch": 0.06286245517729869, "grad_norm": 3.798755194662633, "learning_rate": 6.286245517729869e-06, "loss": 1.1358, "step": 14200 }, { "epoch": 0.06286688211076187, "grad_norm": 2.805919611698544, "learning_rate": 6.286688211076188e-06, "loss": 1.0063, "step": 14201 }, { "epoch": 0.06287130904422507, "grad_norm": 2.7306771697578403, "learning_rate": 6.2871309044225074e-06, "loss": 0.8076, "step": 14202 }, { "epoch": 0.06287573597768825, "grad_norm": 2.3808732433386006, "learning_rate": 6.287573597768826e-06, "loss": 0.6689, "step": 14203 }, { "epoch": 0.06288016291115145, "grad_norm": 2.7480432257636984, "learning_rate": 6.288016291115145e-06, "loss": 0.665, "step": 14204 }, { "epoch": 0.06288458984461463, "grad_norm": 3.200880640837116, "learning_rate": 6.288458984461464e-06, "loss": 0.8846, "step": 14205 }, { "epoch": 0.06288901677807783, "grad_norm": 2.7787245880953493, "learning_rate": 6.288901677807782e-06, "loss": 0.6506, "step": 14206 }, { "epoch": 0.06289344371154101, "grad_norm": 3.075557753730521, "learning_rate": 6.289344371154103e-06, "loss": 1.1082, "step": 14207 }, { "epoch": 0.06289787064500421, "grad_norm": 2.52432764519976, "learning_rate": 6.289787064500421e-06, "loss": 0.6555, "step": 14208 }, { "epoch": 0.0629022975784674, "grad_norm": 2.8319677981927542, "learning_rate": 6.29022975784674e-06, "loss": 0.8638, "step": 14209 }, { "epoch": 0.06290672451193059, "grad_norm": 2.836639456343314, "learning_rate": 6.29067245119306e-06, "loss": 0.8439, "step": 14210 }, { "epoch": 0.06291115144539378, "grad_norm": 3.013748793463138, "learning_rate": 6.291115144539378e-06, "loss": 0.577, "step": 14211 }, { "epoch": 0.06291557837885696, "grad_norm": 2.8377267659849275, "learning_rate": 6.291557837885697e-06, "loss": 0.71, "step": 14212 }, { "epoch": 0.06292000531232016, "grad_norm": 2.3322436648344484, "learning_rate": 6.292000531232016e-06, "loss": 0.7019, "step": 14213 }, { "epoch": 0.06292443224578334, "grad_norm": 2.83702211287354, "learning_rate": 6.292443224578335e-06, "loss": 0.6509, "step": 14214 }, { "epoch": 0.06292885917924654, "grad_norm": 2.2308900008715993, "learning_rate": 6.292885917924654e-06, "loss": 0.4465, "step": 14215 }, { "epoch": 0.06293328611270972, "grad_norm": 2.6256931971261093, "learning_rate": 6.2933286112709735e-06, "loss": 0.5923, "step": 14216 }, { "epoch": 0.06293771304617292, "grad_norm": 2.6842084780225774, "learning_rate": 6.293771304617292e-06, "loss": 0.6835, "step": 14217 }, { "epoch": 0.0629421399796361, "grad_norm": 2.6368071419987014, "learning_rate": 6.294213997963611e-06, "loss": 0.6912, "step": 14218 }, { "epoch": 0.0629465669130993, "grad_norm": 3.778416476827137, "learning_rate": 6.294656691309931e-06, "loss": 1.0892, "step": 14219 }, { "epoch": 0.06295099384656248, "grad_norm": 2.607090981988044, "learning_rate": 6.295099384656249e-06, "loss": 0.6617, "step": 14220 }, { "epoch": 0.06295542078002568, "grad_norm": 2.4870853537129065, "learning_rate": 6.295542078002568e-06, "loss": 0.787, "step": 14221 }, { "epoch": 0.06295984771348886, "grad_norm": 3.8577522523478924, "learning_rate": 6.295984771348887e-06, "loss": 1.3393, "step": 14222 }, { "epoch": 0.06296427464695206, "grad_norm": 2.8184573116195466, "learning_rate": 6.296427464695206e-06, "loss": 0.8899, "step": 14223 }, { "epoch": 0.06296870158041525, "grad_norm": 2.4678866943666393, "learning_rate": 6.296870158041525e-06, "loss": 0.958, "step": 14224 }, { "epoch": 0.06297312851387844, "grad_norm": 2.462820672094401, "learning_rate": 6.2973128513878445e-06, "loss": 0.5275, "step": 14225 }, { "epoch": 0.06297755544734163, "grad_norm": 3.040267502247387, "learning_rate": 6.297755544734163e-06, "loss": 0.7668, "step": 14226 }, { "epoch": 0.06298198238080481, "grad_norm": 3.251756649252236, "learning_rate": 6.2981982380804815e-06, "loss": 1.0927, "step": 14227 }, { "epoch": 0.06298640931426801, "grad_norm": 4.314729835804701, "learning_rate": 6.298640931426802e-06, "loss": 1.3475, "step": 14228 }, { "epoch": 0.06299083624773119, "grad_norm": 3.206091913932741, "learning_rate": 6.29908362477312e-06, "loss": 1.0364, "step": 14229 }, { "epoch": 0.06299526318119439, "grad_norm": 2.6718803158851654, "learning_rate": 6.299526318119439e-06, "loss": 1.0116, "step": 14230 }, { "epoch": 0.06299969011465757, "grad_norm": 2.817626444192976, "learning_rate": 6.299969011465758e-06, "loss": 0.8246, "step": 14231 }, { "epoch": 0.06300411704812077, "grad_norm": 2.4741153587721514, "learning_rate": 6.3004117048120775e-06, "loss": 0.7657, "step": 14232 }, { "epoch": 0.06300854398158395, "grad_norm": 3.235676971059373, "learning_rate": 6.300854398158396e-06, "loss": 1.0579, "step": 14233 }, { "epoch": 0.06301297091504715, "grad_norm": 2.292435889971789, "learning_rate": 6.3012970915047154e-06, "loss": 0.7418, "step": 14234 }, { "epoch": 0.06301739784851033, "grad_norm": 2.615165343894717, "learning_rate": 6.301739784851034e-06, "loss": 0.5736, "step": 14235 }, { "epoch": 0.06302182478197353, "grad_norm": 2.8393356310286046, "learning_rate": 6.3021824781973525e-06, "loss": 0.9121, "step": 14236 }, { "epoch": 0.06302625171543672, "grad_norm": 2.6819090235917993, "learning_rate": 6.302625171543673e-06, "loss": 0.5176, "step": 14237 }, { "epoch": 0.06303067864889991, "grad_norm": 3.380219748640268, "learning_rate": 6.303067864889991e-06, "loss": 0.8615, "step": 14238 }, { "epoch": 0.0630351055823631, "grad_norm": 3.4569639175379305, "learning_rate": 6.30351055823631e-06, "loss": 1.0949, "step": 14239 }, { "epoch": 0.0630395325158263, "grad_norm": 3.071966975543104, "learning_rate": 6.30395325158263e-06, "loss": 0.4426, "step": 14240 }, { "epoch": 0.06304395944928948, "grad_norm": 2.584399825017129, "learning_rate": 6.3043959449289485e-06, "loss": 0.7503, "step": 14241 }, { "epoch": 0.06304838638275266, "grad_norm": 2.7326519100632938, "learning_rate": 6.304838638275267e-06, "loss": 0.5784, "step": 14242 }, { "epoch": 0.06305281331621586, "grad_norm": 2.514652549404249, "learning_rate": 6.305281331621586e-06, "loss": 0.7235, "step": 14243 }, { "epoch": 0.06305724024967904, "grad_norm": 2.87682523586261, "learning_rate": 6.305724024967905e-06, "loss": 0.8678, "step": 14244 }, { "epoch": 0.06306166718314224, "grad_norm": 2.2304675087973584, "learning_rate": 6.306166718314224e-06, "loss": 0.6514, "step": 14245 }, { "epoch": 0.06306609411660542, "grad_norm": 3.2018507470284563, "learning_rate": 6.306609411660544e-06, "loss": 0.93, "step": 14246 }, { "epoch": 0.06307052105006862, "grad_norm": 2.533717002452726, "learning_rate": 6.307052105006862e-06, "loss": 0.6534, "step": 14247 }, { "epoch": 0.0630749479835318, "grad_norm": 3.1171136975348075, "learning_rate": 6.307494798353181e-06, "loss": 0.9562, "step": 14248 }, { "epoch": 0.063079374916995, "grad_norm": 2.2659447151672816, "learning_rate": 6.307937491699501e-06, "loss": 0.5216, "step": 14249 }, { "epoch": 0.06308380185045818, "grad_norm": 2.459333395477118, "learning_rate": 6.3083801850458194e-06, "loss": 0.7137, "step": 14250 }, { "epoch": 0.06308822878392138, "grad_norm": 2.4239503749358087, "learning_rate": 6.308822878392138e-06, "loss": 0.7444, "step": 14251 }, { "epoch": 0.06309265571738457, "grad_norm": 3.3133991326483123, "learning_rate": 6.309265571738457e-06, "loss": 1.0551, "step": 14252 }, { "epoch": 0.06309708265084776, "grad_norm": 2.730466929799894, "learning_rate": 6.309708265084777e-06, "loss": 0.7238, "step": 14253 }, { "epoch": 0.06310150958431095, "grad_norm": 4.09780876567145, "learning_rate": 6.310150958431095e-06, "loss": 1.2197, "step": 14254 }, { "epoch": 0.06310593651777414, "grad_norm": 4.113443778863353, "learning_rate": 6.310593651777415e-06, "loss": 0.4767, "step": 14255 }, { "epoch": 0.06311036345123733, "grad_norm": 2.8000099004651386, "learning_rate": 6.311036345123733e-06, "loss": 0.9185, "step": 14256 }, { "epoch": 0.06311479038470051, "grad_norm": 3.5939873162999394, "learning_rate": 6.311479038470052e-06, "loss": 1.062, "step": 14257 }, { "epoch": 0.06311921731816371, "grad_norm": 2.2754298481650532, "learning_rate": 6.311921731816372e-06, "loss": 0.7862, "step": 14258 }, { "epoch": 0.06312364425162689, "grad_norm": 2.5318975957528695, "learning_rate": 6.31236442516269e-06, "loss": 0.5775, "step": 14259 }, { "epoch": 0.06312807118509009, "grad_norm": 3.6961076771772112, "learning_rate": 6.312807118509009e-06, "loss": 1.0701, "step": 14260 }, { "epoch": 0.06313249811855327, "grad_norm": 2.377845128181629, "learning_rate": 6.313249811855328e-06, "loss": 0.6737, "step": 14261 }, { "epoch": 0.06313692505201647, "grad_norm": 2.3039338317831337, "learning_rate": 6.313692505201648e-06, "loss": 0.4425, "step": 14262 }, { "epoch": 0.06314135198547965, "grad_norm": 2.471376998875917, "learning_rate": 6.314135198547966e-06, "loss": 0.5765, "step": 14263 }, { "epoch": 0.06314577891894285, "grad_norm": 3.01294978363448, "learning_rate": 6.3145778918942855e-06, "loss": 0.4949, "step": 14264 }, { "epoch": 0.06315020585240604, "grad_norm": 3.287703718516605, "learning_rate": 6.315020585240604e-06, "loss": 1.1692, "step": 14265 }, { "epoch": 0.06315463278586923, "grad_norm": 2.6916244013329877, "learning_rate": 6.315463278586923e-06, "loss": 0.7191, "step": 14266 }, { "epoch": 0.06315905971933242, "grad_norm": 2.8018309833411794, "learning_rate": 6.315905971933243e-06, "loss": 0.9825, "step": 14267 }, { "epoch": 0.06316348665279561, "grad_norm": 3.136716484693738, "learning_rate": 6.316348665279561e-06, "loss": 0.6787, "step": 14268 }, { "epoch": 0.0631679135862588, "grad_norm": 2.3734165795492954, "learning_rate": 6.31679135862588e-06, "loss": 0.5382, "step": 14269 }, { "epoch": 0.063172340519722, "grad_norm": 2.5648355822369484, "learning_rate": 6.3172340519722e-06, "loss": 0.7524, "step": 14270 }, { "epoch": 0.06317676745318518, "grad_norm": 3.0444680427696493, "learning_rate": 6.317676745318519e-06, "loss": 1.095, "step": 14271 }, { "epoch": 0.06318119438664836, "grad_norm": 2.344765974129069, "learning_rate": 6.318119438664837e-06, "loss": 0.7338, "step": 14272 }, { "epoch": 0.06318562132011156, "grad_norm": 2.4370967078111727, "learning_rate": 6.3185621320111565e-06, "loss": 0.6702, "step": 14273 }, { "epoch": 0.06319004825357474, "grad_norm": 2.3709534223522404, "learning_rate": 6.319004825357475e-06, "loss": 0.6902, "step": 14274 }, { "epoch": 0.06319447518703794, "grad_norm": 3.0529817940476005, "learning_rate": 6.319447518703794e-06, "loss": 0.9029, "step": 14275 }, { "epoch": 0.06319890212050112, "grad_norm": 3.6582007468963322, "learning_rate": 6.319890212050114e-06, "loss": 1.2271, "step": 14276 }, { "epoch": 0.06320332905396432, "grad_norm": 2.280909506941327, "learning_rate": 6.320332905396432e-06, "loss": 0.5924, "step": 14277 }, { "epoch": 0.0632077559874275, "grad_norm": 2.9498486383872975, "learning_rate": 6.320775598742751e-06, "loss": 0.7583, "step": 14278 }, { "epoch": 0.0632121829208907, "grad_norm": 2.823368426187938, "learning_rate": 6.321218292089071e-06, "loss": 0.9242, "step": 14279 }, { "epoch": 0.06321660985435389, "grad_norm": 2.8063775917928733, "learning_rate": 6.3216609854353895e-06, "loss": 0.4489, "step": 14280 }, { "epoch": 0.06322103678781708, "grad_norm": 2.769196161392748, "learning_rate": 6.322103678781708e-06, "loss": 0.5882, "step": 14281 }, { "epoch": 0.06322546372128027, "grad_norm": 3.4941896959721688, "learning_rate": 6.3225463721280274e-06, "loss": 0.8536, "step": 14282 }, { "epoch": 0.06322989065474346, "grad_norm": 2.4327572658289562, "learning_rate": 6.322989065474347e-06, "loss": 0.7119, "step": 14283 }, { "epoch": 0.06323431758820665, "grad_norm": 2.927571851279476, "learning_rate": 6.323431758820665e-06, "loss": 0.796, "step": 14284 }, { "epoch": 0.06323874452166985, "grad_norm": 2.50829933264122, "learning_rate": 6.323874452166985e-06, "loss": 0.569, "step": 14285 }, { "epoch": 0.06324317145513303, "grad_norm": 3.255985457498624, "learning_rate": 6.324317145513303e-06, "loss": 0.8098, "step": 14286 }, { "epoch": 0.06324759838859621, "grad_norm": 3.1440986986243122, "learning_rate": 6.324759838859622e-06, "loss": 0.8203, "step": 14287 }, { "epoch": 0.06325202532205941, "grad_norm": 3.730528674103762, "learning_rate": 6.325202532205942e-06, "loss": 1.1587, "step": 14288 }, { "epoch": 0.0632564522555226, "grad_norm": 2.82185488563886, "learning_rate": 6.3256452255522605e-06, "loss": 0.7665, "step": 14289 }, { "epoch": 0.06326087918898579, "grad_norm": 2.396014985460479, "learning_rate": 6.326087918898579e-06, "loss": 0.5162, "step": 14290 }, { "epoch": 0.06326530612244897, "grad_norm": 2.2830691152087677, "learning_rate": 6.326530612244899e-06, "loss": 0.6761, "step": 14291 }, { "epoch": 0.06326973305591217, "grad_norm": 3.697414578916813, "learning_rate": 6.326973305591218e-06, "loss": 0.98, "step": 14292 }, { "epoch": 0.06327415998937536, "grad_norm": 3.3465251828602263, "learning_rate": 6.327415998937536e-06, "loss": 0.8329, "step": 14293 }, { "epoch": 0.06327858692283855, "grad_norm": 2.7528476579320116, "learning_rate": 6.327858692283856e-06, "loss": 0.7594, "step": 14294 }, { "epoch": 0.06328301385630174, "grad_norm": 2.642472726799759, "learning_rate": 6.328301385630174e-06, "loss": 1.0244, "step": 14295 }, { "epoch": 0.06328744078976493, "grad_norm": 2.9596560312291547, "learning_rate": 6.328744078976493e-06, "loss": 1.0355, "step": 14296 }, { "epoch": 0.06329186772322812, "grad_norm": 3.2908302483097343, "learning_rate": 6.329186772322813e-06, "loss": 1.196, "step": 14297 }, { "epoch": 0.06329629465669132, "grad_norm": 2.855011660670881, "learning_rate": 6.3296294656691314e-06, "loss": 0.6241, "step": 14298 }, { "epoch": 0.0633007215901545, "grad_norm": 2.8436798232353517, "learning_rate": 6.33007215901545e-06, "loss": 0.8455, "step": 14299 }, { "epoch": 0.0633051485236177, "grad_norm": 2.5185774543663313, "learning_rate": 6.33051485236177e-06, "loss": 0.5453, "step": 14300 }, { "epoch": 0.06330957545708088, "grad_norm": 2.12978861187604, "learning_rate": 6.330957545708089e-06, "loss": 0.5785, "step": 14301 }, { "epoch": 0.06331400239054406, "grad_norm": 3.2315196499376353, "learning_rate": 6.331400239054407e-06, "loss": 0.7837, "step": 14302 }, { "epoch": 0.06331842932400726, "grad_norm": 2.7174521636415245, "learning_rate": 6.331842932400727e-06, "loss": 0.8728, "step": 14303 }, { "epoch": 0.06332285625747044, "grad_norm": 2.6053902035864325, "learning_rate": 6.332285625747045e-06, "loss": 0.6022, "step": 14304 }, { "epoch": 0.06332728319093364, "grad_norm": 2.382282909686426, "learning_rate": 6.3327283190933645e-06, "loss": 0.5183, "step": 14305 }, { "epoch": 0.06333171012439683, "grad_norm": 2.563055930785952, "learning_rate": 6.333171012439684e-06, "loss": 0.6182, "step": 14306 }, { "epoch": 0.06333613705786002, "grad_norm": 2.7909406553592144, "learning_rate": 6.333613705786002e-06, "loss": 0.7399, "step": 14307 }, { "epoch": 0.0633405639913232, "grad_norm": 3.4637483520048717, "learning_rate": 6.334056399132321e-06, "loss": 1.2224, "step": 14308 }, { "epoch": 0.0633449909247864, "grad_norm": 2.635180784996869, "learning_rate": 6.334499092478641e-06, "loss": 0.6117, "step": 14309 }, { "epoch": 0.06334941785824959, "grad_norm": 2.5257643705191897, "learning_rate": 6.33494178582496e-06, "loss": 0.6868, "step": 14310 }, { "epoch": 0.06335384479171279, "grad_norm": 3.220528067508056, "learning_rate": 6.335384479171278e-06, "loss": 0.8827, "step": 14311 }, { "epoch": 0.06335827172517597, "grad_norm": 2.4749392272648594, "learning_rate": 6.3358271725175975e-06, "loss": 0.5884, "step": 14312 }, { "epoch": 0.06336269865863917, "grad_norm": 3.1692508014963603, "learning_rate": 6.336269865863917e-06, "loss": 0.8741, "step": 14313 }, { "epoch": 0.06336712559210235, "grad_norm": 3.0005554732361435, "learning_rate": 6.3367125592102354e-06, "loss": 0.5185, "step": 14314 }, { "epoch": 0.06337155252556555, "grad_norm": 3.079988894492759, "learning_rate": 6.337155252556555e-06, "loss": 0.6428, "step": 14315 }, { "epoch": 0.06337597945902873, "grad_norm": 3.42834718814836, "learning_rate": 6.337597945902873e-06, "loss": 0.8435, "step": 14316 }, { "epoch": 0.06338040639249191, "grad_norm": 3.465772332041169, "learning_rate": 6.338040639249192e-06, "loss": 1.0219, "step": 14317 }, { "epoch": 0.06338483332595511, "grad_norm": 2.300774951246812, "learning_rate": 6.338483332595512e-06, "loss": 0.6113, "step": 14318 }, { "epoch": 0.0633892602594183, "grad_norm": 4.624390571632593, "learning_rate": 6.338926025941831e-06, "loss": 0.5878, "step": 14319 }, { "epoch": 0.06339368719288149, "grad_norm": 3.1899218506349185, "learning_rate": 6.339368719288149e-06, "loss": 0.7106, "step": 14320 }, { "epoch": 0.06339811412634468, "grad_norm": 2.6302969042729587, "learning_rate": 6.339811412634469e-06, "loss": 0.5037, "step": 14321 }, { "epoch": 0.06340254105980787, "grad_norm": 2.812490490447087, "learning_rate": 6.340254105980788e-06, "loss": 0.783, "step": 14322 }, { "epoch": 0.06340696799327106, "grad_norm": 2.155495635379702, "learning_rate": 6.340696799327106e-06, "loss": 0.496, "step": 14323 }, { "epoch": 0.06341139492673425, "grad_norm": 2.559759797868202, "learning_rate": 6.341139492673426e-06, "loss": 0.7265, "step": 14324 }, { "epoch": 0.06341582186019744, "grad_norm": 3.017511684397272, "learning_rate": 6.341582186019744e-06, "loss": 0.9258, "step": 14325 }, { "epoch": 0.06342024879366064, "grad_norm": 2.8184465775956773, "learning_rate": 6.342024879366064e-06, "loss": 0.7209, "step": 14326 }, { "epoch": 0.06342467572712382, "grad_norm": 2.5147599348844984, "learning_rate": 6.342467572712383e-06, "loss": 0.6501, "step": 14327 }, { "epoch": 0.06342910266058702, "grad_norm": 2.582745066300656, "learning_rate": 6.3429102660587016e-06, "loss": 0.6557, "step": 14328 }, { "epoch": 0.0634335295940502, "grad_norm": 2.2757365269539074, "learning_rate": 6.34335295940502e-06, "loss": 0.6753, "step": 14329 }, { "epoch": 0.0634379565275134, "grad_norm": 2.6418877672337446, "learning_rate": 6.34379565275134e-06, "loss": 0.6248, "step": 14330 }, { "epoch": 0.06344238346097658, "grad_norm": 2.900622935420145, "learning_rate": 6.344238346097659e-06, "loss": 0.8817, "step": 14331 }, { "epoch": 0.06344681039443976, "grad_norm": 2.8660518825961754, "learning_rate": 6.344681039443977e-06, "loss": 0.8016, "step": 14332 }, { "epoch": 0.06345123732790296, "grad_norm": 2.824960708462423, "learning_rate": 6.345123732790297e-06, "loss": 0.6824, "step": 14333 }, { "epoch": 0.06345566426136615, "grad_norm": 2.7310626287958404, "learning_rate": 6.345566426136616e-06, "loss": 0.7842, "step": 14334 }, { "epoch": 0.06346009119482934, "grad_norm": 3.323878091433103, "learning_rate": 6.346009119482935e-06, "loss": 1.0724, "step": 14335 }, { "epoch": 0.06346451812829253, "grad_norm": 2.2786009263712144, "learning_rate": 6.346451812829254e-06, "loss": 0.6558, "step": 14336 }, { "epoch": 0.06346894506175572, "grad_norm": 2.6123083437371357, "learning_rate": 6.3468945061755725e-06, "loss": 0.7614, "step": 14337 }, { "epoch": 0.06347337199521891, "grad_norm": 2.810525105394834, "learning_rate": 6.347337199521891e-06, "loss": 0.7381, "step": 14338 }, { "epoch": 0.0634777989286821, "grad_norm": 2.7301736260406226, "learning_rate": 6.347779892868211e-06, "loss": 0.879, "step": 14339 }, { "epoch": 0.06348222586214529, "grad_norm": 3.8060352868886724, "learning_rate": 6.34822258621453e-06, "loss": 0.9506, "step": 14340 }, { "epoch": 0.06348665279560849, "grad_norm": 2.8922531997231222, "learning_rate": 6.348665279560848e-06, "loss": 0.7735, "step": 14341 }, { "epoch": 0.06349107972907167, "grad_norm": 2.5683677146596198, "learning_rate": 6.349107972907168e-06, "loss": 0.8985, "step": 14342 }, { "epoch": 0.06349550666253487, "grad_norm": 2.6908192856750133, "learning_rate": 6.349550666253487e-06, "loss": 0.6688, "step": 14343 }, { "epoch": 0.06349993359599805, "grad_norm": 2.4635731092781907, "learning_rate": 6.3499933595998056e-06, "loss": 0.7998, "step": 14344 }, { "epoch": 0.06350436052946125, "grad_norm": 2.3954064435686657, "learning_rate": 6.350436052946125e-06, "loss": 0.7583, "step": 14345 }, { "epoch": 0.06350878746292443, "grad_norm": 2.390658811449625, "learning_rate": 6.3508787462924434e-06, "loss": 0.5065, "step": 14346 }, { "epoch": 0.06351321439638762, "grad_norm": 2.690858042683299, "learning_rate": 6.351321439638762e-06, "loss": 0.7339, "step": 14347 }, { "epoch": 0.06351764132985081, "grad_norm": 3.1252320312986095, "learning_rate": 6.351764132985082e-06, "loss": 0.8774, "step": 14348 }, { "epoch": 0.063522068263314, "grad_norm": 2.6840000990214334, "learning_rate": 6.352206826331401e-06, "loss": 0.5597, "step": 14349 }, { "epoch": 0.0635264951967772, "grad_norm": 2.828131123928688, "learning_rate": 6.352649519677719e-06, "loss": 0.7159, "step": 14350 }, { "epoch": 0.06353092213024038, "grad_norm": 2.9934086627922185, "learning_rate": 6.3530922130240395e-06, "loss": 0.9827, "step": 14351 }, { "epoch": 0.06353534906370358, "grad_norm": 3.8771598858716576, "learning_rate": 6.353534906370358e-06, "loss": 0.9674, "step": 14352 }, { "epoch": 0.06353977599716676, "grad_norm": 2.4353663937992462, "learning_rate": 6.3539775997166765e-06, "loss": 0.5563, "step": 14353 }, { "epoch": 0.06354420293062996, "grad_norm": 3.389056532665284, "learning_rate": 6.354420293062996e-06, "loss": 0.9017, "step": 14354 }, { "epoch": 0.06354862986409314, "grad_norm": 2.8352283376524694, "learning_rate": 6.354862986409314e-06, "loss": 0.7087, "step": 14355 }, { "epoch": 0.06355305679755634, "grad_norm": 2.886727125840435, "learning_rate": 6.355305679755634e-06, "loss": 0.7171, "step": 14356 }, { "epoch": 0.06355748373101952, "grad_norm": 2.6275419699365052, "learning_rate": 6.355748373101953e-06, "loss": 0.6733, "step": 14357 }, { "epoch": 0.06356191066448272, "grad_norm": 3.3889367942118533, "learning_rate": 6.356191066448272e-06, "loss": 1.1086, "step": 14358 }, { "epoch": 0.0635663375979459, "grad_norm": 2.6375707583472514, "learning_rate": 6.35663375979459e-06, "loss": 0.8486, "step": 14359 }, { "epoch": 0.0635707645314091, "grad_norm": 3.5718450582107506, "learning_rate": 6.35707645314091e-06, "loss": 0.9124, "step": 14360 }, { "epoch": 0.06357519146487228, "grad_norm": 2.828675621920169, "learning_rate": 6.357519146487229e-06, "loss": 0.8048, "step": 14361 }, { "epoch": 0.06357961839833547, "grad_norm": 2.4331153901458373, "learning_rate": 6.3579618398335474e-06, "loss": 0.4689, "step": 14362 }, { "epoch": 0.06358404533179866, "grad_norm": 2.8327180346454806, "learning_rate": 6.358404533179867e-06, "loss": 0.6701, "step": 14363 }, { "epoch": 0.06358847226526185, "grad_norm": 1.9898010435378213, "learning_rate": 6.358847226526186e-06, "loss": 0.4322, "step": 14364 }, { "epoch": 0.06359289919872504, "grad_norm": 3.145808610267019, "learning_rate": 6.359289919872505e-06, "loss": 0.9043, "step": 14365 }, { "epoch": 0.06359732613218823, "grad_norm": 2.6340299799550904, "learning_rate": 6.359732613218824e-06, "loss": 0.7134, "step": 14366 }, { "epoch": 0.06360175306565143, "grad_norm": 5.113677899169874, "learning_rate": 6.360175306565143e-06, "loss": 0.836, "step": 14367 }, { "epoch": 0.06360617999911461, "grad_norm": 2.603768206209338, "learning_rate": 6.360617999911461e-06, "loss": 0.6121, "step": 14368 }, { "epoch": 0.0636106069325778, "grad_norm": 2.410606421959884, "learning_rate": 6.361060693257781e-06, "loss": 0.5543, "step": 14369 }, { "epoch": 0.06361503386604099, "grad_norm": 3.443592012693391, "learning_rate": 6.3615033866041e-06, "loss": 0.4821, "step": 14370 }, { "epoch": 0.06361946079950419, "grad_norm": 2.6627989163420613, "learning_rate": 6.361946079950418e-06, "loss": 0.8434, "step": 14371 }, { "epoch": 0.06362388773296737, "grad_norm": 2.546056563122282, "learning_rate": 6.362388773296739e-06, "loss": 0.7956, "step": 14372 }, { "epoch": 0.06362831466643057, "grad_norm": 2.651443006921501, "learning_rate": 6.362831466643057e-06, "loss": 0.7767, "step": 14373 }, { "epoch": 0.06363274159989375, "grad_norm": 2.5557697770894023, "learning_rate": 6.363274159989376e-06, "loss": 0.8713, "step": 14374 }, { "epoch": 0.06363716853335695, "grad_norm": 3.2159907737806925, "learning_rate": 6.363716853335695e-06, "loss": 1.0705, "step": 14375 }, { "epoch": 0.06364159546682013, "grad_norm": 3.53396381458421, "learning_rate": 6.3641595466820136e-06, "loss": 1.317, "step": 14376 }, { "epoch": 0.06364602240028332, "grad_norm": 2.772673267339218, "learning_rate": 6.364602240028332e-06, "loss": 0.6976, "step": 14377 }, { "epoch": 0.06365044933374651, "grad_norm": 2.473234046843878, "learning_rate": 6.365044933374652e-06, "loss": 0.5319, "step": 14378 }, { "epoch": 0.0636548762672097, "grad_norm": 2.6439267374566215, "learning_rate": 6.365487626720971e-06, "loss": 0.7604, "step": 14379 }, { "epoch": 0.0636593032006729, "grad_norm": 2.30454710542081, "learning_rate": 6.365930320067289e-06, "loss": 0.4949, "step": 14380 }, { "epoch": 0.06366373013413608, "grad_norm": 2.9091259316110887, "learning_rate": 6.3663730134136096e-06, "loss": 0.8333, "step": 14381 }, { "epoch": 0.06366815706759928, "grad_norm": 3.8295474636237534, "learning_rate": 6.366815706759928e-06, "loss": 1.0916, "step": 14382 }, { "epoch": 0.06367258400106246, "grad_norm": 2.817557596332791, "learning_rate": 6.367258400106247e-06, "loss": 0.7213, "step": 14383 }, { "epoch": 0.06367701093452566, "grad_norm": 2.788994371290638, "learning_rate": 6.367701093452566e-06, "loss": 0.5262, "step": 14384 }, { "epoch": 0.06368143786798884, "grad_norm": 2.699020317439447, "learning_rate": 6.3681437867988845e-06, "loss": 0.7706, "step": 14385 }, { "epoch": 0.06368586480145204, "grad_norm": 2.2946779096336885, "learning_rate": 6.368586480145204e-06, "loss": 0.6253, "step": 14386 }, { "epoch": 0.06369029173491522, "grad_norm": 2.4116621611500593, "learning_rate": 6.369029173491523e-06, "loss": 0.7342, "step": 14387 }, { "epoch": 0.06369471866837842, "grad_norm": 2.1600114270099415, "learning_rate": 6.369471866837842e-06, "loss": 0.4967, "step": 14388 }, { "epoch": 0.0636991456018416, "grad_norm": 2.5798492230563217, "learning_rate": 6.36991456018416e-06, "loss": 0.6804, "step": 14389 }, { "epoch": 0.0637035725353048, "grad_norm": 2.5919768674676353, "learning_rate": 6.3703572535304805e-06, "loss": 0.5934, "step": 14390 }, { "epoch": 0.06370799946876798, "grad_norm": 3.0581610430673005, "learning_rate": 6.370799946876799e-06, "loss": 1.2333, "step": 14391 }, { "epoch": 0.06371242640223117, "grad_norm": 2.924212743330219, "learning_rate": 6.3712426402231176e-06, "loss": 0.7395, "step": 14392 }, { "epoch": 0.06371685333569437, "grad_norm": 3.2051326583598283, "learning_rate": 6.371685333569437e-06, "loss": 0.8529, "step": 14393 }, { "epoch": 0.06372128026915755, "grad_norm": 2.6117975084648424, "learning_rate": 6.372128026915756e-06, "loss": 0.6869, "step": 14394 }, { "epoch": 0.06372570720262075, "grad_norm": 2.666783584975028, "learning_rate": 6.372570720262075e-06, "loss": 0.9665, "step": 14395 }, { "epoch": 0.06373013413608393, "grad_norm": 2.204523935052087, "learning_rate": 6.373013413608394e-06, "loss": 0.4938, "step": 14396 }, { "epoch": 0.06373456106954713, "grad_norm": 2.8247576118358513, "learning_rate": 6.373456106954713e-06, "loss": 0.6328, "step": 14397 }, { "epoch": 0.06373898800301031, "grad_norm": 3.619278254440817, "learning_rate": 6.373898800301031e-06, "loss": 0.8697, "step": 14398 }, { "epoch": 0.06374341493647351, "grad_norm": 2.4604671828269686, "learning_rate": 6.3743414936473515e-06, "loss": 0.7045, "step": 14399 }, { "epoch": 0.06374784186993669, "grad_norm": 3.356461156668159, "learning_rate": 6.37478418699367e-06, "loss": 1.0613, "step": 14400 }, { "epoch": 0.06375226880339989, "grad_norm": 2.459189170518281, "learning_rate": 6.3752268803399885e-06, "loss": 0.7047, "step": 14401 }, { "epoch": 0.06375669573686307, "grad_norm": 2.6631090958862482, "learning_rate": 6.375669573686309e-06, "loss": 0.7963, "step": 14402 }, { "epoch": 0.06376112267032627, "grad_norm": 2.9570920206871207, "learning_rate": 6.376112267032627e-06, "loss": 0.757, "step": 14403 }, { "epoch": 0.06376554960378945, "grad_norm": 2.440111691068868, "learning_rate": 6.376554960378946e-06, "loss": 0.6298, "step": 14404 }, { "epoch": 0.06376997653725265, "grad_norm": 3.4313340545523814, "learning_rate": 6.376997653725265e-06, "loss": 1.1542, "step": 14405 }, { "epoch": 0.06377440347071583, "grad_norm": 2.3813671456995533, "learning_rate": 6.377440347071584e-06, "loss": 0.6917, "step": 14406 }, { "epoch": 0.06377883040417902, "grad_norm": 3.1563422719094563, "learning_rate": 6.377883040417903e-06, "loss": 0.6053, "step": 14407 }, { "epoch": 0.06378325733764222, "grad_norm": 2.5405851523897365, "learning_rate": 6.378325733764222e-06, "loss": 0.7552, "step": 14408 }, { "epoch": 0.0637876842711054, "grad_norm": 2.0989563291175446, "learning_rate": 6.378768427110541e-06, "loss": 0.5071, "step": 14409 }, { "epoch": 0.0637921112045686, "grad_norm": 2.326878808938386, "learning_rate": 6.3792111204568594e-06, "loss": 0.6858, "step": 14410 }, { "epoch": 0.06379653813803178, "grad_norm": 3.5505807465699606, "learning_rate": 6.37965381380318e-06, "loss": 1.1791, "step": 14411 }, { "epoch": 0.06380096507149498, "grad_norm": 2.631057031232008, "learning_rate": 6.380096507149498e-06, "loss": 0.6552, "step": 14412 }, { "epoch": 0.06380539200495816, "grad_norm": 2.4847097381972887, "learning_rate": 6.380539200495817e-06, "loss": 0.6872, "step": 14413 }, { "epoch": 0.06380981893842136, "grad_norm": 2.6479235136223784, "learning_rate": 6.380981893842136e-06, "loss": 0.836, "step": 14414 }, { "epoch": 0.06381424587188454, "grad_norm": 3.4064909314050023, "learning_rate": 6.3814245871884555e-06, "loss": 0.97, "step": 14415 }, { "epoch": 0.06381867280534774, "grad_norm": 3.004902329379489, "learning_rate": 6.381867280534774e-06, "loss": 0.7873, "step": 14416 }, { "epoch": 0.06382309973881092, "grad_norm": 2.801270650910737, "learning_rate": 6.382309973881093e-06, "loss": 0.98, "step": 14417 }, { "epoch": 0.06382752667227412, "grad_norm": 3.2570917852255485, "learning_rate": 6.382752667227412e-06, "loss": 1.0255, "step": 14418 }, { "epoch": 0.0638319536057373, "grad_norm": 2.7921474120422594, "learning_rate": 6.38319536057373e-06, "loss": 0.876, "step": 14419 }, { "epoch": 0.0638363805392005, "grad_norm": 2.2946957370594303, "learning_rate": 6.383638053920051e-06, "loss": 0.7298, "step": 14420 }, { "epoch": 0.06384080747266369, "grad_norm": 2.761252845933565, "learning_rate": 6.384080747266369e-06, "loss": 0.6042, "step": 14421 }, { "epoch": 0.06384523440612688, "grad_norm": 2.9367807618286856, "learning_rate": 6.384523440612688e-06, "loss": 0.6592, "step": 14422 }, { "epoch": 0.06384966133959007, "grad_norm": 2.703340667567556, "learning_rate": 6.384966133959007e-06, "loss": 0.6324, "step": 14423 }, { "epoch": 0.06385408827305325, "grad_norm": 2.750416624800257, "learning_rate": 6.385408827305326e-06, "loss": 0.7061, "step": 14424 }, { "epoch": 0.06385851520651645, "grad_norm": 2.859130608494039, "learning_rate": 6.385851520651645e-06, "loss": 1.0111, "step": 14425 }, { "epoch": 0.06386294213997963, "grad_norm": 3.8142396415425512, "learning_rate": 6.386294213997964e-06, "loss": 1.2374, "step": 14426 }, { "epoch": 0.06386736907344283, "grad_norm": 2.3316171761270468, "learning_rate": 6.386736907344283e-06, "loss": 0.4995, "step": 14427 }, { "epoch": 0.06387179600690601, "grad_norm": 3.128167291293623, "learning_rate": 6.387179600690601e-06, "loss": 1.0497, "step": 14428 }, { "epoch": 0.06387622294036921, "grad_norm": 2.4380536779446147, "learning_rate": 6.3876222940369216e-06, "loss": 0.6609, "step": 14429 }, { "epoch": 0.06388064987383239, "grad_norm": 2.3895016490009144, "learning_rate": 6.38806498738324e-06, "loss": 0.5135, "step": 14430 }, { "epoch": 0.06388507680729559, "grad_norm": 2.346099749921376, "learning_rate": 6.388507680729559e-06, "loss": 0.6478, "step": 14431 }, { "epoch": 0.06388950374075877, "grad_norm": 2.9282445383399445, "learning_rate": 6.388950374075879e-06, "loss": 0.6683, "step": 14432 }, { "epoch": 0.06389393067422197, "grad_norm": 2.2257177917161703, "learning_rate": 6.389393067422197e-06, "loss": 0.6428, "step": 14433 }, { "epoch": 0.06389835760768516, "grad_norm": 3.0305565940716197, "learning_rate": 6.389835760768516e-06, "loss": 0.8323, "step": 14434 }, { "epoch": 0.06390278454114835, "grad_norm": 3.523688965899384, "learning_rate": 6.390278454114835e-06, "loss": 0.769, "step": 14435 }, { "epoch": 0.06390721147461154, "grad_norm": 3.496900397869517, "learning_rate": 6.390721147461154e-06, "loss": 1.2652, "step": 14436 }, { "epoch": 0.06391163840807473, "grad_norm": 3.128263514752505, "learning_rate": 6.391163840807473e-06, "loss": 0.7906, "step": 14437 }, { "epoch": 0.06391606534153792, "grad_norm": 2.6594262971406075, "learning_rate": 6.3916065341537925e-06, "loss": 0.9143, "step": 14438 }, { "epoch": 0.0639204922750011, "grad_norm": 2.5386178017773298, "learning_rate": 6.392049227500111e-06, "loss": 1.0047, "step": 14439 }, { "epoch": 0.0639249192084643, "grad_norm": 2.475992664693031, "learning_rate": 6.3924919208464296e-06, "loss": 0.5862, "step": 14440 }, { "epoch": 0.06392934614192748, "grad_norm": 2.4737972556281638, "learning_rate": 6.39293461419275e-06, "loss": 0.6087, "step": 14441 }, { "epoch": 0.06393377307539068, "grad_norm": 2.525027195065243, "learning_rate": 6.393377307539068e-06, "loss": 0.8886, "step": 14442 }, { "epoch": 0.06393820000885386, "grad_norm": 2.3285124444722354, "learning_rate": 6.393820000885387e-06, "loss": 0.4297, "step": 14443 }, { "epoch": 0.06394262694231706, "grad_norm": 2.890070443224238, "learning_rate": 6.394262694231706e-06, "loss": 1.0166, "step": 14444 }, { "epoch": 0.06394705387578024, "grad_norm": 2.7394686276419247, "learning_rate": 6.3947053875780256e-06, "loss": 0.894, "step": 14445 }, { "epoch": 0.06395148080924344, "grad_norm": 2.7845774881286722, "learning_rate": 6.395148080924344e-06, "loss": 0.7464, "step": 14446 }, { "epoch": 0.06395590774270662, "grad_norm": 2.4388379375833793, "learning_rate": 6.3955907742706635e-06, "loss": 0.607, "step": 14447 }, { "epoch": 0.06396033467616982, "grad_norm": 2.3934640084570873, "learning_rate": 6.396033467616982e-06, "loss": 0.5581, "step": 14448 }, { "epoch": 0.063964761609633, "grad_norm": 2.8274960029012477, "learning_rate": 6.3964761609633005e-06, "loss": 0.8738, "step": 14449 }, { "epoch": 0.0639691885430962, "grad_norm": 2.5918170042235964, "learning_rate": 6.396918854309621e-06, "loss": 0.7246, "step": 14450 }, { "epoch": 0.06397361547655939, "grad_norm": 2.925133637883783, "learning_rate": 6.397361547655939e-06, "loss": 0.768, "step": 14451 }, { "epoch": 0.06397804241002258, "grad_norm": 2.607640891091191, "learning_rate": 6.397804241002258e-06, "loss": 0.5905, "step": 14452 }, { "epoch": 0.06398246934348577, "grad_norm": 2.2888815266722022, "learning_rate": 6.398246934348578e-06, "loss": 0.5465, "step": 14453 }, { "epoch": 0.06398689627694895, "grad_norm": 2.6256708032607383, "learning_rate": 6.3986896276948965e-06, "loss": 0.7952, "step": 14454 }, { "epoch": 0.06399132321041215, "grad_norm": 2.908507001803066, "learning_rate": 6.399132321041215e-06, "loss": 0.6614, "step": 14455 }, { "epoch": 0.06399575014387533, "grad_norm": 2.6602889133332446, "learning_rate": 6.399575014387534e-06, "loss": 0.7049, "step": 14456 }, { "epoch": 0.06400017707733853, "grad_norm": 2.0832024708997254, "learning_rate": 6.400017707733853e-06, "loss": 0.5671, "step": 14457 }, { "epoch": 0.06400460401080171, "grad_norm": 2.6680799839515905, "learning_rate": 6.4004604010801714e-06, "loss": 0.5868, "step": 14458 }, { "epoch": 0.06400903094426491, "grad_norm": 2.8433713725449006, "learning_rate": 6.400903094426492e-06, "loss": 0.5644, "step": 14459 }, { "epoch": 0.0640134578777281, "grad_norm": 2.2689178074169996, "learning_rate": 6.40134578777281e-06, "loss": 0.5225, "step": 14460 }, { "epoch": 0.06401788481119129, "grad_norm": 3.0186265741784633, "learning_rate": 6.401788481119129e-06, "loss": 1.1148, "step": 14461 }, { "epoch": 0.06402231174465448, "grad_norm": 3.259351941248395, "learning_rate": 6.402231174465449e-06, "loss": 0.7276, "step": 14462 }, { "epoch": 0.06402673867811767, "grad_norm": 2.817154858926085, "learning_rate": 6.4026738678117675e-06, "loss": 0.8612, "step": 14463 }, { "epoch": 0.06403116561158086, "grad_norm": 2.294439296246693, "learning_rate": 6.403116561158086e-06, "loss": 0.5111, "step": 14464 }, { "epoch": 0.06403559254504405, "grad_norm": 2.611354106177851, "learning_rate": 6.403559254504405e-06, "loss": 0.822, "step": 14465 }, { "epoch": 0.06404001947850724, "grad_norm": 2.6335578005317086, "learning_rate": 6.404001947850724e-06, "loss": 0.3996, "step": 14466 }, { "epoch": 0.06404444641197043, "grad_norm": 3.03039571842958, "learning_rate": 6.404444641197043e-06, "loss": 0.6897, "step": 14467 }, { "epoch": 0.06404887334543362, "grad_norm": 2.871983672428243, "learning_rate": 6.404887334543363e-06, "loss": 0.9147, "step": 14468 }, { "epoch": 0.0640533002788968, "grad_norm": 2.674639418276132, "learning_rate": 6.405330027889681e-06, "loss": 0.8631, "step": 14469 }, { "epoch": 0.06405772721236, "grad_norm": 3.859089596208615, "learning_rate": 6.405772721236e-06, "loss": 0.7647, "step": 14470 }, { "epoch": 0.06406215414582318, "grad_norm": 2.5522010603845477, "learning_rate": 6.40621541458232e-06, "loss": 0.6258, "step": 14471 }, { "epoch": 0.06406658107928638, "grad_norm": 2.5165870838050126, "learning_rate": 6.406658107928638e-06, "loss": 0.6091, "step": 14472 }, { "epoch": 0.06407100801274956, "grad_norm": 2.672209667467581, "learning_rate": 6.407100801274957e-06, "loss": 0.807, "step": 14473 }, { "epoch": 0.06407543494621276, "grad_norm": 2.798001656872276, "learning_rate": 6.407543494621276e-06, "loss": 0.6945, "step": 14474 }, { "epoch": 0.06407986187967595, "grad_norm": 2.2629951681739486, "learning_rate": 6.407986187967596e-06, "loss": 0.6894, "step": 14475 }, { "epoch": 0.06408428881313914, "grad_norm": 3.1694980587628887, "learning_rate": 6.408428881313914e-06, "loss": 0.8238, "step": 14476 }, { "epoch": 0.06408871574660233, "grad_norm": 2.3843584692404756, "learning_rate": 6.4088715746602336e-06, "loss": 0.3649, "step": 14477 }, { "epoch": 0.06409314268006552, "grad_norm": 2.767996955953378, "learning_rate": 6.409314268006552e-06, "loss": 0.5735, "step": 14478 }, { "epoch": 0.06409756961352871, "grad_norm": 2.496629268172423, "learning_rate": 6.409756961352871e-06, "loss": 0.9578, "step": 14479 }, { "epoch": 0.0641019965469919, "grad_norm": 3.0209099586641948, "learning_rate": 6.410199654699191e-06, "loss": 0.8457, "step": 14480 }, { "epoch": 0.06410642348045509, "grad_norm": 2.634308108326872, "learning_rate": 6.410642348045509e-06, "loss": 0.4281, "step": 14481 }, { "epoch": 0.06411085041391829, "grad_norm": 3.309192371858698, "learning_rate": 6.411085041391828e-06, "loss": 1.0616, "step": 14482 }, { "epoch": 0.06411527734738147, "grad_norm": 2.3531691647063964, "learning_rate": 6.411527734738148e-06, "loss": 0.6792, "step": 14483 }, { "epoch": 0.06411970428084465, "grad_norm": 2.3278641520884134, "learning_rate": 6.411970428084467e-06, "loss": 0.7361, "step": 14484 }, { "epoch": 0.06412413121430785, "grad_norm": 4.27613197502404, "learning_rate": 6.412413121430785e-06, "loss": 1.1066, "step": 14485 }, { "epoch": 0.06412855814777103, "grad_norm": 2.7809527918501526, "learning_rate": 6.4128558147771045e-06, "loss": 0.8572, "step": 14486 }, { "epoch": 0.06413298508123423, "grad_norm": 2.4179464432490154, "learning_rate": 6.413298508123423e-06, "loss": 0.5809, "step": 14487 }, { "epoch": 0.06413741201469741, "grad_norm": 3.244153646652355, "learning_rate": 6.413741201469742e-06, "loss": 1.1487, "step": 14488 }, { "epoch": 0.06414183894816061, "grad_norm": 2.1844880707203025, "learning_rate": 6.414183894816062e-06, "loss": 0.4138, "step": 14489 }, { "epoch": 0.0641462658816238, "grad_norm": 2.61362880221164, "learning_rate": 6.41462658816238e-06, "loss": 0.9229, "step": 14490 }, { "epoch": 0.064150692815087, "grad_norm": 2.2066079196826975, "learning_rate": 6.415069281508699e-06, "loss": 0.5709, "step": 14491 }, { "epoch": 0.06415511974855018, "grad_norm": 2.429248758415642, "learning_rate": 6.415511974855019e-06, "loss": 0.5465, "step": 14492 }, { "epoch": 0.06415954668201337, "grad_norm": 2.4041870569643122, "learning_rate": 6.4159546682013376e-06, "loss": 0.464, "step": 14493 }, { "epoch": 0.06416397361547656, "grad_norm": 2.6531840006409926, "learning_rate": 6.416397361547656e-06, "loss": 0.7479, "step": 14494 }, { "epoch": 0.06416840054893976, "grad_norm": 2.9081709542433023, "learning_rate": 6.4168400548939755e-06, "loss": 0.9496, "step": 14495 }, { "epoch": 0.06417282748240294, "grad_norm": 2.6611455615461534, "learning_rate": 6.417282748240294e-06, "loss": 0.7392, "step": 14496 }, { "epoch": 0.06417725441586614, "grad_norm": 2.6435353065689577, "learning_rate": 6.417725441586613e-06, "loss": 0.6649, "step": 14497 }, { "epoch": 0.06418168134932932, "grad_norm": 2.9922995849190253, "learning_rate": 6.418168134932933e-06, "loss": 0.8965, "step": 14498 }, { "epoch": 0.0641861082827925, "grad_norm": 2.7111319087472014, "learning_rate": 6.418610828279251e-06, "loss": 0.7396, "step": 14499 }, { "epoch": 0.0641905352162557, "grad_norm": 5.1220467959632705, "learning_rate": 6.41905352162557e-06, "loss": 0.7311, "step": 14500 }, { "epoch": 0.06419496214971888, "grad_norm": 2.9844569759697843, "learning_rate": 6.41949621497189e-06, "loss": 0.7484, "step": 14501 }, { "epoch": 0.06419938908318208, "grad_norm": 2.9793095091701547, "learning_rate": 6.4199389083182085e-06, "loss": 1.0533, "step": 14502 }, { "epoch": 0.06420381601664527, "grad_norm": 2.574027247801512, "learning_rate": 6.420381601664527e-06, "loss": 0.7008, "step": 14503 }, { "epoch": 0.06420824295010846, "grad_norm": 2.5330717923702495, "learning_rate": 6.420824295010846e-06, "loss": 0.6446, "step": 14504 }, { "epoch": 0.06421266988357165, "grad_norm": 2.7483134269984886, "learning_rate": 6.421266988357166e-06, "loss": 0.7145, "step": 14505 }, { "epoch": 0.06421709681703484, "grad_norm": 3.0879094985026985, "learning_rate": 6.421709681703484e-06, "loss": 0.7957, "step": 14506 }, { "epoch": 0.06422152375049803, "grad_norm": 2.820512405062153, "learning_rate": 6.422152375049804e-06, "loss": 0.7339, "step": 14507 }, { "epoch": 0.06422595068396122, "grad_norm": 3.6316098673855137, "learning_rate": 6.422595068396122e-06, "loss": 0.9453, "step": 14508 }, { "epoch": 0.06423037761742441, "grad_norm": 3.4588909060541684, "learning_rate": 6.423037761742441e-06, "loss": 1.0005, "step": 14509 }, { "epoch": 0.0642348045508876, "grad_norm": 2.435191055839009, "learning_rate": 6.423480455088761e-06, "loss": 0.5536, "step": 14510 }, { "epoch": 0.06423923148435079, "grad_norm": 2.891787886909947, "learning_rate": 6.4239231484350795e-06, "loss": 1.0623, "step": 14511 }, { "epoch": 0.06424365841781399, "grad_norm": 2.688371556646058, "learning_rate": 6.424365841781398e-06, "loss": 0.6608, "step": 14512 }, { "epoch": 0.06424808535127717, "grad_norm": 2.597297703432545, "learning_rate": 6.424808535127718e-06, "loss": 0.6097, "step": 14513 }, { "epoch": 0.06425251228474035, "grad_norm": 2.539446078032953, "learning_rate": 6.425251228474037e-06, "loss": 0.5759, "step": 14514 }, { "epoch": 0.06425693921820355, "grad_norm": 2.367706506828019, "learning_rate": 6.425693921820355e-06, "loss": 0.7457, "step": 14515 }, { "epoch": 0.06426136615166674, "grad_norm": 2.4692804793969447, "learning_rate": 6.426136615166675e-06, "loss": 0.7076, "step": 14516 }, { "epoch": 0.06426579308512993, "grad_norm": 2.7266643111224984, "learning_rate": 6.426579308512993e-06, "loss": 0.8132, "step": 14517 }, { "epoch": 0.06427022001859312, "grad_norm": 2.7227950710376243, "learning_rate": 6.4270220018593125e-06, "loss": 0.7302, "step": 14518 }, { "epoch": 0.06427464695205631, "grad_norm": 2.9755265433567737, "learning_rate": 6.427464695205632e-06, "loss": 0.9515, "step": 14519 }, { "epoch": 0.0642790738855195, "grad_norm": 2.7463817695342954, "learning_rate": 6.42790738855195e-06, "loss": 0.9621, "step": 14520 }, { "epoch": 0.0642835008189827, "grad_norm": 2.738194816329185, "learning_rate": 6.428350081898269e-06, "loss": 0.7604, "step": 14521 }, { "epoch": 0.06428792775244588, "grad_norm": 2.4470066015478866, "learning_rate": 6.428792775244589e-06, "loss": 0.4997, "step": 14522 }, { "epoch": 0.06429235468590908, "grad_norm": 2.729164710331112, "learning_rate": 6.429235468590908e-06, "loss": 0.7649, "step": 14523 }, { "epoch": 0.06429678161937226, "grad_norm": 2.342727964480316, "learning_rate": 6.429678161937226e-06, "loss": 0.6879, "step": 14524 }, { "epoch": 0.06430120855283546, "grad_norm": 2.9416380354263616, "learning_rate": 6.4301208552835456e-06, "loss": 0.9259, "step": 14525 }, { "epoch": 0.06430563548629864, "grad_norm": 3.0054184608604264, "learning_rate": 6.430563548629865e-06, "loss": 0.6685, "step": 14526 }, { "epoch": 0.06431006241976184, "grad_norm": 2.708495589421839, "learning_rate": 6.4310062419761835e-06, "loss": 0.8216, "step": 14527 }, { "epoch": 0.06431448935322502, "grad_norm": 2.865479370581019, "learning_rate": 6.431448935322503e-06, "loss": 1.0306, "step": 14528 }, { "epoch": 0.0643189162866882, "grad_norm": 3.2398737718010198, "learning_rate": 6.431891628668821e-06, "loss": 1.0485, "step": 14529 }, { "epoch": 0.0643233432201514, "grad_norm": 2.7289438392613508, "learning_rate": 6.43233432201514e-06, "loss": 0.9337, "step": 14530 }, { "epoch": 0.06432777015361459, "grad_norm": 2.509792390270454, "learning_rate": 6.43277701536146e-06, "loss": 0.8984, "step": 14531 }, { "epoch": 0.06433219708707778, "grad_norm": 4.0437370741155, "learning_rate": 6.433219708707779e-06, "loss": 0.5699, "step": 14532 }, { "epoch": 0.06433662402054097, "grad_norm": 3.6850368902041937, "learning_rate": 6.433662402054097e-06, "loss": 1.2756, "step": 14533 }, { "epoch": 0.06434105095400416, "grad_norm": 2.575296852797914, "learning_rate": 6.434105095400417e-06, "loss": 0.9055, "step": 14534 }, { "epoch": 0.06434547788746735, "grad_norm": 2.7058901967645523, "learning_rate": 6.434547788746736e-06, "loss": 0.9743, "step": 14535 }, { "epoch": 0.06434990482093055, "grad_norm": 2.9298644123621638, "learning_rate": 6.434990482093054e-06, "loss": 0.7052, "step": 14536 }, { "epoch": 0.06435433175439373, "grad_norm": 3.109197049171666, "learning_rate": 6.435433175439374e-06, "loss": 0.7517, "step": 14537 }, { "epoch": 0.06435875868785693, "grad_norm": 2.4475052881823083, "learning_rate": 6.435875868785692e-06, "loss": 0.6447, "step": 14538 }, { "epoch": 0.06436318562132011, "grad_norm": 3.2328125318845937, "learning_rate": 6.436318562132011e-06, "loss": 0.7634, "step": 14539 }, { "epoch": 0.06436761255478331, "grad_norm": 2.480278658508136, "learning_rate": 6.436761255478331e-06, "loss": 0.5655, "step": 14540 }, { "epoch": 0.06437203948824649, "grad_norm": 3.566216089184505, "learning_rate": 6.4372039488246496e-06, "loss": 1.3863, "step": 14541 }, { "epoch": 0.06437646642170969, "grad_norm": 2.4753397105732837, "learning_rate": 6.437646642170968e-06, "loss": 0.5916, "step": 14542 }, { "epoch": 0.06438089335517287, "grad_norm": 2.762667621566224, "learning_rate": 6.438089335517288e-06, "loss": 0.9759, "step": 14543 }, { "epoch": 0.06438532028863606, "grad_norm": 2.612975984474925, "learning_rate": 6.438532028863607e-06, "loss": 0.6524, "step": 14544 }, { "epoch": 0.06438974722209925, "grad_norm": 3.32679951790855, "learning_rate": 6.438974722209925e-06, "loss": 1.043, "step": 14545 }, { "epoch": 0.06439417415556244, "grad_norm": 2.618118106330513, "learning_rate": 6.439417415556245e-06, "loss": 0.5416, "step": 14546 }, { "epoch": 0.06439860108902563, "grad_norm": 3.297700422783007, "learning_rate": 6.439860108902563e-06, "loss": 0.929, "step": 14547 }, { "epoch": 0.06440302802248882, "grad_norm": 3.1040510036121725, "learning_rate": 6.440302802248883e-06, "loss": 0.9439, "step": 14548 }, { "epoch": 0.06440745495595201, "grad_norm": 2.9862524259152265, "learning_rate": 6.440745495595202e-06, "loss": 0.6826, "step": 14549 }, { "epoch": 0.0644118818894152, "grad_norm": 3.137936014702467, "learning_rate": 6.4411881889415205e-06, "loss": 0.9539, "step": 14550 }, { "epoch": 0.0644163088228784, "grad_norm": 3.247523034506593, "learning_rate": 6.441630882287839e-06, "loss": 0.9944, "step": 14551 }, { "epoch": 0.06442073575634158, "grad_norm": 3.086547385988044, "learning_rate": 6.442073575634159e-06, "loss": 0.7343, "step": 14552 }, { "epoch": 0.06442516268980478, "grad_norm": 3.3405192989598977, "learning_rate": 6.442516268980478e-06, "loss": 0.7119, "step": 14553 }, { "epoch": 0.06442958962326796, "grad_norm": 3.1762603785549586, "learning_rate": 6.442958962326796e-06, "loss": 1.1718, "step": 14554 }, { "epoch": 0.06443401655673116, "grad_norm": 3.7345923230083318, "learning_rate": 6.443401655673116e-06, "loss": 0.943, "step": 14555 }, { "epoch": 0.06443844349019434, "grad_norm": 2.6200648999177005, "learning_rate": 6.443844349019435e-06, "loss": 0.5587, "step": 14556 }, { "epoch": 0.06444287042365754, "grad_norm": 2.427585137637295, "learning_rate": 6.4442870423657536e-06, "loss": 0.7937, "step": 14557 }, { "epoch": 0.06444729735712072, "grad_norm": 2.719949977767592, "learning_rate": 6.444729735712073e-06, "loss": 0.6959, "step": 14558 }, { "epoch": 0.0644517242905839, "grad_norm": 2.259516203781797, "learning_rate": 6.4451724290583915e-06, "loss": 0.583, "step": 14559 }, { "epoch": 0.0644561512240471, "grad_norm": 2.039470577921758, "learning_rate": 6.44561512240471e-06, "loss": 0.4125, "step": 14560 }, { "epoch": 0.06446057815751029, "grad_norm": 2.547623157223425, "learning_rate": 6.44605781575103e-06, "loss": 0.7031, "step": 14561 }, { "epoch": 0.06446500509097348, "grad_norm": 2.5632344585134055, "learning_rate": 6.446500509097349e-06, "loss": 0.7806, "step": 14562 }, { "epoch": 0.06446943202443667, "grad_norm": 3.557319351519038, "learning_rate": 6.446943202443667e-06, "loss": 1.1697, "step": 14563 }, { "epoch": 0.06447385895789987, "grad_norm": 3.401316262231999, "learning_rate": 6.4473858957899875e-06, "loss": 1.0085, "step": 14564 }, { "epoch": 0.06447828589136305, "grad_norm": 4.221179680574048, "learning_rate": 6.447828589136306e-06, "loss": 0.7064, "step": 14565 }, { "epoch": 0.06448271282482625, "grad_norm": 2.747991272562325, "learning_rate": 6.4482712824826245e-06, "loss": 0.8218, "step": 14566 }, { "epoch": 0.06448713975828943, "grad_norm": 3.02007320515976, "learning_rate": 6.448713975828944e-06, "loss": 0.933, "step": 14567 }, { "epoch": 0.06449156669175263, "grad_norm": 2.406146537206913, "learning_rate": 6.449156669175262e-06, "loss": 0.5491, "step": 14568 }, { "epoch": 0.06449599362521581, "grad_norm": 3.0316257308244783, "learning_rate": 6.449599362521582e-06, "loss": 0.8092, "step": 14569 }, { "epoch": 0.06450042055867901, "grad_norm": 2.811639727097085, "learning_rate": 6.450042055867901e-06, "loss": 0.8162, "step": 14570 }, { "epoch": 0.06450484749214219, "grad_norm": 2.6383665593512977, "learning_rate": 6.45048474921422e-06, "loss": 0.7776, "step": 14571 }, { "epoch": 0.06450927442560539, "grad_norm": 2.9160497220856696, "learning_rate": 6.450927442560538e-06, "loss": 0.6936, "step": 14572 }, { "epoch": 0.06451370135906857, "grad_norm": 2.9558190876601893, "learning_rate": 6.451370135906858e-06, "loss": 0.883, "step": 14573 }, { "epoch": 0.06451812829253176, "grad_norm": 3.758402202971446, "learning_rate": 6.451812829253177e-06, "loss": 0.9757, "step": 14574 }, { "epoch": 0.06452255522599495, "grad_norm": 2.82005373470705, "learning_rate": 6.4522555225994955e-06, "loss": 0.8509, "step": 14575 }, { "epoch": 0.06452698215945814, "grad_norm": 3.248471156773966, "learning_rate": 6.452698215945815e-06, "loss": 1.0292, "step": 14576 }, { "epoch": 0.06453140909292134, "grad_norm": 2.433183133222515, "learning_rate": 6.453140909292133e-06, "loss": 0.8238, "step": 14577 }, { "epoch": 0.06453583602638452, "grad_norm": 2.480027229250171, "learning_rate": 6.453583602638453e-06, "loss": 0.6297, "step": 14578 }, { "epoch": 0.06454026295984772, "grad_norm": 3.2532947585792313, "learning_rate": 6.454026295984772e-06, "loss": 1.0346, "step": 14579 }, { "epoch": 0.0645446898933109, "grad_norm": 2.5735451534255085, "learning_rate": 6.454468989331091e-06, "loss": 0.7531, "step": 14580 }, { "epoch": 0.0645491168267741, "grad_norm": 2.5624291501169982, "learning_rate": 6.454911682677409e-06, "loss": 0.7376, "step": 14581 }, { "epoch": 0.06455354376023728, "grad_norm": 2.341888542388588, "learning_rate": 6.455354376023729e-06, "loss": 0.768, "step": 14582 }, { "epoch": 0.06455797069370048, "grad_norm": 2.7324151857257446, "learning_rate": 6.455797069370048e-06, "loss": 0.8263, "step": 14583 }, { "epoch": 0.06456239762716366, "grad_norm": 3.1397364550958766, "learning_rate": 6.456239762716366e-06, "loss": 1.0282, "step": 14584 }, { "epoch": 0.06456682456062686, "grad_norm": 2.8667932396548053, "learning_rate": 6.456682456062686e-06, "loss": 0.9764, "step": 14585 }, { "epoch": 0.06457125149409004, "grad_norm": 3.3854668417363394, "learning_rate": 6.457125149409005e-06, "loss": 0.8913, "step": 14586 }, { "epoch": 0.06457567842755324, "grad_norm": 3.631999748446494, "learning_rate": 6.457567842755324e-06, "loss": 1.0693, "step": 14587 }, { "epoch": 0.06458010536101642, "grad_norm": 2.9981930231161082, "learning_rate": 6.458010536101643e-06, "loss": 0.6891, "step": 14588 }, { "epoch": 0.06458453229447961, "grad_norm": 2.7044516135887164, "learning_rate": 6.4584532294479616e-06, "loss": 0.7176, "step": 14589 }, { "epoch": 0.0645889592279428, "grad_norm": 2.908854771533587, "learning_rate": 6.45889592279428e-06, "loss": 0.9035, "step": 14590 }, { "epoch": 0.06459338616140599, "grad_norm": 2.7459639808898566, "learning_rate": 6.4593386161406e-06, "loss": 0.6385, "step": 14591 }, { "epoch": 0.06459781309486919, "grad_norm": 2.5555128244867205, "learning_rate": 6.459781309486919e-06, "loss": 0.8349, "step": 14592 }, { "epoch": 0.06460224002833237, "grad_norm": 3.386086827099889, "learning_rate": 6.460224002833237e-06, "loss": 0.9447, "step": 14593 }, { "epoch": 0.06460666696179557, "grad_norm": 2.4996153428086347, "learning_rate": 6.4606666961795576e-06, "loss": 0.7431, "step": 14594 }, { "epoch": 0.06461109389525875, "grad_norm": 2.563510782688208, "learning_rate": 6.461109389525876e-06, "loss": 0.4777, "step": 14595 }, { "epoch": 0.06461552082872195, "grad_norm": 2.6058593621668207, "learning_rate": 6.461552082872195e-06, "loss": 0.7713, "step": 14596 }, { "epoch": 0.06461994776218513, "grad_norm": 2.762108499769094, "learning_rate": 6.461994776218514e-06, "loss": 0.9523, "step": 14597 }, { "epoch": 0.06462437469564833, "grad_norm": 2.704777448932049, "learning_rate": 6.4624374695648325e-06, "loss": 0.6091, "step": 14598 }, { "epoch": 0.06462880162911151, "grad_norm": 3.2437333970093936, "learning_rate": 6.462880162911152e-06, "loss": 0.6613, "step": 14599 }, { "epoch": 0.06463322856257471, "grad_norm": 2.5202273037692233, "learning_rate": 6.463322856257471e-06, "loss": 0.7476, "step": 14600 }, { "epoch": 0.0646376554960379, "grad_norm": 2.5449098004691235, "learning_rate": 6.46376554960379e-06, "loss": 0.8944, "step": 14601 }, { "epoch": 0.06464208242950109, "grad_norm": 2.8310969404725923, "learning_rate": 6.464208242950108e-06, "loss": 0.8265, "step": 14602 }, { "epoch": 0.06464650936296427, "grad_norm": 2.673943477208327, "learning_rate": 6.4646509362964285e-06, "loss": 0.4509, "step": 14603 }, { "epoch": 0.06465093629642746, "grad_norm": 2.5740828997107132, "learning_rate": 6.465093629642747e-06, "loss": 0.4454, "step": 14604 }, { "epoch": 0.06465536322989066, "grad_norm": 2.70531964942165, "learning_rate": 6.4655363229890656e-06, "loss": 0.6842, "step": 14605 }, { "epoch": 0.06465979016335384, "grad_norm": 2.808256654893323, "learning_rate": 6.465979016335385e-06, "loss": 0.782, "step": 14606 }, { "epoch": 0.06466421709681704, "grad_norm": 2.508398962855935, "learning_rate": 6.466421709681704e-06, "loss": 0.7095, "step": 14607 }, { "epoch": 0.06466864403028022, "grad_norm": 2.620992717251168, "learning_rate": 6.466864403028023e-06, "loss": 0.7435, "step": 14608 }, { "epoch": 0.06467307096374342, "grad_norm": 3.0542692637808715, "learning_rate": 6.467307096374342e-06, "loss": 0.8547, "step": 14609 }, { "epoch": 0.0646774978972066, "grad_norm": 3.2690319526328544, "learning_rate": 6.467749789720661e-06, "loss": 0.608, "step": 14610 }, { "epoch": 0.0646819248306698, "grad_norm": 2.756886726920303, "learning_rate": 6.468192483066979e-06, "loss": 0.7979, "step": 14611 }, { "epoch": 0.06468635176413298, "grad_norm": 2.6124391294999936, "learning_rate": 6.4686351764132995e-06, "loss": 0.7324, "step": 14612 }, { "epoch": 0.06469077869759618, "grad_norm": 3.684339286967075, "learning_rate": 6.469077869759618e-06, "loss": 0.8554, "step": 14613 }, { "epoch": 0.06469520563105936, "grad_norm": 2.8710770345990793, "learning_rate": 6.4695205631059365e-06, "loss": 0.746, "step": 14614 }, { "epoch": 0.06469963256452256, "grad_norm": 3.0003045359347245, "learning_rate": 6.469963256452257e-06, "loss": 0.7742, "step": 14615 }, { "epoch": 0.06470405949798574, "grad_norm": 3.9182428265935965, "learning_rate": 6.470405949798575e-06, "loss": 1.3179, "step": 14616 }, { "epoch": 0.06470848643144894, "grad_norm": 2.438122891315476, "learning_rate": 6.470848643144894e-06, "loss": 0.7263, "step": 14617 }, { "epoch": 0.06471291336491213, "grad_norm": 3.162686858098723, "learning_rate": 6.471291336491213e-06, "loss": 1.0616, "step": 14618 }, { "epoch": 0.06471734029837531, "grad_norm": 2.792695414723081, "learning_rate": 6.471734029837532e-06, "loss": 0.5932, "step": 14619 }, { "epoch": 0.0647217672318385, "grad_norm": 2.627305063848823, "learning_rate": 6.47217672318385e-06, "loss": 0.6297, "step": 14620 }, { "epoch": 0.06472619416530169, "grad_norm": 2.693535903099202, "learning_rate": 6.47261941653017e-06, "loss": 0.7143, "step": 14621 }, { "epoch": 0.06473062109876489, "grad_norm": 2.4118887583282316, "learning_rate": 6.473062109876489e-06, "loss": 0.6557, "step": 14622 }, { "epoch": 0.06473504803222807, "grad_norm": 2.5962900532968636, "learning_rate": 6.4735048032228075e-06, "loss": 0.7058, "step": 14623 }, { "epoch": 0.06473947496569127, "grad_norm": 2.186244623629601, "learning_rate": 6.473947496569128e-06, "loss": 0.7225, "step": 14624 }, { "epoch": 0.06474390189915445, "grad_norm": 3.24414528914498, "learning_rate": 6.474390189915446e-06, "loss": 0.9515, "step": 14625 }, { "epoch": 0.06474832883261765, "grad_norm": 2.4363964222662, "learning_rate": 6.474832883261765e-06, "loss": 0.5538, "step": 14626 }, { "epoch": 0.06475275576608083, "grad_norm": 3.6505024990155963, "learning_rate": 6.475275576608084e-06, "loss": 0.7842, "step": 14627 }, { "epoch": 0.06475718269954403, "grad_norm": 2.1501733429123657, "learning_rate": 6.475718269954403e-06, "loss": 0.3912, "step": 14628 }, { "epoch": 0.06476160963300721, "grad_norm": 3.302423112945571, "learning_rate": 6.476160963300722e-06, "loss": 0.9358, "step": 14629 }, { "epoch": 0.06476603656647041, "grad_norm": 2.8919293971842057, "learning_rate": 6.476603656647041e-06, "loss": 0.7302, "step": 14630 }, { "epoch": 0.0647704634999336, "grad_norm": 2.650028232591094, "learning_rate": 6.47704634999336e-06, "loss": 0.5764, "step": 14631 }, { "epoch": 0.06477489043339679, "grad_norm": 2.9269489323473934, "learning_rate": 6.477489043339678e-06, "loss": 0.6445, "step": 14632 }, { "epoch": 0.06477931736685998, "grad_norm": 3.0916087020825773, "learning_rate": 6.477931736685999e-06, "loss": 1.0656, "step": 14633 }, { "epoch": 0.06478374430032316, "grad_norm": 2.597543942648439, "learning_rate": 6.478374430032317e-06, "loss": 0.5888, "step": 14634 }, { "epoch": 0.06478817123378636, "grad_norm": 2.356275221932988, "learning_rate": 6.478817123378636e-06, "loss": 0.6648, "step": 14635 }, { "epoch": 0.06479259816724954, "grad_norm": 2.6286603385796994, "learning_rate": 6.479259816724955e-06, "loss": 0.8259, "step": 14636 }, { "epoch": 0.06479702510071274, "grad_norm": 2.4663531489996013, "learning_rate": 6.479702510071274e-06, "loss": 0.7442, "step": 14637 }, { "epoch": 0.06480145203417592, "grad_norm": 2.9794456157515694, "learning_rate": 6.480145203417593e-06, "loss": 0.8265, "step": 14638 }, { "epoch": 0.06480587896763912, "grad_norm": 2.4049664334731595, "learning_rate": 6.480587896763912e-06, "loss": 0.5945, "step": 14639 }, { "epoch": 0.0648103059011023, "grad_norm": 2.850236040152579, "learning_rate": 6.481030590110231e-06, "loss": 0.8981, "step": 14640 }, { "epoch": 0.0648147328345655, "grad_norm": 2.633270511879805, "learning_rate": 6.481473283456549e-06, "loss": 0.5965, "step": 14641 }, { "epoch": 0.06481915976802868, "grad_norm": 2.9431745953966124, "learning_rate": 6.4819159768028696e-06, "loss": 0.6616, "step": 14642 }, { "epoch": 0.06482358670149188, "grad_norm": 3.226210374857761, "learning_rate": 6.482358670149188e-06, "loss": 1.013, "step": 14643 }, { "epoch": 0.06482801363495506, "grad_norm": 2.338367645487408, "learning_rate": 6.482801363495507e-06, "loss": 0.6063, "step": 14644 }, { "epoch": 0.06483244056841826, "grad_norm": 2.322987385479151, "learning_rate": 6.483244056841827e-06, "loss": 0.5425, "step": 14645 }, { "epoch": 0.06483686750188145, "grad_norm": 3.771015732990672, "learning_rate": 6.483686750188145e-06, "loss": 0.7786, "step": 14646 }, { "epoch": 0.06484129443534464, "grad_norm": 2.185272238438549, "learning_rate": 6.484129443534464e-06, "loss": 0.5662, "step": 14647 }, { "epoch": 0.06484572136880783, "grad_norm": 2.7131999901750343, "learning_rate": 6.484572136880783e-06, "loss": 0.7181, "step": 14648 }, { "epoch": 0.06485014830227101, "grad_norm": 2.6391874369835744, "learning_rate": 6.485014830227102e-06, "loss": 0.7648, "step": 14649 }, { "epoch": 0.06485457523573421, "grad_norm": 2.9545523670990286, "learning_rate": 6.485457523573421e-06, "loss": 1.1866, "step": 14650 }, { "epoch": 0.06485900216919739, "grad_norm": 2.942419521755196, "learning_rate": 6.4859002169197405e-06, "loss": 0.739, "step": 14651 }, { "epoch": 0.06486342910266059, "grad_norm": 2.3909295345447146, "learning_rate": 6.486342910266059e-06, "loss": 0.5925, "step": 14652 }, { "epoch": 0.06486785603612377, "grad_norm": 2.605046439399399, "learning_rate": 6.4867856036123776e-06, "loss": 0.75, "step": 14653 }, { "epoch": 0.06487228296958697, "grad_norm": 3.3337326418165745, "learning_rate": 6.487228296958698e-06, "loss": 0.6135, "step": 14654 }, { "epoch": 0.06487670990305015, "grad_norm": 2.239540946674286, "learning_rate": 6.487670990305016e-06, "loss": 0.6305, "step": 14655 }, { "epoch": 0.06488113683651335, "grad_norm": 2.7705534743388096, "learning_rate": 6.488113683651335e-06, "loss": 0.6198, "step": 14656 }, { "epoch": 0.06488556376997653, "grad_norm": 2.1776419196024914, "learning_rate": 6.488556376997654e-06, "loss": 0.486, "step": 14657 }, { "epoch": 0.06488999070343973, "grad_norm": 2.4985703930895493, "learning_rate": 6.488999070343973e-06, "loss": 0.8185, "step": 14658 }, { "epoch": 0.06489441763690292, "grad_norm": 3.6784634728007606, "learning_rate": 6.489441763690292e-06, "loss": 1.4777, "step": 14659 }, { "epoch": 0.06489884457036611, "grad_norm": 3.0286016718541697, "learning_rate": 6.4898844570366115e-06, "loss": 0.834, "step": 14660 }, { "epoch": 0.0649032715038293, "grad_norm": 2.670128314141397, "learning_rate": 6.49032715038293e-06, "loss": 0.7791, "step": 14661 }, { "epoch": 0.0649076984372925, "grad_norm": 4.357739045160381, "learning_rate": 6.4907698437292485e-06, "loss": 1.0618, "step": 14662 }, { "epoch": 0.06491212537075568, "grad_norm": 3.7525932939846287, "learning_rate": 6.491212537075569e-06, "loss": 1.238, "step": 14663 }, { "epoch": 0.06491655230421886, "grad_norm": 2.8341826452487493, "learning_rate": 6.491655230421887e-06, "loss": 0.755, "step": 14664 }, { "epoch": 0.06492097923768206, "grad_norm": 3.5585769617986402, "learning_rate": 6.492097923768206e-06, "loss": 0.8145, "step": 14665 }, { "epoch": 0.06492540617114524, "grad_norm": 2.969601908051701, "learning_rate": 6.492540617114525e-06, "loss": 0.8911, "step": 14666 }, { "epoch": 0.06492983310460844, "grad_norm": 2.9772468131418792, "learning_rate": 6.4929833104608445e-06, "loss": 0.9775, "step": 14667 }, { "epoch": 0.06493426003807162, "grad_norm": 2.626551249100675, "learning_rate": 6.493426003807163e-06, "loss": 0.7947, "step": 14668 }, { "epoch": 0.06493868697153482, "grad_norm": 2.49608143025505, "learning_rate": 6.493868697153482e-06, "loss": 0.9209, "step": 14669 }, { "epoch": 0.064943113904998, "grad_norm": 2.980783876927902, "learning_rate": 6.494311390499801e-06, "loss": 0.9054, "step": 14670 }, { "epoch": 0.0649475408384612, "grad_norm": 2.411494185840975, "learning_rate": 6.4947540838461195e-06, "loss": 0.9493, "step": 14671 }, { "epoch": 0.06495196777192438, "grad_norm": 2.4340381318696687, "learning_rate": 6.49519677719244e-06, "loss": 0.4466, "step": 14672 }, { "epoch": 0.06495639470538758, "grad_norm": 2.2961363095653367, "learning_rate": 6.495639470538758e-06, "loss": 0.3571, "step": 14673 }, { "epoch": 0.06496082163885077, "grad_norm": 2.6722245587182343, "learning_rate": 6.496082163885077e-06, "loss": 0.694, "step": 14674 }, { "epoch": 0.06496524857231396, "grad_norm": 2.722604323162043, "learning_rate": 6.496524857231397e-06, "loss": 0.746, "step": 14675 }, { "epoch": 0.06496967550577715, "grad_norm": 3.267175802440474, "learning_rate": 6.4969675505777155e-06, "loss": 1.0643, "step": 14676 }, { "epoch": 0.06497410243924034, "grad_norm": 2.8611958622766287, "learning_rate": 6.497410243924034e-06, "loss": 0.9357, "step": 14677 }, { "epoch": 0.06497852937270353, "grad_norm": 3.027182964978814, "learning_rate": 6.497852937270353e-06, "loss": 0.7922, "step": 14678 }, { "epoch": 0.06498295630616671, "grad_norm": 3.099947842346819, "learning_rate": 6.498295630616672e-06, "loss": 0.7639, "step": 14679 }, { "epoch": 0.06498738323962991, "grad_norm": 2.684246117103365, "learning_rate": 6.498738323962991e-06, "loss": 0.8029, "step": 14680 }, { "epoch": 0.06499181017309309, "grad_norm": 2.5943879166590205, "learning_rate": 6.499181017309311e-06, "loss": 0.5668, "step": 14681 }, { "epoch": 0.06499623710655629, "grad_norm": 2.2381235349296333, "learning_rate": 6.499623710655629e-06, "loss": 0.5836, "step": 14682 }, { "epoch": 0.06500066404001947, "grad_norm": 3.0172943594403896, "learning_rate": 6.500066404001948e-06, "loss": 0.8668, "step": 14683 }, { "epoch": 0.06500509097348267, "grad_norm": 2.9085008932626324, "learning_rate": 6.500509097348268e-06, "loss": 0.7038, "step": 14684 }, { "epoch": 0.06500951790694585, "grad_norm": 4.3880001716817585, "learning_rate": 6.500951790694586e-06, "loss": 0.9833, "step": 14685 }, { "epoch": 0.06501394484040905, "grad_norm": 2.4163714076544403, "learning_rate": 6.501394484040905e-06, "loss": 0.5607, "step": 14686 }, { "epoch": 0.06501837177387224, "grad_norm": 2.8990279988453396, "learning_rate": 6.501837177387224e-06, "loss": 0.8947, "step": 14687 }, { "epoch": 0.06502279870733543, "grad_norm": 3.6247209255017476, "learning_rate": 6.502279870733544e-06, "loss": 1.2529, "step": 14688 }, { "epoch": 0.06502722564079862, "grad_norm": 4.162094164220802, "learning_rate": 6.502722564079862e-06, "loss": 1.1108, "step": 14689 }, { "epoch": 0.06503165257426181, "grad_norm": 2.815840654695309, "learning_rate": 6.5031652574261816e-06, "loss": 0.8646, "step": 14690 }, { "epoch": 0.065036079507725, "grad_norm": 2.4595444136688767, "learning_rate": 6.5036079507725e-06, "loss": 0.4185, "step": 14691 }, { "epoch": 0.0650405064411882, "grad_norm": 3.6766337793675827, "learning_rate": 6.504050644118819e-06, "loss": 1.011, "step": 14692 }, { "epoch": 0.06504493337465138, "grad_norm": 2.6344227567573655, "learning_rate": 6.504493337465139e-06, "loss": 0.8818, "step": 14693 }, { "epoch": 0.06504936030811456, "grad_norm": 2.8889984416110788, "learning_rate": 6.504936030811457e-06, "loss": 0.9267, "step": 14694 }, { "epoch": 0.06505378724157776, "grad_norm": 3.420229011391661, "learning_rate": 6.505378724157776e-06, "loss": 1.1475, "step": 14695 }, { "epoch": 0.06505821417504094, "grad_norm": 3.8623429074930224, "learning_rate": 6.505821417504096e-06, "loss": 0.9473, "step": 14696 }, { "epoch": 0.06506264110850414, "grad_norm": 2.6448901657217854, "learning_rate": 6.506264110850415e-06, "loss": 0.7223, "step": 14697 }, { "epoch": 0.06506706804196732, "grad_norm": 2.3386383284696115, "learning_rate": 6.506706804196733e-06, "loss": 0.6954, "step": 14698 }, { "epoch": 0.06507149497543052, "grad_norm": 3.576608261735143, "learning_rate": 6.5071494975430525e-06, "loss": 0.8275, "step": 14699 }, { "epoch": 0.0650759219088937, "grad_norm": 2.460275198756858, "learning_rate": 6.507592190889371e-06, "loss": 0.951, "step": 14700 }, { "epoch": 0.0650803488423569, "grad_norm": 2.8653859307694227, "learning_rate": 6.5080348842356896e-06, "loss": 0.97, "step": 14701 }, { "epoch": 0.06508477577582009, "grad_norm": 2.3963708209034227, "learning_rate": 6.50847757758201e-06, "loss": 0.5246, "step": 14702 }, { "epoch": 0.06508920270928328, "grad_norm": 2.518220286473164, "learning_rate": 6.508920270928328e-06, "loss": 0.5543, "step": 14703 }, { "epoch": 0.06509362964274647, "grad_norm": 2.748050704566198, "learning_rate": 6.509362964274647e-06, "loss": 0.8868, "step": 14704 }, { "epoch": 0.06509805657620966, "grad_norm": 2.642131824352598, "learning_rate": 6.509805657620967e-06, "loss": 0.8034, "step": 14705 }, { "epoch": 0.06510248350967285, "grad_norm": 2.3035340015400987, "learning_rate": 6.5102483509672856e-06, "loss": 0.5962, "step": 14706 }, { "epoch": 0.06510691044313605, "grad_norm": 2.9392300182106896, "learning_rate": 6.510691044313604e-06, "loss": 0.9753, "step": 14707 }, { "epoch": 0.06511133737659923, "grad_norm": 2.6146719721962968, "learning_rate": 6.5111337376599235e-06, "loss": 0.7259, "step": 14708 }, { "epoch": 0.06511576431006241, "grad_norm": 3.0806814551048607, "learning_rate": 6.511576431006242e-06, "loss": 0.5818, "step": 14709 }, { "epoch": 0.06512019124352561, "grad_norm": 2.482955899923444, "learning_rate": 6.512019124352561e-06, "loss": 0.6351, "step": 14710 }, { "epoch": 0.0651246181769888, "grad_norm": 2.924671721807773, "learning_rate": 6.512461817698881e-06, "loss": 0.7924, "step": 14711 }, { "epoch": 0.06512904511045199, "grad_norm": 2.8147056488967253, "learning_rate": 6.512904511045199e-06, "loss": 0.8619, "step": 14712 }, { "epoch": 0.06513347204391517, "grad_norm": 2.4827749499111205, "learning_rate": 6.513347204391518e-06, "loss": 0.7451, "step": 14713 }, { "epoch": 0.06513789897737837, "grad_norm": 3.0260963664939964, "learning_rate": 6.513789897737838e-06, "loss": 0.8903, "step": 14714 }, { "epoch": 0.06514232591084156, "grad_norm": 2.3029687373652483, "learning_rate": 6.5142325910841565e-06, "loss": 0.6992, "step": 14715 }, { "epoch": 0.06514675284430475, "grad_norm": 2.471194595820937, "learning_rate": 6.514675284430475e-06, "loss": 0.8368, "step": 14716 }, { "epoch": 0.06515117977776794, "grad_norm": 2.8962508180657816, "learning_rate": 6.515117977776794e-06, "loss": 1.1565, "step": 14717 }, { "epoch": 0.06515560671123113, "grad_norm": 2.687324365752843, "learning_rate": 6.515560671123114e-06, "loss": 0.6899, "step": 14718 }, { "epoch": 0.06516003364469432, "grad_norm": 2.4732651191495774, "learning_rate": 6.516003364469432e-06, "loss": 0.7645, "step": 14719 }, { "epoch": 0.06516446057815752, "grad_norm": 2.0561351006163613, "learning_rate": 6.516446057815752e-06, "loss": 0.5684, "step": 14720 }, { "epoch": 0.0651688875116207, "grad_norm": 2.3774677342669417, "learning_rate": 6.51688875116207e-06, "loss": 0.5794, "step": 14721 }, { "epoch": 0.0651733144450839, "grad_norm": 3.365457929032726, "learning_rate": 6.517331444508389e-06, "loss": 0.8666, "step": 14722 }, { "epoch": 0.06517774137854708, "grad_norm": 2.5222876678956525, "learning_rate": 6.517774137854709e-06, "loss": 0.6152, "step": 14723 }, { "epoch": 0.06518216831201028, "grad_norm": 2.9868111670462945, "learning_rate": 6.5182168312010275e-06, "loss": 0.7936, "step": 14724 }, { "epoch": 0.06518659524547346, "grad_norm": 3.099628465350096, "learning_rate": 6.518659524547346e-06, "loss": 0.8898, "step": 14725 }, { "epoch": 0.06519102217893664, "grad_norm": 2.6207858671031117, "learning_rate": 6.519102217893666e-06, "loss": 0.7146, "step": 14726 }, { "epoch": 0.06519544911239984, "grad_norm": 3.3060513097518576, "learning_rate": 6.519544911239985e-06, "loss": 0.9614, "step": 14727 }, { "epoch": 0.06519987604586303, "grad_norm": 2.9901953508377863, "learning_rate": 6.519987604586303e-06, "loss": 0.783, "step": 14728 }, { "epoch": 0.06520430297932622, "grad_norm": 2.6937297059854797, "learning_rate": 6.520430297932623e-06, "loss": 0.7713, "step": 14729 }, { "epoch": 0.0652087299127894, "grad_norm": 2.6826120030488165, "learning_rate": 6.520872991278941e-06, "loss": 0.8009, "step": 14730 }, { "epoch": 0.0652131568462526, "grad_norm": 2.4178514146871155, "learning_rate": 6.5213156846252605e-06, "loss": 0.6061, "step": 14731 }, { "epoch": 0.06521758377971579, "grad_norm": 2.897750509896203, "learning_rate": 6.52175837797158e-06, "loss": 0.7938, "step": 14732 }, { "epoch": 0.06522201071317899, "grad_norm": 2.4395460587448787, "learning_rate": 6.522201071317898e-06, "loss": 0.528, "step": 14733 }, { "epoch": 0.06522643764664217, "grad_norm": 2.5068033501573566, "learning_rate": 6.522643764664217e-06, "loss": 0.7811, "step": 14734 }, { "epoch": 0.06523086458010537, "grad_norm": 2.487082762834422, "learning_rate": 6.523086458010537e-06, "loss": 0.6961, "step": 14735 }, { "epoch": 0.06523529151356855, "grad_norm": 3.0264522635762057, "learning_rate": 6.523529151356856e-06, "loss": 0.9572, "step": 14736 }, { "epoch": 0.06523971844703175, "grad_norm": 2.855046577518189, "learning_rate": 6.523971844703174e-06, "loss": 1.0389, "step": 14737 }, { "epoch": 0.06524414538049493, "grad_norm": 2.529656839977446, "learning_rate": 6.524414538049494e-06, "loss": 0.6277, "step": 14738 }, { "epoch": 0.06524857231395813, "grad_norm": 2.4130398149038843, "learning_rate": 6.524857231395812e-06, "loss": 0.6194, "step": 14739 }, { "epoch": 0.06525299924742131, "grad_norm": 3.3492577069806364, "learning_rate": 6.5252999247421315e-06, "loss": 0.9332, "step": 14740 }, { "epoch": 0.0652574261808845, "grad_norm": 2.336268747228534, "learning_rate": 6.525742618088451e-06, "loss": 0.6215, "step": 14741 }, { "epoch": 0.06526185311434769, "grad_norm": 3.779651848013399, "learning_rate": 6.526185311434769e-06, "loss": 1.0217, "step": 14742 }, { "epoch": 0.06526628004781088, "grad_norm": 3.0325401890119315, "learning_rate": 6.526628004781088e-06, "loss": 0.8909, "step": 14743 }, { "epoch": 0.06527070698127407, "grad_norm": 2.3826547043687607, "learning_rate": 6.527070698127408e-06, "loss": 0.6961, "step": 14744 }, { "epoch": 0.06527513391473726, "grad_norm": 2.3095589463304074, "learning_rate": 6.527513391473727e-06, "loss": 0.795, "step": 14745 }, { "epoch": 0.06527956084820045, "grad_norm": 2.7808102957680014, "learning_rate": 6.527956084820045e-06, "loss": 1.0653, "step": 14746 }, { "epoch": 0.06528398778166364, "grad_norm": 2.2106155059689025, "learning_rate": 6.5283987781663645e-06, "loss": 0.6033, "step": 14747 }, { "epoch": 0.06528841471512684, "grad_norm": 2.707970251862399, "learning_rate": 6.528841471512684e-06, "loss": 0.6373, "step": 14748 }, { "epoch": 0.06529284164859002, "grad_norm": 3.096723352780461, "learning_rate": 6.529284164859002e-06, "loss": 0.879, "step": 14749 }, { "epoch": 0.06529726858205322, "grad_norm": 2.5849057674266187, "learning_rate": 6.529726858205322e-06, "loss": 0.5021, "step": 14750 }, { "epoch": 0.0653016955155164, "grad_norm": 2.7235250169914504, "learning_rate": 6.53016955155164e-06, "loss": 0.6613, "step": 14751 }, { "epoch": 0.0653061224489796, "grad_norm": 2.3402840205119166, "learning_rate": 6.530612244897959e-06, "loss": 0.599, "step": 14752 }, { "epoch": 0.06531054938244278, "grad_norm": 2.3993864056053242, "learning_rate": 6.531054938244279e-06, "loss": 0.639, "step": 14753 }, { "epoch": 0.06531497631590598, "grad_norm": 2.9224718441369752, "learning_rate": 6.531497631590598e-06, "loss": 0.8999, "step": 14754 }, { "epoch": 0.06531940324936916, "grad_norm": 2.4567101404606797, "learning_rate": 6.531940324936916e-06, "loss": 0.8195, "step": 14755 }, { "epoch": 0.06532383018283235, "grad_norm": 2.486308808710319, "learning_rate": 6.532383018283236e-06, "loss": 0.541, "step": 14756 }, { "epoch": 0.06532825711629554, "grad_norm": 2.2561088022090185, "learning_rate": 6.532825711629555e-06, "loss": 0.6124, "step": 14757 }, { "epoch": 0.06533268404975873, "grad_norm": 2.223885723409207, "learning_rate": 6.533268404975873e-06, "loss": 0.6885, "step": 14758 }, { "epoch": 0.06533711098322192, "grad_norm": 2.4210456544672567, "learning_rate": 6.533711098322193e-06, "loss": 0.5985, "step": 14759 }, { "epoch": 0.06534153791668511, "grad_norm": 3.1221334220233863, "learning_rate": 6.534153791668511e-06, "loss": 1.01, "step": 14760 }, { "epoch": 0.0653459648501483, "grad_norm": 2.6384516468544486, "learning_rate": 6.534596485014831e-06, "loss": 0.7796, "step": 14761 }, { "epoch": 0.06535039178361149, "grad_norm": 2.6644080218680837, "learning_rate": 6.53503917836115e-06, "loss": 0.5932, "step": 14762 }, { "epoch": 0.06535481871707469, "grad_norm": 2.840327155431678, "learning_rate": 6.5354818717074685e-06, "loss": 0.5989, "step": 14763 }, { "epoch": 0.06535924565053787, "grad_norm": 2.9186131505584947, "learning_rate": 6.535924565053787e-06, "loss": 1.0081, "step": 14764 }, { "epoch": 0.06536367258400107, "grad_norm": 2.998423955529592, "learning_rate": 6.536367258400107e-06, "loss": 1.1434, "step": 14765 }, { "epoch": 0.06536809951746425, "grad_norm": 3.0851192543262878, "learning_rate": 6.536809951746426e-06, "loss": 0.7705, "step": 14766 }, { "epoch": 0.06537252645092745, "grad_norm": 3.03712817478286, "learning_rate": 6.537252645092744e-06, "loss": 0.8286, "step": 14767 }, { "epoch": 0.06537695338439063, "grad_norm": 3.4630049166459953, "learning_rate": 6.537695338439064e-06, "loss": 1.1703, "step": 14768 }, { "epoch": 0.06538138031785383, "grad_norm": 3.3301448203197404, "learning_rate": 6.538138031785383e-06, "loss": 0.4995, "step": 14769 }, { "epoch": 0.06538580725131701, "grad_norm": 2.3917177767019897, "learning_rate": 6.538580725131702e-06, "loss": 0.3707, "step": 14770 }, { "epoch": 0.0653902341847802, "grad_norm": 4.2997196136203115, "learning_rate": 6.539023418478021e-06, "loss": 0.9933, "step": 14771 }, { "epoch": 0.0653946611182434, "grad_norm": 3.8365401505378753, "learning_rate": 6.5394661118243395e-06, "loss": 1.1901, "step": 14772 }, { "epoch": 0.06539908805170658, "grad_norm": 3.756953665357258, "learning_rate": 6.539908805170658e-06, "loss": 1.2281, "step": 14773 }, { "epoch": 0.06540351498516978, "grad_norm": 2.0637112560525877, "learning_rate": 6.540351498516978e-06, "loss": 0.4579, "step": 14774 }, { "epoch": 0.06540794191863296, "grad_norm": 2.3061716662144156, "learning_rate": 6.540794191863297e-06, "loss": 0.696, "step": 14775 }, { "epoch": 0.06541236885209616, "grad_norm": 3.047877468854108, "learning_rate": 6.541236885209615e-06, "loss": 0.8006, "step": 14776 }, { "epoch": 0.06541679578555934, "grad_norm": 2.772502273176092, "learning_rate": 6.541679578555935e-06, "loss": 0.8035, "step": 14777 }, { "epoch": 0.06542122271902254, "grad_norm": 3.197004361062559, "learning_rate": 6.542122271902254e-06, "loss": 0.4984, "step": 14778 }, { "epoch": 0.06542564965248572, "grad_norm": 3.147389371131463, "learning_rate": 6.5425649652485725e-06, "loss": 0.6829, "step": 14779 }, { "epoch": 0.06543007658594892, "grad_norm": 3.397906649359789, "learning_rate": 6.543007658594892e-06, "loss": 0.7682, "step": 14780 }, { "epoch": 0.0654345035194121, "grad_norm": 2.3228325214413266, "learning_rate": 6.54345035194121e-06, "loss": 0.5259, "step": 14781 }, { "epoch": 0.0654389304528753, "grad_norm": 2.7878347629125417, "learning_rate": 6.543893045287529e-06, "loss": 0.6986, "step": 14782 }, { "epoch": 0.06544335738633848, "grad_norm": 2.4122367311589015, "learning_rate": 6.544335738633849e-06, "loss": 0.6202, "step": 14783 }, { "epoch": 0.06544778431980168, "grad_norm": 2.453542579953692, "learning_rate": 6.544778431980168e-06, "loss": 0.8028, "step": 14784 }, { "epoch": 0.06545221125326486, "grad_norm": 2.5302809575219007, "learning_rate": 6.545221125326486e-06, "loss": 0.4824, "step": 14785 }, { "epoch": 0.06545663818672805, "grad_norm": 3.5652451928719993, "learning_rate": 6.5456638186728064e-06, "loss": 1.2541, "step": 14786 }, { "epoch": 0.06546106512019124, "grad_norm": 2.401637654583615, "learning_rate": 6.546106512019125e-06, "loss": 0.7184, "step": 14787 }, { "epoch": 0.06546549205365443, "grad_norm": 2.589575571892812, "learning_rate": 6.5465492053654435e-06, "loss": 0.8187, "step": 14788 }, { "epoch": 0.06546991898711763, "grad_norm": 3.941088532828923, "learning_rate": 6.546991898711763e-06, "loss": 0.8762, "step": 14789 }, { "epoch": 0.06547434592058081, "grad_norm": 2.243710437309121, "learning_rate": 6.547434592058081e-06, "loss": 0.7277, "step": 14790 }, { "epoch": 0.065478772854044, "grad_norm": 2.744215759188362, "learning_rate": 6.547877285404401e-06, "loss": 0.7686, "step": 14791 }, { "epoch": 0.06548319978750719, "grad_norm": 2.404032762536438, "learning_rate": 6.54831997875072e-06, "loss": 0.7824, "step": 14792 }, { "epoch": 0.06548762672097039, "grad_norm": 2.6622529462288935, "learning_rate": 6.548762672097039e-06, "loss": 0.5035, "step": 14793 }, { "epoch": 0.06549205365443357, "grad_norm": 2.5446233914942056, "learning_rate": 6.549205365443357e-06, "loss": 1.0696, "step": 14794 }, { "epoch": 0.06549648058789677, "grad_norm": 2.6506684141498624, "learning_rate": 6.549648058789677e-06, "loss": 0.917, "step": 14795 }, { "epoch": 0.06550090752135995, "grad_norm": 2.6987630397352786, "learning_rate": 6.550090752135996e-06, "loss": 0.5853, "step": 14796 }, { "epoch": 0.06550533445482315, "grad_norm": 2.7778390052619013, "learning_rate": 6.550533445482314e-06, "loss": 0.7464, "step": 14797 }, { "epoch": 0.06550976138828633, "grad_norm": 2.4504227186371996, "learning_rate": 6.550976138828634e-06, "loss": 0.6516, "step": 14798 }, { "epoch": 0.06551418832174953, "grad_norm": 2.6660771128443606, "learning_rate": 6.551418832174953e-06, "loss": 0.7347, "step": 14799 }, { "epoch": 0.06551861525521271, "grad_norm": 2.649582766978794, "learning_rate": 6.551861525521272e-06, "loss": 0.6276, "step": 14800 }, { "epoch": 0.0655230421886759, "grad_norm": 3.366349998063517, "learning_rate": 6.552304218867591e-06, "loss": 1.2566, "step": 14801 }, { "epoch": 0.0655274691221391, "grad_norm": 2.7274653057245644, "learning_rate": 6.55274691221391e-06, "loss": 0.6783, "step": 14802 }, { "epoch": 0.06553189605560228, "grad_norm": 3.042253014191279, "learning_rate": 6.553189605560228e-06, "loss": 0.9629, "step": 14803 }, { "epoch": 0.06553632298906548, "grad_norm": 2.292105367555881, "learning_rate": 6.553632298906548e-06, "loss": 0.7408, "step": 14804 }, { "epoch": 0.06554074992252866, "grad_norm": 3.4183805866273818, "learning_rate": 6.554074992252867e-06, "loss": 0.7886, "step": 14805 }, { "epoch": 0.06554517685599186, "grad_norm": 2.478892236168307, "learning_rate": 6.554517685599185e-06, "loss": 0.598, "step": 14806 }, { "epoch": 0.06554960378945504, "grad_norm": 5.1181528845576585, "learning_rate": 6.554960378945506e-06, "loss": 1.0293, "step": 14807 }, { "epoch": 0.06555403072291824, "grad_norm": 2.1562155131924667, "learning_rate": 6.555403072291824e-06, "loss": 0.6087, "step": 14808 }, { "epoch": 0.06555845765638142, "grad_norm": 2.744111315258961, "learning_rate": 6.555845765638143e-06, "loss": 0.5457, "step": 14809 }, { "epoch": 0.06556288458984462, "grad_norm": 2.501447724749321, "learning_rate": 6.556288458984462e-06, "loss": 0.4232, "step": 14810 }, { "epoch": 0.0655673115233078, "grad_norm": 3.1895016610107176, "learning_rate": 6.5567311523307805e-06, "loss": 0.9718, "step": 14811 }, { "epoch": 0.065571738456771, "grad_norm": 2.9718008427216556, "learning_rate": 6.557173845677099e-06, "loss": 0.8529, "step": 14812 }, { "epoch": 0.06557616539023418, "grad_norm": 2.8724363139109657, "learning_rate": 6.557616539023419e-06, "loss": 0.833, "step": 14813 }, { "epoch": 0.06558059232369738, "grad_norm": 2.707858619763123, "learning_rate": 6.558059232369738e-06, "loss": 0.8181, "step": 14814 }, { "epoch": 0.06558501925716057, "grad_norm": 3.4564001681946417, "learning_rate": 6.558501925716056e-06, "loss": 1.0031, "step": 14815 }, { "epoch": 0.06558944619062375, "grad_norm": 2.7038357776594064, "learning_rate": 6.5589446190623765e-06, "loss": 0.6114, "step": 14816 }, { "epoch": 0.06559387312408695, "grad_norm": 2.489256255092289, "learning_rate": 6.559387312408695e-06, "loss": 0.5549, "step": 14817 }, { "epoch": 0.06559830005755013, "grad_norm": 2.66829883602263, "learning_rate": 6.559830005755014e-06, "loss": 0.7538, "step": 14818 }, { "epoch": 0.06560272699101333, "grad_norm": 3.2961863749598015, "learning_rate": 6.560272699101333e-06, "loss": 0.5837, "step": 14819 }, { "epoch": 0.06560715392447651, "grad_norm": 3.3148841959919455, "learning_rate": 6.5607153924476515e-06, "loss": 0.7142, "step": 14820 }, { "epoch": 0.06561158085793971, "grad_norm": 2.3350603520588455, "learning_rate": 6.561158085793971e-06, "loss": 0.5693, "step": 14821 }, { "epoch": 0.06561600779140289, "grad_norm": 2.79214071991256, "learning_rate": 6.56160077914029e-06, "loss": 0.7753, "step": 14822 }, { "epoch": 0.06562043472486609, "grad_norm": 2.5669606032035195, "learning_rate": 6.562043472486609e-06, "loss": 0.711, "step": 14823 }, { "epoch": 0.06562486165832927, "grad_norm": 3.68995038395034, "learning_rate": 6.562486165832927e-06, "loss": 1.0861, "step": 14824 }, { "epoch": 0.06562928859179247, "grad_norm": 2.9190294312841436, "learning_rate": 6.5629288591792475e-06, "loss": 0.7513, "step": 14825 }, { "epoch": 0.06563371552525565, "grad_norm": 2.9131371479137793, "learning_rate": 6.563371552525566e-06, "loss": 0.8287, "step": 14826 }, { "epoch": 0.06563814245871885, "grad_norm": 2.5247580537175973, "learning_rate": 6.5638142458718845e-06, "loss": 0.7606, "step": 14827 }, { "epoch": 0.06564256939218203, "grad_norm": 3.0592046488800966, "learning_rate": 6.564256939218204e-06, "loss": 0.639, "step": 14828 }, { "epoch": 0.06564699632564523, "grad_norm": 2.803070215191333, "learning_rate": 6.564699632564523e-06, "loss": 0.8242, "step": 14829 }, { "epoch": 0.06565142325910842, "grad_norm": 3.1056162727091787, "learning_rate": 6.565142325910842e-06, "loss": 0.547, "step": 14830 }, { "epoch": 0.0656558501925716, "grad_norm": 2.706003395839579, "learning_rate": 6.565585019257161e-06, "loss": 0.8683, "step": 14831 }, { "epoch": 0.0656602771260348, "grad_norm": 2.8667294361125535, "learning_rate": 6.56602771260348e-06, "loss": 0.6825, "step": 14832 }, { "epoch": 0.06566470405949798, "grad_norm": 2.836411085104835, "learning_rate": 6.566470405949798e-06, "loss": 0.9172, "step": 14833 }, { "epoch": 0.06566913099296118, "grad_norm": 3.127783969094276, "learning_rate": 6.5669130992961184e-06, "loss": 1.0263, "step": 14834 }, { "epoch": 0.06567355792642436, "grad_norm": 2.3352997015142134, "learning_rate": 6.567355792642437e-06, "loss": 0.5053, "step": 14835 }, { "epoch": 0.06567798485988756, "grad_norm": 2.3284244729329044, "learning_rate": 6.5677984859887555e-06, "loss": 0.6735, "step": 14836 }, { "epoch": 0.06568241179335074, "grad_norm": 2.757960071005913, "learning_rate": 6.568241179335076e-06, "loss": 0.7791, "step": 14837 }, { "epoch": 0.06568683872681394, "grad_norm": 2.542588060115591, "learning_rate": 6.568683872681394e-06, "loss": 0.7419, "step": 14838 }, { "epoch": 0.06569126566027712, "grad_norm": 2.4372968714271996, "learning_rate": 6.569126566027713e-06, "loss": 0.5615, "step": 14839 }, { "epoch": 0.06569569259374032, "grad_norm": 2.642635429031625, "learning_rate": 6.569569259374032e-06, "loss": 0.6703, "step": 14840 }, { "epoch": 0.0657001195272035, "grad_norm": 2.5115795153660807, "learning_rate": 6.570011952720351e-06, "loss": 0.7165, "step": 14841 }, { "epoch": 0.0657045464606667, "grad_norm": 2.62228163516664, "learning_rate": 6.57045464606667e-06, "loss": 0.5194, "step": 14842 }, { "epoch": 0.06570897339412989, "grad_norm": 2.7945101164496533, "learning_rate": 6.570897339412989e-06, "loss": 0.8139, "step": 14843 }, { "epoch": 0.06571340032759308, "grad_norm": 2.677603999180172, "learning_rate": 6.571340032759308e-06, "loss": 0.8352, "step": 14844 }, { "epoch": 0.06571782726105627, "grad_norm": 2.820386277187013, "learning_rate": 6.571782726105626e-06, "loss": 0.83, "step": 14845 }, { "epoch": 0.06572225419451945, "grad_norm": 3.218734429202397, "learning_rate": 6.572225419451947e-06, "loss": 0.9189, "step": 14846 }, { "epoch": 0.06572668112798265, "grad_norm": 2.278438610387702, "learning_rate": 6.572668112798265e-06, "loss": 0.5802, "step": 14847 }, { "epoch": 0.06573110806144583, "grad_norm": 2.4775253281691114, "learning_rate": 6.573110806144584e-06, "loss": 0.5675, "step": 14848 }, { "epoch": 0.06573553499490903, "grad_norm": 2.83722650603454, "learning_rate": 6.573553499490903e-06, "loss": 0.7444, "step": 14849 }, { "epoch": 0.06573996192837221, "grad_norm": 2.051860062587967, "learning_rate": 6.5739961928372224e-06, "loss": 0.3975, "step": 14850 }, { "epoch": 0.06574438886183541, "grad_norm": 2.660214456477775, "learning_rate": 6.574438886183541e-06, "loss": 0.8754, "step": 14851 }, { "epoch": 0.06574881579529859, "grad_norm": 2.5550952076661635, "learning_rate": 6.57488157952986e-06, "loss": 0.7224, "step": 14852 }, { "epoch": 0.06575324272876179, "grad_norm": 2.8824079712410877, "learning_rate": 6.575324272876179e-06, "loss": 0.8293, "step": 14853 }, { "epoch": 0.06575766966222497, "grad_norm": 2.5529546204974327, "learning_rate": 6.575766966222497e-06, "loss": 0.5381, "step": 14854 }, { "epoch": 0.06576209659568817, "grad_norm": 2.6499282789000205, "learning_rate": 6.576209659568818e-06, "loss": 0.705, "step": 14855 }, { "epoch": 0.06576652352915136, "grad_norm": 2.4561256065932104, "learning_rate": 6.576652352915136e-06, "loss": 0.7054, "step": 14856 }, { "epoch": 0.06577095046261455, "grad_norm": 2.2538944673061057, "learning_rate": 6.577095046261455e-06, "loss": 0.7093, "step": 14857 }, { "epoch": 0.06577537739607774, "grad_norm": 3.3451732765697932, "learning_rate": 6.577537739607774e-06, "loss": 1.0354, "step": 14858 }, { "epoch": 0.06577980432954093, "grad_norm": 2.539848996137, "learning_rate": 6.577980432954093e-06, "loss": 0.5584, "step": 14859 }, { "epoch": 0.06578423126300412, "grad_norm": 2.7268297443600558, "learning_rate": 6.578423126300412e-06, "loss": 0.5774, "step": 14860 }, { "epoch": 0.0657886581964673, "grad_norm": 2.4692468861758305, "learning_rate": 6.578865819646731e-06, "loss": 0.6386, "step": 14861 }, { "epoch": 0.0657930851299305, "grad_norm": 3.0862807701308896, "learning_rate": 6.57930851299305e-06, "loss": 1.1022, "step": 14862 }, { "epoch": 0.06579751206339368, "grad_norm": 2.2517568125322494, "learning_rate": 6.57975120633937e-06, "loss": 0.5797, "step": 14863 }, { "epoch": 0.06580193899685688, "grad_norm": 2.3974788094122057, "learning_rate": 6.5801938996856885e-06, "loss": 0.5792, "step": 14864 }, { "epoch": 0.06580636593032006, "grad_norm": 2.9421588075379277, "learning_rate": 6.580636593032007e-06, "loss": 0.6365, "step": 14865 }, { "epoch": 0.06581079286378326, "grad_norm": 2.7111857543870075, "learning_rate": 6.5810792863783264e-06, "loss": 0.7152, "step": 14866 }, { "epoch": 0.06581521979724644, "grad_norm": 2.527954119595771, "learning_rate": 6.581521979724646e-06, "loss": 0.5592, "step": 14867 }, { "epoch": 0.06581964673070964, "grad_norm": 2.8208590888806326, "learning_rate": 6.581964673070964e-06, "loss": 0.7964, "step": 14868 }, { "epoch": 0.06582407366417282, "grad_norm": 3.664389562341699, "learning_rate": 6.582407366417284e-06, "loss": 0.8159, "step": 14869 }, { "epoch": 0.06582850059763602, "grad_norm": 2.9070502518457175, "learning_rate": 6.582850059763602e-06, "loss": 0.9202, "step": 14870 }, { "epoch": 0.0658329275310992, "grad_norm": 2.539452666822458, "learning_rate": 6.583292753109921e-06, "loss": 0.7524, "step": 14871 }, { "epoch": 0.0658373544645624, "grad_norm": 2.7889843552687563, "learning_rate": 6.583735446456241e-06, "loss": 0.8245, "step": 14872 }, { "epoch": 0.06584178139802559, "grad_norm": 3.0209701364941055, "learning_rate": 6.5841781398025595e-06, "loss": 0.9657, "step": 14873 }, { "epoch": 0.06584620833148878, "grad_norm": 2.6971169117947773, "learning_rate": 6.584620833148878e-06, "loss": 0.8511, "step": 14874 }, { "epoch": 0.06585063526495197, "grad_norm": 2.2825902197267243, "learning_rate": 6.585063526495198e-06, "loss": 0.5881, "step": 14875 }, { "epoch": 0.06585506219841515, "grad_norm": 3.1975495129170297, "learning_rate": 6.585506219841517e-06, "loss": 0.6881, "step": 14876 }, { "epoch": 0.06585948913187835, "grad_norm": 2.930167928570959, "learning_rate": 6.585948913187835e-06, "loss": 0.7662, "step": 14877 }, { "epoch": 0.06586391606534153, "grad_norm": 2.727063517593282, "learning_rate": 6.586391606534155e-06, "loss": 0.7123, "step": 14878 }, { "epoch": 0.06586834299880473, "grad_norm": 2.354019304423394, "learning_rate": 6.586834299880473e-06, "loss": 0.5471, "step": 14879 }, { "epoch": 0.06587276993226791, "grad_norm": 2.5301853447786935, "learning_rate": 6.5872769932267925e-06, "loss": 0.8213, "step": 14880 }, { "epoch": 0.06587719686573111, "grad_norm": 2.6958687807037873, "learning_rate": 6.587719686573112e-06, "loss": 0.6664, "step": 14881 }, { "epoch": 0.0658816237991943, "grad_norm": 2.8639113671896843, "learning_rate": 6.5881623799194304e-06, "loss": 0.7247, "step": 14882 }, { "epoch": 0.06588605073265749, "grad_norm": 2.7275145558882503, "learning_rate": 6.588605073265749e-06, "loss": 0.5414, "step": 14883 }, { "epoch": 0.06589047766612068, "grad_norm": 3.457089131764061, "learning_rate": 6.589047766612069e-06, "loss": 1.0413, "step": 14884 }, { "epoch": 0.06589490459958387, "grad_norm": 2.3583796412130136, "learning_rate": 6.589490459958388e-06, "loss": 0.8146, "step": 14885 }, { "epoch": 0.06589933153304706, "grad_norm": 2.6302019627377486, "learning_rate": 6.589933153304706e-06, "loss": 0.6181, "step": 14886 }, { "epoch": 0.06590375846651025, "grad_norm": 2.613416751508726, "learning_rate": 6.590375846651026e-06, "loss": 0.6665, "step": 14887 }, { "epoch": 0.06590818539997344, "grad_norm": 3.316914862145315, "learning_rate": 6.590818539997345e-06, "loss": 0.9478, "step": 14888 }, { "epoch": 0.06591261233343663, "grad_norm": 3.888079477872574, "learning_rate": 6.5912612333436635e-06, "loss": 0.8982, "step": 14889 }, { "epoch": 0.06591703926689982, "grad_norm": 3.066918452290081, "learning_rate": 6.591703926689983e-06, "loss": 0.6834, "step": 14890 }, { "epoch": 0.065921466200363, "grad_norm": 2.9310106437940915, "learning_rate": 6.592146620036301e-06, "loss": 0.8419, "step": 14891 }, { "epoch": 0.0659258931338262, "grad_norm": 2.5289315506617993, "learning_rate": 6.59258931338262e-06, "loss": 0.7927, "step": 14892 }, { "epoch": 0.06593032006728938, "grad_norm": 3.206004858319386, "learning_rate": 6.59303200672894e-06, "loss": 0.9381, "step": 14893 }, { "epoch": 0.06593474700075258, "grad_norm": 2.8113055240622185, "learning_rate": 6.593474700075259e-06, "loss": 0.835, "step": 14894 }, { "epoch": 0.06593917393421576, "grad_norm": 2.7224121286056433, "learning_rate": 6.593917393421577e-06, "loss": 0.7345, "step": 14895 }, { "epoch": 0.06594360086767896, "grad_norm": 2.4112295917115953, "learning_rate": 6.594360086767897e-06, "loss": 0.6587, "step": 14896 }, { "epoch": 0.06594802780114215, "grad_norm": 2.770514990082993, "learning_rate": 6.594802780114216e-06, "loss": 0.83, "step": 14897 }, { "epoch": 0.06595245473460534, "grad_norm": 2.49985832441995, "learning_rate": 6.5952454734605344e-06, "loss": 0.8832, "step": 14898 }, { "epoch": 0.06595688166806853, "grad_norm": 2.6100821121002604, "learning_rate": 6.595688166806854e-06, "loss": 0.8488, "step": 14899 }, { "epoch": 0.06596130860153172, "grad_norm": 2.312075329603568, "learning_rate": 6.596130860153172e-06, "loss": 0.5715, "step": 14900 }, { "epoch": 0.06596573553499491, "grad_norm": 2.535857194236504, "learning_rate": 6.596573553499491e-06, "loss": 0.7027, "step": 14901 }, { "epoch": 0.0659701624684581, "grad_norm": 2.819461944509304, "learning_rate": 6.597016246845811e-06, "loss": 0.7189, "step": 14902 }, { "epoch": 0.06597458940192129, "grad_norm": 2.4812971414888794, "learning_rate": 6.59745894019213e-06, "loss": 0.5661, "step": 14903 }, { "epoch": 0.06597901633538449, "grad_norm": 2.2155338635708546, "learning_rate": 6.597901633538448e-06, "loss": 0.4592, "step": 14904 }, { "epoch": 0.06598344326884767, "grad_norm": 2.4364408842098944, "learning_rate": 6.598344326884768e-06, "loss": 0.7502, "step": 14905 }, { "epoch": 0.06598787020231085, "grad_norm": 2.4412438381293344, "learning_rate": 6.598787020231087e-06, "loss": 0.8188, "step": 14906 }, { "epoch": 0.06599229713577405, "grad_norm": 3.0399607380252935, "learning_rate": 6.599229713577405e-06, "loss": 1.1462, "step": 14907 }, { "epoch": 0.06599672406923723, "grad_norm": 3.050443216087231, "learning_rate": 6.599672406923725e-06, "loss": 1.0977, "step": 14908 }, { "epoch": 0.06600115100270043, "grad_norm": 3.0905683145261267, "learning_rate": 6.600115100270043e-06, "loss": 0.9235, "step": 14909 }, { "epoch": 0.06600557793616361, "grad_norm": 2.760792009154777, "learning_rate": 6.600557793616363e-06, "loss": 0.7872, "step": 14910 }, { "epoch": 0.06601000486962681, "grad_norm": 3.8610217090237233, "learning_rate": 6.601000486962682e-06, "loss": 0.9367, "step": 14911 }, { "epoch": 0.06601443180309, "grad_norm": 2.8397152247688853, "learning_rate": 6.6014431803090005e-06, "loss": 0.7376, "step": 14912 }, { "epoch": 0.0660188587365532, "grad_norm": 3.2809509995320036, "learning_rate": 6.601885873655319e-06, "loss": 0.919, "step": 14913 }, { "epoch": 0.06602328567001638, "grad_norm": 2.7091242418041737, "learning_rate": 6.602328567001639e-06, "loss": 0.5373, "step": 14914 }, { "epoch": 0.06602771260347957, "grad_norm": 3.064753503533987, "learning_rate": 6.602771260347958e-06, "loss": 0.9812, "step": 14915 }, { "epoch": 0.06603213953694276, "grad_norm": 2.9269688167339303, "learning_rate": 6.603213953694276e-06, "loss": 0.4654, "step": 14916 }, { "epoch": 0.06603656647040596, "grad_norm": 2.6766019906385696, "learning_rate": 6.603656647040596e-06, "loss": 0.6715, "step": 14917 }, { "epoch": 0.06604099340386914, "grad_norm": 3.195794436561139, "learning_rate": 6.604099340386915e-06, "loss": 0.6401, "step": 14918 }, { "epoch": 0.06604542033733234, "grad_norm": 2.4925457690767527, "learning_rate": 6.604542033733234e-06, "loss": 0.7825, "step": 14919 }, { "epoch": 0.06604984727079552, "grad_norm": 3.1224218934348835, "learning_rate": 6.604984727079553e-06, "loss": 0.775, "step": 14920 }, { "epoch": 0.0660542742042587, "grad_norm": 2.561102770206536, "learning_rate": 6.6054274204258715e-06, "loss": 0.8544, "step": 14921 }, { "epoch": 0.0660587011377219, "grad_norm": 3.3270294567860303, "learning_rate": 6.60587011377219e-06, "loss": 1.2217, "step": 14922 }, { "epoch": 0.06606312807118508, "grad_norm": 2.6028911106892916, "learning_rate": 6.60631280711851e-06, "loss": 0.5736, "step": 14923 }, { "epoch": 0.06606755500464828, "grad_norm": 3.170397033681025, "learning_rate": 6.606755500464829e-06, "loss": 0.9607, "step": 14924 }, { "epoch": 0.06607198193811147, "grad_norm": 3.0267441865545877, "learning_rate": 6.607198193811147e-06, "loss": 0.8645, "step": 14925 }, { "epoch": 0.06607640887157466, "grad_norm": 2.78790667477713, "learning_rate": 6.6076408871574675e-06, "loss": 0.9423, "step": 14926 }, { "epoch": 0.06608083580503785, "grad_norm": 2.6977746404967182, "learning_rate": 6.608083580503786e-06, "loss": 0.5399, "step": 14927 }, { "epoch": 0.06608526273850104, "grad_norm": 3.445092185624258, "learning_rate": 6.6085262738501045e-06, "loss": 0.8782, "step": 14928 }, { "epoch": 0.06608968967196423, "grad_norm": 2.625091117966879, "learning_rate": 6.608968967196424e-06, "loss": 0.7255, "step": 14929 }, { "epoch": 0.06609411660542742, "grad_norm": 2.124703168737153, "learning_rate": 6.6094116605427424e-06, "loss": 0.589, "step": 14930 }, { "epoch": 0.06609854353889061, "grad_norm": 2.1281258114155426, "learning_rate": 6.609854353889062e-06, "loss": 0.5079, "step": 14931 }, { "epoch": 0.0661029704723538, "grad_norm": 2.78468575850292, "learning_rate": 6.610297047235381e-06, "loss": 0.5878, "step": 14932 }, { "epoch": 0.06610739740581699, "grad_norm": 3.1177323112348194, "learning_rate": 6.6107397405817e-06, "loss": 0.9707, "step": 14933 }, { "epoch": 0.06611182433928019, "grad_norm": 2.7135252481139585, "learning_rate": 6.611182433928018e-06, "loss": 0.5171, "step": 14934 }, { "epoch": 0.06611625127274337, "grad_norm": 2.7598265231739814, "learning_rate": 6.6116251272743384e-06, "loss": 0.9751, "step": 14935 }, { "epoch": 0.06612067820620655, "grad_norm": 2.507389778497201, "learning_rate": 6.612067820620657e-06, "loss": 0.8327, "step": 14936 }, { "epoch": 0.06612510513966975, "grad_norm": 2.6318746411494343, "learning_rate": 6.6125105139669755e-06, "loss": 0.5971, "step": 14937 }, { "epoch": 0.06612953207313294, "grad_norm": 2.919011566203314, "learning_rate": 6.612953207313295e-06, "loss": 0.6078, "step": 14938 }, { "epoch": 0.06613395900659613, "grad_norm": 2.7174437406819725, "learning_rate": 6.613395900659613e-06, "loss": 0.7063, "step": 14939 }, { "epoch": 0.06613838594005932, "grad_norm": 2.8398231302733623, "learning_rate": 6.613838594005933e-06, "loss": 0.7515, "step": 14940 }, { "epoch": 0.06614281287352251, "grad_norm": 2.490348557203349, "learning_rate": 6.614281287352252e-06, "loss": 0.7478, "step": 14941 }, { "epoch": 0.0661472398069857, "grad_norm": 2.941336748611541, "learning_rate": 6.614723980698571e-06, "loss": 1.0499, "step": 14942 }, { "epoch": 0.0661516667404489, "grad_norm": 3.001979029924071, "learning_rate": 6.615166674044889e-06, "loss": 0.996, "step": 14943 }, { "epoch": 0.06615609367391208, "grad_norm": 3.5001545903059266, "learning_rate": 6.615609367391209e-06, "loss": 1.2911, "step": 14944 }, { "epoch": 0.06616052060737528, "grad_norm": 2.5650301773621544, "learning_rate": 6.616052060737528e-06, "loss": 0.6926, "step": 14945 }, { "epoch": 0.06616494754083846, "grad_norm": 3.274792486292983, "learning_rate": 6.6164947540838464e-06, "loss": 1.2228, "step": 14946 }, { "epoch": 0.06616937447430166, "grad_norm": 2.139812508901742, "learning_rate": 6.616937447430166e-06, "loss": 0.479, "step": 14947 }, { "epoch": 0.06617380140776484, "grad_norm": 2.619648139979007, "learning_rate": 6.617380140776485e-06, "loss": 0.6337, "step": 14948 }, { "epoch": 0.06617822834122804, "grad_norm": 2.7491180481296924, "learning_rate": 6.617822834122804e-06, "loss": 0.6604, "step": 14949 }, { "epoch": 0.06618265527469122, "grad_norm": 2.830980001738273, "learning_rate": 6.618265527469123e-06, "loss": 0.6394, "step": 14950 }, { "epoch": 0.0661870822081544, "grad_norm": 4.009230555993543, "learning_rate": 6.618708220815442e-06, "loss": 1.3963, "step": 14951 }, { "epoch": 0.0661915091416176, "grad_norm": 3.0391250331586708, "learning_rate": 6.61915091416176e-06, "loss": 0.7126, "step": 14952 }, { "epoch": 0.06619593607508079, "grad_norm": 2.9964081414823704, "learning_rate": 6.61959360750808e-06, "loss": 1.0188, "step": 14953 }, { "epoch": 0.06620036300854398, "grad_norm": 2.2818315467771533, "learning_rate": 6.620036300854399e-06, "loss": 0.7545, "step": 14954 }, { "epoch": 0.06620478994200717, "grad_norm": 3.0158417231475085, "learning_rate": 6.620478994200717e-06, "loss": 0.8376, "step": 14955 }, { "epoch": 0.06620921687547036, "grad_norm": 2.6622438963123836, "learning_rate": 6.620921687547038e-06, "loss": 0.6847, "step": 14956 }, { "epoch": 0.06621364380893355, "grad_norm": 2.693527334100606, "learning_rate": 6.621364380893356e-06, "loss": 0.8507, "step": 14957 }, { "epoch": 0.06621807074239675, "grad_norm": 3.2176142174696687, "learning_rate": 6.621807074239675e-06, "loss": 0.7322, "step": 14958 }, { "epoch": 0.06622249767585993, "grad_norm": 3.1275190880746027, "learning_rate": 6.622249767585994e-06, "loss": 0.891, "step": 14959 }, { "epoch": 0.06622692460932313, "grad_norm": 2.7155142270049795, "learning_rate": 6.6226924609323125e-06, "loss": 0.773, "step": 14960 }, { "epoch": 0.06623135154278631, "grad_norm": 2.447044742491134, "learning_rate": 6.623135154278632e-06, "loss": 0.4473, "step": 14961 }, { "epoch": 0.06623577847624951, "grad_norm": 2.5674595983324915, "learning_rate": 6.623577847624951e-06, "loss": 0.6882, "step": 14962 }, { "epoch": 0.06624020540971269, "grad_norm": 3.1582788683918013, "learning_rate": 6.62402054097127e-06, "loss": 0.8563, "step": 14963 }, { "epoch": 0.06624463234317589, "grad_norm": 2.49334397836893, "learning_rate": 6.624463234317588e-06, "loss": 0.6681, "step": 14964 }, { "epoch": 0.06624905927663907, "grad_norm": 2.7388901546599516, "learning_rate": 6.6249059276639085e-06, "loss": 0.8243, "step": 14965 }, { "epoch": 0.06625348621010226, "grad_norm": 2.6153566088036264, "learning_rate": 6.625348621010227e-06, "loss": 0.8052, "step": 14966 }, { "epoch": 0.06625791314356545, "grad_norm": 2.933566480456058, "learning_rate": 6.625791314356546e-06, "loss": 0.6492, "step": 14967 }, { "epoch": 0.06626234007702864, "grad_norm": 3.4572668788026273, "learning_rate": 6.626234007702865e-06, "loss": 0.7989, "step": 14968 }, { "epoch": 0.06626676701049183, "grad_norm": 2.871846431222785, "learning_rate": 6.626676701049184e-06, "loss": 1.017, "step": 14969 }, { "epoch": 0.06627119394395502, "grad_norm": 2.446646477345417, "learning_rate": 6.627119394395503e-06, "loss": 0.7682, "step": 14970 }, { "epoch": 0.06627562087741821, "grad_norm": 2.828829234517564, "learning_rate": 6.627562087741822e-06, "loss": 0.9052, "step": 14971 }, { "epoch": 0.0662800478108814, "grad_norm": 2.8607865839562017, "learning_rate": 6.628004781088141e-06, "loss": 0.7254, "step": 14972 }, { "epoch": 0.0662844747443446, "grad_norm": 2.3553376487547997, "learning_rate": 6.628447474434459e-06, "loss": 0.6697, "step": 14973 }, { "epoch": 0.06628890167780778, "grad_norm": 2.683668296704226, "learning_rate": 6.6288901677807795e-06, "loss": 0.8173, "step": 14974 }, { "epoch": 0.06629332861127098, "grad_norm": 3.0260858351898117, "learning_rate": 6.629332861127098e-06, "loss": 0.9533, "step": 14975 }, { "epoch": 0.06629775554473416, "grad_norm": 2.998343798874433, "learning_rate": 6.6297755544734165e-06, "loss": 1.0298, "step": 14976 }, { "epoch": 0.06630218247819736, "grad_norm": 2.3297421936025806, "learning_rate": 6.630218247819736e-06, "loss": 0.6568, "step": 14977 }, { "epoch": 0.06630660941166054, "grad_norm": 2.586725911460458, "learning_rate": 6.630660941166055e-06, "loss": 0.7058, "step": 14978 }, { "epoch": 0.06631103634512374, "grad_norm": 4.05138070409352, "learning_rate": 6.631103634512374e-06, "loss": 1.0229, "step": 14979 }, { "epoch": 0.06631546327858692, "grad_norm": 2.605145383998562, "learning_rate": 6.631546327858693e-06, "loss": 0.6724, "step": 14980 }, { "epoch": 0.0663198902120501, "grad_norm": 2.1134335628820216, "learning_rate": 6.631989021205012e-06, "loss": 0.5208, "step": 14981 }, { "epoch": 0.0663243171455133, "grad_norm": 2.405385073568248, "learning_rate": 6.63243171455133e-06, "loss": 0.8363, "step": 14982 }, { "epoch": 0.06632874407897649, "grad_norm": 3.038723148379055, "learning_rate": 6.6328744078976504e-06, "loss": 0.8396, "step": 14983 }, { "epoch": 0.06633317101243968, "grad_norm": 2.9314064741859136, "learning_rate": 6.633317101243969e-06, "loss": 0.7922, "step": 14984 }, { "epoch": 0.06633759794590287, "grad_norm": 2.722401803653447, "learning_rate": 6.6337597945902875e-06, "loss": 0.8932, "step": 14985 }, { "epoch": 0.06634202487936607, "grad_norm": 2.570553457853121, "learning_rate": 6.634202487936608e-06, "loss": 0.826, "step": 14986 }, { "epoch": 0.06634645181282925, "grad_norm": 2.8652428147185107, "learning_rate": 6.634645181282926e-06, "loss": 0.7042, "step": 14987 }, { "epoch": 0.06635087874629245, "grad_norm": 2.554099375028269, "learning_rate": 6.635087874629245e-06, "loss": 0.6641, "step": 14988 }, { "epoch": 0.06635530567975563, "grad_norm": 2.356777592608826, "learning_rate": 6.635530567975564e-06, "loss": 0.5586, "step": 14989 }, { "epoch": 0.06635973261321883, "grad_norm": 3.0928511213914778, "learning_rate": 6.635973261321883e-06, "loss": 0.9305, "step": 14990 }, { "epoch": 0.06636415954668201, "grad_norm": 2.40319584834829, "learning_rate": 6.636415954668202e-06, "loss": 0.5994, "step": 14991 }, { "epoch": 0.06636858648014521, "grad_norm": 2.5959650990605363, "learning_rate": 6.636858648014521e-06, "loss": 1.0161, "step": 14992 }, { "epoch": 0.06637301341360839, "grad_norm": 2.2848288071066833, "learning_rate": 6.63730134136084e-06, "loss": 0.6316, "step": 14993 }, { "epoch": 0.06637744034707159, "grad_norm": 2.426578596621753, "learning_rate": 6.6377440347071584e-06, "loss": 0.6496, "step": 14994 }, { "epoch": 0.06638186728053477, "grad_norm": 2.636189962761019, "learning_rate": 6.638186728053479e-06, "loss": 0.7681, "step": 14995 }, { "epoch": 0.06638629421399796, "grad_norm": 3.751680291627207, "learning_rate": 6.638629421399797e-06, "loss": 1.2161, "step": 14996 }, { "epoch": 0.06639072114746115, "grad_norm": 2.4305854961780544, "learning_rate": 6.639072114746116e-06, "loss": 0.7978, "step": 14997 }, { "epoch": 0.06639514808092434, "grad_norm": 2.5461371408012496, "learning_rate": 6.639514808092435e-06, "loss": 0.7109, "step": 14998 }, { "epoch": 0.06639957501438754, "grad_norm": 2.7389528857993293, "learning_rate": 6.6399575014387544e-06, "loss": 0.6095, "step": 14999 }, { "epoch": 0.06640400194785072, "grad_norm": 2.702027291483341, "learning_rate": 6.640400194785073e-06, "loss": 0.8964, "step": 15000 }, { "epoch": 0.06640842888131392, "grad_norm": 2.4232063720484645, "learning_rate": 6.640842888131392e-06, "loss": 0.6083, "step": 15001 }, { "epoch": 0.0664128558147771, "grad_norm": 2.516669774797838, "learning_rate": 6.641285581477711e-06, "loss": 0.7556, "step": 15002 }, { "epoch": 0.0664172827482403, "grad_norm": 2.6852962637486457, "learning_rate": 6.641728274824029e-06, "loss": 0.6121, "step": 15003 }, { "epoch": 0.06642170968170348, "grad_norm": 2.772467042557003, "learning_rate": 6.64217096817035e-06, "loss": 0.9516, "step": 15004 }, { "epoch": 0.06642613661516668, "grad_norm": 2.801404453250259, "learning_rate": 6.642613661516668e-06, "loss": 0.6935, "step": 15005 }, { "epoch": 0.06643056354862986, "grad_norm": 2.318106895855749, "learning_rate": 6.643056354862987e-06, "loss": 0.5305, "step": 15006 }, { "epoch": 0.06643499048209306, "grad_norm": 3.100348704249759, "learning_rate": 6.643499048209307e-06, "loss": 0.64, "step": 15007 }, { "epoch": 0.06643941741555624, "grad_norm": 2.8071368645518184, "learning_rate": 6.643941741555625e-06, "loss": 0.5628, "step": 15008 }, { "epoch": 0.06644384434901944, "grad_norm": 3.3646289591300245, "learning_rate": 6.644384434901944e-06, "loss": 1.3156, "step": 15009 }, { "epoch": 0.06644827128248262, "grad_norm": 2.959428869337981, "learning_rate": 6.644827128248263e-06, "loss": 0.9855, "step": 15010 }, { "epoch": 0.06645269821594582, "grad_norm": 3.5622872639936234, "learning_rate": 6.645269821594582e-06, "loss": 1.1013, "step": 15011 }, { "epoch": 0.066457125149409, "grad_norm": 2.326113991860164, "learning_rate": 6.6457125149409e-06, "loss": 0.6405, "step": 15012 }, { "epoch": 0.06646155208287219, "grad_norm": 3.2479738353825987, "learning_rate": 6.6461552082872205e-06, "loss": 1.2831, "step": 15013 }, { "epoch": 0.06646597901633539, "grad_norm": 3.3202469118700026, "learning_rate": 6.646597901633539e-06, "loss": 0.7194, "step": 15014 }, { "epoch": 0.06647040594979857, "grad_norm": 2.450343211845546, "learning_rate": 6.647040594979858e-06, "loss": 0.7582, "step": 15015 }, { "epoch": 0.06647483288326177, "grad_norm": 3.401321889772286, "learning_rate": 6.647483288326178e-06, "loss": 0.9611, "step": 15016 }, { "epoch": 0.06647925981672495, "grad_norm": 2.176730604776886, "learning_rate": 6.647925981672496e-06, "loss": 0.4831, "step": 15017 }, { "epoch": 0.06648368675018815, "grad_norm": 2.5088989343018446, "learning_rate": 6.648368675018815e-06, "loss": 0.7551, "step": 15018 }, { "epoch": 0.06648811368365133, "grad_norm": 2.487061649751463, "learning_rate": 6.648811368365134e-06, "loss": 0.7917, "step": 15019 }, { "epoch": 0.06649254061711453, "grad_norm": 2.9452736832478545, "learning_rate": 6.649254061711453e-06, "loss": 0.6479, "step": 15020 }, { "epoch": 0.06649696755057771, "grad_norm": 2.6786063124467807, "learning_rate": 6.649696755057772e-06, "loss": 0.6459, "step": 15021 }, { "epoch": 0.06650139448404091, "grad_norm": 3.503539586789254, "learning_rate": 6.6501394484040915e-06, "loss": 1.058, "step": 15022 }, { "epoch": 0.0665058214175041, "grad_norm": 2.2180132827916834, "learning_rate": 6.65058214175041e-06, "loss": 0.5415, "step": 15023 }, { "epoch": 0.06651024835096729, "grad_norm": 3.643889529731908, "learning_rate": 6.6510248350967285e-06, "loss": 0.8726, "step": 15024 }, { "epoch": 0.06651467528443047, "grad_norm": 2.2945070681176016, "learning_rate": 6.651467528443049e-06, "loss": 0.582, "step": 15025 }, { "epoch": 0.06651910221789367, "grad_norm": 2.6760260798107254, "learning_rate": 6.651910221789367e-06, "loss": 0.5884, "step": 15026 }, { "epoch": 0.06652352915135686, "grad_norm": 2.5539441381135455, "learning_rate": 6.652352915135686e-06, "loss": 0.7009, "step": 15027 }, { "epoch": 0.06652795608482004, "grad_norm": 2.268270946579463, "learning_rate": 6.652795608482005e-06, "loss": 0.6756, "step": 15028 }, { "epoch": 0.06653238301828324, "grad_norm": 2.6767946267768874, "learning_rate": 6.6532383018283245e-06, "loss": 0.5377, "step": 15029 }, { "epoch": 0.06653680995174642, "grad_norm": 2.536888655729693, "learning_rate": 6.653680995174643e-06, "loss": 0.5739, "step": 15030 }, { "epoch": 0.06654123688520962, "grad_norm": 2.8997894054212043, "learning_rate": 6.6541236885209624e-06, "loss": 0.8214, "step": 15031 }, { "epoch": 0.0665456638186728, "grad_norm": 2.5935938353584613, "learning_rate": 6.654566381867281e-06, "loss": 0.8183, "step": 15032 }, { "epoch": 0.066550090752136, "grad_norm": 2.587968613305694, "learning_rate": 6.6550090752135995e-06, "loss": 0.7404, "step": 15033 }, { "epoch": 0.06655451768559918, "grad_norm": 2.4843404458821836, "learning_rate": 6.65545176855992e-06, "loss": 0.8049, "step": 15034 }, { "epoch": 0.06655894461906238, "grad_norm": 2.8319119360067297, "learning_rate": 6.655894461906238e-06, "loss": 0.7613, "step": 15035 }, { "epoch": 0.06656337155252556, "grad_norm": 2.6674494715068127, "learning_rate": 6.656337155252557e-06, "loss": 0.9247, "step": 15036 }, { "epoch": 0.06656779848598876, "grad_norm": 3.1270258715224712, "learning_rate": 6.656779848598877e-06, "loss": 0.9106, "step": 15037 }, { "epoch": 0.06657222541945194, "grad_norm": 2.4943054833287044, "learning_rate": 6.6572225419451955e-06, "loss": 0.5902, "step": 15038 }, { "epoch": 0.06657665235291514, "grad_norm": 2.7806932631474495, "learning_rate": 6.657665235291514e-06, "loss": 0.5364, "step": 15039 }, { "epoch": 0.06658107928637833, "grad_norm": 2.90471247259852, "learning_rate": 6.658107928637833e-06, "loss": 0.7018, "step": 15040 }, { "epoch": 0.06658550621984152, "grad_norm": 3.617582859542476, "learning_rate": 6.658550621984152e-06, "loss": 1.0931, "step": 15041 }, { "epoch": 0.0665899331533047, "grad_norm": 2.896923258190817, "learning_rate": 6.658993315330471e-06, "loss": 0.526, "step": 15042 }, { "epoch": 0.06659436008676789, "grad_norm": 3.171407271190546, "learning_rate": 6.659436008676791e-06, "loss": 1.149, "step": 15043 }, { "epoch": 0.06659878702023109, "grad_norm": 2.379118221793581, "learning_rate": 6.659878702023109e-06, "loss": 0.6348, "step": 15044 }, { "epoch": 0.06660321395369427, "grad_norm": 2.759148541503474, "learning_rate": 6.660321395369428e-06, "loss": 0.7927, "step": 15045 }, { "epoch": 0.06660764088715747, "grad_norm": 2.537841034627679, "learning_rate": 6.660764088715748e-06, "loss": 0.7758, "step": 15046 }, { "epoch": 0.06661206782062065, "grad_norm": 2.5316259874030895, "learning_rate": 6.6612067820620664e-06, "loss": 0.83, "step": 15047 }, { "epoch": 0.06661649475408385, "grad_norm": 3.046696031077631, "learning_rate": 6.661649475408385e-06, "loss": 0.8641, "step": 15048 }, { "epoch": 0.06662092168754703, "grad_norm": 2.683229670825532, "learning_rate": 6.662092168754704e-06, "loss": 1.0059, "step": 15049 }, { "epoch": 0.06662534862101023, "grad_norm": 2.70826541952604, "learning_rate": 6.662534862101024e-06, "loss": 0.6736, "step": 15050 }, { "epoch": 0.06662977555447341, "grad_norm": 2.7379018064997522, "learning_rate": 6.662977555447342e-06, "loss": 0.7049, "step": 15051 }, { "epoch": 0.06663420248793661, "grad_norm": 2.7544475153207957, "learning_rate": 6.663420248793662e-06, "loss": 0.9211, "step": 15052 }, { "epoch": 0.0666386294213998, "grad_norm": 2.260664966648245, "learning_rate": 6.66386294213998e-06, "loss": 0.4968, "step": 15053 }, { "epoch": 0.06664305635486299, "grad_norm": 2.732833057041298, "learning_rate": 6.664305635486299e-06, "loss": 0.8603, "step": 15054 }, { "epoch": 0.06664748328832618, "grad_norm": 2.9953466698650333, "learning_rate": 6.664748328832619e-06, "loss": 0.8056, "step": 15055 }, { "epoch": 0.06665191022178937, "grad_norm": 3.048218460923446, "learning_rate": 6.665191022178937e-06, "loss": 0.9224, "step": 15056 }, { "epoch": 0.06665633715525256, "grad_norm": 2.5740173113226428, "learning_rate": 6.665633715525256e-06, "loss": 0.628, "step": 15057 }, { "epoch": 0.06666076408871574, "grad_norm": 3.1798072741800247, "learning_rate": 6.666076408871575e-06, "loss": 0.9147, "step": 15058 }, { "epoch": 0.06666519102217894, "grad_norm": 3.049266185217261, "learning_rate": 6.666519102217895e-06, "loss": 0.7814, "step": 15059 }, { "epoch": 0.06666961795564212, "grad_norm": 2.52957822999911, "learning_rate": 6.666961795564213e-06, "loss": 0.5145, "step": 15060 }, { "epoch": 0.06667404488910532, "grad_norm": 2.6199486569759802, "learning_rate": 6.6674044889105325e-06, "loss": 0.6, "step": 15061 }, { "epoch": 0.0666784718225685, "grad_norm": 2.8165938676923363, "learning_rate": 6.667847182256851e-06, "loss": 0.6109, "step": 15062 }, { "epoch": 0.0666828987560317, "grad_norm": 2.9313791456276026, "learning_rate": 6.66828987560317e-06, "loss": 0.7127, "step": 15063 }, { "epoch": 0.06668732568949488, "grad_norm": 2.7371936692845544, "learning_rate": 6.66873256894949e-06, "loss": 0.6648, "step": 15064 }, { "epoch": 0.06669175262295808, "grad_norm": 2.8201270689962037, "learning_rate": 6.669175262295808e-06, "loss": 0.9377, "step": 15065 }, { "epoch": 0.06669617955642126, "grad_norm": 2.6580615472244347, "learning_rate": 6.669617955642127e-06, "loss": 0.6244, "step": 15066 }, { "epoch": 0.06670060648988446, "grad_norm": 2.40943773057252, "learning_rate": 6.670060648988447e-06, "loss": 0.4951, "step": 15067 }, { "epoch": 0.06670503342334765, "grad_norm": 2.7995863881198915, "learning_rate": 6.670503342334766e-06, "loss": 0.8744, "step": 15068 }, { "epoch": 0.06670946035681084, "grad_norm": 2.8529920959487574, "learning_rate": 6.670946035681084e-06, "loss": 0.9607, "step": 15069 }, { "epoch": 0.06671388729027403, "grad_norm": 2.5577293091886086, "learning_rate": 6.6713887290274035e-06, "loss": 0.8462, "step": 15070 }, { "epoch": 0.06671831422373722, "grad_norm": 2.767608165089083, "learning_rate": 6.671831422373722e-06, "loss": 0.631, "step": 15071 }, { "epoch": 0.06672274115720041, "grad_norm": 3.635510146068062, "learning_rate": 6.672274115720041e-06, "loss": 0.6207, "step": 15072 }, { "epoch": 0.06672716809066359, "grad_norm": 3.3642530883397, "learning_rate": 6.672716809066361e-06, "loss": 1.2772, "step": 15073 }, { "epoch": 0.06673159502412679, "grad_norm": 3.1065421121933263, "learning_rate": 6.673159502412679e-06, "loss": 1.0321, "step": 15074 }, { "epoch": 0.06673602195758997, "grad_norm": 2.743014826344969, "learning_rate": 6.673602195758998e-06, "loss": 0.7656, "step": 15075 }, { "epoch": 0.06674044889105317, "grad_norm": 3.2366087529245204, "learning_rate": 6.674044889105318e-06, "loss": 0.7793, "step": 15076 }, { "epoch": 0.06674487582451635, "grad_norm": 2.6308336161332244, "learning_rate": 6.6744875824516365e-06, "loss": 0.7595, "step": 15077 }, { "epoch": 0.06674930275797955, "grad_norm": 3.3085872781894317, "learning_rate": 6.674930275797955e-06, "loss": 0.6252, "step": 15078 }, { "epoch": 0.06675372969144273, "grad_norm": 2.7461129133265136, "learning_rate": 6.6753729691442744e-06, "loss": 0.7548, "step": 15079 }, { "epoch": 0.06675815662490593, "grad_norm": 2.2557739182880585, "learning_rate": 6.675815662490594e-06, "loss": 0.6461, "step": 15080 }, { "epoch": 0.06676258355836912, "grad_norm": 4.565086799092509, "learning_rate": 6.676258355836912e-06, "loss": 1.3809, "step": 15081 }, { "epoch": 0.06676701049183231, "grad_norm": 2.751493310161658, "learning_rate": 6.676701049183232e-06, "loss": 0.8775, "step": 15082 }, { "epoch": 0.0667714374252955, "grad_norm": 2.9747229432754363, "learning_rate": 6.67714374252955e-06, "loss": 0.9418, "step": 15083 }, { "epoch": 0.0667758643587587, "grad_norm": 2.4417207255891085, "learning_rate": 6.677586435875869e-06, "loss": 0.7017, "step": 15084 }, { "epoch": 0.06678029129222188, "grad_norm": 2.3832638349114834, "learning_rate": 6.678029129222189e-06, "loss": 0.5006, "step": 15085 }, { "epoch": 0.06678471822568507, "grad_norm": 2.4026879222379423, "learning_rate": 6.6784718225685075e-06, "loss": 0.5066, "step": 15086 }, { "epoch": 0.06678914515914826, "grad_norm": 2.751567422172403, "learning_rate": 6.678914515914826e-06, "loss": 0.5754, "step": 15087 }, { "epoch": 0.06679357209261144, "grad_norm": 2.8558933551679297, "learning_rate": 6.679357209261146e-06, "loss": 0.8301, "step": 15088 }, { "epoch": 0.06679799902607464, "grad_norm": 2.857194227676377, "learning_rate": 6.679799902607465e-06, "loss": 0.9846, "step": 15089 }, { "epoch": 0.06680242595953782, "grad_norm": 3.397746190773254, "learning_rate": 6.680242595953783e-06, "loss": 1.0662, "step": 15090 }, { "epoch": 0.06680685289300102, "grad_norm": 3.0046962299609996, "learning_rate": 6.680685289300103e-06, "loss": 0.7363, "step": 15091 }, { "epoch": 0.0668112798264642, "grad_norm": 3.0568895999104173, "learning_rate": 6.681127982646421e-06, "loss": 0.5448, "step": 15092 }, { "epoch": 0.0668157067599274, "grad_norm": 2.8192337899833335, "learning_rate": 6.68157067599274e-06, "loss": 0.7585, "step": 15093 }, { "epoch": 0.06682013369339058, "grad_norm": 2.882648450364399, "learning_rate": 6.68201336933906e-06, "loss": 0.8737, "step": 15094 }, { "epoch": 0.06682456062685378, "grad_norm": 3.237372549703586, "learning_rate": 6.6824560626853784e-06, "loss": 0.7342, "step": 15095 }, { "epoch": 0.06682898756031697, "grad_norm": 2.457742610130612, "learning_rate": 6.682898756031697e-06, "loss": 0.6242, "step": 15096 }, { "epoch": 0.06683341449378016, "grad_norm": 2.465469169811648, "learning_rate": 6.683341449378017e-06, "loss": 0.6833, "step": 15097 }, { "epoch": 0.06683784142724335, "grad_norm": 2.550442012702276, "learning_rate": 6.683784142724336e-06, "loss": 0.7147, "step": 15098 }, { "epoch": 0.06684226836070654, "grad_norm": 2.8424273640625963, "learning_rate": 6.684226836070654e-06, "loss": 0.9935, "step": 15099 }, { "epoch": 0.06684669529416973, "grad_norm": 2.7683937667386473, "learning_rate": 6.684669529416974e-06, "loss": 0.7272, "step": 15100 }, { "epoch": 0.06685112222763293, "grad_norm": 2.5757698491567997, "learning_rate": 6.685112222763292e-06, "loss": 0.7238, "step": 15101 }, { "epoch": 0.06685554916109611, "grad_norm": 2.443159867852058, "learning_rate": 6.6855549161096115e-06, "loss": 0.6872, "step": 15102 }, { "epoch": 0.06685997609455929, "grad_norm": 4.956794554380604, "learning_rate": 6.685997609455931e-06, "loss": 0.872, "step": 15103 }, { "epoch": 0.06686440302802249, "grad_norm": 3.1040062983696783, "learning_rate": 6.686440302802249e-06, "loss": 0.3812, "step": 15104 }, { "epoch": 0.06686882996148567, "grad_norm": 2.489223909741568, "learning_rate": 6.686882996148568e-06, "loss": 0.6714, "step": 15105 }, { "epoch": 0.06687325689494887, "grad_norm": 3.00641988296816, "learning_rate": 6.687325689494888e-06, "loss": 1.0347, "step": 15106 }, { "epoch": 0.06687768382841205, "grad_norm": 2.753814934806258, "learning_rate": 6.687768382841207e-06, "loss": 1.033, "step": 15107 }, { "epoch": 0.06688211076187525, "grad_norm": 2.384000903022519, "learning_rate": 6.688211076187525e-06, "loss": 0.6504, "step": 15108 }, { "epoch": 0.06688653769533844, "grad_norm": 2.5497105866462655, "learning_rate": 6.6886537695338445e-06, "loss": 0.7207, "step": 15109 }, { "epoch": 0.06689096462880163, "grad_norm": 2.7618360776425366, "learning_rate": 6.689096462880164e-06, "loss": 0.6464, "step": 15110 }, { "epoch": 0.06689539156226482, "grad_norm": 2.9458371154181644, "learning_rate": 6.6895391562264824e-06, "loss": 0.7923, "step": 15111 }, { "epoch": 0.06689981849572801, "grad_norm": 3.7538840879528372, "learning_rate": 6.689981849572802e-06, "loss": 0.9053, "step": 15112 }, { "epoch": 0.0669042454291912, "grad_norm": 2.9263398101690057, "learning_rate": 6.69042454291912e-06, "loss": 0.9579, "step": 15113 }, { "epoch": 0.0669086723626544, "grad_norm": 2.3272229262922113, "learning_rate": 6.690867236265439e-06, "loss": 0.6387, "step": 15114 }, { "epoch": 0.06691309929611758, "grad_norm": 2.5886949170429623, "learning_rate": 6.691309929611759e-06, "loss": 0.5971, "step": 15115 }, { "epoch": 0.06691752622958078, "grad_norm": 2.804290036400227, "learning_rate": 6.691752622958078e-06, "loss": 0.8609, "step": 15116 }, { "epoch": 0.06692195316304396, "grad_norm": 2.5069744193197225, "learning_rate": 6.692195316304396e-06, "loss": 0.6624, "step": 15117 }, { "epoch": 0.06692638009650714, "grad_norm": 3.1008604293171245, "learning_rate": 6.692638009650716e-06, "loss": 0.7196, "step": 15118 }, { "epoch": 0.06693080702997034, "grad_norm": 2.7877164384968594, "learning_rate": 6.693080702997035e-06, "loss": 0.5693, "step": 15119 }, { "epoch": 0.06693523396343352, "grad_norm": 3.328727428539955, "learning_rate": 6.693523396343353e-06, "loss": 0.8022, "step": 15120 }, { "epoch": 0.06693966089689672, "grad_norm": 2.7601093047665475, "learning_rate": 6.693966089689673e-06, "loss": 0.5975, "step": 15121 }, { "epoch": 0.0669440878303599, "grad_norm": 2.6963836619138593, "learning_rate": 6.694408783035991e-06, "loss": 0.8911, "step": 15122 }, { "epoch": 0.0669485147638231, "grad_norm": 2.805370218385766, "learning_rate": 6.694851476382311e-06, "loss": 0.9144, "step": 15123 }, { "epoch": 0.06695294169728629, "grad_norm": 2.480813056126253, "learning_rate": 6.69529416972863e-06, "loss": 0.6118, "step": 15124 }, { "epoch": 0.06695736863074948, "grad_norm": 2.4253643002328844, "learning_rate": 6.6957368630749485e-06, "loss": 0.6005, "step": 15125 }, { "epoch": 0.06696179556421267, "grad_norm": 2.6876357601787597, "learning_rate": 6.696179556421267e-06, "loss": 0.7148, "step": 15126 }, { "epoch": 0.06696622249767586, "grad_norm": 2.8516511052738522, "learning_rate": 6.696622249767587e-06, "loss": 0.6333, "step": 15127 }, { "epoch": 0.06697064943113905, "grad_norm": 3.0842573384233747, "learning_rate": 6.697064943113906e-06, "loss": 1.1715, "step": 15128 }, { "epoch": 0.06697507636460225, "grad_norm": 2.5000106924049987, "learning_rate": 6.697507636460224e-06, "loss": 0.5655, "step": 15129 }, { "epoch": 0.06697950329806543, "grad_norm": 2.4493349126886192, "learning_rate": 6.697950329806544e-06, "loss": 0.575, "step": 15130 }, { "epoch": 0.06698393023152863, "grad_norm": 3.558087102955845, "learning_rate": 6.698393023152863e-06, "loss": 1.2868, "step": 15131 }, { "epoch": 0.06698835716499181, "grad_norm": 2.2625280408526645, "learning_rate": 6.698835716499182e-06, "loss": 0.6052, "step": 15132 }, { "epoch": 0.066992784098455, "grad_norm": 2.5933755090872377, "learning_rate": 6.699278409845501e-06, "loss": 0.7811, "step": 15133 }, { "epoch": 0.06699721103191819, "grad_norm": 3.3973958456014146, "learning_rate": 6.6997211031918195e-06, "loss": 0.8374, "step": 15134 }, { "epoch": 0.06700163796538137, "grad_norm": 2.3747794062170415, "learning_rate": 6.700163796538138e-06, "loss": 0.6808, "step": 15135 }, { "epoch": 0.06700606489884457, "grad_norm": 2.04548107686276, "learning_rate": 6.700606489884458e-06, "loss": 0.4741, "step": 15136 }, { "epoch": 0.06701049183230776, "grad_norm": 2.7714298671608337, "learning_rate": 6.701049183230777e-06, "loss": 0.6975, "step": 15137 }, { "epoch": 0.06701491876577095, "grad_norm": 3.801412100128603, "learning_rate": 6.701491876577095e-06, "loss": 1.2115, "step": 15138 }, { "epoch": 0.06701934569923414, "grad_norm": 2.5998246799378038, "learning_rate": 6.701934569923415e-06, "loss": 0.765, "step": 15139 }, { "epoch": 0.06702377263269733, "grad_norm": 2.8872717608982303, "learning_rate": 6.702377263269734e-06, "loss": 0.771, "step": 15140 }, { "epoch": 0.06702819956616052, "grad_norm": 3.350918137934124, "learning_rate": 6.7028199566160526e-06, "loss": 0.9439, "step": 15141 }, { "epoch": 0.06703262649962372, "grad_norm": 3.1355409788587836, "learning_rate": 6.703262649962372e-06, "loss": 0.6661, "step": 15142 }, { "epoch": 0.0670370534330869, "grad_norm": 2.1729124747619055, "learning_rate": 6.7037053433086904e-06, "loss": 0.4955, "step": 15143 }, { "epoch": 0.0670414803665501, "grad_norm": 2.5521990752446273, "learning_rate": 6.704148036655009e-06, "loss": 0.839, "step": 15144 }, { "epoch": 0.06704590730001328, "grad_norm": 2.706592178135468, "learning_rate": 6.704590730001329e-06, "loss": 0.9661, "step": 15145 }, { "epoch": 0.06705033423347648, "grad_norm": 2.6721959150524173, "learning_rate": 6.705033423347648e-06, "loss": 0.6806, "step": 15146 }, { "epoch": 0.06705476116693966, "grad_norm": 2.7535290615030803, "learning_rate": 6.705476116693966e-06, "loss": 0.8032, "step": 15147 }, { "epoch": 0.06705918810040284, "grad_norm": 2.5249298038681482, "learning_rate": 6.7059188100402865e-06, "loss": 0.9099, "step": 15148 }, { "epoch": 0.06706361503386604, "grad_norm": 2.8216023680380977, "learning_rate": 6.706361503386605e-06, "loss": 0.6706, "step": 15149 }, { "epoch": 0.06706804196732923, "grad_norm": 2.84184513160976, "learning_rate": 6.7068041967329235e-06, "loss": 0.8683, "step": 15150 }, { "epoch": 0.06707246890079242, "grad_norm": 2.5149142936867372, "learning_rate": 6.707246890079243e-06, "loss": 0.7093, "step": 15151 }, { "epoch": 0.0670768958342556, "grad_norm": 2.1331531997091635, "learning_rate": 6.707689583425561e-06, "loss": 0.705, "step": 15152 }, { "epoch": 0.0670813227677188, "grad_norm": 2.6547831673537363, "learning_rate": 6.708132276771881e-06, "loss": 0.4327, "step": 15153 }, { "epoch": 0.06708574970118199, "grad_norm": 2.6590091678147765, "learning_rate": 6.7085749701182e-06, "loss": 0.6333, "step": 15154 }, { "epoch": 0.06709017663464518, "grad_norm": 2.465798517444477, "learning_rate": 6.709017663464519e-06, "loss": 0.8141, "step": 15155 }, { "epoch": 0.06709460356810837, "grad_norm": 2.4916858619520825, "learning_rate": 6.709460356810837e-06, "loss": 0.6068, "step": 15156 }, { "epoch": 0.06709903050157157, "grad_norm": 2.742552543612051, "learning_rate": 6.709903050157157e-06, "loss": 0.7739, "step": 15157 }, { "epoch": 0.06710345743503475, "grad_norm": 2.589049927955524, "learning_rate": 6.710345743503476e-06, "loss": 0.6804, "step": 15158 }, { "epoch": 0.06710788436849795, "grad_norm": 2.8113126202391845, "learning_rate": 6.7107884368497944e-06, "loss": 0.8061, "step": 15159 }, { "epoch": 0.06711231130196113, "grad_norm": 3.09923227956, "learning_rate": 6.711231130196114e-06, "loss": 0.681, "step": 15160 }, { "epoch": 0.06711673823542433, "grad_norm": 2.5078231619308817, "learning_rate": 6.711673823542433e-06, "loss": 0.746, "step": 15161 }, { "epoch": 0.06712116516888751, "grad_norm": 3.208066633907022, "learning_rate": 6.712116516888752e-06, "loss": 0.9032, "step": 15162 }, { "epoch": 0.0671255921023507, "grad_norm": 4.037745083690706, "learning_rate": 6.712559210235071e-06, "loss": 1.0828, "step": 15163 }, { "epoch": 0.06713001903581389, "grad_norm": 1.9670948968230115, "learning_rate": 6.71300190358139e-06, "loss": 0.5441, "step": 15164 }, { "epoch": 0.06713444596927708, "grad_norm": 2.4608865463288954, "learning_rate": 6.713444596927708e-06, "loss": 0.6225, "step": 15165 }, { "epoch": 0.06713887290274027, "grad_norm": 2.849465832058459, "learning_rate": 6.713887290274028e-06, "loss": 0.4354, "step": 15166 }, { "epoch": 0.06714329983620346, "grad_norm": 2.49190453953715, "learning_rate": 6.714329983620347e-06, "loss": 0.7402, "step": 15167 }, { "epoch": 0.06714772676966665, "grad_norm": 2.9551757703974197, "learning_rate": 6.714772676966665e-06, "loss": 0.8888, "step": 15168 }, { "epoch": 0.06715215370312984, "grad_norm": 2.74319744879056, "learning_rate": 6.715215370312986e-06, "loss": 0.6396, "step": 15169 }, { "epoch": 0.06715658063659304, "grad_norm": 2.423591720042345, "learning_rate": 6.715658063659304e-06, "loss": 0.6148, "step": 15170 }, { "epoch": 0.06716100757005622, "grad_norm": 2.8575136218893658, "learning_rate": 6.716100757005623e-06, "loss": 1.0263, "step": 15171 }, { "epoch": 0.06716543450351942, "grad_norm": 2.8592980465301423, "learning_rate": 6.716543450351942e-06, "loss": 0.8456, "step": 15172 }, { "epoch": 0.0671698614369826, "grad_norm": 2.790802641527218, "learning_rate": 6.7169861436982606e-06, "loss": 0.8208, "step": 15173 }, { "epoch": 0.0671742883704458, "grad_norm": 2.769802351241136, "learning_rate": 6.717428837044579e-06, "loss": 0.6923, "step": 15174 }, { "epoch": 0.06717871530390898, "grad_norm": 2.5705221056009897, "learning_rate": 6.717871530390899e-06, "loss": 0.5138, "step": 15175 }, { "epoch": 0.06718314223737218, "grad_norm": 3.2290111801218924, "learning_rate": 6.718314223737218e-06, "loss": 0.8792, "step": 15176 }, { "epoch": 0.06718756917083536, "grad_norm": 2.480527340139855, "learning_rate": 6.718756917083536e-06, "loss": 0.8318, "step": 15177 }, { "epoch": 0.06719199610429855, "grad_norm": 2.748512812144659, "learning_rate": 6.7191996104298566e-06, "loss": 0.7941, "step": 15178 }, { "epoch": 0.06719642303776174, "grad_norm": 2.7741838521976017, "learning_rate": 6.719642303776175e-06, "loss": 0.4739, "step": 15179 }, { "epoch": 0.06720084997122493, "grad_norm": 2.822392716023653, "learning_rate": 6.720084997122494e-06, "loss": 0.9096, "step": 15180 }, { "epoch": 0.06720527690468812, "grad_norm": 2.7378771533978377, "learning_rate": 6.720527690468813e-06, "loss": 0.7754, "step": 15181 }, { "epoch": 0.06720970383815131, "grad_norm": 3.3428866939838353, "learning_rate": 6.7209703838151315e-06, "loss": 0.9252, "step": 15182 }, { "epoch": 0.0672141307716145, "grad_norm": 3.00825871873421, "learning_rate": 6.721413077161451e-06, "loss": 0.9474, "step": 15183 }, { "epoch": 0.06721855770507769, "grad_norm": 2.826122176635416, "learning_rate": 6.72185577050777e-06, "loss": 1.0064, "step": 15184 }, { "epoch": 0.06722298463854089, "grad_norm": 2.4260178138923774, "learning_rate": 6.722298463854089e-06, "loss": 0.709, "step": 15185 }, { "epoch": 0.06722741157200407, "grad_norm": 2.326532381909689, "learning_rate": 6.722741157200407e-06, "loss": 0.5377, "step": 15186 }, { "epoch": 0.06723183850546727, "grad_norm": 3.7108629116635306, "learning_rate": 6.7231838505467275e-06, "loss": 0.7803, "step": 15187 }, { "epoch": 0.06723626543893045, "grad_norm": 2.241266031594194, "learning_rate": 6.723626543893046e-06, "loss": 0.538, "step": 15188 }, { "epoch": 0.06724069237239365, "grad_norm": 2.8003052221591145, "learning_rate": 6.7240692372393646e-06, "loss": 0.8025, "step": 15189 }, { "epoch": 0.06724511930585683, "grad_norm": 3.161054290962207, "learning_rate": 6.724511930585684e-06, "loss": 1.12, "step": 15190 }, { "epoch": 0.06724954623932003, "grad_norm": 3.2942748128376156, "learning_rate": 6.724954623932003e-06, "loss": 0.7329, "step": 15191 }, { "epoch": 0.06725397317278321, "grad_norm": 2.8146908419165997, "learning_rate": 6.725397317278322e-06, "loss": 0.7363, "step": 15192 }, { "epoch": 0.0672584001062464, "grad_norm": 2.574836814897026, "learning_rate": 6.725840010624641e-06, "loss": 0.8731, "step": 15193 }, { "epoch": 0.0672628270397096, "grad_norm": 2.772491946215769, "learning_rate": 6.72628270397096e-06, "loss": 0.5858, "step": 15194 }, { "epoch": 0.06726725397317278, "grad_norm": 3.029471909226423, "learning_rate": 6.726725397317278e-06, "loss": 0.8869, "step": 15195 }, { "epoch": 0.06727168090663597, "grad_norm": 3.112303030879272, "learning_rate": 6.7271680906635985e-06, "loss": 0.8362, "step": 15196 }, { "epoch": 0.06727610784009916, "grad_norm": 2.9126427217235213, "learning_rate": 6.727610784009917e-06, "loss": 0.7265, "step": 15197 }, { "epoch": 0.06728053477356236, "grad_norm": 2.971411538763337, "learning_rate": 6.7280534773562355e-06, "loss": 0.8983, "step": 15198 }, { "epoch": 0.06728496170702554, "grad_norm": 2.7745224440754237, "learning_rate": 6.728496170702556e-06, "loss": 0.731, "step": 15199 }, { "epoch": 0.06728938864048874, "grad_norm": 2.537012399209403, "learning_rate": 6.728938864048874e-06, "loss": 0.5563, "step": 15200 }, { "epoch": 0.06729381557395192, "grad_norm": 2.5572774045476567, "learning_rate": 6.729381557395193e-06, "loss": 0.7001, "step": 15201 }, { "epoch": 0.06729824250741512, "grad_norm": 2.693351689253591, "learning_rate": 6.729824250741512e-06, "loss": 0.5419, "step": 15202 }, { "epoch": 0.0673026694408783, "grad_norm": 2.978499767023155, "learning_rate": 6.730266944087831e-06, "loss": 0.7136, "step": 15203 }, { "epoch": 0.0673070963743415, "grad_norm": 2.7235211453434944, "learning_rate": 6.73070963743415e-06, "loss": 0.7611, "step": 15204 }, { "epoch": 0.06731152330780468, "grad_norm": 2.581084101758664, "learning_rate": 6.731152330780469e-06, "loss": 0.8764, "step": 15205 }, { "epoch": 0.06731595024126788, "grad_norm": 2.386463209888257, "learning_rate": 6.731595024126788e-06, "loss": 0.4313, "step": 15206 }, { "epoch": 0.06732037717473106, "grad_norm": 2.7766803540416793, "learning_rate": 6.7320377174731064e-06, "loss": 0.8035, "step": 15207 }, { "epoch": 0.06732480410819425, "grad_norm": 2.3606861543999975, "learning_rate": 6.732480410819427e-06, "loss": 0.5356, "step": 15208 }, { "epoch": 0.06732923104165744, "grad_norm": 3.5973981444941696, "learning_rate": 6.732923104165745e-06, "loss": 1.1137, "step": 15209 }, { "epoch": 0.06733365797512063, "grad_norm": 4.421889897959466, "learning_rate": 6.733365797512064e-06, "loss": 0.9437, "step": 15210 }, { "epoch": 0.06733808490858383, "grad_norm": 2.5120914884298022, "learning_rate": 6.733808490858383e-06, "loss": 0.6292, "step": 15211 }, { "epoch": 0.06734251184204701, "grad_norm": 2.8023146791153133, "learning_rate": 6.7342511842047025e-06, "loss": 0.767, "step": 15212 }, { "epoch": 0.0673469387755102, "grad_norm": 2.208401446102813, "learning_rate": 6.734693877551021e-06, "loss": 0.4584, "step": 15213 }, { "epoch": 0.06735136570897339, "grad_norm": 2.32171655468182, "learning_rate": 6.73513657089734e-06, "loss": 0.7692, "step": 15214 }, { "epoch": 0.06735579264243659, "grad_norm": 2.638699068756476, "learning_rate": 6.735579264243659e-06, "loss": 0.8299, "step": 15215 }, { "epoch": 0.06736021957589977, "grad_norm": 2.1407914420173295, "learning_rate": 6.736021957589977e-06, "loss": 0.5481, "step": 15216 }, { "epoch": 0.06736464650936297, "grad_norm": 3.3104094832657625, "learning_rate": 6.736464650936298e-06, "loss": 1.2957, "step": 15217 }, { "epoch": 0.06736907344282615, "grad_norm": 3.4302776150518137, "learning_rate": 6.736907344282616e-06, "loss": 0.6712, "step": 15218 }, { "epoch": 0.06737350037628935, "grad_norm": 2.1737435119412716, "learning_rate": 6.737350037628935e-06, "loss": 0.7506, "step": 15219 }, { "epoch": 0.06737792730975253, "grad_norm": 3.5213900804311367, "learning_rate": 6.737792730975254e-06, "loss": 0.7125, "step": 15220 }, { "epoch": 0.06738235424321573, "grad_norm": 3.014992557855223, "learning_rate": 6.738235424321573e-06, "loss": 0.986, "step": 15221 }, { "epoch": 0.06738678117667891, "grad_norm": 2.5984205714789113, "learning_rate": 6.738678117667892e-06, "loss": 0.776, "step": 15222 }, { "epoch": 0.0673912081101421, "grad_norm": 2.31439255743219, "learning_rate": 6.739120811014211e-06, "loss": 0.6476, "step": 15223 }, { "epoch": 0.0673956350436053, "grad_norm": 2.2631293028855244, "learning_rate": 6.73956350436053e-06, "loss": 0.5046, "step": 15224 }, { "epoch": 0.06740006197706848, "grad_norm": 2.8497301693593466, "learning_rate": 6.740006197706848e-06, "loss": 0.8048, "step": 15225 }, { "epoch": 0.06740448891053168, "grad_norm": 2.937283860032286, "learning_rate": 6.7404488910531686e-06, "loss": 1.1557, "step": 15226 }, { "epoch": 0.06740891584399486, "grad_norm": 3.4718457899975292, "learning_rate": 6.740891584399487e-06, "loss": 0.7942, "step": 15227 }, { "epoch": 0.06741334277745806, "grad_norm": 2.531519120714572, "learning_rate": 6.741334277745806e-06, "loss": 0.8399, "step": 15228 }, { "epoch": 0.06741776971092124, "grad_norm": 2.5764134485687444, "learning_rate": 6.741776971092126e-06, "loss": 0.4621, "step": 15229 }, { "epoch": 0.06742219664438444, "grad_norm": 2.3438782210510127, "learning_rate": 6.742219664438444e-06, "loss": 0.5857, "step": 15230 }, { "epoch": 0.06742662357784762, "grad_norm": 2.015427184720568, "learning_rate": 6.742662357784763e-06, "loss": 0.5871, "step": 15231 }, { "epoch": 0.06743105051131082, "grad_norm": 3.141924096331645, "learning_rate": 6.743105051131082e-06, "loss": 1.0697, "step": 15232 }, { "epoch": 0.067435477444774, "grad_norm": 2.786003545240772, "learning_rate": 6.743547744477401e-06, "loss": 0.7342, "step": 15233 }, { "epoch": 0.0674399043782372, "grad_norm": 2.356618332233986, "learning_rate": 6.74399043782372e-06, "loss": 0.8227, "step": 15234 }, { "epoch": 0.06744433131170038, "grad_norm": 2.904095033480649, "learning_rate": 6.7444331311700395e-06, "loss": 0.6216, "step": 15235 }, { "epoch": 0.06744875824516358, "grad_norm": 2.6222067820105144, "learning_rate": 6.744875824516358e-06, "loss": 0.7542, "step": 15236 }, { "epoch": 0.06745318517862676, "grad_norm": 3.5518579219094626, "learning_rate": 6.7453185178626766e-06, "loss": 1.1744, "step": 15237 }, { "epoch": 0.06745761211208995, "grad_norm": 2.7019621821534234, "learning_rate": 6.745761211208997e-06, "loss": 0.7073, "step": 15238 }, { "epoch": 0.06746203904555315, "grad_norm": 3.1218882328078843, "learning_rate": 6.746203904555315e-06, "loss": 0.8349, "step": 15239 }, { "epoch": 0.06746646597901633, "grad_norm": 3.2735749194754122, "learning_rate": 6.746646597901634e-06, "loss": 1.2794, "step": 15240 }, { "epoch": 0.06747089291247953, "grad_norm": 2.1300114225009343, "learning_rate": 6.747089291247953e-06, "loss": 0.6505, "step": 15241 }, { "epoch": 0.06747531984594271, "grad_norm": 3.164698568155163, "learning_rate": 6.7475319845942726e-06, "loss": 0.9758, "step": 15242 }, { "epoch": 0.06747974677940591, "grad_norm": 2.800203222998263, "learning_rate": 6.747974677940591e-06, "loss": 0.8352, "step": 15243 }, { "epoch": 0.06748417371286909, "grad_norm": 3.016829244760161, "learning_rate": 6.7484173712869105e-06, "loss": 0.9922, "step": 15244 }, { "epoch": 0.06748860064633229, "grad_norm": 3.3468967465349104, "learning_rate": 6.748860064633229e-06, "loss": 0.9179, "step": 15245 }, { "epoch": 0.06749302757979547, "grad_norm": 3.7507828182401255, "learning_rate": 6.7493027579795475e-06, "loss": 1.0158, "step": 15246 }, { "epoch": 0.06749745451325867, "grad_norm": 2.8504103637629945, "learning_rate": 6.749745451325868e-06, "loss": 1.0134, "step": 15247 }, { "epoch": 0.06750188144672185, "grad_norm": 2.432473455315503, "learning_rate": 6.750188144672186e-06, "loss": 0.5966, "step": 15248 }, { "epoch": 0.06750630838018505, "grad_norm": 3.1382060503627236, "learning_rate": 6.750630838018505e-06, "loss": 1.0017, "step": 15249 }, { "epoch": 0.06751073531364823, "grad_norm": 2.5536853271310815, "learning_rate": 6.751073531364825e-06, "loss": 0.8007, "step": 15250 }, { "epoch": 0.06751516224711143, "grad_norm": 2.6197973055779795, "learning_rate": 6.7515162247111435e-06, "loss": 0.8872, "step": 15251 }, { "epoch": 0.06751958918057462, "grad_norm": 3.20254901937428, "learning_rate": 6.751958918057462e-06, "loss": 0.7681, "step": 15252 }, { "epoch": 0.0675240161140378, "grad_norm": 2.9871524685540125, "learning_rate": 6.752401611403781e-06, "loss": 0.8665, "step": 15253 }, { "epoch": 0.067528443047501, "grad_norm": 3.5269694081980036, "learning_rate": 6.7528443047501e-06, "loss": 0.6997, "step": 15254 }, { "epoch": 0.06753286998096418, "grad_norm": 2.755139119259525, "learning_rate": 6.7532869980964184e-06, "loss": 0.7159, "step": 15255 }, { "epoch": 0.06753729691442738, "grad_norm": 2.932324297641608, "learning_rate": 6.753729691442739e-06, "loss": 0.8336, "step": 15256 }, { "epoch": 0.06754172384789056, "grad_norm": 2.541721121932454, "learning_rate": 6.754172384789057e-06, "loss": 0.6792, "step": 15257 }, { "epoch": 0.06754615078135376, "grad_norm": 3.047292542549675, "learning_rate": 6.754615078135376e-06, "loss": 0.9272, "step": 15258 }, { "epoch": 0.06755057771481694, "grad_norm": 2.59339170969936, "learning_rate": 6.755057771481696e-06, "loss": 0.6964, "step": 15259 }, { "epoch": 0.06755500464828014, "grad_norm": 2.871186190066509, "learning_rate": 6.7555004648280145e-06, "loss": 0.4748, "step": 15260 }, { "epoch": 0.06755943158174332, "grad_norm": 2.71203124702088, "learning_rate": 6.755943158174333e-06, "loss": 0.6308, "step": 15261 }, { "epoch": 0.06756385851520652, "grad_norm": 2.3368232465762504, "learning_rate": 6.756385851520652e-06, "loss": 0.8035, "step": 15262 }, { "epoch": 0.0675682854486697, "grad_norm": 2.467846341098626, "learning_rate": 6.756828544866971e-06, "loss": 0.576, "step": 15263 }, { "epoch": 0.0675727123821329, "grad_norm": 3.4770819086393305, "learning_rate": 6.75727123821329e-06, "loss": 1.3396, "step": 15264 }, { "epoch": 0.06757713931559609, "grad_norm": 2.3702363931530552, "learning_rate": 6.75771393155961e-06, "loss": 0.7099, "step": 15265 }, { "epoch": 0.06758156624905928, "grad_norm": 2.6885157581607078, "learning_rate": 6.758156624905928e-06, "loss": 0.9319, "step": 15266 }, { "epoch": 0.06758599318252247, "grad_norm": 2.5380676050707462, "learning_rate": 6.758599318252247e-06, "loss": 0.5713, "step": 15267 }, { "epoch": 0.06759042011598565, "grad_norm": 2.7479916333876684, "learning_rate": 6.759042011598567e-06, "loss": 1.0752, "step": 15268 }, { "epoch": 0.06759484704944885, "grad_norm": 2.2205502703917035, "learning_rate": 6.759484704944885e-06, "loss": 0.5363, "step": 15269 }, { "epoch": 0.06759927398291203, "grad_norm": 2.6452728666185665, "learning_rate": 6.759927398291204e-06, "loss": 0.714, "step": 15270 }, { "epoch": 0.06760370091637523, "grad_norm": 3.099519812641182, "learning_rate": 6.760370091637523e-06, "loss": 1.0007, "step": 15271 }, { "epoch": 0.06760812784983841, "grad_norm": 3.178745335027447, "learning_rate": 6.760812784983843e-06, "loss": 1.0526, "step": 15272 }, { "epoch": 0.06761255478330161, "grad_norm": 2.3811386475591543, "learning_rate": 6.761255478330161e-06, "loss": 0.7321, "step": 15273 }, { "epoch": 0.06761698171676479, "grad_norm": 2.6655680508764488, "learning_rate": 6.7616981716764806e-06, "loss": 0.4996, "step": 15274 }, { "epoch": 0.06762140865022799, "grad_norm": 2.7653712591212805, "learning_rate": 6.762140865022799e-06, "loss": 0.8058, "step": 15275 }, { "epoch": 0.06762583558369117, "grad_norm": 2.3690736635058975, "learning_rate": 6.762583558369118e-06, "loss": 0.538, "step": 15276 }, { "epoch": 0.06763026251715437, "grad_norm": 2.9071897971652416, "learning_rate": 6.763026251715438e-06, "loss": 1.0123, "step": 15277 }, { "epoch": 0.06763468945061755, "grad_norm": 2.755139825577186, "learning_rate": 6.763468945061756e-06, "loss": 0.633, "step": 15278 }, { "epoch": 0.06763911638408075, "grad_norm": 2.996470396370716, "learning_rate": 6.763911638408075e-06, "loss": 1.0503, "step": 15279 }, { "epoch": 0.06764354331754394, "grad_norm": 2.707297081369568, "learning_rate": 6.764354331754395e-06, "loss": 0.6949, "step": 15280 }, { "epoch": 0.06764797025100713, "grad_norm": 2.2711981205340956, "learning_rate": 6.764797025100714e-06, "loss": 0.5058, "step": 15281 }, { "epoch": 0.06765239718447032, "grad_norm": 3.2264922756798997, "learning_rate": 6.765239718447032e-06, "loss": 0.9114, "step": 15282 }, { "epoch": 0.0676568241179335, "grad_norm": 2.4637001168308017, "learning_rate": 6.7656824117933515e-06, "loss": 0.8368, "step": 15283 }, { "epoch": 0.0676612510513967, "grad_norm": 3.8883535650199756, "learning_rate": 6.76612510513967e-06, "loss": 0.8401, "step": 15284 }, { "epoch": 0.06766567798485988, "grad_norm": 3.2043204083248753, "learning_rate": 6.766567798485989e-06, "loss": 0.9427, "step": 15285 }, { "epoch": 0.06767010491832308, "grad_norm": 2.876019200133462, "learning_rate": 6.767010491832309e-06, "loss": 0.4862, "step": 15286 }, { "epoch": 0.06767453185178626, "grad_norm": 3.1141700645794246, "learning_rate": 6.767453185178627e-06, "loss": 1.0077, "step": 15287 }, { "epoch": 0.06767895878524946, "grad_norm": 3.352248431055868, "learning_rate": 6.767895878524946e-06, "loss": 0.7615, "step": 15288 }, { "epoch": 0.06768338571871264, "grad_norm": 3.0622573356187774, "learning_rate": 6.768338571871266e-06, "loss": 1.1312, "step": 15289 }, { "epoch": 0.06768781265217584, "grad_norm": 2.6847727813925557, "learning_rate": 6.7687812652175846e-06, "loss": 0.7717, "step": 15290 }, { "epoch": 0.06769223958563902, "grad_norm": 2.4296295399003895, "learning_rate": 6.769223958563903e-06, "loss": 0.5984, "step": 15291 }, { "epoch": 0.06769666651910222, "grad_norm": 2.6323057982267084, "learning_rate": 6.7696666519102225e-06, "loss": 0.8667, "step": 15292 }, { "epoch": 0.0677010934525654, "grad_norm": 2.586169417797334, "learning_rate": 6.770109345256541e-06, "loss": 0.576, "step": 15293 }, { "epoch": 0.0677055203860286, "grad_norm": 2.7855783628683817, "learning_rate": 6.77055203860286e-06, "loss": 0.9723, "step": 15294 }, { "epoch": 0.06770994731949179, "grad_norm": 2.46734429738376, "learning_rate": 6.77099473194918e-06, "loss": 0.4747, "step": 15295 }, { "epoch": 0.06771437425295498, "grad_norm": 3.6494856012415084, "learning_rate": 6.771437425295498e-06, "loss": 1.1528, "step": 15296 }, { "epoch": 0.06771880118641817, "grad_norm": 2.9485595083385316, "learning_rate": 6.771880118641817e-06, "loss": 0.761, "step": 15297 }, { "epoch": 0.06772322811988135, "grad_norm": 2.399759514166009, "learning_rate": 6.772322811988137e-06, "loss": 0.8437, "step": 15298 }, { "epoch": 0.06772765505334455, "grad_norm": 2.5212889731184673, "learning_rate": 6.7727655053344555e-06, "loss": 0.8173, "step": 15299 }, { "epoch": 0.06773208198680773, "grad_norm": 3.074889233230228, "learning_rate": 6.773208198680774e-06, "loss": 0.7248, "step": 15300 }, { "epoch": 0.06773650892027093, "grad_norm": 2.3253149185079933, "learning_rate": 6.773650892027093e-06, "loss": 0.7009, "step": 15301 }, { "epoch": 0.06774093585373411, "grad_norm": 2.6371795040069235, "learning_rate": 6.774093585373413e-06, "loss": 0.796, "step": 15302 }, { "epoch": 0.06774536278719731, "grad_norm": 2.856821453233146, "learning_rate": 6.774536278719731e-06, "loss": 1.1914, "step": 15303 }, { "epoch": 0.0677497897206605, "grad_norm": 3.424797096662223, "learning_rate": 6.774978972066051e-06, "loss": 1.071, "step": 15304 }, { "epoch": 0.06775421665412369, "grad_norm": 2.6414537432418226, "learning_rate": 6.775421665412369e-06, "loss": 0.5411, "step": 15305 }, { "epoch": 0.06775864358758688, "grad_norm": 2.4118158992464154, "learning_rate": 6.775864358758688e-06, "loss": 0.734, "step": 15306 }, { "epoch": 0.06776307052105007, "grad_norm": 2.8339610701059725, "learning_rate": 6.776307052105008e-06, "loss": 0.8938, "step": 15307 }, { "epoch": 0.06776749745451326, "grad_norm": 2.337912045806166, "learning_rate": 6.7767497454513265e-06, "loss": 0.5436, "step": 15308 }, { "epoch": 0.06777192438797645, "grad_norm": 2.6130294540657366, "learning_rate": 6.777192438797645e-06, "loss": 0.7159, "step": 15309 }, { "epoch": 0.06777635132143964, "grad_norm": 2.285413801731859, "learning_rate": 6.777635132143965e-06, "loss": 0.6823, "step": 15310 }, { "epoch": 0.06778077825490283, "grad_norm": 2.511377290690751, "learning_rate": 6.778077825490284e-06, "loss": 0.7758, "step": 15311 }, { "epoch": 0.06778520518836602, "grad_norm": 3.0312221000972994, "learning_rate": 6.778520518836602e-06, "loss": 0.9144, "step": 15312 }, { "epoch": 0.06778963212182922, "grad_norm": 2.706520437649562, "learning_rate": 6.778963212182922e-06, "loss": 0.8126, "step": 15313 }, { "epoch": 0.0677940590552924, "grad_norm": 2.6034432834336316, "learning_rate": 6.77940590552924e-06, "loss": 0.8799, "step": 15314 }, { "epoch": 0.06779848598875558, "grad_norm": 2.647698367207588, "learning_rate": 6.7798485988755595e-06, "loss": 0.8617, "step": 15315 }, { "epoch": 0.06780291292221878, "grad_norm": 2.7249885456499405, "learning_rate": 6.780291292221879e-06, "loss": 0.6673, "step": 15316 }, { "epoch": 0.06780733985568196, "grad_norm": 2.7454201595276557, "learning_rate": 6.780733985568197e-06, "loss": 0.5694, "step": 15317 }, { "epoch": 0.06781176678914516, "grad_norm": 2.492881367340473, "learning_rate": 6.781176678914516e-06, "loss": 0.5505, "step": 15318 }, { "epoch": 0.06781619372260834, "grad_norm": 2.5483432269077024, "learning_rate": 6.781619372260836e-06, "loss": 0.9705, "step": 15319 }, { "epoch": 0.06782062065607154, "grad_norm": 2.2166287525136377, "learning_rate": 6.782062065607155e-06, "loss": 0.7536, "step": 15320 }, { "epoch": 0.06782504758953473, "grad_norm": 2.9663182721027366, "learning_rate": 6.782504758953473e-06, "loss": 0.9636, "step": 15321 }, { "epoch": 0.06782947452299792, "grad_norm": 3.4468358886513677, "learning_rate": 6.7829474522997926e-06, "loss": 0.8522, "step": 15322 }, { "epoch": 0.06783390145646111, "grad_norm": 3.053257081462811, "learning_rate": 6.783390145646112e-06, "loss": 0.8371, "step": 15323 }, { "epoch": 0.0678383283899243, "grad_norm": 3.298730535213422, "learning_rate": 6.7838328389924305e-06, "loss": 0.9907, "step": 15324 }, { "epoch": 0.06784275532338749, "grad_norm": 2.6809728296590696, "learning_rate": 6.78427553233875e-06, "loss": 0.7278, "step": 15325 }, { "epoch": 0.06784718225685069, "grad_norm": 2.7643617205032647, "learning_rate": 6.784718225685068e-06, "loss": 0.7407, "step": 15326 }, { "epoch": 0.06785160919031387, "grad_norm": 2.7253162599477627, "learning_rate": 6.785160919031387e-06, "loss": 0.843, "step": 15327 }, { "epoch": 0.06785603612377707, "grad_norm": 2.4806209558531624, "learning_rate": 6.785603612377707e-06, "loss": 0.3343, "step": 15328 }, { "epoch": 0.06786046305724025, "grad_norm": 2.4514059406025286, "learning_rate": 6.786046305724026e-06, "loss": 0.4597, "step": 15329 }, { "epoch": 0.06786488999070343, "grad_norm": 3.3794575567532195, "learning_rate": 6.786488999070344e-06, "loss": 1.0984, "step": 15330 }, { "epoch": 0.06786931692416663, "grad_norm": 2.6895091537714513, "learning_rate": 6.786931692416664e-06, "loss": 0.9916, "step": 15331 }, { "epoch": 0.06787374385762981, "grad_norm": 2.438925175317717, "learning_rate": 6.787374385762983e-06, "loss": 0.7862, "step": 15332 }, { "epoch": 0.06787817079109301, "grad_norm": 3.333652147624925, "learning_rate": 6.787817079109301e-06, "loss": 0.5468, "step": 15333 }, { "epoch": 0.0678825977245562, "grad_norm": 2.434072444737571, "learning_rate": 6.788259772455621e-06, "loss": 0.4555, "step": 15334 }, { "epoch": 0.0678870246580194, "grad_norm": 2.6113207903640374, "learning_rate": 6.788702465801939e-06, "loss": 0.8657, "step": 15335 }, { "epoch": 0.06789145159148258, "grad_norm": 2.312200194704504, "learning_rate": 6.789145159148258e-06, "loss": 0.6786, "step": 15336 }, { "epoch": 0.06789587852494577, "grad_norm": 2.366948729146443, "learning_rate": 6.789587852494578e-06, "loss": 0.5251, "step": 15337 }, { "epoch": 0.06790030545840896, "grad_norm": 2.5215435903308543, "learning_rate": 6.7900305458408966e-06, "loss": 0.5274, "step": 15338 }, { "epoch": 0.06790473239187216, "grad_norm": 3.1555052676250384, "learning_rate": 6.790473239187215e-06, "loss": 1.0333, "step": 15339 }, { "epoch": 0.06790915932533534, "grad_norm": 3.2162844910235835, "learning_rate": 6.790915932533535e-06, "loss": 0.9195, "step": 15340 }, { "epoch": 0.06791358625879854, "grad_norm": 2.371968793536888, "learning_rate": 6.791358625879854e-06, "loss": 0.5317, "step": 15341 }, { "epoch": 0.06791801319226172, "grad_norm": 2.6978634444008245, "learning_rate": 6.791801319226172e-06, "loss": 0.8899, "step": 15342 }, { "epoch": 0.06792244012572492, "grad_norm": 2.771766859097361, "learning_rate": 6.792244012572492e-06, "loss": 0.6334, "step": 15343 }, { "epoch": 0.0679268670591881, "grad_norm": 3.1750439192945388, "learning_rate": 6.79268670591881e-06, "loss": 0.8643, "step": 15344 }, { "epoch": 0.06793129399265128, "grad_norm": 2.723248200255128, "learning_rate": 6.79312939926513e-06, "loss": 0.75, "step": 15345 }, { "epoch": 0.06793572092611448, "grad_norm": 3.008182890393388, "learning_rate": 6.793572092611449e-06, "loss": 0.5132, "step": 15346 }, { "epoch": 0.06794014785957767, "grad_norm": 2.2050713975883243, "learning_rate": 6.7940147859577675e-06, "loss": 0.6942, "step": 15347 }, { "epoch": 0.06794457479304086, "grad_norm": 3.2764258634414167, "learning_rate": 6.794457479304086e-06, "loss": 1.069, "step": 15348 }, { "epoch": 0.06794900172650405, "grad_norm": 2.5983221236080447, "learning_rate": 6.794900172650406e-06, "loss": 0.4391, "step": 15349 }, { "epoch": 0.06795342865996724, "grad_norm": 2.563594053608556, "learning_rate": 6.795342865996725e-06, "loss": 0.5898, "step": 15350 }, { "epoch": 0.06795785559343043, "grad_norm": 2.776736040226308, "learning_rate": 6.795785559343043e-06, "loss": 0.7404, "step": 15351 }, { "epoch": 0.06796228252689362, "grad_norm": 2.2409644497175236, "learning_rate": 6.796228252689363e-06, "loss": 0.6787, "step": 15352 }, { "epoch": 0.06796670946035681, "grad_norm": 2.6600054912597315, "learning_rate": 6.796670946035682e-06, "loss": 0.7031, "step": 15353 }, { "epoch": 0.06797113639382, "grad_norm": 2.5008505212323597, "learning_rate": 6.7971136393820006e-06, "loss": 0.5972, "step": 15354 }, { "epoch": 0.06797556332728319, "grad_norm": 2.6611044225958596, "learning_rate": 6.79755633272832e-06, "loss": 0.7359, "step": 15355 }, { "epoch": 0.06797999026074639, "grad_norm": 3.0313498316320393, "learning_rate": 6.7979990260746385e-06, "loss": 0.7514, "step": 15356 }, { "epoch": 0.06798441719420957, "grad_norm": 3.5007721583450575, "learning_rate": 6.798441719420957e-06, "loss": 1.1362, "step": 15357 }, { "epoch": 0.06798884412767277, "grad_norm": 2.6540702383345507, "learning_rate": 6.798884412767277e-06, "loss": 0.7028, "step": 15358 }, { "epoch": 0.06799327106113595, "grad_norm": 2.8128624370033126, "learning_rate": 6.799327106113596e-06, "loss": 0.6223, "step": 15359 }, { "epoch": 0.06799769799459913, "grad_norm": 4.155190107380478, "learning_rate": 6.799769799459914e-06, "loss": 0.9522, "step": 15360 }, { "epoch": 0.06800212492806233, "grad_norm": 2.234566169422474, "learning_rate": 6.8002124928062345e-06, "loss": 0.4552, "step": 15361 }, { "epoch": 0.06800655186152552, "grad_norm": 2.621896228077618, "learning_rate": 6.800655186152553e-06, "loss": 0.763, "step": 15362 }, { "epoch": 0.06801097879498871, "grad_norm": 3.036715885011199, "learning_rate": 6.8010978794988715e-06, "loss": 0.9392, "step": 15363 }, { "epoch": 0.0680154057284519, "grad_norm": 2.4403106754567254, "learning_rate": 6.801540572845191e-06, "loss": 0.6374, "step": 15364 }, { "epoch": 0.0680198326619151, "grad_norm": 2.527036305030736, "learning_rate": 6.801983266191509e-06, "loss": 0.8587, "step": 15365 }, { "epoch": 0.06802425959537828, "grad_norm": 2.3335702831517566, "learning_rate": 6.802425959537829e-06, "loss": 0.636, "step": 15366 }, { "epoch": 0.06802868652884148, "grad_norm": 3.210933576491037, "learning_rate": 6.802868652884148e-06, "loss": 0.6732, "step": 15367 }, { "epoch": 0.06803311346230466, "grad_norm": 2.740766112652751, "learning_rate": 6.803311346230467e-06, "loss": 0.9911, "step": 15368 }, { "epoch": 0.06803754039576786, "grad_norm": 2.853642013501671, "learning_rate": 6.803754039576785e-06, "loss": 0.8063, "step": 15369 }, { "epoch": 0.06804196732923104, "grad_norm": 2.3167950698341535, "learning_rate": 6.804196732923105e-06, "loss": 0.5959, "step": 15370 }, { "epoch": 0.06804639426269424, "grad_norm": 2.0442750624517885, "learning_rate": 6.804639426269424e-06, "loss": 0.5344, "step": 15371 }, { "epoch": 0.06805082119615742, "grad_norm": 2.8044283839865463, "learning_rate": 6.8050821196157425e-06, "loss": 0.7763, "step": 15372 }, { "epoch": 0.06805524812962062, "grad_norm": 2.620345558748744, "learning_rate": 6.805524812962062e-06, "loss": 0.7544, "step": 15373 }, { "epoch": 0.0680596750630838, "grad_norm": 2.532617644059394, "learning_rate": 6.80596750630838e-06, "loss": 0.4564, "step": 15374 }, { "epoch": 0.06806410199654699, "grad_norm": 3.4960379163722464, "learning_rate": 6.8064101996547e-06, "loss": 1.1337, "step": 15375 }, { "epoch": 0.06806852893001018, "grad_norm": 2.5114309873964302, "learning_rate": 6.806852893001019e-06, "loss": 0.8095, "step": 15376 }, { "epoch": 0.06807295586347337, "grad_norm": 2.2032132808539844, "learning_rate": 6.807295586347338e-06, "loss": 0.546, "step": 15377 }, { "epoch": 0.06807738279693656, "grad_norm": 2.8005233490971517, "learning_rate": 6.807738279693656e-06, "loss": 0.6185, "step": 15378 }, { "epoch": 0.06808180973039975, "grad_norm": 2.968089822702122, "learning_rate": 6.808180973039976e-06, "loss": 0.8589, "step": 15379 }, { "epoch": 0.06808623666386295, "grad_norm": 2.1266667964470347, "learning_rate": 6.808623666386295e-06, "loss": 0.4847, "step": 15380 }, { "epoch": 0.06809066359732613, "grad_norm": 2.6524125224556623, "learning_rate": 6.809066359732613e-06, "loss": 0.7457, "step": 15381 }, { "epoch": 0.06809509053078933, "grad_norm": 2.8028908130948333, "learning_rate": 6.809509053078933e-06, "loss": 0.7048, "step": 15382 }, { "epoch": 0.06809951746425251, "grad_norm": 2.403742792661865, "learning_rate": 6.809951746425252e-06, "loss": 0.5712, "step": 15383 }, { "epoch": 0.06810394439771571, "grad_norm": 3.1187537992048533, "learning_rate": 6.810394439771571e-06, "loss": 0.5737, "step": 15384 }, { "epoch": 0.06810837133117889, "grad_norm": 2.945594750557816, "learning_rate": 6.81083713311789e-06, "loss": 0.7754, "step": 15385 }, { "epoch": 0.06811279826464209, "grad_norm": 2.362371349363347, "learning_rate": 6.8112798264642086e-06, "loss": 0.6008, "step": 15386 }, { "epoch": 0.06811722519810527, "grad_norm": 2.664301358389033, "learning_rate": 6.811722519810527e-06, "loss": 0.9255, "step": 15387 }, { "epoch": 0.06812165213156847, "grad_norm": 3.2589326954203344, "learning_rate": 6.812165213156847e-06, "loss": 0.8572, "step": 15388 }, { "epoch": 0.06812607906503165, "grad_norm": 2.9261394842021686, "learning_rate": 6.812607906503166e-06, "loss": 1.1113, "step": 15389 }, { "epoch": 0.06813050599849484, "grad_norm": 2.593738628764482, "learning_rate": 6.813050599849484e-06, "loss": 0.6505, "step": 15390 }, { "epoch": 0.06813493293195803, "grad_norm": 2.824198306144099, "learning_rate": 6.8134932931958046e-06, "loss": 0.7499, "step": 15391 }, { "epoch": 0.06813935986542122, "grad_norm": 2.351025044290917, "learning_rate": 6.813935986542123e-06, "loss": 0.7679, "step": 15392 }, { "epoch": 0.06814378679888441, "grad_norm": 2.849860168131677, "learning_rate": 6.814378679888442e-06, "loss": 0.6816, "step": 15393 }, { "epoch": 0.0681482137323476, "grad_norm": 2.516586545567448, "learning_rate": 6.814821373234761e-06, "loss": 0.8018, "step": 15394 }, { "epoch": 0.0681526406658108, "grad_norm": 2.382559778973619, "learning_rate": 6.8152640665810795e-06, "loss": 0.7307, "step": 15395 }, { "epoch": 0.06815706759927398, "grad_norm": 2.6297486051662737, "learning_rate": 6.815706759927399e-06, "loss": 0.6052, "step": 15396 }, { "epoch": 0.06816149453273718, "grad_norm": 4.532200182298856, "learning_rate": 6.816149453273718e-06, "loss": 0.9393, "step": 15397 }, { "epoch": 0.06816592146620036, "grad_norm": 2.7379505022136077, "learning_rate": 6.816592146620037e-06, "loss": 0.6633, "step": 15398 }, { "epoch": 0.06817034839966356, "grad_norm": 3.02148510273558, "learning_rate": 6.817034839966355e-06, "loss": 0.7856, "step": 15399 }, { "epoch": 0.06817477533312674, "grad_norm": 3.3864820436824306, "learning_rate": 6.8174775333126755e-06, "loss": 0.729, "step": 15400 }, { "epoch": 0.06817920226658994, "grad_norm": 2.844023988304301, "learning_rate": 6.817920226658994e-06, "loss": 0.7261, "step": 15401 }, { "epoch": 0.06818362920005312, "grad_norm": 2.530161404824823, "learning_rate": 6.8183629200053126e-06, "loss": 0.8087, "step": 15402 }, { "epoch": 0.06818805613351632, "grad_norm": 3.465806580469345, "learning_rate": 6.818805613351632e-06, "loss": 1.1185, "step": 15403 }, { "epoch": 0.0681924830669795, "grad_norm": 2.3825436599629253, "learning_rate": 6.819248306697951e-06, "loss": 0.7256, "step": 15404 }, { "epoch": 0.06819691000044269, "grad_norm": 2.1822879770447434, "learning_rate": 6.81969100004427e-06, "loss": 0.5735, "step": 15405 }, { "epoch": 0.06820133693390588, "grad_norm": 2.4494219402199313, "learning_rate": 6.820133693390589e-06, "loss": 0.7149, "step": 15406 }, { "epoch": 0.06820576386736907, "grad_norm": 2.6130829983137804, "learning_rate": 6.820576386736908e-06, "loss": 0.9408, "step": 15407 }, { "epoch": 0.06821019080083227, "grad_norm": 2.28516146734096, "learning_rate": 6.821019080083226e-06, "loss": 0.6987, "step": 15408 }, { "epoch": 0.06821461773429545, "grad_norm": 2.197386676192993, "learning_rate": 6.8214617734295465e-06, "loss": 0.5577, "step": 15409 }, { "epoch": 0.06821904466775865, "grad_norm": 2.4241952829306705, "learning_rate": 6.821904466775865e-06, "loss": 0.9047, "step": 15410 }, { "epoch": 0.06822347160122183, "grad_norm": 2.781336446743596, "learning_rate": 6.8223471601221835e-06, "loss": 1.0139, "step": 15411 }, { "epoch": 0.06822789853468503, "grad_norm": 2.9542509582359924, "learning_rate": 6.822789853468504e-06, "loss": 0.9738, "step": 15412 }, { "epoch": 0.06823232546814821, "grad_norm": 3.9669636211639, "learning_rate": 6.823232546814822e-06, "loss": 1.1193, "step": 15413 }, { "epoch": 0.06823675240161141, "grad_norm": 2.5817103452360035, "learning_rate": 6.823675240161141e-06, "loss": 0.6485, "step": 15414 }, { "epoch": 0.06824117933507459, "grad_norm": 2.5231684631773366, "learning_rate": 6.82411793350746e-06, "loss": 0.6206, "step": 15415 }, { "epoch": 0.06824560626853779, "grad_norm": 3.3587095005879246, "learning_rate": 6.824560626853779e-06, "loss": 1.2287, "step": 15416 }, { "epoch": 0.06825003320200097, "grad_norm": 2.4838007531526842, "learning_rate": 6.825003320200097e-06, "loss": 0.8131, "step": 15417 }, { "epoch": 0.06825446013546417, "grad_norm": 2.3825636108083827, "learning_rate": 6.825446013546417e-06, "loss": 0.7632, "step": 15418 }, { "epoch": 0.06825888706892735, "grad_norm": 2.634860691066281, "learning_rate": 6.825888706892736e-06, "loss": 0.674, "step": 15419 }, { "epoch": 0.06826331400239054, "grad_norm": 2.4509979183061485, "learning_rate": 6.8263314002390545e-06, "loss": 0.5846, "step": 15420 }, { "epoch": 0.06826774093585374, "grad_norm": 3.0508863970647844, "learning_rate": 6.826774093585375e-06, "loss": 0.9101, "step": 15421 }, { "epoch": 0.06827216786931692, "grad_norm": 2.5162461828553826, "learning_rate": 6.827216786931693e-06, "loss": 0.5797, "step": 15422 }, { "epoch": 0.06827659480278012, "grad_norm": 3.2421479531966697, "learning_rate": 6.827659480278012e-06, "loss": 1.0018, "step": 15423 }, { "epoch": 0.0682810217362433, "grad_norm": 2.7254664609619677, "learning_rate": 6.828102173624331e-06, "loss": 0.6387, "step": 15424 }, { "epoch": 0.0682854486697065, "grad_norm": 2.9238999536438968, "learning_rate": 6.82854486697065e-06, "loss": 0.8627, "step": 15425 }, { "epoch": 0.06828987560316968, "grad_norm": 2.832833766652519, "learning_rate": 6.828987560316969e-06, "loss": 0.8423, "step": 15426 }, { "epoch": 0.06829430253663288, "grad_norm": 2.2320859162162794, "learning_rate": 6.829430253663288e-06, "loss": 0.527, "step": 15427 }, { "epoch": 0.06829872947009606, "grad_norm": 2.9733283173169354, "learning_rate": 6.829872947009607e-06, "loss": 0.897, "step": 15428 }, { "epoch": 0.06830315640355926, "grad_norm": 2.5481342958391258, "learning_rate": 6.830315640355925e-06, "loss": 0.7004, "step": 15429 }, { "epoch": 0.06830758333702244, "grad_norm": 3.0500218287658756, "learning_rate": 6.830758333702246e-06, "loss": 0.9154, "step": 15430 }, { "epoch": 0.06831201027048564, "grad_norm": 2.6940007059422126, "learning_rate": 6.831201027048564e-06, "loss": 0.6266, "step": 15431 }, { "epoch": 0.06831643720394882, "grad_norm": 3.5079926037609908, "learning_rate": 6.831643720394883e-06, "loss": 1.0121, "step": 15432 }, { "epoch": 0.06832086413741202, "grad_norm": 4.284812848324588, "learning_rate": 6.832086413741202e-06, "loss": 1.296, "step": 15433 }, { "epoch": 0.0683252910708752, "grad_norm": 3.2733793101462494, "learning_rate": 6.832529107087521e-06, "loss": 0.8357, "step": 15434 }, { "epoch": 0.06832971800433839, "grad_norm": 2.5216776088766077, "learning_rate": 6.83297180043384e-06, "loss": 0.6679, "step": 15435 }, { "epoch": 0.06833414493780159, "grad_norm": 2.6833075277717535, "learning_rate": 6.833414493780159e-06, "loss": 0.6458, "step": 15436 }, { "epoch": 0.06833857187126477, "grad_norm": 2.6176898841220857, "learning_rate": 6.833857187126478e-06, "loss": 0.7576, "step": 15437 }, { "epoch": 0.06834299880472797, "grad_norm": 2.402859612525405, "learning_rate": 6.834299880472796e-06, "loss": 0.6443, "step": 15438 }, { "epoch": 0.06834742573819115, "grad_norm": 3.2067303366244673, "learning_rate": 6.8347425738191166e-06, "loss": 1.0056, "step": 15439 }, { "epoch": 0.06835185267165435, "grad_norm": 3.360273227398212, "learning_rate": 6.835185267165435e-06, "loss": 1.2033, "step": 15440 }, { "epoch": 0.06835627960511753, "grad_norm": 2.4313155123390313, "learning_rate": 6.835627960511754e-06, "loss": 0.6477, "step": 15441 }, { "epoch": 0.06836070653858073, "grad_norm": 2.9544161270517892, "learning_rate": 6.836070653858074e-06, "loss": 0.8397, "step": 15442 }, { "epoch": 0.06836513347204391, "grad_norm": 2.9094782949076707, "learning_rate": 6.836513347204392e-06, "loss": 0.975, "step": 15443 }, { "epoch": 0.06836956040550711, "grad_norm": 2.3371092916163114, "learning_rate": 6.836956040550711e-06, "loss": 0.661, "step": 15444 }, { "epoch": 0.0683739873389703, "grad_norm": 3.011512965085893, "learning_rate": 6.83739873389703e-06, "loss": 0.8939, "step": 15445 }, { "epoch": 0.06837841427243349, "grad_norm": 2.4714374065894025, "learning_rate": 6.837841427243349e-06, "loss": 0.6188, "step": 15446 }, { "epoch": 0.06838284120589667, "grad_norm": 2.6402971677953886, "learning_rate": 6.838284120589668e-06, "loss": 0.812, "step": 15447 }, { "epoch": 0.06838726813935987, "grad_norm": 2.667974957282394, "learning_rate": 6.8387268139359875e-06, "loss": 0.4878, "step": 15448 }, { "epoch": 0.06839169507282306, "grad_norm": 4.243037373161959, "learning_rate": 6.839169507282306e-06, "loss": 1.0526, "step": 15449 }, { "epoch": 0.06839612200628624, "grad_norm": 2.963222842179534, "learning_rate": 6.8396122006286246e-06, "loss": 0.754, "step": 15450 }, { "epoch": 0.06840054893974944, "grad_norm": 2.722701394795894, "learning_rate": 6.840054893974945e-06, "loss": 0.663, "step": 15451 }, { "epoch": 0.06840497587321262, "grad_norm": 2.628299121240975, "learning_rate": 6.840497587321263e-06, "loss": 0.7192, "step": 15452 }, { "epoch": 0.06840940280667582, "grad_norm": 2.7396955789478477, "learning_rate": 6.840940280667582e-06, "loss": 0.71, "step": 15453 }, { "epoch": 0.068413829740139, "grad_norm": 2.4328349078097817, "learning_rate": 6.841382974013901e-06, "loss": 0.6721, "step": 15454 }, { "epoch": 0.0684182566736022, "grad_norm": 3.061528664789232, "learning_rate": 6.84182566736022e-06, "loss": 0.8825, "step": 15455 }, { "epoch": 0.06842268360706538, "grad_norm": 2.4945989683877263, "learning_rate": 6.842268360706539e-06, "loss": 0.7667, "step": 15456 }, { "epoch": 0.06842711054052858, "grad_norm": 2.4720134751360323, "learning_rate": 6.8427110540528585e-06, "loss": 0.6488, "step": 15457 }, { "epoch": 0.06843153747399176, "grad_norm": 3.014293921553734, "learning_rate": 6.843153747399177e-06, "loss": 0.6312, "step": 15458 }, { "epoch": 0.06843596440745496, "grad_norm": 2.8101487748732406, "learning_rate": 6.8435964407454955e-06, "loss": 0.5204, "step": 15459 }, { "epoch": 0.06844039134091814, "grad_norm": 3.030023678968993, "learning_rate": 6.844039134091816e-06, "loss": 0.6737, "step": 15460 }, { "epoch": 0.06844481827438134, "grad_norm": 2.9045131512537385, "learning_rate": 6.844481827438134e-06, "loss": 0.8215, "step": 15461 }, { "epoch": 0.06844924520784453, "grad_norm": 2.2835843309304455, "learning_rate": 6.844924520784453e-06, "loss": 0.6158, "step": 15462 }, { "epoch": 0.06845367214130772, "grad_norm": 2.4046167268233094, "learning_rate": 6.845367214130772e-06, "loss": 0.6107, "step": 15463 }, { "epoch": 0.0684580990747709, "grad_norm": 2.9420292038612037, "learning_rate": 6.8458099074770915e-06, "loss": 0.9317, "step": 15464 }, { "epoch": 0.06846252600823409, "grad_norm": 2.512141827560285, "learning_rate": 6.84625260082341e-06, "loss": 0.7265, "step": 15465 }, { "epoch": 0.06846695294169729, "grad_norm": 2.714152060346323, "learning_rate": 6.846695294169729e-06, "loss": 0.6139, "step": 15466 }, { "epoch": 0.06847137987516047, "grad_norm": 2.354300978052627, "learning_rate": 6.847137987516048e-06, "loss": 0.7927, "step": 15467 }, { "epoch": 0.06847580680862367, "grad_norm": 3.525078720693769, "learning_rate": 6.8475806808623665e-06, "loss": 0.761, "step": 15468 }, { "epoch": 0.06848023374208685, "grad_norm": 2.6030817088044156, "learning_rate": 6.848023374208687e-06, "loss": 0.8288, "step": 15469 }, { "epoch": 0.06848466067555005, "grad_norm": 2.0951818588085733, "learning_rate": 6.848466067555005e-06, "loss": 0.4764, "step": 15470 }, { "epoch": 0.06848908760901323, "grad_norm": 2.4369639760353854, "learning_rate": 6.848908760901324e-06, "loss": 0.6995, "step": 15471 }, { "epoch": 0.06849351454247643, "grad_norm": 2.9112210842864163, "learning_rate": 6.849351454247644e-06, "loss": 0.8401, "step": 15472 }, { "epoch": 0.06849794147593961, "grad_norm": 2.262630697206016, "learning_rate": 6.8497941475939625e-06, "loss": 0.4383, "step": 15473 }, { "epoch": 0.06850236840940281, "grad_norm": 2.861646558476716, "learning_rate": 6.850236840940281e-06, "loss": 0.7531, "step": 15474 }, { "epoch": 0.068506795342866, "grad_norm": 2.9804588147761364, "learning_rate": 6.8506795342866e-06, "loss": 0.7829, "step": 15475 }, { "epoch": 0.06851122227632919, "grad_norm": 2.564123089532059, "learning_rate": 6.851122227632919e-06, "loss": 0.8538, "step": 15476 }, { "epoch": 0.06851564920979238, "grad_norm": 2.3907695127417425, "learning_rate": 6.851564920979238e-06, "loss": 0.8587, "step": 15477 }, { "epoch": 0.06852007614325557, "grad_norm": 2.688983237431312, "learning_rate": 6.852007614325558e-06, "loss": 1.1164, "step": 15478 }, { "epoch": 0.06852450307671876, "grad_norm": 2.3184262223049332, "learning_rate": 6.852450307671876e-06, "loss": 0.6503, "step": 15479 }, { "epoch": 0.06852893001018194, "grad_norm": 2.396125699211565, "learning_rate": 6.852893001018195e-06, "loss": 0.7312, "step": 15480 }, { "epoch": 0.06853335694364514, "grad_norm": 3.3968452027054283, "learning_rate": 6.853335694364515e-06, "loss": 0.7011, "step": 15481 }, { "epoch": 0.06853778387710832, "grad_norm": 2.4711764248008437, "learning_rate": 6.853778387710833e-06, "loss": 0.8485, "step": 15482 }, { "epoch": 0.06854221081057152, "grad_norm": 2.418395427723271, "learning_rate": 6.854221081057152e-06, "loss": 0.7187, "step": 15483 }, { "epoch": 0.0685466377440347, "grad_norm": 2.365727745412157, "learning_rate": 6.854663774403471e-06, "loss": 0.7713, "step": 15484 }, { "epoch": 0.0685510646774979, "grad_norm": 2.986003151489041, "learning_rate": 6.855106467749791e-06, "loss": 0.896, "step": 15485 }, { "epoch": 0.06855549161096108, "grad_norm": 2.8054300867478315, "learning_rate": 6.855549161096109e-06, "loss": 0.8017, "step": 15486 }, { "epoch": 0.06855991854442428, "grad_norm": 3.355166963195942, "learning_rate": 6.8559918544424286e-06, "loss": 0.7903, "step": 15487 }, { "epoch": 0.06856434547788746, "grad_norm": 2.433307103994356, "learning_rate": 6.856434547788747e-06, "loss": 0.737, "step": 15488 }, { "epoch": 0.06856877241135066, "grad_norm": 2.867703493948617, "learning_rate": 6.856877241135066e-06, "loss": 0.7753, "step": 15489 }, { "epoch": 0.06857319934481385, "grad_norm": 3.4033777005629506, "learning_rate": 6.857319934481386e-06, "loss": 0.8845, "step": 15490 }, { "epoch": 0.06857762627827704, "grad_norm": 2.7701472350188143, "learning_rate": 6.857762627827704e-06, "loss": 0.8019, "step": 15491 }, { "epoch": 0.06858205321174023, "grad_norm": 2.6604475810841803, "learning_rate": 6.858205321174023e-06, "loss": 0.5012, "step": 15492 }, { "epoch": 0.06858648014520342, "grad_norm": 2.5836809703947883, "learning_rate": 6.858648014520342e-06, "loss": 0.7373, "step": 15493 }, { "epoch": 0.06859090707866661, "grad_norm": 3.420854476667207, "learning_rate": 6.859090707866662e-06, "loss": 1.0011, "step": 15494 }, { "epoch": 0.06859533401212979, "grad_norm": 2.2396275713282052, "learning_rate": 6.85953340121298e-06, "loss": 0.6147, "step": 15495 }, { "epoch": 0.06859976094559299, "grad_norm": 4.081844996355287, "learning_rate": 6.8599760945592995e-06, "loss": 1.0935, "step": 15496 }, { "epoch": 0.06860418787905617, "grad_norm": 2.1463312938896637, "learning_rate": 6.860418787905618e-06, "loss": 0.6975, "step": 15497 }, { "epoch": 0.06860861481251937, "grad_norm": 2.4293969223707195, "learning_rate": 6.8608614812519366e-06, "loss": 0.8532, "step": 15498 }, { "epoch": 0.06861304174598255, "grad_norm": 2.5209318703977397, "learning_rate": 6.861304174598257e-06, "loss": 0.4607, "step": 15499 }, { "epoch": 0.06861746867944575, "grad_norm": 2.7604490465831573, "learning_rate": 6.861746867944575e-06, "loss": 0.7963, "step": 15500 }, { "epoch": 0.06862189561290893, "grad_norm": 2.647937730170932, "learning_rate": 6.862189561290894e-06, "loss": 0.7103, "step": 15501 }, { "epoch": 0.06862632254637213, "grad_norm": 2.391118384434698, "learning_rate": 6.862632254637214e-06, "loss": 0.7968, "step": 15502 }, { "epoch": 0.06863074947983532, "grad_norm": 2.4652967469131823, "learning_rate": 6.8630749479835326e-06, "loss": 0.807, "step": 15503 }, { "epoch": 0.06863517641329851, "grad_norm": 3.3188174504458887, "learning_rate": 6.863517641329851e-06, "loss": 1.3119, "step": 15504 }, { "epoch": 0.0686396033467617, "grad_norm": 2.146611333659233, "learning_rate": 6.8639603346761705e-06, "loss": 0.5923, "step": 15505 }, { "epoch": 0.0686440302802249, "grad_norm": 3.1137456685252958, "learning_rate": 6.864403028022489e-06, "loss": 0.7111, "step": 15506 }, { "epoch": 0.06864845721368808, "grad_norm": 2.3600822804337653, "learning_rate": 6.864845721368808e-06, "loss": 0.6175, "step": 15507 }, { "epoch": 0.06865288414715127, "grad_norm": 2.1440301974970457, "learning_rate": 6.865288414715128e-06, "loss": 0.696, "step": 15508 }, { "epoch": 0.06865731108061446, "grad_norm": 3.3677732637988202, "learning_rate": 6.865731108061446e-06, "loss": 0.7855, "step": 15509 }, { "epoch": 0.06866173801407764, "grad_norm": 2.6845483344067564, "learning_rate": 6.866173801407765e-06, "loss": 0.752, "step": 15510 }, { "epoch": 0.06866616494754084, "grad_norm": 2.7150338627488715, "learning_rate": 6.866616494754085e-06, "loss": 0.652, "step": 15511 }, { "epoch": 0.06867059188100402, "grad_norm": 2.907658978418117, "learning_rate": 6.8670591881004035e-06, "loss": 0.7407, "step": 15512 }, { "epoch": 0.06867501881446722, "grad_norm": 3.051136040947347, "learning_rate": 6.867501881446722e-06, "loss": 1.0322, "step": 15513 }, { "epoch": 0.0686794457479304, "grad_norm": 2.8128189458073876, "learning_rate": 6.867944574793041e-06, "loss": 1.0397, "step": 15514 }, { "epoch": 0.0686838726813936, "grad_norm": 2.2050089578018834, "learning_rate": 6.868387268139361e-06, "loss": 0.6134, "step": 15515 }, { "epoch": 0.06868829961485678, "grad_norm": 2.848059854932996, "learning_rate": 6.868829961485679e-06, "loss": 1.0645, "step": 15516 }, { "epoch": 0.06869272654831998, "grad_norm": 2.970163016257246, "learning_rate": 6.869272654831999e-06, "loss": 0.7384, "step": 15517 }, { "epoch": 0.06869715348178317, "grad_norm": 3.0109460586119403, "learning_rate": 6.869715348178317e-06, "loss": 0.6786, "step": 15518 }, { "epoch": 0.06870158041524636, "grad_norm": 2.471572914111492, "learning_rate": 6.870158041524636e-06, "loss": 0.8073, "step": 15519 }, { "epoch": 0.06870600734870955, "grad_norm": 2.3877511284609283, "learning_rate": 6.870600734870956e-06, "loss": 0.7039, "step": 15520 }, { "epoch": 0.06871043428217274, "grad_norm": 2.7119902664620392, "learning_rate": 6.8710434282172745e-06, "loss": 0.7214, "step": 15521 }, { "epoch": 0.06871486121563593, "grad_norm": 2.6774867447928314, "learning_rate": 6.871486121563593e-06, "loss": 0.619, "step": 15522 }, { "epoch": 0.06871928814909913, "grad_norm": 3.016604299787738, "learning_rate": 6.871928814909913e-06, "loss": 0.7601, "step": 15523 }, { "epoch": 0.06872371508256231, "grad_norm": 2.625149869815563, "learning_rate": 6.872371508256232e-06, "loss": 0.4844, "step": 15524 }, { "epoch": 0.06872814201602549, "grad_norm": 2.5369475262876087, "learning_rate": 6.87281420160255e-06, "loss": 0.7624, "step": 15525 }, { "epoch": 0.06873256894948869, "grad_norm": 2.3151654650913316, "learning_rate": 6.87325689494887e-06, "loss": 0.6823, "step": 15526 }, { "epoch": 0.06873699588295187, "grad_norm": 2.585508497164255, "learning_rate": 6.873699588295188e-06, "loss": 0.9168, "step": 15527 }, { "epoch": 0.06874142281641507, "grad_norm": 2.3042963799318095, "learning_rate": 6.874142281641507e-06, "loss": 0.7727, "step": 15528 }, { "epoch": 0.06874584974987825, "grad_norm": 2.7874468873935365, "learning_rate": 6.874584974987827e-06, "loss": 0.7436, "step": 15529 }, { "epoch": 0.06875027668334145, "grad_norm": 2.8710692400570816, "learning_rate": 6.875027668334145e-06, "loss": 0.6645, "step": 15530 }, { "epoch": 0.06875470361680464, "grad_norm": 2.532048210079438, "learning_rate": 6.875470361680464e-06, "loss": 0.7506, "step": 15531 }, { "epoch": 0.06875913055026783, "grad_norm": 2.6015969785603064, "learning_rate": 6.875913055026784e-06, "loss": 0.7749, "step": 15532 }, { "epoch": 0.06876355748373102, "grad_norm": 2.9221983510844622, "learning_rate": 6.876355748373103e-06, "loss": 1.0665, "step": 15533 }, { "epoch": 0.06876798441719421, "grad_norm": 2.4512502965497265, "learning_rate": 6.876798441719421e-06, "loss": 0.8244, "step": 15534 }, { "epoch": 0.0687724113506574, "grad_norm": 3.193392018172672, "learning_rate": 6.8772411350657406e-06, "loss": 1.1618, "step": 15535 }, { "epoch": 0.0687768382841206, "grad_norm": 2.1108831762695486, "learning_rate": 6.877683828412059e-06, "loss": 0.5982, "step": 15536 }, { "epoch": 0.06878126521758378, "grad_norm": 2.2343065695712583, "learning_rate": 6.8781265217583785e-06, "loss": 0.5193, "step": 15537 }, { "epoch": 0.06878569215104698, "grad_norm": 2.5733238215791996, "learning_rate": 6.878569215104698e-06, "loss": 0.9208, "step": 15538 }, { "epoch": 0.06879011908451016, "grad_norm": 2.587365506118292, "learning_rate": 6.879011908451016e-06, "loss": 0.4315, "step": 15539 }, { "epoch": 0.06879454601797334, "grad_norm": 2.3681230760360985, "learning_rate": 6.879454601797335e-06, "loss": 0.5256, "step": 15540 }, { "epoch": 0.06879897295143654, "grad_norm": 2.322369935210308, "learning_rate": 6.879897295143655e-06, "loss": 0.6871, "step": 15541 }, { "epoch": 0.06880339988489972, "grad_norm": 2.609304680812556, "learning_rate": 6.880339988489974e-06, "loss": 0.6935, "step": 15542 }, { "epoch": 0.06880782681836292, "grad_norm": 3.0724746934099745, "learning_rate": 6.880782681836292e-06, "loss": 0.8191, "step": 15543 }, { "epoch": 0.0688122537518261, "grad_norm": 2.365865812891877, "learning_rate": 6.8812253751826115e-06, "loss": 0.6882, "step": 15544 }, { "epoch": 0.0688166806852893, "grad_norm": 2.8572103055288274, "learning_rate": 6.881668068528931e-06, "loss": 0.6151, "step": 15545 }, { "epoch": 0.06882110761875249, "grad_norm": 2.84050847217659, "learning_rate": 6.882110761875249e-06, "loss": 0.6888, "step": 15546 }, { "epoch": 0.06882553455221568, "grad_norm": 2.9992364426172142, "learning_rate": 6.882553455221569e-06, "loss": 0.7161, "step": 15547 }, { "epoch": 0.06882996148567887, "grad_norm": 3.3054754381084455, "learning_rate": 6.882996148567887e-06, "loss": 0.7094, "step": 15548 }, { "epoch": 0.06883438841914206, "grad_norm": 2.8127605452819835, "learning_rate": 6.883438841914206e-06, "loss": 0.7271, "step": 15549 }, { "epoch": 0.06883881535260525, "grad_norm": 3.093522251598442, "learning_rate": 6.883881535260526e-06, "loss": 1.0519, "step": 15550 }, { "epoch": 0.06884324228606845, "grad_norm": 3.130666337912238, "learning_rate": 6.884324228606845e-06, "loss": 0.5974, "step": 15551 }, { "epoch": 0.06884766921953163, "grad_norm": 3.2253376845027013, "learning_rate": 6.884766921953163e-06, "loss": 0.9151, "step": 15552 }, { "epoch": 0.06885209615299483, "grad_norm": 3.583526463405488, "learning_rate": 6.885209615299483e-06, "loss": 0.4813, "step": 15553 }, { "epoch": 0.06885652308645801, "grad_norm": 2.453426758491046, "learning_rate": 6.885652308645802e-06, "loss": 0.4336, "step": 15554 }, { "epoch": 0.0688609500199212, "grad_norm": 2.852222377200577, "learning_rate": 6.88609500199212e-06, "loss": 0.6213, "step": 15555 }, { "epoch": 0.06886537695338439, "grad_norm": 2.3846404972779265, "learning_rate": 6.88653769533844e-06, "loss": 0.6446, "step": 15556 }, { "epoch": 0.06886980388684757, "grad_norm": 2.863067699138449, "learning_rate": 6.886980388684758e-06, "loss": 0.9987, "step": 15557 }, { "epoch": 0.06887423082031077, "grad_norm": 3.0025492872477537, "learning_rate": 6.887423082031078e-06, "loss": 0.8436, "step": 15558 }, { "epoch": 0.06887865775377396, "grad_norm": 2.7146238305124504, "learning_rate": 6.887865775377397e-06, "loss": 0.7308, "step": 15559 }, { "epoch": 0.06888308468723715, "grad_norm": 2.589837931999089, "learning_rate": 6.8883084687237155e-06, "loss": 0.4645, "step": 15560 }, { "epoch": 0.06888751162070034, "grad_norm": 2.827003901442118, "learning_rate": 6.888751162070034e-06, "loss": 0.7666, "step": 15561 }, { "epoch": 0.06889193855416353, "grad_norm": 2.9145825553376135, "learning_rate": 6.889193855416354e-06, "loss": 0.9572, "step": 15562 }, { "epoch": 0.06889636548762672, "grad_norm": 3.1616303199227587, "learning_rate": 6.889636548762673e-06, "loss": 0.9509, "step": 15563 }, { "epoch": 0.06890079242108992, "grad_norm": 2.855428285818454, "learning_rate": 6.890079242108991e-06, "loss": 0.8938, "step": 15564 }, { "epoch": 0.0689052193545531, "grad_norm": 3.4485001725203457, "learning_rate": 6.890521935455311e-06, "loss": 1.1141, "step": 15565 }, { "epoch": 0.0689096462880163, "grad_norm": 3.695676664674355, "learning_rate": 6.89096462880163e-06, "loss": 1.1016, "step": 15566 }, { "epoch": 0.06891407322147948, "grad_norm": 2.37910534118074, "learning_rate": 6.891407322147949e-06, "loss": 0.705, "step": 15567 }, { "epoch": 0.06891850015494268, "grad_norm": 2.695648077925848, "learning_rate": 6.891850015494268e-06, "loss": 0.8557, "step": 15568 }, { "epoch": 0.06892292708840586, "grad_norm": 2.6163978317115957, "learning_rate": 6.8922927088405865e-06, "loss": 0.7011, "step": 15569 }, { "epoch": 0.06892735402186904, "grad_norm": 2.489278517685647, "learning_rate": 6.892735402186905e-06, "loss": 0.8466, "step": 15570 }, { "epoch": 0.06893178095533224, "grad_norm": 3.182769314625825, "learning_rate": 6.893178095533225e-06, "loss": 0.6753, "step": 15571 }, { "epoch": 0.06893620788879543, "grad_norm": 3.0506508666441814, "learning_rate": 6.893620788879544e-06, "loss": 0.5909, "step": 15572 }, { "epoch": 0.06894063482225862, "grad_norm": 2.8827689402190764, "learning_rate": 6.894063482225862e-06, "loss": 0.8403, "step": 15573 }, { "epoch": 0.0689450617557218, "grad_norm": 3.0199952106582773, "learning_rate": 6.894506175572182e-06, "loss": 1.0751, "step": 15574 }, { "epoch": 0.068949488689185, "grad_norm": 2.997391909628922, "learning_rate": 6.894948868918501e-06, "loss": 1.0379, "step": 15575 }, { "epoch": 0.06895391562264819, "grad_norm": 3.893588659834685, "learning_rate": 6.8953915622648195e-06, "loss": 1.3609, "step": 15576 }, { "epoch": 0.06895834255611138, "grad_norm": 2.999307182048566, "learning_rate": 6.895834255611139e-06, "loss": 0.906, "step": 15577 }, { "epoch": 0.06896276948957457, "grad_norm": 3.2288803076169303, "learning_rate": 6.896276948957457e-06, "loss": 0.9764, "step": 15578 }, { "epoch": 0.06896719642303777, "grad_norm": 2.855514505804366, "learning_rate": 6.896719642303776e-06, "loss": 0.8829, "step": 15579 }, { "epoch": 0.06897162335650095, "grad_norm": 2.500987763531765, "learning_rate": 6.897162335650096e-06, "loss": 0.5925, "step": 15580 }, { "epoch": 0.06897605028996415, "grad_norm": 2.57771747980557, "learning_rate": 6.897605028996415e-06, "loss": 0.6486, "step": 15581 }, { "epoch": 0.06898047722342733, "grad_norm": 3.447587766484613, "learning_rate": 6.898047722342733e-06, "loss": 0.6031, "step": 15582 }, { "epoch": 0.06898490415689053, "grad_norm": 2.5551512644622574, "learning_rate": 6.8984904156890534e-06, "loss": 0.6149, "step": 15583 }, { "epoch": 0.06898933109035371, "grad_norm": 3.044348112390182, "learning_rate": 6.898933109035372e-06, "loss": 0.8601, "step": 15584 }, { "epoch": 0.0689937580238169, "grad_norm": 2.681112787937439, "learning_rate": 6.8993758023816905e-06, "loss": 0.6638, "step": 15585 }, { "epoch": 0.06899818495728009, "grad_norm": 2.8937799004791036, "learning_rate": 6.89981849572801e-06, "loss": 0.9798, "step": 15586 }, { "epoch": 0.06900261189074328, "grad_norm": 2.6778567505299407, "learning_rate": 6.900261189074328e-06, "loss": 0.8338, "step": 15587 }, { "epoch": 0.06900703882420647, "grad_norm": 2.4983997286492876, "learning_rate": 6.900703882420648e-06, "loss": 0.7153, "step": 15588 }, { "epoch": 0.06901146575766966, "grad_norm": 2.516752551089477, "learning_rate": 6.901146575766967e-06, "loss": 0.5419, "step": 15589 }, { "epoch": 0.06901589269113285, "grad_norm": 2.9021984728651025, "learning_rate": 6.901589269113286e-06, "loss": 0.9691, "step": 15590 }, { "epoch": 0.06902031962459604, "grad_norm": 2.3572256602093264, "learning_rate": 6.902031962459604e-06, "loss": 0.7645, "step": 15591 }, { "epoch": 0.06902474655805924, "grad_norm": 3.2495637810622884, "learning_rate": 6.902474655805924e-06, "loss": 0.6665, "step": 15592 }, { "epoch": 0.06902917349152242, "grad_norm": 3.5516767459114407, "learning_rate": 6.902917349152243e-06, "loss": 0.9392, "step": 15593 }, { "epoch": 0.06903360042498562, "grad_norm": 3.157251731077479, "learning_rate": 6.903360042498561e-06, "loss": 0.4755, "step": 15594 }, { "epoch": 0.0690380273584488, "grad_norm": 3.5518316388879003, "learning_rate": 6.903802735844881e-06, "loss": 1.2952, "step": 15595 }, { "epoch": 0.069042454291912, "grad_norm": 2.7934075700214134, "learning_rate": 6.9042454291912e-06, "loss": 0.8187, "step": 15596 }, { "epoch": 0.06904688122537518, "grad_norm": 2.528473555916883, "learning_rate": 6.904688122537519e-06, "loss": 0.4499, "step": 15597 }, { "epoch": 0.06905130815883838, "grad_norm": 2.55450558080836, "learning_rate": 6.905130815883838e-06, "loss": 0.792, "step": 15598 }, { "epoch": 0.06905573509230156, "grad_norm": 3.742966304192584, "learning_rate": 6.905573509230157e-06, "loss": 0.8379, "step": 15599 }, { "epoch": 0.06906016202576475, "grad_norm": 3.0069998222973733, "learning_rate": 6.906016202576475e-06, "loss": 0.6712, "step": 15600 }, { "epoch": 0.06906458895922794, "grad_norm": 3.4059946863411548, "learning_rate": 6.906458895922795e-06, "loss": 0.8867, "step": 15601 }, { "epoch": 0.06906901589269113, "grad_norm": 2.608012068401601, "learning_rate": 6.906901589269114e-06, "loss": 0.7293, "step": 15602 }, { "epoch": 0.06907344282615432, "grad_norm": 2.816380972964826, "learning_rate": 6.907344282615432e-06, "loss": 0.6508, "step": 15603 }, { "epoch": 0.06907786975961751, "grad_norm": 2.629114808172462, "learning_rate": 6.907786975961753e-06, "loss": 0.8526, "step": 15604 }, { "epoch": 0.0690822966930807, "grad_norm": 2.1971535346090545, "learning_rate": 6.908229669308071e-06, "loss": 0.4817, "step": 15605 }, { "epoch": 0.06908672362654389, "grad_norm": 2.5737917864492013, "learning_rate": 6.90867236265439e-06, "loss": 0.7416, "step": 15606 }, { "epoch": 0.06909115056000709, "grad_norm": 2.557946697279825, "learning_rate": 6.909115056000709e-06, "loss": 0.8914, "step": 15607 }, { "epoch": 0.06909557749347027, "grad_norm": 3.350649121374687, "learning_rate": 6.9095577493470275e-06, "loss": 0.8774, "step": 15608 }, { "epoch": 0.06910000442693347, "grad_norm": 2.558186306948306, "learning_rate": 6.910000442693346e-06, "loss": 0.8176, "step": 15609 }, { "epoch": 0.06910443136039665, "grad_norm": 2.4169803226993585, "learning_rate": 6.910443136039666e-06, "loss": 0.5665, "step": 15610 }, { "epoch": 0.06910885829385985, "grad_norm": 2.3849199760501674, "learning_rate": 6.910885829385985e-06, "loss": 0.7882, "step": 15611 }, { "epoch": 0.06911328522732303, "grad_norm": 3.2443087013901906, "learning_rate": 6.911328522732303e-06, "loss": 0.8685, "step": 15612 }, { "epoch": 0.06911771216078623, "grad_norm": 2.2950258860022212, "learning_rate": 6.9117712160786235e-06, "loss": 0.4155, "step": 15613 }, { "epoch": 0.06912213909424941, "grad_norm": 2.3757907783306957, "learning_rate": 6.912213909424942e-06, "loss": 0.4903, "step": 15614 }, { "epoch": 0.06912656602771261, "grad_norm": 2.5244465497650594, "learning_rate": 6.912656602771261e-06, "loss": 0.8433, "step": 15615 }, { "epoch": 0.0691309929611758, "grad_norm": 3.595359656853907, "learning_rate": 6.91309929611758e-06, "loss": 1.1, "step": 15616 }, { "epoch": 0.06913541989463898, "grad_norm": 2.435022322460868, "learning_rate": 6.9135419894638985e-06, "loss": 0.821, "step": 15617 }, { "epoch": 0.06913984682810217, "grad_norm": 2.8771398549233944, "learning_rate": 6.913984682810218e-06, "loss": 0.858, "step": 15618 }, { "epoch": 0.06914427376156536, "grad_norm": 3.058324303865335, "learning_rate": 6.914427376156537e-06, "loss": 0.7702, "step": 15619 }, { "epoch": 0.06914870069502856, "grad_norm": 3.115778074537563, "learning_rate": 6.914870069502856e-06, "loss": 0.6827, "step": 15620 }, { "epoch": 0.06915312762849174, "grad_norm": 2.728302149229495, "learning_rate": 6.915312762849174e-06, "loss": 0.6539, "step": 15621 }, { "epoch": 0.06915755456195494, "grad_norm": 2.5113935015131683, "learning_rate": 6.9157554561954945e-06, "loss": 0.7689, "step": 15622 }, { "epoch": 0.06916198149541812, "grad_norm": 2.417465703349334, "learning_rate": 6.916198149541813e-06, "loss": 0.5558, "step": 15623 }, { "epoch": 0.06916640842888132, "grad_norm": 3.3078418454091354, "learning_rate": 6.9166408428881315e-06, "loss": 1.1335, "step": 15624 }, { "epoch": 0.0691708353623445, "grad_norm": 3.4288285453845004, "learning_rate": 6.917083536234451e-06, "loss": 1.1762, "step": 15625 }, { "epoch": 0.0691752622958077, "grad_norm": 2.479935013030923, "learning_rate": 6.91752622958077e-06, "loss": 0.6703, "step": 15626 }, { "epoch": 0.06917968922927088, "grad_norm": 2.75721824081117, "learning_rate": 6.917968922927089e-06, "loss": 0.673, "step": 15627 }, { "epoch": 0.06918411616273408, "grad_norm": 3.187797704127624, "learning_rate": 6.918411616273408e-06, "loss": 0.8487, "step": 15628 }, { "epoch": 0.06918854309619726, "grad_norm": 3.045420177359483, "learning_rate": 6.918854309619727e-06, "loss": 0.5755, "step": 15629 }, { "epoch": 0.06919297002966046, "grad_norm": 2.557913711643754, "learning_rate": 6.919297002966045e-06, "loss": 0.7595, "step": 15630 }, { "epoch": 0.06919739696312364, "grad_norm": 2.952074777117013, "learning_rate": 6.9197396963123654e-06, "loss": 0.8068, "step": 15631 }, { "epoch": 0.06920182389658683, "grad_norm": 3.0000800955288534, "learning_rate": 6.920182389658684e-06, "loss": 0.6785, "step": 15632 }, { "epoch": 0.06920625083005003, "grad_norm": 2.965253099773802, "learning_rate": 6.9206250830050025e-06, "loss": 0.7443, "step": 15633 }, { "epoch": 0.06921067776351321, "grad_norm": 3.575150782098648, "learning_rate": 6.921067776351323e-06, "loss": 0.4393, "step": 15634 }, { "epoch": 0.0692151046969764, "grad_norm": 2.385562631480492, "learning_rate": 6.921510469697641e-06, "loss": 0.5823, "step": 15635 }, { "epoch": 0.06921953163043959, "grad_norm": 2.901427364045454, "learning_rate": 6.92195316304396e-06, "loss": 0.8655, "step": 15636 }, { "epoch": 0.06922395856390279, "grad_norm": 2.351620737976244, "learning_rate": 6.922395856390279e-06, "loss": 0.6775, "step": 15637 }, { "epoch": 0.06922838549736597, "grad_norm": 2.413931852583837, "learning_rate": 6.922838549736598e-06, "loss": 0.5206, "step": 15638 }, { "epoch": 0.06923281243082917, "grad_norm": 2.5614197101638556, "learning_rate": 6.923281243082917e-06, "loss": 0.7873, "step": 15639 }, { "epoch": 0.06923723936429235, "grad_norm": 3.572937660475933, "learning_rate": 6.923723936429236e-06, "loss": 1.1089, "step": 15640 }, { "epoch": 0.06924166629775555, "grad_norm": 2.41333516465543, "learning_rate": 6.924166629775555e-06, "loss": 0.4965, "step": 15641 }, { "epoch": 0.06924609323121873, "grad_norm": 2.497513182292201, "learning_rate": 6.924609323121873e-06, "loss": 0.6918, "step": 15642 }, { "epoch": 0.06925052016468193, "grad_norm": 2.3207338360038383, "learning_rate": 6.925052016468194e-06, "loss": 0.7055, "step": 15643 }, { "epoch": 0.06925494709814511, "grad_norm": 3.0058523137575315, "learning_rate": 6.925494709814512e-06, "loss": 0.829, "step": 15644 }, { "epoch": 0.06925937403160831, "grad_norm": 3.2297337553144714, "learning_rate": 6.925937403160831e-06, "loss": 0.656, "step": 15645 }, { "epoch": 0.0692638009650715, "grad_norm": 3.132392780904886, "learning_rate": 6.92638009650715e-06, "loss": 1.004, "step": 15646 }, { "epoch": 0.06926822789853468, "grad_norm": 2.7067123987942017, "learning_rate": 6.9268227898534694e-06, "loss": 0.5811, "step": 15647 }, { "epoch": 0.06927265483199788, "grad_norm": 2.9066223216676264, "learning_rate": 6.927265483199788e-06, "loss": 0.7149, "step": 15648 }, { "epoch": 0.06927708176546106, "grad_norm": 3.375126434492991, "learning_rate": 6.927708176546107e-06, "loss": 1.1062, "step": 15649 }, { "epoch": 0.06928150869892426, "grad_norm": 2.303390720157244, "learning_rate": 6.928150869892426e-06, "loss": 0.8893, "step": 15650 }, { "epoch": 0.06928593563238744, "grad_norm": 2.4877745950573837, "learning_rate": 6.928593563238744e-06, "loss": 0.7716, "step": 15651 }, { "epoch": 0.06929036256585064, "grad_norm": 2.421653189197085, "learning_rate": 6.929036256585065e-06, "loss": 0.5676, "step": 15652 }, { "epoch": 0.06929478949931382, "grad_norm": 2.5613673972027473, "learning_rate": 6.929478949931383e-06, "loss": 0.782, "step": 15653 }, { "epoch": 0.06929921643277702, "grad_norm": 2.9358130081989313, "learning_rate": 6.929921643277702e-06, "loss": 0.9234, "step": 15654 }, { "epoch": 0.0693036433662402, "grad_norm": 3.098967840312425, "learning_rate": 6.930364336624021e-06, "loss": 0.8812, "step": 15655 }, { "epoch": 0.0693080702997034, "grad_norm": 3.2277592011167857, "learning_rate": 6.93080702997034e-06, "loss": 1.1302, "step": 15656 }, { "epoch": 0.06931249723316658, "grad_norm": 2.383818870462696, "learning_rate": 6.931249723316659e-06, "loss": 0.7248, "step": 15657 }, { "epoch": 0.06931692416662978, "grad_norm": 2.487717424715348, "learning_rate": 6.931692416662978e-06, "loss": 0.5611, "step": 15658 }, { "epoch": 0.06932135110009296, "grad_norm": 2.677695073652099, "learning_rate": 6.932135110009297e-06, "loss": 0.9261, "step": 15659 }, { "epoch": 0.06932577803355616, "grad_norm": 2.596626271915246, "learning_rate": 6.932577803355615e-06, "loss": 0.7411, "step": 15660 }, { "epoch": 0.06933020496701935, "grad_norm": 2.7809313452113598, "learning_rate": 6.9330204967019355e-06, "loss": 0.8043, "step": 15661 }, { "epoch": 0.06933463190048253, "grad_norm": 3.4495552916503653, "learning_rate": 6.933463190048254e-06, "loss": 0.6965, "step": 15662 }, { "epoch": 0.06933905883394573, "grad_norm": 2.666829330075688, "learning_rate": 6.933905883394573e-06, "loss": 0.5652, "step": 15663 }, { "epoch": 0.06934348576740891, "grad_norm": 2.5245021406957764, "learning_rate": 6.934348576740893e-06, "loss": 0.7528, "step": 15664 }, { "epoch": 0.06934791270087211, "grad_norm": 2.2806226067749056, "learning_rate": 6.934791270087211e-06, "loss": 0.4663, "step": 15665 }, { "epoch": 0.06935233963433529, "grad_norm": 3.004531935079455, "learning_rate": 6.93523396343353e-06, "loss": 0.8667, "step": 15666 }, { "epoch": 0.06935676656779849, "grad_norm": 2.7939511632646217, "learning_rate": 6.935676656779849e-06, "loss": 0.8401, "step": 15667 }, { "epoch": 0.06936119350126167, "grad_norm": 2.6942974663408097, "learning_rate": 6.936119350126168e-06, "loss": 0.6512, "step": 15668 }, { "epoch": 0.06936562043472487, "grad_norm": 3.236924553862356, "learning_rate": 6.936562043472487e-06, "loss": 1.0553, "step": 15669 }, { "epoch": 0.06937004736818805, "grad_norm": 3.0105997959195383, "learning_rate": 6.9370047368188065e-06, "loss": 0.7972, "step": 15670 }, { "epoch": 0.06937447430165125, "grad_norm": 2.539727247709228, "learning_rate": 6.937447430165125e-06, "loss": 0.456, "step": 15671 }, { "epoch": 0.06937890123511443, "grad_norm": 2.4095262693408066, "learning_rate": 6.9378901235114435e-06, "loss": 0.8617, "step": 15672 }, { "epoch": 0.06938332816857763, "grad_norm": 2.696214436599603, "learning_rate": 6.938332816857764e-06, "loss": 0.8159, "step": 15673 }, { "epoch": 0.06938775510204082, "grad_norm": 3.004574019463995, "learning_rate": 6.938775510204082e-06, "loss": 1.0276, "step": 15674 }, { "epoch": 0.06939218203550401, "grad_norm": 2.6428356765650043, "learning_rate": 6.939218203550401e-06, "loss": 0.4851, "step": 15675 }, { "epoch": 0.0693966089689672, "grad_norm": 3.0367859653200266, "learning_rate": 6.93966089689672e-06, "loss": 0.8814, "step": 15676 }, { "epoch": 0.06940103590243038, "grad_norm": 2.744574956296657, "learning_rate": 6.9401035902430395e-06, "loss": 0.3711, "step": 15677 }, { "epoch": 0.06940546283589358, "grad_norm": 2.7152085698415505, "learning_rate": 6.940546283589358e-06, "loss": 0.6819, "step": 15678 }, { "epoch": 0.06940988976935676, "grad_norm": 2.7821630664967594, "learning_rate": 6.9409889769356774e-06, "loss": 0.6961, "step": 15679 }, { "epoch": 0.06941431670281996, "grad_norm": 2.4453845434901607, "learning_rate": 6.941431670281996e-06, "loss": 0.612, "step": 15680 }, { "epoch": 0.06941874363628314, "grad_norm": 2.6280031158386032, "learning_rate": 6.9418743636283145e-06, "loss": 0.9079, "step": 15681 }, { "epoch": 0.06942317056974634, "grad_norm": 3.427510854112777, "learning_rate": 6.942317056974635e-06, "loss": 0.7696, "step": 15682 }, { "epoch": 0.06942759750320952, "grad_norm": 3.1388943567281684, "learning_rate": 6.942759750320953e-06, "loss": 1.2641, "step": 15683 }, { "epoch": 0.06943202443667272, "grad_norm": 2.3072280010360866, "learning_rate": 6.943202443667272e-06, "loss": 0.6038, "step": 15684 }, { "epoch": 0.0694364513701359, "grad_norm": 2.433050433050042, "learning_rate": 6.943645137013592e-06, "loss": 0.6595, "step": 15685 }, { "epoch": 0.0694408783035991, "grad_norm": 3.0007436922683466, "learning_rate": 6.9440878303599105e-06, "loss": 0.8592, "step": 15686 }, { "epoch": 0.06944530523706229, "grad_norm": 3.404889107356247, "learning_rate": 6.944530523706229e-06, "loss": 0.8676, "step": 15687 }, { "epoch": 0.06944973217052548, "grad_norm": 2.822201441812203, "learning_rate": 6.944973217052548e-06, "loss": 0.6928, "step": 15688 }, { "epoch": 0.06945415910398867, "grad_norm": 3.066679186442157, "learning_rate": 6.945415910398867e-06, "loss": 0.7702, "step": 15689 }, { "epoch": 0.06945858603745186, "grad_norm": 2.823611811663492, "learning_rate": 6.945858603745185e-06, "loss": 1.0471, "step": 15690 }, { "epoch": 0.06946301297091505, "grad_norm": 3.7776232379793773, "learning_rate": 6.946301297091506e-06, "loss": 1.2344, "step": 15691 }, { "epoch": 0.06946743990437823, "grad_norm": 3.1315937440454333, "learning_rate": 6.946743990437824e-06, "loss": 0.6373, "step": 15692 }, { "epoch": 0.06947186683784143, "grad_norm": 2.245053522405842, "learning_rate": 6.947186683784143e-06, "loss": 0.5935, "step": 15693 }, { "epoch": 0.06947629377130461, "grad_norm": 2.462267804893701, "learning_rate": 6.947629377130463e-06, "loss": 0.4554, "step": 15694 }, { "epoch": 0.06948072070476781, "grad_norm": 2.452939687282845, "learning_rate": 6.9480720704767814e-06, "loss": 0.6063, "step": 15695 }, { "epoch": 0.06948514763823099, "grad_norm": 3.014154983550404, "learning_rate": 6.9485147638231e-06, "loss": 0.8113, "step": 15696 }, { "epoch": 0.06948957457169419, "grad_norm": 2.3250067791343976, "learning_rate": 6.948957457169419e-06, "loss": 0.6171, "step": 15697 }, { "epoch": 0.06949400150515737, "grad_norm": 3.772380900925216, "learning_rate": 6.949400150515738e-06, "loss": 0.6518, "step": 15698 }, { "epoch": 0.06949842843862057, "grad_norm": 2.6798766294234526, "learning_rate": 6.949842843862057e-06, "loss": 0.5349, "step": 15699 }, { "epoch": 0.06950285537208375, "grad_norm": 2.6714608856086293, "learning_rate": 6.950285537208377e-06, "loss": 0.6707, "step": 15700 }, { "epoch": 0.06950728230554695, "grad_norm": 2.500122079159822, "learning_rate": 6.950728230554695e-06, "loss": 0.6729, "step": 15701 }, { "epoch": 0.06951170923901014, "grad_norm": 3.3207168210228124, "learning_rate": 6.951170923901014e-06, "loss": 0.703, "step": 15702 }, { "epoch": 0.06951613617247333, "grad_norm": 2.8671762996227503, "learning_rate": 6.951613617247334e-06, "loss": 0.7184, "step": 15703 }, { "epoch": 0.06952056310593652, "grad_norm": 2.6647503996790114, "learning_rate": 6.952056310593652e-06, "loss": 0.6447, "step": 15704 }, { "epoch": 0.06952499003939971, "grad_norm": 3.530529581809045, "learning_rate": 6.952499003939971e-06, "loss": 0.8524, "step": 15705 }, { "epoch": 0.0695294169728629, "grad_norm": 3.115487342111783, "learning_rate": 6.95294169728629e-06, "loss": 0.6929, "step": 15706 }, { "epoch": 0.06953384390632608, "grad_norm": 2.4839981376856497, "learning_rate": 6.95338439063261e-06, "loss": 0.6179, "step": 15707 }, { "epoch": 0.06953827083978928, "grad_norm": 2.2642238467994193, "learning_rate": 6.953827083978928e-06, "loss": 0.7992, "step": 15708 }, { "epoch": 0.06954269777325246, "grad_norm": 2.2164797021532396, "learning_rate": 6.9542697773252475e-06, "loss": 0.5842, "step": 15709 }, { "epoch": 0.06954712470671566, "grad_norm": 2.844767958112665, "learning_rate": 6.954712470671566e-06, "loss": 0.9813, "step": 15710 }, { "epoch": 0.06955155164017884, "grad_norm": 2.4820894128022157, "learning_rate": 6.955155164017885e-06, "loss": 0.7933, "step": 15711 }, { "epoch": 0.06955597857364204, "grad_norm": 2.7659052520273826, "learning_rate": 6.955597857364205e-06, "loss": 0.9907, "step": 15712 }, { "epoch": 0.06956040550710522, "grad_norm": 2.8097908164972702, "learning_rate": 6.956040550710523e-06, "loss": 0.828, "step": 15713 }, { "epoch": 0.06956483244056842, "grad_norm": 2.6810073914809105, "learning_rate": 6.956483244056842e-06, "loss": 0.877, "step": 15714 }, { "epoch": 0.0695692593740316, "grad_norm": 2.658937599085859, "learning_rate": 6.956925937403162e-06, "loss": 0.7888, "step": 15715 }, { "epoch": 0.0695736863074948, "grad_norm": 2.9440454630444335, "learning_rate": 6.957368630749481e-06, "loss": 0.8353, "step": 15716 }, { "epoch": 0.06957811324095799, "grad_norm": 2.5248684223611093, "learning_rate": 6.957811324095799e-06, "loss": 0.7672, "step": 15717 }, { "epoch": 0.06958254017442118, "grad_norm": 2.6473586469380685, "learning_rate": 6.9582540174421185e-06, "loss": 0.536, "step": 15718 }, { "epoch": 0.06958696710788437, "grad_norm": 2.569923217179035, "learning_rate": 6.958696710788437e-06, "loss": 0.7296, "step": 15719 }, { "epoch": 0.06959139404134757, "grad_norm": 2.360516999579115, "learning_rate": 6.959139404134756e-06, "loss": 0.5739, "step": 15720 }, { "epoch": 0.06959582097481075, "grad_norm": 3.6759987959247447, "learning_rate": 6.959582097481076e-06, "loss": 1.0595, "step": 15721 }, { "epoch": 0.06960024790827393, "grad_norm": 3.259789215083389, "learning_rate": 6.960024790827394e-06, "loss": 1.0738, "step": 15722 }, { "epoch": 0.06960467484173713, "grad_norm": 2.732406710760109, "learning_rate": 6.960467484173713e-06, "loss": 0.8135, "step": 15723 }, { "epoch": 0.06960910177520031, "grad_norm": 2.466950629395193, "learning_rate": 6.960910177520033e-06, "loss": 0.6986, "step": 15724 }, { "epoch": 0.06961352870866351, "grad_norm": 2.5802357164149554, "learning_rate": 6.9613528708663515e-06, "loss": 0.7374, "step": 15725 }, { "epoch": 0.0696179556421267, "grad_norm": 3.206091624739501, "learning_rate": 6.96179556421267e-06, "loss": 0.712, "step": 15726 }, { "epoch": 0.06962238257558989, "grad_norm": 3.2620045164095512, "learning_rate": 6.9622382575589894e-06, "loss": 0.8696, "step": 15727 }, { "epoch": 0.06962680950905308, "grad_norm": 3.0490225588370747, "learning_rate": 6.962680950905309e-06, "loss": 1.1507, "step": 15728 }, { "epoch": 0.06963123644251627, "grad_norm": 2.7205409893842676, "learning_rate": 6.963123644251627e-06, "loss": 0.642, "step": 15729 }, { "epoch": 0.06963566337597946, "grad_norm": 3.145489380225621, "learning_rate": 6.963566337597947e-06, "loss": 1.0551, "step": 15730 }, { "epoch": 0.06964009030944265, "grad_norm": 2.1357832025687506, "learning_rate": 6.964009030944265e-06, "loss": 0.4601, "step": 15731 }, { "epoch": 0.06964451724290584, "grad_norm": 2.6494539697352653, "learning_rate": 6.964451724290584e-06, "loss": 0.7622, "step": 15732 }, { "epoch": 0.06964894417636903, "grad_norm": 2.3608180520357673, "learning_rate": 6.964894417636904e-06, "loss": 0.5307, "step": 15733 }, { "epoch": 0.06965337110983222, "grad_norm": 3.368641340915787, "learning_rate": 6.9653371109832225e-06, "loss": 0.6734, "step": 15734 }, { "epoch": 0.06965779804329542, "grad_norm": 2.8507616220494545, "learning_rate": 6.965779804329541e-06, "loss": 0.7849, "step": 15735 }, { "epoch": 0.0696622249767586, "grad_norm": 2.5445913534626095, "learning_rate": 6.96622249767586e-06, "loss": 0.4957, "step": 15736 }, { "epoch": 0.06966665191022178, "grad_norm": 2.9486426412577273, "learning_rate": 6.96666519102218e-06, "loss": 0.8151, "step": 15737 }, { "epoch": 0.06967107884368498, "grad_norm": 3.262312564166375, "learning_rate": 6.967107884368498e-06, "loss": 1.0696, "step": 15738 }, { "epoch": 0.06967550577714816, "grad_norm": 2.9037484984287256, "learning_rate": 6.967550577714818e-06, "loss": 0.8187, "step": 15739 }, { "epoch": 0.06967993271061136, "grad_norm": 2.6758584640450334, "learning_rate": 6.967993271061136e-06, "loss": 0.7341, "step": 15740 }, { "epoch": 0.06968435964407454, "grad_norm": 3.543130550095032, "learning_rate": 6.968435964407455e-06, "loss": 0.8271, "step": 15741 }, { "epoch": 0.06968878657753774, "grad_norm": 2.6507610811119817, "learning_rate": 6.968878657753775e-06, "loss": 0.8379, "step": 15742 }, { "epoch": 0.06969321351100093, "grad_norm": 2.5753113281272544, "learning_rate": 6.9693213511000934e-06, "loss": 0.6609, "step": 15743 }, { "epoch": 0.06969764044446412, "grad_norm": 3.0414248013690575, "learning_rate": 6.969764044446412e-06, "loss": 0.6339, "step": 15744 }, { "epoch": 0.06970206737792731, "grad_norm": 2.1639600262546708, "learning_rate": 6.970206737792732e-06, "loss": 0.6312, "step": 15745 }, { "epoch": 0.0697064943113905, "grad_norm": 2.468480035673877, "learning_rate": 6.970649431139051e-06, "loss": 0.6078, "step": 15746 }, { "epoch": 0.06971092124485369, "grad_norm": 2.317499388164784, "learning_rate": 6.971092124485369e-06, "loss": 0.6445, "step": 15747 }, { "epoch": 0.06971534817831689, "grad_norm": 2.792318656748945, "learning_rate": 6.971534817831689e-06, "loss": 0.7777, "step": 15748 }, { "epoch": 0.06971977511178007, "grad_norm": 3.098883120352903, "learning_rate": 6.971977511178007e-06, "loss": 0.8801, "step": 15749 }, { "epoch": 0.06972420204524327, "grad_norm": 2.372206712550983, "learning_rate": 6.9724202045243265e-06, "loss": 0.5901, "step": 15750 }, { "epoch": 0.06972862897870645, "grad_norm": 3.105510762388536, "learning_rate": 6.972862897870646e-06, "loss": 1.0218, "step": 15751 }, { "epoch": 0.06973305591216963, "grad_norm": 2.8114592262354146, "learning_rate": 6.973305591216964e-06, "loss": 0.7956, "step": 15752 }, { "epoch": 0.06973748284563283, "grad_norm": 3.0467660358895428, "learning_rate": 6.973748284563283e-06, "loss": 0.791, "step": 15753 }, { "epoch": 0.06974190977909601, "grad_norm": 2.3081055920251212, "learning_rate": 6.974190977909603e-06, "loss": 0.6381, "step": 15754 }, { "epoch": 0.06974633671255921, "grad_norm": 3.456428311185414, "learning_rate": 6.974633671255922e-06, "loss": 1.1374, "step": 15755 }, { "epoch": 0.0697507636460224, "grad_norm": 2.8009924694631207, "learning_rate": 6.97507636460224e-06, "loss": 1.007, "step": 15756 }, { "epoch": 0.0697551905794856, "grad_norm": 2.8657548796010586, "learning_rate": 6.9755190579485595e-06, "loss": 0.733, "step": 15757 }, { "epoch": 0.06975961751294878, "grad_norm": 2.6247072614452582, "learning_rate": 6.975961751294879e-06, "loss": 0.8373, "step": 15758 }, { "epoch": 0.06976404444641197, "grad_norm": 2.8729218318878305, "learning_rate": 6.9764044446411974e-06, "loss": 0.9379, "step": 15759 }, { "epoch": 0.06976847137987516, "grad_norm": 2.9440296402838304, "learning_rate": 6.976847137987517e-06, "loss": 0.6083, "step": 15760 }, { "epoch": 0.06977289831333836, "grad_norm": 2.513219976888391, "learning_rate": 6.977289831333835e-06, "loss": 0.6594, "step": 15761 }, { "epoch": 0.06977732524680154, "grad_norm": 2.714515382480393, "learning_rate": 6.977732524680154e-06, "loss": 0.9316, "step": 15762 }, { "epoch": 0.06978175218026474, "grad_norm": 2.088695666246931, "learning_rate": 6.978175218026474e-06, "loss": 0.566, "step": 15763 }, { "epoch": 0.06978617911372792, "grad_norm": 2.3168752061364004, "learning_rate": 6.978617911372793e-06, "loss": 0.5804, "step": 15764 }, { "epoch": 0.06979060604719112, "grad_norm": 3.53401947214919, "learning_rate": 6.979060604719111e-06, "loss": 1.0453, "step": 15765 }, { "epoch": 0.0697950329806543, "grad_norm": 2.585592468598627, "learning_rate": 6.979503298065431e-06, "loss": 0.715, "step": 15766 }, { "epoch": 0.06979945991411748, "grad_norm": 3.0386248194434944, "learning_rate": 6.97994599141175e-06, "loss": 1.041, "step": 15767 }, { "epoch": 0.06980388684758068, "grad_norm": 2.34495240641836, "learning_rate": 6.980388684758068e-06, "loss": 0.6844, "step": 15768 }, { "epoch": 0.06980831378104387, "grad_norm": 2.7357024432915398, "learning_rate": 6.980831378104388e-06, "loss": 0.7299, "step": 15769 }, { "epoch": 0.06981274071450706, "grad_norm": 2.6965108803536495, "learning_rate": 6.981274071450706e-06, "loss": 0.593, "step": 15770 }, { "epoch": 0.06981716764797025, "grad_norm": 2.2578341170027194, "learning_rate": 6.981716764797025e-06, "loss": 0.6425, "step": 15771 }, { "epoch": 0.06982159458143344, "grad_norm": 3.0257398549673145, "learning_rate": 6.982159458143345e-06, "loss": 0.6569, "step": 15772 }, { "epoch": 0.06982602151489663, "grad_norm": 3.2385659505980837, "learning_rate": 6.9826021514896635e-06, "loss": 0.9188, "step": 15773 }, { "epoch": 0.06983044844835982, "grad_norm": 3.486782675215948, "learning_rate": 6.983044844835982e-06, "loss": 1.0307, "step": 15774 }, { "epoch": 0.06983487538182301, "grad_norm": 2.8862956011272827, "learning_rate": 6.983487538182302e-06, "loss": 0.9324, "step": 15775 }, { "epoch": 0.0698393023152862, "grad_norm": 2.2227533287748003, "learning_rate": 6.983930231528621e-06, "loss": 0.6956, "step": 15776 }, { "epoch": 0.06984372924874939, "grad_norm": 2.868382754554352, "learning_rate": 6.984372924874939e-06, "loss": 1.0653, "step": 15777 }, { "epoch": 0.06984815618221259, "grad_norm": 2.7700811730385837, "learning_rate": 6.984815618221259e-06, "loss": 0.5727, "step": 15778 }, { "epoch": 0.06985258311567577, "grad_norm": 2.9921486229894123, "learning_rate": 6.985258311567577e-06, "loss": 0.5305, "step": 15779 }, { "epoch": 0.06985701004913897, "grad_norm": 3.4431888827683097, "learning_rate": 6.985701004913897e-06, "loss": 1.0929, "step": 15780 }, { "epoch": 0.06986143698260215, "grad_norm": 3.8278674636967907, "learning_rate": 6.986143698260216e-06, "loss": 1.0007, "step": 15781 }, { "epoch": 0.06986586391606533, "grad_norm": 2.383172462139982, "learning_rate": 6.9865863916065345e-06, "loss": 0.6746, "step": 15782 }, { "epoch": 0.06987029084952853, "grad_norm": 2.3900550082621708, "learning_rate": 6.987029084952853e-06, "loss": 0.6418, "step": 15783 }, { "epoch": 0.06987471778299172, "grad_norm": 2.833248215900534, "learning_rate": 6.987471778299173e-06, "loss": 0.6237, "step": 15784 }, { "epoch": 0.06987914471645491, "grad_norm": 2.4802494720876003, "learning_rate": 6.987914471645492e-06, "loss": 0.7427, "step": 15785 }, { "epoch": 0.0698835716499181, "grad_norm": 2.521942366912195, "learning_rate": 6.98835716499181e-06, "loss": 0.4829, "step": 15786 }, { "epoch": 0.0698879985833813, "grad_norm": 3.0982737506071696, "learning_rate": 6.98879985833813e-06, "loss": 1.1186, "step": 15787 }, { "epoch": 0.06989242551684448, "grad_norm": 2.5354437544056725, "learning_rate": 6.989242551684449e-06, "loss": 0.6721, "step": 15788 }, { "epoch": 0.06989685245030768, "grad_norm": 2.7408546669095504, "learning_rate": 6.9896852450307675e-06, "loss": 0.6503, "step": 15789 }, { "epoch": 0.06990127938377086, "grad_norm": 2.797345888232078, "learning_rate": 6.990127938377087e-06, "loss": 0.7124, "step": 15790 }, { "epoch": 0.06990570631723406, "grad_norm": 2.177280243775515, "learning_rate": 6.9905706317234054e-06, "loss": 0.6471, "step": 15791 }, { "epoch": 0.06991013325069724, "grad_norm": 2.7556565188170286, "learning_rate": 6.991013325069724e-06, "loss": 0.4939, "step": 15792 }, { "epoch": 0.06991456018416044, "grad_norm": 2.5267904168120037, "learning_rate": 6.991456018416044e-06, "loss": 0.61, "step": 15793 }, { "epoch": 0.06991898711762362, "grad_norm": 3.161328009231996, "learning_rate": 6.991898711762363e-06, "loss": 1.0735, "step": 15794 }, { "epoch": 0.06992341405108682, "grad_norm": 2.5616581306575625, "learning_rate": 6.992341405108681e-06, "loss": 0.6129, "step": 15795 }, { "epoch": 0.06992784098455, "grad_norm": 2.39345441542135, "learning_rate": 6.9927840984550014e-06, "loss": 0.4497, "step": 15796 }, { "epoch": 0.06993226791801319, "grad_norm": 2.6727630700307463, "learning_rate": 6.99322679180132e-06, "loss": 0.6432, "step": 15797 }, { "epoch": 0.06993669485147638, "grad_norm": 3.2644469260015034, "learning_rate": 6.9936694851476385e-06, "loss": 0.9902, "step": 15798 }, { "epoch": 0.06994112178493957, "grad_norm": 3.425638513015516, "learning_rate": 6.994112178493958e-06, "loss": 1.1854, "step": 15799 }, { "epoch": 0.06994554871840276, "grad_norm": 2.625812462439329, "learning_rate": 6.994554871840276e-06, "loss": 0.6088, "step": 15800 }, { "epoch": 0.06994997565186595, "grad_norm": 2.490787374279544, "learning_rate": 6.994997565186596e-06, "loss": 0.6431, "step": 15801 }, { "epoch": 0.06995440258532915, "grad_norm": 2.952404057109843, "learning_rate": 6.995440258532915e-06, "loss": 0.847, "step": 15802 }, { "epoch": 0.06995882951879233, "grad_norm": 3.2659229273431785, "learning_rate": 6.995882951879234e-06, "loss": 1.0774, "step": 15803 }, { "epoch": 0.06996325645225553, "grad_norm": 2.7959751497452423, "learning_rate": 6.996325645225552e-06, "loss": 0.8082, "step": 15804 }, { "epoch": 0.06996768338571871, "grad_norm": 2.749029182099439, "learning_rate": 6.996768338571872e-06, "loss": 0.7932, "step": 15805 }, { "epoch": 0.06997211031918191, "grad_norm": 2.22512923076529, "learning_rate": 6.997211031918191e-06, "loss": 0.5379, "step": 15806 }, { "epoch": 0.06997653725264509, "grad_norm": 3.4648986925787053, "learning_rate": 6.9976537252645094e-06, "loss": 1.1942, "step": 15807 }, { "epoch": 0.06998096418610829, "grad_norm": 2.3834345843861513, "learning_rate": 6.998096418610829e-06, "loss": 0.744, "step": 15808 }, { "epoch": 0.06998539111957147, "grad_norm": 2.5297498546783683, "learning_rate": 6.998539111957147e-06, "loss": 0.5914, "step": 15809 }, { "epoch": 0.06998981805303467, "grad_norm": 2.272072895557513, "learning_rate": 6.998981805303467e-06, "loss": 0.6618, "step": 15810 }, { "epoch": 0.06999424498649785, "grad_norm": 2.6929426113939328, "learning_rate": 6.999424498649786e-06, "loss": 0.8771, "step": 15811 }, { "epoch": 0.06999867191996104, "grad_norm": 3.178006806666365, "learning_rate": 6.999867191996105e-06, "loss": 0.7533, "step": 15812 }, { "epoch": 0.07000309885342423, "grad_norm": 2.8328260770705977, "learning_rate": 7.000309885342423e-06, "loss": 0.7439, "step": 15813 }, { "epoch": 0.07000752578688742, "grad_norm": 3.008852103212384, "learning_rate": 7.000752578688743e-06, "loss": 1.0742, "step": 15814 }, { "epoch": 0.07001195272035061, "grad_norm": 2.4154549880302616, "learning_rate": 7.001195272035062e-06, "loss": 0.4385, "step": 15815 }, { "epoch": 0.0700163796538138, "grad_norm": 2.9676411822300945, "learning_rate": 7.00163796538138e-06, "loss": 0.7596, "step": 15816 }, { "epoch": 0.070020806587277, "grad_norm": 2.6067237268176786, "learning_rate": 7.0020806587277e-06, "loss": 0.8043, "step": 15817 }, { "epoch": 0.07002523352074018, "grad_norm": 2.5344339294604152, "learning_rate": 7.002523352074019e-06, "loss": 0.6176, "step": 15818 }, { "epoch": 0.07002966045420338, "grad_norm": 3.008363411851196, "learning_rate": 7.002966045420338e-06, "loss": 0.6413, "step": 15819 }, { "epoch": 0.07003408738766656, "grad_norm": 4.1411192297899175, "learning_rate": 7.003408738766657e-06, "loss": 1.1367, "step": 15820 }, { "epoch": 0.07003851432112976, "grad_norm": 3.732924867372154, "learning_rate": 7.0038514321129755e-06, "loss": 0.9911, "step": 15821 }, { "epoch": 0.07004294125459294, "grad_norm": 2.691072889517756, "learning_rate": 7.004294125459294e-06, "loss": 0.7569, "step": 15822 }, { "epoch": 0.07004736818805614, "grad_norm": 3.4328951296564503, "learning_rate": 7.004736818805614e-06, "loss": 0.8933, "step": 15823 }, { "epoch": 0.07005179512151932, "grad_norm": 2.771085471640746, "learning_rate": 7.005179512151933e-06, "loss": 0.8337, "step": 15824 }, { "epoch": 0.07005622205498252, "grad_norm": 2.508280159956439, "learning_rate": 7.005622205498251e-06, "loss": 0.7025, "step": 15825 }, { "epoch": 0.0700606489884457, "grad_norm": 2.272533357860616, "learning_rate": 7.0060648988445715e-06, "loss": 0.6316, "step": 15826 }, { "epoch": 0.07006507592190889, "grad_norm": 3.8642648357301477, "learning_rate": 7.00650759219089e-06, "loss": 1.3983, "step": 15827 }, { "epoch": 0.07006950285537208, "grad_norm": 2.369092082922191, "learning_rate": 7.006950285537209e-06, "loss": 0.6186, "step": 15828 }, { "epoch": 0.07007392978883527, "grad_norm": 2.7636345565451816, "learning_rate": 7.007392978883528e-06, "loss": 0.7003, "step": 15829 }, { "epoch": 0.07007835672229847, "grad_norm": 3.15811374199772, "learning_rate": 7.0078356722298465e-06, "loss": 0.8757, "step": 15830 }, { "epoch": 0.07008278365576165, "grad_norm": 2.7453606199324248, "learning_rate": 7.008278365576166e-06, "loss": 0.9883, "step": 15831 }, { "epoch": 0.07008721058922485, "grad_norm": 3.1195909069397847, "learning_rate": 7.008721058922485e-06, "loss": 1.0173, "step": 15832 }, { "epoch": 0.07009163752268803, "grad_norm": 2.614059560427433, "learning_rate": 7.009163752268804e-06, "loss": 0.786, "step": 15833 }, { "epoch": 0.07009606445615123, "grad_norm": 2.8921521744401333, "learning_rate": 7.009606445615122e-06, "loss": 0.8827, "step": 15834 }, { "epoch": 0.07010049138961441, "grad_norm": 2.2086349236200977, "learning_rate": 7.0100491389614425e-06, "loss": 0.7617, "step": 15835 }, { "epoch": 0.07010491832307761, "grad_norm": 3.0384017843802207, "learning_rate": 7.010491832307761e-06, "loss": 1.0429, "step": 15836 }, { "epoch": 0.07010934525654079, "grad_norm": 2.54680798725339, "learning_rate": 7.0109345256540795e-06, "loss": 0.5636, "step": 15837 }, { "epoch": 0.07011377219000399, "grad_norm": 2.808865185318882, "learning_rate": 7.011377219000399e-06, "loss": 0.6776, "step": 15838 }, { "epoch": 0.07011819912346717, "grad_norm": 2.932449930131155, "learning_rate": 7.011819912346718e-06, "loss": 0.7833, "step": 15839 }, { "epoch": 0.07012262605693037, "grad_norm": 2.6214537363076977, "learning_rate": 7.012262605693037e-06, "loss": 0.6894, "step": 15840 }, { "epoch": 0.07012705299039355, "grad_norm": 2.1503418378214145, "learning_rate": 7.012705299039356e-06, "loss": 0.558, "step": 15841 }, { "epoch": 0.07013147992385674, "grad_norm": 3.28066364178526, "learning_rate": 7.013147992385675e-06, "loss": 0.7169, "step": 15842 }, { "epoch": 0.07013590685731994, "grad_norm": 2.850771826319491, "learning_rate": 7.013590685731993e-06, "loss": 0.9581, "step": 15843 }, { "epoch": 0.07014033379078312, "grad_norm": 2.775321762352465, "learning_rate": 7.0140333790783134e-06, "loss": 0.6861, "step": 15844 }, { "epoch": 0.07014476072424632, "grad_norm": 2.7732330375184784, "learning_rate": 7.014476072424632e-06, "loss": 0.6478, "step": 15845 }, { "epoch": 0.0701491876577095, "grad_norm": 3.1032744106548464, "learning_rate": 7.0149187657709505e-06, "loss": 1.0746, "step": 15846 }, { "epoch": 0.0701536145911727, "grad_norm": 2.387487112265895, "learning_rate": 7.015361459117271e-06, "loss": 0.7061, "step": 15847 }, { "epoch": 0.07015804152463588, "grad_norm": 2.8890004005723444, "learning_rate": 7.015804152463589e-06, "loss": 0.4907, "step": 15848 }, { "epoch": 0.07016246845809908, "grad_norm": 2.674314990144889, "learning_rate": 7.016246845809908e-06, "loss": 0.7042, "step": 15849 }, { "epoch": 0.07016689539156226, "grad_norm": 2.882910220791293, "learning_rate": 7.016689539156227e-06, "loss": 0.83, "step": 15850 }, { "epoch": 0.07017132232502546, "grad_norm": 2.558159543526406, "learning_rate": 7.017132232502546e-06, "loss": 0.6881, "step": 15851 }, { "epoch": 0.07017574925848864, "grad_norm": 3.5127857448705706, "learning_rate": 7.017574925848864e-06, "loss": 1.1026, "step": 15852 }, { "epoch": 0.07018017619195184, "grad_norm": 2.5395700007694875, "learning_rate": 7.018017619195184e-06, "loss": 0.5196, "step": 15853 }, { "epoch": 0.07018460312541502, "grad_norm": 3.217549915826031, "learning_rate": 7.018460312541503e-06, "loss": 1.0921, "step": 15854 }, { "epoch": 0.07018903005887822, "grad_norm": 2.164204487493107, "learning_rate": 7.0189030058878214e-06, "loss": 0.6537, "step": 15855 }, { "epoch": 0.0701934569923414, "grad_norm": 2.774979427064084, "learning_rate": 7.019345699234142e-06, "loss": 0.6391, "step": 15856 }, { "epoch": 0.07019788392580459, "grad_norm": 2.4256856904085637, "learning_rate": 7.01978839258046e-06, "loss": 0.6064, "step": 15857 }, { "epoch": 0.07020231085926779, "grad_norm": 2.489409853619721, "learning_rate": 7.020231085926779e-06, "loss": 0.3661, "step": 15858 }, { "epoch": 0.07020673779273097, "grad_norm": 2.9758579685255016, "learning_rate": 7.020673779273098e-06, "loss": 0.8412, "step": 15859 }, { "epoch": 0.07021116472619417, "grad_norm": 4.645790615976579, "learning_rate": 7.021116472619417e-06, "loss": 1.0998, "step": 15860 }, { "epoch": 0.07021559165965735, "grad_norm": 2.878002159793049, "learning_rate": 7.021559165965736e-06, "loss": 0.8446, "step": 15861 }, { "epoch": 0.07022001859312055, "grad_norm": 4.034754717261892, "learning_rate": 7.022001859312055e-06, "loss": 1.484, "step": 15862 }, { "epoch": 0.07022444552658373, "grad_norm": 2.728195402120099, "learning_rate": 7.022444552658374e-06, "loss": 0.7331, "step": 15863 }, { "epoch": 0.07022887246004693, "grad_norm": 2.419686869750639, "learning_rate": 7.022887246004692e-06, "loss": 0.5661, "step": 15864 }, { "epoch": 0.07023329939351011, "grad_norm": 3.8234061430927353, "learning_rate": 7.023329939351013e-06, "loss": 0.6899, "step": 15865 }, { "epoch": 0.07023772632697331, "grad_norm": 2.4850141298377637, "learning_rate": 7.023772632697331e-06, "loss": 0.6776, "step": 15866 }, { "epoch": 0.0702421532604365, "grad_norm": 2.4329825673042995, "learning_rate": 7.02421532604365e-06, "loss": 0.6898, "step": 15867 }, { "epoch": 0.07024658019389969, "grad_norm": 2.7366991184835703, "learning_rate": 7.024658019389969e-06, "loss": 0.9145, "step": 15868 }, { "epoch": 0.07025100712736287, "grad_norm": 2.24410398426616, "learning_rate": 7.025100712736288e-06, "loss": 0.7406, "step": 15869 }, { "epoch": 0.07025543406082607, "grad_norm": 3.050925349591323, "learning_rate": 7.025543406082607e-06, "loss": 1.1178, "step": 15870 }, { "epoch": 0.07025986099428926, "grad_norm": 2.6721689857604223, "learning_rate": 7.025986099428926e-06, "loss": 0.5672, "step": 15871 }, { "epoch": 0.07026428792775244, "grad_norm": 2.3881201502048097, "learning_rate": 7.026428792775245e-06, "loss": 0.5723, "step": 15872 }, { "epoch": 0.07026871486121564, "grad_norm": 2.4379478907005243, "learning_rate": 7.026871486121563e-06, "loss": 0.55, "step": 15873 }, { "epoch": 0.07027314179467882, "grad_norm": 2.815587882672435, "learning_rate": 7.0273141794678835e-06, "loss": 0.7838, "step": 15874 }, { "epoch": 0.07027756872814202, "grad_norm": 2.7644276020545364, "learning_rate": 7.027756872814202e-06, "loss": 0.8411, "step": 15875 }, { "epoch": 0.0702819956616052, "grad_norm": 2.5636895573735736, "learning_rate": 7.028199566160521e-06, "loss": 0.8047, "step": 15876 }, { "epoch": 0.0702864225950684, "grad_norm": 2.3935424644392365, "learning_rate": 7.028642259506841e-06, "loss": 0.8705, "step": 15877 }, { "epoch": 0.07029084952853158, "grad_norm": 2.2287940412367715, "learning_rate": 7.029084952853159e-06, "loss": 0.5785, "step": 15878 }, { "epoch": 0.07029527646199478, "grad_norm": 2.62973733748034, "learning_rate": 7.029527646199478e-06, "loss": 0.7118, "step": 15879 }, { "epoch": 0.07029970339545796, "grad_norm": 2.4153662176888906, "learning_rate": 7.029970339545797e-06, "loss": 0.7423, "step": 15880 }, { "epoch": 0.07030413032892116, "grad_norm": 3.1539523558353233, "learning_rate": 7.030413032892116e-06, "loss": 0.8911, "step": 15881 }, { "epoch": 0.07030855726238434, "grad_norm": 2.362587269447897, "learning_rate": 7.030855726238435e-06, "loss": 0.891, "step": 15882 }, { "epoch": 0.07031298419584754, "grad_norm": 2.9859538153035516, "learning_rate": 7.0312984195847545e-06, "loss": 0.7761, "step": 15883 }, { "epoch": 0.07031741112931073, "grad_norm": 3.04189563418792, "learning_rate": 7.031741112931073e-06, "loss": 1.1456, "step": 15884 }, { "epoch": 0.07032183806277392, "grad_norm": 3.0455489488228573, "learning_rate": 7.0321838062773915e-06, "loss": 0.8966, "step": 15885 }, { "epoch": 0.0703262649962371, "grad_norm": 3.6895820447868037, "learning_rate": 7.032626499623712e-06, "loss": 0.8604, "step": 15886 }, { "epoch": 0.07033069192970029, "grad_norm": 2.2741997680306407, "learning_rate": 7.03306919297003e-06, "loss": 0.4332, "step": 15887 }, { "epoch": 0.07033511886316349, "grad_norm": 3.132822943338403, "learning_rate": 7.033511886316349e-06, "loss": 0.9059, "step": 15888 }, { "epoch": 0.07033954579662667, "grad_norm": 2.977712756949992, "learning_rate": 7.033954579662668e-06, "loss": 0.8465, "step": 15889 }, { "epoch": 0.07034397273008987, "grad_norm": 3.0142283156582503, "learning_rate": 7.034397273008987e-06, "loss": 0.62, "step": 15890 }, { "epoch": 0.07034839966355305, "grad_norm": 2.7360884216227066, "learning_rate": 7.034839966355306e-06, "loss": 1.0035, "step": 15891 }, { "epoch": 0.07035282659701625, "grad_norm": 2.952648169229438, "learning_rate": 7.0352826597016254e-06, "loss": 0.9341, "step": 15892 }, { "epoch": 0.07035725353047943, "grad_norm": 4.172503525723765, "learning_rate": 7.035725353047944e-06, "loss": 1.1274, "step": 15893 }, { "epoch": 0.07036168046394263, "grad_norm": 2.3687969943367646, "learning_rate": 7.0361680463942625e-06, "loss": 0.7039, "step": 15894 }, { "epoch": 0.07036610739740581, "grad_norm": 2.231526142070611, "learning_rate": 7.036610739740583e-06, "loss": 0.581, "step": 15895 }, { "epoch": 0.07037053433086901, "grad_norm": 3.127564442089649, "learning_rate": 7.037053433086901e-06, "loss": 0.7127, "step": 15896 }, { "epoch": 0.0703749612643322, "grad_norm": 2.7888212492817206, "learning_rate": 7.03749612643322e-06, "loss": 1.0149, "step": 15897 }, { "epoch": 0.07037938819779539, "grad_norm": 2.3403237858388843, "learning_rate": 7.037938819779539e-06, "loss": 0.5679, "step": 15898 }, { "epoch": 0.07038381513125858, "grad_norm": 2.4662124602059365, "learning_rate": 7.0383815131258585e-06, "loss": 0.9379, "step": 15899 }, { "epoch": 0.07038824206472177, "grad_norm": 2.4494221703574337, "learning_rate": 7.038824206472177e-06, "loss": 0.6871, "step": 15900 }, { "epoch": 0.07039266899818496, "grad_norm": 2.84388311644406, "learning_rate": 7.039266899818496e-06, "loss": 1.0892, "step": 15901 }, { "epoch": 0.07039709593164815, "grad_norm": 2.496278764120797, "learning_rate": 7.039709593164815e-06, "loss": 0.7871, "step": 15902 }, { "epoch": 0.07040152286511134, "grad_norm": 2.5316013446321546, "learning_rate": 7.0401522865111334e-06, "loss": 0.6231, "step": 15903 }, { "epoch": 0.07040594979857452, "grad_norm": 2.9342614385187575, "learning_rate": 7.040594979857454e-06, "loss": 0.7995, "step": 15904 }, { "epoch": 0.07041037673203772, "grad_norm": 3.1970401753630124, "learning_rate": 7.041037673203772e-06, "loss": 1.2441, "step": 15905 }, { "epoch": 0.0704148036655009, "grad_norm": 2.2002333959473344, "learning_rate": 7.041480366550091e-06, "loss": 0.6722, "step": 15906 }, { "epoch": 0.0704192305989641, "grad_norm": 2.792063663426911, "learning_rate": 7.041923059896411e-06, "loss": 0.7426, "step": 15907 }, { "epoch": 0.07042365753242728, "grad_norm": 2.816941203789158, "learning_rate": 7.0423657532427294e-06, "loss": 0.9551, "step": 15908 }, { "epoch": 0.07042808446589048, "grad_norm": 2.99920005519101, "learning_rate": 7.042808446589048e-06, "loss": 0.7098, "step": 15909 }, { "epoch": 0.07043251139935366, "grad_norm": 3.0858506759198687, "learning_rate": 7.043251139935367e-06, "loss": 0.7623, "step": 15910 }, { "epoch": 0.07043693833281686, "grad_norm": 2.289931799143924, "learning_rate": 7.043693833281686e-06, "loss": 0.8078, "step": 15911 }, { "epoch": 0.07044136526628005, "grad_norm": 2.7180624360175294, "learning_rate": 7.044136526628005e-06, "loss": 0.6561, "step": 15912 }, { "epoch": 0.07044579219974324, "grad_norm": 2.3518743271220344, "learning_rate": 7.044579219974325e-06, "loss": 0.4756, "step": 15913 }, { "epoch": 0.07045021913320643, "grad_norm": 2.056148998317807, "learning_rate": 7.045021913320643e-06, "loss": 0.4871, "step": 15914 }, { "epoch": 0.07045464606666962, "grad_norm": 2.769115996273914, "learning_rate": 7.045464606666962e-06, "loss": 0.9008, "step": 15915 }, { "epoch": 0.07045907300013281, "grad_norm": 2.6304599884453235, "learning_rate": 7.045907300013282e-06, "loss": 0.7242, "step": 15916 }, { "epoch": 0.070463499933596, "grad_norm": 4.405071526642781, "learning_rate": 7.0463499933596e-06, "loss": 1.4035, "step": 15917 }, { "epoch": 0.07046792686705919, "grad_norm": 2.436405332320604, "learning_rate": 7.046792686705919e-06, "loss": 0.9755, "step": 15918 }, { "epoch": 0.07047235380052237, "grad_norm": 2.252874042921519, "learning_rate": 7.047235380052238e-06, "loss": 0.4875, "step": 15919 }, { "epoch": 0.07047678073398557, "grad_norm": 3.976085535200643, "learning_rate": 7.047678073398558e-06, "loss": 0.8871, "step": 15920 }, { "epoch": 0.07048120766744875, "grad_norm": 3.5570023998761955, "learning_rate": 7.048120766744876e-06, "loss": 1.0944, "step": 15921 }, { "epoch": 0.07048563460091195, "grad_norm": 3.1590764251176666, "learning_rate": 7.0485634600911955e-06, "loss": 0.5562, "step": 15922 }, { "epoch": 0.07049006153437513, "grad_norm": 2.487024171150242, "learning_rate": 7.049006153437514e-06, "loss": 0.5002, "step": 15923 }, { "epoch": 0.07049448846783833, "grad_norm": 2.881435191870355, "learning_rate": 7.049448846783833e-06, "loss": 0.817, "step": 15924 }, { "epoch": 0.07049891540130152, "grad_norm": 2.447301384692589, "learning_rate": 7.049891540130153e-06, "loss": 0.6011, "step": 15925 }, { "epoch": 0.07050334233476471, "grad_norm": 3.484316817660283, "learning_rate": 7.050334233476471e-06, "loss": 1.0097, "step": 15926 }, { "epoch": 0.0705077692682279, "grad_norm": 2.41084552385709, "learning_rate": 7.05077692682279e-06, "loss": 0.4423, "step": 15927 }, { "epoch": 0.0705121962016911, "grad_norm": 3.005846362542735, "learning_rate": 7.05121962016911e-06, "loss": 0.7375, "step": 15928 }, { "epoch": 0.07051662313515428, "grad_norm": 3.9528123071976777, "learning_rate": 7.051662313515429e-06, "loss": 0.9263, "step": 15929 }, { "epoch": 0.07052105006861747, "grad_norm": 2.3906631959328593, "learning_rate": 7.052105006861747e-06, "loss": 0.7963, "step": 15930 }, { "epoch": 0.07052547700208066, "grad_norm": 2.6942494410104203, "learning_rate": 7.0525477002080665e-06, "loss": 0.7704, "step": 15931 }, { "epoch": 0.07052990393554386, "grad_norm": 3.2816247102299774, "learning_rate": 7.052990393554385e-06, "loss": 1.0879, "step": 15932 }, { "epoch": 0.07053433086900704, "grad_norm": 2.348444998548725, "learning_rate": 7.0534330869007035e-06, "loss": 0.584, "step": 15933 }, { "epoch": 0.07053875780247022, "grad_norm": 2.6502431892797014, "learning_rate": 7.053875780247024e-06, "loss": 0.6504, "step": 15934 }, { "epoch": 0.07054318473593342, "grad_norm": 3.2564172238217823, "learning_rate": 7.054318473593342e-06, "loss": 0.9583, "step": 15935 }, { "epoch": 0.0705476116693966, "grad_norm": 2.339705811691802, "learning_rate": 7.054761166939661e-06, "loss": 0.7351, "step": 15936 }, { "epoch": 0.0705520386028598, "grad_norm": 2.5884113912996445, "learning_rate": 7.055203860285981e-06, "loss": 0.6202, "step": 15937 }, { "epoch": 0.07055646553632298, "grad_norm": 3.3965876877884424, "learning_rate": 7.0556465536322995e-06, "loss": 1.0158, "step": 15938 }, { "epoch": 0.07056089246978618, "grad_norm": 2.7179831971775448, "learning_rate": 7.056089246978618e-06, "loss": 0.9314, "step": 15939 }, { "epoch": 0.07056531940324937, "grad_norm": 2.60966297995043, "learning_rate": 7.0565319403249374e-06, "loss": 0.6972, "step": 15940 }, { "epoch": 0.07056974633671256, "grad_norm": 2.3525891874996265, "learning_rate": 7.056974633671256e-06, "loss": 0.7094, "step": 15941 }, { "epoch": 0.07057417327017575, "grad_norm": 3.0683720446022917, "learning_rate": 7.057417327017575e-06, "loss": 1.0815, "step": 15942 }, { "epoch": 0.07057860020363894, "grad_norm": 3.7097106303864105, "learning_rate": 7.057860020363895e-06, "loss": 1.0295, "step": 15943 }, { "epoch": 0.07058302713710213, "grad_norm": 2.835621348323969, "learning_rate": 7.058302713710213e-06, "loss": 0.9855, "step": 15944 }, { "epoch": 0.07058745407056533, "grad_norm": 2.5606118452032987, "learning_rate": 7.058745407056532e-06, "loss": 0.9514, "step": 15945 }, { "epoch": 0.07059188100402851, "grad_norm": 3.0964030931208515, "learning_rate": 7.059188100402852e-06, "loss": 1.036, "step": 15946 }, { "epoch": 0.0705963079374917, "grad_norm": 2.9734770928559384, "learning_rate": 7.0596307937491705e-06, "loss": 0.9535, "step": 15947 }, { "epoch": 0.07060073487095489, "grad_norm": 2.4376267583165236, "learning_rate": 7.060073487095489e-06, "loss": 0.5347, "step": 15948 }, { "epoch": 0.07060516180441807, "grad_norm": 2.460815354766097, "learning_rate": 7.060516180441808e-06, "loss": 0.6819, "step": 15949 }, { "epoch": 0.07060958873788127, "grad_norm": 2.7532236570748423, "learning_rate": 7.060958873788128e-06, "loss": 0.6712, "step": 15950 }, { "epoch": 0.07061401567134445, "grad_norm": 2.6155109053822856, "learning_rate": 7.061401567134446e-06, "loss": 0.5824, "step": 15951 }, { "epoch": 0.07061844260480765, "grad_norm": 2.9819448707529843, "learning_rate": 7.061844260480766e-06, "loss": 0.9251, "step": 15952 }, { "epoch": 0.07062286953827084, "grad_norm": 3.4381114460254567, "learning_rate": 7.062286953827084e-06, "loss": 1.1254, "step": 15953 }, { "epoch": 0.07062729647173403, "grad_norm": 2.3948771398026913, "learning_rate": 7.062729647173403e-06, "loss": 0.8337, "step": 15954 }, { "epoch": 0.07063172340519722, "grad_norm": 2.4608614751311197, "learning_rate": 7.063172340519723e-06, "loss": 0.5762, "step": 15955 }, { "epoch": 0.07063615033866041, "grad_norm": 2.1925359604384402, "learning_rate": 7.0636150338660414e-06, "loss": 0.3878, "step": 15956 }, { "epoch": 0.0706405772721236, "grad_norm": 2.75422032469545, "learning_rate": 7.06405772721236e-06, "loss": 0.8439, "step": 15957 }, { "epoch": 0.0706450042055868, "grad_norm": 2.338873787860264, "learning_rate": 7.06450042055868e-06, "loss": 0.5674, "step": 15958 }, { "epoch": 0.07064943113904998, "grad_norm": 2.9355607676462796, "learning_rate": 7.064943113904999e-06, "loss": 0.747, "step": 15959 }, { "epoch": 0.07065385807251318, "grad_norm": 2.3169447552051974, "learning_rate": 7.065385807251317e-06, "loss": 0.4491, "step": 15960 }, { "epoch": 0.07065828500597636, "grad_norm": 2.307837003551212, "learning_rate": 7.065828500597637e-06, "loss": 0.5154, "step": 15961 }, { "epoch": 0.07066271193943956, "grad_norm": 2.582011986901337, "learning_rate": 7.066271193943955e-06, "loss": 0.7162, "step": 15962 }, { "epoch": 0.07066713887290274, "grad_norm": 2.714759704190932, "learning_rate": 7.0667138872902745e-06, "loss": 0.6084, "step": 15963 }, { "epoch": 0.07067156580636592, "grad_norm": 3.073038995330407, "learning_rate": 7.067156580636594e-06, "loss": 1.0269, "step": 15964 }, { "epoch": 0.07067599273982912, "grad_norm": 3.6540534700326543, "learning_rate": 7.067599273982912e-06, "loss": 0.5749, "step": 15965 }, { "epoch": 0.0706804196732923, "grad_norm": 2.239676974096732, "learning_rate": 7.068041967329231e-06, "loss": 0.6801, "step": 15966 }, { "epoch": 0.0706848466067555, "grad_norm": 2.4588427080982456, "learning_rate": 7.068484660675551e-06, "loss": 0.9226, "step": 15967 }, { "epoch": 0.07068927354021869, "grad_norm": 2.8247043704038135, "learning_rate": 7.06892735402187e-06, "loss": 1.1574, "step": 15968 }, { "epoch": 0.07069370047368188, "grad_norm": 3.1034133383194527, "learning_rate": 7.069370047368188e-06, "loss": 1.002, "step": 15969 }, { "epoch": 0.07069812740714507, "grad_norm": 2.2311792100697425, "learning_rate": 7.0698127407145076e-06, "loss": 0.6999, "step": 15970 }, { "epoch": 0.07070255434060826, "grad_norm": 2.383215251913951, "learning_rate": 7.070255434060826e-06, "loss": 0.5188, "step": 15971 }, { "epoch": 0.07070698127407145, "grad_norm": 3.061877494217996, "learning_rate": 7.0706981274071454e-06, "loss": 0.9451, "step": 15972 }, { "epoch": 0.07071140820753465, "grad_norm": 3.051966570098943, "learning_rate": 7.071140820753465e-06, "loss": 1.0204, "step": 15973 }, { "epoch": 0.07071583514099783, "grad_norm": 3.6729921789567697, "learning_rate": 7.071583514099783e-06, "loss": 0.7501, "step": 15974 }, { "epoch": 0.07072026207446103, "grad_norm": 2.195917941083769, "learning_rate": 7.072026207446102e-06, "loss": 0.6959, "step": 15975 }, { "epoch": 0.07072468900792421, "grad_norm": 2.3765948713015645, "learning_rate": 7.072468900792422e-06, "loss": 0.4377, "step": 15976 }, { "epoch": 0.07072911594138741, "grad_norm": 2.5378891386737155, "learning_rate": 7.072911594138741e-06, "loss": 0.8227, "step": 15977 }, { "epoch": 0.07073354287485059, "grad_norm": 3.732055677656323, "learning_rate": 7.073354287485059e-06, "loss": 1.1611, "step": 15978 }, { "epoch": 0.07073796980831377, "grad_norm": 2.427294253164644, "learning_rate": 7.0737969808313785e-06, "loss": 0.8599, "step": 15979 }, { "epoch": 0.07074239674177697, "grad_norm": 2.268876217594078, "learning_rate": 7.074239674177698e-06, "loss": 0.6467, "step": 15980 }, { "epoch": 0.07074682367524016, "grad_norm": 2.481842674993101, "learning_rate": 7.074682367524016e-06, "loss": 0.7944, "step": 15981 }, { "epoch": 0.07075125060870335, "grad_norm": 2.7745705058367913, "learning_rate": 7.075125060870336e-06, "loss": 0.5659, "step": 15982 }, { "epoch": 0.07075567754216654, "grad_norm": 3.2420267421337696, "learning_rate": 7.075567754216654e-06, "loss": 0.8151, "step": 15983 }, { "epoch": 0.07076010447562973, "grad_norm": 2.4045737725963123, "learning_rate": 7.076010447562973e-06, "loss": 0.258, "step": 15984 }, { "epoch": 0.07076453140909292, "grad_norm": 2.9537141450493625, "learning_rate": 7.076453140909293e-06, "loss": 0.516, "step": 15985 }, { "epoch": 0.07076895834255612, "grad_norm": 3.1934172582427696, "learning_rate": 7.0768958342556116e-06, "loss": 0.7438, "step": 15986 }, { "epoch": 0.0707733852760193, "grad_norm": 3.067531624545114, "learning_rate": 7.07733852760193e-06, "loss": 0.7841, "step": 15987 }, { "epoch": 0.0707778122094825, "grad_norm": 2.5986838985022267, "learning_rate": 7.07778122094825e-06, "loss": 0.9111, "step": 15988 }, { "epoch": 0.07078223914294568, "grad_norm": 2.2570374275445264, "learning_rate": 7.078223914294569e-06, "loss": 0.6712, "step": 15989 }, { "epoch": 0.07078666607640888, "grad_norm": 2.9110485028526547, "learning_rate": 7.078666607640887e-06, "loss": 0.8822, "step": 15990 }, { "epoch": 0.07079109300987206, "grad_norm": 2.4476196026265336, "learning_rate": 7.079109300987207e-06, "loss": 0.4891, "step": 15991 }, { "epoch": 0.07079551994333526, "grad_norm": 2.6010619554717396, "learning_rate": 7.079551994333525e-06, "loss": 0.7153, "step": 15992 }, { "epoch": 0.07079994687679844, "grad_norm": 3.0868935336402665, "learning_rate": 7.079994687679845e-06, "loss": 0.9293, "step": 15993 }, { "epoch": 0.07080437381026163, "grad_norm": 2.4099781264937636, "learning_rate": 7.080437381026164e-06, "loss": 0.7619, "step": 15994 }, { "epoch": 0.07080880074372482, "grad_norm": 2.6095117277038518, "learning_rate": 7.0808800743724825e-06, "loss": 0.7842, "step": 15995 }, { "epoch": 0.070813227677188, "grad_norm": 3.215267280993533, "learning_rate": 7.081322767718801e-06, "loss": 0.9492, "step": 15996 }, { "epoch": 0.0708176546106512, "grad_norm": 2.2663503537965433, "learning_rate": 7.081765461065121e-06, "loss": 0.6243, "step": 15997 }, { "epoch": 0.07082208154411439, "grad_norm": 3.002045720781578, "learning_rate": 7.08220815441144e-06, "loss": 1.0402, "step": 15998 }, { "epoch": 0.07082650847757758, "grad_norm": 3.1397353536588817, "learning_rate": 7.082650847757758e-06, "loss": 1.1347, "step": 15999 }, { "epoch": 0.07083093541104077, "grad_norm": 2.504183283431591, "learning_rate": 7.083093541104078e-06, "loss": 0.7151, "step": 16000 } ], "logging_steps": 1, "max_steps": 225890, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 110947177857024.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }