{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.45263025128968226, "eval_steps": 500, "global_step": 229376, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.314596139644004e-05, "grad_norm": 10.52633285522461, "learning_rate": 1.5625e-06, "loss": 11.3101, "step": 32 }, { "epoch": 0.0001262919227928801, "grad_norm": 10.210962295532227, "learning_rate": 3.125e-06, "loss": 11.2799, "step": 64 }, { "epoch": 0.00018943788418932013, "grad_norm": 10.53814697265625, "learning_rate": 4.6875000000000004e-06, "loss": 11.2291, "step": 96 }, { "epoch": 0.0002525838455857602, "grad_norm": 10.390519142150879, "learning_rate": 6.25e-06, "loss": 11.1376, "step": 128 }, { "epoch": 0.00031572980698220025, "grad_norm": 10.355998992919922, "learning_rate": 7.8125e-06, "loss": 10.9842, "step": 160 }, { "epoch": 0.00037887576837864026, "grad_norm": 10.46985912322998, "learning_rate": 9.375000000000001e-06, "loss": 10.5984, "step": 192 }, { "epoch": 0.00044202172977508033, "grad_norm": 10.829180717468262, "learning_rate": 1.09375e-05, "loss": 9.3536, "step": 224 }, { "epoch": 0.0005051676911715204, "grad_norm": 6.283628463745117, "learning_rate": 1.25e-05, "loss": 6.5904, "step": 256 }, { "epoch": 0.0005683136525679605, "grad_norm": 0.3195990324020386, "learning_rate": 1.4062500000000001e-05, "loss": 4.9049, "step": 288 }, { "epoch": 0.0006314596139644005, "grad_norm": 0.2334459125995636, "learning_rate": 1.5625e-05, "loss": 4.7834, "step": 320 }, { "epoch": 0.0006946055753608405, "grad_norm": 0.8244943022727966, "learning_rate": 1.71875e-05, "loss": 4.6017, "step": 352 }, { "epoch": 0.0007577515367572805, "grad_norm": 1.0876139402389526, "learning_rate": 1.8750000000000002e-05, "loss": 4.1688, "step": 384 }, { "epoch": 0.0008208974981537207, "grad_norm": 0.6743049025535583, "learning_rate": 2.0312500000000002e-05, "loss": 3.7257, "step": 416 }, { "epoch": 0.0008840434595501607, "grad_norm": 0.3528505265712738, "learning_rate": 2.1875e-05, "loss": 3.4752, "step": 448 }, { "epoch": 0.0009471894209466007, "grad_norm": 0.21036797761917114, "learning_rate": 2.34375e-05, "loss": 3.4045, "step": 480 }, { "epoch": 0.0010103353823430407, "grad_norm": 0.16363564133644104, "learning_rate": 2.5e-05, "loss": 3.3964, "step": 512 }, { "epoch": 0.0010734813437394807, "grad_norm": 0.12469228357076645, "learning_rate": 2.6562500000000002e-05, "loss": 3.3912, "step": 544 }, { "epoch": 0.001136627305135921, "grad_norm": 0.11611668020486832, "learning_rate": 2.8125000000000003e-05, "loss": 3.3176, "step": 576 }, { "epoch": 0.001199773266532361, "grad_norm": 0.1084858626127243, "learning_rate": 2.96875e-05, "loss": 3.296, "step": 608 }, { "epoch": 0.001262919227928801, "grad_norm": 0.11642524600028992, "learning_rate": 3.125e-05, "loss": 3.3348, "step": 640 }, { "epoch": 0.001326065189325241, "grad_norm": 0.09771055728197098, "learning_rate": 3.2812500000000005e-05, "loss": 3.2892, "step": 672 }, { "epoch": 0.001389211150721681, "grad_norm": 0.08836262673139572, "learning_rate": 3.4375e-05, "loss": 3.2728, "step": 704 }, { "epoch": 0.001452357112118121, "grad_norm": 0.07660011202096939, "learning_rate": 3.59375e-05, "loss": 3.2759, "step": 736 }, { "epoch": 0.001515503073514561, "grad_norm": 0.0760306715965271, "learning_rate": 3.7500000000000003e-05, "loss": 3.2631, "step": 768 }, { "epoch": 0.0015786490349110013, "grad_norm": 0.07291432470083237, "learning_rate": 3.90625e-05, "loss": 3.2215, "step": 800 }, { "epoch": 0.0016417949963074413, "grad_norm": 0.07679640501737595, "learning_rate": 4.0625000000000005e-05, "loss": 3.2085, "step": 832 }, { "epoch": 0.0017049409577038813, "grad_norm": 0.07241988927125931, "learning_rate": 4.21875e-05, "loss": 3.1709, "step": 864 }, { "epoch": 0.0017680869191003213, "grad_norm": 0.06889630109071732, "learning_rate": 4.375e-05, "loss": 3.1494, "step": 896 }, { "epoch": 0.0018312328804967614, "grad_norm": 0.06774609535932541, "learning_rate": 4.5312500000000004e-05, "loss": 3.1271, "step": 928 }, { "epoch": 0.0018943788418932014, "grad_norm": 0.05798167362809181, "learning_rate": 4.6875e-05, "loss": 3.1373, "step": 960 }, { "epoch": 0.0019575248032896414, "grad_norm": 0.062139470130205154, "learning_rate": 4.8437500000000005e-05, "loss": 3.1317, "step": 992 }, { "epoch": 0.0020206707646860814, "grad_norm": 0.058672286570072174, "learning_rate": 5e-05, "loss": 3.096, "step": 1024 }, { "epoch": 0.0020838167260825214, "grad_norm": 0.06083087623119354, "learning_rate": 5.15625e-05, "loss": 3.0899, "step": 1056 }, { "epoch": 0.0021469626874789614, "grad_norm": 0.06108880415558815, "learning_rate": 5.3125000000000004e-05, "loss": 3.0704, "step": 1088 }, { "epoch": 0.002210108648875402, "grad_norm": 0.05641736090183258, "learning_rate": 5.46875e-05, "loss": 3.0791, "step": 1120 }, { "epoch": 0.002273254610271842, "grad_norm": 0.05347690358757973, "learning_rate": 5.6250000000000005e-05, "loss": 3.0401, "step": 1152 }, { "epoch": 0.002336400571668282, "grad_norm": 0.07868298143148422, "learning_rate": 5.78125e-05, "loss": 3.0288, "step": 1184 }, { "epoch": 0.002399546533064722, "grad_norm": 0.11979499459266663, "learning_rate": 5.9375e-05, "loss": 3.0046, "step": 1216 }, { "epoch": 0.002462692494461162, "grad_norm": 0.06157733127474785, "learning_rate": 6.0937500000000004e-05, "loss": 3.0156, "step": 1248 }, { "epoch": 0.002525838455857602, "grad_norm": 0.07994844764471054, "learning_rate": 6.25e-05, "loss": 3.0079, "step": 1280 }, { "epoch": 0.002588984417254042, "grad_norm": 0.05458033084869385, "learning_rate": 6.40625e-05, "loss": 3.0012, "step": 1312 }, { "epoch": 0.002652130378650482, "grad_norm": 0.05429359897971153, "learning_rate": 6.562500000000001e-05, "loss": 2.9843, "step": 1344 }, { "epoch": 0.002715276340046922, "grad_norm": 0.05572785809636116, "learning_rate": 6.71875e-05, "loss": 2.967, "step": 1376 }, { "epoch": 0.002778422301443362, "grad_norm": 0.05930449441075325, "learning_rate": 6.875e-05, "loss": 2.9612, "step": 1408 }, { "epoch": 0.002841568262839802, "grad_norm": 0.05627207085490227, "learning_rate": 7.031250000000001e-05, "loss": 2.9641, "step": 1440 }, { "epoch": 0.002904714224236242, "grad_norm": 0.05794452130794525, "learning_rate": 7.1875e-05, "loss": 2.9627, "step": 1472 }, { "epoch": 0.002967860185632682, "grad_norm": 0.07208114117383957, "learning_rate": 7.34375e-05, "loss": 2.9441, "step": 1504 }, { "epoch": 0.003031006147029122, "grad_norm": 0.07248008996248245, "learning_rate": 7.500000000000001e-05, "loss": 2.9143, "step": 1536 }, { "epoch": 0.0030941521084255626, "grad_norm": 0.06737381219863892, "learning_rate": 7.65625e-05, "loss": 2.94, "step": 1568 }, { "epoch": 0.0031572980698220026, "grad_norm": 0.05484765022993088, "learning_rate": 7.8125e-05, "loss": 2.9153, "step": 1600 }, { "epoch": 0.0032204440312184426, "grad_norm": 0.07437755912542343, "learning_rate": 7.96875e-05, "loss": 2.8954, "step": 1632 }, { "epoch": 0.0032835899926148826, "grad_norm": 0.06876343488693237, "learning_rate": 8.125000000000001e-05, "loss": 2.8888, "step": 1664 }, { "epoch": 0.0033467359540113226, "grad_norm": 0.08410373330116272, "learning_rate": 8.28125e-05, "loss": 2.8979, "step": 1696 }, { "epoch": 0.0034098819154077626, "grad_norm": 0.09201439470052719, "learning_rate": 8.4375e-05, "loss": 2.9048, "step": 1728 }, { "epoch": 0.0034730278768042027, "grad_norm": 0.07328721880912781, "learning_rate": 8.593750000000001e-05, "loss": 2.8579, "step": 1760 }, { "epoch": 0.0035361738382006427, "grad_norm": 0.06861106306314468, "learning_rate": 8.75e-05, "loss": 2.8691, "step": 1792 }, { "epoch": 0.0035993197995970827, "grad_norm": 0.060703031718730927, "learning_rate": 8.90625e-05, "loss": 2.8603, "step": 1824 }, { "epoch": 0.0036624657609935227, "grad_norm": 0.06833851337432861, "learning_rate": 9.062500000000001e-05, "loss": 2.8635, "step": 1856 }, { "epoch": 0.0037256117223899627, "grad_norm": 0.06204499304294586, "learning_rate": 9.21875e-05, "loss": 2.8366, "step": 1888 }, { "epoch": 0.0037887576837864027, "grad_norm": 0.06592980027198792, "learning_rate": 9.375e-05, "loss": 2.8378, "step": 1920 }, { "epoch": 0.0038519036451828428, "grad_norm": 0.06360165029764175, "learning_rate": 9.53125e-05, "loss": 2.8363, "step": 1952 }, { "epoch": 0.003915049606579283, "grad_norm": 0.08830870687961578, "learning_rate": 9.687500000000001e-05, "loss": 2.8353, "step": 1984 }, { "epoch": 0.003978195567975723, "grad_norm": 0.056621383875608444, "learning_rate": 9.84375e-05, "loss": 2.8128, "step": 2016 }, { "epoch": 0.004041341529372163, "grad_norm": 0.13722540438175201, "learning_rate": 0.0001, "loss": 2.8067, "step": 2048 }, { "epoch": 0.004104487490768603, "grad_norm": 0.09581632167100906, "learning_rate": 9.999365977563532e-05, "loss": 2.8154, "step": 2080 }, { "epoch": 0.004167633452165043, "grad_norm": 0.08235198259353638, "learning_rate": 9.998731955127063e-05, "loss": 2.7958, "step": 2112 }, { "epoch": 0.004230779413561483, "grad_norm": 0.08382923156023026, "learning_rate": 9.998097932690593e-05, "loss": 2.7944, "step": 2144 }, { "epoch": 0.004293925374957923, "grad_norm": 0.06371873617172241, "learning_rate": 9.997463910254125e-05, "loss": 2.802, "step": 2176 }, { "epoch": 0.004357071336354363, "grad_norm": 0.08306098729372025, "learning_rate": 9.996829887817656e-05, "loss": 2.7686, "step": 2208 }, { "epoch": 0.004420217297750804, "grad_norm": 0.08489057421684265, "learning_rate": 9.996195865381187e-05, "loss": 2.7685, "step": 2240 }, { "epoch": 0.004483363259147243, "grad_norm": 0.12560348212718964, "learning_rate": 9.995561842944718e-05, "loss": 2.7651, "step": 2272 }, { "epoch": 0.004546509220543684, "grad_norm": 0.11862937361001968, "learning_rate": 9.994927820508248e-05, "loss": 2.7598, "step": 2304 }, { "epoch": 0.004609655181940123, "grad_norm": 0.08164895325899124, "learning_rate": 9.99429379807178e-05, "loss": 2.7607, "step": 2336 }, { "epoch": 0.004672801143336564, "grad_norm": 0.10594496130943298, "learning_rate": 9.993659775635311e-05, "loss": 2.723, "step": 2368 }, { "epoch": 0.004735947104733003, "grad_norm": 0.10034490376710892, "learning_rate": 9.993025753198841e-05, "loss": 2.7459, "step": 2400 }, { "epoch": 0.004799093066129444, "grad_norm": 0.08003844320774078, "learning_rate": 9.992391730762373e-05, "loss": 2.7368, "step": 2432 }, { "epoch": 0.0048622390275258835, "grad_norm": 0.06444230675697327, "learning_rate": 9.991757708325904e-05, "loss": 2.76, "step": 2464 }, { "epoch": 0.004925384988922324, "grad_norm": 0.1225021705031395, "learning_rate": 9.991123685889435e-05, "loss": 2.7376, "step": 2496 }, { "epoch": 0.0049885309503187635, "grad_norm": 0.15236003696918488, "learning_rate": 9.990489663452966e-05, "loss": 2.726, "step": 2528 }, { "epoch": 0.005051676911715204, "grad_norm": 0.11898784339427948, "learning_rate": 9.989855641016497e-05, "loss": 2.7456, "step": 2560 }, { "epoch": 0.0051148228731116435, "grad_norm": 0.0697527602314949, "learning_rate": 9.989221618580028e-05, "loss": 2.7145, "step": 2592 }, { "epoch": 0.005177968834508084, "grad_norm": 0.15741455554962158, "learning_rate": 9.98858759614356e-05, "loss": 2.7207, "step": 2624 }, { "epoch": 0.0052411147959045236, "grad_norm": 0.11154522001743317, "learning_rate": 9.98795357370709e-05, "loss": 2.6988, "step": 2656 }, { "epoch": 0.005304260757300964, "grad_norm": 0.14753562211990356, "learning_rate": 9.987319551270621e-05, "loss": 2.7388, "step": 2688 }, { "epoch": 0.0053674067186974045, "grad_norm": 0.1524190604686737, "learning_rate": 9.986685528834152e-05, "loss": 2.707, "step": 2720 }, { "epoch": 0.005430552680093844, "grad_norm": 0.09351467341184616, "learning_rate": 9.986051506397683e-05, "loss": 2.7009, "step": 2752 }, { "epoch": 0.0054936986414902845, "grad_norm": 0.14326535165309906, "learning_rate": 9.985417483961214e-05, "loss": 2.7022, "step": 2784 }, { "epoch": 0.005556844602886724, "grad_norm": 0.10329147428274155, "learning_rate": 9.984783461524745e-05, "loss": 2.6885, "step": 2816 }, { "epoch": 0.0056199905642831645, "grad_norm": 0.12978807091712952, "learning_rate": 9.984149439088276e-05, "loss": 2.6897, "step": 2848 }, { "epoch": 0.005683136525679604, "grad_norm": 0.13619905710220337, "learning_rate": 9.983515416651807e-05, "loss": 2.6886, "step": 2880 }, { "epoch": 0.0057462824870760446, "grad_norm": 0.11991044133901596, "learning_rate": 9.982881394215339e-05, "loss": 2.6807, "step": 2912 }, { "epoch": 0.005809428448472484, "grad_norm": 0.11966120451688766, "learning_rate": 9.982247371778869e-05, "loss": 2.6532, "step": 2944 }, { "epoch": 0.005872574409868925, "grad_norm": 0.0946977287530899, "learning_rate": 9.9816133493424e-05, "loss": 2.6688, "step": 2976 }, { "epoch": 0.005935720371265364, "grad_norm": 0.10586100816726685, "learning_rate": 9.980979326905932e-05, "loss": 2.6861, "step": 3008 }, { "epoch": 0.005998866332661805, "grad_norm": 0.10303099453449249, "learning_rate": 9.980345304469463e-05, "loss": 2.6866, "step": 3040 }, { "epoch": 0.006062012294058244, "grad_norm": 0.09335581958293915, "learning_rate": 9.979711282032994e-05, "loss": 2.6949, "step": 3072 }, { "epoch": 0.006125158255454685, "grad_norm": 0.1507342904806137, "learning_rate": 9.979077259596524e-05, "loss": 2.6616, "step": 3104 }, { "epoch": 0.006188304216851125, "grad_norm": 0.15320587158203125, "learning_rate": 9.978443237160055e-05, "loss": 2.6616, "step": 3136 }, { "epoch": 0.006251450178247565, "grad_norm": 0.12013068795204163, "learning_rate": 9.977809214723587e-05, "loss": 2.6482, "step": 3168 }, { "epoch": 0.006314596139644005, "grad_norm": 0.1357378363609314, "learning_rate": 9.977175192287117e-05, "loss": 2.6615, "step": 3200 }, { "epoch": 0.006377742101040445, "grad_norm": 0.12570512294769287, "learning_rate": 9.976541169850648e-05, "loss": 2.6419, "step": 3232 }, { "epoch": 0.006440888062436885, "grad_norm": 0.13948673009872437, "learning_rate": 9.97590714741418e-05, "loss": 2.6446, "step": 3264 }, { "epoch": 0.006504034023833325, "grad_norm": 0.08918385952711105, "learning_rate": 9.975273124977711e-05, "loss": 2.6327, "step": 3296 }, { "epoch": 0.006567179985229765, "grad_norm": 0.14465107023715973, "learning_rate": 9.974639102541241e-05, "loss": 2.6517, "step": 3328 }, { "epoch": 0.006630325946626205, "grad_norm": 0.10744307190179825, "learning_rate": 9.974005080104773e-05, "loss": 2.6524, "step": 3360 }, { "epoch": 0.006693471908022645, "grad_norm": 0.1016412228345871, "learning_rate": 9.973371057668304e-05, "loss": 2.6375, "step": 3392 }, { "epoch": 0.006756617869419085, "grad_norm": 0.1091398224234581, "learning_rate": 9.972737035231835e-05, "loss": 2.6379, "step": 3424 }, { "epoch": 0.006819763830815525, "grad_norm": 0.0891808420419693, "learning_rate": 9.972103012795367e-05, "loss": 2.6314, "step": 3456 }, { "epoch": 0.006882909792211965, "grad_norm": 0.11935264617204666, "learning_rate": 9.971468990358897e-05, "loss": 2.6265, "step": 3488 }, { "epoch": 0.006946055753608405, "grad_norm": 0.10122443735599518, "learning_rate": 9.970834967922427e-05, "loss": 2.6441, "step": 3520 }, { "epoch": 0.007009201715004845, "grad_norm": 0.12963150441646576, "learning_rate": 9.970200945485959e-05, "loss": 2.6326, "step": 3552 }, { "epoch": 0.007072347676401285, "grad_norm": 0.2340211272239685, "learning_rate": 9.96956692304949e-05, "loss": 2.6125, "step": 3584 }, { "epoch": 0.007135493637797726, "grad_norm": 0.1019572764635086, "learning_rate": 9.96893290061302e-05, "loss": 2.6032, "step": 3616 }, { "epoch": 0.007198639599194165, "grad_norm": 0.18724122643470764, "learning_rate": 9.968298878176552e-05, "loss": 2.6258, "step": 3648 }, { "epoch": 0.007261785560590606, "grad_norm": 0.14609533548355103, "learning_rate": 9.967664855740083e-05, "loss": 2.5863, "step": 3680 }, { "epoch": 0.007324931521987045, "grad_norm": 0.11218056827783585, "learning_rate": 9.967030833303615e-05, "loss": 2.6391, "step": 3712 }, { "epoch": 0.007388077483383486, "grad_norm": 0.09486357867717743, "learning_rate": 9.966396810867145e-05, "loss": 2.5997, "step": 3744 }, { "epoch": 0.0074512234447799254, "grad_norm": 0.1694619506597519, "learning_rate": 9.965762788430676e-05, "loss": 2.5852, "step": 3776 }, { "epoch": 0.007514369406176366, "grad_norm": 0.13817843794822693, "learning_rate": 9.965128765994208e-05, "loss": 2.5845, "step": 3808 }, { "epoch": 0.0075775153675728055, "grad_norm": 0.16682597994804382, "learning_rate": 9.964494743557739e-05, "loss": 2.6041, "step": 3840 }, { "epoch": 0.007640661328969246, "grad_norm": 0.08626808971166611, "learning_rate": 9.963860721121269e-05, "loss": 2.6296, "step": 3872 }, { "epoch": 0.0077038072903656855, "grad_norm": 0.11467722803354263, "learning_rate": 9.9632266986848e-05, "loss": 2.5933, "step": 3904 }, { "epoch": 0.007766953251762126, "grad_norm": 0.11957038938999176, "learning_rate": 9.96259267624833e-05, "loss": 2.5891, "step": 3936 }, { "epoch": 0.007830099213158566, "grad_norm": 0.10939373075962067, "learning_rate": 9.961958653811862e-05, "loss": 2.5775, "step": 3968 }, { "epoch": 0.007893245174555006, "grad_norm": 0.13572102785110474, "learning_rate": 9.961324631375392e-05, "loss": 2.5927, "step": 4000 }, { "epoch": 0.007956391135951446, "grad_norm": 0.10408812016248703, "learning_rate": 9.960690608938924e-05, "loss": 2.5642, "step": 4032 }, { "epoch": 0.008019537097347887, "grad_norm": 0.10480830818414688, "learning_rate": 9.960056586502455e-05, "loss": 2.5907, "step": 4064 }, { "epoch": 0.008082683058744326, "grad_norm": 0.13999754190444946, "learning_rate": 9.959422564065987e-05, "loss": 2.5944, "step": 4096 }, { "epoch": 0.008145829020140766, "grad_norm": 0.12907759845256805, "learning_rate": 9.958788541629518e-05, "loss": 2.5821, "step": 4128 }, { "epoch": 0.008208974981537207, "grad_norm": 0.12192639708518982, "learning_rate": 9.958154519193048e-05, "loss": 2.5634, "step": 4160 }, { "epoch": 0.008272120942933647, "grad_norm": 0.09974056482315063, "learning_rate": 9.95752049675658e-05, "loss": 2.5784, "step": 4192 }, { "epoch": 0.008335266904330086, "grad_norm": 0.12721870839595795, "learning_rate": 9.956886474320111e-05, "loss": 2.5651, "step": 4224 }, { "epoch": 0.008398412865726526, "grad_norm": 0.11077609658241272, "learning_rate": 9.956252451883643e-05, "loss": 2.5719, "step": 4256 }, { "epoch": 0.008461558827122967, "grad_norm": 0.16812993586063385, "learning_rate": 9.955618429447173e-05, "loss": 2.5628, "step": 4288 }, { "epoch": 0.008524704788519407, "grad_norm": 0.12394724786281586, "learning_rate": 9.954984407010704e-05, "loss": 2.5557, "step": 4320 }, { "epoch": 0.008587850749915846, "grad_norm": 0.09741286188364029, "learning_rate": 9.954350384574234e-05, "loss": 2.5337, "step": 4352 }, { "epoch": 0.008650996711312286, "grad_norm": 0.15195594727993011, "learning_rate": 9.953716362137766e-05, "loss": 2.5611, "step": 4384 }, { "epoch": 0.008714142672708727, "grad_norm": 0.12760357558727264, "learning_rate": 9.953082339701296e-05, "loss": 2.5581, "step": 4416 }, { "epoch": 0.008777288634105167, "grad_norm": 0.12614698708057404, "learning_rate": 9.952448317264827e-05, "loss": 2.5575, "step": 4448 }, { "epoch": 0.008840434595501608, "grad_norm": 0.1622260957956314, "learning_rate": 9.951814294828359e-05, "loss": 2.5658, "step": 4480 }, { "epoch": 0.008903580556898046, "grad_norm": 0.14965364336967468, "learning_rate": 9.95118027239189e-05, "loss": 2.5405, "step": 4512 }, { "epoch": 0.008966726518294487, "grad_norm": 0.12377427518367767, "learning_rate": 9.95054624995542e-05, "loss": 2.5316, "step": 4544 }, { "epoch": 0.009029872479690927, "grad_norm": 0.12055251747369766, "learning_rate": 9.949912227518952e-05, "loss": 2.5432, "step": 4576 }, { "epoch": 0.009093018441087368, "grad_norm": 0.12006771564483643, "learning_rate": 9.949278205082483e-05, "loss": 2.548, "step": 4608 }, { "epoch": 0.009156164402483806, "grad_norm": 0.12881246209144592, "learning_rate": 9.948644182646015e-05, "loss": 2.5318, "step": 4640 }, { "epoch": 0.009219310363880247, "grad_norm": 0.19436371326446533, "learning_rate": 9.948010160209545e-05, "loss": 2.5281, "step": 4672 }, { "epoch": 0.009282456325276687, "grad_norm": 0.16575217247009277, "learning_rate": 9.947376137773076e-05, "loss": 2.5385, "step": 4704 }, { "epoch": 0.009345602286673128, "grad_norm": 0.14120186865329742, "learning_rate": 9.946742115336608e-05, "loss": 2.5213, "step": 4736 }, { "epoch": 0.009408748248069566, "grad_norm": 0.1341506838798523, "learning_rate": 9.946108092900138e-05, "loss": 2.5289, "step": 4768 }, { "epoch": 0.009471894209466007, "grad_norm": 0.08819320797920227, "learning_rate": 9.945474070463669e-05, "loss": 2.5144, "step": 4800 }, { "epoch": 0.009535040170862447, "grad_norm": 0.10903503000736237, "learning_rate": 9.944840048027199e-05, "loss": 2.5452, "step": 4832 }, { "epoch": 0.009598186132258888, "grad_norm": 0.1760348528623581, "learning_rate": 9.944206025590731e-05, "loss": 2.5241, "step": 4864 }, { "epoch": 0.009661332093655326, "grad_norm": 0.1575450599193573, "learning_rate": 9.943572003154262e-05, "loss": 2.5206, "step": 4896 }, { "epoch": 0.009724478055051767, "grad_norm": 0.14991284906864166, "learning_rate": 9.942937980717794e-05, "loss": 2.514, "step": 4928 }, { "epoch": 0.009787624016448207, "grad_norm": 0.13919000327587128, "learning_rate": 9.942303958281324e-05, "loss": 2.5254, "step": 4960 }, { "epoch": 0.009850769977844648, "grad_norm": 0.14055348932743073, "learning_rate": 9.941669935844855e-05, "loss": 2.5022, "step": 4992 }, { "epoch": 0.009913915939241088, "grad_norm": 0.13361525535583496, "learning_rate": 9.941035913408387e-05, "loss": 2.5265, "step": 5024 }, { "epoch": 0.009977061900637527, "grad_norm": 0.09767939150333405, "learning_rate": 9.940401890971918e-05, "loss": 2.5184, "step": 5056 }, { "epoch": 0.010040207862033967, "grad_norm": 0.10839032381772995, "learning_rate": 9.939767868535448e-05, "loss": 2.5075, "step": 5088 }, { "epoch": 0.010103353823430408, "grad_norm": 0.17004302144050598, "learning_rate": 9.93913384609898e-05, "loss": 2.4998, "step": 5120 }, { "epoch": 0.010166499784826848, "grad_norm": 0.15774109959602356, "learning_rate": 9.938499823662511e-05, "loss": 2.4954, "step": 5152 }, { "epoch": 0.010229645746223287, "grad_norm": 0.11758945882320404, "learning_rate": 9.937865801226041e-05, "loss": 2.5128, "step": 5184 }, { "epoch": 0.010292791707619727, "grad_norm": 0.12542724609375, "learning_rate": 9.937231778789571e-05, "loss": 2.4798, "step": 5216 }, { "epoch": 0.010355937669016168, "grad_norm": 0.14986108243465424, "learning_rate": 9.936597756353103e-05, "loss": 2.4902, "step": 5248 }, { "epoch": 0.010419083630412608, "grad_norm": 0.14334240555763245, "learning_rate": 9.935963733916634e-05, "loss": 2.5063, "step": 5280 }, { "epoch": 0.010482229591809047, "grad_norm": 0.15978749096393585, "learning_rate": 9.935329711480166e-05, "loss": 2.4669, "step": 5312 }, { "epoch": 0.010545375553205488, "grad_norm": 0.14394745230674744, "learning_rate": 9.934695689043696e-05, "loss": 2.4786, "step": 5344 }, { "epoch": 0.010608521514601928, "grad_norm": 0.11807334423065186, "learning_rate": 9.934061666607227e-05, "loss": 2.4714, "step": 5376 }, { "epoch": 0.010671667475998368, "grad_norm": 0.11549796909093857, "learning_rate": 9.933427644170759e-05, "loss": 2.4937, "step": 5408 }, { "epoch": 0.010734813437394809, "grad_norm": 0.09807826578617096, "learning_rate": 9.93279362173429e-05, "loss": 2.5044, "step": 5440 }, { "epoch": 0.010797959398791248, "grad_norm": 0.15426675975322723, "learning_rate": 9.932159599297822e-05, "loss": 2.4747, "step": 5472 }, { "epoch": 0.010861105360187688, "grad_norm": 0.11509829014539719, "learning_rate": 9.931525576861352e-05, "loss": 2.4819, "step": 5504 }, { "epoch": 0.010924251321584129, "grad_norm": 0.11389163881540298, "learning_rate": 9.930891554424883e-05, "loss": 2.4693, "step": 5536 }, { "epoch": 0.010987397282980569, "grad_norm": 0.15173538029193878, "learning_rate": 9.930257531988413e-05, "loss": 2.4741, "step": 5568 }, { "epoch": 0.011050543244377008, "grad_norm": 0.08514861762523651, "learning_rate": 9.929623509551945e-05, "loss": 2.4691, "step": 5600 }, { "epoch": 0.011113689205773448, "grad_norm": 0.14991246163845062, "learning_rate": 9.928989487115475e-05, "loss": 2.4765, "step": 5632 }, { "epoch": 0.011176835167169889, "grad_norm": 0.18143261969089508, "learning_rate": 9.928355464679006e-05, "loss": 2.4821, "step": 5664 }, { "epoch": 0.011239981128566329, "grad_norm": 0.09394911676645279, "learning_rate": 9.927721442242538e-05, "loss": 2.4814, "step": 5696 }, { "epoch": 0.011303127089962768, "grad_norm": 0.14808283746242523, "learning_rate": 9.927087419806069e-05, "loss": 2.4528, "step": 5728 }, { "epoch": 0.011366273051359208, "grad_norm": 0.15319721400737762, "learning_rate": 9.926453397369599e-05, "loss": 2.4617, "step": 5760 }, { "epoch": 0.011429419012755649, "grad_norm": 0.1394672840833664, "learning_rate": 9.925819374933131e-05, "loss": 2.4676, "step": 5792 }, { "epoch": 0.011492564974152089, "grad_norm": 0.12285158038139343, "learning_rate": 9.925185352496662e-05, "loss": 2.4441, "step": 5824 }, { "epoch": 0.01155571093554853, "grad_norm": 0.1122804805636406, "learning_rate": 9.924551330060194e-05, "loss": 2.4419, "step": 5856 }, { "epoch": 0.011618856896944968, "grad_norm": 0.15353912115097046, "learning_rate": 9.923917307623724e-05, "loss": 2.4574, "step": 5888 }, { "epoch": 0.011682002858341409, "grad_norm": 0.11223366111516953, "learning_rate": 9.923283285187255e-05, "loss": 2.4401, "step": 5920 }, { "epoch": 0.01174514881973785, "grad_norm": 0.144102081656456, "learning_rate": 9.922649262750787e-05, "loss": 2.4421, "step": 5952 }, { "epoch": 0.01180829478113429, "grad_norm": 0.11691421270370483, "learning_rate": 9.922015240314317e-05, "loss": 2.4482, "step": 5984 }, { "epoch": 0.011871440742530728, "grad_norm": 0.10664689540863037, "learning_rate": 9.921381217877848e-05, "loss": 2.4675, "step": 6016 }, { "epoch": 0.011934586703927169, "grad_norm": 0.17872704565525055, "learning_rate": 9.920747195441378e-05, "loss": 2.4587, "step": 6048 }, { "epoch": 0.01199773266532361, "grad_norm": 0.18183505535125732, "learning_rate": 9.92011317300491e-05, "loss": 2.456, "step": 6080 }, { "epoch": 0.01206087862672005, "grad_norm": 0.1468466818332672, "learning_rate": 9.919479150568441e-05, "loss": 2.4525, "step": 6112 }, { "epoch": 0.012124024588116488, "grad_norm": 0.1823159009218216, "learning_rate": 9.918845128131973e-05, "loss": 2.4499, "step": 6144 }, { "epoch": 0.012187170549512929, "grad_norm": 0.1419377326965332, "learning_rate": 9.918211105695503e-05, "loss": 2.4478, "step": 6176 }, { "epoch": 0.01225031651090937, "grad_norm": 0.1857587844133377, "learning_rate": 9.917577083259034e-05, "loss": 2.4584, "step": 6208 }, { "epoch": 0.01231346247230581, "grad_norm": 0.11730950325727463, "learning_rate": 9.916943060822566e-05, "loss": 2.4331, "step": 6240 }, { "epoch": 0.01237660843370225, "grad_norm": 0.13131648302078247, "learning_rate": 9.916309038386097e-05, "loss": 2.4243, "step": 6272 }, { "epoch": 0.012439754395098689, "grad_norm": 0.1440175622701645, "learning_rate": 9.915675015949627e-05, "loss": 2.4368, "step": 6304 }, { "epoch": 0.01250290035649513, "grad_norm": 0.13897374272346497, "learning_rate": 9.915040993513159e-05, "loss": 2.4205, "step": 6336 }, { "epoch": 0.01256604631789157, "grad_norm": 0.11762618273496628, "learning_rate": 9.91440697107669e-05, "loss": 2.4145, "step": 6368 }, { "epoch": 0.01262919227928801, "grad_norm": 0.13914981484413147, "learning_rate": 9.91377294864022e-05, "loss": 2.4289, "step": 6400 }, { "epoch": 0.012692338240684449, "grad_norm": 0.13673797249794006, "learning_rate": 9.913138926203752e-05, "loss": 2.4316, "step": 6432 }, { "epoch": 0.01275548420208089, "grad_norm": 0.13205789029598236, "learning_rate": 9.912504903767282e-05, "loss": 2.4166, "step": 6464 }, { "epoch": 0.01281863016347733, "grad_norm": 0.1587177962064743, "learning_rate": 9.911870881330813e-05, "loss": 2.4257, "step": 6496 }, { "epoch": 0.01288177612487377, "grad_norm": 0.15956047177314758, "learning_rate": 9.911236858894345e-05, "loss": 2.4288, "step": 6528 }, { "epoch": 0.012944922086270209, "grad_norm": 0.14064407348632812, "learning_rate": 9.910602836457875e-05, "loss": 2.394, "step": 6560 }, { "epoch": 0.01300806804766665, "grad_norm": 0.13206064701080322, "learning_rate": 9.909968814021406e-05, "loss": 2.4063, "step": 6592 }, { "epoch": 0.01307121400906309, "grad_norm": 0.13762450218200684, "learning_rate": 9.909334791584938e-05, "loss": 2.4285, "step": 6624 }, { "epoch": 0.01313435997045953, "grad_norm": 0.12068444490432739, "learning_rate": 9.908700769148469e-05, "loss": 2.3994, "step": 6656 }, { "epoch": 0.013197505931855969, "grad_norm": 0.1713280975818634, "learning_rate": 9.908066746711999e-05, "loss": 2.4185, "step": 6688 }, { "epoch": 0.01326065189325241, "grad_norm": 0.15375898778438568, "learning_rate": 9.907432724275531e-05, "loss": 2.4047, "step": 6720 }, { "epoch": 0.01332379785464885, "grad_norm": 0.13617244362831116, "learning_rate": 9.906798701839062e-05, "loss": 2.4041, "step": 6752 }, { "epoch": 0.01338694381604529, "grad_norm": 0.10807432234287262, "learning_rate": 9.906164679402594e-05, "loss": 2.3981, "step": 6784 }, { "epoch": 0.013450089777441731, "grad_norm": 0.12593461573123932, "learning_rate": 9.905530656966124e-05, "loss": 2.4024, "step": 6816 }, { "epoch": 0.01351323573883817, "grad_norm": 0.1572636216878891, "learning_rate": 9.904896634529655e-05, "loss": 2.3851, "step": 6848 }, { "epoch": 0.01357638170023461, "grad_norm": 0.12052024155855179, "learning_rate": 9.904262612093185e-05, "loss": 2.4108, "step": 6880 }, { "epoch": 0.01363952766163105, "grad_norm": 0.12341807782649994, "learning_rate": 9.903628589656717e-05, "loss": 2.378, "step": 6912 }, { "epoch": 0.013702673623027491, "grad_norm": 0.11022727936506271, "learning_rate": 9.902994567220248e-05, "loss": 2.3936, "step": 6944 }, { "epoch": 0.01376581958442393, "grad_norm": 0.1105339452624321, "learning_rate": 9.902360544783778e-05, "loss": 2.3723, "step": 6976 }, { "epoch": 0.01382896554582037, "grad_norm": 0.10231024026870728, "learning_rate": 9.90172652234731e-05, "loss": 2.3974, "step": 7008 }, { "epoch": 0.01389211150721681, "grad_norm": 0.1630864292383194, "learning_rate": 9.901092499910841e-05, "loss": 2.3695, "step": 7040 }, { "epoch": 0.013955257468613251, "grad_norm": 0.13933351635932922, "learning_rate": 9.900458477474373e-05, "loss": 2.4029, "step": 7072 }, { "epoch": 0.01401840343000969, "grad_norm": 0.12549299001693726, "learning_rate": 9.899824455037903e-05, "loss": 2.402, "step": 7104 }, { "epoch": 0.01408154939140613, "grad_norm": 0.16632123291492462, "learning_rate": 9.899190432601434e-05, "loss": 2.3822, "step": 7136 }, { "epoch": 0.01414469535280257, "grad_norm": 0.0944233387708664, "learning_rate": 9.898556410164966e-05, "loss": 2.3816, "step": 7168 }, { "epoch": 0.014207841314199011, "grad_norm": 0.1514974981546402, "learning_rate": 9.897922387728497e-05, "loss": 2.3894, "step": 7200 }, { "epoch": 0.014270987275595452, "grad_norm": 0.10387679934501648, "learning_rate": 9.897288365292027e-05, "loss": 2.3789, "step": 7232 }, { "epoch": 0.01433413323699189, "grad_norm": 0.12096551805734634, "learning_rate": 9.896654342855557e-05, "loss": 2.3716, "step": 7264 }, { "epoch": 0.01439727919838833, "grad_norm": 0.12126098573207855, "learning_rate": 9.896020320419089e-05, "loss": 2.371, "step": 7296 }, { "epoch": 0.014460425159784771, "grad_norm": 0.1568283885717392, "learning_rate": 9.89538629798262e-05, "loss": 2.3794, "step": 7328 }, { "epoch": 0.014523571121181212, "grad_norm": 0.14460304379463196, "learning_rate": 9.894752275546152e-05, "loss": 2.3773, "step": 7360 }, { "epoch": 0.01458671708257765, "grad_norm": 0.11914809048175812, "learning_rate": 9.894118253109682e-05, "loss": 2.3522, "step": 7392 }, { "epoch": 0.01464986304397409, "grad_norm": 0.11162387579679489, "learning_rate": 9.893484230673213e-05, "loss": 2.3808, "step": 7424 }, { "epoch": 0.014713009005370531, "grad_norm": 0.170000359416008, "learning_rate": 9.892850208236745e-05, "loss": 2.3767, "step": 7456 }, { "epoch": 0.014776154966766972, "grad_norm": 0.1726778894662857, "learning_rate": 9.892216185800276e-05, "loss": 2.3874, "step": 7488 }, { "epoch": 0.01483930092816341, "grad_norm": 0.15378707647323608, "learning_rate": 9.891582163363806e-05, "loss": 2.3731, "step": 7520 }, { "epoch": 0.014902446889559851, "grad_norm": 0.12425557523965836, "learning_rate": 9.890948140927338e-05, "loss": 2.366, "step": 7552 }, { "epoch": 0.014965592850956291, "grad_norm": 0.12605996429920197, "learning_rate": 9.890314118490869e-05, "loss": 2.3739, "step": 7584 }, { "epoch": 0.015028738812352732, "grad_norm": 0.12717851996421814, "learning_rate": 9.889680096054401e-05, "loss": 2.3655, "step": 7616 }, { "epoch": 0.015091884773749172, "grad_norm": 0.16373498737812042, "learning_rate": 9.889046073617931e-05, "loss": 2.3521, "step": 7648 }, { "epoch": 0.015155030735145611, "grad_norm": 0.1463109701871872, "learning_rate": 9.888412051181461e-05, "loss": 2.3661, "step": 7680 }, { "epoch": 0.015218176696542051, "grad_norm": 0.13224859535694122, "learning_rate": 9.887778028744992e-05, "loss": 2.3706, "step": 7712 }, { "epoch": 0.015281322657938492, "grad_norm": 0.11841218918561935, "learning_rate": 9.887144006308524e-05, "loss": 2.3845, "step": 7744 }, { "epoch": 0.015344468619334932, "grad_norm": 0.13370342552661896, "learning_rate": 9.886509983872054e-05, "loss": 2.3745, "step": 7776 }, { "epoch": 0.015407614580731371, "grad_norm": 0.12113891541957855, "learning_rate": 9.885875961435585e-05, "loss": 2.3523, "step": 7808 }, { "epoch": 0.015470760542127811, "grad_norm": 0.1323300004005432, "learning_rate": 9.885241938999117e-05, "loss": 2.3777, "step": 7840 }, { "epoch": 0.015533906503524252, "grad_norm": 0.12009269744157791, "learning_rate": 9.884607916562648e-05, "loss": 2.3426, "step": 7872 }, { "epoch": 0.015597052464920692, "grad_norm": 0.11964011192321777, "learning_rate": 9.883973894126178e-05, "loss": 2.3754, "step": 7904 }, { "epoch": 0.01566019842631713, "grad_norm": 0.1414196938276291, "learning_rate": 9.88333987168971e-05, "loss": 2.3454, "step": 7936 }, { "epoch": 0.015723344387713573, "grad_norm": 0.11974673718214035, "learning_rate": 9.882705849253241e-05, "loss": 2.3345, "step": 7968 }, { "epoch": 0.015786490349110012, "grad_norm": 0.12504935264587402, "learning_rate": 9.882071826816773e-05, "loss": 2.3336, "step": 8000 }, { "epoch": 0.01584963631050645, "grad_norm": 0.13010753691196442, "learning_rate": 9.881437804380304e-05, "loss": 2.3503, "step": 8032 }, { "epoch": 0.015912782271902893, "grad_norm": 0.10797622054815292, "learning_rate": 9.880803781943834e-05, "loss": 2.3447, "step": 8064 }, { "epoch": 0.01597592823329933, "grad_norm": 0.1536203771829605, "learning_rate": 9.880169759507364e-05, "loss": 2.332, "step": 8096 }, { "epoch": 0.016039074194695774, "grad_norm": 0.1467064470052719, "learning_rate": 9.879535737070896e-05, "loss": 2.3462, "step": 8128 }, { "epoch": 0.016102220156092212, "grad_norm": 0.1106255054473877, "learning_rate": 9.878901714634427e-05, "loss": 2.3298, "step": 8160 }, { "epoch": 0.01616536611748865, "grad_norm": 0.12443240731954575, "learning_rate": 9.878267692197957e-05, "loss": 2.3472, "step": 8192 }, { "epoch": 0.016228512078885093, "grad_norm": 0.12979218363761902, "learning_rate": 9.877633669761489e-05, "loss": 2.3486, "step": 8224 }, { "epoch": 0.016291658040281532, "grad_norm": 0.10633663088083267, "learning_rate": 9.87699964732502e-05, "loss": 2.3483, "step": 8256 }, { "epoch": 0.01635480400167797, "grad_norm": 0.13850173354148865, "learning_rate": 9.876365624888552e-05, "loss": 2.3359, "step": 8288 }, { "epoch": 0.016417949963074413, "grad_norm": 0.16345906257629395, "learning_rate": 9.875731602452082e-05, "loss": 2.332, "step": 8320 }, { "epoch": 0.01648109592447085, "grad_norm": 0.1388985812664032, "learning_rate": 9.875097580015613e-05, "loss": 2.3383, "step": 8352 }, { "epoch": 0.016544241885867294, "grad_norm": 0.10737188905477524, "learning_rate": 9.874463557579145e-05, "loss": 2.3233, "step": 8384 }, { "epoch": 0.016607387847263733, "grad_norm": 0.15225957334041595, "learning_rate": 9.873829535142676e-05, "loss": 2.319, "step": 8416 }, { "epoch": 0.01667053380866017, "grad_norm": 0.15039414167404175, "learning_rate": 9.873195512706206e-05, "loss": 2.3375, "step": 8448 }, { "epoch": 0.016733679770056614, "grad_norm": 0.15000516176223755, "learning_rate": 9.872561490269738e-05, "loss": 2.3286, "step": 8480 }, { "epoch": 0.016796825731453052, "grad_norm": 0.12463720142841339, "learning_rate": 9.871927467833268e-05, "loss": 2.3407, "step": 8512 }, { "epoch": 0.016859971692849494, "grad_norm": 0.1400393396615982, "learning_rate": 9.8712934453968e-05, "loss": 2.3171, "step": 8544 }, { "epoch": 0.016923117654245933, "grad_norm": 0.15409910678863525, "learning_rate": 9.87065942296033e-05, "loss": 2.3156, "step": 8576 }, { "epoch": 0.016986263615642372, "grad_norm": 0.13042177259922028, "learning_rate": 9.870025400523861e-05, "loss": 2.3182, "step": 8608 }, { "epoch": 0.017049409577038814, "grad_norm": 0.1081709936261177, "learning_rate": 9.869391378087392e-05, "loss": 2.3263, "step": 8640 }, { "epoch": 0.017112555538435253, "grad_norm": 0.12457891553640366, "learning_rate": 9.868757355650924e-05, "loss": 2.325, "step": 8672 }, { "epoch": 0.01717570149983169, "grad_norm": 0.17015835642814636, "learning_rate": 9.868123333214455e-05, "loss": 2.3214, "step": 8704 }, { "epoch": 0.017238847461228134, "grad_norm": 0.13807617127895355, "learning_rate": 9.867489310777985e-05, "loss": 2.315, "step": 8736 }, { "epoch": 0.017301993422624572, "grad_norm": 0.11597169935703278, "learning_rate": 9.866855288341517e-05, "loss": 2.3442, "step": 8768 }, { "epoch": 0.017365139384021015, "grad_norm": 0.13555516302585602, "learning_rate": 9.866221265905048e-05, "loss": 2.3178, "step": 8800 }, { "epoch": 0.017428285345417453, "grad_norm": 0.14685258269309998, "learning_rate": 9.86558724346858e-05, "loss": 2.3154, "step": 8832 }, { "epoch": 0.017491431306813892, "grad_norm": 0.1215466782450676, "learning_rate": 9.86495322103211e-05, "loss": 2.3259, "step": 8864 }, { "epoch": 0.017554577268210334, "grad_norm": 0.13843005895614624, "learning_rate": 9.864319198595641e-05, "loss": 2.3255, "step": 8896 }, { "epoch": 0.017617723229606773, "grad_norm": 0.1204628273844719, "learning_rate": 9.863685176159171e-05, "loss": 2.32, "step": 8928 }, { "epoch": 0.017680869191003215, "grad_norm": 0.13078300654888153, "learning_rate": 9.863051153722703e-05, "loss": 2.3022, "step": 8960 }, { "epoch": 0.017744015152399654, "grad_norm": 0.09985719621181488, "learning_rate": 9.862417131286233e-05, "loss": 2.3046, "step": 8992 }, { "epoch": 0.017807161113796093, "grad_norm": 0.16064776480197906, "learning_rate": 9.861783108849764e-05, "loss": 2.3086, "step": 9024 }, { "epoch": 0.017870307075192535, "grad_norm": 0.15166877210140228, "learning_rate": 9.861149086413296e-05, "loss": 2.319, "step": 9056 }, { "epoch": 0.017933453036588973, "grad_norm": 0.11681035161018372, "learning_rate": 9.860515063976827e-05, "loss": 2.3085, "step": 9088 }, { "epoch": 0.017996598997985412, "grad_norm": 0.16018840670585632, "learning_rate": 9.859881041540357e-05, "loss": 2.3146, "step": 9120 }, { "epoch": 0.018059744959381854, "grad_norm": 0.12750431895256042, "learning_rate": 9.859247019103889e-05, "loss": 2.3054, "step": 9152 }, { "epoch": 0.018122890920778293, "grad_norm": 0.1396716833114624, "learning_rate": 9.85861299666742e-05, "loss": 2.3153, "step": 9184 }, { "epoch": 0.018186036882174735, "grad_norm": 0.1469569206237793, "learning_rate": 9.857978974230952e-05, "loss": 2.2942, "step": 9216 }, { "epoch": 0.018249182843571174, "grad_norm": 0.16730424761772156, "learning_rate": 9.857344951794482e-05, "loss": 2.2763, "step": 9248 }, { "epoch": 0.018312328804967613, "grad_norm": 0.11629340797662735, "learning_rate": 9.856710929358013e-05, "loss": 2.3194, "step": 9280 }, { "epoch": 0.018375474766364055, "grad_norm": 0.14931993186473846, "learning_rate": 9.856076906921545e-05, "loss": 2.2907, "step": 9312 }, { "epoch": 0.018438620727760494, "grad_norm": 0.14245635271072388, "learning_rate": 9.855442884485075e-05, "loss": 2.3068, "step": 9344 }, { "epoch": 0.018501766689156932, "grad_norm": 0.12499593198299408, "learning_rate": 9.854808862048606e-05, "loss": 2.2925, "step": 9376 }, { "epoch": 0.018564912650553374, "grad_norm": 0.11900224536657333, "learning_rate": 9.854174839612136e-05, "loss": 2.2877, "step": 9408 }, { "epoch": 0.018628058611949813, "grad_norm": 0.11563645303249359, "learning_rate": 9.853540817175668e-05, "loss": 2.284, "step": 9440 }, { "epoch": 0.018691204573346255, "grad_norm": 0.1066991463303566, "learning_rate": 9.8529067947392e-05, "loss": 2.2858, "step": 9472 }, { "epoch": 0.018754350534742694, "grad_norm": 0.1500285118818283, "learning_rate": 9.852272772302731e-05, "loss": 2.2971, "step": 9504 }, { "epoch": 0.018817496496139133, "grad_norm": 0.14508545398712158, "learning_rate": 9.851638749866261e-05, "loss": 2.2873, "step": 9536 }, { "epoch": 0.018880642457535575, "grad_norm": 0.12327894568443298, "learning_rate": 9.851004727429792e-05, "loss": 2.2817, "step": 9568 }, { "epoch": 0.018943788418932014, "grad_norm": 0.1686916947364807, "learning_rate": 9.850370704993324e-05, "loss": 2.2881, "step": 9600 }, { "epoch": 0.019006934380328456, "grad_norm": 0.11902410537004471, "learning_rate": 9.849736682556855e-05, "loss": 2.2848, "step": 9632 }, { "epoch": 0.019070080341724895, "grad_norm": 0.1380203515291214, "learning_rate": 9.849102660120385e-05, "loss": 2.285, "step": 9664 }, { "epoch": 0.019133226303121333, "grad_norm": 0.12518124282360077, "learning_rate": 9.848468637683917e-05, "loss": 2.2955, "step": 9696 }, { "epoch": 0.019196372264517775, "grad_norm": 0.11999049782752991, "learning_rate": 9.847834615247448e-05, "loss": 2.2709, "step": 9728 }, { "epoch": 0.019259518225914214, "grad_norm": 0.1295236349105835, "learning_rate": 9.847200592810978e-05, "loss": 2.2726, "step": 9760 }, { "epoch": 0.019322664187310653, "grad_norm": 0.12159834057092667, "learning_rate": 9.846566570374509e-05, "loss": 2.2713, "step": 9792 }, { "epoch": 0.019385810148707095, "grad_norm": 0.12995825707912445, "learning_rate": 9.84593254793804e-05, "loss": 2.2846, "step": 9824 }, { "epoch": 0.019448956110103534, "grad_norm": 0.13964006304740906, "learning_rate": 9.845298525501571e-05, "loss": 2.2957, "step": 9856 }, { "epoch": 0.019512102071499976, "grad_norm": 0.1186000406742096, "learning_rate": 9.844664503065103e-05, "loss": 2.2771, "step": 9888 }, { "epoch": 0.019575248032896415, "grad_norm": 0.13176684081554413, "learning_rate": 9.844030480628633e-05, "loss": 2.2811, "step": 9920 }, { "epoch": 0.019638393994292853, "grad_norm": 0.1463262289762497, "learning_rate": 9.843396458192164e-05, "loss": 2.2699, "step": 9952 }, { "epoch": 0.019701539955689296, "grad_norm": 0.13183870911598206, "learning_rate": 9.842762435755696e-05, "loss": 2.2847, "step": 9984 }, { "epoch": 0.019764685917085734, "grad_norm": 0.179069384932518, "learning_rate": 9.842128413319227e-05, "loss": 2.2534, "step": 10016 }, { "epoch": 0.019827831878482177, "grad_norm": 0.10940461605787277, "learning_rate": 9.841494390882759e-05, "loss": 2.2843, "step": 10048 }, { "epoch": 0.019890977839878615, "grad_norm": 0.13837820291519165, "learning_rate": 9.840860368446289e-05, "loss": 2.2703, "step": 10080 }, { "epoch": 0.019954123801275054, "grad_norm": 0.16267909109592438, "learning_rate": 9.84022634600982e-05, "loss": 2.2649, "step": 10112 }, { "epoch": 0.020017269762671496, "grad_norm": 0.1511298567056656, "learning_rate": 9.83959232357335e-05, "loss": 2.2737, "step": 10144 }, { "epoch": 0.020080415724067935, "grad_norm": 0.10784769058227539, "learning_rate": 9.838958301136882e-05, "loss": 2.2954, "step": 10176 }, { "epoch": 0.020143561685464374, "grad_norm": 0.11363289505243301, "learning_rate": 9.838324278700412e-05, "loss": 2.2527, "step": 10208 }, { "epoch": 0.020206707646860816, "grad_norm": 0.12205290794372559, "learning_rate": 9.837690256263944e-05, "loss": 2.2546, "step": 10240 }, { "epoch": 0.020269853608257254, "grad_norm": 0.11637623608112335, "learning_rate": 9.837056233827475e-05, "loss": 2.2757, "step": 10272 }, { "epoch": 0.020332999569653697, "grad_norm": 0.12465595453977585, "learning_rate": 9.836422211391006e-05, "loss": 2.2612, "step": 10304 }, { "epoch": 0.020396145531050135, "grad_norm": 0.13645561039447784, "learning_rate": 9.835788188954537e-05, "loss": 2.2678, "step": 10336 }, { "epoch": 0.020459291492446574, "grad_norm": 0.15595458447933197, "learning_rate": 9.835154166518068e-05, "loss": 2.2642, "step": 10368 }, { "epoch": 0.020522437453843016, "grad_norm": 0.11267196387052536, "learning_rate": 9.8345201440816e-05, "loss": 2.2501, "step": 10400 }, { "epoch": 0.020585583415239455, "grad_norm": 0.16104404628276825, "learning_rate": 9.833886121645131e-05, "loss": 2.2697, "step": 10432 }, { "epoch": 0.020648729376635897, "grad_norm": 0.13484841585159302, "learning_rate": 9.833252099208661e-05, "loss": 2.2534, "step": 10464 }, { "epoch": 0.020711875338032336, "grad_norm": 0.15047989785671234, "learning_rate": 9.832618076772192e-05, "loss": 2.2551, "step": 10496 }, { "epoch": 0.020775021299428775, "grad_norm": 0.12250261753797531, "learning_rate": 9.831984054335724e-05, "loss": 2.2522, "step": 10528 }, { "epoch": 0.020838167260825217, "grad_norm": 0.12256615608930588, "learning_rate": 9.831350031899254e-05, "loss": 2.2388, "step": 10560 }, { "epoch": 0.020901313222221655, "grad_norm": 0.13912375271320343, "learning_rate": 9.830716009462785e-05, "loss": 2.2516, "step": 10592 }, { "epoch": 0.020964459183618094, "grad_norm": 0.16619546711444855, "learning_rate": 9.830081987026316e-05, "loss": 2.2593, "step": 10624 }, { "epoch": 0.021027605145014536, "grad_norm": 0.12345771491527557, "learning_rate": 9.829447964589847e-05, "loss": 2.2534, "step": 10656 }, { "epoch": 0.021090751106410975, "grad_norm": 0.14183910191059113, "learning_rate": 9.828813942153378e-05, "loss": 2.2348, "step": 10688 }, { "epoch": 0.021153897067807417, "grad_norm": 0.14614734053611755, "learning_rate": 9.82817991971691e-05, "loss": 2.2442, "step": 10720 }, { "epoch": 0.021217043029203856, "grad_norm": 0.15900371968746185, "learning_rate": 9.82754589728044e-05, "loss": 2.2624, "step": 10752 }, { "epoch": 0.021280188990600295, "grad_norm": 0.12799856066703796, "learning_rate": 9.826911874843971e-05, "loss": 2.2443, "step": 10784 }, { "epoch": 0.021343334951996737, "grad_norm": 0.12659910321235657, "learning_rate": 9.826277852407503e-05, "loss": 2.2653, "step": 10816 }, { "epoch": 0.021406480913393176, "grad_norm": 0.1434067338705063, "learning_rate": 9.825643829971034e-05, "loss": 2.2754, "step": 10848 }, { "epoch": 0.021469626874789618, "grad_norm": 0.1296793818473816, "learning_rate": 9.825009807534564e-05, "loss": 2.2522, "step": 10880 }, { "epoch": 0.021532772836186057, "grad_norm": 0.12483330070972443, "learning_rate": 9.824375785098096e-05, "loss": 2.2547, "step": 10912 }, { "epoch": 0.021595918797582495, "grad_norm": 0.11052534729242325, "learning_rate": 9.823741762661627e-05, "loss": 2.2392, "step": 10944 }, { "epoch": 0.021659064758978937, "grad_norm": 0.1412501484155655, "learning_rate": 9.823107740225158e-05, "loss": 2.2684, "step": 10976 }, { "epoch": 0.021722210720375376, "grad_norm": 0.12963959574699402, "learning_rate": 9.822473717788689e-05, "loss": 2.24, "step": 11008 }, { "epoch": 0.021785356681771815, "grad_norm": 0.12713031470775604, "learning_rate": 9.821839695352219e-05, "loss": 2.2349, "step": 11040 }, { "epoch": 0.021848502643168257, "grad_norm": 0.1355070173740387, "learning_rate": 9.82120567291575e-05, "loss": 2.2297, "step": 11072 }, { "epoch": 0.021911648604564696, "grad_norm": 0.11268522590398788, "learning_rate": 9.820571650479282e-05, "loss": 2.2379, "step": 11104 }, { "epoch": 0.021974794565961138, "grad_norm": 0.11818798631429672, "learning_rate": 9.819937628042812e-05, "loss": 2.225, "step": 11136 }, { "epoch": 0.022037940527357577, "grad_norm": 0.10650172084569931, "learning_rate": 9.819303605606344e-05, "loss": 2.246, "step": 11168 }, { "epoch": 0.022101086488754015, "grad_norm": 0.14348286390304565, "learning_rate": 9.818669583169875e-05, "loss": 2.2465, "step": 11200 }, { "epoch": 0.022164232450150458, "grad_norm": 0.13717545568943024, "learning_rate": 9.818035560733406e-05, "loss": 2.2242, "step": 11232 }, { "epoch": 0.022227378411546896, "grad_norm": 0.12716437876224518, "learning_rate": 9.817401538296937e-05, "loss": 2.1999, "step": 11264 }, { "epoch": 0.02229052437294334, "grad_norm": 0.14523211121559143, "learning_rate": 9.816767515860468e-05, "loss": 2.2169, "step": 11296 }, { "epoch": 0.022353670334339777, "grad_norm": 0.13594989478588104, "learning_rate": 9.816133493424e-05, "loss": 2.2351, "step": 11328 }, { "epoch": 0.022416816295736216, "grad_norm": 0.10901647061109543, "learning_rate": 9.815499470987531e-05, "loss": 2.2271, "step": 11360 }, { "epoch": 0.022479962257132658, "grad_norm": 0.12008926272392273, "learning_rate": 9.814865448551061e-05, "loss": 2.2311, "step": 11392 }, { "epoch": 0.022543108218529097, "grad_norm": 0.11971398442983627, "learning_rate": 9.814231426114591e-05, "loss": 2.2217, "step": 11424 }, { "epoch": 0.022606254179925536, "grad_norm": 0.11262718588113785, "learning_rate": 9.813597403678123e-05, "loss": 2.2355, "step": 11456 }, { "epoch": 0.022669400141321978, "grad_norm": 0.11999446898698807, "learning_rate": 9.812963381241654e-05, "loss": 2.2373, "step": 11488 }, { "epoch": 0.022732546102718416, "grad_norm": 0.11067375540733337, "learning_rate": 9.812329358805185e-05, "loss": 2.2084, "step": 11520 }, { "epoch": 0.02279569206411486, "grad_norm": 0.12921836972236633, "learning_rate": 9.811695336368716e-05, "loss": 2.2285, "step": 11552 }, { "epoch": 0.022858838025511297, "grad_norm": 0.10505735874176025, "learning_rate": 9.811061313932247e-05, "loss": 2.2215, "step": 11584 }, { "epoch": 0.022921983986907736, "grad_norm": 0.10381817072629929, "learning_rate": 9.810427291495779e-05, "loss": 2.2292, "step": 11616 }, { "epoch": 0.022985129948304178, "grad_norm": 0.11980079114437103, "learning_rate": 9.80979326905931e-05, "loss": 2.2335, "step": 11648 }, { "epoch": 0.023048275909700617, "grad_norm": 0.13260555267333984, "learning_rate": 9.80915924662284e-05, "loss": 2.2082, "step": 11680 }, { "epoch": 0.02311142187109706, "grad_norm": 0.11776828020811081, "learning_rate": 9.808525224186372e-05, "loss": 2.2405, "step": 11712 }, { "epoch": 0.023174567832493498, "grad_norm": 0.12389204651117325, "learning_rate": 9.807891201749903e-05, "loss": 2.2277, "step": 11744 }, { "epoch": 0.023237713793889937, "grad_norm": 0.11607711762189865, "learning_rate": 9.807257179313434e-05, "loss": 2.2196, "step": 11776 }, { "epoch": 0.02330085975528638, "grad_norm": 0.1326410174369812, "learning_rate": 9.806623156876965e-05, "loss": 2.2098, "step": 11808 }, { "epoch": 0.023364005716682817, "grad_norm": 0.11321288347244263, "learning_rate": 9.805989134440495e-05, "loss": 2.2129, "step": 11840 }, { "epoch": 0.023427151678079256, "grad_norm": 0.13293352723121643, "learning_rate": 9.805355112004026e-05, "loss": 2.2336, "step": 11872 }, { "epoch": 0.0234902976394757, "grad_norm": 0.11404547095298767, "learning_rate": 9.804721089567558e-05, "loss": 2.2018, "step": 11904 }, { "epoch": 0.023553443600872137, "grad_norm": 0.12156685441732407, "learning_rate": 9.804087067131088e-05, "loss": 2.213, "step": 11936 }, { "epoch": 0.02361658956226858, "grad_norm": 0.12596964836120605, "learning_rate": 9.803453044694619e-05, "loss": 2.2218, "step": 11968 }, { "epoch": 0.023679735523665018, "grad_norm": 0.15181002020835876, "learning_rate": 9.80281902225815e-05, "loss": 2.202, "step": 12000 }, { "epoch": 0.023742881485061457, "grad_norm": 0.1088121235370636, "learning_rate": 9.802184999821682e-05, "loss": 2.206, "step": 12032 }, { "epoch": 0.0238060274464579, "grad_norm": 0.12130860239267349, "learning_rate": 9.801550977385213e-05, "loss": 2.1916, "step": 12064 }, { "epoch": 0.023869173407854338, "grad_norm": 0.10830409079790115, "learning_rate": 9.800916954948744e-05, "loss": 2.2073, "step": 12096 }, { "epoch": 0.02393231936925078, "grad_norm": 0.1181134432554245, "learning_rate": 9.800282932512275e-05, "loss": 2.2139, "step": 12128 }, { "epoch": 0.02399546533064722, "grad_norm": 0.10429253429174423, "learning_rate": 9.799648910075806e-05, "loss": 2.1931, "step": 12160 }, { "epoch": 0.024058611292043657, "grad_norm": 0.12049050629138947, "learning_rate": 9.799014887639338e-05, "loss": 2.2107, "step": 12192 }, { "epoch": 0.0241217572534401, "grad_norm": 0.15965713560581207, "learning_rate": 9.798380865202868e-05, "loss": 2.1845, "step": 12224 }, { "epoch": 0.024184903214836538, "grad_norm": 0.14251761138439178, "learning_rate": 9.797746842766398e-05, "loss": 2.1937, "step": 12256 }, { "epoch": 0.024248049176232977, "grad_norm": 0.14183920621871948, "learning_rate": 9.79711282032993e-05, "loss": 2.1973, "step": 12288 }, { "epoch": 0.02431119513762942, "grad_norm": 0.11488441377878189, "learning_rate": 9.796478797893461e-05, "loss": 2.2127, "step": 12320 }, { "epoch": 0.024374341099025858, "grad_norm": 0.1270715445280075, "learning_rate": 9.795844775456991e-05, "loss": 2.2019, "step": 12352 }, { "epoch": 0.0244374870604223, "grad_norm": 0.11428068578243256, "learning_rate": 9.795210753020523e-05, "loss": 2.1863, "step": 12384 }, { "epoch": 0.02450063302181874, "grad_norm": 0.12566788494586945, "learning_rate": 9.794576730584054e-05, "loss": 2.1871, "step": 12416 }, { "epoch": 0.024563778983215177, "grad_norm": 0.12264042347669601, "learning_rate": 9.793942708147586e-05, "loss": 2.1987, "step": 12448 }, { "epoch": 0.02462692494461162, "grad_norm": 0.11069290339946747, "learning_rate": 9.793308685711116e-05, "loss": 2.1985, "step": 12480 }, { "epoch": 0.024690070906008058, "grad_norm": 0.11952099204063416, "learning_rate": 9.792674663274647e-05, "loss": 2.1909, "step": 12512 }, { "epoch": 0.0247532168674045, "grad_norm": 0.14786210656166077, "learning_rate": 9.792040640838179e-05, "loss": 2.1795, "step": 12544 }, { "epoch": 0.02481636282880094, "grad_norm": 0.13370029628276825, "learning_rate": 9.79140661840171e-05, "loss": 2.1859, "step": 12576 }, { "epoch": 0.024879508790197378, "grad_norm": 0.14006869494915009, "learning_rate": 9.79077259596524e-05, "loss": 2.1762, "step": 12608 }, { "epoch": 0.02494265475159382, "grad_norm": 0.12157852202653885, "learning_rate": 9.790138573528772e-05, "loss": 2.1924, "step": 12640 }, { "epoch": 0.02500580071299026, "grad_norm": 0.11815094202756882, "learning_rate": 9.789504551092302e-05, "loss": 2.179, "step": 12672 }, { "epoch": 0.025068946674386697, "grad_norm": 0.14207015931606293, "learning_rate": 9.788870528655833e-05, "loss": 2.1935, "step": 12704 }, { "epoch": 0.02513209263578314, "grad_norm": 0.12679685652256012, "learning_rate": 9.788236506219365e-05, "loss": 2.1804, "step": 12736 }, { "epoch": 0.02519523859717958, "grad_norm": 0.13686706125736237, "learning_rate": 9.787602483782895e-05, "loss": 2.1689, "step": 12768 }, { "epoch": 0.02525838455857602, "grad_norm": 0.12256750464439392, "learning_rate": 9.786968461346426e-05, "loss": 2.1587, "step": 12800 }, { "epoch": 0.02532153051997246, "grad_norm": 0.15027447044849396, "learning_rate": 9.786334438909958e-05, "loss": 2.1729, "step": 12832 }, { "epoch": 0.025384676481368898, "grad_norm": 0.132175475358963, "learning_rate": 9.785700416473489e-05, "loss": 2.1735, "step": 12864 }, { "epoch": 0.02544782244276534, "grad_norm": 0.12547612190246582, "learning_rate": 9.785066394037019e-05, "loss": 2.1533, "step": 12896 }, { "epoch": 0.02551096840416178, "grad_norm": 0.13197678327560425, "learning_rate": 9.78443237160055e-05, "loss": 2.1683, "step": 12928 }, { "epoch": 0.02557411436555822, "grad_norm": 0.11401775479316711, "learning_rate": 9.783798349164082e-05, "loss": 2.1549, "step": 12960 }, { "epoch": 0.02563726032695466, "grad_norm": 0.1391398012638092, "learning_rate": 9.783164326727614e-05, "loss": 2.1663, "step": 12992 }, { "epoch": 0.0257004062883511, "grad_norm": 0.13919059932231903, "learning_rate": 9.782530304291144e-05, "loss": 2.1685, "step": 13024 }, { "epoch": 0.02576355224974754, "grad_norm": 0.12351135164499283, "learning_rate": 9.781896281854675e-05, "loss": 2.1502, "step": 13056 }, { "epoch": 0.02582669821114398, "grad_norm": 0.14834736287593842, "learning_rate": 9.781262259418205e-05, "loss": 2.1729, "step": 13088 }, { "epoch": 0.025889844172540418, "grad_norm": 0.128239244222641, "learning_rate": 9.780628236981737e-05, "loss": 2.1727, "step": 13120 }, { "epoch": 0.02595299013393686, "grad_norm": 0.11607243865728378, "learning_rate": 9.779994214545267e-05, "loss": 2.1596, "step": 13152 }, { "epoch": 0.0260161360953333, "grad_norm": 0.13490678369998932, "learning_rate": 9.779360192108798e-05, "loss": 2.1572, "step": 13184 }, { "epoch": 0.02607928205672974, "grad_norm": 0.12663912773132324, "learning_rate": 9.77872616967233e-05, "loss": 2.1667, "step": 13216 }, { "epoch": 0.02614242801812618, "grad_norm": 0.12302709370851517, "learning_rate": 9.778092147235861e-05, "loss": 2.1672, "step": 13248 }, { "epoch": 0.02620557397952262, "grad_norm": 0.11665955930948257, "learning_rate": 9.777458124799391e-05, "loss": 2.1497, "step": 13280 }, { "epoch": 0.02626871994091906, "grad_norm": 0.1355448067188263, "learning_rate": 9.776824102362923e-05, "loss": 2.1525, "step": 13312 }, { "epoch": 0.0263318659023155, "grad_norm": 0.13190394639968872, "learning_rate": 9.776190079926454e-05, "loss": 2.1861, "step": 13344 }, { "epoch": 0.026395011863711938, "grad_norm": 0.11398578435182571, "learning_rate": 9.775556057489986e-05, "loss": 2.1596, "step": 13376 }, { "epoch": 0.02645815782510838, "grad_norm": 0.1408715397119522, "learning_rate": 9.774922035053517e-05, "loss": 2.168, "step": 13408 }, { "epoch": 0.02652130378650482, "grad_norm": 0.11405431479215622, "learning_rate": 9.774288012617047e-05, "loss": 2.1462, "step": 13440 }, { "epoch": 0.02658444974790126, "grad_norm": 0.11981717497110367, "learning_rate": 9.773653990180579e-05, "loss": 2.1588, "step": 13472 }, { "epoch": 0.0266475957092977, "grad_norm": 0.12904761731624603, "learning_rate": 9.773019967744109e-05, "loss": 2.1454, "step": 13504 }, { "epoch": 0.02671074167069414, "grad_norm": 0.1340661495923996, "learning_rate": 9.77238594530764e-05, "loss": 2.1666, "step": 13536 }, { "epoch": 0.02677388763209058, "grad_norm": 0.12820249795913696, "learning_rate": 9.77175192287117e-05, "loss": 2.1488, "step": 13568 }, { "epoch": 0.02683703359348702, "grad_norm": 0.1250142604112625, "learning_rate": 9.771117900434702e-05, "loss": 2.144, "step": 13600 }, { "epoch": 0.026900179554883462, "grad_norm": 0.1425981968641281, "learning_rate": 9.770483877998233e-05, "loss": 2.1547, "step": 13632 }, { "epoch": 0.0269633255162799, "grad_norm": 0.15214186906814575, "learning_rate": 9.769849855561765e-05, "loss": 2.1559, "step": 13664 }, { "epoch": 0.02702647147767634, "grad_norm": 0.13166294991970062, "learning_rate": 9.769215833125295e-05, "loss": 2.141, "step": 13696 }, { "epoch": 0.02708961743907278, "grad_norm": 0.12783321738243103, "learning_rate": 9.768581810688826e-05, "loss": 2.1352, "step": 13728 }, { "epoch": 0.02715276340046922, "grad_norm": 0.1286151260137558, "learning_rate": 9.767947788252358e-05, "loss": 2.1382, "step": 13760 }, { "epoch": 0.02721590936186566, "grad_norm": 0.1499750167131424, "learning_rate": 9.767313765815889e-05, "loss": 2.1579, "step": 13792 }, { "epoch": 0.0272790553232621, "grad_norm": 0.11294298619031906, "learning_rate": 9.766679743379419e-05, "loss": 2.1374, "step": 13824 }, { "epoch": 0.02734220128465854, "grad_norm": 0.13401901721954346, "learning_rate": 9.76604572094295e-05, "loss": 2.1487, "step": 13856 }, { "epoch": 0.027405347246054982, "grad_norm": 0.1361786127090454, "learning_rate": 9.765411698506482e-05, "loss": 2.1131, "step": 13888 }, { "epoch": 0.02746849320745142, "grad_norm": 0.13850685954093933, "learning_rate": 9.764777676070012e-05, "loss": 2.1621, "step": 13920 }, { "epoch": 0.02753163916884786, "grad_norm": 0.12793563306331635, "learning_rate": 9.764143653633542e-05, "loss": 2.1394, "step": 13952 }, { "epoch": 0.0275947851302443, "grad_norm": 0.13138705492019653, "learning_rate": 9.763509631197074e-05, "loss": 2.1425, "step": 13984 }, { "epoch": 0.02765793109164074, "grad_norm": 0.11367518454790115, "learning_rate": 9.762875608760605e-05, "loss": 2.1313, "step": 14016 }, { "epoch": 0.027721077053037183, "grad_norm": 0.12217991054058075, "learning_rate": 9.762241586324137e-05, "loss": 2.1252, "step": 14048 }, { "epoch": 0.02778422301443362, "grad_norm": 0.12337450683116913, "learning_rate": 9.761607563887668e-05, "loss": 2.1275, "step": 14080 }, { "epoch": 0.02784736897583006, "grad_norm": 0.13017290830612183, "learning_rate": 9.760973541451198e-05, "loss": 2.1174, "step": 14112 }, { "epoch": 0.027910514937226502, "grad_norm": 0.12573756277561188, "learning_rate": 9.76033951901473e-05, "loss": 2.1314, "step": 14144 }, { "epoch": 0.02797366089862294, "grad_norm": 0.13559311628341675, "learning_rate": 9.759705496578261e-05, "loss": 2.1253, "step": 14176 }, { "epoch": 0.02803680686001938, "grad_norm": 0.1147247925400734, "learning_rate": 9.759071474141793e-05, "loss": 2.1322, "step": 14208 }, { "epoch": 0.028099952821415822, "grad_norm": 0.14051298797130585, "learning_rate": 9.758437451705323e-05, "loss": 2.1349, "step": 14240 }, { "epoch": 0.02816309878281226, "grad_norm": 0.12124649435281754, "learning_rate": 9.757803429268854e-05, "loss": 2.1138, "step": 14272 }, { "epoch": 0.028226244744208703, "grad_norm": 0.13273552060127258, "learning_rate": 9.757169406832384e-05, "loss": 2.1174, "step": 14304 }, { "epoch": 0.02828939070560514, "grad_norm": 0.13307848572731018, "learning_rate": 9.756535384395916e-05, "loss": 2.1194, "step": 14336 }, { "epoch": 0.02835253666700158, "grad_norm": 0.12077753245830536, "learning_rate": 9.755901361959446e-05, "loss": 2.1104, "step": 14368 }, { "epoch": 0.028415682628398022, "grad_norm": 0.11729800701141357, "learning_rate": 9.755267339522977e-05, "loss": 2.1161, "step": 14400 }, { "epoch": 0.02847882858979446, "grad_norm": 0.1477431207895279, "learning_rate": 9.754633317086509e-05, "loss": 2.1252, "step": 14432 }, { "epoch": 0.028541974551190903, "grad_norm": 0.12713754177093506, "learning_rate": 9.75399929465004e-05, "loss": 2.1255, "step": 14464 }, { "epoch": 0.028605120512587342, "grad_norm": 0.11540602892637253, "learning_rate": 9.75336527221357e-05, "loss": 2.1277, "step": 14496 }, { "epoch": 0.02866826647398378, "grad_norm": 0.14172641932964325, "learning_rate": 9.752731249777102e-05, "loss": 2.1122, "step": 14528 }, { "epoch": 0.028731412435380223, "grad_norm": 0.10793290287256241, "learning_rate": 9.752097227340633e-05, "loss": 2.1159, "step": 14560 }, { "epoch": 0.02879455839677666, "grad_norm": 0.11252595484256744, "learning_rate": 9.751463204904165e-05, "loss": 2.1054, "step": 14592 }, { "epoch": 0.0288577043581731, "grad_norm": 0.1248583123087883, "learning_rate": 9.750829182467695e-05, "loss": 2.1166, "step": 14624 }, { "epoch": 0.028920850319569542, "grad_norm": 0.11714232712984085, "learning_rate": 9.750195160031226e-05, "loss": 2.0992, "step": 14656 }, { "epoch": 0.02898399628096598, "grad_norm": 0.11430927366018295, "learning_rate": 9.749561137594758e-05, "loss": 2.0934, "step": 14688 }, { "epoch": 0.029047142242362423, "grad_norm": 0.11717204004526138, "learning_rate": 9.748927115158288e-05, "loss": 2.1056, "step": 14720 }, { "epoch": 0.029110288203758862, "grad_norm": 0.1542053520679474, "learning_rate": 9.748293092721819e-05, "loss": 2.1077, "step": 14752 }, { "epoch": 0.0291734341651553, "grad_norm": 0.1308835744857788, "learning_rate": 9.747659070285349e-05, "loss": 2.1103, "step": 14784 }, { "epoch": 0.029236580126551743, "grad_norm": 0.10842951387166977, "learning_rate": 9.747025047848881e-05, "loss": 2.109, "step": 14816 }, { "epoch": 0.02929972608794818, "grad_norm": 0.12598951160907745, "learning_rate": 9.746391025412412e-05, "loss": 2.1072, "step": 14848 }, { "epoch": 0.029362872049344624, "grad_norm": 0.10338089615106583, "learning_rate": 9.745757002975944e-05, "loss": 2.1041, "step": 14880 }, { "epoch": 0.029426018010741063, "grad_norm": 0.11851800978183746, "learning_rate": 9.745122980539474e-05, "loss": 2.0945, "step": 14912 }, { "epoch": 0.0294891639721375, "grad_norm": 0.11729994416236877, "learning_rate": 9.744488958103005e-05, "loss": 2.112, "step": 14944 }, { "epoch": 0.029552309933533943, "grad_norm": 0.13360081613063812, "learning_rate": 9.743854935666537e-05, "loss": 2.1067, "step": 14976 }, { "epoch": 0.029615455894930382, "grad_norm": 0.13326188921928406, "learning_rate": 9.743220913230068e-05, "loss": 2.102, "step": 15008 }, { "epoch": 0.02967860185632682, "grad_norm": 0.11109422147274017, "learning_rate": 9.742586890793598e-05, "loss": 2.0932, "step": 15040 }, { "epoch": 0.029741747817723263, "grad_norm": 0.14367780089378357, "learning_rate": 9.74195286835713e-05, "loss": 2.1007, "step": 15072 }, { "epoch": 0.029804893779119702, "grad_norm": 0.12622703611850739, "learning_rate": 9.741318845920661e-05, "loss": 2.1006, "step": 15104 }, { "epoch": 0.029868039740516144, "grad_norm": 0.1098259836435318, "learning_rate": 9.740684823484191e-05, "loss": 2.1098, "step": 15136 }, { "epoch": 0.029931185701912583, "grad_norm": 0.12503789365291595, "learning_rate": 9.740050801047723e-05, "loss": 2.1026, "step": 15168 }, { "epoch": 0.02999433166330902, "grad_norm": 0.12734921276569366, "learning_rate": 9.739416778611253e-05, "loss": 2.0925, "step": 15200 }, { "epoch": 0.030057477624705464, "grad_norm": 0.12283790111541748, "learning_rate": 9.738782756174784e-05, "loss": 2.0946, "step": 15232 }, { "epoch": 0.030120623586101902, "grad_norm": 0.12231511622667313, "learning_rate": 9.738148733738316e-05, "loss": 2.0867, "step": 15264 }, { "epoch": 0.030183769547498344, "grad_norm": 0.1260044127702713, "learning_rate": 9.737514711301846e-05, "loss": 2.0838, "step": 15296 }, { "epoch": 0.030246915508894783, "grad_norm": 0.1352124810218811, "learning_rate": 9.736880688865377e-05, "loss": 2.1022, "step": 15328 }, { "epoch": 0.030310061470291222, "grad_norm": 0.13228322565555573, "learning_rate": 9.736246666428909e-05, "loss": 2.0997, "step": 15360 }, { "epoch": 0.030373207431687664, "grad_norm": 0.11998327076435089, "learning_rate": 9.73561264399244e-05, "loss": 2.0854, "step": 15392 }, { "epoch": 0.030436353393084103, "grad_norm": 0.12817572057247162, "learning_rate": 9.734978621555972e-05, "loss": 2.0972, "step": 15424 }, { "epoch": 0.03049949935448054, "grad_norm": 0.1324385553598404, "learning_rate": 9.734344599119502e-05, "loss": 2.0763, "step": 15456 }, { "epoch": 0.030562645315876984, "grad_norm": 0.13350187242031097, "learning_rate": 9.733710576683033e-05, "loss": 2.1011, "step": 15488 }, { "epoch": 0.030625791277273422, "grad_norm": 0.1624910533428192, "learning_rate": 9.733076554246565e-05, "loss": 2.0783, "step": 15520 }, { "epoch": 0.030688937238669865, "grad_norm": 0.11206740140914917, "learning_rate": 9.732442531810095e-05, "loss": 2.0922, "step": 15552 }, { "epoch": 0.030752083200066303, "grad_norm": 0.14127673208713531, "learning_rate": 9.731808509373625e-05, "loss": 2.0792, "step": 15584 }, { "epoch": 0.030815229161462742, "grad_norm": 0.13760198652744293, "learning_rate": 9.731174486937156e-05, "loss": 2.0921, "step": 15616 }, { "epoch": 0.030878375122859184, "grad_norm": 0.14275386929512024, "learning_rate": 9.730540464500688e-05, "loss": 2.0867, "step": 15648 }, { "epoch": 0.030941521084255623, "grad_norm": 0.12568217515945435, "learning_rate": 9.729906442064219e-05, "loss": 2.1021, "step": 15680 }, { "epoch": 0.031004667045652065, "grad_norm": 0.10878398269414902, "learning_rate": 9.72927241962775e-05, "loss": 2.0805, "step": 15712 }, { "epoch": 0.031067813007048504, "grad_norm": 0.11523103713989258, "learning_rate": 9.728638397191281e-05, "loss": 2.0612, "step": 15744 }, { "epoch": 0.031130958968444943, "grad_norm": 0.12170586735010147, "learning_rate": 9.728004374754812e-05, "loss": 2.0731, "step": 15776 }, { "epoch": 0.031194104929841385, "grad_norm": 0.12763361632823944, "learning_rate": 9.727370352318344e-05, "loss": 2.0939, "step": 15808 }, { "epoch": 0.03125725089123783, "grad_norm": 0.13741229474544525, "learning_rate": 9.726736329881874e-05, "loss": 2.0912, "step": 15840 }, { "epoch": 0.03132039685263426, "grad_norm": 0.12540291249752045, "learning_rate": 9.726102307445405e-05, "loss": 2.0721, "step": 15872 }, { "epoch": 0.031383542814030704, "grad_norm": 0.11759001761674881, "learning_rate": 9.725468285008937e-05, "loss": 2.0875, "step": 15904 }, { "epoch": 0.031446688775427147, "grad_norm": 0.1085490733385086, "learning_rate": 9.724834262572468e-05, "loss": 2.0747, "step": 15936 }, { "epoch": 0.03150983473682358, "grad_norm": 0.11623985320329666, "learning_rate": 9.724200240135998e-05, "loss": 2.078, "step": 15968 }, { "epoch": 0.031572980698220024, "grad_norm": 0.11528050899505615, "learning_rate": 9.723566217699528e-05, "loss": 2.0874, "step": 16000 }, { "epoch": 0.031636126659616466, "grad_norm": 0.12489989399909973, "learning_rate": 9.72293219526306e-05, "loss": 2.0722, "step": 16032 }, { "epoch": 0.0316992726210129, "grad_norm": 0.11752578616142273, "learning_rate": 9.722298172826591e-05, "loss": 2.0719, "step": 16064 }, { "epoch": 0.031762418582409344, "grad_norm": 0.14901821315288544, "learning_rate": 9.721664150390123e-05, "loss": 2.0599, "step": 16096 }, { "epoch": 0.031825564543805786, "grad_norm": 0.11683712154626846, "learning_rate": 9.721030127953653e-05, "loss": 2.0638, "step": 16128 }, { "epoch": 0.03188871050520222, "grad_norm": 0.13710802793502808, "learning_rate": 9.720396105517184e-05, "loss": 2.0547, "step": 16160 }, { "epoch": 0.03195185646659866, "grad_norm": 0.12909361720085144, "learning_rate": 9.719762083080716e-05, "loss": 2.0644, "step": 16192 }, { "epoch": 0.032015002427995105, "grad_norm": 0.12128318846225739, "learning_rate": 9.719128060644247e-05, "loss": 2.0633, "step": 16224 }, { "epoch": 0.03207814838939155, "grad_norm": 0.1090121939778328, "learning_rate": 9.718494038207777e-05, "loss": 2.0816, "step": 16256 }, { "epoch": 0.03214129435078798, "grad_norm": 0.11851469427347183, "learning_rate": 9.717860015771309e-05, "loss": 2.0671, "step": 16288 }, { "epoch": 0.032204440312184425, "grad_norm": 0.11412264406681061, "learning_rate": 9.71722599333484e-05, "loss": 2.0629, "step": 16320 }, { "epoch": 0.03226758627358087, "grad_norm": 0.11426562815904617, "learning_rate": 9.716591970898372e-05, "loss": 2.0619, "step": 16352 }, { "epoch": 0.0323307322349773, "grad_norm": 0.1323927342891693, "learning_rate": 9.715957948461902e-05, "loss": 2.0627, "step": 16384 }, { "epoch": 0.032393878196373745, "grad_norm": 0.12511026859283447, "learning_rate": 9.715323926025432e-05, "loss": 2.0607, "step": 16416 }, { "epoch": 0.03245702415777019, "grad_norm": 0.14868119359016418, "learning_rate": 9.714689903588963e-05, "loss": 2.0559, "step": 16448 }, { "epoch": 0.03252017011916662, "grad_norm": 0.14075885713100433, "learning_rate": 9.714055881152495e-05, "loss": 2.0768, "step": 16480 }, { "epoch": 0.032583316080563064, "grad_norm": 0.13771525025367737, "learning_rate": 9.713421858716025e-05, "loss": 2.0716, "step": 16512 }, { "epoch": 0.032646462041959506, "grad_norm": 0.11929049342870712, "learning_rate": 9.712787836279556e-05, "loss": 2.0606, "step": 16544 }, { "epoch": 0.03270960800335594, "grad_norm": 0.11700630187988281, "learning_rate": 9.712153813843088e-05, "loss": 2.0737, "step": 16576 }, { "epoch": 0.032772753964752384, "grad_norm": 0.12586750090122223, "learning_rate": 9.711519791406619e-05, "loss": 2.0771, "step": 16608 }, { "epoch": 0.032835899926148826, "grad_norm": 0.10553145408630371, "learning_rate": 9.71088576897015e-05, "loss": 2.0423, "step": 16640 }, { "epoch": 0.03289904588754527, "grad_norm": 0.1436128169298172, "learning_rate": 9.710251746533681e-05, "loss": 2.0559, "step": 16672 }, { "epoch": 0.0329621918489417, "grad_norm": 0.1168614998459816, "learning_rate": 9.709617724097212e-05, "loss": 2.0443, "step": 16704 }, { "epoch": 0.033025337810338146, "grad_norm": 0.12289905548095703, "learning_rate": 9.708983701660744e-05, "loss": 2.056, "step": 16736 }, { "epoch": 0.03308848377173459, "grad_norm": 0.1273569017648697, "learning_rate": 9.708349679224274e-05, "loss": 2.0509, "step": 16768 }, { "epoch": 0.03315162973313102, "grad_norm": 0.11731128394603729, "learning_rate": 9.707715656787805e-05, "loss": 2.0505, "step": 16800 }, { "epoch": 0.033214775694527465, "grad_norm": 0.11746280640363693, "learning_rate": 9.707081634351335e-05, "loss": 2.0482, "step": 16832 }, { "epoch": 0.03327792165592391, "grad_norm": 0.12205459177494049, "learning_rate": 9.706447611914867e-05, "loss": 2.0475, "step": 16864 }, { "epoch": 0.03334106761732034, "grad_norm": 0.12475898116827011, "learning_rate": 9.705813589478398e-05, "loss": 2.0579, "step": 16896 }, { "epoch": 0.033404213578716785, "grad_norm": 0.11365223675966263, "learning_rate": 9.705179567041928e-05, "loss": 2.056, "step": 16928 }, { "epoch": 0.03346735954011323, "grad_norm": 0.11358797550201416, "learning_rate": 9.70454554460546e-05, "loss": 2.0318, "step": 16960 }, { "epoch": 0.03353050550150966, "grad_norm": 0.1325814425945282, "learning_rate": 9.703911522168991e-05, "loss": 2.0621, "step": 16992 }, { "epoch": 0.033593651462906104, "grad_norm": 0.1368623822927475, "learning_rate": 9.703277499732523e-05, "loss": 2.0488, "step": 17024 }, { "epoch": 0.03365679742430255, "grad_norm": 0.1259676218032837, "learning_rate": 9.702643477296053e-05, "loss": 2.0363, "step": 17056 }, { "epoch": 0.03371994338569899, "grad_norm": 0.11567375808954239, "learning_rate": 9.702009454859584e-05, "loss": 2.0385, "step": 17088 }, { "epoch": 0.033783089347095424, "grad_norm": 0.12076389789581299, "learning_rate": 9.701375432423116e-05, "loss": 2.0652, "step": 17120 }, { "epoch": 0.033846235308491866, "grad_norm": 0.11648164689540863, "learning_rate": 9.700741409986647e-05, "loss": 2.0465, "step": 17152 }, { "epoch": 0.03390938126988831, "grad_norm": 0.12384187430143356, "learning_rate": 9.700107387550177e-05, "loss": 2.0479, "step": 17184 }, { "epoch": 0.033972527231284744, "grad_norm": 0.11063065379858017, "learning_rate": 9.699473365113709e-05, "loss": 2.0426, "step": 17216 }, { "epoch": 0.034035673192681186, "grad_norm": 0.11873957514762878, "learning_rate": 9.698839342677239e-05, "loss": 2.0488, "step": 17248 }, { "epoch": 0.03409881915407763, "grad_norm": 0.12810015678405762, "learning_rate": 9.69820532024077e-05, "loss": 2.0444, "step": 17280 }, { "epoch": 0.03416196511547406, "grad_norm": 0.11562523245811462, "learning_rate": 9.697571297804302e-05, "loss": 2.0455, "step": 17312 }, { "epoch": 0.034225111076870506, "grad_norm": 0.12042046338319778, "learning_rate": 9.696937275367832e-05, "loss": 2.0547, "step": 17344 }, { "epoch": 0.03428825703826695, "grad_norm": 0.10553345084190369, "learning_rate": 9.696303252931363e-05, "loss": 2.0426, "step": 17376 }, { "epoch": 0.03435140299966338, "grad_norm": 0.12902826070785522, "learning_rate": 9.695669230494895e-05, "loss": 2.0638, "step": 17408 }, { "epoch": 0.034414548961059825, "grad_norm": 0.11707167327404022, "learning_rate": 9.695035208058426e-05, "loss": 2.0423, "step": 17440 }, { "epoch": 0.03447769492245627, "grad_norm": 0.13379773497581482, "learning_rate": 9.694401185621956e-05, "loss": 2.0388, "step": 17472 }, { "epoch": 0.03454084088385271, "grad_norm": 0.1197654977440834, "learning_rate": 9.693767163185488e-05, "loss": 2.028, "step": 17504 }, { "epoch": 0.034603986845249145, "grad_norm": 0.11672238260507584, "learning_rate": 9.693133140749019e-05, "loss": 2.023, "step": 17536 }, { "epoch": 0.03466713280664559, "grad_norm": 0.11840718239545822, "learning_rate": 9.692499118312551e-05, "loss": 2.0377, "step": 17568 }, { "epoch": 0.03473027876804203, "grad_norm": 0.1268356293439865, "learning_rate": 9.691865095876081e-05, "loss": 2.0193, "step": 17600 }, { "epoch": 0.034793424729438464, "grad_norm": 0.1247047707438469, "learning_rate": 9.691231073439612e-05, "loss": 2.0511, "step": 17632 }, { "epoch": 0.03485657069083491, "grad_norm": 0.13573023676872253, "learning_rate": 9.690597051003142e-05, "loss": 2.0541, "step": 17664 }, { "epoch": 0.03491971665223135, "grad_norm": 0.12114041298627853, "learning_rate": 9.689963028566674e-05, "loss": 2.0323, "step": 17696 }, { "epoch": 0.034982862613627784, "grad_norm": 0.12819577753543854, "learning_rate": 9.689329006130204e-05, "loss": 2.0265, "step": 17728 }, { "epoch": 0.035046008575024226, "grad_norm": 0.11567926406860352, "learning_rate": 9.688694983693735e-05, "loss": 2.0435, "step": 17760 }, { "epoch": 0.03510915453642067, "grad_norm": 0.13010470569133759, "learning_rate": 9.688060961257267e-05, "loss": 2.0298, "step": 17792 }, { "epoch": 0.035172300497817104, "grad_norm": 0.14122366905212402, "learning_rate": 9.687426938820798e-05, "loss": 2.0302, "step": 17824 }, { "epoch": 0.035235446459213546, "grad_norm": 0.11040329188108444, "learning_rate": 9.686792916384328e-05, "loss": 2.0237, "step": 17856 }, { "epoch": 0.03529859242060999, "grad_norm": 0.1371491402387619, "learning_rate": 9.68615889394786e-05, "loss": 2.0594, "step": 17888 }, { "epoch": 0.03536173838200643, "grad_norm": 0.11435120552778244, "learning_rate": 9.685524871511391e-05, "loss": 2.0307, "step": 17920 }, { "epoch": 0.035424884343402865, "grad_norm": 0.11154137551784515, "learning_rate": 9.684890849074923e-05, "loss": 2.03, "step": 17952 }, { "epoch": 0.03548803030479931, "grad_norm": 0.13120387494564056, "learning_rate": 9.684256826638454e-05, "loss": 2.0287, "step": 17984 }, { "epoch": 0.03555117626619575, "grad_norm": 0.11339680105447769, "learning_rate": 9.683622804201984e-05, "loss": 2.0382, "step": 18016 }, { "epoch": 0.035614322227592185, "grad_norm": 0.10982314497232437, "learning_rate": 9.682988781765516e-05, "loss": 2.0297, "step": 18048 }, { "epoch": 0.03567746818898863, "grad_norm": 0.12217368185520172, "learning_rate": 9.682354759329046e-05, "loss": 2.0192, "step": 18080 }, { "epoch": 0.03574061415038507, "grad_norm": 0.11636436730623245, "learning_rate": 9.681720736892577e-05, "loss": 2.0114, "step": 18112 }, { "epoch": 0.035803760111781505, "grad_norm": 0.11972784250974655, "learning_rate": 9.681086714456107e-05, "loss": 2.0184, "step": 18144 }, { "epoch": 0.03586690607317795, "grad_norm": 0.13137874007225037, "learning_rate": 9.680452692019639e-05, "loss": 2.0184, "step": 18176 }, { "epoch": 0.03593005203457439, "grad_norm": 0.11658748239278793, "learning_rate": 9.67981866958317e-05, "loss": 2.0203, "step": 18208 }, { "epoch": 0.035993197995970824, "grad_norm": 0.1266217827796936, "learning_rate": 9.679184647146702e-05, "loss": 2.0241, "step": 18240 }, { "epoch": 0.036056343957367266, "grad_norm": 0.12376082688570023, "learning_rate": 9.678550624710232e-05, "loss": 2.0128, "step": 18272 }, { "epoch": 0.03611948991876371, "grad_norm": 0.12112032622098923, "learning_rate": 9.677916602273763e-05, "loss": 2.0316, "step": 18304 }, { "epoch": 0.03618263588016015, "grad_norm": 0.11909458041191101, "learning_rate": 9.677282579837295e-05, "loss": 2.0278, "step": 18336 }, { "epoch": 0.036245781841556586, "grad_norm": 0.12725473940372467, "learning_rate": 9.676648557400826e-05, "loss": 2.0218, "step": 18368 }, { "epoch": 0.03630892780295303, "grad_norm": 0.11885615438222885, "learning_rate": 9.676014534964356e-05, "loss": 2.0056, "step": 18400 }, { "epoch": 0.03637207376434947, "grad_norm": 0.11043411493301392, "learning_rate": 9.675380512527888e-05, "loss": 2.0259, "step": 18432 }, { "epoch": 0.036435219725745906, "grad_norm": 0.12948209047317505, "learning_rate": 9.674746490091418e-05, "loss": 2.0014, "step": 18464 }, { "epoch": 0.03649836568714235, "grad_norm": 0.12479124963283539, "learning_rate": 9.67411246765495e-05, "loss": 2.0164, "step": 18496 }, { "epoch": 0.03656151164853879, "grad_norm": 0.11851473152637482, "learning_rate": 9.67347844521848e-05, "loss": 2.0192, "step": 18528 }, { "epoch": 0.036624657609935225, "grad_norm": 0.12374136596918106, "learning_rate": 9.672844422782011e-05, "loss": 2.0157, "step": 18560 }, { "epoch": 0.03668780357133167, "grad_norm": 0.11049731075763702, "learning_rate": 9.672210400345542e-05, "loss": 1.9978, "step": 18592 }, { "epoch": 0.03675094953272811, "grad_norm": 0.12011417001485825, "learning_rate": 9.671576377909074e-05, "loss": 2.019, "step": 18624 }, { "epoch": 0.036814095494124545, "grad_norm": 0.14559400081634521, "learning_rate": 9.670942355472605e-05, "loss": 2.027, "step": 18656 }, { "epoch": 0.03687724145552099, "grad_norm": 0.13493861258029938, "learning_rate": 9.670308333036135e-05, "loss": 2.0104, "step": 18688 }, { "epoch": 0.03694038741691743, "grad_norm": 0.11162148416042328, "learning_rate": 9.669674310599667e-05, "loss": 2.0195, "step": 18720 }, { "epoch": 0.037003533378313865, "grad_norm": 0.1158028393983841, "learning_rate": 9.669040288163198e-05, "loss": 2.0086, "step": 18752 }, { "epoch": 0.03706667933971031, "grad_norm": 0.10774479806423187, "learning_rate": 9.66840626572673e-05, "loss": 2.0136, "step": 18784 }, { "epoch": 0.03712982530110675, "grad_norm": 0.11519818007946014, "learning_rate": 9.66777224329026e-05, "loss": 2.0052, "step": 18816 }, { "epoch": 0.03719297126250319, "grad_norm": 0.12130032479763031, "learning_rate": 9.667138220853791e-05, "loss": 2.0088, "step": 18848 }, { "epoch": 0.037256117223899626, "grad_norm": 0.12328891456127167, "learning_rate": 9.666504198417321e-05, "loss": 2.0259, "step": 18880 }, { "epoch": 0.03731926318529607, "grad_norm": 0.12909002602100372, "learning_rate": 9.665870175980853e-05, "loss": 2.0058, "step": 18912 }, { "epoch": 0.03738240914669251, "grad_norm": 0.11103764921426773, "learning_rate": 9.665236153544383e-05, "loss": 2.0082, "step": 18944 }, { "epoch": 0.037445555108088946, "grad_norm": 0.13746172189712524, "learning_rate": 9.664602131107915e-05, "loss": 2.0122, "step": 18976 }, { "epoch": 0.03750870106948539, "grad_norm": 0.13963700830936432, "learning_rate": 9.663968108671446e-05, "loss": 2.0104, "step": 19008 }, { "epoch": 0.03757184703088183, "grad_norm": 0.11920302361249924, "learning_rate": 9.663334086234977e-05, "loss": 2.029, "step": 19040 }, { "epoch": 0.037634992992278266, "grad_norm": 0.11948902159929276, "learning_rate": 9.662700063798508e-05, "loss": 2.0033, "step": 19072 }, { "epoch": 0.03769813895367471, "grad_norm": 0.11687491834163666, "learning_rate": 9.662066041362039e-05, "loss": 1.998, "step": 19104 }, { "epoch": 0.03776128491507115, "grad_norm": 0.12691032886505127, "learning_rate": 9.66143201892557e-05, "loss": 2.0032, "step": 19136 }, { "epoch": 0.037824430876467585, "grad_norm": 0.11522775888442993, "learning_rate": 9.660797996489102e-05, "loss": 2.0181, "step": 19168 }, { "epoch": 0.03788757683786403, "grad_norm": 0.1483718752861023, "learning_rate": 9.660163974052632e-05, "loss": 2.0104, "step": 19200 }, { "epoch": 0.03795072279926047, "grad_norm": 0.12110782414674759, "learning_rate": 9.659529951616163e-05, "loss": 2.0086, "step": 19232 }, { "epoch": 0.03801386876065691, "grad_norm": 0.1225944310426712, "learning_rate": 9.658895929179695e-05, "loss": 2.0026, "step": 19264 }, { "epoch": 0.03807701472205335, "grad_norm": 0.11542484164237976, "learning_rate": 9.658261906743225e-05, "loss": 1.9814, "step": 19296 }, { "epoch": 0.03814016068344979, "grad_norm": 0.1180296316742897, "learning_rate": 9.657627884306756e-05, "loss": 1.9988, "step": 19328 }, { "epoch": 0.03820330664484623, "grad_norm": 0.11563392728567123, "learning_rate": 9.656993861870287e-05, "loss": 1.9985, "step": 19360 }, { "epoch": 0.03826645260624267, "grad_norm": 0.1254437267780304, "learning_rate": 9.656359839433818e-05, "loss": 2.0047, "step": 19392 }, { "epoch": 0.03832959856763911, "grad_norm": 0.11752956360578537, "learning_rate": 9.65572581699735e-05, "loss": 1.9895, "step": 19424 }, { "epoch": 0.03839274452903555, "grad_norm": 0.14189307391643524, "learning_rate": 9.655091794560881e-05, "loss": 1.9955, "step": 19456 }, { "epoch": 0.038455890490431986, "grad_norm": 0.12572942674160004, "learning_rate": 9.654457772124411e-05, "loss": 2.0009, "step": 19488 }, { "epoch": 0.03851903645182843, "grad_norm": 0.12132708728313446, "learning_rate": 9.653823749687942e-05, "loss": 1.9865, "step": 19520 }, { "epoch": 0.03858218241322487, "grad_norm": 0.12943652272224426, "learning_rate": 9.653189727251474e-05, "loss": 2.0015, "step": 19552 }, { "epoch": 0.038645328374621306, "grad_norm": 0.12172172963619232, "learning_rate": 9.652555704815005e-05, "loss": 1.9924, "step": 19584 }, { "epoch": 0.03870847433601775, "grad_norm": 0.10879118740558624, "learning_rate": 9.651921682378536e-05, "loss": 1.9977, "step": 19616 }, { "epoch": 0.03877162029741419, "grad_norm": 0.1369899958372116, "learning_rate": 9.651287659942067e-05, "loss": 1.996, "step": 19648 }, { "epoch": 0.03883476625881063, "grad_norm": 0.12542742490768433, "learning_rate": 9.650653637505598e-05, "loss": 1.9921, "step": 19680 }, { "epoch": 0.03889791222020707, "grad_norm": 0.13500872254371643, "learning_rate": 9.650019615069129e-05, "loss": 1.9998, "step": 19712 }, { "epoch": 0.03896105818160351, "grad_norm": 0.11500067263841629, "learning_rate": 9.649385592632659e-05, "loss": 1.9864, "step": 19744 }, { "epoch": 0.03902420414299995, "grad_norm": 0.14147840440273285, "learning_rate": 9.64875157019619e-05, "loss": 1.9939, "step": 19776 }, { "epoch": 0.03908735010439639, "grad_norm": 0.11108619719743729, "learning_rate": 9.648117547759722e-05, "loss": 2.0023, "step": 19808 }, { "epoch": 0.03915049606579283, "grad_norm": 0.1156499832868576, "learning_rate": 9.647483525323253e-05, "loss": 2.0081, "step": 19840 }, { "epoch": 0.03921364202718927, "grad_norm": 0.11189933866262436, "learning_rate": 9.646849502886783e-05, "loss": 1.9745, "step": 19872 }, { "epoch": 0.03927678798858571, "grad_norm": 0.13415177166461945, "learning_rate": 9.646215480450315e-05, "loss": 1.9928, "step": 19904 }, { "epoch": 0.03933993394998215, "grad_norm": 0.11870824545621872, "learning_rate": 9.645581458013846e-05, "loss": 2.0, "step": 19936 }, { "epoch": 0.03940307991137859, "grad_norm": 0.11301769316196442, "learning_rate": 9.644947435577377e-05, "loss": 1.971, "step": 19968 }, { "epoch": 0.039466225872775026, "grad_norm": 0.11457088589668274, "learning_rate": 9.644313413140909e-05, "loss": 1.986, "step": 20000 }, { "epoch": 0.03952937183417147, "grad_norm": 0.12496864050626755, "learning_rate": 9.643679390704439e-05, "loss": 2.0052, "step": 20032 }, { "epoch": 0.03959251779556791, "grad_norm": 0.10965562611818314, "learning_rate": 9.64304536826797e-05, "loss": 1.9727, "step": 20064 }, { "epoch": 0.03965566375696435, "grad_norm": 0.1069643646478653, "learning_rate": 9.642411345831502e-05, "loss": 1.9937, "step": 20096 }, { "epoch": 0.03971880971836079, "grad_norm": 0.11410249024629593, "learning_rate": 9.641777323395032e-05, "loss": 1.9865, "step": 20128 }, { "epoch": 0.03978195567975723, "grad_norm": 0.12830950319766998, "learning_rate": 9.641143300958562e-05, "loss": 1.9844, "step": 20160 }, { "epoch": 0.03984510164115367, "grad_norm": 0.12240003794431686, "learning_rate": 9.640509278522094e-05, "loss": 1.9719, "step": 20192 }, { "epoch": 0.03990824760255011, "grad_norm": 0.1246711015701294, "learning_rate": 9.639875256085625e-05, "loss": 1.9975, "step": 20224 }, { "epoch": 0.03997139356394655, "grad_norm": 0.10353072732686996, "learning_rate": 9.639241233649156e-05, "loss": 1.9852, "step": 20256 }, { "epoch": 0.04003453952534299, "grad_norm": 0.10986755043268204, "learning_rate": 9.638607211212687e-05, "loss": 1.9754, "step": 20288 }, { "epoch": 0.04009768548673943, "grad_norm": 0.12473009526729584, "learning_rate": 9.637973188776218e-05, "loss": 1.9844, "step": 20320 }, { "epoch": 0.04016083144813587, "grad_norm": 0.12376633286476135, "learning_rate": 9.63733916633975e-05, "loss": 1.9733, "step": 20352 }, { "epoch": 0.04022397740953231, "grad_norm": 0.12706255912780762, "learning_rate": 9.636705143903281e-05, "loss": 1.9858, "step": 20384 }, { "epoch": 0.04028712337092875, "grad_norm": 0.1247047558426857, "learning_rate": 9.636071121466811e-05, "loss": 1.9652, "step": 20416 }, { "epoch": 0.04035026933232519, "grad_norm": 0.13165326416492462, "learning_rate": 9.635437099030343e-05, "loss": 1.9796, "step": 20448 }, { "epoch": 0.04041341529372163, "grad_norm": 0.12569890916347504, "learning_rate": 9.634803076593874e-05, "loss": 1.977, "step": 20480 }, { "epoch": 0.040476561255118074, "grad_norm": 0.10226519405841827, "learning_rate": 9.634169054157405e-05, "loss": 1.9805, "step": 20512 }, { "epoch": 0.04053970721651451, "grad_norm": 0.11636253446340561, "learning_rate": 9.633535031720936e-05, "loss": 2.0009, "step": 20544 }, { "epoch": 0.04060285317791095, "grad_norm": 0.12742649018764496, "learning_rate": 9.632901009284466e-05, "loss": 1.9942, "step": 20576 }, { "epoch": 0.04066599913930739, "grad_norm": 0.13119980692863464, "learning_rate": 9.632266986847997e-05, "loss": 1.981, "step": 20608 }, { "epoch": 0.04072914510070383, "grad_norm": 0.11325479298830032, "learning_rate": 9.631632964411529e-05, "loss": 1.9585, "step": 20640 }, { "epoch": 0.04079229106210027, "grad_norm": 0.10867073386907578, "learning_rate": 9.63099894197506e-05, "loss": 1.9908, "step": 20672 }, { "epoch": 0.04085543702349671, "grad_norm": 0.12300091981887817, "learning_rate": 9.63036491953859e-05, "loss": 1.9766, "step": 20704 }, { "epoch": 0.04091858298489315, "grad_norm": 0.10913708806037903, "learning_rate": 9.629730897102122e-05, "loss": 1.986, "step": 20736 }, { "epoch": 0.04098172894628959, "grad_norm": 0.12194447964429855, "learning_rate": 9.629096874665653e-05, "loss": 1.9827, "step": 20768 }, { "epoch": 0.04104487490768603, "grad_norm": 0.11287178844213486, "learning_rate": 9.628462852229184e-05, "loss": 1.9894, "step": 20800 }, { "epoch": 0.04110802086908247, "grad_norm": 0.1337476372718811, "learning_rate": 9.627828829792715e-05, "loss": 1.9593, "step": 20832 }, { "epoch": 0.04117116683047891, "grad_norm": 0.1178978905081749, "learning_rate": 9.627194807356246e-05, "loss": 1.9828, "step": 20864 }, { "epoch": 0.04123431279187535, "grad_norm": 0.11720005422830582, "learning_rate": 9.626560784919777e-05, "loss": 1.9865, "step": 20896 }, { "epoch": 0.041297458753271794, "grad_norm": 0.1083127111196518, "learning_rate": 9.625926762483308e-05, "loss": 1.9826, "step": 20928 }, { "epoch": 0.04136060471466823, "grad_norm": 0.10887587070465088, "learning_rate": 9.625292740046839e-05, "loss": 1.9741, "step": 20960 }, { "epoch": 0.04142375067606467, "grad_norm": 0.12325512617826462, "learning_rate": 9.624658717610369e-05, "loss": 1.9762, "step": 20992 }, { "epoch": 0.041486896637461114, "grad_norm": 0.11215290427207947, "learning_rate": 9.6240246951739e-05, "loss": 1.9804, "step": 21024 }, { "epoch": 0.04155004259885755, "grad_norm": 0.12300876528024673, "learning_rate": 9.623390672737432e-05, "loss": 1.9713, "step": 21056 }, { "epoch": 0.04161318856025399, "grad_norm": 0.10765711218118668, "learning_rate": 9.622756650300962e-05, "loss": 1.9738, "step": 21088 }, { "epoch": 0.041676334521650434, "grad_norm": 0.1282142698764801, "learning_rate": 9.622122627864494e-05, "loss": 1.9588, "step": 21120 }, { "epoch": 0.04173948048304687, "grad_norm": 0.12258365750312805, "learning_rate": 9.621488605428025e-05, "loss": 1.9814, "step": 21152 }, { "epoch": 0.04180262644444331, "grad_norm": 0.12109114229679108, "learning_rate": 9.620854582991557e-05, "loss": 1.9573, "step": 21184 }, { "epoch": 0.04186577240583975, "grad_norm": 0.11835681647062302, "learning_rate": 9.620220560555087e-05, "loss": 1.9777, "step": 21216 }, { "epoch": 0.04192891836723619, "grad_norm": 0.11301271617412567, "learning_rate": 9.619586538118618e-05, "loss": 1.9748, "step": 21248 }, { "epoch": 0.04199206432863263, "grad_norm": 0.11349284648895264, "learning_rate": 9.61895251568215e-05, "loss": 1.9781, "step": 21280 }, { "epoch": 0.04205521029002907, "grad_norm": 0.11842041462659836, "learning_rate": 9.618318493245681e-05, "loss": 1.9841, "step": 21312 }, { "epoch": 0.042118356251425515, "grad_norm": 0.11143847554922104, "learning_rate": 9.617684470809211e-05, "loss": 1.9714, "step": 21344 }, { "epoch": 0.04218150221282195, "grad_norm": 0.10669038444757462, "learning_rate": 9.617050448372743e-05, "loss": 1.974, "step": 21376 }, { "epoch": 0.04224464817421839, "grad_norm": 0.103308767080307, "learning_rate": 9.616416425936273e-05, "loss": 1.9596, "step": 21408 }, { "epoch": 0.042307794135614835, "grad_norm": 0.10760737955570221, "learning_rate": 9.615782403499804e-05, "loss": 1.9776, "step": 21440 }, { "epoch": 0.04237094009701127, "grad_norm": 0.11961367726325989, "learning_rate": 9.615148381063336e-05, "loss": 1.9557, "step": 21472 }, { "epoch": 0.04243408605840771, "grad_norm": 0.12137002497911453, "learning_rate": 9.614514358626866e-05, "loss": 1.9654, "step": 21504 }, { "epoch": 0.042497232019804154, "grad_norm": 0.10979246348142624, "learning_rate": 9.613880336190397e-05, "loss": 1.9636, "step": 21536 }, { "epoch": 0.04256037798120059, "grad_norm": 0.12187566608190536, "learning_rate": 9.613246313753929e-05, "loss": 1.9586, "step": 21568 }, { "epoch": 0.04262352394259703, "grad_norm": 0.11190672963857651, "learning_rate": 9.61261229131746e-05, "loss": 1.9561, "step": 21600 }, { "epoch": 0.042686669903993474, "grad_norm": 0.12588316202163696, "learning_rate": 9.61197826888099e-05, "loss": 1.959, "step": 21632 }, { "epoch": 0.04274981586538991, "grad_norm": 0.11184147745370865, "learning_rate": 9.611344246444522e-05, "loss": 1.9643, "step": 21664 }, { "epoch": 0.04281296182678635, "grad_norm": 0.1143326535820961, "learning_rate": 9.610710224008053e-05, "loss": 1.9755, "step": 21696 }, { "epoch": 0.04287610778818279, "grad_norm": 0.1150774285197258, "learning_rate": 9.610076201571585e-05, "loss": 1.967, "step": 21728 }, { "epoch": 0.042939253749579236, "grad_norm": 0.11972521990537643, "learning_rate": 9.609442179135115e-05, "loss": 1.9678, "step": 21760 }, { "epoch": 0.04300239971097567, "grad_norm": 0.12995310127735138, "learning_rate": 9.608808156698646e-05, "loss": 1.958, "step": 21792 }, { "epoch": 0.04306554567237211, "grad_norm": 0.13119225203990936, "learning_rate": 9.608174134262176e-05, "loss": 1.9775, "step": 21824 }, { "epoch": 0.043128691633768555, "grad_norm": 0.11164345592260361, "learning_rate": 9.607540111825708e-05, "loss": 1.9784, "step": 21856 }, { "epoch": 0.04319183759516499, "grad_norm": 0.1257980614900589, "learning_rate": 9.606906089389238e-05, "loss": 1.9688, "step": 21888 }, { "epoch": 0.04325498355656143, "grad_norm": 0.11268804967403412, "learning_rate": 9.606272066952769e-05, "loss": 1.967, "step": 21920 }, { "epoch": 0.043318129517957875, "grad_norm": 0.1409463733434677, "learning_rate": 9.6056380445163e-05, "loss": 1.9557, "step": 21952 }, { "epoch": 0.04338127547935431, "grad_norm": 0.126288503408432, "learning_rate": 9.605004022079832e-05, "loss": 1.9611, "step": 21984 }, { "epoch": 0.04344442144075075, "grad_norm": 0.10051508992910385, "learning_rate": 9.604369999643364e-05, "loss": 1.9593, "step": 22016 }, { "epoch": 0.043507567402147194, "grad_norm": 0.12255176156759262, "learning_rate": 9.603735977206894e-05, "loss": 1.9688, "step": 22048 }, { "epoch": 0.04357071336354363, "grad_norm": 0.11303877085447311, "learning_rate": 9.603101954770425e-05, "loss": 1.9406, "step": 22080 }, { "epoch": 0.04363385932494007, "grad_norm": 0.11862237006425858, "learning_rate": 9.602467932333957e-05, "loss": 1.9634, "step": 22112 }, { "epoch": 0.043697005286336514, "grad_norm": 0.10821133852005005, "learning_rate": 9.601833909897488e-05, "loss": 1.9418, "step": 22144 }, { "epoch": 0.043760151247732956, "grad_norm": 0.1072334423661232, "learning_rate": 9.601199887461018e-05, "loss": 1.9793, "step": 22176 }, { "epoch": 0.04382329720912939, "grad_norm": 0.10535736382007599, "learning_rate": 9.60056586502455e-05, "loss": 1.9575, "step": 22208 }, { "epoch": 0.043886443170525834, "grad_norm": 0.12884929776191711, "learning_rate": 9.59993184258808e-05, "loss": 1.9629, "step": 22240 }, { "epoch": 0.043949589131922276, "grad_norm": 0.11832892894744873, "learning_rate": 9.599297820151611e-05, "loss": 1.9699, "step": 22272 }, { "epoch": 0.04401273509331871, "grad_norm": 0.14499302208423615, "learning_rate": 9.598663797715141e-05, "loss": 1.9592, "step": 22304 }, { "epoch": 0.04407588105471515, "grad_norm": 0.11090567708015442, "learning_rate": 9.598029775278673e-05, "loss": 1.9468, "step": 22336 }, { "epoch": 0.044139027016111596, "grad_norm": 0.11581122875213623, "learning_rate": 9.597395752842204e-05, "loss": 1.9616, "step": 22368 }, { "epoch": 0.04420217297750803, "grad_norm": 0.11382453143596649, "learning_rate": 9.596761730405736e-05, "loss": 1.9456, "step": 22400 }, { "epoch": 0.04426531893890447, "grad_norm": 0.12293533235788345, "learning_rate": 9.596127707969266e-05, "loss": 1.9452, "step": 22432 }, { "epoch": 0.044328464900300915, "grad_norm": 0.13024349510669708, "learning_rate": 9.595493685532797e-05, "loss": 1.9593, "step": 22464 }, { "epoch": 0.04439161086169735, "grad_norm": 0.14722859859466553, "learning_rate": 9.594859663096329e-05, "loss": 1.9746, "step": 22496 }, { "epoch": 0.04445475682309379, "grad_norm": 0.10557448118925095, "learning_rate": 9.59422564065986e-05, "loss": 1.9641, "step": 22528 }, { "epoch": 0.044517902784490235, "grad_norm": 0.11203256249427795, "learning_rate": 9.59359161822339e-05, "loss": 1.9505, "step": 22560 }, { "epoch": 0.04458104874588668, "grad_norm": 0.11080003529787064, "learning_rate": 9.592957595786922e-05, "loss": 1.966, "step": 22592 }, { "epoch": 0.04464419470728311, "grad_norm": 0.10656057298183441, "learning_rate": 9.592323573350452e-05, "loss": 1.9584, "step": 22624 }, { "epoch": 0.044707340668679554, "grad_norm": 0.12340083718299866, "learning_rate": 9.591689550913983e-05, "loss": 1.9526, "step": 22656 }, { "epoch": 0.044770486630076, "grad_norm": 0.1045255959033966, "learning_rate": 9.591055528477515e-05, "loss": 1.9646, "step": 22688 }, { "epoch": 0.04483363259147243, "grad_norm": 0.09781979024410248, "learning_rate": 9.590421506041045e-05, "loss": 1.9531, "step": 22720 }, { "epoch": 0.044896778552868874, "grad_norm": 0.11920382082462311, "learning_rate": 9.589787483604576e-05, "loss": 1.9566, "step": 22752 }, { "epoch": 0.044959924514265316, "grad_norm": 0.10923431068658829, "learning_rate": 9.589153461168108e-05, "loss": 1.9396, "step": 22784 }, { "epoch": 0.04502307047566175, "grad_norm": 0.1006297841668129, "learning_rate": 9.588519438731639e-05, "loss": 1.9499, "step": 22816 }, { "epoch": 0.045086216437058194, "grad_norm": 0.11739280074834824, "learning_rate": 9.587885416295169e-05, "loss": 1.9578, "step": 22848 }, { "epoch": 0.045149362398454636, "grad_norm": 0.12821663916110992, "learning_rate": 9.5872513938587e-05, "loss": 1.9512, "step": 22880 }, { "epoch": 0.04521250835985107, "grad_norm": 0.13252002000808716, "learning_rate": 9.586617371422232e-05, "loss": 1.9356, "step": 22912 }, { "epoch": 0.04527565432124751, "grad_norm": 0.12858210504055023, "learning_rate": 9.585983348985764e-05, "loss": 1.9307, "step": 22944 }, { "epoch": 0.045338800282643955, "grad_norm": 0.10175346583127975, "learning_rate": 9.585349326549294e-05, "loss": 1.9372, "step": 22976 }, { "epoch": 0.0454019462440404, "grad_norm": 0.1078830286860466, "learning_rate": 9.584715304112825e-05, "loss": 1.9599, "step": 23008 }, { "epoch": 0.04546509220543683, "grad_norm": 0.10003285855054855, "learning_rate": 9.584081281676355e-05, "loss": 1.9467, "step": 23040 }, { "epoch": 0.045528238166833275, "grad_norm": 0.10893139243125916, "learning_rate": 9.583447259239887e-05, "loss": 1.9467, "step": 23072 }, { "epoch": 0.04559138412822972, "grad_norm": 0.10260356217622757, "learning_rate": 9.582813236803417e-05, "loss": 1.9394, "step": 23104 }, { "epoch": 0.04565453008962615, "grad_norm": 0.11526516079902649, "learning_rate": 9.582179214366948e-05, "loss": 1.9511, "step": 23136 }, { "epoch": 0.045717676051022595, "grad_norm": 0.12039726227521896, "learning_rate": 9.58154519193048e-05, "loss": 1.9502, "step": 23168 }, { "epoch": 0.04578082201241904, "grad_norm": 0.10691642761230469, "learning_rate": 9.580911169494011e-05, "loss": 1.937, "step": 23200 }, { "epoch": 0.04584396797381547, "grad_norm": 0.11702313274145126, "learning_rate": 9.580277147057541e-05, "loss": 1.9532, "step": 23232 }, { "epoch": 0.045907113935211914, "grad_norm": 0.11025272309780121, "learning_rate": 9.579643124621073e-05, "loss": 1.9378, "step": 23264 }, { "epoch": 0.045970259896608356, "grad_norm": 0.10566860437393188, "learning_rate": 9.579009102184604e-05, "loss": 1.9355, "step": 23296 }, { "epoch": 0.04603340585800479, "grad_norm": 0.1200597733259201, "learning_rate": 9.578375079748136e-05, "loss": 1.9406, "step": 23328 }, { "epoch": 0.046096551819401234, "grad_norm": 0.10709843784570694, "learning_rate": 9.577741057311667e-05, "loss": 1.9403, "step": 23360 }, { "epoch": 0.046159697780797676, "grad_norm": 0.11283406615257263, "learning_rate": 9.577107034875197e-05, "loss": 1.9302, "step": 23392 }, { "epoch": 0.04622284374219412, "grad_norm": 0.1264004111289978, "learning_rate": 9.576473012438729e-05, "loss": 1.9477, "step": 23424 }, { "epoch": 0.046285989703590553, "grad_norm": 0.09890617430210114, "learning_rate": 9.575838990002259e-05, "loss": 1.929, "step": 23456 }, { "epoch": 0.046349135664986996, "grad_norm": 0.11972219496965408, "learning_rate": 9.57520496756579e-05, "loss": 1.9466, "step": 23488 }, { "epoch": 0.04641228162638344, "grad_norm": 0.11952702701091766, "learning_rate": 9.57457094512932e-05, "loss": 1.9369, "step": 23520 }, { "epoch": 0.04647542758777987, "grad_norm": 0.12452511489391327, "learning_rate": 9.573936922692852e-05, "loss": 1.9306, "step": 23552 }, { "epoch": 0.046538573549176315, "grad_norm": 0.14350657165050507, "learning_rate": 9.573302900256383e-05, "loss": 1.925, "step": 23584 }, { "epoch": 0.04660171951057276, "grad_norm": 0.10996013134717941, "learning_rate": 9.572668877819915e-05, "loss": 1.9475, "step": 23616 }, { "epoch": 0.04666486547196919, "grad_norm": 0.12992681562900543, "learning_rate": 9.572034855383445e-05, "loss": 1.9342, "step": 23648 }, { "epoch": 0.046728011433365635, "grad_norm": 0.10513056814670563, "learning_rate": 9.571400832946976e-05, "loss": 1.9313, "step": 23680 }, { "epoch": 0.04679115739476208, "grad_norm": 0.11533654481172562, "learning_rate": 9.570766810510508e-05, "loss": 1.9481, "step": 23712 }, { "epoch": 0.04685430335615851, "grad_norm": 0.10715834051370621, "learning_rate": 9.570132788074039e-05, "loss": 1.9474, "step": 23744 }, { "epoch": 0.046917449317554955, "grad_norm": 0.11807551980018616, "learning_rate": 9.569498765637569e-05, "loss": 1.9461, "step": 23776 }, { "epoch": 0.0469805952789514, "grad_norm": 0.10314300656318665, "learning_rate": 9.568864743201101e-05, "loss": 1.9291, "step": 23808 }, { "epoch": 0.04704374124034784, "grad_norm": 0.1296030431985855, "learning_rate": 9.568230720764632e-05, "loss": 1.9255, "step": 23840 }, { "epoch": 0.047106887201744274, "grad_norm": 0.1086842343211174, "learning_rate": 9.567596698328162e-05, "loss": 1.9336, "step": 23872 }, { "epoch": 0.047170033163140716, "grad_norm": 0.1167302206158638, "learning_rate": 9.566962675891692e-05, "loss": 1.9315, "step": 23904 }, { "epoch": 0.04723317912453716, "grad_norm": 0.11264488846063614, "learning_rate": 9.566328653455224e-05, "loss": 1.9334, "step": 23936 }, { "epoch": 0.047296325085933594, "grad_norm": 0.10670772939920425, "learning_rate": 9.565694631018755e-05, "loss": 1.927, "step": 23968 }, { "epoch": 0.047359471047330036, "grad_norm": 0.11302254348993301, "learning_rate": 9.565060608582287e-05, "loss": 1.9355, "step": 24000 }, { "epoch": 0.04742261700872648, "grad_norm": 0.1167798787355423, "learning_rate": 9.564426586145818e-05, "loss": 1.9291, "step": 24032 }, { "epoch": 0.04748576297012291, "grad_norm": 0.11112770438194275, "learning_rate": 9.563792563709348e-05, "loss": 1.9323, "step": 24064 }, { "epoch": 0.047548908931519356, "grad_norm": 0.10099230706691742, "learning_rate": 9.56315854127288e-05, "loss": 1.9368, "step": 24096 }, { "epoch": 0.0476120548929158, "grad_norm": 0.11324863880872726, "learning_rate": 9.562524518836411e-05, "loss": 1.9417, "step": 24128 }, { "epoch": 0.04767520085431223, "grad_norm": 0.1250937134027481, "learning_rate": 9.561890496399943e-05, "loss": 1.9231, "step": 24160 }, { "epoch": 0.047738346815708675, "grad_norm": 0.13605114817619324, "learning_rate": 9.561256473963473e-05, "loss": 1.9371, "step": 24192 }, { "epoch": 0.04780149277710512, "grad_norm": 0.10294576734304428, "learning_rate": 9.560622451527004e-05, "loss": 1.9365, "step": 24224 }, { "epoch": 0.04786463873850156, "grad_norm": 0.10928324609994888, "learning_rate": 9.559988429090536e-05, "loss": 1.9221, "step": 24256 }, { "epoch": 0.047927784699897995, "grad_norm": 0.11262323707342148, "learning_rate": 9.559354406654066e-05, "loss": 1.9337, "step": 24288 }, { "epoch": 0.04799093066129444, "grad_norm": 0.10129617899656296, "learning_rate": 9.558720384217596e-05, "loss": 1.9378, "step": 24320 }, { "epoch": 0.04805407662269088, "grad_norm": 0.1110493540763855, "learning_rate": 9.558086361781127e-05, "loss": 1.9252, "step": 24352 }, { "epoch": 0.048117222584087314, "grad_norm": 0.11660345643758774, "learning_rate": 9.557452339344659e-05, "loss": 1.9259, "step": 24384 }, { "epoch": 0.04818036854548376, "grad_norm": 0.11428438872098923, "learning_rate": 9.55681831690819e-05, "loss": 1.9261, "step": 24416 }, { "epoch": 0.0482435145068802, "grad_norm": 0.12056509405374527, "learning_rate": 9.55618429447172e-05, "loss": 1.9343, "step": 24448 }, { "epoch": 0.048306660468276634, "grad_norm": 0.14168789982795715, "learning_rate": 9.555550272035252e-05, "loss": 1.9282, "step": 24480 }, { "epoch": 0.048369806429673076, "grad_norm": 0.11102975159883499, "learning_rate": 9.554916249598783e-05, "loss": 1.9352, "step": 24512 }, { "epoch": 0.04843295239106952, "grad_norm": 0.11062134057283401, "learning_rate": 9.554282227162315e-05, "loss": 1.9253, "step": 24544 }, { "epoch": 0.048496098352465954, "grad_norm": 0.10571175068616867, "learning_rate": 9.553648204725845e-05, "loss": 1.9398, "step": 24576 }, { "epoch": 0.048559244313862396, "grad_norm": 0.11620599776506424, "learning_rate": 9.553014182289376e-05, "loss": 1.9344, "step": 24608 }, { "epoch": 0.04862239027525884, "grad_norm": 0.11041609942913055, "learning_rate": 9.552380159852908e-05, "loss": 1.9211, "step": 24640 }, { "epoch": 0.04868553623665528, "grad_norm": 0.1092856228351593, "learning_rate": 9.551746137416439e-05, "loss": 1.9293, "step": 24672 }, { "epoch": 0.048748682198051715, "grad_norm": 0.12742461264133453, "learning_rate": 9.551112114979969e-05, "loss": 1.9154, "step": 24704 }, { "epoch": 0.04881182815944816, "grad_norm": 0.11318569630384445, "learning_rate": 9.5504780925435e-05, "loss": 1.9275, "step": 24736 }, { "epoch": 0.0488749741208446, "grad_norm": 0.10278608649969101, "learning_rate": 9.549844070107031e-05, "loss": 1.9182, "step": 24768 }, { "epoch": 0.048938120082241035, "grad_norm": 0.10825817286968231, "learning_rate": 9.549210047670562e-05, "loss": 1.924, "step": 24800 }, { "epoch": 0.04900126604363748, "grad_norm": 0.13008153438568115, "learning_rate": 9.548576025234094e-05, "loss": 1.9356, "step": 24832 }, { "epoch": 0.04906441200503392, "grad_norm": 0.0987807959318161, "learning_rate": 9.547942002797624e-05, "loss": 1.9309, "step": 24864 }, { "epoch": 0.049127557966430355, "grad_norm": 0.12102729082107544, "learning_rate": 9.547307980361155e-05, "loss": 1.9162, "step": 24896 }, { "epoch": 0.0491907039278268, "grad_norm": 0.10271728038787842, "learning_rate": 9.546673957924687e-05, "loss": 1.9239, "step": 24928 }, { "epoch": 0.04925384988922324, "grad_norm": 0.11824354529380798, "learning_rate": 9.546039935488218e-05, "loss": 1.9357, "step": 24960 }, { "epoch": 0.049316995850619674, "grad_norm": 0.10709060728549957, "learning_rate": 9.545405913051748e-05, "loss": 1.9299, "step": 24992 }, { "epoch": 0.049380141812016116, "grad_norm": 0.10140976309776306, "learning_rate": 9.54477189061528e-05, "loss": 1.9323, "step": 25024 }, { "epoch": 0.04944328777341256, "grad_norm": 0.11804775148630142, "learning_rate": 9.544137868178811e-05, "loss": 1.9181, "step": 25056 }, { "epoch": 0.049506433734809, "grad_norm": 0.11727722734212875, "learning_rate": 9.543503845742343e-05, "loss": 1.9206, "step": 25088 }, { "epoch": 0.049569579696205436, "grad_norm": 0.10891515016555786, "learning_rate": 9.542869823305873e-05, "loss": 1.9103, "step": 25120 }, { "epoch": 0.04963272565760188, "grad_norm": 0.11480802297592163, "learning_rate": 9.542235800869403e-05, "loss": 1.916, "step": 25152 }, { "epoch": 0.04969587161899832, "grad_norm": 0.1161581426858902, "learning_rate": 9.541601778432934e-05, "loss": 1.9142, "step": 25184 }, { "epoch": 0.049759017580394756, "grad_norm": 0.11386236548423767, "learning_rate": 9.540967755996466e-05, "loss": 1.9098, "step": 25216 }, { "epoch": 0.0498221635417912, "grad_norm": 0.1235729232430458, "learning_rate": 9.540333733559996e-05, "loss": 1.9319, "step": 25248 }, { "epoch": 0.04988530950318764, "grad_norm": 0.1056947186589241, "learning_rate": 9.539699711123527e-05, "loss": 1.9405, "step": 25280 }, { "epoch": 0.049948455464584075, "grad_norm": 0.11457525193691254, "learning_rate": 9.539065688687059e-05, "loss": 1.9017, "step": 25312 }, { "epoch": 0.05001160142598052, "grad_norm": 0.11316017061471939, "learning_rate": 9.53843166625059e-05, "loss": 1.9172, "step": 25344 }, { "epoch": 0.05007474738737696, "grad_norm": 0.10579028725624084, "learning_rate": 9.537797643814122e-05, "loss": 1.9146, "step": 25376 }, { "epoch": 0.050137893348773395, "grad_norm": 0.09902004897594452, "learning_rate": 9.537163621377652e-05, "loss": 1.9151, "step": 25408 }, { "epoch": 0.05020103931016984, "grad_norm": 0.11914989352226257, "learning_rate": 9.536529598941183e-05, "loss": 1.922, "step": 25440 }, { "epoch": 0.05026418527156628, "grad_norm": 0.10532984137535095, "learning_rate": 9.535895576504715e-05, "loss": 1.915, "step": 25472 }, { "epoch": 0.05032733123296272, "grad_norm": 0.11634562164545059, "learning_rate": 9.535261554068245e-05, "loss": 1.9147, "step": 25504 }, { "epoch": 0.05039047719435916, "grad_norm": 0.11276385188102722, "learning_rate": 9.534627531631776e-05, "loss": 1.9116, "step": 25536 }, { "epoch": 0.0504536231557556, "grad_norm": 0.10931231826543808, "learning_rate": 9.533993509195306e-05, "loss": 1.8955, "step": 25568 }, { "epoch": 0.05051676911715204, "grad_norm": 0.10813896358013153, "learning_rate": 9.533359486758838e-05, "loss": 1.9097, "step": 25600 }, { "epoch": 0.050579915078548476, "grad_norm": 0.11851253360509872, "learning_rate": 9.532725464322369e-05, "loss": 1.9201, "step": 25632 }, { "epoch": 0.05064306103994492, "grad_norm": 0.09754260629415512, "learning_rate": 9.5320914418859e-05, "loss": 1.9143, "step": 25664 }, { "epoch": 0.05070620700134136, "grad_norm": 0.09885375201702118, "learning_rate": 9.531457419449431e-05, "loss": 1.9091, "step": 25696 }, { "epoch": 0.050769352962737796, "grad_norm": 0.10297214984893799, "learning_rate": 9.530823397012962e-05, "loss": 1.9143, "step": 25728 }, { "epoch": 0.05083249892413424, "grad_norm": 0.10813931375741959, "learning_rate": 9.530189374576494e-05, "loss": 1.9143, "step": 25760 }, { "epoch": 0.05089564488553068, "grad_norm": 0.10659068822860718, "learning_rate": 9.529555352140024e-05, "loss": 1.91, "step": 25792 }, { "epoch": 0.050958790846927116, "grad_norm": 0.12919165194034576, "learning_rate": 9.528921329703555e-05, "loss": 1.9225, "step": 25824 }, { "epoch": 0.05102193680832356, "grad_norm": 0.10619667917490005, "learning_rate": 9.528287307267087e-05, "loss": 1.9228, "step": 25856 }, { "epoch": 0.05108508276972, "grad_norm": 0.10115159302949905, "learning_rate": 9.527653284830618e-05, "loss": 1.9056, "step": 25888 }, { "epoch": 0.05114822873111644, "grad_norm": 0.11068226397037506, "learning_rate": 9.527019262394148e-05, "loss": 1.918, "step": 25920 }, { "epoch": 0.05121137469251288, "grad_norm": 0.11327261477708817, "learning_rate": 9.52638523995768e-05, "loss": 1.9259, "step": 25952 }, { "epoch": 0.05127452065390932, "grad_norm": 0.1283046156167984, "learning_rate": 9.52575121752121e-05, "loss": 1.9046, "step": 25984 }, { "epoch": 0.05133766661530576, "grad_norm": 0.10593913495540619, "learning_rate": 9.525117195084741e-05, "loss": 1.8981, "step": 26016 }, { "epoch": 0.0514008125767022, "grad_norm": 0.11951139569282532, "learning_rate": 9.524483172648273e-05, "loss": 1.9145, "step": 26048 }, { "epoch": 0.05146395853809864, "grad_norm": 0.10877706110477448, "learning_rate": 9.523849150211803e-05, "loss": 1.9183, "step": 26080 }, { "epoch": 0.05152710449949508, "grad_norm": 0.10846201330423355, "learning_rate": 9.523215127775334e-05, "loss": 1.9038, "step": 26112 }, { "epoch": 0.05159025046089152, "grad_norm": 0.10390051454305649, "learning_rate": 9.522581105338866e-05, "loss": 1.9048, "step": 26144 }, { "epoch": 0.05165339642228796, "grad_norm": 0.12031883001327515, "learning_rate": 9.521947082902397e-05, "loss": 1.9125, "step": 26176 }, { "epoch": 0.0517165423836844, "grad_norm": 0.10653280466794968, "learning_rate": 9.521313060465927e-05, "loss": 1.908, "step": 26208 }, { "epoch": 0.051779688345080836, "grad_norm": 0.12093240022659302, "learning_rate": 9.520679038029459e-05, "loss": 1.9114, "step": 26240 }, { "epoch": 0.05184283430647728, "grad_norm": 0.1351175308227539, "learning_rate": 9.52004501559299e-05, "loss": 1.8928, "step": 26272 }, { "epoch": 0.05190598026787372, "grad_norm": 0.11259331554174423, "learning_rate": 9.519410993156522e-05, "loss": 1.9057, "step": 26304 }, { "epoch": 0.051969126229270156, "grad_norm": 0.11353620886802673, "learning_rate": 9.518776970720052e-05, "loss": 1.9012, "step": 26336 }, { "epoch": 0.0520322721906666, "grad_norm": 0.0989617332816124, "learning_rate": 9.518142948283583e-05, "loss": 1.9019, "step": 26368 }, { "epoch": 0.05209541815206304, "grad_norm": 0.10656723380088806, "learning_rate": 9.517508925847113e-05, "loss": 1.9125, "step": 26400 }, { "epoch": 0.05215856411345948, "grad_norm": 0.1056458055973053, "learning_rate": 9.516874903410645e-05, "loss": 1.9088, "step": 26432 }, { "epoch": 0.05222171007485592, "grad_norm": 0.11590812355279922, "learning_rate": 9.516240880974175e-05, "loss": 1.8987, "step": 26464 }, { "epoch": 0.05228485603625236, "grad_norm": 0.11630557477474213, "learning_rate": 9.515606858537706e-05, "loss": 1.9037, "step": 26496 }, { "epoch": 0.0523480019976488, "grad_norm": 0.1062152162194252, "learning_rate": 9.514972836101238e-05, "loss": 1.9047, "step": 26528 }, { "epoch": 0.05241114795904524, "grad_norm": 0.11292682588100433, "learning_rate": 9.51433881366477e-05, "loss": 1.9205, "step": 26560 }, { "epoch": 0.05247429392044168, "grad_norm": 0.11707049608230591, "learning_rate": 9.5137047912283e-05, "loss": 1.9085, "step": 26592 }, { "epoch": 0.05253743988183812, "grad_norm": 0.10360373556613922, "learning_rate": 9.513070768791831e-05, "loss": 1.8961, "step": 26624 }, { "epoch": 0.05260058584323456, "grad_norm": 0.10200996696949005, "learning_rate": 9.512436746355362e-05, "loss": 1.8976, "step": 26656 }, { "epoch": 0.052663731804631, "grad_norm": 0.09945542365312576, "learning_rate": 9.511802723918894e-05, "loss": 1.9122, "step": 26688 }, { "epoch": 0.05272687776602744, "grad_norm": 0.10186757892370224, "learning_rate": 9.511168701482425e-05, "loss": 1.9053, "step": 26720 }, { "epoch": 0.052790023727423876, "grad_norm": 0.11728771030902863, "learning_rate": 9.510534679045955e-05, "loss": 1.901, "step": 26752 }, { "epoch": 0.05285316968882032, "grad_norm": 0.10647526383399963, "learning_rate": 9.509900656609485e-05, "loss": 1.8989, "step": 26784 }, { "epoch": 0.05291631565021676, "grad_norm": 0.11470351368188858, "learning_rate": 9.509266634173017e-05, "loss": 1.8961, "step": 26816 }, { "epoch": 0.0529794616116132, "grad_norm": 0.11885487288236618, "learning_rate": 9.508632611736548e-05, "loss": 1.9105, "step": 26848 }, { "epoch": 0.05304260757300964, "grad_norm": 0.10806205868721008, "learning_rate": 9.507998589300078e-05, "loss": 1.8858, "step": 26880 }, { "epoch": 0.05310575353440608, "grad_norm": 0.10871472209692001, "learning_rate": 9.50736456686361e-05, "loss": 1.8853, "step": 26912 }, { "epoch": 0.05316889949580252, "grad_norm": 0.10162289440631866, "learning_rate": 9.506730544427141e-05, "loss": 1.8977, "step": 26944 }, { "epoch": 0.05323204545719896, "grad_norm": 0.11214316636323929, "learning_rate": 9.506096521990673e-05, "loss": 1.8947, "step": 26976 }, { "epoch": 0.0532951914185954, "grad_norm": 0.09722857177257538, "learning_rate": 9.505462499554203e-05, "loss": 1.8828, "step": 27008 }, { "epoch": 0.05335833737999184, "grad_norm": 0.10414082556962967, "learning_rate": 9.504828477117734e-05, "loss": 1.8992, "step": 27040 }, { "epoch": 0.05342148334138828, "grad_norm": 0.11235885322093964, "learning_rate": 9.504194454681266e-05, "loss": 1.8978, "step": 27072 }, { "epoch": 0.05348462930278472, "grad_norm": 0.10151505470275879, "learning_rate": 9.503560432244797e-05, "loss": 1.8798, "step": 27104 }, { "epoch": 0.05354777526418116, "grad_norm": 0.09740497916936874, "learning_rate": 9.502926409808327e-05, "loss": 1.8949, "step": 27136 }, { "epoch": 0.0536109212255776, "grad_norm": 0.10710079967975616, "learning_rate": 9.502292387371859e-05, "loss": 1.9016, "step": 27168 }, { "epoch": 0.05367406718697404, "grad_norm": 0.12225481122732162, "learning_rate": 9.501658364935389e-05, "loss": 1.8886, "step": 27200 }, { "epoch": 0.05373721314837048, "grad_norm": 0.10491971671581268, "learning_rate": 9.50102434249892e-05, "loss": 1.8994, "step": 27232 }, { "epoch": 0.053800359109766924, "grad_norm": 0.11765529215335846, "learning_rate": 9.50039032006245e-05, "loss": 1.9054, "step": 27264 }, { "epoch": 0.05386350507116336, "grad_norm": 0.10463310033082962, "learning_rate": 9.499756297625982e-05, "loss": 1.895, "step": 27296 }, { "epoch": 0.0539266510325598, "grad_norm": 0.10823880881071091, "learning_rate": 9.499122275189513e-05, "loss": 1.9042, "step": 27328 }, { "epoch": 0.05398979699395624, "grad_norm": 0.09078743308782578, "learning_rate": 9.498488252753045e-05, "loss": 1.8862, "step": 27360 }, { "epoch": 0.05405294295535268, "grad_norm": 0.11342493444681168, "learning_rate": 9.497854230316576e-05, "loss": 1.9163, "step": 27392 }, { "epoch": 0.05411608891674912, "grad_norm": 0.1100945919752121, "learning_rate": 9.497220207880106e-05, "loss": 1.9048, "step": 27424 }, { "epoch": 0.05417923487814556, "grad_norm": 0.11827767640352249, "learning_rate": 9.496586185443638e-05, "loss": 1.8797, "step": 27456 }, { "epoch": 0.054242380839542, "grad_norm": 0.11936810612678528, "learning_rate": 9.49595216300717e-05, "loss": 1.908, "step": 27488 }, { "epoch": 0.05430552680093844, "grad_norm": 0.10007096827030182, "learning_rate": 9.495318140570701e-05, "loss": 1.8897, "step": 27520 }, { "epoch": 0.05436867276233488, "grad_norm": 0.09597699344158173, "learning_rate": 9.494684118134231e-05, "loss": 1.8686, "step": 27552 }, { "epoch": 0.05443181872373132, "grad_norm": 0.11114552617073059, "learning_rate": 9.494050095697762e-05, "loss": 1.8969, "step": 27584 }, { "epoch": 0.05449496468512776, "grad_norm": 0.09870576858520508, "learning_rate": 9.493416073261293e-05, "loss": 1.9002, "step": 27616 }, { "epoch": 0.0545581106465242, "grad_norm": 0.10398736596107483, "learning_rate": 9.492782050824824e-05, "loss": 1.8967, "step": 27648 }, { "epoch": 0.054621256607920644, "grad_norm": 0.10817091166973114, "learning_rate": 9.492148028388354e-05, "loss": 1.9013, "step": 27680 }, { "epoch": 0.05468440256931708, "grad_norm": 0.11018930375576019, "learning_rate": 9.491514005951886e-05, "loss": 1.891, "step": 27712 }, { "epoch": 0.05474754853071352, "grad_norm": 0.11235178261995316, "learning_rate": 9.490879983515417e-05, "loss": 1.8873, "step": 27744 }, { "epoch": 0.054810694492109964, "grad_norm": 0.12674926221370697, "learning_rate": 9.490245961078948e-05, "loss": 1.8918, "step": 27776 }, { "epoch": 0.0548738404535064, "grad_norm": 0.10456321388483047, "learning_rate": 9.489611938642479e-05, "loss": 1.8827, "step": 27808 }, { "epoch": 0.05493698641490284, "grad_norm": 0.10634982585906982, "learning_rate": 9.48897791620601e-05, "loss": 1.9104, "step": 27840 }, { "epoch": 0.055000132376299284, "grad_norm": 0.10933510214090347, "learning_rate": 9.488343893769541e-05, "loss": 1.8867, "step": 27872 }, { "epoch": 0.05506327833769572, "grad_norm": 0.10465336591005325, "learning_rate": 9.487709871333073e-05, "loss": 1.8801, "step": 27904 }, { "epoch": 0.05512642429909216, "grad_norm": 0.11155618727207184, "learning_rate": 9.487075848896604e-05, "loss": 1.8933, "step": 27936 }, { "epoch": 0.0551895702604886, "grad_norm": 0.10860616713762283, "learning_rate": 9.486441826460134e-05, "loss": 1.9008, "step": 27968 }, { "epoch": 0.05525271622188504, "grad_norm": 0.11804484575986862, "learning_rate": 9.485807804023666e-05, "loss": 1.9105, "step": 28000 }, { "epoch": 0.05531586218328148, "grad_norm": 0.09635041654109955, "learning_rate": 9.485173781587196e-05, "loss": 1.8917, "step": 28032 }, { "epoch": 0.05537900814467792, "grad_norm": 0.10530688613653183, "learning_rate": 9.484539759150727e-05, "loss": 1.9062, "step": 28064 }, { "epoch": 0.055442154106074365, "grad_norm": 0.11746163666248322, "learning_rate": 9.483905736714258e-05, "loss": 1.8866, "step": 28096 }, { "epoch": 0.0555053000674708, "grad_norm": 0.11865773051977158, "learning_rate": 9.483271714277789e-05, "loss": 1.8951, "step": 28128 }, { "epoch": 0.05556844602886724, "grad_norm": 0.10170378535985947, "learning_rate": 9.48263769184132e-05, "loss": 1.8927, "step": 28160 }, { "epoch": 0.055631591990263685, "grad_norm": 0.10263235121965408, "learning_rate": 9.482003669404852e-05, "loss": 1.8792, "step": 28192 }, { "epoch": 0.05569473795166012, "grad_norm": 0.10327958315610886, "learning_rate": 9.481369646968382e-05, "loss": 1.8872, "step": 28224 }, { "epoch": 0.05575788391305656, "grad_norm": 0.10997182130813599, "learning_rate": 9.480735624531913e-05, "loss": 1.8944, "step": 28256 }, { "epoch": 0.055821029874453004, "grad_norm": 0.11584657430648804, "learning_rate": 9.480101602095445e-05, "loss": 1.8791, "step": 28288 }, { "epoch": 0.05588417583584944, "grad_norm": 0.10891609638929367, "learning_rate": 9.479467579658976e-05, "loss": 1.8887, "step": 28320 }, { "epoch": 0.05594732179724588, "grad_norm": 0.1165160983800888, "learning_rate": 9.478833557222507e-05, "loss": 1.8915, "step": 28352 }, { "epoch": 0.056010467758642324, "grad_norm": 0.09597717225551605, "learning_rate": 9.478199534786038e-05, "loss": 1.8957, "step": 28384 }, { "epoch": 0.05607361372003876, "grad_norm": 0.11800549924373627, "learning_rate": 9.47756551234957e-05, "loss": 1.8891, "step": 28416 }, { "epoch": 0.0561367596814352, "grad_norm": 0.11111108958721161, "learning_rate": 9.4769314899131e-05, "loss": 1.8804, "step": 28448 }, { "epoch": 0.056199905642831643, "grad_norm": 0.10995693504810333, "learning_rate": 9.47629746747663e-05, "loss": 1.8816, "step": 28480 }, { "epoch": 0.056263051604228086, "grad_norm": 0.11199373006820679, "learning_rate": 9.475663445040161e-05, "loss": 1.8914, "step": 28512 }, { "epoch": 0.05632619756562452, "grad_norm": 0.10174044966697693, "learning_rate": 9.475029422603693e-05, "loss": 1.8792, "step": 28544 }, { "epoch": 0.05638934352702096, "grad_norm": 0.11624758690595627, "learning_rate": 9.474395400167224e-05, "loss": 1.8874, "step": 28576 }, { "epoch": 0.056452489488417405, "grad_norm": 0.11394298076629639, "learning_rate": 9.473761377730755e-05, "loss": 1.8796, "step": 28608 }, { "epoch": 0.05651563544981384, "grad_norm": 0.11033052206039429, "learning_rate": 9.473127355294286e-05, "loss": 1.8838, "step": 28640 }, { "epoch": 0.05657878141121028, "grad_norm": 0.10413506627082825, "learning_rate": 9.472493332857817e-05, "loss": 1.8771, "step": 28672 }, { "epoch": 0.056641927372606725, "grad_norm": 0.09666626900434494, "learning_rate": 9.471859310421348e-05, "loss": 1.8729, "step": 28704 }, { "epoch": 0.05670507333400316, "grad_norm": 0.11766234785318375, "learning_rate": 9.47122528798488e-05, "loss": 1.8831, "step": 28736 }, { "epoch": 0.0567682192953996, "grad_norm": 0.11246274411678314, "learning_rate": 9.47059126554841e-05, "loss": 1.8915, "step": 28768 }, { "epoch": 0.056831365256796045, "grad_norm": 0.10445088893175125, "learning_rate": 9.469957243111941e-05, "loss": 1.8941, "step": 28800 }, { "epoch": 0.05689451121819248, "grad_norm": 0.10135059803724289, "learning_rate": 9.469323220675473e-05, "loss": 1.8897, "step": 28832 }, { "epoch": 0.05695765717958892, "grad_norm": 0.10946794599294662, "learning_rate": 9.468689198239003e-05, "loss": 1.8911, "step": 28864 }, { "epoch": 0.057020803140985364, "grad_norm": 0.11228682845830917, "learning_rate": 9.468055175802533e-05, "loss": 1.8852, "step": 28896 }, { "epoch": 0.057083949102381806, "grad_norm": 0.11112036556005478, "learning_rate": 9.467421153366065e-05, "loss": 1.9012, "step": 28928 }, { "epoch": 0.05714709506377824, "grad_norm": 0.10750123113393784, "learning_rate": 9.466787130929596e-05, "loss": 1.8819, "step": 28960 }, { "epoch": 0.057210241025174684, "grad_norm": 0.1083349660038948, "learning_rate": 9.466153108493128e-05, "loss": 1.8892, "step": 28992 }, { "epoch": 0.057273386986571126, "grad_norm": 0.09778472036123276, "learning_rate": 9.465519086056658e-05, "loss": 1.8827, "step": 29024 }, { "epoch": 0.05733653294796756, "grad_norm": 0.10571292042732239, "learning_rate": 9.464885063620189e-05, "loss": 1.8831, "step": 29056 }, { "epoch": 0.057399678909364, "grad_norm": 0.10290931165218353, "learning_rate": 9.46425104118372e-05, "loss": 1.8693, "step": 29088 }, { "epoch": 0.057462824870760446, "grad_norm": 0.11763163655996323, "learning_rate": 9.463617018747252e-05, "loss": 1.8697, "step": 29120 }, { "epoch": 0.05752597083215688, "grad_norm": 0.10329300910234451, "learning_rate": 9.462982996310782e-05, "loss": 1.8864, "step": 29152 }, { "epoch": 0.05758911679355332, "grad_norm": 0.11135107278823853, "learning_rate": 9.462348973874314e-05, "loss": 1.8804, "step": 29184 }, { "epoch": 0.057652262754949765, "grad_norm": 0.10401959717273712, "learning_rate": 9.461714951437845e-05, "loss": 1.892, "step": 29216 }, { "epoch": 0.0577154087163462, "grad_norm": 0.10142754018306732, "learning_rate": 9.461080929001376e-05, "loss": 1.884, "step": 29248 }, { "epoch": 0.05777855467774264, "grad_norm": 0.10389091074466705, "learning_rate": 9.460446906564907e-05, "loss": 1.8801, "step": 29280 }, { "epoch": 0.057841700639139085, "grad_norm": 0.10583070665597916, "learning_rate": 9.459812884128437e-05, "loss": 1.8735, "step": 29312 }, { "epoch": 0.05790484660053553, "grad_norm": 0.10557013750076294, "learning_rate": 9.459178861691968e-05, "loss": 1.8728, "step": 29344 }, { "epoch": 0.05796799256193196, "grad_norm": 0.10017932206392288, "learning_rate": 9.4585448392555e-05, "loss": 1.8893, "step": 29376 }, { "epoch": 0.058031138523328404, "grad_norm": 0.10164113342761993, "learning_rate": 9.457910816819031e-05, "loss": 1.8814, "step": 29408 }, { "epoch": 0.05809428448472485, "grad_norm": 0.10548080503940582, "learning_rate": 9.457276794382561e-05, "loss": 1.8938, "step": 29440 }, { "epoch": 0.05815743044612128, "grad_norm": 0.10909568518400192, "learning_rate": 9.456642771946093e-05, "loss": 1.8881, "step": 29472 }, { "epoch": 0.058220576407517724, "grad_norm": 0.1213250458240509, "learning_rate": 9.456008749509624e-05, "loss": 1.893, "step": 29504 }, { "epoch": 0.058283722368914166, "grad_norm": 0.10932382196187973, "learning_rate": 9.455374727073155e-05, "loss": 1.8733, "step": 29536 }, { "epoch": 0.0583468683303106, "grad_norm": 0.10439607501029968, "learning_rate": 9.454740704636686e-05, "loss": 1.8755, "step": 29568 }, { "epoch": 0.058410014291707044, "grad_norm": 0.09857314079999924, "learning_rate": 9.454106682200217e-05, "loss": 1.8853, "step": 29600 }, { "epoch": 0.058473160253103486, "grad_norm": 0.1132400706410408, "learning_rate": 9.453472659763748e-05, "loss": 1.8798, "step": 29632 }, { "epoch": 0.05853630621449992, "grad_norm": 0.100809745490551, "learning_rate": 9.452838637327279e-05, "loss": 1.8763, "step": 29664 }, { "epoch": 0.05859945217589636, "grad_norm": 0.10080935060977936, "learning_rate": 9.45220461489081e-05, "loss": 1.8827, "step": 29696 }, { "epoch": 0.058662598137292805, "grad_norm": 0.1064523234963417, "learning_rate": 9.45157059245434e-05, "loss": 1.8887, "step": 29728 }, { "epoch": 0.05872574409868925, "grad_norm": 0.11582209914922714, "learning_rate": 9.450936570017872e-05, "loss": 1.8871, "step": 29760 }, { "epoch": 0.05878889006008568, "grad_norm": 0.11003582924604416, "learning_rate": 9.450302547581403e-05, "loss": 1.8731, "step": 29792 }, { "epoch": 0.058852036021482125, "grad_norm": 0.10873007774353027, "learning_rate": 9.449668525144933e-05, "loss": 1.8671, "step": 29824 }, { "epoch": 0.05891518198287857, "grad_norm": 0.09862037003040314, "learning_rate": 9.449034502708465e-05, "loss": 1.8787, "step": 29856 }, { "epoch": 0.058978327944275, "grad_norm": 0.12812422215938568, "learning_rate": 9.448400480271996e-05, "loss": 1.8892, "step": 29888 }, { "epoch": 0.059041473905671445, "grad_norm": 0.10228711366653442, "learning_rate": 9.447766457835528e-05, "loss": 1.8613, "step": 29920 }, { "epoch": 0.05910461986706789, "grad_norm": 0.10565419495105743, "learning_rate": 9.447132435399059e-05, "loss": 1.8811, "step": 29952 }, { "epoch": 0.05916776582846432, "grad_norm": 0.11540845036506653, "learning_rate": 9.446498412962589e-05, "loss": 1.8816, "step": 29984 }, { "epoch": 0.059230911789860764, "grad_norm": 0.10832008719444275, "learning_rate": 9.44586439052612e-05, "loss": 1.881, "step": 30016 }, { "epoch": 0.059294057751257206, "grad_norm": 0.09858869016170502, "learning_rate": 9.445230368089652e-05, "loss": 1.8787, "step": 30048 }, { "epoch": 0.05935720371265364, "grad_norm": 0.10506797581911087, "learning_rate": 9.444596345653182e-05, "loss": 1.8671, "step": 30080 }, { "epoch": 0.059420349674050084, "grad_norm": 0.11254420876502991, "learning_rate": 9.443962323216714e-05, "loss": 1.8822, "step": 30112 }, { "epoch": 0.059483495635446526, "grad_norm": 0.10230351984500885, "learning_rate": 9.443328300780244e-05, "loss": 1.8785, "step": 30144 }, { "epoch": 0.05954664159684297, "grad_norm": 0.10950108617544174, "learning_rate": 9.442694278343775e-05, "loss": 1.8559, "step": 30176 }, { "epoch": 0.059609787558239404, "grad_norm": 0.1156979352235794, "learning_rate": 9.442060255907307e-05, "loss": 1.8648, "step": 30208 }, { "epoch": 0.059672933519635846, "grad_norm": 0.1161760613322258, "learning_rate": 9.441426233470837e-05, "loss": 1.8852, "step": 30240 }, { "epoch": 0.05973607948103229, "grad_norm": 0.11749791353940964, "learning_rate": 9.440792211034368e-05, "loss": 1.8618, "step": 30272 }, { "epoch": 0.05979922544242872, "grad_norm": 0.10605373978614807, "learning_rate": 9.4401581885979e-05, "loss": 1.8673, "step": 30304 }, { "epoch": 0.059862371403825165, "grad_norm": 0.0999060571193695, "learning_rate": 9.439524166161431e-05, "loss": 1.8675, "step": 30336 }, { "epoch": 0.05992551736522161, "grad_norm": 0.10648031532764435, "learning_rate": 9.438890143724961e-05, "loss": 1.8563, "step": 30368 }, { "epoch": 0.05998866332661804, "grad_norm": 0.11411280930042267, "learning_rate": 9.438256121288493e-05, "loss": 1.8631, "step": 30400 }, { "epoch": 0.060051809288014485, "grad_norm": 0.1032540425658226, "learning_rate": 9.437622098852024e-05, "loss": 1.8676, "step": 30432 }, { "epoch": 0.06011495524941093, "grad_norm": 0.1066264808177948, "learning_rate": 9.436988076415556e-05, "loss": 1.8756, "step": 30464 }, { "epoch": 0.06017810121080736, "grad_norm": 0.10596653074026108, "learning_rate": 9.436354053979086e-05, "loss": 1.8663, "step": 30496 }, { "epoch": 0.060241247172203805, "grad_norm": 0.1073792353272438, "learning_rate": 9.435720031542617e-05, "loss": 1.8723, "step": 30528 }, { "epoch": 0.06030439313360025, "grad_norm": 0.10586169362068176, "learning_rate": 9.435086009106147e-05, "loss": 1.8722, "step": 30560 }, { "epoch": 0.06036753909499669, "grad_norm": 0.10382824391126633, "learning_rate": 9.434451986669679e-05, "loss": 1.8712, "step": 30592 }, { "epoch": 0.060430685056393124, "grad_norm": 0.09873615950345993, "learning_rate": 9.43381796423321e-05, "loss": 1.8701, "step": 30624 }, { "epoch": 0.060493831017789566, "grad_norm": 0.11185143142938614, "learning_rate": 9.43318394179674e-05, "loss": 1.8588, "step": 30656 }, { "epoch": 0.06055697697918601, "grad_norm": 0.10457509011030197, "learning_rate": 9.432549919360272e-05, "loss": 1.874, "step": 30688 }, { "epoch": 0.060620122940582444, "grad_norm": 0.09991231560707092, "learning_rate": 9.431915896923803e-05, "loss": 1.8771, "step": 30720 }, { "epoch": 0.060683268901978886, "grad_norm": 0.10770970582962036, "learning_rate": 9.431281874487335e-05, "loss": 1.8782, "step": 30752 }, { "epoch": 0.06074641486337533, "grad_norm": 0.11045695841312408, "learning_rate": 9.430647852050865e-05, "loss": 1.8703, "step": 30784 }, { "epoch": 0.06080956082477176, "grad_norm": 0.10054302215576172, "learning_rate": 9.430013829614396e-05, "loss": 1.8633, "step": 30816 }, { "epoch": 0.060872706786168206, "grad_norm": 0.10831281542778015, "learning_rate": 9.429379807177928e-05, "loss": 1.8725, "step": 30848 }, { "epoch": 0.06093585274756465, "grad_norm": 0.10674191266298294, "learning_rate": 9.428745784741459e-05, "loss": 1.8616, "step": 30880 }, { "epoch": 0.06099899870896108, "grad_norm": 0.12082844227552414, "learning_rate": 9.428111762304989e-05, "loss": 1.8689, "step": 30912 }, { "epoch": 0.061062144670357525, "grad_norm": 0.10098271071910858, "learning_rate": 9.427477739868519e-05, "loss": 1.8662, "step": 30944 }, { "epoch": 0.06112529063175397, "grad_norm": 0.11558236181735992, "learning_rate": 9.426843717432051e-05, "loss": 1.8718, "step": 30976 }, { "epoch": 0.06118843659315041, "grad_norm": 0.09774626046419144, "learning_rate": 9.426209694995582e-05, "loss": 1.8606, "step": 31008 }, { "epoch": 0.061251582554546845, "grad_norm": 0.10142559558153152, "learning_rate": 9.425575672559112e-05, "loss": 1.8713, "step": 31040 }, { "epoch": 0.06131472851594329, "grad_norm": 0.12105502188205719, "learning_rate": 9.424941650122644e-05, "loss": 1.8643, "step": 31072 }, { "epoch": 0.06137787447733973, "grad_norm": 0.11434998363256454, "learning_rate": 9.424307627686175e-05, "loss": 1.8754, "step": 31104 }, { "epoch": 0.061441020438736164, "grad_norm": 0.106901615858078, "learning_rate": 9.423673605249707e-05, "loss": 1.8629, "step": 31136 }, { "epoch": 0.06150416640013261, "grad_norm": 0.10285452753305435, "learning_rate": 9.423039582813237e-05, "loss": 1.8641, "step": 31168 }, { "epoch": 0.06156731236152905, "grad_norm": 0.11215592175722122, "learning_rate": 9.422405560376768e-05, "loss": 1.8532, "step": 31200 }, { "epoch": 0.061630458322925484, "grad_norm": 0.10553357750177383, "learning_rate": 9.4217715379403e-05, "loss": 1.862, "step": 31232 }, { "epoch": 0.061693604284321926, "grad_norm": 0.10379686951637268, "learning_rate": 9.421137515503831e-05, "loss": 1.8578, "step": 31264 }, { "epoch": 0.06175675024571837, "grad_norm": 0.1035488024353981, "learning_rate": 9.420503493067363e-05, "loss": 1.8621, "step": 31296 }, { "epoch": 0.061819896207114804, "grad_norm": 0.10524856299161911, "learning_rate": 9.419869470630893e-05, "loss": 1.8707, "step": 31328 }, { "epoch": 0.061883042168511246, "grad_norm": 0.1163095161318779, "learning_rate": 9.419235448194423e-05, "loss": 1.8554, "step": 31360 }, { "epoch": 0.06194618812990769, "grad_norm": 0.1016009971499443, "learning_rate": 9.418601425757954e-05, "loss": 1.8507, "step": 31392 }, { "epoch": 0.06200933409130413, "grad_norm": 0.11709136515855789, "learning_rate": 9.417967403321486e-05, "loss": 1.8746, "step": 31424 }, { "epoch": 0.062072480052700565, "grad_norm": 0.11425372958183289, "learning_rate": 9.417333380885016e-05, "loss": 1.8665, "step": 31456 }, { "epoch": 0.06213562601409701, "grad_norm": 0.11852588504552841, "learning_rate": 9.416699358448547e-05, "loss": 1.8692, "step": 31488 }, { "epoch": 0.06219877197549345, "grad_norm": 0.09975966066122055, "learning_rate": 9.416065336012079e-05, "loss": 1.841, "step": 31520 }, { "epoch": 0.062261917936889885, "grad_norm": 0.10193809121847153, "learning_rate": 9.41543131357561e-05, "loss": 1.8695, "step": 31552 }, { "epoch": 0.06232506389828633, "grad_norm": 0.10371892154216766, "learning_rate": 9.41479729113914e-05, "loss": 1.8671, "step": 31584 }, { "epoch": 0.06238820985968277, "grad_norm": 0.1013907864689827, "learning_rate": 9.414163268702672e-05, "loss": 1.856, "step": 31616 }, { "epoch": 0.062451355821079205, "grad_norm": 0.11418340355157852, "learning_rate": 9.413529246266203e-05, "loss": 1.845, "step": 31648 }, { "epoch": 0.06251450178247565, "grad_norm": 0.10536934435367584, "learning_rate": 9.412895223829735e-05, "loss": 1.849, "step": 31680 }, { "epoch": 0.06257764774387209, "grad_norm": 0.11544335633516312, "learning_rate": 9.412261201393265e-05, "loss": 1.8558, "step": 31712 }, { "epoch": 0.06264079370526852, "grad_norm": 0.10208461433649063, "learning_rate": 9.411627178956796e-05, "loss": 1.8505, "step": 31744 }, { "epoch": 0.06270393966666497, "grad_norm": 0.11636324226856232, "learning_rate": 9.410993156520326e-05, "loss": 1.8476, "step": 31776 }, { "epoch": 0.06276708562806141, "grad_norm": 0.11715982109308243, "learning_rate": 9.410359134083858e-05, "loss": 1.8606, "step": 31808 }, { "epoch": 0.06283023158945784, "grad_norm": 0.10133142024278641, "learning_rate": 9.409725111647388e-05, "loss": 1.854, "step": 31840 }, { "epoch": 0.06289337755085429, "grad_norm": 0.10231012105941772, "learning_rate": 9.409091089210919e-05, "loss": 1.849, "step": 31872 }, { "epoch": 0.06295652351225073, "grad_norm": 0.10346190631389618, "learning_rate": 9.408457066774451e-05, "loss": 1.8464, "step": 31904 }, { "epoch": 0.06301966947364716, "grad_norm": 0.11396406590938568, "learning_rate": 9.407823044337982e-05, "loss": 1.8518, "step": 31936 }, { "epoch": 0.06308281543504361, "grad_norm": 0.10464853793382645, "learning_rate": 9.407189021901514e-05, "loss": 1.8655, "step": 31968 }, { "epoch": 0.06314596139644005, "grad_norm": 0.0973316878080368, "learning_rate": 9.406554999465044e-05, "loss": 1.8616, "step": 32000 }, { "epoch": 0.06320910735783648, "grad_norm": 0.10807619988918304, "learning_rate": 9.405920977028575e-05, "loss": 1.8431, "step": 32032 }, { "epoch": 0.06327225331923293, "grad_norm": 0.10671120136976242, "learning_rate": 9.405286954592107e-05, "loss": 1.8544, "step": 32064 }, { "epoch": 0.06333539928062937, "grad_norm": 0.12205708026885986, "learning_rate": 9.404652932155638e-05, "loss": 1.8684, "step": 32096 }, { "epoch": 0.0633985452420258, "grad_norm": 0.1130564957857132, "learning_rate": 9.404018909719168e-05, "loss": 1.8424, "step": 32128 }, { "epoch": 0.06346169120342225, "grad_norm": 0.09570758044719696, "learning_rate": 9.4033848872827e-05, "loss": 1.8601, "step": 32160 }, { "epoch": 0.06352483716481869, "grad_norm": 0.11315812915563583, "learning_rate": 9.40275086484623e-05, "loss": 1.8599, "step": 32192 }, { "epoch": 0.06358798312621512, "grad_norm": 0.0973125696182251, "learning_rate": 9.402116842409761e-05, "loss": 1.8553, "step": 32224 }, { "epoch": 0.06365112908761157, "grad_norm": 0.10498806834220886, "learning_rate": 9.401482819973291e-05, "loss": 1.8643, "step": 32256 }, { "epoch": 0.063714275049008, "grad_norm": 0.10018099844455719, "learning_rate": 9.400848797536823e-05, "loss": 1.823, "step": 32288 }, { "epoch": 0.06377742101040444, "grad_norm": 0.10291516780853271, "learning_rate": 9.400214775100354e-05, "loss": 1.8516, "step": 32320 }, { "epoch": 0.06384056697180089, "grad_norm": 0.10548707097768784, "learning_rate": 9.399580752663886e-05, "loss": 1.8485, "step": 32352 }, { "epoch": 0.06390371293319733, "grad_norm": 0.11257361620664597, "learning_rate": 9.398946730227416e-05, "loss": 1.8574, "step": 32384 }, { "epoch": 0.06396685889459376, "grad_norm": 0.10809215158224106, "learning_rate": 9.398312707790947e-05, "loss": 1.8471, "step": 32416 }, { "epoch": 0.06403000485599021, "grad_norm": 0.10717517882585526, "learning_rate": 9.397678685354479e-05, "loss": 1.8476, "step": 32448 }, { "epoch": 0.06409315081738665, "grad_norm": 0.10977809876203537, "learning_rate": 9.39704466291801e-05, "loss": 1.857, "step": 32480 }, { "epoch": 0.0641562967787831, "grad_norm": 0.1052245944738388, "learning_rate": 9.39641064048154e-05, "loss": 1.8523, "step": 32512 }, { "epoch": 0.06421944274017953, "grad_norm": 0.10304667800664902, "learning_rate": 9.395776618045072e-05, "loss": 1.8453, "step": 32544 }, { "epoch": 0.06428258870157597, "grad_norm": 0.10384541004896164, "learning_rate": 9.395142595608603e-05, "loss": 1.87, "step": 32576 }, { "epoch": 0.06434573466297241, "grad_norm": 0.1081276535987854, "learning_rate": 9.394508573172133e-05, "loss": 1.8541, "step": 32608 }, { "epoch": 0.06440888062436885, "grad_norm": 0.11084219068288803, "learning_rate": 9.393874550735665e-05, "loss": 1.8466, "step": 32640 }, { "epoch": 0.06447202658576529, "grad_norm": 0.09971606731414795, "learning_rate": 9.393240528299195e-05, "loss": 1.8521, "step": 32672 }, { "epoch": 0.06453517254716173, "grad_norm": 0.09828321635723114, "learning_rate": 9.392606505862726e-05, "loss": 1.8525, "step": 32704 }, { "epoch": 0.06459831850855817, "grad_norm": 0.10448254644870758, "learning_rate": 9.391972483426258e-05, "loss": 1.8488, "step": 32736 }, { "epoch": 0.0646614644699546, "grad_norm": 0.11569347232580185, "learning_rate": 9.391338460989789e-05, "loss": 1.8632, "step": 32768 }, { "epoch": 0.06472461043135105, "grad_norm": 0.11592109501361847, "learning_rate": 9.390704438553319e-05, "loss": 1.8493, "step": 32800 }, { "epoch": 0.06478775639274749, "grad_norm": 0.10566107928752899, "learning_rate": 9.390070416116851e-05, "loss": 1.8402, "step": 32832 }, { "epoch": 0.06485090235414392, "grad_norm": 0.11042334884405136, "learning_rate": 9.389436393680382e-05, "loss": 1.8508, "step": 32864 }, { "epoch": 0.06491404831554037, "grad_norm": 0.10361830145120621, "learning_rate": 9.388802371243914e-05, "loss": 1.859, "step": 32896 }, { "epoch": 0.06497719427693681, "grad_norm": 0.10170776396989822, "learning_rate": 9.388168348807444e-05, "loss": 1.8428, "step": 32928 }, { "epoch": 0.06504034023833324, "grad_norm": 0.10321789234876633, "learning_rate": 9.387534326370975e-05, "loss": 1.8575, "step": 32960 }, { "epoch": 0.0651034861997297, "grad_norm": 0.10872825235128403, "learning_rate": 9.386900303934507e-05, "loss": 1.8482, "step": 32992 }, { "epoch": 0.06516663216112613, "grad_norm": 0.09724610298871994, "learning_rate": 9.386266281498037e-05, "loss": 1.837, "step": 33024 }, { "epoch": 0.06522977812252256, "grad_norm": 0.11104436218738556, "learning_rate": 9.385632259061567e-05, "loss": 1.8588, "step": 33056 }, { "epoch": 0.06529292408391901, "grad_norm": 0.10021993517875671, "learning_rate": 9.384998236625098e-05, "loss": 1.8586, "step": 33088 }, { "epoch": 0.06535607004531545, "grad_norm": 0.10966669023036957, "learning_rate": 9.38436421418863e-05, "loss": 1.8523, "step": 33120 }, { "epoch": 0.06541921600671188, "grad_norm": 0.11023969948291779, "learning_rate": 9.383730191752161e-05, "loss": 1.8482, "step": 33152 }, { "epoch": 0.06548236196810833, "grad_norm": 0.09609818458557129, "learning_rate": 9.383096169315691e-05, "loss": 1.848, "step": 33184 }, { "epoch": 0.06554550792950477, "grad_norm": 0.10640505701303482, "learning_rate": 9.382462146879223e-05, "loss": 1.8451, "step": 33216 }, { "epoch": 0.0656086538909012, "grad_norm": 0.1084061861038208, "learning_rate": 9.381828124442754e-05, "loss": 1.8529, "step": 33248 }, { "epoch": 0.06567179985229765, "grad_norm": 0.10528600960969925, "learning_rate": 9.381194102006286e-05, "loss": 1.8577, "step": 33280 }, { "epoch": 0.06573494581369409, "grad_norm": 0.10225199162960052, "learning_rate": 9.380560079569817e-05, "loss": 1.8445, "step": 33312 }, { "epoch": 0.06579809177509054, "grad_norm": 0.11053970456123352, "learning_rate": 9.379926057133347e-05, "loss": 1.8526, "step": 33344 }, { "epoch": 0.06586123773648697, "grad_norm": 0.10372111201286316, "learning_rate": 9.379292034696879e-05, "loss": 1.8407, "step": 33376 }, { "epoch": 0.0659243836978834, "grad_norm": 0.09763038903474808, "learning_rate": 9.37865801226041e-05, "loss": 1.841, "step": 33408 }, { "epoch": 0.06598752965927986, "grad_norm": 0.11436961591243744, "learning_rate": 9.37802398982394e-05, "loss": 1.8472, "step": 33440 }, { "epoch": 0.06605067562067629, "grad_norm": 0.10392463952302933, "learning_rate": 9.37738996738747e-05, "loss": 1.8603, "step": 33472 }, { "epoch": 0.06611382158207273, "grad_norm": 0.10825751721858978, "learning_rate": 9.376755944951002e-05, "loss": 1.8329, "step": 33504 }, { "epoch": 0.06617696754346918, "grad_norm": 0.1062132716178894, "learning_rate": 9.376121922514533e-05, "loss": 1.8494, "step": 33536 }, { "epoch": 0.06624011350486561, "grad_norm": 0.12203536182641983, "learning_rate": 9.375487900078065e-05, "loss": 1.834, "step": 33568 }, { "epoch": 0.06630325946626205, "grad_norm": 0.11732408404350281, "learning_rate": 9.374853877641595e-05, "loss": 1.835, "step": 33600 }, { "epoch": 0.0663664054276585, "grad_norm": 0.12333063036203384, "learning_rate": 9.374219855205126e-05, "loss": 1.8399, "step": 33632 }, { "epoch": 0.06642955138905493, "grad_norm": 0.1118704080581665, "learning_rate": 9.373585832768658e-05, "loss": 1.8354, "step": 33664 }, { "epoch": 0.06649269735045137, "grad_norm": 0.09639861434698105, "learning_rate": 9.372951810332189e-05, "loss": 1.8518, "step": 33696 }, { "epoch": 0.06655584331184781, "grad_norm": 0.1065998300909996, "learning_rate": 9.37231778789572e-05, "loss": 1.8421, "step": 33728 }, { "epoch": 0.06661898927324425, "grad_norm": 0.103479765355587, "learning_rate": 9.371683765459251e-05, "loss": 1.8475, "step": 33760 }, { "epoch": 0.06668213523464069, "grad_norm": 0.1073637381196022, "learning_rate": 9.371049743022782e-05, "loss": 1.8495, "step": 33792 }, { "epoch": 0.06674528119603713, "grad_norm": 0.10078073292970657, "learning_rate": 9.370415720586312e-05, "loss": 1.8184, "step": 33824 }, { "epoch": 0.06680842715743357, "grad_norm": 0.11597784608602524, "learning_rate": 9.369781698149844e-05, "loss": 1.8486, "step": 33856 }, { "epoch": 0.06687157311883, "grad_norm": 0.10159990191459656, "learning_rate": 9.369147675713374e-05, "loss": 1.8371, "step": 33888 }, { "epoch": 0.06693471908022645, "grad_norm": 0.10640428215265274, "learning_rate": 9.368513653276905e-05, "loss": 1.8417, "step": 33920 }, { "epoch": 0.06699786504162289, "grad_norm": 0.09588663280010223, "learning_rate": 9.367879630840437e-05, "loss": 1.8506, "step": 33952 }, { "epoch": 0.06706101100301932, "grad_norm": 0.10144439339637756, "learning_rate": 9.367245608403968e-05, "loss": 1.8284, "step": 33984 }, { "epoch": 0.06712415696441577, "grad_norm": 0.09015002846717834, "learning_rate": 9.366611585967498e-05, "loss": 1.8413, "step": 34016 }, { "epoch": 0.06718730292581221, "grad_norm": 0.10386817902326584, "learning_rate": 9.36597756353103e-05, "loss": 1.8349, "step": 34048 }, { "epoch": 0.06725044888720864, "grad_norm": 0.10589347034692764, "learning_rate": 9.365343541094561e-05, "loss": 1.8433, "step": 34080 }, { "epoch": 0.0673135948486051, "grad_norm": 0.1025700718164444, "learning_rate": 9.364709518658093e-05, "loss": 1.846, "step": 34112 }, { "epoch": 0.06737674081000153, "grad_norm": 0.09602469950914383, "learning_rate": 9.364075496221623e-05, "loss": 1.8336, "step": 34144 }, { "epoch": 0.06743988677139798, "grad_norm": 0.10800101608037949, "learning_rate": 9.363441473785154e-05, "loss": 1.8537, "step": 34176 }, { "epoch": 0.06750303273279441, "grad_norm": 0.09845973551273346, "learning_rate": 9.362807451348686e-05, "loss": 1.8374, "step": 34208 }, { "epoch": 0.06756617869419085, "grad_norm": 0.10679402947425842, "learning_rate": 9.362173428912216e-05, "loss": 1.8411, "step": 34240 }, { "epoch": 0.0676293246555873, "grad_norm": 0.10255902260541916, "learning_rate": 9.361539406475747e-05, "loss": 1.853, "step": 34272 }, { "epoch": 0.06769247061698373, "grad_norm": 0.09579630196094513, "learning_rate": 9.360905384039277e-05, "loss": 1.8328, "step": 34304 }, { "epoch": 0.06775561657838017, "grad_norm": 0.10283277183771133, "learning_rate": 9.360271361602809e-05, "loss": 1.8425, "step": 34336 }, { "epoch": 0.06781876253977662, "grad_norm": 0.11232533305883408, "learning_rate": 9.35963733916634e-05, "loss": 1.8424, "step": 34368 }, { "epoch": 0.06788190850117305, "grad_norm": 0.10461856424808502, "learning_rate": 9.35900331672987e-05, "loss": 1.8263, "step": 34400 }, { "epoch": 0.06794505446256949, "grad_norm": 0.10416267812252045, "learning_rate": 9.358369294293402e-05, "loss": 1.828, "step": 34432 }, { "epoch": 0.06800820042396594, "grad_norm": 0.11975955218076706, "learning_rate": 9.357735271856933e-05, "loss": 1.8534, "step": 34464 }, { "epoch": 0.06807134638536237, "grad_norm": 0.10487686097621918, "learning_rate": 9.357101249420465e-05, "loss": 1.8388, "step": 34496 }, { "epoch": 0.06813449234675881, "grad_norm": 0.11208721995353699, "learning_rate": 9.356467226983995e-05, "loss": 1.8204, "step": 34528 }, { "epoch": 0.06819763830815526, "grad_norm": 0.11162339150905609, "learning_rate": 9.355833204547526e-05, "loss": 1.8441, "step": 34560 }, { "epoch": 0.06826078426955169, "grad_norm": 0.10376334935426712, "learning_rate": 9.355199182111058e-05, "loss": 1.8387, "step": 34592 }, { "epoch": 0.06832393023094813, "grad_norm": 0.09627466648817062, "learning_rate": 9.354565159674589e-05, "loss": 1.8387, "step": 34624 }, { "epoch": 0.06838707619234458, "grad_norm": 0.10894633084535599, "learning_rate": 9.35393113723812e-05, "loss": 1.8272, "step": 34656 }, { "epoch": 0.06845022215374101, "grad_norm": 0.09806417673826218, "learning_rate": 9.353297114801651e-05, "loss": 1.8304, "step": 34688 }, { "epoch": 0.06851336811513745, "grad_norm": 0.10127557814121246, "learning_rate": 9.352663092365181e-05, "loss": 1.8213, "step": 34720 }, { "epoch": 0.0685765140765339, "grad_norm": 0.13220052421092987, "learning_rate": 9.352029069928712e-05, "loss": 1.8507, "step": 34752 }, { "epoch": 0.06863966003793033, "grad_norm": 0.11304617673158646, "learning_rate": 9.351395047492244e-05, "loss": 1.8452, "step": 34784 }, { "epoch": 0.06870280599932677, "grad_norm": 0.10286716371774673, "learning_rate": 9.350761025055774e-05, "loss": 1.826, "step": 34816 }, { "epoch": 0.06876595196072322, "grad_norm": 0.1059626117348671, "learning_rate": 9.350127002619305e-05, "loss": 1.8255, "step": 34848 }, { "epoch": 0.06882909792211965, "grad_norm": 0.11121994256973267, "learning_rate": 9.349492980182837e-05, "loss": 1.8272, "step": 34880 }, { "epoch": 0.06889224388351609, "grad_norm": 0.09956642240285873, "learning_rate": 9.348858957746368e-05, "loss": 1.8269, "step": 34912 }, { "epoch": 0.06895538984491253, "grad_norm": 0.10039377212524414, "learning_rate": 9.348224935309898e-05, "loss": 1.841, "step": 34944 }, { "epoch": 0.06901853580630897, "grad_norm": 0.11147228628396988, "learning_rate": 9.34759091287343e-05, "loss": 1.8451, "step": 34976 }, { "epoch": 0.06908168176770542, "grad_norm": 0.1100914403796196, "learning_rate": 9.346956890436961e-05, "loss": 1.8242, "step": 35008 }, { "epoch": 0.06914482772910185, "grad_norm": 0.10677902400493622, "learning_rate": 9.346322868000493e-05, "loss": 1.8255, "step": 35040 }, { "epoch": 0.06920797369049829, "grad_norm": 0.10005491971969604, "learning_rate": 9.345688845564023e-05, "loss": 1.8378, "step": 35072 }, { "epoch": 0.06927111965189474, "grad_norm": 0.10176543146371841, "learning_rate": 9.345054823127553e-05, "loss": 1.8412, "step": 35104 }, { "epoch": 0.06933426561329117, "grad_norm": 0.11025370657444, "learning_rate": 9.344420800691084e-05, "loss": 1.832, "step": 35136 }, { "epoch": 0.06939741157468761, "grad_norm": 0.09753858298063278, "learning_rate": 9.343786778254616e-05, "loss": 1.8441, "step": 35168 }, { "epoch": 0.06946055753608406, "grad_norm": 0.13318485021591187, "learning_rate": 9.343152755818146e-05, "loss": 1.8396, "step": 35200 }, { "epoch": 0.0695237034974805, "grad_norm": 0.10949849337339401, "learning_rate": 9.342518733381677e-05, "loss": 1.8351, "step": 35232 }, { "epoch": 0.06958684945887693, "grad_norm": 0.10659553110599518, "learning_rate": 9.341884710945209e-05, "loss": 1.8205, "step": 35264 }, { "epoch": 0.06964999542027338, "grad_norm": 0.11543211340904236, "learning_rate": 9.34125068850874e-05, "loss": 1.8368, "step": 35296 }, { "epoch": 0.06971314138166981, "grad_norm": 0.10537153482437134, "learning_rate": 9.340616666072272e-05, "loss": 1.8328, "step": 35328 }, { "epoch": 0.06977628734306625, "grad_norm": 0.10511999577283859, "learning_rate": 9.339982643635802e-05, "loss": 1.8268, "step": 35360 }, { "epoch": 0.0698394333044627, "grad_norm": 0.10195047408342361, "learning_rate": 9.339348621199333e-05, "loss": 1.8243, "step": 35392 }, { "epoch": 0.06990257926585913, "grad_norm": 0.10528641939163208, "learning_rate": 9.338714598762865e-05, "loss": 1.8472, "step": 35424 }, { "epoch": 0.06996572522725557, "grad_norm": 0.10591515153646469, "learning_rate": 9.338080576326396e-05, "loss": 1.823, "step": 35456 }, { "epoch": 0.07002887118865202, "grad_norm": 0.10703007131814957, "learning_rate": 9.337446553889926e-05, "loss": 1.8035, "step": 35488 }, { "epoch": 0.07009201715004845, "grad_norm": 0.10312335938215256, "learning_rate": 9.336812531453456e-05, "loss": 1.8261, "step": 35520 }, { "epoch": 0.07015516311144489, "grad_norm": 0.13176143169403076, "learning_rate": 9.336178509016988e-05, "loss": 1.8245, "step": 35552 }, { "epoch": 0.07021830907284134, "grad_norm": 0.09669128805398941, "learning_rate": 9.33554448658052e-05, "loss": 1.8196, "step": 35584 }, { "epoch": 0.07028145503423777, "grad_norm": 0.09845513850450516, "learning_rate": 9.33491046414405e-05, "loss": 1.8271, "step": 35616 }, { "epoch": 0.07034460099563421, "grad_norm": 0.10833404213190079, "learning_rate": 9.334276441707581e-05, "loss": 1.8224, "step": 35648 }, { "epoch": 0.07040774695703066, "grad_norm": 0.11137785017490387, "learning_rate": 9.333642419271112e-05, "loss": 1.8191, "step": 35680 }, { "epoch": 0.07047089291842709, "grad_norm": 0.11151645332574844, "learning_rate": 9.333008396834644e-05, "loss": 1.8361, "step": 35712 }, { "epoch": 0.07053403887982353, "grad_norm": 0.09397818893194199, "learning_rate": 9.332374374398174e-05, "loss": 1.8441, "step": 35744 }, { "epoch": 0.07059718484121998, "grad_norm": 0.11775724589824677, "learning_rate": 9.331740351961705e-05, "loss": 1.8303, "step": 35776 }, { "epoch": 0.07066033080261641, "grad_norm": 0.11066251248121262, "learning_rate": 9.331106329525237e-05, "loss": 1.8267, "step": 35808 }, { "epoch": 0.07072347676401286, "grad_norm": 0.11496420949697495, "learning_rate": 9.330472307088768e-05, "loss": 1.8324, "step": 35840 }, { "epoch": 0.0707866227254093, "grad_norm": 0.10406922549009323, "learning_rate": 9.329838284652298e-05, "loss": 1.8273, "step": 35872 }, { "epoch": 0.07084976868680573, "grad_norm": 0.09711103141307831, "learning_rate": 9.32920426221583e-05, "loss": 1.8346, "step": 35904 }, { "epoch": 0.07091291464820218, "grad_norm": 0.11311649531126022, "learning_rate": 9.32857023977936e-05, "loss": 1.8224, "step": 35936 }, { "epoch": 0.07097606060959862, "grad_norm": 0.10964278876781464, "learning_rate": 9.327936217342891e-05, "loss": 1.8288, "step": 35968 }, { "epoch": 0.07103920657099505, "grad_norm": 0.09969433397054672, "learning_rate": 9.327302194906423e-05, "loss": 1.8556, "step": 36000 }, { "epoch": 0.0711023525323915, "grad_norm": 0.10349023342132568, "learning_rate": 9.326668172469953e-05, "loss": 1.8193, "step": 36032 }, { "epoch": 0.07116549849378793, "grad_norm": 0.10405611246824265, "learning_rate": 9.326034150033484e-05, "loss": 1.8229, "step": 36064 }, { "epoch": 0.07122864445518437, "grad_norm": 0.09794848412275314, "learning_rate": 9.325400127597016e-05, "loss": 1.8301, "step": 36096 }, { "epoch": 0.07129179041658082, "grad_norm": 0.09608778357505798, "learning_rate": 9.324766105160547e-05, "loss": 1.823, "step": 36128 }, { "epoch": 0.07135493637797725, "grad_norm": 0.10224819928407669, "learning_rate": 9.324132082724077e-05, "loss": 1.8375, "step": 36160 }, { "epoch": 0.07141808233937369, "grad_norm": 0.10872190445661545, "learning_rate": 9.323498060287609e-05, "loss": 1.8158, "step": 36192 }, { "epoch": 0.07148122830077014, "grad_norm": 0.11490211635828018, "learning_rate": 9.32286403785114e-05, "loss": 1.8213, "step": 36224 }, { "epoch": 0.07154437426216657, "grad_norm": 0.09908118844032288, "learning_rate": 9.322230015414672e-05, "loss": 1.8214, "step": 36256 }, { "epoch": 0.07160752022356301, "grad_norm": 0.10633298754692078, "learning_rate": 9.321595992978202e-05, "loss": 1.8294, "step": 36288 }, { "epoch": 0.07167066618495946, "grad_norm": 0.1104525774717331, "learning_rate": 9.320961970541733e-05, "loss": 1.834, "step": 36320 }, { "epoch": 0.0717338121463559, "grad_norm": 0.09994344413280487, "learning_rate": 9.320327948105264e-05, "loss": 1.8192, "step": 36352 }, { "epoch": 0.07179695810775233, "grad_norm": 0.10000725835561752, "learning_rate": 9.319693925668795e-05, "loss": 1.8291, "step": 36384 }, { "epoch": 0.07186010406914878, "grad_norm": 0.10379309952259064, "learning_rate": 9.319059903232325e-05, "loss": 1.8159, "step": 36416 }, { "epoch": 0.07192325003054521, "grad_norm": 0.10898219794034958, "learning_rate": 9.318425880795857e-05, "loss": 1.8293, "step": 36448 }, { "epoch": 0.07198639599194165, "grad_norm": 0.11423615366220474, "learning_rate": 9.317791858359388e-05, "loss": 1.8243, "step": 36480 }, { "epoch": 0.0720495419533381, "grad_norm": 0.10108506679534912, "learning_rate": 9.31715783592292e-05, "loss": 1.8247, "step": 36512 }, { "epoch": 0.07211268791473453, "grad_norm": 0.12401416152715683, "learning_rate": 9.31652381348645e-05, "loss": 1.8166, "step": 36544 }, { "epoch": 0.07217583387613097, "grad_norm": 0.10530426353216171, "learning_rate": 9.315889791049981e-05, "loss": 1.8176, "step": 36576 }, { "epoch": 0.07223897983752742, "grad_norm": 0.09537271410226822, "learning_rate": 9.315255768613512e-05, "loss": 1.8203, "step": 36608 }, { "epoch": 0.07230212579892385, "grad_norm": 0.10115291178226471, "learning_rate": 9.314621746177044e-05, "loss": 1.8296, "step": 36640 }, { "epoch": 0.0723652717603203, "grad_norm": 0.11395847052335739, "learning_rate": 9.313987723740575e-05, "loss": 1.8294, "step": 36672 }, { "epoch": 0.07242841772171674, "grad_norm": 0.10678514093160629, "learning_rate": 9.313353701304105e-05, "loss": 1.8013, "step": 36704 }, { "epoch": 0.07249156368311317, "grad_norm": 0.10725942254066467, "learning_rate": 9.312719678867637e-05, "loss": 1.8217, "step": 36736 }, { "epoch": 0.07255470964450962, "grad_norm": 0.1011452004313469, "learning_rate": 9.312085656431167e-05, "loss": 1.8101, "step": 36768 }, { "epoch": 0.07261785560590606, "grad_norm": 0.12700913846492767, "learning_rate": 9.311451633994698e-05, "loss": 1.8211, "step": 36800 }, { "epoch": 0.07268100156730249, "grad_norm": 0.1157853752374649, "learning_rate": 9.310817611558229e-05, "loss": 1.8159, "step": 36832 }, { "epoch": 0.07274414752869894, "grad_norm": 0.10147330909967422, "learning_rate": 9.31018358912176e-05, "loss": 1.8103, "step": 36864 }, { "epoch": 0.07280729349009538, "grad_norm": 0.1215939000248909, "learning_rate": 9.309549566685291e-05, "loss": 1.8265, "step": 36896 }, { "epoch": 0.07287043945149181, "grad_norm": 0.11663418263196945, "learning_rate": 9.308915544248823e-05, "loss": 1.8169, "step": 36928 }, { "epoch": 0.07293358541288826, "grad_norm": 0.10314902663230896, "learning_rate": 9.308281521812353e-05, "loss": 1.8128, "step": 36960 }, { "epoch": 0.0729967313742847, "grad_norm": 0.09695911407470703, "learning_rate": 9.307647499375885e-05, "loss": 1.8461, "step": 36992 }, { "epoch": 0.07305987733568113, "grad_norm": 0.09900636225938797, "learning_rate": 9.307013476939416e-05, "loss": 1.8228, "step": 37024 }, { "epoch": 0.07312302329707758, "grad_norm": 0.10411260277032852, "learning_rate": 9.306379454502947e-05, "loss": 1.8257, "step": 37056 }, { "epoch": 0.07318616925847402, "grad_norm": 0.1080189198255539, "learning_rate": 9.305745432066478e-05, "loss": 1.8154, "step": 37088 }, { "epoch": 0.07324931521987045, "grad_norm": 0.09551020711660385, "learning_rate": 9.305111409630009e-05, "loss": 1.84, "step": 37120 }, { "epoch": 0.0733124611812669, "grad_norm": 0.11926567554473877, "learning_rate": 9.30447738719354e-05, "loss": 1.8262, "step": 37152 }, { "epoch": 0.07337560714266333, "grad_norm": 0.1030915156006813, "learning_rate": 9.30384336475707e-05, "loss": 1.8131, "step": 37184 }, { "epoch": 0.07343875310405977, "grad_norm": 0.1016959697008133, "learning_rate": 9.3032093423206e-05, "loss": 1.8185, "step": 37216 }, { "epoch": 0.07350189906545622, "grad_norm": 0.10035569965839386, "learning_rate": 9.302575319884132e-05, "loss": 1.8153, "step": 37248 }, { "epoch": 0.07356504502685265, "grad_norm": 0.10914278775453568, "learning_rate": 9.301941297447664e-05, "loss": 1.813, "step": 37280 }, { "epoch": 0.07362819098824909, "grad_norm": 0.11313147842884064, "learning_rate": 9.301307275011195e-05, "loss": 1.8114, "step": 37312 }, { "epoch": 0.07369133694964554, "grad_norm": 0.11023863404989243, "learning_rate": 9.300673252574726e-05, "loss": 1.8167, "step": 37344 }, { "epoch": 0.07375448291104197, "grad_norm": 0.09830230474472046, "learning_rate": 9.300039230138257e-05, "loss": 1.804, "step": 37376 }, { "epoch": 0.07381762887243841, "grad_norm": 0.10366123914718628, "learning_rate": 9.299405207701788e-05, "loss": 1.8213, "step": 37408 }, { "epoch": 0.07388077483383486, "grad_norm": 0.11017373204231262, "learning_rate": 9.29877118526532e-05, "loss": 1.8149, "step": 37440 }, { "epoch": 0.0739439207952313, "grad_norm": 0.09416994452476501, "learning_rate": 9.298137162828851e-05, "loss": 1.8254, "step": 37472 }, { "epoch": 0.07400706675662773, "grad_norm": 0.10092901438474655, "learning_rate": 9.297503140392381e-05, "loss": 1.809, "step": 37504 }, { "epoch": 0.07407021271802418, "grad_norm": 0.10300584882497787, "learning_rate": 9.296869117955912e-05, "loss": 1.8281, "step": 37536 }, { "epoch": 0.07413335867942061, "grad_norm": 0.09270688146352768, "learning_rate": 9.296235095519444e-05, "loss": 1.8268, "step": 37568 }, { "epoch": 0.07419650464081706, "grad_norm": 0.11394724994897842, "learning_rate": 9.295601073082974e-05, "loss": 1.8228, "step": 37600 }, { "epoch": 0.0742596506022135, "grad_norm": 0.09486842900514603, "learning_rate": 9.294967050646504e-05, "loss": 1.8305, "step": 37632 }, { "epoch": 0.07432279656360993, "grad_norm": 0.0975983738899231, "learning_rate": 9.294333028210036e-05, "loss": 1.8077, "step": 37664 }, { "epoch": 0.07438594252500638, "grad_norm": 0.11098145693540573, "learning_rate": 9.293699005773567e-05, "loss": 1.8202, "step": 37696 }, { "epoch": 0.07444908848640282, "grad_norm": 0.1053488478064537, "learning_rate": 9.293064983337099e-05, "loss": 1.8218, "step": 37728 }, { "epoch": 0.07451223444779925, "grad_norm": 0.10563009232282639, "learning_rate": 9.292430960900629e-05, "loss": 1.8237, "step": 37760 }, { "epoch": 0.0745753804091957, "grad_norm": 0.11022141575813293, "learning_rate": 9.29179693846416e-05, "loss": 1.8189, "step": 37792 }, { "epoch": 0.07463852637059214, "grad_norm": 0.10405189543962479, "learning_rate": 9.291162916027692e-05, "loss": 1.8158, "step": 37824 }, { "epoch": 0.07470167233198857, "grad_norm": 0.10142597556114197, "learning_rate": 9.290528893591223e-05, "loss": 1.8115, "step": 37856 }, { "epoch": 0.07476481829338502, "grad_norm": 0.11152772605419159, "learning_rate": 9.289894871154754e-05, "loss": 1.8083, "step": 37888 }, { "epoch": 0.07482796425478146, "grad_norm": 0.10542437434196472, "learning_rate": 9.289260848718285e-05, "loss": 1.8219, "step": 37920 }, { "epoch": 0.07489111021617789, "grad_norm": 0.10162851959466934, "learning_rate": 9.288626826281816e-05, "loss": 1.8184, "step": 37952 }, { "epoch": 0.07495425617757434, "grad_norm": 0.0991579219698906, "learning_rate": 9.287992803845346e-05, "loss": 1.8167, "step": 37984 }, { "epoch": 0.07501740213897078, "grad_norm": 0.1067013218998909, "learning_rate": 9.287358781408878e-05, "loss": 1.8097, "step": 38016 }, { "epoch": 0.07508054810036721, "grad_norm": 0.10220654308795929, "learning_rate": 9.286724758972408e-05, "loss": 1.8114, "step": 38048 }, { "epoch": 0.07514369406176366, "grad_norm": 0.12089502066373825, "learning_rate": 9.286090736535939e-05, "loss": 1.8146, "step": 38080 }, { "epoch": 0.0752068400231601, "grad_norm": 0.09971319884061813, "learning_rate": 9.28545671409947e-05, "loss": 1.8044, "step": 38112 }, { "epoch": 0.07526998598455653, "grad_norm": 0.11873181164264679, "learning_rate": 9.284822691663002e-05, "loss": 1.7931, "step": 38144 }, { "epoch": 0.07533313194595298, "grad_norm": 0.10177662968635559, "learning_rate": 9.284188669226532e-05, "loss": 1.8277, "step": 38176 }, { "epoch": 0.07539627790734942, "grad_norm": 0.0983022078871727, "learning_rate": 9.283554646790064e-05, "loss": 1.8167, "step": 38208 }, { "epoch": 0.07545942386874585, "grad_norm": 0.09422893077135086, "learning_rate": 9.282920624353595e-05, "loss": 1.8133, "step": 38240 }, { "epoch": 0.0755225698301423, "grad_norm": 0.10076016187667847, "learning_rate": 9.282286601917126e-05, "loss": 1.808, "step": 38272 }, { "epoch": 0.07558571579153874, "grad_norm": 0.10187894105911255, "learning_rate": 9.281652579480657e-05, "loss": 1.8169, "step": 38304 }, { "epoch": 0.07564886175293517, "grad_norm": 0.1030297726392746, "learning_rate": 9.281018557044188e-05, "loss": 1.8038, "step": 38336 }, { "epoch": 0.07571200771433162, "grad_norm": 0.11084078997373581, "learning_rate": 9.28038453460772e-05, "loss": 1.8102, "step": 38368 }, { "epoch": 0.07577515367572805, "grad_norm": 0.10150840878486633, "learning_rate": 9.27975051217125e-05, "loss": 1.8206, "step": 38400 }, { "epoch": 0.0758382996371245, "grad_norm": 0.10247491300106049, "learning_rate": 9.279116489734781e-05, "loss": 1.8095, "step": 38432 }, { "epoch": 0.07590144559852094, "grad_norm": 0.10542050004005432, "learning_rate": 9.278482467298311e-05, "loss": 1.8101, "step": 38464 }, { "epoch": 0.07596459155991737, "grad_norm": 0.10321768373250961, "learning_rate": 9.277848444861843e-05, "loss": 1.814, "step": 38496 }, { "epoch": 0.07602773752131382, "grad_norm": 0.11027068644762039, "learning_rate": 9.277214422425374e-05, "loss": 1.801, "step": 38528 }, { "epoch": 0.07609088348271026, "grad_norm": 0.10503915697336197, "learning_rate": 9.276580399988906e-05, "loss": 1.8123, "step": 38560 }, { "epoch": 0.0761540294441067, "grad_norm": 0.09527040272951126, "learning_rate": 9.275946377552436e-05, "loss": 1.818, "step": 38592 }, { "epoch": 0.07621717540550314, "grad_norm": 0.0944819524884224, "learning_rate": 9.275312355115967e-05, "loss": 1.8021, "step": 38624 }, { "epoch": 0.07628032136689958, "grad_norm": 0.1021362841129303, "learning_rate": 9.274678332679499e-05, "loss": 1.8008, "step": 38656 }, { "epoch": 0.07634346732829601, "grad_norm": 0.11262723058462143, "learning_rate": 9.27404431024303e-05, "loss": 1.8125, "step": 38688 }, { "epoch": 0.07640661328969246, "grad_norm": 0.12389929592609406, "learning_rate": 9.27341028780656e-05, "loss": 1.8122, "step": 38720 }, { "epoch": 0.0764697592510889, "grad_norm": 0.12069406360387802, "learning_rate": 9.272776265370092e-05, "loss": 1.8153, "step": 38752 }, { "epoch": 0.07653290521248533, "grad_norm": 0.1107429787516594, "learning_rate": 9.272142242933623e-05, "loss": 1.8074, "step": 38784 }, { "epoch": 0.07659605117388178, "grad_norm": 0.10065750032663345, "learning_rate": 9.271508220497153e-05, "loss": 1.7971, "step": 38816 }, { "epoch": 0.07665919713527822, "grad_norm": 0.1054379791021347, "learning_rate": 9.270874198060685e-05, "loss": 1.8116, "step": 38848 }, { "epoch": 0.07672234309667465, "grad_norm": 0.10749877244234085, "learning_rate": 9.270240175624215e-05, "loss": 1.8134, "step": 38880 }, { "epoch": 0.0767854890580711, "grad_norm": 0.10346371680498123, "learning_rate": 9.269606153187746e-05, "loss": 1.7979, "step": 38912 }, { "epoch": 0.07684863501946754, "grad_norm": 0.09499409049749374, "learning_rate": 9.268972130751278e-05, "loss": 1.787, "step": 38944 }, { "epoch": 0.07691178098086397, "grad_norm": 0.10238825529813766, "learning_rate": 9.268338108314808e-05, "loss": 1.8043, "step": 38976 }, { "epoch": 0.07697492694226042, "grad_norm": 0.10353446006774902, "learning_rate": 9.267704085878339e-05, "loss": 1.8027, "step": 39008 }, { "epoch": 0.07703807290365686, "grad_norm": 0.10532581061124802, "learning_rate": 9.26707006344187e-05, "loss": 1.8136, "step": 39040 }, { "epoch": 0.07710121886505329, "grad_norm": 0.10245820134878159, "learning_rate": 9.266436041005402e-05, "loss": 1.7957, "step": 39072 }, { "epoch": 0.07716436482644974, "grad_norm": 0.12150374799966812, "learning_rate": 9.265802018568932e-05, "loss": 1.8121, "step": 39104 }, { "epoch": 0.07722751078784618, "grad_norm": 0.09707668423652649, "learning_rate": 9.265167996132464e-05, "loss": 1.8083, "step": 39136 }, { "epoch": 0.07729065674924261, "grad_norm": 0.10562973469495773, "learning_rate": 9.264533973695995e-05, "loss": 1.8108, "step": 39168 }, { "epoch": 0.07735380271063906, "grad_norm": 0.1110835075378418, "learning_rate": 9.263899951259527e-05, "loss": 1.8058, "step": 39200 }, { "epoch": 0.0774169486720355, "grad_norm": 0.12374687939882278, "learning_rate": 9.263265928823057e-05, "loss": 1.8182, "step": 39232 }, { "epoch": 0.07748009463343195, "grad_norm": 0.1003134623169899, "learning_rate": 9.262631906386587e-05, "loss": 1.8034, "step": 39264 }, { "epoch": 0.07754324059482838, "grad_norm": 0.10557498782873154, "learning_rate": 9.261997883950118e-05, "loss": 1.8188, "step": 39296 }, { "epoch": 0.07760638655622482, "grad_norm": 0.11196437478065491, "learning_rate": 9.26136386151365e-05, "loss": 1.8039, "step": 39328 }, { "epoch": 0.07766953251762126, "grad_norm": 0.11173975467681885, "learning_rate": 9.260729839077181e-05, "loss": 1.813, "step": 39360 }, { "epoch": 0.0777326784790177, "grad_norm": 0.11359525471925735, "learning_rate": 9.260095816640711e-05, "loss": 1.8039, "step": 39392 }, { "epoch": 0.07779582444041414, "grad_norm": 0.10052287578582764, "learning_rate": 9.259461794204243e-05, "loss": 1.7947, "step": 39424 }, { "epoch": 0.07785897040181058, "grad_norm": 0.10982044041156769, "learning_rate": 9.258827771767774e-05, "loss": 1.7962, "step": 39456 }, { "epoch": 0.07792211636320702, "grad_norm": 0.1478874832391739, "learning_rate": 9.258193749331306e-05, "loss": 1.7988, "step": 39488 }, { "epoch": 0.07798526232460345, "grad_norm": 0.11171736568212509, "learning_rate": 9.257559726894836e-05, "loss": 1.792, "step": 39520 }, { "epoch": 0.0780484082859999, "grad_norm": 0.11384129524230957, "learning_rate": 9.256925704458367e-05, "loss": 1.7974, "step": 39552 }, { "epoch": 0.07811155424739634, "grad_norm": 0.09803447127342224, "learning_rate": 9.256291682021899e-05, "loss": 1.7955, "step": 39584 }, { "epoch": 0.07817470020879277, "grad_norm": 0.11528968065977097, "learning_rate": 9.25565765958543e-05, "loss": 1.7994, "step": 39616 }, { "epoch": 0.07823784617018922, "grad_norm": 0.10794825106859207, "learning_rate": 9.25502363714896e-05, "loss": 1.8061, "step": 39648 }, { "epoch": 0.07830099213158566, "grad_norm": 0.10372241586446762, "learning_rate": 9.25438961471249e-05, "loss": 1.8079, "step": 39680 }, { "epoch": 0.0783641380929821, "grad_norm": 0.1086694598197937, "learning_rate": 9.253755592276022e-05, "loss": 1.7964, "step": 39712 }, { "epoch": 0.07842728405437854, "grad_norm": 0.10435189306735992, "learning_rate": 9.253121569839553e-05, "loss": 1.8014, "step": 39744 }, { "epoch": 0.07849043001577498, "grad_norm": 0.09914956986904144, "learning_rate": 9.252487547403083e-05, "loss": 1.8016, "step": 39776 }, { "epoch": 0.07855357597717141, "grad_norm": 0.10343772917985916, "learning_rate": 9.251853524966615e-05, "loss": 1.7982, "step": 39808 }, { "epoch": 0.07861672193856786, "grad_norm": 0.10450094193220139, "learning_rate": 9.251219502530146e-05, "loss": 1.8064, "step": 39840 }, { "epoch": 0.0786798678999643, "grad_norm": 0.10399092733860016, "learning_rate": 9.250585480093678e-05, "loss": 1.8072, "step": 39872 }, { "epoch": 0.07874301386136073, "grad_norm": 0.0940062552690506, "learning_rate": 9.249951457657209e-05, "loss": 1.81, "step": 39904 }, { "epoch": 0.07880615982275718, "grad_norm": 0.10817860811948776, "learning_rate": 9.249317435220739e-05, "loss": 1.8054, "step": 39936 }, { "epoch": 0.07886930578415362, "grad_norm": 0.10647379606962204, "learning_rate": 9.24868341278427e-05, "loss": 1.8002, "step": 39968 }, { "epoch": 0.07893245174555005, "grad_norm": 0.10552795976400375, "learning_rate": 9.248049390347802e-05, "loss": 1.8109, "step": 40000 }, { "epoch": 0.0789955977069465, "grad_norm": 0.09389588981866837, "learning_rate": 9.247415367911334e-05, "loss": 1.7971, "step": 40032 }, { "epoch": 0.07905874366834294, "grad_norm": 0.09789952635765076, "learning_rate": 9.246781345474864e-05, "loss": 1.7939, "step": 40064 }, { "epoch": 0.07912188962973939, "grad_norm": 0.10113061219453812, "learning_rate": 9.246147323038394e-05, "loss": 1.8024, "step": 40096 }, { "epoch": 0.07918503559113582, "grad_norm": 0.11077635735273361, "learning_rate": 9.245513300601925e-05, "loss": 1.7936, "step": 40128 }, { "epoch": 0.07924818155253226, "grad_norm": 0.10890121757984161, "learning_rate": 9.244879278165457e-05, "loss": 1.8042, "step": 40160 }, { "epoch": 0.0793113275139287, "grad_norm": 0.09608139842748642, "learning_rate": 9.244245255728987e-05, "loss": 1.7896, "step": 40192 }, { "epoch": 0.07937447347532514, "grad_norm": 0.11083830893039703, "learning_rate": 9.243611233292518e-05, "loss": 1.7935, "step": 40224 }, { "epoch": 0.07943761943672158, "grad_norm": 0.10516346246004105, "learning_rate": 9.24297721085605e-05, "loss": 1.8001, "step": 40256 }, { "epoch": 0.07950076539811803, "grad_norm": 0.09649717062711716, "learning_rate": 9.242343188419581e-05, "loss": 1.7929, "step": 40288 }, { "epoch": 0.07956391135951446, "grad_norm": 0.1006387397646904, "learning_rate": 9.241709165983111e-05, "loss": 1.7992, "step": 40320 }, { "epoch": 0.0796270573209109, "grad_norm": 0.10673325508832932, "learning_rate": 9.241075143546643e-05, "loss": 1.795, "step": 40352 }, { "epoch": 0.07969020328230735, "grad_norm": 0.09951280057430267, "learning_rate": 9.240441121110174e-05, "loss": 1.7951, "step": 40384 }, { "epoch": 0.07975334924370378, "grad_norm": 0.10043288767337799, "learning_rate": 9.239807098673706e-05, "loss": 1.7918, "step": 40416 }, { "epoch": 0.07981649520510022, "grad_norm": 0.099413201212883, "learning_rate": 9.239173076237236e-05, "loss": 1.8007, "step": 40448 }, { "epoch": 0.07987964116649666, "grad_norm": 0.10252483189105988, "learning_rate": 9.238539053800767e-05, "loss": 1.8108, "step": 40480 }, { "epoch": 0.0799427871278931, "grad_norm": 0.11060453951358795, "learning_rate": 9.237905031364297e-05, "loss": 1.8088, "step": 40512 }, { "epoch": 0.08000593308928954, "grad_norm": 0.11427866667509079, "learning_rate": 9.237271008927829e-05, "loss": 1.8075, "step": 40544 }, { "epoch": 0.08006907905068598, "grad_norm": 0.11861047893762589, "learning_rate": 9.23663698649136e-05, "loss": 1.8043, "step": 40576 }, { "epoch": 0.08013222501208242, "grad_norm": 0.10218507796525955, "learning_rate": 9.23600296405489e-05, "loss": 1.786, "step": 40608 }, { "epoch": 0.08019537097347886, "grad_norm": 0.09573070704936981, "learning_rate": 9.235368941618422e-05, "loss": 1.8028, "step": 40640 }, { "epoch": 0.0802585169348753, "grad_norm": 0.10090518742799759, "learning_rate": 9.234734919181953e-05, "loss": 1.7961, "step": 40672 }, { "epoch": 0.08032166289627174, "grad_norm": 0.10573742538690567, "learning_rate": 9.234100896745485e-05, "loss": 1.7983, "step": 40704 }, { "epoch": 0.08038480885766817, "grad_norm": 0.10804630070924759, "learning_rate": 9.233466874309015e-05, "loss": 1.8034, "step": 40736 }, { "epoch": 0.08044795481906462, "grad_norm": 0.09635918587446213, "learning_rate": 9.232832851872546e-05, "loss": 1.7872, "step": 40768 }, { "epoch": 0.08051110078046106, "grad_norm": 0.10776662826538086, "learning_rate": 9.232198829436078e-05, "loss": 1.7856, "step": 40800 }, { "epoch": 0.0805742467418575, "grad_norm": 0.09461846947669983, "learning_rate": 9.231564806999609e-05, "loss": 1.7823, "step": 40832 }, { "epoch": 0.08063739270325394, "grad_norm": 0.11045592278242111, "learning_rate": 9.230930784563139e-05, "loss": 1.7996, "step": 40864 }, { "epoch": 0.08070053866465038, "grad_norm": 0.11291345208883286, "learning_rate": 9.23029676212667e-05, "loss": 1.7871, "step": 40896 }, { "epoch": 0.08076368462604683, "grad_norm": 0.09787199646234512, "learning_rate": 9.229662739690201e-05, "loss": 1.7935, "step": 40928 }, { "epoch": 0.08082683058744326, "grad_norm": 0.1011669784784317, "learning_rate": 9.229028717253732e-05, "loss": 1.7938, "step": 40960 }, { "epoch": 0.0808899765488397, "grad_norm": 0.11360353231430054, "learning_rate": 9.228394694817262e-05, "loss": 1.7995, "step": 40992 }, { "epoch": 0.08095312251023615, "grad_norm": 0.1012316346168518, "learning_rate": 9.227760672380794e-05, "loss": 1.7962, "step": 41024 }, { "epoch": 0.08101626847163258, "grad_norm": 0.092242531478405, "learning_rate": 9.227126649944325e-05, "loss": 1.8005, "step": 41056 }, { "epoch": 0.08107941443302902, "grad_norm": 0.09925538301467896, "learning_rate": 9.226492627507857e-05, "loss": 1.8026, "step": 41088 }, { "epoch": 0.08114256039442547, "grad_norm": 0.10542775690555573, "learning_rate": 9.225858605071387e-05, "loss": 1.805, "step": 41120 }, { "epoch": 0.0812057063558219, "grad_norm": 0.0951443612575531, "learning_rate": 9.225224582634918e-05, "loss": 1.7847, "step": 41152 }, { "epoch": 0.08126885231721834, "grad_norm": 0.09704306721687317, "learning_rate": 9.22459056019845e-05, "loss": 1.8043, "step": 41184 }, { "epoch": 0.08133199827861479, "grad_norm": 0.10940796136856079, "learning_rate": 9.223956537761981e-05, "loss": 1.7811, "step": 41216 }, { "epoch": 0.08139514424001122, "grad_norm": 0.10112254321575165, "learning_rate": 9.223322515325513e-05, "loss": 1.8039, "step": 41248 }, { "epoch": 0.08145829020140766, "grad_norm": 0.1044624075293541, "learning_rate": 9.222688492889043e-05, "loss": 1.8049, "step": 41280 }, { "epoch": 0.0815214361628041, "grad_norm": 0.0949002057313919, "learning_rate": 9.222054470452574e-05, "loss": 1.7953, "step": 41312 }, { "epoch": 0.08158458212420054, "grad_norm": 0.0980764627456665, "learning_rate": 9.221420448016104e-05, "loss": 1.8033, "step": 41344 }, { "epoch": 0.08164772808559698, "grad_norm": 0.09994692355394363, "learning_rate": 9.220786425579636e-05, "loss": 1.7902, "step": 41376 }, { "epoch": 0.08171087404699343, "grad_norm": 0.09915197640657425, "learning_rate": 9.220152403143166e-05, "loss": 1.8045, "step": 41408 }, { "epoch": 0.08177402000838986, "grad_norm": 0.10263440757989883, "learning_rate": 9.219518380706697e-05, "loss": 1.7937, "step": 41440 }, { "epoch": 0.0818371659697863, "grad_norm": 0.10403347760438919, "learning_rate": 9.218884358270229e-05, "loss": 1.7966, "step": 41472 }, { "epoch": 0.08190031193118275, "grad_norm": 0.10131601244211197, "learning_rate": 9.21825033583376e-05, "loss": 1.7821, "step": 41504 }, { "epoch": 0.08196345789257918, "grad_norm": 0.11343494802713394, "learning_rate": 9.21761631339729e-05, "loss": 1.7958, "step": 41536 }, { "epoch": 0.08202660385397562, "grad_norm": 0.11137432605028152, "learning_rate": 9.216982290960822e-05, "loss": 1.7931, "step": 41568 }, { "epoch": 0.08208974981537207, "grad_norm": 0.10910090059041977, "learning_rate": 9.216348268524353e-05, "loss": 1.784, "step": 41600 }, { "epoch": 0.0821528957767685, "grad_norm": 0.10436250269412994, "learning_rate": 9.215714246087885e-05, "loss": 1.8033, "step": 41632 }, { "epoch": 0.08221604173816494, "grad_norm": 0.09831598401069641, "learning_rate": 9.215080223651415e-05, "loss": 1.7731, "step": 41664 }, { "epoch": 0.08227918769956138, "grad_norm": 0.1071930006146431, "learning_rate": 9.214446201214946e-05, "loss": 1.7773, "step": 41696 }, { "epoch": 0.08234233366095782, "grad_norm": 0.10158498585224152, "learning_rate": 9.213812178778478e-05, "loss": 1.8027, "step": 41728 }, { "epoch": 0.08240547962235427, "grad_norm": 0.09204748272895813, "learning_rate": 9.213178156342008e-05, "loss": 1.7938, "step": 41760 }, { "epoch": 0.0824686255837507, "grad_norm": 0.10261339694261551, "learning_rate": 9.212544133905538e-05, "loss": 1.7923, "step": 41792 }, { "epoch": 0.08253177154514714, "grad_norm": 0.1180913895368576, "learning_rate": 9.21191011146907e-05, "loss": 1.7916, "step": 41824 }, { "epoch": 0.08259491750654359, "grad_norm": 0.106290303170681, "learning_rate": 9.211276089032601e-05, "loss": 1.7908, "step": 41856 }, { "epoch": 0.08265806346794002, "grad_norm": 0.10435096174478531, "learning_rate": 9.210642066596132e-05, "loss": 1.7848, "step": 41888 }, { "epoch": 0.08272120942933646, "grad_norm": 0.10200298577547073, "learning_rate": 9.210008044159664e-05, "loss": 1.7808, "step": 41920 }, { "epoch": 0.08278435539073291, "grad_norm": 0.09501753002405167, "learning_rate": 9.209374021723194e-05, "loss": 1.8, "step": 41952 }, { "epoch": 0.08284750135212934, "grad_norm": 0.10118541121482849, "learning_rate": 9.208739999286725e-05, "loss": 1.7862, "step": 41984 }, { "epoch": 0.08291064731352578, "grad_norm": 0.11437226831912994, "learning_rate": 9.208105976850257e-05, "loss": 1.7996, "step": 42016 }, { "epoch": 0.08297379327492223, "grad_norm": 0.10282813757658005, "learning_rate": 9.207471954413788e-05, "loss": 1.7932, "step": 42048 }, { "epoch": 0.08303693923631866, "grad_norm": 0.11247044801712036, "learning_rate": 9.206837931977318e-05, "loss": 1.795, "step": 42080 }, { "epoch": 0.0831000851977151, "grad_norm": 0.10876993089914322, "learning_rate": 9.20620390954085e-05, "loss": 1.7984, "step": 42112 }, { "epoch": 0.08316323115911155, "grad_norm": 0.10570403188467026, "learning_rate": 9.20556988710438e-05, "loss": 1.7801, "step": 42144 }, { "epoch": 0.08322637712050798, "grad_norm": 0.09808826446533203, "learning_rate": 9.204935864667911e-05, "loss": 1.7848, "step": 42176 }, { "epoch": 0.08328952308190442, "grad_norm": 0.09764552861452103, "learning_rate": 9.204301842231441e-05, "loss": 1.7892, "step": 42208 }, { "epoch": 0.08335266904330087, "grad_norm": 0.10342146456241608, "learning_rate": 9.203667819794973e-05, "loss": 1.787, "step": 42240 }, { "epoch": 0.0834158150046973, "grad_norm": 0.10156437009572983, "learning_rate": 9.203033797358504e-05, "loss": 1.7839, "step": 42272 }, { "epoch": 0.08347896096609374, "grad_norm": 0.10073494911193848, "learning_rate": 9.202399774922036e-05, "loss": 1.7848, "step": 42304 }, { "epoch": 0.08354210692749019, "grad_norm": 0.09972520917654037, "learning_rate": 9.201765752485566e-05, "loss": 1.7969, "step": 42336 }, { "epoch": 0.08360525288888662, "grad_norm": 0.10750122368335724, "learning_rate": 9.201131730049097e-05, "loss": 1.7742, "step": 42368 }, { "epoch": 0.08366839885028306, "grad_norm": 0.10817664116621017, "learning_rate": 9.200497707612629e-05, "loss": 1.7913, "step": 42400 }, { "epoch": 0.0837315448116795, "grad_norm": 0.0960138589143753, "learning_rate": 9.19986368517616e-05, "loss": 1.7763, "step": 42432 }, { "epoch": 0.08379469077307594, "grad_norm": 0.1208823025226593, "learning_rate": 9.19922966273969e-05, "loss": 1.7922, "step": 42464 }, { "epoch": 0.08385783673447238, "grad_norm": 0.09739633649587631, "learning_rate": 9.198595640303222e-05, "loss": 1.7814, "step": 42496 }, { "epoch": 0.08392098269586883, "grad_norm": 0.09989912062883377, "learning_rate": 9.197961617866753e-05, "loss": 1.7784, "step": 42528 }, { "epoch": 0.08398412865726526, "grad_norm": 0.09694484621286392, "learning_rate": 9.197327595430283e-05, "loss": 1.7898, "step": 42560 }, { "epoch": 0.08404727461866171, "grad_norm": 0.10541020333766937, "learning_rate": 9.196693572993815e-05, "loss": 1.7813, "step": 42592 }, { "epoch": 0.08411042058005815, "grad_norm": 0.09711425006389618, "learning_rate": 9.196059550557345e-05, "loss": 1.7851, "step": 42624 }, { "epoch": 0.08417356654145458, "grad_norm": 0.10375643521547318, "learning_rate": 9.195425528120876e-05, "loss": 1.7821, "step": 42656 }, { "epoch": 0.08423671250285103, "grad_norm": 0.10384511947631836, "learning_rate": 9.194791505684408e-05, "loss": 1.7672, "step": 42688 }, { "epoch": 0.08429985846424747, "grad_norm": 0.10180196911096573, "learning_rate": 9.194157483247939e-05, "loss": 1.8067, "step": 42720 }, { "epoch": 0.0843630044256439, "grad_norm": 0.10466025769710541, "learning_rate": 9.19352346081147e-05, "loss": 1.7788, "step": 42752 }, { "epoch": 0.08442615038704035, "grad_norm": 0.101494699716568, "learning_rate": 9.192889438375001e-05, "loss": 1.77, "step": 42784 }, { "epoch": 0.08448929634843678, "grad_norm": 0.09314579516649246, "learning_rate": 9.192255415938532e-05, "loss": 1.7882, "step": 42816 }, { "epoch": 0.08455244230983322, "grad_norm": 0.09800131618976593, "learning_rate": 9.191621393502064e-05, "loss": 1.7885, "step": 42848 }, { "epoch": 0.08461558827122967, "grad_norm": 0.1176493763923645, "learning_rate": 9.190987371065594e-05, "loss": 1.7811, "step": 42880 }, { "epoch": 0.0846787342326261, "grad_norm": 0.09414546191692352, "learning_rate": 9.190353348629125e-05, "loss": 1.7718, "step": 42912 }, { "epoch": 0.08474188019402254, "grad_norm": 0.10884156078100204, "learning_rate": 9.189719326192657e-05, "loss": 1.7932, "step": 42944 }, { "epoch": 0.08480502615541899, "grad_norm": 0.10244859009981155, "learning_rate": 9.189085303756187e-05, "loss": 1.789, "step": 42976 }, { "epoch": 0.08486817211681542, "grad_norm": 0.1141287088394165, "learning_rate": 9.188451281319718e-05, "loss": 1.7932, "step": 43008 }, { "epoch": 0.08493131807821186, "grad_norm": 0.10819052904844284, "learning_rate": 9.187817258883248e-05, "loss": 1.7814, "step": 43040 }, { "epoch": 0.08499446403960831, "grad_norm": 0.1084299311041832, "learning_rate": 9.18718323644678e-05, "loss": 1.7944, "step": 43072 }, { "epoch": 0.08505761000100474, "grad_norm": 0.0994194969534874, "learning_rate": 9.186549214010311e-05, "loss": 1.7865, "step": 43104 }, { "epoch": 0.08512075596240118, "grad_norm": 0.0987580195069313, "learning_rate": 9.185915191573841e-05, "loss": 1.7902, "step": 43136 }, { "epoch": 0.08518390192379763, "grad_norm": 0.10071149468421936, "learning_rate": 9.185281169137373e-05, "loss": 1.7826, "step": 43168 }, { "epoch": 0.08524704788519406, "grad_norm": 0.09697140753269196, "learning_rate": 9.184647146700904e-05, "loss": 1.7816, "step": 43200 }, { "epoch": 0.0853101938465905, "grad_norm": 0.10094501823186874, "learning_rate": 9.184013124264436e-05, "loss": 1.7721, "step": 43232 }, { "epoch": 0.08537333980798695, "grad_norm": 0.12242378294467926, "learning_rate": 9.183379101827967e-05, "loss": 1.7691, "step": 43264 }, { "epoch": 0.08543648576938338, "grad_norm": 0.09958004951477051, "learning_rate": 9.182745079391497e-05, "loss": 1.7728, "step": 43296 }, { "epoch": 0.08549963173077982, "grad_norm": 0.09500090032815933, "learning_rate": 9.182111056955029e-05, "loss": 1.788, "step": 43328 }, { "epoch": 0.08556277769217627, "grad_norm": 0.10032177716493607, "learning_rate": 9.18147703451856e-05, "loss": 1.779, "step": 43360 }, { "epoch": 0.0856259236535727, "grad_norm": 0.11116743832826614, "learning_rate": 9.18084301208209e-05, "loss": 1.7763, "step": 43392 }, { "epoch": 0.08568906961496915, "grad_norm": 0.09952928870916367, "learning_rate": 9.18020898964562e-05, "loss": 1.7928, "step": 43424 }, { "epoch": 0.08575221557636559, "grad_norm": 0.09488236159086227, "learning_rate": 9.179574967209152e-05, "loss": 1.7815, "step": 43456 }, { "epoch": 0.08581536153776202, "grad_norm": 0.0935351774096489, "learning_rate": 9.178940944772683e-05, "loss": 1.7812, "step": 43488 }, { "epoch": 0.08587850749915847, "grad_norm": 0.10840978473424911, "learning_rate": 9.178306922336215e-05, "loss": 1.784, "step": 43520 }, { "epoch": 0.0859416534605549, "grad_norm": 0.09159401804208755, "learning_rate": 9.177672899899745e-05, "loss": 1.7635, "step": 43552 }, { "epoch": 0.08600479942195134, "grad_norm": 0.1033141240477562, "learning_rate": 9.177038877463276e-05, "loss": 1.7757, "step": 43584 }, { "epoch": 0.08606794538334779, "grad_norm": 0.11672907322645187, "learning_rate": 9.176404855026808e-05, "loss": 1.7754, "step": 43616 }, { "epoch": 0.08613109134474423, "grad_norm": 0.09875050187110901, "learning_rate": 9.175770832590339e-05, "loss": 1.7823, "step": 43648 }, { "epoch": 0.08619423730614066, "grad_norm": 0.1074860617518425, "learning_rate": 9.17513681015387e-05, "loss": 1.7694, "step": 43680 }, { "epoch": 0.08625738326753711, "grad_norm": 0.09928867220878601, "learning_rate": 9.174502787717401e-05, "loss": 1.784, "step": 43712 }, { "epoch": 0.08632052922893355, "grad_norm": 0.09989845752716064, "learning_rate": 9.173868765280932e-05, "loss": 1.7733, "step": 43744 }, { "epoch": 0.08638367519032998, "grad_norm": 0.10041379928588867, "learning_rate": 9.173234742844464e-05, "loss": 1.7821, "step": 43776 }, { "epoch": 0.08644682115172643, "grad_norm": 0.10040847957134247, "learning_rate": 9.172600720407994e-05, "loss": 1.7918, "step": 43808 }, { "epoch": 0.08650996711312287, "grad_norm": 0.1039457842707634, "learning_rate": 9.171966697971524e-05, "loss": 1.7796, "step": 43840 }, { "epoch": 0.0865731130745193, "grad_norm": 0.1016286239027977, "learning_rate": 9.171332675535055e-05, "loss": 1.798, "step": 43872 }, { "epoch": 0.08663625903591575, "grad_norm": 0.10243168473243713, "learning_rate": 9.170698653098587e-05, "loss": 1.7655, "step": 43904 }, { "epoch": 0.08669940499731218, "grad_norm": 0.10807812958955765, "learning_rate": 9.170064630662118e-05, "loss": 1.7855, "step": 43936 }, { "epoch": 0.08676255095870862, "grad_norm": 0.09867890924215317, "learning_rate": 9.169430608225648e-05, "loss": 1.784, "step": 43968 }, { "epoch": 0.08682569692010507, "grad_norm": 0.10520311444997787, "learning_rate": 9.16879658578918e-05, "loss": 1.7829, "step": 44000 }, { "epoch": 0.0868888428815015, "grad_norm": 0.10657200217247009, "learning_rate": 9.168162563352711e-05, "loss": 1.7779, "step": 44032 }, { "epoch": 0.08695198884289794, "grad_norm": 0.10489603132009506, "learning_rate": 9.167528540916243e-05, "loss": 1.7712, "step": 44064 }, { "epoch": 0.08701513480429439, "grad_norm": 0.11926059424877167, "learning_rate": 9.166894518479773e-05, "loss": 1.7596, "step": 44096 }, { "epoch": 0.08707828076569082, "grad_norm": 0.10963926464319229, "learning_rate": 9.166260496043304e-05, "loss": 1.7784, "step": 44128 }, { "epoch": 0.08714142672708726, "grad_norm": 0.09565390646457672, "learning_rate": 9.165626473606836e-05, "loss": 1.7787, "step": 44160 }, { "epoch": 0.08720457268848371, "grad_norm": 0.09736917912960052, "learning_rate": 9.164992451170367e-05, "loss": 1.7737, "step": 44192 }, { "epoch": 0.08726771864988014, "grad_norm": 0.09433715790510178, "learning_rate": 9.164358428733897e-05, "loss": 1.7831, "step": 44224 }, { "epoch": 0.08733086461127659, "grad_norm": 0.10643383860588074, "learning_rate": 9.163724406297427e-05, "loss": 1.7752, "step": 44256 }, { "epoch": 0.08739401057267303, "grad_norm": 0.10283531993627548, "learning_rate": 9.163090383860959e-05, "loss": 1.7671, "step": 44288 }, { "epoch": 0.08745715653406946, "grad_norm": 0.1017676293849945, "learning_rate": 9.16245636142449e-05, "loss": 1.7745, "step": 44320 }, { "epoch": 0.08752030249546591, "grad_norm": 0.09568081796169281, "learning_rate": 9.16182233898802e-05, "loss": 1.764, "step": 44352 }, { "epoch": 0.08758344845686235, "grad_norm": 0.09435157477855682, "learning_rate": 9.161188316551552e-05, "loss": 1.7754, "step": 44384 }, { "epoch": 0.08764659441825878, "grad_norm": 0.09382575750350952, "learning_rate": 9.160554294115083e-05, "loss": 1.7806, "step": 44416 }, { "epoch": 0.08770974037965523, "grad_norm": 0.11132270097732544, "learning_rate": 9.159920271678615e-05, "loss": 1.7952, "step": 44448 }, { "epoch": 0.08777288634105167, "grad_norm": 0.10555444657802582, "learning_rate": 9.159286249242145e-05, "loss": 1.7851, "step": 44480 }, { "epoch": 0.0878360323024481, "grad_norm": 0.12005648761987686, "learning_rate": 9.158652226805676e-05, "loss": 1.7731, "step": 44512 }, { "epoch": 0.08789917826384455, "grad_norm": 0.10580897331237793, "learning_rate": 9.158018204369208e-05, "loss": 1.7764, "step": 44544 }, { "epoch": 0.08796232422524099, "grad_norm": 0.1080353707075119, "learning_rate": 9.15738418193274e-05, "loss": 1.7906, "step": 44576 }, { "epoch": 0.08802547018663742, "grad_norm": 0.09693998098373413, "learning_rate": 9.156750159496271e-05, "loss": 1.7681, "step": 44608 }, { "epoch": 0.08808861614803387, "grad_norm": 0.10377060621976852, "learning_rate": 9.156116137059801e-05, "loss": 1.7765, "step": 44640 }, { "epoch": 0.0881517621094303, "grad_norm": 0.09804272651672363, "learning_rate": 9.155482114623331e-05, "loss": 1.7676, "step": 44672 }, { "epoch": 0.08821490807082674, "grad_norm": 0.09506504982709885, "learning_rate": 9.154848092186862e-05, "loss": 1.7788, "step": 44704 }, { "epoch": 0.08827805403222319, "grad_norm": 0.10393403470516205, "learning_rate": 9.154214069750394e-05, "loss": 1.7785, "step": 44736 }, { "epoch": 0.08834119999361963, "grad_norm": 0.10242267698049545, "learning_rate": 9.153580047313924e-05, "loss": 1.763, "step": 44768 }, { "epoch": 0.08840434595501606, "grad_norm": 0.1032901257276535, "learning_rate": 9.152946024877455e-05, "loss": 1.7592, "step": 44800 }, { "epoch": 0.08846749191641251, "grad_norm": 0.10288886725902557, "learning_rate": 9.152312002440987e-05, "loss": 1.7737, "step": 44832 }, { "epoch": 0.08853063787780895, "grad_norm": 0.11493588984012604, "learning_rate": 9.151677980004518e-05, "loss": 1.7862, "step": 44864 }, { "epoch": 0.08859378383920538, "grad_norm": 0.10273680835962296, "learning_rate": 9.151043957568048e-05, "loss": 1.7551, "step": 44896 }, { "epoch": 0.08865692980060183, "grad_norm": 0.09941155463457108, "learning_rate": 9.15040993513158e-05, "loss": 1.7706, "step": 44928 }, { "epoch": 0.08872007576199827, "grad_norm": 0.09910742193460464, "learning_rate": 9.149775912695111e-05, "loss": 1.7847, "step": 44960 }, { "epoch": 0.0887832217233947, "grad_norm": 0.09402486681938171, "learning_rate": 9.149141890258643e-05, "loss": 1.7863, "step": 44992 }, { "epoch": 0.08884636768479115, "grad_norm": 0.1073371097445488, "learning_rate": 9.148507867822173e-05, "loss": 1.7717, "step": 45024 }, { "epoch": 0.08890951364618759, "grad_norm": 0.0946664810180664, "learning_rate": 9.147873845385704e-05, "loss": 1.7773, "step": 45056 }, { "epoch": 0.08897265960758402, "grad_norm": 0.09917966276407242, "learning_rate": 9.147239822949235e-05, "loss": 1.7713, "step": 45088 }, { "epoch": 0.08903580556898047, "grad_norm": 0.097694993019104, "learning_rate": 9.146605800512766e-05, "loss": 1.7715, "step": 45120 }, { "epoch": 0.0890989515303769, "grad_norm": 0.09713596850633621, "learning_rate": 9.145971778076296e-05, "loss": 1.7701, "step": 45152 }, { "epoch": 0.08916209749177335, "grad_norm": 0.10814554244279861, "learning_rate": 9.145337755639828e-05, "loss": 1.7682, "step": 45184 }, { "epoch": 0.08922524345316979, "grad_norm": 0.09634155035018921, "learning_rate": 9.144703733203359e-05, "loss": 1.77, "step": 45216 }, { "epoch": 0.08928838941456622, "grad_norm": 0.10277990996837616, "learning_rate": 9.14406971076689e-05, "loss": 1.7702, "step": 45248 }, { "epoch": 0.08935153537596267, "grad_norm": 0.10231760144233704, "learning_rate": 9.143435688330422e-05, "loss": 1.7677, "step": 45280 }, { "epoch": 0.08941468133735911, "grad_norm": 0.09473563730716705, "learning_rate": 9.142801665893952e-05, "loss": 1.7676, "step": 45312 }, { "epoch": 0.08947782729875554, "grad_norm": 0.0933060497045517, "learning_rate": 9.142167643457483e-05, "loss": 1.7573, "step": 45344 }, { "epoch": 0.089540973260152, "grad_norm": 0.11009597033262253, "learning_rate": 9.141533621021015e-05, "loss": 1.7727, "step": 45376 }, { "epoch": 0.08960411922154843, "grad_norm": 0.10508358478546143, "learning_rate": 9.140899598584546e-05, "loss": 1.7667, "step": 45408 }, { "epoch": 0.08966726518294486, "grad_norm": 0.11706570535898209, "learning_rate": 9.140265576148076e-05, "loss": 1.7728, "step": 45440 }, { "epoch": 0.08973041114434131, "grad_norm": 0.1036912128329277, "learning_rate": 9.139631553711608e-05, "loss": 1.7708, "step": 45472 }, { "epoch": 0.08979355710573775, "grad_norm": 0.10347336530685425, "learning_rate": 9.138997531275138e-05, "loss": 1.782, "step": 45504 }, { "epoch": 0.08985670306713418, "grad_norm": 0.12362384796142578, "learning_rate": 9.13836350883867e-05, "loss": 1.7812, "step": 45536 }, { "epoch": 0.08991984902853063, "grad_norm": 0.09904732555150986, "learning_rate": 9.1377294864022e-05, "loss": 1.765, "step": 45568 }, { "epoch": 0.08998299498992707, "grad_norm": 0.12025132030248642, "learning_rate": 9.137095463965731e-05, "loss": 1.7708, "step": 45600 }, { "epoch": 0.0900461409513235, "grad_norm": 0.0950314849615097, "learning_rate": 9.136461441529262e-05, "loss": 1.7588, "step": 45632 }, { "epoch": 0.09010928691271995, "grad_norm": 0.0949680283665657, "learning_rate": 9.135827419092794e-05, "loss": 1.7692, "step": 45664 }, { "epoch": 0.09017243287411639, "grad_norm": 0.09779734164476395, "learning_rate": 9.135193396656324e-05, "loss": 1.7705, "step": 45696 }, { "epoch": 0.09023557883551282, "grad_norm": 0.09569840878248215, "learning_rate": 9.134559374219856e-05, "loss": 1.7637, "step": 45728 }, { "epoch": 0.09029872479690927, "grad_norm": 0.11018886417150497, "learning_rate": 9.133925351783387e-05, "loss": 1.7847, "step": 45760 }, { "epoch": 0.0903618707583057, "grad_norm": 0.13354440033435822, "learning_rate": 9.133291329346918e-05, "loss": 1.7658, "step": 45792 }, { "epoch": 0.09042501671970214, "grad_norm": 0.09451562166213989, "learning_rate": 9.132657306910449e-05, "loss": 1.7802, "step": 45824 }, { "epoch": 0.09048816268109859, "grad_norm": 0.10078594833612442, "learning_rate": 9.13202328447398e-05, "loss": 1.781, "step": 45856 }, { "epoch": 0.09055130864249503, "grad_norm": 0.10404020547866821, "learning_rate": 9.131389262037511e-05, "loss": 1.7625, "step": 45888 }, { "epoch": 0.09061445460389146, "grad_norm": 0.104709692299366, "learning_rate": 9.130755239601042e-05, "loss": 1.7666, "step": 45920 }, { "epoch": 0.09067760056528791, "grad_norm": 0.09853670001029968, "learning_rate": 9.130121217164573e-05, "loss": 1.7837, "step": 45952 }, { "epoch": 0.09074074652668435, "grad_norm": 0.09726610779762268, "learning_rate": 9.129487194728103e-05, "loss": 1.766, "step": 45984 }, { "epoch": 0.0908038924880808, "grad_norm": 0.10068164020776749, "learning_rate": 9.128853172291635e-05, "loss": 1.7705, "step": 46016 }, { "epoch": 0.09086703844947723, "grad_norm": 0.09436113387346268, "learning_rate": 9.128219149855166e-05, "loss": 1.7621, "step": 46048 }, { "epoch": 0.09093018441087367, "grad_norm": 0.09907054156064987, "learning_rate": 9.127585127418697e-05, "loss": 1.7715, "step": 46080 }, { "epoch": 0.09099333037227011, "grad_norm": 0.10897371917963028, "learning_rate": 9.126951104982228e-05, "loss": 1.7669, "step": 46112 }, { "epoch": 0.09105647633366655, "grad_norm": 0.10993722081184387, "learning_rate": 9.126317082545759e-05, "loss": 1.7759, "step": 46144 }, { "epoch": 0.09111962229506299, "grad_norm": 0.09244080632925034, "learning_rate": 9.12568306010929e-05, "loss": 1.7744, "step": 46176 }, { "epoch": 0.09118276825645943, "grad_norm": 0.1071285679936409, "learning_rate": 9.125049037672822e-05, "loss": 1.7786, "step": 46208 }, { "epoch": 0.09124591421785587, "grad_norm": 0.09893108159303665, "learning_rate": 9.124415015236352e-05, "loss": 1.7712, "step": 46240 }, { "epoch": 0.0913090601792523, "grad_norm": 0.10827232152223587, "learning_rate": 9.123780992799883e-05, "loss": 1.7685, "step": 46272 }, { "epoch": 0.09137220614064875, "grad_norm": 0.09942732006311417, "learning_rate": 9.123146970363414e-05, "loss": 1.7526, "step": 46304 }, { "epoch": 0.09143535210204519, "grad_norm": 0.10993362218141556, "learning_rate": 9.122512947926945e-05, "loss": 1.7748, "step": 46336 }, { "epoch": 0.09149849806344162, "grad_norm": 0.09984655678272247, "learning_rate": 9.121878925490475e-05, "loss": 1.773, "step": 46368 }, { "epoch": 0.09156164402483807, "grad_norm": 0.09968504309654236, "learning_rate": 9.121244903054007e-05, "loss": 1.7652, "step": 46400 }, { "epoch": 0.09162478998623451, "grad_norm": 0.0972420945763588, "learning_rate": 9.120610880617538e-05, "loss": 1.771, "step": 46432 }, { "epoch": 0.09168793594763094, "grad_norm": 0.09219255298376083, "learning_rate": 9.11997685818107e-05, "loss": 1.7695, "step": 46464 }, { "epoch": 0.0917510819090274, "grad_norm": 0.09459584206342697, "learning_rate": 9.1193428357446e-05, "loss": 1.7714, "step": 46496 }, { "epoch": 0.09181422787042383, "grad_norm": 0.09561120718717575, "learning_rate": 9.118708813308131e-05, "loss": 1.7632, "step": 46528 }, { "epoch": 0.09187737383182026, "grad_norm": 0.10604512691497803, "learning_rate": 9.118074790871663e-05, "loss": 1.771, "step": 46560 }, { "epoch": 0.09194051979321671, "grad_norm": 0.11895725131034851, "learning_rate": 9.117440768435194e-05, "loss": 1.768, "step": 46592 }, { "epoch": 0.09200366575461315, "grad_norm": 0.12577363848686218, "learning_rate": 9.116806745998725e-05, "loss": 1.776, "step": 46624 }, { "epoch": 0.09206681171600958, "grad_norm": 0.10248183459043503, "learning_rate": 9.116172723562256e-05, "loss": 1.769, "step": 46656 }, { "epoch": 0.09212995767740603, "grad_norm": 0.09341636300086975, "learning_rate": 9.115538701125787e-05, "loss": 1.7673, "step": 46688 }, { "epoch": 0.09219310363880247, "grad_norm": 0.10240786522626877, "learning_rate": 9.114904678689317e-05, "loss": 1.7577, "step": 46720 }, { "epoch": 0.0922562496001989, "grad_norm": 0.10875356197357178, "learning_rate": 9.114270656252849e-05, "loss": 1.7614, "step": 46752 }, { "epoch": 0.09231939556159535, "grad_norm": 0.10090479254722595, "learning_rate": 9.113636633816379e-05, "loss": 1.7592, "step": 46784 }, { "epoch": 0.09238254152299179, "grad_norm": 0.10226140171289444, "learning_rate": 9.11300261137991e-05, "loss": 1.7618, "step": 46816 }, { "epoch": 0.09244568748438824, "grad_norm": 0.10055588185787201, "learning_rate": 9.112368588943442e-05, "loss": 1.7646, "step": 46848 }, { "epoch": 0.09250883344578467, "grad_norm": 0.10336578637361526, "learning_rate": 9.111734566506973e-05, "loss": 1.7587, "step": 46880 }, { "epoch": 0.09257197940718111, "grad_norm": 0.09818453341722488, "learning_rate": 9.111100544070503e-05, "loss": 1.7655, "step": 46912 }, { "epoch": 0.09263512536857756, "grad_norm": 0.1010284349322319, "learning_rate": 9.110466521634035e-05, "loss": 1.7573, "step": 46944 }, { "epoch": 0.09269827132997399, "grad_norm": 0.09641031175851822, "learning_rate": 9.109832499197566e-05, "loss": 1.7644, "step": 46976 }, { "epoch": 0.09276141729137043, "grad_norm": 0.1004476398229599, "learning_rate": 9.109198476761097e-05, "loss": 1.7531, "step": 47008 }, { "epoch": 0.09282456325276688, "grad_norm": 0.09911256283521652, "learning_rate": 9.108564454324628e-05, "loss": 1.7625, "step": 47040 }, { "epoch": 0.09288770921416331, "grad_norm": 0.0979299247264862, "learning_rate": 9.107930431888159e-05, "loss": 1.7647, "step": 47072 }, { "epoch": 0.09295085517555975, "grad_norm": 0.10227309912443161, "learning_rate": 9.10729640945169e-05, "loss": 1.7736, "step": 47104 }, { "epoch": 0.0930140011369562, "grad_norm": 0.10089951753616333, "learning_rate": 9.10666238701522e-05, "loss": 1.7699, "step": 47136 }, { "epoch": 0.09307714709835263, "grad_norm": 0.09610005468130112, "learning_rate": 9.106028364578752e-05, "loss": 1.7673, "step": 47168 }, { "epoch": 0.09314029305974907, "grad_norm": 0.12601642310619354, "learning_rate": 9.105394342142282e-05, "loss": 1.7654, "step": 47200 }, { "epoch": 0.09320343902114551, "grad_norm": 0.09972970187664032, "learning_rate": 9.104760319705814e-05, "loss": 1.7547, "step": 47232 }, { "epoch": 0.09326658498254195, "grad_norm": 0.1147465631365776, "learning_rate": 9.104126297269345e-05, "loss": 1.7547, "step": 47264 }, { "epoch": 0.09332973094393839, "grad_norm": 0.1079910472035408, "learning_rate": 9.103492274832877e-05, "loss": 1.7565, "step": 47296 }, { "epoch": 0.09339287690533483, "grad_norm": 0.11165102571249008, "learning_rate": 9.102858252396407e-05, "loss": 1.7594, "step": 47328 }, { "epoch": 0.09345602286673127, "grad_norm": 0.11357971280813217, "learning_rate": 9.102224229959938e-05, "loss": 1.7637, "step": 47360 }, { "epoch": 0.0935191688281277, "grad_norm": 0.1016388013958931, "learning_rate": 9.10159020752347e-05, "loss": 1.7649, "step": 47392 }, { "epoch": 0.09358231478952415, "grad_norm": 0.09839114546775818, "learning_rate": 9.100956185087001e-05, "loss": 1.7532, "step": 47424 }, { "epoch": 0.09364546075092059, "grad_norm": 0.09849175065755844, "learning_rate": 9.100322162650531e-05, "loss": 1.7457, "step": 47456 }, { "epoch": 0.09370860671231702, "grad_norm": 0.11595937609672546, "learning_rate": 9.099688140214063e-05, "loss": 1.76, "step": 47488 }, { "epoch": 0.09377175267371347, "grad_norm": 0.1018570140004158, "learning_rate": 9.099054117777594e-05, "loss": 1.7662, "step": 47520 }, { "epoch": 0.09383489863510991, "grad_norm": 0.10275154560804367, "learning_rate": 9.098420095341124e-05, "loss": 1.7672, "step": 47552 }, { "epoch": 0.09389804459650634, "grad_norm": 0.09677479416131973, "learning_rate": 9.097786072904654e-05, "loss": 1.7593, "step": 47584 }, { "epoch": 0.0939611905579028, "grad_norm": 0.09905097633600235, "learning_rate": 9.097152050468186e-05, "loss": 1.7521, "step": 47616 }, { "epoch": 0.09402433651929923, "grad_norm": 0.0992780551314354, "learning_rate": 9.096518028031717e-05, "loss": 1.7695, "step": 47648 }, { "epoch": 0.09408748248069568, "grad_norm": 0.10553256422281265, "learning_rate": 9.095884005595249e-05, "loss": 1.773, "step": 47680 }, { "epoch": 0.09415062844209211, "grad_norm": 0.09222009032964706, "learning_rate": 9.095249983158779e-05, "loss": 1.7375, "step": 47712 }, { "epoch": 0.09421377440348855, "grad_norm": 0.09913988411426544, "learning_rate": 9.09461596072231e-05, "loss": 1.7556, "step": 47744 }, { "epoch": 0.094276920364885, "grad_norm": 0.10213114321231842, "learning_rate": 9.093981938285842e-05, "loss": 1.7566, "step": 47776 }, { "epoch": 0.09434006632628143, "grad_norm": 0.09298853576183319, "learning_rate": 9.093347915849373e-05, "loss": 1.7702, "step": 47808 }, { "epoch": 0.09440321228767787, "grad_norm": 0.11833615601062775, "learning_rate": 9.092713893412905e-05, "loss": 1.7736, "step": 47840 }, { "epoch": 0.09446635824907432, "grad_norm": 0.10365059971809387, "learning_rate": 9.092079870976435e-05, "loss": 1.742, "step": 47872 }, { "epoch": 0.09452950421047075, "grad_norm": 0.09631436318159103, "learning_rate": 9.091445848539966e-05, "loss": 1.7562, "step": 47904 }, { "epoch": 0.09459265017186719, "grad_norm": 0.09646230190992355, "learning_rate": 9.090811826103498e-05, "loss": 1.7812, "step": 47936 }, { "epoch": 0.09465579613326364, "grad_norm": 0.12156777083873749, "learning_rate": 9.090177803667028e-05, "loss": 1.7538, "step": 47968 }, { "epoch": 0.09471894209466007, "grad_norm": 0.10942567884922028, "learning_rate": 9.089543781230558e-05, "loss": 1.7459, "step": 48000 }, { "epoch": 0.09478208805605651, "grad_norm": 0.09691857546567917, "learning_rate": 9.088909758794089e-05, "loss": 1.7617, "step": 48032 }, { "epoch": 0.09484523401745296, "grad_norm": 0.09761829674243927, "learning_rate": 9.08827573635762e-05, "loss": 1.766, "step": 48064 }, { "epoch": 0.09490837997884939, "grad_norm": 0.09544821828603745, "learning_rate": 9.087641713921152e-05, "loss": 1.7778, "step": 48096 }, { "epoch": 0.09497152594024583, "grad_norm": 0.09921257942914963, "learning_rate": 9.087007691484682e-05, "loss": 1.747, "step": 48128 }, { "epoch": 0.09503467190164228, "grad_norm": 0.0977461040019989, "learning_rate": 9.086373669048214e-05, "loss": 1.7597, "step": 48160 }, { "epoch": 0.09509781786303871, "grad_norm": 0.10993138700723648, "learning_rate": 9.085739646611745e-05, "loss": 1.7586, "step": 48192 }, { "epoch": 0.09516096382443515, "grad_norm": 0.10029158741235733, "learning_rate": 9.085105624175277e-05, "loss": 1.7691, "step": 48224 }, { "epoch": 0.0952241097858316, "grad_norm": 0.09948234260082245, "learning_rate": 9.084471601738807e-05, "loss": 1.7622, "step": 48256 }, { "epoch": 0.09528725574722803, "grad_norm": 0.10209999978542328, "learning_rate": 9.083837579302338e-05, "loss": 1.7573, "step": 48288 }, { "epoch": 0.09535040170862447, "grad_norm": 0.11497338861227036, "learning_rate": 9.08320355686587e-05, "loss": 1.7633, "step": 48320 }, { "epoch": 0.09541354767002092, "grad_norm": 0.09922916442155838, "learning_rate": 9.082569534429401e-05, "loss": 1.7697, "step": 48352 }, { "epoch": 0.09547669363141735, "grad_norm": 0.09739507734775543, "learning_rate": 9.081935511992931e-05, "loss": 1.7589, "step": 48384 }, { "epoch": 0.09553983959281379, "grad_norm": 0.09743262827396393, "learning_rate": 9.081301489556461e-05, "loss": 1.7704, "step": 48416 }, { "epoch": 0.09560298555421023, "grad_norm": 0.10023558139801025, "learning_rate": 9.080667467119993e-05, "loss": 1.7497, "step": 48448 }, { "epoch": 0.09566613151560667, "grad_norm": 0.10142900794744492, "learning_rate": 9.080033444683524e-05, "loss": 1.7444, "step": 48480 }, { "epoch": 0.09572927747700312, "grad_norm": 0.09515956044197083, "learning_rate": 9.079399422247056e-05, "loss": 1.7384, "step": 48512 }, { "epoch": 0.09579242343839955, "grad_norm": 0.10556641221046448, "learning_rate": 9.078765399810586e-05, "loss": 1.7564, "step": 48544 }, { "epoch": 0.09585556939979599, "grad_norm": 0.09588541090488434, "learning_rate": 9.078131377374117e-05, "loss": 1.7479, "step": 48576 }, { "epoch": 0.09591871536119244, "grad_norm": 0.09864659607410431, "learning_rate": 9.077497354937649e-05, "loss": 1.7538, "step": 48608 }, { "epoch": 0.09598186132258887, "grad_norm": 0.10459697991609573, "learning_rate": 9.07686333250118e-05, "loss": 1.7524, "step": 48640 }, { "epoch": 0.09604500728398531, "grad_norm": 0.09505860507488251, "learning_rate": 9.07622931006471e-05, "loss": 1.7566, "step": 48672 }, { "epoch": 0.09610815324538176, "grad_norm": 0.09968584775924683, "learning_rate": 9.075595287628242e-05, "loss": 1.7651, "step": 48704 }, { "epoch": 0.0961712992067782, "grad_norm": 0.1054338738322258, "learning_rate": 9.074961265191773e-05, "loss": 1.7603, "step": 48736 }, { "epoch": 0.09623444516817463, "grad_norm": 0.10174793750047684, "learning_rate": 9.074327242755305e-05, "loss": 1.7536, "step": 48768 }, { "epoch": 0.09629759112957108, "grad_norm": 0.10760358721017838, "learning_rate": 9.073693220318835e-05, "loss": 1.7307, "step": 48800 }, { "epoch": 0.09636073709096751, "grad_norm": 0.0917457640171051, "learning_rate": 9.073059197882365e-05, "loss": 1.7466, "step": 48832 }, { "epoch": 0.09642388305236395, "grad_norm": 0.09406810998916626, "learning_rate": 9.072425175445896e-05, "loss": 1.7496, "step": 48864 }, { "epoch": 0.0964870290137604, "grad_norm": 0.09314677119255066, "learning_rate": 9.071791153009428e-05, "loss": 1.7507, "step": 48896 }, { "epoch": 0.09655017497515683, "grad_norm": 0.0993109866976738, "learning_rate": 9.071157130572958e-05, "loss": 1.7444, "step": 48928 }, { "epoch": 0.09661332093655327, "grad_norm": 0.09146977216005325, "learning_rate": 9.070523108136489e-05, "loss": 1.7481, "step": 48960 }, { "epoch": 0.09667646689794972, "grad_norm": 0.09343001991510391, "learning_rate": 9.069889085700021e-05, "loss": 1.7532, "step": 48992 }, { "epoch": 0.09673961285934615, "grad_norm": 0.09770987927913666, "learning_rate": 9.069255063263552e-05, "loss": 1.7599, "step": 49024 }, { "epoch": 0.09680275882074259, "grad_norm": 0.10346570611000061, "learning_rate": 9.068621040827082e-05, "loss": 1.7743, "step": 49056 }, { "epoch": 0.09686590478213904, "grad_norm": 0.11342669278383255, "learning_rate": 9.067987018390614e-05, "loss": 1.7583, "step": 49088 }, { "epoch": 0.09692905074353547, "grad_norm": 0.09998240321874619, "learning_rate": 9.067352995954145e-05, "loss": 1.7566, "step": 49120 }, { "epoch": 0.09699219670493191, "grad_norm": 0.10686476528644562, "learning_rate": 9.066718973517677e-05, "loss": 1.7471, "step": 49152 }, { "epoch": 0.09705534266632836, "grad_norm": 0.09399770945310593, "learning_rate": 9.066084951081207e-05, "loss": 1.7476, "step": 49184 }, { "epoch": 0.09711848862772479, "grad_norm": 0.1129143163561821, "learning_rate": 9.065450928644738e-05, "loss": 1.7607, "step": 49216 }, { "epoch": 0.09718163458912123, "grad_norm": 0.10864224284887314, "learning_rate": 9.064816906208268e-05, "loss": 1.7519, "step": 49248 }, { "epoch": 0.09724478055051768, "grad_norm": 0.10117150098085403, "learning_rate": 9.0641828837718e-05, "loss": 1.7462, "step": 49280 }, { "epoch": 0.09730792651191411, "grad_norm": 0.1072075292468071, "learning_rate": 9.063548861335331e-05, "loss": 1.7569, "step": 49312 }, { "epoch": 0.09737107247331056, "grad_norm": 0.10172448307275772, "learning_rate": 9.062914838898861e-05, "loss": 1.7662, "step": 49344 }, { "epoch": 0.097434218434707, "grad_norm": 0.10960230231285095, "learning_rate": 9.062280816462393e-05, "loss": 1.7557, "step": 49376 }, { "epoch": 0.09749736439610343, "grad_norm": 0.10693272203207016, "learning_rate": 9.061646794025924e-05, "loss": 1.7604, "step": 49408 }, { "epoch": 0.09756051035749988, "grad_norm": 0.1062304675579071, "learning_rate": 9.061012771589456e-05, "loss": 1.7713, "step": 49440 }, { "epoch": 0.09762365631889632, "grad_norm": 0.11206618696451187, "learning_rate": 9.060378749152986e-05, "loss": 1.7575, "step": 49472 }, { "epoch": 0.09768680228029275, "grad_norm": 0.09645897150039673, "learning_rate": 9.059744726716517e-05, "loss": 1.7553, "step": 49504 }, { "epoch": 0.0977499482416892, "grad_norm": 0.10010252147912979, "learning_rate": 9.059110704280049e-05, "loss": 1.7536, "step": 49536 }, { "epoch": 0.09781309420308563, "grad_norm": 0.09469565749168396, "learning_rate": 9.05847668184358e-05, "loss": 1.7515, "step": 49568 }, { "epoch": 0.09787624016448207, "grad_norm": 0.09600904583930969, "learning_rate": 9.05784265940711e-05, "loss": 1.7476, "step": 49600 }, { "epoch": 0.09793938612587852, "grad_norm": 0.10906678438186646, "learning_rate": 9.057208636970642e-05, "loss": 1.767, "step": 49632 }, { "epoch": 0.09800253208727495, "grad_norm": 0.09677731245756149, "learning_rate": 9.056574614534172e-05, "loss": 1.7581, "step": 49664 }, { "epoch": 0.09806567804867139, "grad_norm": 0.10066740214824677, "learning_rate": 9.055940592097703e-05, "loss": 1.7547, "step": 49696 }, { "epoch": 0.09812882401006784, "grad_norm": 0.10452299565076828, "learning_rate": 9.055306569661233e-05, "loss": 1.7583, "step": 49728 }, { "epoch": 0.09819196997146427, "grad_norm": 0.09318119287490845, "learning_rate": 9.054672547224765e-05, "loss": 1.7407, "step": 49760 }, { "epoch": 0.09825511593286071, "grad_norm": 0.11290091276168823, "learning_rate": 9.054038524788296e-05, "loss": 1.7462, "step": 49792 }, { "epoch": 0.09831826189425716, "grad_norm": 0.1076420322060585, "learning_rate": 9.053404502351828e-05, "loss": 1.7718, "step": 49824 }, { "epoch": 0.0983814078556536, "grad_norm": 0.10183405876159668, "learning_rate": 9.052770479915359e-05, "loss": 1.7366, "step": 49856 }, { "epoch": 0.09844455381705003, "grad_norm": 0.1181720644235611, "learning_rate": 9.052136457478889e-05, "loss": 1.7686, "step": 49888 }, { "epoch": 0.09850769977844648, "grad_norm": 0.09957777708768845, "learning_rate": 9.051502435042421e-05, "loss": 1.769, "step": 49920 }, { "epoch": 0.09857084573984291, "grad_norm": 0.09811808913946152, "learning_rate": 9.050868412605952e-05, "loss": 1.7455, "step": 49952 }, { "epoch": 0.09863399170123935, "grad_norm": 0.11086859554052353, "learning_rate": 9.050234390169484e-05, "loss": 1.7642, "step": 49984 }, { "epoch": 0.0986971376626358, "grad_norm": 0.10157608240842819, "learning_rate": 9.049600367733014e-05, "loss": 1.759, "step": 50016 }, { "epoch": 0.09876028362403223, "grad_norm": 0.0975349023938179, "learning_rate": 9.048966345296545e-05, "loss": 1.7359, "step": 50048 }, { "epoch": 0.09882342958542867, "grad_norm": 0.09794504195451736, "learning_rate": 9.048332322860075e-05, "loss": 1.7671, "step": 50080 }, { "epoch": 0.09888657554682512, "grad_norm": 0.10445244610309601, "learning_rate": 9.047698300423607e-05, "loss": 1.7574, "step": 50112 }, { "epoch": 0.09894972150822155, "grad_norm": 0.11278077960014343, "learning_rate": 9.047064277987137e-05, "loss": 1.7454, "step": 50144 }, { "epoch": 0.099012867469618, "grad_norm": 0.10108131170272827, "learning_rate": 9.046430255550668e-05, "loss": 1.7542, "step": 50176 }, { "epoch": 0.09907601343101444, "grad_norm": 0.0978446826338768, "learning_rate": 9.0457962331142e-05, "loss": 1.7593, "step": 50208 }, { "epoch": 0.09913915939241087, "grad_norm": 0.10433097183704376, "learning_rate": 9.045162210677731e-05, "loss": 1.7427, "step": 50240 }, { "epoch": 0.09920230535380732, "grad_norm": 0.10728266835212708, "learning_rate": 9.044528188241261e-05, "loss": 1.7432, "step": 50272 }, { "epoch": 0.09926545131520376, "grad_norm": 0.10774000734090805, "learning_rate": 9.043894165804793e-05, "loss": 1.7526, "step": 50304 }, { "epoch": 0.09932859727660019, "grad_norm": 0.1002371534705162, "learning_rate": 9.043260143368324e-05, "loss": 1.7552, "step": 50336 }, { "epoch": 0.09939174323799664, "grad_norm": 0.09350822120904922, "learning_rate": 9.042626120931856e-05, "loss": 1.7462, "step": 50368 }, { "epoch": 0.09945488919939308, "grad_norm": 0.09911242127418518, "learning_rate": 9.041992098495386e-05, "loss": 1.763, "step": 50400 }, { "epoch": 0.09951803516078951, "grad_norm": 0.09527602791786194, "learning_rate": 9.041358076058917e-05, "loss": 1.7546, "step": 50432 }, { "epoch": 0.09958118112218596, "grad_norm": 0.11248945444822311, "learning_rate": 9.040724053622447e-05, "loss": 1.7441, "step": 50464 }, { "epoch": 0.0996443270835824, "grad_norm": 0.0988423228263855, "learning_rate": 9.040090031185979e-05, "loss": 1.7575, "step": 50496 }, { "epoch": 0.09970747304497883, "grad_norm": 0.09794781357049942, "learning_rate": 9.03945600874951e-05, "loss": 1.7613, "step": 50528 }, { "epoch": 0.09977061900637528, "grad_norm": 0.09636078774929047, "learning_rate": 9.03882198631304e-05, "loss": 1.734, "step": 50560 }, { "epoch": 0.09983376496777172, "grad_norm": 0.10354897379875183, "learning_rate": 9.038187963876572e-05, "loss": 1.7527, "step": 50592 }, { "epoch": 0.09989691092916815, "grad_norm": 0.10228206217288971, "learning_rate": 9.037553941440103e-05, "loss": 1.757, "step": 50624 }, { "epoch": 0.0999600568905646, "grad_norm": 0.09956642240285873, "learning_rate": 9.036919919003635e-05, "loss": 1.7482, "step": 50656 }, { "epoch": 0.10002320285196103, "grad_norm": 0.09959997236728668, "learning_rate": 9.036285896567165e-05, "loss": 1.7565, "step": 50688 }, { "epoch": 0.10008634881335747, "grad_norm": 0.09238331764936447, "learning_rate": 9.035651874130696e-05, "loss": 1.7331, "step": 50720 }, { "epoch": 0.10014949477475392, "grad_norm": 0.1037338376045227, "learning_rate": 9.035017851694228e-05, "loss": 1.752, "step": 50752 }, { "epoch": 0.10021264073615035, "grad_norm": 0.09862665832042694, "learning_rate": 9.034383829257759e-05, "loss": 1.7587, "step": 50784 }, { "epoch": 0.10027578669754679, "grad_norm": 0.10530706495046616, "learning_rate": 9.033749806821289e-05, "loss": 1.7563, "step": 50816 }, { "epoch": 0.10033893265894324, "grad_norm": 0.11070514470338821, "learning_rate": 9.033115784384821e-05, "loss": 1.7535, "step": 50848 }, { "epoch": 0.10040207862033967, "grad_norm": 0.1009884625673294, "learning_rate": 9.032481761948351e-05, "loss": 1.7684, "step": 50880 }, { "epoch": 0.10046522458173611, "grad_norm": 0.09597666561603546, "learning_rate": 9.031847739511882e-05, "loss": 1.7538, "step": 50912 }, { "epoch": 0.10052837054313256, "grad_norm": 0.09755109995603561, "learning_rate": 9.031213717075412e-05, "loss": 1.7545, "step": 50944 }, { "epoch": 0.100591516504529, "grad_norm": 0.12461982667446136, "learning_rate": 9.030579694638944e-05, "loss": 1.7583, "step": 50976 }, { "epoch": 0.10065466246592544, "grad_norm": 0.09848106652498245, "learning_rate": 9.029945672202475e-05, "loss": 1.7576, "step": 51008 }, { "epoch": 0.10071780842732188, "grad_norm": 0.09583540260791779, "learning_rate": 9.029311649766007e-05, "loss": 1.7342, "step": 51040 }, { "epoch": 0.10078095438871831, "grad_norm": 0.09553193300962448, "learning_rate": 9.028677627329537e-05, "loss": 1.7515, "step": 51072 }, { "epoch": 0.10084410035011476, "grad_norm": 0.10564205050468445, "learning_rate": 9.028043604893068e-05, "loss": 1.7594, "step": 51104 }, { "epoch": 0.1009072463115112, "grad_norm": 0.11469171941280365, "learning_rate": 9.0274095824566e-05, "loss": 1.7479, "step": 51136 }, { "epoch": 0.10097039227290763, "grad_norm": 0.09900109469890594, "learning_rate": 9.026775560020131e-05, "loss": 1.7549, "step": 51168 }, { "epoch": 0.10103353823430408, "grad_norm": 0.10750380903482437, "learning_rate": 9.026141537583663e-05, "loss": 1.7587, "step": 51200 }, { "epoch": 0.10109668419570052, "grad_norm": 0.09829618036746979, "learning_rate": 9.025507515147193e-05, "loss": 1.7375, "step": 51232 }, { "epoch": 0.10115983015709695, "grad_norm": 0.09377654641866684, "learning_rate": 9.024873492710724e-05, "loss": 1.7531, "step": 51264 }, { "epoch": 0.1012229761184934, "grad_norm": 0.09642499685287476, "learning_rate": 9.024239470274254e-05, "loss": 1.7497, "step": 51296 }, { "epoch": 0.10128612207988984, "grad_norm": 0.09268879145383835, "learning_rate": 9.023605447837786e-05, "loss": 1.7363, "step": 51328 }, { "epoch": 0.10134926804128627, "grad_norm": 0.10005810856819153, "learning_rate": 9.022971425401316e-05, "loss": 1.7644, "step": 51360 }, { "epoch": 0.10141241400268272, "grad_norm": 0.1035057082772255, "learning_rate": 9.022337402964847e-05, "loss": 1.7439, "step": 51392 }, { "epoch": 0.10147555996407916, "grad_norm": 0.09726722538471222, "learning_rate": 9.021703380528379e-05, "loss": 1.7447, "step": 51424 }, { "epoch": 0.10153870592547559, "grad_norm": 0.11155097931623459, "learning_rate": 9.02106935809191e-05, "loss": 1.7487, "step": 51456 }, { "epoch": 0.10160185188687204, "grad_norm": 0.09736192971467972, "learning_rate": 9.02043533565544e-05, "loss": 1.7633, "step": 51488 }, { "epoch": 0.10166499784826848, "grad_norm": 0.09542526304721832, "learning_rate": 9.019801313218972e-05, "loss": 1.7509, "step": 51520 }, { "epoch": 0.10172814380966491, "grad_norm": 0.10050925612449646, "learning_rate": 9.019167290782503e-05, "loss": 1.7541, "step": 51552 }, { "epoch": 0.10179128977106136, "grad_norm": 0.09684551507234573, "learning_rate": 9.018533268346035e-05, "loss": 1.7556, "step": 51584 }, { "epoch": 0.1018544357324578, "grad_norm": 0.09515704214572906, "learning_rate": 9.017899245909565e-05, "loss": 1.7416, "step": 51616 }, { "epoch": 0.10191758169385423, "grad_norm": 0.10582724958658218, "learning_rate": 9.017265223473096e-05, "loss": 1.7554, "step": 51648 }, { "epoch": 0.10198072765525068, "grad_norm": 0.131036639213562, "learning_rate": 9.016631201036628e-05, "loss": 1.7368, "step": 51680 }, { "epoch": 0.10204387361664712, "grad_norm": 0.09360115230083466, "learning_rate": 9.015997178600158e-05, "loss": 1.7453, "step": 51712 }, { "epoch": 0.10210701957804355, "grad_norm": 0.10363040864467621, "learning_rate": 9.01536315616369e-05, "loss": 1.7382, "step": 51744 }, { "epoch": 0.10217016553944, "grad_norm": 0.10199641436338425, "learning_rate": 9.01472913372722e-05, "loss": 1.7367, "step": 51776 }, { "epoch": 0.10223331150083644, "grad_norm": 0.09684382379055023, "learning_rate": 9.014095111290751e-05, "loss": 1.7474, "step": 51808 }, { "epoch": 0.10229645746223288, "grad_norm": 0.0927148312330246, "learning_rate": 9.013461088854282e-05, "loss": 1.7359, "step": 51840 }, { "epoch": 0.10235960342362932, "grad_norm": 0.0999477207660675, "learning_rate": 9.012827066417814e-05, "loss": 1.7423, "step": 51872 }, { "epoch": 0.10242274938502575, "grad_norm": 0.11755125969648361, "learning_rate": 9.012193043981344e-05, "loss": 1.728, "step": 51904 }, { "epoch": 0.1024858953464222, "grad_norm": 0.10312087833881378, "learning_rate": 9.011559021544875e-05, "loss": 1.7523, "step": 51936 }, { "epoch": 0.10254904130781864, "grad_norm": 0.10827986896038055, "learning_rate": 9.010924999108407e-05, "loss": 1.7375, "step": 51968 }, { "epoch": 0.10261218726921507, "grad_norm": 0.09504279494285583, "learning_rate": 9.010290976671938e-05, "loss": 1.7525, "step": 52000 }, { "epoch": 0.10267533323061152, "grad_norm": 0.10902871936559677, "learning_rate": 9.009656954235468e-05, "loss": 1.7462, "step": 52032 }, { "epoch": 0.10273847919200796, "grad_norm": 0.09501804411411285, "learning_rate": 9.009022931799e-05, "loss": 1.7481, "step": 52064 }, { "epoch": 0.1028016251534044, "grad_norm": 0.10616398602724075, "learning_rate": 9.008388909362531e-05, "loss": 1.734, "step": 52096 }, { "epoch": 0.10286477111480084, "grad_norm": 0.10708317905664444, "learning_rate": 9.007754886926061e-05, "loss": 1.756, "step": 52128 }, { "epoch": 0.10292791707619728, "grad_norm": 0.09232480823993683, "learning_rate": 9.007120864489591e-05, "loss": 1.7443, "step": 52160 }, { "epoch": 0.10299106303759371, "grad_norm": 0.0943201407790184, "learning_rate": 9.006486842053123e-05, "loss": 1.7603, "step": 52192 }, { "epoch": 0.10305420899899016, "grad_norm": 0.10176809132099152, "learning_rate": 9.005852819616654e-05, "loss": 1.7408, "step": 52224 }, { "epoch": 0.1031173549603866, "grad_norm": 0.10179544240236282, "learning_rate": 9.005218797180186e-05, "loss": 1.7465, "step": 52256 }, { "epoch": 0.10318050092178303, "grad_norm": 0.09759853035211563, "learning_rate": 9.004584774743716e-05, "loss": 1.7493, "step": 52288 }, { "epoch": 0.10324364688317948, "grad_norm": 0.10105500370264053, "learning_rate": 9.003950752307247e-05, "loss": 1.7492, "step": 52320 }, { "epoch": 0.10330679284457592, "grad_norm": 0.10510502755641937, "learning_rate": 9.003316729870779e-05, "loss": 1.7401, "step": 52352 }, { "epoch": 0.10336993880597235, "grad_norm": 0.09681029617786407, "learning_rate": 9.00268270743431e-05, "loss": 1.7497, "step": 52384 }, { "epoch": 0.1034330847673688, "grad_norm": 0.09967440366744995, "learning_rate": 9.00204868499784e-05, "loss": 1.7477, "step": 52416 }, { "epoch": 0.10349623072876524, "grad_norm": 0.0955488458275795, "learning_rate": 9.001414662561372e-05, "loss": 1.7481, "step": 52448 }, { "epoch": 0.10355937669016167, "grad_norm": 0.10663046687841415, "learning_rate": 9.000780640124903e-05, "loss": 1.741, "step": 52480 }, { "epoch": 0.10362252265155812, "grad_norm": 0.09638260304927826, "learning_rate": 9.000146617688435e-05, "loss": 1.7371, "step": 52512 }, { "epoch": 0.10368566861295456, "grad_norm": 0.11361704021692276, "learning_rate": 8.999512595251965e-05, "loss": 1.7477, "step": 52544 }, { "epoch": 0.10374881457435099, "grad_norm": 0.09574690461158752, "learning_rate": 8.998878572815495e-05, "loss": 1.7409, "step": 52576 }, { "epoch": 0.10381196053574744, "grad_norm": 0.1048581600189209, "learning_rate": 8.998244550379026e-05, "loss": 1.742, "step": 52608 }, { "epoch": 0.10387510649714388, "grad_norm": 0.09773201495409012, "learning_rate": 8.997610527942558e-05, "loss": 1.7472, "step": 52640 }, { "epoch": 0.10393825245854031, "grad_norm": 0.09244991838932037, "learning_rate": 8.99697650550609e-05, "loss": 1.7266, "step": 52672 }, { "epoch": 0.10400139841993676, "grad_norm": 0.10884491354227066, "learning_rate": 8.99634248306962e-05, "loss": 1.7568, "step": 52704 }, { "epoch": 0.1040645443813332, "grad_norm": 0.09903937578201294, "learning_rate": 8.995708460633151e-05, "loss": 1.7393, "step": 52736 }, { "epoch": 0.10412769034272965, "grad_norm": 0.10828418284654617, "learning_rate": 8.995074438196682e-05, "loss": 1.7463, "step": 52768 }, { "epoch": 0.10419083630412608, "grad_norm": 0.10341514647006989, "learning_rate": 8.994440415760214e-05, "loss": 1.7403, "step": 52800 }, { "epoch": 0.10425398226552252, "grad_norm": 0.09311387687921524, "learning_rate": 8.993806393323744e-05, "loss": 1.7469, "step": 52832 }, { "epoch": 0.10431712822691896, "grad_norm": 0.1073962152004242, "learning_rate": 8.993172370887275e-05, "loss": 1.7437, "step": 52864 }, { "epoch": 0.1043802741883154, "grad_norm": 0.10534194856882095, "learning_rate": 8.992538348450807e-05, "loss": 1.7476, "step": 52896 }, { "epoch": 0.10444342014971184, "grad_norm": 0.11144720017910004, "learning_rate": 8.991904326014338e-05, "loss": 1.7348, "step": 52928 }, { "epoch": 0.10450656611110828, "grad_norm": 0.09449729323387146, "learning_rate": 8.991270303577868e-05, "loss": 1.7415, "step": 52960 }, { "epoch": 0.10456971207250472, "grad_norm": 0.1120440736413002, "learning_rate": 8.990636281141399e-05, "loss": 1.7415, "step": 52992 }, { "epoch": 0.10463285803390115, "grad_norm": 0.1097162514925003, "learning_rate": 8.99000225870493e-05, "loss": 1.7453, "step": 53024 }, { "epoch": 0.1046960039952976, "grad_norm": 0.09272979199886322, "learning_rate": 8.989368236268461e-05, "loss": 1.7514, "step": 53056 }, { "epoch": 0.10475914995669404, "grad_norm": 0.10094426572322845, "learning_rate": 8.988734213831992e-05, "loss": 1.7378, "step": 53088 }, { "epoch": 0.10482229591809047, "grad_norm": 0.11835931241512299, "learning_rate": 8.988100191395523e-05, "loss": 1.742, "step": 53120 }, { "epoch": 0.10488544187948692, "grad_norm": 0.09697704017162323, "learning_rate": 8.987466168959054e-05, "loss": 1.7373, "step": 53152 }, { "epoch": 0.10494858784088336, "grad_norm": 0.1052129715681076, "learning_rate": 8.986832146522586e-05, "loss": 1.732, "step": 53184 }, { "epoch": 0.1050117338022798, "grad_norm": 0.09859216213226318, "learning_rate": 8.986198124086117e-05, "loss": 1.7252, "step": 53216 }, { "epoch": 0.10507487976367624, "grad_norm": 0.09212696552276611, "learning_rate": 8.985564101649647e-05, "loss": 1.7544, "step": 53248 }, { "epoch": 0.10513802572507268, "grad_norm": 0.11304720491170883, "learning_rate": 8.984930079213179e-05, "loss": 1.7486, "step": 53280 }, { "epoch": 0.10520117168646911, "grad_norm": 0.09030237048864365, "learning_rate": 8.98429605677671e-05, "loss": 1.7276, "step": 53312 }, { "epoch": 0.10526431764786556, "grad_norm": 0.09863579273223877, "learning_rate": 8.98366203434024e-05, "loss": 1.7398, "step": 53344 }, { "epoch": 0.105327463609262, "grad_norm": 0.1069789007306099, "learning_rate": 8.983028011903772e-05, "loss": 1.7422, "step": 53376 }, { "epoch": 0.10539060957065843, "grad_norm": 0.11715696007013321, "learning_rate": 8.982393989467302e-05, "loss": 1.7258, "step": 53408 }, { "epoch": 0.10545375553205488, "grad_norm": 0.10883591324090958, "learning_rate": 8.981759967030833e-05, "loss": 1.7326, "step": 53440 }, { "epoch": 0.10551690149345132, "grad_norm": 0.09361957013607025, "learning_rate": 8.981125944594365e-05, "loss": 1.7437, "step": 53472 }, { "epoch": 0.10558004745484775, "grad_norm": 0.09470156580209732, "learning_rate": 8.980491922157895e-05, "loss": 1.7413, "step": 53504 }, { "epoch": 0.1056431934162442, "grad_norm": 0.09954388439655304, "learning_rate": 8.979857899721426e-05, "loss": 1.744, "step": 53536 }, { "epoch": 0.10570633937764064, "grad_norm": 0.09837543964385986, "learning_rate": 8.979223877284958e-05, "loss": 1.7267, "step": 53568 }, { "epoch": 0.10576948533903709, "grad_norm": 0.10575970262289047, "learning_rate": 8.97858985484849e-05, "loss": 1.7402, "step": 53600 }, { "epoch": 0.10583263130043352, "grad_norm": 0.09170810878276825, "learning_rate": 8.97795583241202e-05, "loss": 1.7413, "step": 53632 }, { "epoch": 0.10589577726182996, "grad_norm": 0.09788578003644943, "learning_rate": 8.977321809975551e-05, "loss": 1.744, "step": 53664 }, { "epoch": 0.1059589232232264, "grad_norm": 0.09518428891897202, "learning_rate": 8.976687787539082e-05, "loss": 1.7404, "step": 53696 }, { "epoch": 0.10602206918462284, "grad_norm": 0.09772198647260666, "learning_rate": 8.976053765102614e-05, "loss": 1.7378, "step": 53728 }, { "epoch": 0.10608521514601928, "grad_norm": 0.09868532419204712, "learning_rate": 8.975419742666144e-05, "loss": 1.7313, "step": 53760 }, { "epoch": 0.10614836110741573, "grad_norm": 0.09947213530540466, "learning_rate": 8.974785720229675e-05, "loss": 1.7266, "step": 53792 }, { "epoch": 0.10621150706881216, "grad_norm": 0.09919548779726028, "learning_rate": 8.974151697793206e-05, "loss": 1.7165, "step": 53824 }, { "epoch": 0.1062746530302086, "grad_norm": 0.09903539717197418, "learning_rate": 8.973517675356737e-05, "loss": 1.7326, "step": 53856 }, { "epoch": 0.10633779899160505, "grad_norm": 0.1100149005651474, "learning_rate": 8.972883652920268e-05, "loss": 1.7391, "step": 53888 }, { "epoch": 0.10640094495300148, "grad_norm": 0.10203427821397781, "learning_rate": 8.972249630483799e-05, "loss": 1.7243, "step": 53920 }, { "epoch": 0.10646409091439792, "grad_norm": 0.10099450498819351, "learning_rate": 8.97161560804733e-05, "loss": 1.7305, "step": 53952 }, { "epoch": 0.10652723687579436, "grad_norm": 0.0974942222237587, "learning_rate": 8.970981585610861e-05, "loss": 1.7459, "step": 53984 }, { "epoch": 0.1065903828371908, "grad_norm": 0.1016789898276329, "learning_rate": 8.970347563174393e-05, "loss": 1.7316, "step": 54016 }, { "epoch": 0.10665352879858724, "grad_norm": 0.10867282003164291, "learning_rate": 8.969713540737923e-05, "loss": 1.7457, "step": 54048 }, { "epoch": 0.10671667475998368, "grad_norm": 0.10432814061641693, "learning_rate": 8.969079518301454e-05, "loss": 1.739, "step": 54080 }, { "epoch": 0.10677982072138012, "grad_norm": 0.10358621925115585, "learning_rate": 8.968445495864986e-05, "loss": 1.7187, "step": 54112 }, { "epoch": 0.10684296668277656, "grad_norm": 0.11256913095712662, "learning_rate": 8.967811473428517e-05, "loss": 1.7448, "step": 54144 }, { "epoch": 0.106906112644173, "grad_norm": 0.09977933764457703, "learning_rate": 8.967177450992047e-05, "loss": 1.7367, "step": 54176 }, { "epoch": 0.10696925860556944, "grad_norm": 0.09639386832714081, "learning_rate": 8.966543428555579e-05, "loss": 1.7493, "step": 54208 }, { "epoch": 0.10703240456696587, "grad_norm": 0.11345695704221725, "learning_rate": 8.965909406119109e-05, "loss": 1.7533, "step": 54240 }, { "epoch": 0.10709555052836232, "grad_norm": 0.09637809544801712, "learning_rate": 8.96527538368264e-05, "loss": 1.7382, "step": 54272 }, { "epoch": 0.10715869648975876, "grad_norm": 0.1110880970954895, "learning_rate": 8.96464136124617e-05, "loss": 1.7507, "step": 54304 }, { "epoch": 0.1072218424511552, "grad_norm": 0.09914442896842957, "learning_rate": 8.964007338809702e-05, "loss": 1.7505, "step": 54336 }, { "epoch": 0.10728498841255164, "grad_norm": 0.09976788610219955, "learning_rate": 8.963373316373234e-05, "loss": 1.7446, "step": 54368 }, { "epoch": 0.10734813437394808, "grad_norm": 0.1007564514875412, "learning_rate": 8.962739293936765e-05, "loss": 1.7352, "step": 54400 }, { "epoch": 0.10741128033534453, "grad_norm": 0.09931594133377075, "learning_rate": 8.962105271500295e-05, "loss": 1.7415, "step": 54432 }, { "epoch": 0.10747442629674096, "grad_norm": 0.10601203143596649, "learning_rate": 8.961471249063827e-05, "loss": 1.7268, "step": 54464 }, { "epoch": 0.1075375722581374, "grad_norm": 0.11378839612007141, "learning_rate": 8.960837226627358e-05, "loss": 1.7416, "step": 54496 }, { "epoch": 0.10760071821953385, "grad_norm": 0.09632743895053864, "learning_rate": 8.96020320419089e-05, "loss": 1.7297, "step": 54528 }, { "epoch": 0.10766386418093028, "grad_norm": 0.09724630415439606, "learning_rate": 8.959569181754421e-05, "loss": 1.7426, "step": 54560 }, { "epoch": 0.10772701014232672, "grad_norm": 0.09703235328197479, "learning_rate": 8.958935159317951e-05, "loss": 1.7295, "step": 54592 }, { "epoch": 0.10779015610372317, "grad_norm": 0.11917123943567276, "learning_rate": 8.958301136881481e-05, "loss": 1.7309, "step": 54624 }, { "epoch": 0.1078533020651196, "grad_norm": 0.09530433267354965, "learning_rate": 8.957667114445013e-05, "loss": 1.7289, "step": 54656 }, { "epoch": 0.10791644802651604, "grad_norm": 0.10603784769773483, "learning_rate": 8.957033092008544e-05, "loss": 1.7499, "step": 54688 }, { "epoch": 0.10797959398791249, "grad_norm": 0.0967014953494072, "learning_rate": 8.956399069572074e-05, "loss": 1.7316, "step": 54720 }, { "epoch": 0.10804273994930892, "grad_norm": 0.09749648720026016, "learning_rate": 8.955765047135606e-05, "loss": 1.7405, "step": 54752 }, { "epoch": 0.10810588591070536, "grad_norm": 0.0986323282122612, "learning_rate": 8.955131024699137e-05, "loss": 1.7389, "step": 54784 }, { "epoch": 0.1081690318721018, "grad_norm": 0.09602555632591248, "learning_rate": 8.954497002262668e-05, "loss": 1.7367, "step": 54816 }, { "epoch": 0.10823217783349824, "grad_norm": 0.09541743993759155, "learning_rate": 8.953862979826199e-05, "loss": 1.7159, "step": 54848 }, { "epoch": 0.10829532379489468, "grad_norm": 0.12124884873628616, "learning_rate": 8.95322895738973e-05, "loss": 1.728, "step": 54880 }, { "epoch": 0.10835846975629113, "grad_norm": 0.09567489475011826, "learning_rate": 8.952594934953261e-05, "loss": 1.7364, "step": 54912 }, { "epoch": 0.10842161571768756, "grad_norm": 0.09893244504928589, "learning_rate": 8.951960912516793e-05, "loss": 1.7361, "step": 54944 }, { "epoch": 0.108484761679084, "grad_norm": 0.0997154638171196, "learning_rate": 8.951326890080323e-05, "loss": 1.7301, "step": 54976 }, { "epoch": 0.10854790764048045, "grad_norm": 0.09740784764289856, "learning_rate": 8.950692867643854e-05, "loss": 1.7363, "step": 55008 }, { "epoch": 0.10861105360187688, "grad_norm": 0.09866963326931, "learning_rate": 8.950058845207385e-05, "loss": 1.7195, "step": 55040 }, { "epoch": 0.10867419956327332, "grad_norm": 0.1122496947646141, "learning_rate": 8.949424822770916e-05, "loss": 1.7322, "step": 55072 }, { "epoch": 0.10873734552466977, "grad_norm": 0.11134584248065948, "learning_rate": 8.948790800334446e-05, "loss": 1.7324, "step": 55104 }, { "epoch": 0.1088004914860662, "grad_norm": 0.09876733273267746, "learning_rate": 8.948156777897978e-05, "loss": 1.7384, "step": 55136 }, { "epoch": 0.10886363744746264, "grad_norm": 0.10825956612825394, "learning_rate": 8.947522755461509e-05, "loss": 1.7386, "step": 55168 }, { "epoch": 0.10892678340885908, "grad_norm": 0.09949720650911331, "learning_rate": 8.94688873302504e-05, "loss": 1.7369, "step": 55200 }, { "epoch": 0.10898992937025552, "grad_norm": 0.1249699518084526, "learning_rate": 8.946254710588572e-05, "loss": 1.7318, "step": 55232 }, { "epoch": 0.10905307533165197, "grad_norm": 0.10329774022102356, "learning_rate": 8.945620688152102e-05, "loss": 1.7334, "step": 55264 }, { "epoch": 0.1091162212930484, "grad_norm": 0.09686914086341858, "learning_rate": 8.944986665715634e-05, "loss": 1.7248, "step": 55296 }, { "epoch": 0.10917936725444484, "grad_norm": 0.10577890276908875, "learning_rate": 8.944352643279165e-05, "loss": 1.7601, "step": 55328 }, { "epoch": 0.10924251321584129, "grad_norm": 0.10029914230108261, "learning_rate": 8.943718620842696e-05, "loss": 1.73, "step": 55360 }, { "epoch": 0.10930565917723772, "grad_norm": 0.10715453326702118, "learning_rate": 8.943084598406227e-05, "loss": 1.7327, "step": 55392 }, { "epoch": 0.10936880513863416, "grad_norm": 0.10437731444835663, "learning_rate": 8.942450575969758e-05, "loss": 1.7187, "step": 55424 }, { "epoch": 0.10943195110003061, "grad_norm": 0.09671631455421448, "learning_rate": 8.941816553533288e-05, "loss": 1.7194, "step": 55456 }, { "epoch": 0.10949509706142704, "grad_norm": 0.10065539181232452, "learning_rate": 8.94118253109682e-05, "loss": 1.7306, "step": 55488 }, { "epoch": 0.10955824302282348, "grad_norm": 0.09755487740039825, "learning_rate": 8.94054850866035e-05, "loss": 1.7344, "step": 55520 }, { "epoch": 0.10962138898421993, "grad_norm": 0.09347401559352875, "learning_rate": 8.939914486223881e-05, "loss": 1.7286, "step": 55552 }, { "epoch": 0.10968453494561636, "grad_norm": 0.09633270651102066, "learning_rate": 8.939280463787413e-05, "loss": 1.7531, "step": 55584 }, { "epoch": 0.1097476809070128, "grad_norm": 0.0984179675579071, "learning_rate": 8.938646441350944e-05, "loss": 1.7392, "step": 55616 }, { "epoch": 0.10981082686840925, "grad_norm": 0.09896954894065857, "learning_rate": 8.938012418914474e-05, "loss": 1.7257, "step": 55648 }, { "epoch": 0.10987397282980568, "grad_norm": 0.0924028754234314, "learning_rate": 8.937378396478006e-05, "loss": 1.7216, "step": 55680 }, { "epoch": 0.10993711879120212, "grad_norm": 0.09533971548080444, "learning_rate": 8.936744374041537e-05, "loss": 1.7245, "step": 55712 }, { "epoch": 0.11000026475259857, "grad_norm": 0.0952165424823761, "learning_rate": 8.936110351605069e-05, "loss": 1.7227, "step": 55744 }, { "epoch": 0.110063410713995, "grad_norm": 0.1044548973441124, "learning_rate": 8.935476329168599e-05, "loss": 1.7389, "step": 55776 }, { "epoch": 0.11012655667539144, "grad_norm": 0.100387804210186, "learning_rate": 8.93484230673213e-05, "loss": 1.7347, "step": 55808 }, { "epoch": 0.11018970263678789, "grad_norm": 0.09685352444648743, "learning_rate": 8.934208284295662e-05, "loss": 1.7543, "step": 55840 }, { "epoch": 0.11025284859818432, "grad_norm": 0.10507971793413162, "learning_rate": 8.933574261859192e-05, "loss": 1.7258, "step": 55872 }, { "epoch": 0.11031599455958076, "grad_norm": 0.09932276606559753, "learning_rate": 8.932940239422723e-05, "loss": 1.7273, "step": 55904 }, { "epoch": 0.1103791405209772, "grad_norm": 0.1030394434928894, "learning_rate": 8.932306216986253e-05, "loss": 1.737, "step": 55936 }, { "epoch": 0.11044228648237364, "grad_norm": 0.11110737919807434, "learning_rate": 8.931672194549785e-05, "loss": 1.7277, "step": 55968 }, { "epoch": 0.11050543244377008, "grad_norm": 0.1007714718580246, "learning_rate": 8.931038172113316e-05, "loss": 1.7252, "step": 56000 }, { "epoch": 0.11056857840516653, "grad_norm": 0.10777892917394638, "learning_rate": 8.930404149676848e-05, "loss": 1.7268, "step": 56032 }, { "epoch": 0.11063172436656296, "grad_norm": 0.09986016899347305, "learning_rate": 8.929770127240378e-05, "loss": 1.7196, "step": 56064 }, { "epoch": 0.11069487032795941, "grad_norm": 0.09925538301467896, "learning_rate": 8.929136104803909e-05, "loss": 1.7371, "step": 56096 }, { "epoch": 0.11075801628935585, "grad_norm": 0.10937350243330002, "learning_rate": 8.92850208236744e-05, "loss": 1.7441, "step": 56128 }, { "epoch": 0.11082116225075228, "grad_norm": 0.0906435176730156, "learning_rate": 8.927868059930972e-05, "loss": 1.7282, "step": 56160 }, { "epoch": 0.11088430821214873, "grad_norm": 0.10186203569173813, "learning_rate": 8.927234037494502e-05, "loss": 1.7303, "step": 56192 }, { "epoch": 0.11094745417354517, "grad_norm": 0.10142674297094345, "learning_rate": 8.926600015058034e-05, "loss": 1.7121, "step": 56224 }, { "epoch": 0.1110106001349416, "grad_norm": 0.09603843837976456, "learning_rate": 8.925965992621565e-05, "loss": 1.725, "step": 56256 }, { "epoch": 0.11107374609633805, "grad_norm": 0.09735945612192154, "learning_rate": 8.925331970185095e-05, "loss": 1.7226, "step": 56288 }, { "epoch": 0.11113689205773448, "grad_norm": 0.101629838347435, "learning_rate": 8.924697947748625e-05, "loss": 1.7249, "step": 56320 }, { "epoch": 0.11120003801913092, "grad_norm": 0.09802871197462082, "learning_rate": 8.924063925312157e-05, "loss": 1.7285, "step": 56352 }, { "epoch": 0.11126318398052737, "grad_norm": 0.09986495226621628, "learning_rate": 8.923429902875688e-05, "loss": 1.7227, "step": 56384 }, { "epoch": 0.1113263299419238, "grad_norm": 0.10159213840961456, "learning_rate": 8.92279588043922e-05, "loss": 1.722, "step": 56416 }, { "epoch": 0.11138947590332024, "grad_norm": 0.09450211375951767, "learning_rate": 8.92216185800275e-05, "loss": 1.7415, "step": 56448 }, { "epoch": 0.11145262186471669, "grad_norm": 0.10010956227779388, "learning_rate": 8.921527835566281e-05, "loss": 1.7214, "step": 56480 }, { "epoch": 0.11151576782611312, "grad_norm": 0.10517623275518417, "learning_rate": 8.920893813129813e-05, "loss": 1.7443, "step": 56512 }, { "epoch": 0.11157891378750956, "grad_norm": 0.10186073929071426, "learning_rate": 8.920259790693344e-05, "loss": 1.7316, "step": 56544 }, { "epoch": 0.11164205974890601, "grad_norm": 0.10859499126672745, "learning_rate": 8.919625768256876e-05, "loss": 1.7185, "step": 56576 }, { "epoch": 0.11170520571030244, "grad_norm": 0.09792271256446838, "learning_rate": 8.918991745820406e-05, "loss": 1.7227, "step": 56608 }, { "epoch": 0.11176835167169888, "grad_norm": 0.10137863457202911, "learning_rate": 8.918357723383937e-05, "loss": 1.7333, "step": 56640 }, { "epoch": 0.11183149763309533, "grad_norm": 0.0956532284617424, "learning_rate": 8.917723700947469e-05, "loss": 1.7403, "step": 56672 }, { "epoch": 0.11189464359449176, "grad_norm": 0.09691862016916275, "learning_rate": 8.917089678510999e-05, "loss": 1.7317, "step": 56704 }, { "epoch": 0.1119577895558882, "grad_norm": 0.1034805104136467, "learning_rate": 8.916455656074529e-05, "loss": 1.7369, "step": 56736 }, { "epoch": 0.11202093551728465, "grad_norm": 0.0991164967417717, "learning_rate": 8.91582163363806e-05, "loss": 1.7297, "step": 56768 }, { "epoch": 0.11208408147868108, "grad_norm": 0.10262954980134964, "learning_rate": 8.915187611201592e-05, "loss": 1.7233, "step": 56800 }, { "epoch": 0.11214722744007752, "grad_norm": 0.10207094252109528, "learning_rate": 8.914553588765123e-05, "loss": 1.7227, "step": 56832 }, { "epoch": 0.11221037340147397, "grad_norm": 0.10577253252267838, "learning_rate": 8.913919566328653e-05, "loss": 1.7331, "step": 56864 }, { "epoch": 0.1122735193628704, "grad_norm": 0.11363503336906433, "learning_rate": 8.913285543892185e-05, "loss": 1.7172, "step": 56896 }, { "epoch": 0.11233666532426685, "grad_norm": 0.09643087536096573, "learning_rate": 8.912651521455716e-05, "loss": 1.7321, "step": 56928 }, { "epoch": 0.11239981128566329, "grad_norm": 0.11154823005199432, "learning_rate": 8.912017499019248e-05, "loss": 1.726, "step": 56960 }, { "epoch": 0.11246295724705972, "grad_norm": 0.10231855511665344, "learning_rate": 8.911383476582778e-05, "loss": 1.7308, "step": 56992 }, { "epoch": 0.11252610320845617, "grad_norm": 0.0947440043091774, "learning_rate": 8.910749454146309e-05, "loss": 1.7315, "step": 57024 }, { "epoch": 0.1125892491698526, "grad_norm": 0.10054609179496765, "learning_rate": 8.91011543170984e-05, "loss": 1.727, "step": 57056 }, { "epoch": 0.11265239513124904, "grad_norm": 0.09855375438928604, "learning_rate": 8.909481409273372e-05, "loss": 1.7351, "step": 57088 }, { "epoch": 0.11271554109264549, "grad_norm": 0.09844506531953812, "learning_rate": 8.908847386836902e-05, "loss": 1.7132, "step": 57120 }, { "epoch": 0.11277868705404193, "grad_norm": 0.10131185501813889, "learning_rate": 8.908213364400432e-05, "loss": 1.7026, "step": 57152 }, { "epoch": 0.11284183301543836, "grad_norm": 0.11000480502843857, "learning_rate": 8.907579341963964e-05, "loss": 1.719, "step": 57184 }, { "epoch": 0.11290497897683481, "grad_norm": 0.10741524398326874, "learning_rate": 8.906945319527495e-05, "loss": 1.7258, "step": 57216 }, { "epoch": 0.11296812493823125, "grad_norm": 0.10290786623954773, "learning_rate": 8.906311297091027e-05, "loss": 1.7338, "step": 57248 }, { "epoch": 0.11303127089962768, "grad_norm": 0.09821883589029312, "learning_rate": 8.905677274654557e-05, "loss": 1.7201, "step": 57280 }, { "epoch": 0.11309441686102413, "grad_norm": 0.09282093495130539, "learning_rate": 8.905043252218088e-05, "loss": 1.7238, "step": 57312 }, { "epoch": 0.11315756282242057, "grad_norm": 0.1000959724187851, "learning_rate": 8.90440922978162e-05, "loss": 1.7278, "step": 57344 }, { "epoch": 0.113220708783817, "grad_norm": 0.10023311525583267, "learning_rate": 8.903775207345151e-05, "loss": 1.7389, "step": 57376 }, { "epoch": 0.11328385474521345, "grad_norm": 0.11677708476781845, "learning_rate": 8.903141184908681e-05, "loss": 1.726, "step": 57408 }, { "epoch": 0.11334700070660989, "grad_norm": 0.10847847908735275, "learning_rate": 8.902507162472213e-05, "loss": 1.7173, "step": 57440 }, { "epoch": 0.11341014666800632, "grad_norm": 0.09579665958881378, "learning_rate": 8.901873140035744e-05, "loss": 1.714, "step": 57472 }, { "epoch": 0.11347329262940277, "grad_norm": 0.09655775129795074, "learning_rate": 8.901239117599274e-05, "loss": 1.7304, "step": 57504 }, { "epoch": 0.1135364385907992, "grad_norm": 0.1029743105173111, "learning_rate": 8.900605095162806e-05, "loss": 1.7349, "step": 57536 }, { "epoch": 0.11359958455219564, "grad_norm": 0.09406114369630814, "learning_rate": 8.899971072726336e-05, "loss": 1.7313, "step": 57568 }, { "epoch": 0.11366273051359209, "grad_norm": 0.09342000633478165, "learning_rate": 8.899337050289867e-05, "loss": 1.725, "step": 57600 }, { "epoch": 0.11372587647498852, "grad_norm": 0.09975459426641464, "learning_rate": 8.898703027853399e-05, "loss": 1.7249, "step": 57632 }, { "epoch": 0.11378902243638496, "grad_norm": 0.10720018297433853, "learning_rate": 8.898069005416929e-05, "loss": 1.731, "step": 57664 }, { "epoch": 0.11385216839778141, "grad_norm": 0.09851442277431488, "learning_rate": 8.89743498298046e-05, "loss": 1.7413, "step": 57696 }, { "epoch": 0.11391531435917784, "grad_norm": 0.09014596790075302, "learning_rate": 8.896800960543992e-05, "loss": 1.7276, "step": 57728 }, { "epoch": 0.11397846032057429, "grad_norm": 0.09720245003700256, "learning_rate": 8.896166938107523e-05, "loss": 1.7305, "step": 57760 }, { "epoch": 0.11404160628197073, "grad_norm": 0.09888971596956253, "learning_rate": 8.895532915671055e-05, "loss": 1.7381, "step": 57792 }, { "epoch": 0.11410475224336716, "grad_norm": 0.10641719400882721, "learning_rate": 8.894898893234585e-05, "loss": 1.7299, "step": 57824 }, { "epoch": 0.11416789820476361, "grad_norm": 0.10379938781261444, "learning_rate": 8.894264870798116e-05, "loss": 1.7114, "step": 57856 }, { "epoch": 0.11423104416616005, "grad_norm": 0.10532380640506744, "learning_rate": 8.893630848361648e-05, "loss": 1.7444, "step": 57888 }, { "epoch": 0.11429419012755648, "grad_norm": 0.11497674137353897, "learning_rate": 8.892996825925178e-05, "loss": 1.721, "step": 57920 }, { "epoch": 0.11435733608895293, "grad_norm": 0.09847824275493622, "learning_rate": 8.892362803488709e-05, "loss": 1.7108, "step": 57952 }, { "epoch": 0.11442048205034937, "grad_norm": 0.1033286601305008, "learning_rate": 8.891728781052239e-05, "loss": 1.7205, "step": 57984 }, { "epoch": 0.1144836280117458, "grad_norm": 0.10216425359249115, "learning_rate": 8.891094758615771e-05, "loss": 1.7115, "step": 58016 }, { "epoch": 0.11454677397314225, "grad_norm": 0.11663391441106796, "learning_rate": 8.890460736179302e-05, "loss": 1.7167, "step": 58048 }, { "epoch": 0.11460991993453869, "grad_norm": 0.10209348797798157, "learning_rate": 8.889826713742832e-05, "loss": 1.7327, "step": 58080 }, { "epoch": 0.11467306589593512, "grad_norm": 0.10272000730037689, "learning_rate": 8.889192691306364e-05, "loss": 1.716, "step": 58112 }, { "epoch": 0.11473621185733157, "grad_norm": 0.11681805551052094, "learning_rate": 8.888558668869895e-05, "loss": 1.7172, "step": 58144 }, { "epoch": 0.114799357818728, "grad_norm": 0.10092071443796158, "learning_rate": 8.887924646433427e-05, "loss": 1.7253, "step": 58176 }, { "epoch": 0.11486250378012444, "grad_norm": 0.09716968238353729, "learning_rate": 8.887290623996957e-05, "loss": 1.7247, "step": 58208 }, { "epoch": 0.11492564974152089, "grad_norm": 0.09291841089725494, "learning_rate": 8.886656601560488e-05, "loss": 1.7265, "step": 58240 }, { "epoch": 0.11498879570291733, "grad_norm": 0.10266035795211792, "learning_rate": 8.88602257912402e-05, "loss": 1.7138, "step": 58272 }, { "epoch": 0.11505194166431376, "grad_norm": 0.10050240904092789, "learning_rate": 8.885388556687551e-05, "loss": 1.7305, "step": 58304 }, { "epoch": 0.11511508762571021, "grad_norm": 0.10351265966892242, "learning_rate": 8.884754534251081e-05, "loss": 1.7236, "step": 58336 }, { "epoch": 0.11517823358710665, "grad_norm": 0.10937555134296417, "learning_rate": 8.884120511814613e-05, "loss": 1.731, "step": 58368 }, { "epoch": 0.11524137954850308, "grad_norm": 0.09581410884857178, "learning_rate": 8.883486489378143e-05, "loss": 1.7404, "step": 58400 }, { "epoch": 0.11530452550989953, "grad_norm": 0.09766798466444016, "learning_rate": 8.882852466941674e-05, "loss": 1.7239, "step": 58432 }, { "epoch": 0.11536767147129597, "grad_norm": 0.10437649488449097, "learning_rate": 8.882218444505206e-05, "loss": 1.7229, "step": 58464 }, { "epoch": 0.1154308174326924, "grad_norm": 0.10612741857767105, "learning_rate": 8.881584422068736e-05, "loss": 1.7104, "step": 58496 }, { "epoch": 0.11549396339408885, "grad_norm": 0.09655754268169403, "learning_rate": 8.880950399632267e-05, "loss": 1.7354, "step": 58528 }, { "epoch": 0.11555710935548529, "grad_norm": 0.09518884867429733, "learning_rate": 8.880316377195799e-05, "loss": 1.7071, "step": 58560 }, { "epoch": 0.11562025531688173, "grad_norm": 0.11409979313611984, "learning_rate": 8.87968235475933e-05, "loss": 1.7165, "step": 58592 }, { "epoch": 0.11568340127827817, "grad_norm": 0.10633133351802826, "learning_rate": 8.87904833232286e-05, "loss": 1.7251, "step": 58624 }, { "epoch": 0.1157465472396746, "grad_norm": 0.10412392020225525, "learning_rate": 8.878414309886392e-05, "loss": 1.7237, "step": 58656 }, { "epoch": 0.11580969320107105, "grad_norm": 0.10202760994434357, "learning_rate": 8.877780287449923e-05, "loss": 1.7154, "step": 58688 }, { "epoch": 0.11587283916246749, "grad_norm": 0.09684973955154419, "learning_rate": 8.877146265013455e-05, "loss": 1.7328, "step": 58720 }, { "epoch": 0.11593598512386392, "grad_norm": 0.09888684004545212, "learning_rate": 8.876512242576985e-05, "loss": 1.7259, "step": 58752 }, { "epoch": 0.11599913108526037, "grad_norm": 0.09473881870508194, "learning_rate": 8.875878220140515e-05, "loss": 1.7188, "step": 58784 }, { "epoch": 0.11606227704665681, "grad_norm": 0.10119607299566269, "learning_rate": 8.875244197704046e-05, "loss": 1.7373, "step": 58816 }, { "epoch": 0.11612542300805324, "grad_norm": 0.11298197507858276, "learning_rate": 8.874610175267578e-05, "loss": 1.7256, "step": 58848 }, { "epoch": 0.1161885689694497, "grad_norm": 0.10415863990783691, "learning_rate": 8.873976152831108e-05, "loss": 1.7198, "step": 58880 }, { "epoch": 0.11625171493084613, "grad_norm": 0.09710899740457535, "learning_rate": 8.873342130394639e-05, "loss": 1.7213, "step": 58912 }, { "epoch": 0.11631486089224256, "grad_norm": 0.09486311674118042, "learning_rate": 8.872708107958171e-05, "loss": 1.7239, "step": 58944 }, { "epoch": 0.11637800685363901, "grad_norm": 0.10320895165205002, "learning_rate": 8.872074085521702e-05, "loss": 1.7173, "step": 58976 }, { "epoch": 0.11644115281503545, "grad_norm": 0.10717189311981201, "learning_rate": 8.871440063085232e-05, "loss": 1.7186, "step": 59008 }, { "epoch": 0.11650429877643188, "grad_norm": 0.09883598238229752, "learning_rate": 8.870806040648764e-05, "loss": 1.7141, "step": 59040 }, { "epoch": 0.11656744473782833, "grad_norm": 0.10612888634204865, "learning_rate": 8.870172018212295e-05, "loss": 1.7167, "step": 59072 }, { "epoch": 0.11663059069922477, "grad_norm": 0.10070229321718216, "learning_rate": 8.869537995775827e-05, "loss": 1.7282, "step": 59104 }, { "epoch": 0.1166937366606212, "grad_norm": 0.09305015951395035, "learning_rate": 8.868903973339358e-05, "loss": 1.723, "step": 59136 }, { "epoch": 0.11675688262201765, "grad_norm": 0.10157781839370728, "learning_rate": 8.868269950902888e-05, "loss": 1.7131, "step": 59168 }, { "epoch": 0.11682002858341409, "grad_norm": 0.10942184925079346, "learning_rate": 8.867635928466418e-05, "loss": 1.7332, "step": 59200 }, { "epoch": 0.11688317454481052, "grad_norm": 0.09981514513492584, "learning_rate": 8.86700190602995e-05, "loss": 1.7079, "step": 59232 }, { "epoch": 0.11694632050620697, "grad_norm": 0.09638652205467224, "learning_rate": 8.866367883593481e-05, "loss": 1.7142, "step": 59264 }, { "epoch": 0.1170094664676034, "grad_norm": 0.09642908722162247, "learning_rate": 8.865733861157011e-05, "loss": 1.7273, "step": 59296 }, { "epoch": 0.11707261242899984, "grad_norm": 0.09426500648260117, "learning_rate": 8.865099838720543e-05, "loss": 1.7278, "step": 59328 }, { "epoch": 0.11713575839039629, "grad_norm": 0.09257742762565613, "learning_rate": 8.864465816284074e-05, "loss": 1.7116, "step": 59360 }, { "epoch": 0.11719890435179273, "grad_norm": 0.09522250294685364, "learning_rate": 8.863831793847606e-05, "loss": 1.7336, "step": 59392 }, { "epoch": 0.11726205031318918, "grad_norm": 0.10382465273141861, "learning_rate": 8.863197771411136e-05, "loss": 1.7237, "step": 59424 }, { "epoch": 0.11732519627458561, "grad_norm": 0.09743598848581314, "learning_rate": 8.862563748974667e-05, "loss": 1.7137, "step": 59456 }, { "epoch": 0.11738834223598205, "grad_norm": 0.0995405837893486, "learning_rate": 8.861929726538199e-05, "loss": 1.7205, "step": 59488 }, { "epoch": 0.1174514881973785, "grad_norm": 0.10108558088541031, "learning_rate": 8.86129570410173e-05, "loss": 1.7195, "step": 59520 }, { "epoch": 0.11751463415877493, "grad_norm": 0.10459664463996887, "learning_rate": 8.86066168166526e-05, "loss": 1.7146, "step": 59552 }, { "epoch": 0.11757778012017137, "grad_norm": 0.09926500171422958, "learning_rate": 8.860027659228792e-05, "loss": 1.7228, "step": 59584 }, { "epoch": 0.11764092608156781, "grad_norm": 0.1011614203453064, "learning_rate": 8.859393636792322e-05, "loss": 1.7235, "step": 59616 }, { "epoch": 0.11770407204296425, "grad_norm": 0.09708654880523682, "learning_rate": 8.858759614355853e-05, "loss": 1.7182, "step": 59648 }, { "epoch": 0.11776721800436069, "grad_norm": 0.1018308699131012, "learning_rate": 8.858125591919383e-05, "loss": 1.7181, "step": 59680 }, { "epoch": 0.11783036396575713, "grad_norm": 0.09900863468647003, "learning_rate": 8.857491569482915e-05, "loss": 1.7293, "step": 59712 }, { "epoch": 0.11789350992715357, "grad_norm": 0.09655536711215973, "learning_rate": 8.856857547046446e-05, "loss": 1.7174, "step": 59744 }, { "epoch": 0.11795665588855, "grad_norm": 0.09684063494205475, "learning_rate": 8.856223524609978e-05, "loss": 1.7208, "step": 59776 }, { "epoch": 0.11801980184994645, "grad_norm": 0.10317421704530716, "learning_rate": 8.855589502173509e-05, "loss": 1.7114, "step": 59808 }, { "epoch": 0.11808294781134289, "grad_norm": 0.1004984974861145, "learning_rate": 8.85495547973704e-05, "loss": 1.7205, "step": 59840 }, { "epoch": 0.11814609377273932, "grad_norm": 0.09612350910902023, "learning_rate": 8.854321457300571e-05, "loss": 1.7275, "step": 59872 }, { "epoch": 0.11820923973413577, "grad_norm": 0.09707285463809967, "learning_rate": 8.853687434864102e-05, "loss": 1.7085, "step": 59904 }, { "epoch": 0.11827238569553221, "grad_norm": 0.10384999960660934, "learning_rate": 8.853053412427634e-05, "loss": 1.7203, "step": 59936 }, { "epoch": 0.11833553165692864, "grad_norm": 0.10721999406814575, "learning_rate": 8.852419389991164e-05, "loss": 1.7226, "step": 59968 }, { "epoch": 0.1183986776183251, "grad_norm": 0.09263718873262405, "learning_rate": 8.851785367554695e-05, "loss": 1.7085, "step": 60000 }, { "epoch": 0.11846182357972153, "grad_norm": 0.10204324871301651, "learning_rate": 8.851151345118225e-05, "loss": 1.73, "step": 60032 }, { "epoch": 0.11852496954111796, "grad_norm": 0.0958547443151474, "learning_rate": 8.850517322681757e-05, "loss": 1.7162, "step": 60064 }, { "epoch": 0.11858811550251441, "grad_norm": 0.09493812173604965, "learning_rate": 8.849883300245287e-05, "loss": 1.7146, "step": 60096 }, { "epoch": 0.11865126146391085, "grad_norm": 0.10567105561494827, "learning_rate": 8.849249277808818e-05, "loss": 1.722, "step": 60128 }, { "epoch": 0.11871440742530728, "grad_norm": 0.10352890193462372, "learning_rate": 8.84861525537235e-05, "loss": 1.6995, "step": 60160 }, { "epoch": 0.11877755338670373, "grad_norm": 0.10077771544456482, "learning_rate": 8.847981232935881e-05, "loss": 1.7096, "step": 60192 }, { "epoch": 0.11884069934810017, "grad_norm": 0.09664720296859741, "learning_rate": 8.847347210499411e-05, "loss": 1.7085, "step": 60224 }, { "epoch": 0.1189038453094966, "grad_norm": 0.09669395536184311, "learning_rate": 8.846713188062943e-05, "loss": 1.723, "step": 60256 }, { "epoch": 0.11896699127089305, "grad_norm": 0.10533349215984344, "learning_rate": 8.846079165626474e-05, "loss": 1.7222, "step": 60288 }, { "epoch": 0.11903013723228949, "grad_norm": 0.09265675395727158, "learning_rate": 8.845445143190006e-05, "loss": 1.7205, "step": 60320 }, { "epoch": 0.11909328319368594, "grad_norm": 0.09601378440856934, "learning_rate": 8.844811120753536e-05, "loss": 1.7231, "step": 60352 }, { "epoch": 0.11915642915508237, "grad_norm": 0.10059671103954315, "learning_rate": 8.844177098317067e-05, "loss": 1.7213, "step": 60384 }, { "epoch": 0.11921957511647881, "grad_norm": 0.10184638202190399, "learning_rate": 8.843543075880599e-05, "loss": 1.7136, "step": 60416 }, { "epoch": 0.11928272107787526, "grad_norm": 0.09648539870977402, "learning_rate": 8.842909053444129e-05, "loss": 1.7194, "step": 60448 }, { "epoch": 0.11934586703927169, "grad_norm": 0.10598673671483994, "learning_rate": 8.84227503100766e-05, "loss": 1.7254, "step": 60480 }, { "epoch": 0.11940901300066813, "grad_norm": 0.10552442073822021, "learning_rate": 8.84164100857119e-05, "loss": 1.7104, "step": 60512 }, { "epoch": 0.11947215896206458, "grad_norm": 0.0955764427781105, "learning_rate": 8.841006986134722e-05, "loss": 1.6987, "step": 60544 }, { "epoch": 0.11953530492346101, "grad_norm": 0.11034785956144333, "learning_rate": 8.840372963698253e-05, "loss": 1.7107, "step": 60576 }, { "epoch": 0.11959845088485745, "grad_norm": 0.09071777760982513, "learning_rate": 8.839738941261785e-05, "loss": 1.7111, "step": 60608 }, { "epoch": 0.1196615968462539, "grad_norm": 0.11077333241701126, "learning_rate": 8.839104918825315e-05, "loss": 1.7147, "step": 60640 }, { "epoch": 0.11972474280765033, "grad_norm": 0.09640567749738693, "learning_rate": 8.838470896388846e-05, "loss": 1.7043, "step": 60672 }, { "epoch": 0.11978788876904677, "grad_norm": 0.09565507620573044, "learning_rate": 8.837836873952378e-05, "loss": 1.718, "step": 60704 }, { "epoch": 0.11985103473044321, "grad_norm": 0.11187933385372162, "learning_rate": 8.837202851515909e-05, "loss": 1.7183, "step": 60736 }, { "epoch": 0.11991418069183965, "grad_norm": 0.09896146506071091, "learning_rate": 8.83656882907944e-05, "loss": 1.7038, "step": 60768 }, { "epoch": 0.11997732665323609, "grad_norm": 0.09657255560159683, "learning_rate": 8.835934806642971e-05, "loss": 1.7216, "step": 60800 }, { "epoch": 0.12004047261463253, "grad_norm": 0.10551982372999191, "learning_rate": 8.835300784206502e-05, "loss": 1.7189, "step": 60832 }, { "epoch": 0.12010361857602897, "grad_norm": 0.10490927845239639, "learning_rate": 8.834666761770032e-05, "loss": 1.7255, "step": 60864 }, { "epoch": 0.1201667645374254, "grad_norm": 0.1005062609910965, "learning_rate": 8.834032739333562e-05, "loss": 1.7058, "step": 60896 }, { "epoch": 0.12022991049882185, "grad_norm": 0.09513265639543533, "learning_rate": 8.833398716897094e-05, "loss": 1.7279, "step": 60928 }, { "epoch": 0.12029305646021829, "grad_norm": 0.10015387833118439, "learning_rate": 8.832764694460625e-05, "loss": 1.7229, "step": 60960 }, { "epoch": 0.12035620242161472, "grad_norm": 0.09876395016908646, "learning_rate": 8.832130672024157e-05, "loss": 1.7146, "step": 60992 }, { "epoch": 0.12041934838301117, "grad_norm": 0.10086855292320251, "learning_rate": 8.831496649587687e-05, "loss": 1.7053, "step": 61024 }, { "epoch": 0.12048249434440761, "grad_norm": 0.10953648388385773, "learning_rate": 8.830862627151218e-05, "loss": 1.7104, "step": 61056 }, { "epoch": 0.12054564030580404, "grad_norm": 0.10189200192689896, "learning_rate": 8.83022860471475e-05, "loss": 1.7271, "step": 61088 }, { "epoch": 0.1206087862672005, "grad_norm": 0.09535510838031769, "learning_rate": 8.829594582278281e-05, "loss": 1.6988, "step": 61120 }, { "epoch": 0.12067193222859693, "grad_norm": 0.10282225161790848, "learning_rate": 8.828960559841813e-05, "loss": 1.7099, "step": 61152 }, { "epoch": 0.12073507818999338, "grad_norm": 0.09956984221935272, "learning_rate": 8.828326537405343e-05, "loss": 1.7108, "step": 61184 }, { "epoch": 0.12079822415138981, "grad_norm": 0.11226121336221695, "learning_rate": 8.827692514968874e-05, "loss": 1.7123, "step": 61216 }, { "epoch": 0.12086137011278625, "grad_norm": 0.0932110920548439, "learning_rate": 8.827058492532406e-05, "loss": 1.7199, "step": 61248 }, { "epoch": 0.1209245160741827, "grad_norm": 0.10740521550178528, "learning_rate": 8.826424470095936e-05, "loss": 1.7231, "step": 61280 }, { "epoch": 0.12098766203557913, "grad_norm": 0.09979068487882614, "learning_rate": 8.825790447659466e-05, "loss": 1.7151, "step": 61312 }, { "epoch": 0.12105080799697557, "grad_norm": 0.10132623463869095, "learning_rate": 8.825156425222997e-05, "loss": 1.7129, "step": 61344 }, { "epoch": 0.12111395395837202, "grad_norm": 0.12113574147224426, "learning_rate": 8.824522402786529e-05, "loss": 1.7142, "step": 61376 }, { "epoch": 0.12117709991976845, "grad_norm": 0.09612365812063217, "learning_rate": 8.82388838035006e-05, "loss": 1.704, "step": 61408 }, { "epoch": 0.12124024588116489, "grad_norm": 0.0958484411239624, "learning_rate": 8.82325435791359e-05, "loss": 1.7213, "step": 61440 }, { "epoch": 0.12130339184256134, "grad_norm": 0.0972626656293869, "learning_rate": 8.822620335477122e-05, "loss": 1.7246, "step": 61472 }, { "epoch": 0.12136653780395777, "grad_norm": 0.10045930743217468, "learning_rate": 8.821986313040653e-05, "loss": 1.7039, "step": 61504 }, { "epoch": 0.12142968376535421, "grad_norm": 0.10228867828845978, "learning_rate": 8.821352290604185e-05, "loss": 1.7062, "step": 61536 }, { "epoch": 0.12149282972675066, "grad_norm": 0.09936395287513733, "learning_rate": 8.820718268167715e-05, "loss": 1.7161, "step": 61568 }, { "epoch": 0.12155597568814709, "grad_norm": 0.10981740802526474, "learning_rate": 8.820084245731246e-05, "loss": 1.7156, "step": 61600 }, { "epoch": 0.12161912164954353, "grad_norm": 0.09943246096372604, "learning_rate": 8.819450223294778e-05, "loss": 1.7159, "step": 61632 }, { "epoch": 0.12168226761093998, "grad_norm": 0.10377268493175507, "learning_rate": 8.818816200858308e-05, "loss": 1.7165, "step": 61664 }, { "epoch": 0.12174541357233641, "grad_norm": 0.09910544008016586, "learning_rate": 8.81818217842184e-05, "loss": 1.7191, "step": 61696 }, { "epoch": 0.12180855953373285, "grad_norm": 0.10430502146482468, "learning_rate": 8.81754815598537e-05, "loss": 1.7162, "step": 61728 }, { "epoch": 0.1218717054951293, "grad_norm": 0.10514651238918304, "learning_rate": 8.816914133548901e-05, "loss": 1.7163, "step": 61760 }, { "epoch": 0.12193485145652573, "grad_norm": 0.09861918538808823, "learning_rate": 8.816280111112432e-05, "loss": 1.7119, "step": 61792 }, { "epoch": 0.12199799741792217, "grad_norm": 0.09835562109947205, "learning_rate": 8.815646088675964e-05, "loss": 1.6996, "step": 61824 }, { "epoch": 0.12206114337931862, "grad_norm": 0.11428391188383102, "learning_rate": 8.815012066239494e-05, "loss": 1.7223, "step": 61856 }, { "epoch": 0.12212428934071505, "grad_norm": 0.09933076798915863, "learning_rate": 8.814378043803025e-05, "loss": 1.7192, "step": 61888 }, { "epoch": 0.12218743530211149, "grad_norm": 0.1008901372551918, "learning_rate": 8.813744021366557e-05, "loss": 1.7213, "step": 61920 }, { "epoch": 0.12225058126350793, "grad_norm": 0.10245493054389954, "learning_rate": 8.813109998930088e-05, "loss": 1.7096, "step": 61952 }, { "epoch": 0.12231372722490437, "grad_norm": 0.09385039657354355, "learning_rate": 8.812475976493618e-05, "loss": 1.7096, "step": 61984 }, { "epoch": 0.12237687318630082, "grad_norm": 0.09650552272796631, "learning_rate": 8.81184195405715e-05, "loss": 1.7089, "step": 62016 }, { "epoch": 0.12244001914769725, "grad_norm": 0.1181628555059433, "learning_rate": 8.811207931620681e-05, "loss": 1.7232, "step": 62048 }, { "epoch": 0.12250316510909369, "grad_norm": 0.11358977854251862, "learning_rate": 8.810573909184211e-05, "loss": 1.7075, "step": 62080 }, { "epoch": 0.12256631107049014, "grad_norm": 0.0945497378706932, "learning_rate": 8.809939886747743e-05, "loss": 1.7116, "step": 62112 }, { "epoch": 0.12262945703188657, "grad_norm": 0.09654837846755981, "learning_rate": 8.809305864311273e-05, "loss": 1.7164, "step": 62144 }, { "epoch": 0.12269260299328301, "grad_norm": 0.1133982315659523, "learning_rate": 8.808671841874804e-05, "loss": 1.7247, "step": 62176 }, { "epoch": 0.12275574895467946, "grad_norm": 0.1014733836054802, "learning_rate": 8.808037819438336e-05, "loss": 1.723, "step": 62208 }, { "epoch": 0.1228188949160759, "grad_norm": 0.09367592632770538, "learning_rate": 8.807403797001866e-05, "loss": 1.7207, "step": 62240 }, { "epoch": 0.12288204087747233, "grad_norm": 0.10122451186180115, "learning_rate": 8.806769774565397e-05, "loss": 1.712, "step": 62272 }, { "epoch": 0.12294518683886878, "grad_norm": 0.10430467128753662, "learning_rate": 8.806135752128929e-05, "loss": 1.7166, "step": 62304 }, { "epoch": 0.12300833280026521, "grad_norm": 0.10268769413232803, "learning_rate": 8.80550172969246e-05, "loss": 1.6995, "step": 62336 }, { "epoch": 0.12307147876166165, "grad_norm": 0.10345062613487244, "learning_rate": 8.80486770725599e-05, "loss": 1.7149, "step": 62368 }, { "epoch": 0.1231346247230581, "grad_norm": 0.10112529247999191, "learning_rate": 8.804233684819522e-05, "loss": 1.6989, "step": 62400 }, { "epoch": 0.12319777068445453, "grad_norm": 0.10453299432992935, "learning_rate": 8.803599662383053e-05, "loss": 1.7199, "step": 62432 }, { "epoch": 0.12326091664585097, "grad_norm": 0.09306968748569489, "learning_rate": 8.802965639946585e-05, "loss": 1.7138, "step": 62464 }, { "epoch": 0.12332406260724742, "grad_norm": 0.09521358460187912, "learning_rate": 8.802331617510115e-05, "loss": 1.7107, "step": 62496 }, { "epoch": 0.12338720856864385, "grad_norm": 0.10357385128736496, "learning_rate": 8.801697595073646e-05, "loss": 1.7197, "step": 62528 }, { "epoch": 0.12345035453004029, "grad_norm": 0.0984930694103241, "learning_rate": 8.801063572637177e-05, "loss": 1.717, "step": 62560 }, { "epoch": 0.12351350049143674, "grad_norm": 0.09655478596687317, "learning_rate": 8.800429550200708e-05, "loss": 1.7079, "step": 62592 }, { "epoch": 0.12357664645283317, "grad_norm": 0.10092873871326447, "learning_rate": 8.79979552776424e-05, "loss": 1.696, "step": 62624 }, { "epoch": 0.12363979241422961, "grad_norm": 0.0903581902384758, "learning_rate": 8.79916150532777e-05, "loss": 1.7151, "step": 62656 }, { "epoch": 0.12370293837562606, "grad_norm": 0.09429716318845749, "learning_rate": 8.798527482891301e-05, "loss": 1.706, "step": 62688 }, { "epoch": 0.12376608433702249, "grad_norm": 0.10034441202878952, "learning_rate": 8.797893460454832e-05, "loss": 1.7179, "step": 62720 }, { "epoch": 0.12382923029841893, "grad_norm": 0.10464702546596527, "learning_rate": 8.797259438018364e-05, "loss": 1.7106, "step": 62752 }, { "epoch": 0.12389237625981538, "grad_norm": 0.09326034784317017, "learning_rate": 8.796625415581894e-05, "loss": 1.7161, "step": 62784 }, { "epoch": 0.12395552222121181, "grad_norm": 0.09914770722389221, "learning_rate": 8.795991393145425e-05, "loss": 1.7073, "step": 62816 }, { "epoch": 0.12401866818260826, "grad_norm": 0.11192018538713455, "learning_rate": 8.795357370708957e-05, "loss": 1.7125, "step": 62848 }, { "epoch": 0.1240818141440047, "grad_norm": 0.10720349103212357, "learning_rate": 8.794723348272488e-05, "loss": 1.7086, "step": 62880 }, { "epoch": 0.12414496010540113, "grad_norm": 0.10631818324327469, "learning_rate": 8.794089325836018e-05, "loss": 1.7089, "step": 62912 }, { "epoch": 0.12420810606679758, "grad_norm": 0.10009995102882385, "learning_rate": 8.793455303399549e-05, "loss": 1.7149, "step": 62944 }, { "epoch": 0.12427125202819402, "grad_norm": 0.09808431565761566, "learning_rate": 8.79282128096308e-05, "loss": 1.7085, "step": 62976 }, { "epoch": 0.12433439798959045, "grad_norm": 0.10020089894533157, "learning_rate": 8.792187258526611e-05, "loss": 1.7159, "step": 63008 }, { "epoch": 0.1243975439509869, "grad_norm": 0.09870421141386032, "learning_rate": 8.791553236090142e-05, "loss": 1.7046, "step": 63040 }, { "epoch": 0.12446068991238333, "grad_norm": 0.10279548913240433, "learning_rate": 8.790919213653673e-05, "loss": 1.6907, "step": 63072 }, { "epoch": 0.12452383587377977, "grad_norm": 0.12537583708763123, "learning_rate": 8.790285191217205e-05, "loss": 1.7072, "step": 63104 }, { "epoch": 0.12458698183517622, "grad_norm": 0.09676754474639893, "learning_rate": 8.789651168780736e-05, "loss": 1.7032, "step": 63136 }, { "epoch": 0.12465012779657265, "grad_norm": 0.09899728000164032, "learning_rate": 8.789017146344267e-05, "loss": 1.7181, "step": 63168 }, { "epoch": 0.12471327375796909, "grad_norm": 0.10402088612318039, "learning_rate": 8.788383123907798e-05, "loss": 1.7157, "step": 63200 }, { "epoch": 0.12477641971936554, "grad_norm": 0.1084921583533287, "learning_rate": 8.787749101471329e-05, "loss": 1.7142, "step": 63232 }, { "epoch": 0.12483956568076197, "grad_norm": 0.09379089623689651, "learning_rate": 8.78711507903486e-05, "loss": 1.7125, "step": 63264 }, { "epoch": 0.12490271164215841, "grad_norm": 0.0964265912771225, "learning_rate": 8.786481056598392e-05, "loss": 1.6972, "step": 63296 }, { "epoch": 0.12496585760355486, "grad_norm": 0.09247568249702454, "learning_rate": 8.785847034161922e-05, "loss": 1.7035, "step": 63328 }, { "epoch": 0.1250290035649513, "grad_norm": 0.09406764805316925, "learning_rate": 8.785213011725452e-05, "loss": 1.7115, "step": 63360 }, { "epoch": 0.12509214952634773, "grad_norm": 0.09933844208717346, "learning_rate": 8.784578989288984e-05, "loss": 1.7162, "step": 63392 }, { "epoch": 0.12515529548774418, "grad_norm": 0.10537005960941315, "learning_rate": 8.783944966852515e-05, "loss": 1.7138, "step": 63424 }, { "epoch": 0.12521844144914063, "grad_norm": 0.10083386301994324, "learning_rate": 8.783310944416045e-05, "loss": 1.717, "step": 63456 }, { "epoch": 0.12528158741053705, "grad_norm": 0.10026167333126068, "learning_rate": 8.782676921979577e-05, "loss": 1.7303, "step": 63488 }, { "epoch": 0.1253447333719335, "grad_norm": 0.09965796023607254, "learning_rate": 8.782042899543108e-05, "loss": 1.703, "step": 63520 }, { "epoch": 0.12540787933332995, "grad_norm": 0.09772130101919174, "learning_rate": 8.78140887710664e-05, "loss": 1.7146, "step": 63552 }, { "epoch": 0.12547102529472637, "grad_norm": 0.10148956626653671, "learning_rate": 8.78077485467017e-05, "loss": 1.7183, "step": 63584 }, { "epoch": 0.12553417125612282, "grad_norm": 0.10039090365171432, "learning_rate": 8.780140832233701e-05, "loss": 1.7228, "step": 63616 }, { "epoch": 0.12559731721751927, "grad_norm": 0.10283941775560379, "learning_rate": 8.779506809797232e-05, "loss": 1.6946, "step": 63648 }, { "epoch": 0.1256604631789157, "grad_norm": 0.09719688445329666, "learning_rate": 8.778872787360764e-05, "loss": 1.7038, "step": 63680 }, { "epoch": 0.12572360914031214, "grad_norm": 0.094937764108181, "learning_rate": 8.778238764924294e-05, "loss": 1.7007, "step": 63712 }, { "epoch": 0.12578675510170859, "grad_norm": 0.11529593169689178, "learning_rate": 8.777604742487826e-05, "loss": 1.7021, "step": 63744 }, { "epoch": 0.125849901063105, "grad_norm": 0.1040341705083847, "learning_rate": 8.776970720051356e-05, "loss": 1.7168, "step": 63776 }, { "epoch": 0.12591304702450146, "grad_norm": 0.12705889344215393, "learning_rate": 8.776336697614887e-05, "loss": 1.7126, "step": 63808 }, { "epoch": 0.1259761929858979, "grad_norm": 0.1022736057639122, "learning_rate": 8.775702675178419e-05, "loss": 1.6892, "step": 63840 }, { "epoch": 0.12603933894729433, "grad_norm": 0.09732268750667572, "learning_rate": 8.775068652741949e-05, "loss": 1.7202, "step": 63872 }, { "epoch": 0.12610248490869078, "grad_norm": 0.11430452764034271, "learning_rate": 8.77443463030548e-05, "loss": 1.7009, "step": 63904 }, { "epoch": 0.12616563087008723, "grad_norm": 0.10175000131130219, "learning_rate": 8.773800607869012e-05, "loss": 1.7026, "step": 63936 }, { "epoch": 0.12622877683148365, "grad_norm": 0.1036100834608078, "learning_rate": 8.773166585432543e-05, "loss": 1.7036, "step": 63968 }, { "epoch": 0.1262919227928801, "grad_norm": 0.09711123257875443, "learning_rate": 8.772532562996073e-05, "loss": 1.7073, "step": 64000 }, { "epoch": 0.12635506875427654, "grad_norm": 0.10777747631072998, "learning_rate": 8.771898540559605e-05, "loss": 1.7026, "step": 64032 }, { "epoch": 0.12641821471567297, "grad_norm": 0.10371820628643036, "learning_rate": 8.771264518123136e-05, "loss": 1.7074, "step": 64064 }, { "epoch": 0.12648136067706942, "grad_norm": 0.10325604677200317, "learning_rate": 8.770630495686667e-05, "loss": 1.7056, "step": 64096 }, { "epoch": 0.12654450663846586, "grad_norm": 0.10680054873228073, "learning_rate": 8.769996473250198e-05, "loss": 1.7121, "step": 64128 }, { "epoch": 0.12660765259986229, "grad_norm": 0.1007707491517067, "learning_rate": 8.769362450813729e-05, "loss": 1.7146, "step": 64160 }, { "epoch": 0.12667079856125874, "grad_norm": 0.11234920471906662, "learning_rate": 8.768728428377259e-05, "loss": 1.7141, "step": 64192 }, { "epoch": 0.12673394452265518, "grad_norm": 0.10464319586753845, "learning_rate": 8.76809440594079e-05, "loss": 1.7175, "step": 64224 }, { "epoch": 0.1267970904840516, "grad_norm": 0.09534218162298203, "learning_rate": 8.76746038350432e-05, "loss": 1.6995, "step": 64256 }, { "epoch": 0.12686023644544805, "grad_norm": 0.10238602757453918, "learning_rate": 8.766826361067852e-05, "loss": 1.7134, "step": 64288 }, { "epoch": 0.1269233824068445, "grad_norm": 0.09826625883579254, "learning_rate": 8.766192338631384e-05, "loss": 1.708, "step": 64320 }, { "epoch": 0.12698652836824093, "grad_norm": 0.10708026587963104, "learning_rate": 8.765558316194915e-05, "loss": 1.7147, "step": 64352 }, { "epoch": 0.12704967432963737, "grad_norm": 0.0951545387506485, "learning_rate": 8.764924293758445e-05, "loss": 1.7102, "step": 64384 }, { "epoch": 0.12711282029103382, "grad_norm": 0.09865587949752808, "learning_rate": 8.764290271321977e-05, "loss": 1.713, "step": 64416 }, { "epoch": 0.12717596625243024, "grad_norm": 0.10516698658466339, "learning_rate": 8.763656248885508e-05, "loss": 1.7155, "step": 64448 }, { "epoch": 0.1272391122138267, "grad_norm": 0.0997084230184555, "learning_rate": 8.76302222644904e-05, "loss": 1.7251, "step": 64480 }, { "epoch": 0.12730225817522314, "grad_norm": 0.09966826438903809, "learning_rate": 8.762388204012571e-05, "loss": 1.7083, "step": 64512 }, { "epoch": 0.12736540413661956, "grad_norm": 0.10060398280620575, "learning_rate": 8.761754181576101e-05, "loss": 1.702, "step": 64544 }, { "epoch": 0.127428550098016, "grad_norm": 0.1012912318110466, "learning_rate": 8.761120159139633e-05, "loss": 1.6979, "step": 64576 }, { "epoch": 0.12749169605941246, "grad_norm": 0.10541724413633347, "learning_rate": 8.760486136703163e-05, "loss": 1.7123, "step": 64608 }, { "epoch": 0.12755484202080888, "grad_norm": 0.09584583342075348, "learning_rate": 8.759852114266694e-05, "loss": 1.6894, "step": 64640 }, { "epoch": 0.12761798798220533, "grad_norm": 0.09712526202201843, "learning_rate": 8.759218091830224e-05, "loss": 1.7176, "step": 64672 }, { "epoch": 0.12768113394360178, "grad_norm": 0.10084573924541473, "learning_rate": 8.758584069393756e-05, "loss": 1.7092, "step": 64704 }, { "epoch": 0.1277442799049982, "grad_norm": 0.09810017794370651, "learning_rate": 8.757950046957287e-05, "loss": 1.7095, "step": 64736 }, { "epoch": 0.12780742586639465, "grad_norm": 0.09851551055908203, "learning_rate": 8.757316024520819e-05, "loss": 1.7135, "step": 64768 }, { "epoch": 0.1278705718277911, "grad_norm": 0.11118162423372269, "learning_rate": 8.756682002084349e-05, "loss": 1.6993, "step": 64800 }, { "epoch": 0.12793371778918752, "grad_norm": 0.09544149041175842, "learning_rate": 8.75604797964788e-05, "loss": 1.7134, "step": 64832 }, { "epoch": 0.12799686375058397, "grad_norm": 0.10334372520446777, "learning_rate": 8.755413957211412e-05, "loss": 1.7134, "step": 64864 }, { "epoch": 0.12806000971198042, "grad_norm": 0.10433602333068848, "learning_rate": 8.754779934774943e-05, "loss": 1.6914, "step": 64896 }, { "epoch": 0.12812315567337687, "grad_norm": 0.0969606265425682, "learning_rate": 8.754145912338473e-05, "loss": 1.7003, "step": 64928 }, { "epoch": 0.1281863016347733, "grad_norm": 0.09980641305446625, "learning_rate": 8.753511889902005e-05, "loss": 1.7107, "step": 64960 }, { "epoch": 0.12824944759616974, "grad_norm": 0.10546495020389557, "learning_rate": 8.752877867465536e-05, "loss": 1.6991, "step": 64992 }, { "epoch": 0.1283125935575662, "grad_norm": 0.10080936551094055, "learning_rate": 8.752243845029066e-05, "loss": 1.7025, "step": 65024 }, { "epoch": 0.1283757395189626, "grad_norm": 0.10327662527561188, "learning_rate": 8.751609822592596e-05, "loss": 1.7167, "step": 65056 }, { "epoch": 0.12843888548035906, "grad_norm": 0.09645973145961761, "learning_rate": 8.750975800156128e-05, "loss": 1.7163, "step": 65088 }, { "epoch": 0.1285020314417555, "grad_norm": 0.09340842813253403, "learning_rate": 8.750341777719659e-05, "loss": 1.7169, "step": 65120 }, { "epoch": 0.12856517740315193, "grad_norm": 0.10235944390296936, "learning_rate": 8.74970775528319e-05, "loss": 1.6974, "step": 65152 }, { "epoch": 0.12862832336454838, "grad_norm": 0.11224786192178726, "learning_rate": 8.749073732846722e-05, "loss": 1.7222, "step": 65184 }, { "epoch": 0.12869146932594483, "grad_norm": 0.11072751879692078, "learning_rate": 8.748439710410252e-05, "loss": 1.6987, "step": 65216 }, { "epoch": 0.12875461528734125, "grad_norm": 0.09741399437189102, "learning_rate": 8.747805687973784e-05, "loss": 1.7056, "step": 65248 }, { "epoch": 0.1288177612487377, "grad_norm": 0.09321194142103195, "learning_rate": 8.747171665537315e-05, "loss": 1.6989, "step": 65280 }, { "epoch": 0.12888090721013415, "grad_norm": 0.10490649193525314, "learning_rate": 8.746537643100847e-05, "loss": 1.6959, "step": 65312 }, { "epoch": 0.12894405317153057, "grad_norm": 0.09913329780101776, "learning_rate": 8.745903620664377e-05, "loss": 1.7053, "step": 65344 }, { "epoch": 0.12900719913292702, "grad_norm": 0.10102079063653946, "learning_rate": 8.745269598227908e-05, "loss": 1.69, "step": 65376 }, { "epoch": 0.12907034509432347, "grad_norm": 0.09921082854270935, "learning_rate": 8.74463557579144e-05, "loss": 1.6981, "step": 65408 }, { "epoch": 0.1291334910557199, "grad_norm": 0.0991610437631607, "learning_rate": 8.74400155335497e-05, "loss": 1.7063, "step": 65440 }, { "epoch": 0.12919663701711634, "grad_norm": 0.10011253505945206, "learning_rate": 8.7433675309185e-05, "loss": 1.7143, "step": 65472 }, { "epoch": 0.1292597829785128, "grad_norm": 0.10191270709037781, "learning_rate": 8.742733508482031e-05, "loss": 1.704, "step": 65504 }, { "epoch": 0.1293229289399092, "grad_norm": 0.10369692742824554, "learning_rate": 8.742099486045563e-05, "loss": 1.694, "step": 65536 }, { "epoch": 0.12938607490130566, "grad_norm": 0.10494969040155411, "learning_rate": 8.741465463609094e-05, "loss": 1.6927, "step": 65568 }, { "epoch": 0.1294492208627021, "grad_norm": 0.10710588842630386, "learning_rate": 8.740831441172624e-05, "loss": 1.706, "step": 65600 }, { "epoch": 0.12951236682409853, "grad_norm": 0.12005143612623215, "learning_rate": 8.740197418736156e-05, "loss": 1.7084, "step": 65632 }, { "epoch": 0.12957551278549498, "grad_norm": 0.09254439175128937, "learning_rate": 8.739563396299687e-05, "loss": 1.6939, "step": 65664 }, { "epoch": 0.12963865874689143, "grad_norm": 0.10031227767467499, "learning_rate": 8.738929373863219e-05, "loss": 1.7105, "step": 65696 }, { "epoch": 0.12970180470828785, "grad_norm": 0.10078656673431396, "learning_rate": 8.738295351426749e-05, "loss": 1.6924, "step": 65728 }, { "epoch": 0.1297649506696843, "grad_norm": 0.10685756057500839, "learning_rate": 8.73766132899028e-05, "loss": 1.701, "step": 65760 }, { "epoch": 0.12982809663108075, "grad_norm": 0.09442702680826187, "learning_rate": 8.737027306553812e-05, "loss": 1.6992, "step": 65792 }, { "epoch": 0.12989124259247717, "grad_norm": 0.10017342865467072, "learning_rate": 8.736393284117342e-05, "loss": 1.7157, "step": 65824 }, { "epoch": 0.12995438855387362, "grad_norm": 0.09943683445453644, "learning_rate": 8.735759261680873e-05, "loss": 1.7059, "step": 65856 }, { "epoch": 0.13001753451527007, "grad_norm": 0.09687332063913345, "learning_rate": 8.735125239244403e-05, "loss": 1.7026, "step": 65888 }, { "epoch": 0.1300806804766665, "grad_norm": 0.09685111790895462, "learning_rate": 8.734491216807935e-05, "loss": 1.6921, "step": 65920 }, { "epoch": 0.13014382643806294, "grad_norm": 0.10358686745166779, "learning_rate": 8.733857194371466e-05, "loss": 1.6936, "step": 65952 }, { "epoch": 0.1302069723994594, "grad_norm": 0.09398703277111053, "learning_rate": 8.733223171934998e-05, "loss": 1.6947, "step": 65984 }, { "epoch": 0.1302701183608558, "grad_norm": 0.10856349021196365, "learning_rate": 8.732589149498528e-05, "loss": 1.6984, "step": 66016 }, { "epoch": 0.13033326432225226, "grad_norm": 0.10347195714712143, "learning_rate": 8.731955127062059e-05, "loss": 1.696, "step": 66048 }, { "epoch": 0.1303964102836487, "grad_norm": 0.10519301891326904, "learning_rate": 8.73132110462559e-05, "loss": 1.7046, "step": 66080 }, { "epoch": 0.13045955624504513, "grad_norm": 0.10505388677120209, "learning_rate": 8.730687082189122e-05, "loss": 1.6987, "step": 66112 }, { "epoch": 0.13052270220644158, "grad_norm": 0.10282491147518158, "learning_rate": 8.730053059752652e-05, "loss": 1.6933, "step": 66144 }, { "epoch": 0.13058584816783803, "grad_norm": 0.09779899567365646, "learning_rate": 8.729419037316184e-05, "loss": 1.6921, "step": 66176 }, { "epoch": 0.13064899412923445, "grad_norm": 0.10245571285486221, "learning_rate": 8.728785014879715e-05, "loss": 1.6907, "step": 66208 }, { "epoch": 0.1307121400906309, "grad_norm": 0.11757884919643402, "learning_rate": 8.728150992443245e-05, "loss": 1.7188, "step": 66240 }, { "epoch": 0.13077528605202735, "grad_norm": 0.09794843941926956, "learning_rate": 8.727516970006777e-05, "loss": 1.7053, "step": 66272 }, { "epoch": 0.13083843201342377, "grad_norm": 0.09846805781126022, "learning_rate": 8.726882947570307e-05, "loss": 1.6925, "step": 66304 }, { "epoch": 0.13090157797482022, "grad_norm": 0.10118546336889267, "learning_rate": 8.726248925133838e-05, "loss": 1.7072, "step": 66336 }, { "epoch": 0.13096472393621666, "grad_norm": 0.09692150354385376, "learning_rate": 8.72561490269737e-05, "loss": 1.7097, "step": 66368 }, { "epoch": 0.1310278698976131, "grad_norm": 0.09960059821605682, "learning_rate": 8.7249808802609e-05, "loss": 1.6942, "step": 66400 }, { "epoch": 0.13109101585900954, "grad_norm": 0.0997898280620575, "learning_rate": 8.724346857824431e-05, "loss": 1.7031, "step": 66432 }, { "epoch": 0.13115416182040598, "grad_norm": 0.10471518337726593, "learning_rate": 8.723712835387963e-05, "loss": 1.7006, "step": 66464 }, { "epoch": 0.1312173077818024, "grad_norm": 0.09863907098770142, "learning_rate": 8.723078812951494e-05, "loss": 1.7123, "step": 66496 }, { "epoch": 0.13128045374319885, "grad_norm": 0.10759855061769485, "learning_rate": 8.722444790515026e-05, "loss": 1.6977, "step": 66528 }, { "epoch": 0.1313435997045953, "grad_norm": 0.11002608388662338, "learning_rate": 8.721810768078556e-05, "loss": 1.6967, "step": 66560 }, { "epoch": 0.13140674566599175, "grad_norm": 0.12025301903486252, "learning_rate": 8.721176745642087e-05, "loss": 1.6869, "step": 66592 }, { "epoch": 0.13146989162738817, "grad_norm": 0.0912991538643837, "learning_rate": 8.720542723205619e-05, "loss": 1.72, "step": 66624 }, { "epoch": 0.13153303758878462, "grad_norm": 0.09873910993337631, "learning_rate": 8.719908700769149e-05, "loss": 1.7048, "step": 66656 }, { "epoch": 0.13159618355018107, "grad_norm": 0.10642040520906448, "learning_rate": 8.71927467833268e-05, "loss": 1.7016, "step": 66688 }, { "epoch": 0.1316593295115775, "grad_norm": 0.10963353514671326, "learning_rate": 8.71864065589621e-05, "loss": 1.6879, "step": 66720 }, { "epoch": 0.13172247547297394, "grad_norm": 0.10779455304145813, "learning_rate": 8.718006633459742e-05, "loss": 1.7122, "step": 66752 }, { "epoch": 0.1317856214343704, "grad_norm": 0.11267006397247314, "learning_rate": 8.717372611023273e-05, "loss": 1.7023, "step": 66784 }, { "epoch": 0.1318487673957668, "grad_norm": 0.10699082911014557, "learning_rate": 8.716738588586803e-05, "loss": 1.695, "step": 66816 }, { "epoch": 0.13191191335716326, "grad_norm": 0.10018612444400787, "learning_rate": 8.716104566150335e-05, "loss": 1.6949, "step": 66848 }, { "epoch": 0.1319750593185597, "grad_norm": 0.10509435087442398, "learning_rate": 8.715470543713866e-05, "loss": 1.7061, "step": 66880 }, { "epoch": 0.13203820527995613, "grad_norm": 0.09271152317523956, "learning_rate": 8.714836521277398e-05, "loss": 1.7038, "step": 66912 }, { "epoch": 0.13210135124135258, "grad_norm": 0.1133396327495575, "learning_rate": 8.714202498840928e-05, "loss": 1.6939, "step": 66944 }, { "epoch": 0.13216449720274903, "grad_norm": 0.0986180230975151, "learning_rate": 8.713568476404459e-05, "loss": 1.6971, "step": 66976 }, { "epoch": 0.13222764316414545, "grad_norm": 0.10401751846075058, "learning_rate": 8.71293445396799e-05, "loss": 1.6987, "step": 67008 }, { "epoch": 0.1322907891255419, "grad_norm": 0.10454605519771576, "learning_rate": 8.712300431531522e-05, "loss": 1.705, "step": 67040 }, { "epoch": 0.13235393508693835, "grad_norm": 0.09792706370353699, "learning_rate": 8.711666409095052e-05, "loss": 1.7028, "step": 67072 }, { "epoch": 0.13241708104833477, "grad_norm": 0.09791544824838638, "learning_rate": 8.711032386658584e-05, "loss": 1.6958, "step": 67104 }, { "epoch": 0.13248022700973122, "grad_norm": 0.1010831817984581, "learning_rate": 8.710398364222114e-05, "loss": 1.7015, "step": 67136 }, { "epoch": 0.13254337297112767, "grad_norm": 0.096627376973629, "learning_rate": 8.709764341785645e-05, "loss": 1.6955, "step": 67168 }, { "epoch": 0.1326065189325241, "grad_norm": 0.10321414470672607, "learning_rate": 8.709130319349177e-05, "loss": 1.7067, "step": 67200 }, { "epoch": 0.13266966489392054, "grad_norm": 0.09757894277572632, "learning_rate": 8.708496296912707e-05, "loss": 1.6998, "step": 67232 }, { "epoch": 0.132732810855317, "grad_norm": 0.10374078154563904, "learning_rate": 8.707862274476238e-05, "loss": 1.7074, "step": 67264 }, { "epoch": 0.1327959568167134, "grad_norm": 0.11785580217838287, "learning_rate": 8.70722825203977e-05, "loss": 1.7011, "step": 67296 }, { "epoch": 0.13285910277810986, "grad_norm": 0.09936001896858215, "learning_rate": 8.706594229603301e-05, "loss": 1.6901, "step": 67328 }, { "epoch": 0.1329222487395063, "grad_norm": 0.09808773547410965, "learning_rate": 8.705960207166831e-05, "loss": 1.7006, "step": 67360 }, { "epoch": 0.13298539470090273, "grad_norm": 0.10902006924152374, "learning_rate": 8.705326184730363e-05, "loss": 1.7021, "step": 67392 }, { "epoch": 0.13304854066229918, "grad_norm": 0.10547816008329391, "learning_rate": 8.704692162293894e-05, "loss": 1.6901, "step": 67424 }, { "epoch": 0.13311168662369563, "grad_norm": 0.09985819458961487, "learning_rate": 8.704058139857426e-05, "loss": 1.6839, "step": 67456 }, { "epoch": 0.13317483258509205, "grad_norm": 0.10067432373762131, "learning_rate": 8.703424117420956e-05, "loss": 1.6984, "step": 67488 }, { "epoch": 0.1332379785464885, "grad_norm": 0.10011441260576248, "learning_rate": 8.702790094984486e-05, "loss": 1.6826, "step": 67520 }, { "epoch": 0.13330112450788495, "grad_norm": 0.09623958170413971, "learning_rate": 8.702156072548017e-05, "loss": 1.7043, "step": 67552 }, { "epoch": 0.13336427046928137, "grad_norm": 0.10766778886318207, "learning_rate": 8.701522050111549e-05, "loss": 1.694, "step": 67584 }, { "epoch": 0.13342741643067782, "grad_norm": 0.10048247873783112, "learning_rate": 8.700888027675079e-05, "loss": 1.6976, "step": 67616 }, { "epoch": 0.13349056239207427, "grad_norm": 0.1025388091802597, "learning_rate": 8.70025400523861e-05, "loss": 1.7094, "step": 67648 }, { "epoch": 0.1335537083534707, "grad_norm": 0.0898028239607811, "learning_rate": 8.699619982802142e-05, "loss": 1.7003, "step": 67680 }, { "epoch": 0.13361685431486714, "grad_norm": 0.10830457508563995, "learning_rate": 8.698985960365673e-05, "loss": 1.6987, "step": 67712 }, { "epoch": 0.1336800002762636, "grad_norm": 0.0960337445139885, "learning_rate": 8.698351937929203e-05, "loss": 1.6911, "step": 67744 }, { "epoch": 0.13374314623766, "grad_norm": 0.10730069130659103, "learning_rate": 8.697717915492735e-05, "loss": 1.6852, "step": 67776 }, { "epoch": 0.13380629219905646, "grad_norm": 0.10252933204174042, "learning_rate": 8.697083893056266e-05, "loss": 1.7069, "step": 67808 }, { "epoch": 0.1338694381604529, "grad_norm": 0.09704408049583435, "learning_rate": 8.696449870619798e-05, "loss": 1.6924, "step": 67840 }, { "epoch": 0.13393258412184933, "grad_norm": 0.09831680357456207, "learning_rate": 8.695815848183329e-05, "loss": 1.7129, "step": 67872 }, { "epoch": 0.13399573008324578, "grad_norm": 0.09816844016313553, "learning_rate": 8.695181825746859e-05, "loss": 1.6999, "step": 67904 }, { "epoch": 0.13405887604464223, "grad_norm": 0.09870949387550354, "learning_rate": 8.69454780331039e-05, "loss": 1.6901, "step": 67936 }, { "epoch": 0.13412202200603865, "grad_norm": 0.0937604159116745, "learning_rate": 8.693913780873921e-05, "loss": 1.6785, "step": 67968 }, { "epoch": 0.1341851679674351, "grad_norm": 0.10146182775497437, "learning_rate": 8.693279758437452e-05, "loss": 1.6888, "step": 68000 }, { "epoch": 0.13424831392883155, "grad_norm": 0.09773188829421997, "learning_rate": 8.692645736000982e-05, "loss": 1.693, "step": 68032 }, { "epoch": 0.13431145989022797, "grad_norm": 0.10485115647315979, "learning_rate": 8.692011713564514e-05, "loss": 1.705, "step": 68064 }, { "epoch": 0.13437460585162442, "grad_norm": 0.09446601569652557, "learning_rate": 8.691377691128045e-05, "loss": 1.6872, "step": 68096 }, { "epoch": 0.13443775181302087, "grad_norm": 0.09718290716409683, "learning_rate": 8.690743668691577e-05, "loss": 1.7068, "step": 68128 }, { "epoch": 0.1345008977744173, "grad_norm": 0.10884904116392136, "learning_rate": 8.690109646255107e-05, "loss": 1.6895, "step": 68160 }, { "epoch": 0.13456404373581374, "grad_norm": 0.10205969959497452, "learning_rate": 8.689475623818638e-05, "loss": 1.6945, "step": 68192 }, { "epoch": 0.1346271896972102, "grad_norm": 0.09928876906633377, "learning_rate": 8.68884160138217e-05, "loss": 1.6973, "step": 68224 }, { "epoch": 0.1346903356586066, "grad_norm": 0.09771773219108582, "learning_rate": 8.688207578945701e-05, "loss": 1.6968, "step": 68256 }, { "epoch": 0.13475348162000306, "grad_norm": 0.10454849153757095, "learning_rate": 8.687573556509231e-05, "loss": 1.6856, "step": 68288 }, { "epoch": 0.1348166275813995, "grad_norm": 0.10128848254680634, "learning_rate": 8.686939534072763e-05, "loss": 1.6928, "step": 68320 }, { "epoch": 0.13487977354279596, "grad_norm": 0.10038738697767258, "learning_rate": 8.686305511636293e-05, "loss": 1.693, "step": 68352 }, { "epoch": 0.13494291950419238, "grad_norm": 0.09970016032457352, "learning_rate": 8.685671489199824e-05, "loss": 1.68, "step": 68384 }, { "epoch": 0.13500606546558883, "grad_norm": 0.10426627844572067, "learning_rate": 8.685037466763356e-05, "loss": 1.6847, "step": 68416 }, { "epoch": 0.13506921142698528, "grad_norm": 0.09571852535009384, "learning_rate": 8.684403444326886e-05, "loss": 1.6855, "step": 68448 }, { "epoch": 0.1351323573883817, "grad_norm": 0.10176054388284683, "learning_rate": 8.683769421890417e-05, "loss": 1.6867, "step": 68480 }, { "epoch": 0.13519550334977815, "grad_norm": 0.09997376054525375, "learning_rate": 8.683135399453949e-05, "loss": 1.6961, "step": 68512 }, { "epoch": 0.1352586493111746, "grad_norm": 0.09780901670455933, "learning_rate": 8.68250137701748e-05, "loss": 1.6979, "step": 68544 }, { "epoch": 0.13532179527257102, "grad_norm": 0.11168446391820908, "learning_rate": 8.68186735458101e-05, "loss": 1.7007, "step": 68576 }, { "epoch": 0.13538494123396747, "grad_norm": 0.10168793052434921, "learning_rate": 8.681233332144542e-05, "loss": 1.6982, "step": 68608 }, { "epoch": 0.13544808719536391, "grad_norm": 0.10820365697145462, "learning_rate": 8.680599309708073e-05, "loss": 1.695, "step": 68640 }, { "epoch": 0.13551123315676034, "grad_norm": 0.10211111605167389, "learning_rate": 8.679965287271605e-05, "loss": 1.6922, "step": 68672 }, { "epoch": 0.13557437911815678, "grad_norm": 0.10094490647315979, "learning_rate": 8.679331264835135e-05, "loss": 1.6898, "step": 68704 }, { "epoch": 0.13563752507955323, "grad_norm": 0.100893534719944, "learning_rate": 8.678697242398666e-05, "loss": 1.7035, "step": 68736 }, { "epoch": 0.13570067104094966, "grad_norm": 0.10502218455076218, "learning_rate": 8.678063219962196e-05, "loss": 1.7052, "step": 68768 }, { "epoch": 0.1357638170023461, "grad_norm": 0.10092008858919144, "learning_rate": 8.677429197525728e-05, "loss": 1.7052, "step": 68800 }, { "epoch": 0.13582696296374255, "grad_norm": 0.09909343719482422, "learning_rate": 8.676795175089258e-05, "loss": 1.696, "step": 68832 }, { "epoch": 0.13589010892513897, "grad_norm": 0.09472015500068665, "learning_rate": 8.67616115265279e-05, "loss": 1.7003, "step": 68864 }, { "epoch": 0.13595325488653542, "grad_norm": 0.09557487815618515, "learning_rate": 8.675527130216321e-05, "loss": 1.687, "step": 68896 }, { "epoch": 0.13601640084793187, "grad_norm": 0.10137154161930084, "learning_rate": 8.674893107779852e-05, "loss": 1.6939, "step": 68928 }, { "epoch": 0.1360795468093283, "grad_norm": 0.12496348470449448, "learning_rate": 8.674259085343382e-05, "loss": 1.6881, "step": 68960 }, { "epoch": 0.13614269277072474, "grad_norm": 0.10233995318412781, "learning_rate": 8.673625062906914e-05, "loss": 1.7001, "step": 68992 }, { "epoch": 0.1362058387321212, "grad_norm": 0.09763490408658981, "learning_rate": 8.672991040470445e-05, "loss": 1.6847, "step": 69024 }, { "epoch": 0.13626898469351761, "grad_norm": 0.0937042310833931, "learning_rate": 8.672357018033977e-05, "loss": 1.6921, "step": 69056 }, { "epoch": 0.13633213065491406, "grad_norm": 0.10340123623609543, "learning_rate": 8.671722995597508e-05, "loss": 1.6991, "step": 69088 }, { "epoch": 0.1363952766163105, "grad_norm": 0.09613881260156631, "learning_rate": 8.671088973161038e-05, "loss": 1.697, "step": 69120 }, { "epoch": 0.13645842257770693, "grad_norm": 0.10652626305818558, "learning_rate": 8.67045495072457e-05, "loss": 1.6987, "step": 69152 }, { "epoch": 0.13652156853910338, "grad_norm": 0.1023712083697319, "learning_rate": 8.6698209282881e-05, "loss": 1.6832, "step": 69184 }, { "epoch": 0.13658471450049983, "grad_norm": 0.09921175241470337, "learning_rate": 8.669186905851631e-05, "loss": 1.6974, "step": 69216 }, { "epoch": 0.13664786046189625, "grad_norm": 0.09992415457963943, "learning_rate": 8.668552883415161e-05, "loss": 1.6914, "step": 69248 }, { "epoch": 0.1367110064232927, "grad_norm": 0.09259074181318283, "learning_rate": 8.667918860978693e-05, "loss": 1.6866, "step": 69280 }, { "epoch": 0.13677415238468915, "grad_norm": 0.10414275527000427, "learning_rate": 8.667284838542224e-05, "loss": 1.716, "step": 69312 }, { "epoch": 0.13683729834608557, "grad_norm": 0.10414236783981323, "learning_rate": 8.666650816105756e-05, "loss": 1.6923, "step": 69344 }, { "epoch": 0.13690044430748202, "grad_norm": 0.10356683284044266, "learning_rate": 8.666016793669286e-05, "loss": 1.6898, "step": 69376 }, { "epoch": 0.13696359026887847, "grad_norm": 0.09934966266155243, "learning_rate": 8.665382771232817e-05, "loss": 1.6875, "step": 69408 }, { "epoch": 0.1370267362302749, "grad_norm": 0.1002814769744873, "learning_rate": 8.664748748796349e-05, "loss": 1.6931, "step": 69440 }, { "epoch": 0.13708988219167134, "grad_norm": 0.10130573064088821, "learning_rate": 8.66411472635988e-05, "loss": 1.7054, "step": 69472 }, { "epoch": 0.1371530281530678, "grad_norm": 0.1078111007809639, "learning_rate": 8.66348070392341e-05, "loss": 1.6925, "step": 69504 }, { "epoch": 0.1372161741144642, "grad_norm": 0.10099471360445023, "learning_rate": 8.662846681486942e-05, "loss": 1.6778, "step": 69536 }, { "epoch": 0.13727932007586066, "grad_norm": 0.09544076025485992, "learning_rate": 8.662212659050473e-05, "loss": 1.6901, "step": 69568 }, { "epoch": 0.1373424660372571, "grad_norm": 0.09985319525003433, "learning_rate": 8.661578636614003e-05, "loss": 1.6984, "step": 69600 }, { "epoch": 0.13740561199865353, "grad_norm": 0.09363614767789841, "learning_rate": 8.660944614177533e-05, "loss": 1.6837, "step": 69632 }, { "epoch": 0.13746875796004998, "grad_norm": 0.10166219621896744, "learning_rate": 8.660310591741065e-05, "loss": 1.6917, "step": 69664 }, { "epoch": 0.13753190392144643, "grad_norm": 0.10159972310066223, "learning_rate": 8.659676569304596e-05, "loss": 1.7055, "step": 69696 }, { "epoch": 0.13759504988284285, "grad_norm": 0.10600333660840988, "learning_rate": 8.659042546868128e-05, "loss": 1.6968, "step": 69728 }, { "epoch": 0.1376581958442393, "grad_norm": 0.10244552791118622, "learning_rate": 8.658408524431659e-05, "loss": 1.6881, "step": 69760 }, { "epoch": 0.13772134180563575, "grad_norm": 0.10649577528238297, "learning_rate": 8.65777450199519e-05, "loss": 1.6974, "step": 69792 }, { "epoch": 0.13778448776703217, "grad_norm": 0.10574222356081009, "learning_rate": 8.657140479558721e-05, "loss": 1.6966, "step": 69824 }, { "epoch": 0.13784763372842862, "grad_norm": 0.094143807888031, "learning_rate": 8.656506457122252e-05, "loss": 1.6726, "step": 69856 }, { "epoch": 0.13791077968982507, "grad_norm": 0.09454295784235, "learning_rate": 8.655872434685784e-05, "loss": 1.6967, "step": 69888 }, { "epoch": 0.1379739256512215, "grad_norm": 0.10063689202070236, "learning_rate": 8.655238412249314e-05, "loss": 1.6965, "step": 69920 }, { "epoch": 0.13803707161261794, "grad_norm": 0.10030588507652283, "learning_rate": 8.654604389812845e-05, "loss": 1.7071, "step": 69952 }, { "epoch": 0.1381002175740144, "grad_norm": 0.09860387444496155, "learning_rate": 8.653970367376375e-05, "loss": 1.6798, "step": 69984 }, { "epoch": 0.13816336353541084, "grad_norm": 0.09735734760761261, "learning_rate": 8.653336344939907e-05, "loss": 1.6821, "step": 70016 }, { "epoch": 0.13822650949680726, "grad_norm": 0.09824249148368835, "learning_rate": 8.652702322503437e-05, "loss": 1.6951, "step": 70048 }, { "epoch": 0.1382896554582037, "grad_norm": 0.09516239166259766, "learning_rate": 8.652068300066968e-05, "loss": 1.6866, "step": 70080 }, { "epoch": 0.13835280141960016, "grad_norm": 0.10811588168144226, "learning_rate": 8.6514342776305e-05, "loss": 1.7058, "step": 70112 }, { "epoch": 0.13841594738099658, "grad_norm": 0.09876111894845963, "learning_rate": 8.650800255194031e-05, "loss": 1.697, "step": 70144 }, { "epoch": 0.13847909334239303, "grad_norm": 0.09289198368787766, "learning_rate": 8.650166232757561e-05, "loss": 1.7014, "step": 70176 }, { "epoch": 0.13854223930378948, "grad_norm": 0.1037587970495224, "learning_rate": 8.649532210321093e-05, "loss": 1.6868, "step": 70208 }, { "epoch": 0.1386053852651859, "grad_norm": 0.09075438231229782, "learning_rate": 8.648898187884624e-05, "loss": 1.6971, "step": 70240 }, { "epoch": 0.13866853122658235, "grad_norm": 0.09826972335577011, "learning_rate": 8.648264165448156e-05, "loss": 1.696, "step": 70272 }, { "epoch": 0.1387316771879788, "grad_norm": 0.10796775668859482, "learning_rate": 8.647630143011686e-05, "loss": 1.691, "step": 70304 }, { "epoch": 0.13879482314937522, "grad_norm": 0.10104010999202728, "learning_rate": 8.646996120575217e-05, "loss": 1.704, "step": 70336 }, { "epoch": 0.13885796911077167, "grad_norm": 0.1002320870757103, "learning_rate": 8.646362098138749e-05, "loss": 1.7012, "step": 70368 }, { "epoch": 0.13892111507216812, "grad_norm": 0.10547274351119995, "learning_rate": 8.645728075702279e-05, "loss": 1.6937, "step": 70400 }, { "epoch": 0.13898426103356454, "grad_norm": 0.11008907854557037, "learning_rate": 8.64509405326581e-05, "loss": 1.7009, "step": 70432 }, { "epoch": 0.139047406994961, "grad_norm": 0.10129993408918381, "learning_rate": 8.64446003082934e-05, "loss": 1.7099, "step": 70464 }, { "epoch": 0.13911055295635744, "grad_norm": 0.09014508128166199, "learning_rate": 8.643826008392872e-05, "loss": 1.6737, "step": 70496 }, { "epoch": 0.13917369891775386, "grad_norm": 0.11317227780818939, "learning_rate": 8.643191985956403e-05, "loss": 1.6905, "step": 70528 }, { "epoch": 0.1392368448791503, "grad_norm": 0.10088500380516052, "learning_rate": 8.642557963519935e-05, "loss": 1.6769, "step": 70560 }, { "epoch": 0.13929999084054676, "grad_norm": 0.1000090166926384, "learning_rate": 8.641923941083465e-05, "loss": 1.6932, "step": 70592 }, { "epoch": 0.13936313680194318, "grad_norm": 0.09973660111427307, "learning_rate": 8.641289918646996e-05, "loss": 1.6857, "step": 70624 }, { "epoch": 0.13942628276333963, "grad_norm": 0.10063603520393372, "learning_rate": 8.640655896210528e-05, "loss": 1.6998, "step": 70656 }, { "epoch": 0.13948942872473608, "grad_norm": 0.10236351191997528, "learning_rate": 8.64002187377406e-05, "loss": 1.6902, "step": 70688 }, { "epoch": 0.1395525746861325, "grad_norm": 0.10239796340465546, "learning_rate": 8.63938785133759e-05, "loss": 1.6699, "step": 70720 }, { "epoch": 0.13961572064752895, "grad_norm": 0.10240877419710159, "learning_rate": 8.638753828901121e-05, "loss": 1.7011, "step": 70752 }, { "epoch": 0.1396788666089254, "grad_norm": 0.09900277853012085, "learning_rate": 8.638119806464652e-05, "loss": 1.6929, "step": 70784 }, { "epoch": 0.13974201257032182, "grad_norm": 0.0964280515909195, "learning_rate": 8.637485784028182e-05, "loss": 1.6951, "step": 70816 }, { "epoch": 0.13980515853171827, "grad_norm": 0.09807398170232773, "learning_rate": 8.636851761591714e-05, "loss": 1.6998, "step": 70848 }, { "epoch": 0.13986830449311471, "grad_norm": 0.10209651291370392, "learning_rate": 8.636217739155244e-05, "loss": 1.6926, "step": 70880 }, { "epoch": 0.13993145045451114, "grad_norm": 0.10472094267606735, "learning_rate": 8.635583716718775e-05, "loss": 1.6878, "step": 70912 }, { "epoch": 0.13999459641590759, "grad_norm": 0.09496672451496124, "learning_rate": 8.634949694282307e-05, "loss": 1.6821, "step": 70944 }, { "epoch": 0.14005774237730403, "grad_norm": 0.09268078207969666, "learning_rate": 8.634315671845837e-05, "loss": 1.6884, "step": 70976 }, { "epoch": 0.14012088833870046, "grad_norm": 0.13251662254333496, "learning_rate": 8.633681649409368e-05, "loss": 1.6901, "step": 71008 }, { "epoch": 0.1401840343000969, "grad_norm": 0.10127288103103638, "learning_rate": 8.6330476269729e-05, "loss": 1.6996, "step": 71040 }, { "epoch": 0.14024718026149335, "grad_norm": 0.11469605565071106, "learning_rate": 8.632413604536431e-05, "loss": 1.697, "step": 71072 }, { "epoch": 0.14031032622288978, "grad_norm": 0.10494356602430344, "learning_rate": 8.631779582099963e-05, "loss": 1.6868, "step": 71104 }, { "epoch": 0.14037347218428622, "grad_norm": 0.10769851505756378, "learning_rate": 8.631145559663493e-05, "loss": 1.6928, "step": 71136 }, { "epoch": 0.14043661814568267, "grad_norm": 0.09526403248310089, "learning_rate": 8.630511537227024e-05, "loss": 1.704, "step": 71168 }, { "epoch": 0.1404997641070791, "grad_norm": 0.10641741752624512, "learning_rate": 8.629877514790556e-05, "loss": 1.6911, "step": 71200 }, { "epoch": 0.14056291006847554, "grad_norm": 0.10984072834253311, "learning_rate": 8.629243492354086e-05, "loss": 1.6918, "step": 71232 }, { "epoch": 0.140626056029872, "grad_norm": 0.09944106638431549, "learning_rate": 8.628609469917617e-05, "loss": 1.6914, "step": 71264 }, { "epoch": 0.14068920199126841, "grad_norm": 0.10753072053194046, "learning_rate": 8.627975447481148e-05, "loss": 1.6877, "step": 71296 }, { "epoch": 0.14075234795266486, "grad_norm": 0.09593670070171356, "learning_rate": 8.627341425044679e-05, "loss": 1.6755, "step": 71328 }, { "epoch": 0.1408154939140613, "grad_norm": 0.10216441750526428, "learning_rate": 8.62670740260821e-05, "loss": 1.6989, "step": 71360 }, { "epoch": 0.14087863987545773, "grad_norm": 0.09993880987167358, "learning_rate": 8.62607338017174e-05, "loss": 1.6991, "step": 71392 }, { "epoch": 0.14094178583685418, "grad_norm": 0.10341391712427139, "learning_rate": 8.625439357735272e-05, "loss": 1.6955, "step": 71424 }, { "epoch": 0.14100493179825063, "grad_norm": 0.10363147407770157, "learning_rate": 8.624805335298803e-05, "loss": 1.6899, "step": 71456 }, { "epoch": 0.14106807775964705, "grad_norm": 0.09979966282844543, "learning_rate": 8.624171312862335e-05, "loss": 1.6814, "step": 71488 }, { "epoch": 0.1411312237210435, "grad_norm": 0.10351796448230743, "learning_rate": 8.623537290425865e-05, "loss": 1.6875, "step": 71520 }, { "epoch": 0.14119436968243995, "grad_norm": 0.11131934076547623, "learning_rate": 8.622903267989396e-05, "loss": 1.695, "step": 71552 }, { "epoch": 0.14125751564383637, "grad_norm": 0.11410072445869446, "learning_rate": 8.622269245552928e-05, "loss": 1.681, "step": 71584 }, { "epoch": 0.14132066160523282, "grad_norm": 0.10665030032396317, "learning_rate": 8.62163522311646e-05, "loss": 1.6993, "step": 71616 }, { "epoch": 0.14138380756662927, "grad_norm": 0.10028386116027832, "learning_rate": 8.62100120067999e-05, "loss": 1.6771, "step": 71648 }, { "epoch": 0.14144695352802572, "grad_norm": 0.10181780159473419, "learning_rate": 8.62036717824352e-05, "loss": 1.7073, "step": 71680 }, { "epoch": 0.14151009948942214, "grad_norm": 0.09605645388364792, "learning_rate": 8.619733155807051e-05, "loss": 1.6922, "step": 71712 }, { "epoch": 0.1415732454508186, "grad_norm": 0.09963902831077576, "learning_rate": 8.619099133370583e-05, "loss": 1.6964, "step": 71744 }, { "epoch": 0.14163639141221504, "grad_norm": 0.10210099816322327, "learning_rate": 8.618465110934114e-05, "loss": 1.7024, "step": 71776 }, { "epoch": 0.14169953737361146, "grad_norm": 0.10228384286165237, "learning_rate": 8.617831088497644e-05, "loss": 1.6848, "step": 71808 }, { "epoch": 0.1417626833350079, "grad_norm": 0.11028777807950974, "learning_rate": 8.617197066061176e-05, "loss": 1.6844, "step": 71840 }, { "epoch": 0.14182582929640436, "grad_norm": 0.09842659533023834, "learning_rate": 8.616563043624707e-05, "loss": 1.6947, "step": 71872 }, { "epoch": 0.14188897525780078, "grad_norm": 0.09887228161096573, "learning_rate": 8.615929021188238e-05, "loss": 1.679, "step": 71904 }, { "epoch": 0.14195212121919723, "grad_norm": 0.10034212470054626, "learning_rate": 8.615294998751769e-05, "loss": 1.6803, "step": 71936 }, { "epoch": 0.14201526718059368, "grad_norm": 0.10476851463317871, "learning_rate": 8.6146609763153e-05, "loss": 1.6906, "step": 71968 }, { "epoch": 0.1420784131419901, "grad_norm": 0.10309062153100967, "learning_rate": 8.614026953878831e-05, "loss": 1.6927, "step": 72000 }, { "epoch": 0.14214155910338655, "grad_norm": 0.10585368424654007, "learning_rate": 8.613392931442363e-05, "loss": 1.7076, "step": 72032 }, { "epoch": 0.142204705064783, "grad_norm": 0.10662491619586945, "learning_rate": 8.612758909005893e-05, "loss": 1.6831, "step": 72064 }, { "epoch": 0.14226785102617942, "grad_norm": 0.10366757959127426, "learning_rate": 8.612124886569423e-05, "loss": 1.6861, "step": 72096 }, { "epoch": 0.14233099698757587, "grad_norm": 0.09764306992292404, "learning_rate": 8.611490864132955e-05, "loss": 1.6837, "step": 72128 }, { "epoch": 0.14239414294897232, "grad_norm": 0.10423771291971207, "learning_rate": 8.610856841696486e-05, "loss": 1.6838, "step": 72160 }, { "epoch": 0.14245728891036874, "grad_norm": 0.10319622606039047, "learning_rate": 8.610222819260016e-05, "loss": 1.6836, "step": 72192 }, { "epoch": 0.1425204348717652, "grad_norm": 0.09485956281423569, "learning_rate": 8.609588796823548e-05, "loss": 1.6806, "step": 72224 }, { "epoch": 0.14258358083316164, "grad_norm": 0.09834043681621552, "learning_rate": 8.608954774387079e-05, "loss": 1.687, "step": 72256 }, { "epoch": 0.14264672679455806, "grad_norm": 0.10211475193500519, "learning_rate": 8.60832075195061e-05, "loss": 1.6771, "step": 72288 }, { "epoch": 0.1427098727559545, "grad_norm": 0.10114586353302002, "learning_rate": 8.60768672951414e-05, "loss": 1.6837, "step": 72320 }, { "epoch": 0.14277301871735096, "grad_norm": 0.09665479511022568, "learning_rate": 8.607052707077672e-05, "loss": 1.67, "step": 72352 }, { "epoch": 0.14283616467874738, "grad_norm": 0.10244018584489822, "learning_rate": 8.606418684641203e-05, "loss": 1.6902, "step": 72384 }, { "epoch": 0.14289931064014383, "grad_norm": 0.10259303450584412, "learning_rate": 8.605784662204735e-05, "loss": 1.6757, "step": 72416 }, { "epoch": 0.14296245660154028, "grad_norm": 0.09692277014255524, "learning_rate": 8.605150639768266e-05, "loss": 1.6925, "step": 72448 }, { "epoch": 0.1430256025629367, "grad_norm": 0.10549569129943848, "learning_rate": 8.604516617331797e-05, "loss": 1.6673, "step": 72480 }, { "epoch": 0.14308874852433315, "grad_norm": 0.09814869612455368, "learning_rate": 8.603882594895327e-05, "loss": 1.6909, "step": 72512 }, { "epoch": 0.1431518944857296, "grad_norm": 0.10366464406251907, "learning_rate": 8.603248572458858e-05, "loss": 1.6755, "step": 72544 }, { "epoch": 0.14321504044712602, "grad_norm": 0.10869229584932327, "learning_rate": 8.60261455002239e-05, "loss": 1.696, "step": 72576 }, { "epoch": 0.14327818640852247, "grad_norm": 0.10145995020866394, "learning_rate": 8.60198052758592e-05, "loss": 1.6836, "step": 72608 }, { "epoch": 0.14334133236991892, "grad_norm": 0.10098624974489212, "learning_rate": 8.601346505149451e-05, "loss": 1.694, "step": 72640 }, { "epoch": 0.14340447833131534, "grad_norm": 0.09813161194324493, "learning_rate": 8.600712482712983e-05, "loss": 1.7053, "step": 72672 }, { "epoch": 0.1434676242927118, "grad_norm": 0.09813837707042694, "learning_rate": 8.600078460276514e-05, "loss": 1.6982, "step": 72704 }, { "epoch": 0.14353077025410824, "grad_norm": 0.1082853302359581, "learning_rate": 8.599444437840044e-05, "loss": 1.6991, "step": 72736 }, { "epoch": 0.14359391621550466, "grad_norm": 0.1084531843662262, "learning_rate": 8.598810415403576e-05, "loss": 1.69, "step": 72768 }, { "epoch": 0.1436570621769011, "grad_norm": 0.11164296418428421, "learning_rate": 8.598176392967107e-05, "loss": 1.6979, "step": 72800 }, { "epoch": 0.14372020813829756, "grad_norm": 0.1038421243429184, "learning_rate": 8.597542370530638e-05, "loss": 1.6716, "step": 72832 }, { "epoch": 0.14378335409969398, "grad_norm": 0.10914447903633118, "learning_rate": 8.596908348094169e-05, "loss": 1.67, "step": 72864 }, { "epoch": 0.14384650006109043, "grad_norm": 0.09301458299160004, "learning_rate": 8.5962743256577e-05, "loss": 1.6815, "step": 72896 }, { "epoch": 0.14390964602248688, "grad_norm": 0.10775292664766312, "learning_rate": 8.59564030322123e-05, "loss": 1.6871, "step": 72928 }, { "epoch": 0.1439727919838833, "grad_norm": 0.10177384316921234, "learning_rate": 8.595006280784762e-05, "loss": 1.6841, "step": 72960 }, { "epoch": 0.14403593794527975, "grad_norm": 0.09420222043991089, "learning_rate": 8.594372258348292e-05, "loss": 1.6732, "step": 72992 }, { "epoch": 0.1440990839066762, "grad_norm": 0.1003427729010582, "learning_rate": 8.593738235911823e-05, "loss": 1.6898, "step": 73024 }, { "epoch": 0.14416222986807262, "grad_norm": 0.09997972846031189, "learning_rate": 8.593104213475355e-05, "loss": 1.6964, "step": 73056 }, { "epoch": 0.14422537582946907, "grad_norm": 0.1105961725115776, "learning_rate": 8.592470191038886e-05, "loss": 1.6845, "step": 73088 }, { "epoch": 0.14428852179086551, "grad_norm": 0.09686896204948425, "learning_rate": 8.591836168602418e-05, "loss": 1.6908, "step": 73120 }, { "epoch": 0.14435166775226194, "grad_norm": 0.09825839102268219, "learning_rate": 8.591202146165948e-05, "loss": 1.6752, "step": 73152 }, { "epoch": 0.14441481371365839, "grad_norm": 0.09540785104036331, "learning_rate": 8.590568123729479e-05, "loss": 1.6784, "step": 73184 }, { "epoch": 0.14447795967505483, "grad_norm": 0.1011510118842125, "learning_rate": 8.58993410129301e-05, "loss": 1.6986, "step": 73216 }, { "epoch": 0.14454110563645126, "grad_norm": 0.09987585246562958, "learning_rate": 8.589300078856542e-05, "loss": 1.6775, "step": 73248 }, { "epoch": 0.1446042515978477, "grad_norm": 0.09723809361457825, "learning_rate": 8.588666056420072e-05, "loss": 1.6746, "step": 73280 }, { "epoch": 0.14466739755924415, "grad_norm": 0.09988243877887726, "learning_rate": 8.588032033983604e-05, "loss": 1.6779, "step": 73312 }, { "epoch": 0.1447305435206406, "grad_norm": 0.10036462545394897, "learning_rate": 8.587398011547134e-05, "loss": 1.6814, "step": 73344 }, { "epoch": 0.14479368948203702, "grad_norm": 0.10327211767435074, "learning_rate": 8.586763989110665e-05, "loss": 1.6775, "step": 73376 }, { "epoch": 0.14485683544343347, "grad_norm": 0.09883878380060196, "learning_rate": 8.586129966674195e-05, "loss": 1.6837, "step": 73408 }, { "epoch": 0.14491998140482992, "grad_norm": 0.11086346954107285, "learning_rate": 8.585495944237727e-05, "loss": 1.6891, "step": 73440 }, { "epoch": 0.14498312736622634, "grad_norm": 0.09583024680614471, "learning_rate": 8.584861921801258e-05, "loss": 1.6845, "step": 73472 }, { "epoch": 0.1450462733276228, "grad_norm": 0.0922607034444809, "learning_rate": 8.58422789936479e-05, "loss": 1.6743, "step": 73504 }, { "epoch": 0.14510941928901924, "grad_norm": 0.10028504580259323, "learning_rate": 8.58359387692832e-05, "loss": 1.6673, "step": 73536 }, { "epoch": 0.14517256525041566, "grad_norm": 0.09664369374513626, "learning_rate": 8.582959854491851e-05, "loss": 1.6664, "step": 73568 }, { "epoch": 0.1452357112118121, "grad_norm": 0.09518463164567947, "learning_rate": 8.582325832055383e-05, "loss": 1.6921, "step": 73600 }, { "epoch": 0.14529885717320856, "grad_norm": 0.10064047574996948, "learning_rate": 8.581691809618914e-05, "loss": 1.6965, "step": 73632 }, { "epoch": 0.14536200313460498, "grad_norm": 0.10300011932849884, "learning_rate": 8.581057787182444e-05, "loss": 1.6768, "step": 73664 }, { "epoch": 0.14542514909600143, "grad_norm": 0.10181638598442078, "learning_rate": 8.580423764745976e-05, "loss": 1.6763, "step": 73696 }, { "epoch": 0.14548829505739788, "grad_norm": 0.10829749703407288, "learning_rate": 8.579789742309507e-05, "loss": 1.6851, "step": 73728 }, { "epoch": 0.1455514410187943, "grad_norm": 0.09572859853506088, "learning_rate": 8.579155719873037e-05, "loss": 1.694, "step": 73760 }, { "epoch": 0.14561458698019075, "grad_norm": 0.10126513987779617, "learning_rate": 8.578521697436569e-05, "loss": 1.6806, "step": 73792 }, { "epoch": 0.1456777329415872, "grad_norm": 0.09824902564287186, "learning_rate": 8.577887675000099e-05, "loss": 1.6818, "step": 73824 }, { "epoch": 0.14574087890298362, "grad_norm": 0.10116986930370331, "learning_rate": 8.57725365256363e-05, "loss": 1.6851, "step": 73856 }, { "epoch": 0.14580402486438007, "grad_norm": 0.10895473510026932, "learning_rate": 8.576619630127162e-05, "loss": 1.6653, "step": 73888 }, { "epoch": 0.14586717082577652, "grad_norm": 0.0989847332239151, "learning_rate": 8.575985607690693e-05, "loss": 1.6867, "step": 73920 }, { "epoch": 0.14593031678717294, "grad_norm": 0.10052268207073212, "learning_rate": 8.575351585254223e-05, "loss": 1.6774, "step": 73952 }, { "epoch": 0.1459934627485694, "grad_norm": 0.10881651192903519, "learning_rate": 8.574717562817755e-05, "loss": 1.6932, "step": 73984 }, { "epoch": 0.14605660870996584, "grad_norm": 0.09766258299350739, "learning_rate": 8.574083540381286e-05, "loss": 1.6763, "step": 74016 }, { "epoch": 0.14611975467136226, "grad_norm": 0.10215530544519424, "learning_rate": 8.573449517944818e-05, "loss": 1.6939, "step": 74048 }, { "epoch": 0.1461829006327587, "grad_norm": 0.09876430034637451, "learning_rate": 8.572815495508348e-05, "loss": 1.6628, "step": 74080 }, { "epoch": 0.14624604659415516, "grad_norm": 0.10266532748937607, "learning_rate": 8.572181473071879e-05, "loss": 1.6937, "step": 74112 }, { "epoch": 0.14630919255555158, "grad_norm": 0.10835704952478409, "learning_rate": 8.571547450635409e-05, "loss": 1.6927, "step": 74144 }, { "epoch": 0.14637233851694803, "grad_norm": 0.09848356992006302, "learning_rate": 8.57091342819894e-05, "loss": 1.6849, "step": 74176 }, { "epoch": 0.14643548447834448, "grad_norm": 0.11198996752500534, "learning_rate": 8.570279405762471e-05, "loss": 1.6871, "step": 74208 }, { "epoch": 0.1464986304397409, "grad_norm": 0.0901528149843216, "learning_rate": 8.569645383326002e-05, "loss": 1.6763, "step": 74240 }, { "epoch": 0.14656177640113735, "grad_norm": 0.10375019907951355, "learning_rate": 8.569011360889534e-05, "loss": 1.6819, "step": 74272 }, { "epoch": 0.1466249223625338, "grad_norm": 0.09366860240697861, "learning_rate": 8.568377338453065e-05, "loss": 1.6828, "step": 74304 }, { "epoch": 0.14668806832393022, "grad_norm": 0.1028008833527565, "learning_rate": 8.567743316016595e-05, "loss": 1.6846, "step": 74336 }, { "epoch": 0.14675121428532667, "grad_norm": 0.09658585488796234, "learning_rate": 8.567109293580127e-05, "loss": 1.6836, "step": 74368 }, { "epoch": 0.14681436024672312, "grad_norm": 0.10165378451347351, "learning_rate": 8.566475271143658e-05, "loss": 1.6827, "step": 74400 }, { "epoch": 0.14687750620811954, "grad_norm": 0.09922339767217636, "learning_rate": 8.56584124870719e-05, "loss": 1.6709, "step": 74432 }, { "epoch": 0.146940652169516, "grad_norm": 0.09646771848201752, "learning_rate": 8.565207226270721e-05, "loss": 1.6728, "step": 74464 }, { "epoch": 0.14700379813091244, "grad_norm": 0.09826529026031494, "learning_rate": 8.564573203834251e-05, "loss": 1.6709, "step": 74496 }, { "epoch": 0.14706694409230886, "grad_norm": 0.09823821485042572, "learning_rate": 8.563939181397783e-05, "loss": 1.6891, "step": 74528 }, { "epoch": 0.1471300900537053, "grad_norm": 0.10077358037233353, "learning_rate": 8.563305158961313e-05, "loss": 1.6731, "step": 74560 }, { "epoch": 0.14719323601510176, "grad_norm": 0.09781774878501892, "learning_rate": 8.562671136524844e-05, "loss": 1.6838, "step": 74592 }, { "epoch": 0.14725638197649818, "grad_norm": 0.1077883169054985, "learning_rate": 8.562037114088374e-05, "loss": 1.6772, "step": 74624 }, { "epoch": 0.14731952793789463, "grad_norm": 0.10241179168224335, "learning_rate": 8.561403091651906e-05, "loss": 1.6659, "step": 74656 }, { "epoch": 0.14738267389929108, "grad_norm": 0.10631418973207474, "learning_rate": 8.560769069215437e-05, "loss": 1.6809, "step": 74688 }, { "epoch": 0.1474458198606875, "grad_norm": 0.09572099149227142, "learning_rate": 8.560135046778969e-05, "loss": 1.6817, "step": 74720 }, { "epoch": 0.14750896582208395, "grad_norm": 0.09712091088294983, "learning_rate": 8.559501024342499e-05, "loss": 1.6787, "step": 74752 }, { "epoch": 0.1475721117834804, "grad_norm": 0.10180314630270004, "learning_rate": 8.55886700190603e-05, "loss": 1.6854, "step": 74784 }, { "epoch": 0.14763525774487682, "grad_norm": 0.10517259687185287, "learning_rate": 8.558232979469562e-05, "loss": 1.6719, "step": 74816 }, { "epoch": 0.14769840370627327, "grad_norm": 0.10625191032886505, "learning_rate": 8.557598957033093e-05, "loss": 1.6805, "step": 74848 }, { "epoch": 0.14776154966766972, "grad_norm": 0.11206270009279251, "learning_rate": 8.556964934596623e-05, "loss": 1.6888, "step": 74880 }, { "epoch": 0.14782469562906614, "grad_norm": 0.09769652783870697, "learning_rate": 8.556330912160155e-05, "loss": 1.6808, "step": 74912 }, { "epoch": 0.1478878415904626, "grad_norm": 0.0974825918674469, "learning_rate": 8.555696889723686e-05, "loss": 1.6759, "step": 74944 }, { "epoch": 0.14795098755185904, "grad_norm": 0.11091966181993484, "learning_rate": 8.555062867287216e-05, "loss": 1.6925, "step": 74976 }, { "epoch": 0.14801413351325546, "grad_norm": 0.09856279194355011, "learning_rate": 8.554428844850748e-05, "loss": 1.6884, "step": 75008 }, { "epoch": 0.1480772794746519, "grad_norm": 0.10277500003576279, "learning_rate": 8.553794822414278e-05, "loss": 1.6786, "step": 75040 }, { "epoch": 0.14814042543604836, "grad_norm": 0.10396499931812286, "learning_rate": 8.553160799977809e-05, "loss": 1.6783, "step": 75072 }, { "epoch": 0.1482035713974448, "grad_norm": 0.10312742739915848, "learning_rate": 8.552526777541341e-05, "loss": 1.6952, "step": 75104 }, { "epoch": 0.14826671735884123, "grad_norm": 0.09461307525634766, "learning_rate": 8.551892755104872e-05, "loss": 1.6655, "step": 75136 }, { "epoch": 0.14832986332023768, "grad_norm": 0.09683814644813538, "learning_rate": 8.551258732668402e-05, "loss": 1.6874, "step": 75168 }, { "epoch": 0.14839300928163413, "grad_norm": 0.09330712258815765, "learning_rate": 8.550624710231934e-05, "loss": 1.69, "step": 75200 }, { "epoch": 0.14845615524303055, "grad_norm": 0.1111738532781601, "learning_rate": 8.549990687795465e-05, "loss": 1.6786, "step": 75232 }, { "epoch": 0.148519301204427, "grad_norm": 0.09655512124300003, "learning_rate": 8.549356665358997e-05, "loss": 1.6778, "step": 75264 }, { "epoch": 0.14858244716582344, "grad_norm": 0.11547809094190598, "learning_rate": 8.548722642922527e-05, "loss": 1.6858, "step": 75296 }, { "epoch": 0.14864559312721987, "grad_norm": 0.0961538702249527, "learning_rate": 8.548088620486058e-05, "loss": 1.6843, "step": 75328 }, { "epoch": 0.14870873908861632, "grad_norm": 0.09542649984359741, "learning_rate": 8.54745459804959e-05, "loss": 1.685, "step": 75360 }, { "epoch": 0.14877188505001276, "grad_norm": 0.10553640127182007, "learning_rate": 8.54682057561312e-05, "loss": 1.6773, "step": 75392 }, { "epoch": 0.14883503101140919, "grad_norm": 0.09616170078516006, "learning_rate": 8.546186553176651e-05, "loss": 1.6752, "step": 75424 }, { "epoch": 0.14889817697280563, "grad_norm": 0.09604093432426453, "learning_rate": 8.545552530740181e-05, "loss": 1.683, "step": 75456 }, { "epoch": 0.14896132293420208, "grad_norm": 0.09946244955062866, "learning_rate": 8.544918508303713e-05, "loss": 1.6827, "step": 75488 }, { "epoch": 0.1490244688955985, "grad_norm": 0.1011558547616005, "learning_rate": 8.544284485867244e-05, "loss": 1.6661, "step": 75520 }, { "epoch": 0.14908761485699495, "grad_norm": 0.09891995042562485, "learning_rate": 8.543650463430774e-05, "loss": 1.6751, "step": 75552 }, { "epoch": 0.1491507608183914, "grad_norm": 0.10548239946365356, "learning_rate": 8.543016440994306e-05, "loss": 1.6824, "step": 75584 }, { "epoch": 0.14921390677978782, "grad_norm": 0.10210949182510376, "learning_rate": 8.542382418557837e-05, "loss": 1.6775, "step": 75616 }, { "epoch": 0.14927705274118427, "grad_norm": 0.0944737046957016, "learning_rate": 8.541748396121369e-05, "loss": 1.6696, "step": 75648 }, { "epoch": 0.14934019870258072, "grad_norm": 0.10408736765384674, "learning_rate": 8.541114373684899e-05, "loss": 1.6815, "step": 75680 }, { "epoch": 0.14940334466397714, "grad_norm": 0.10370989888906479, "learning_rate": 8.54048035124843e-05, "loss": 1.6828, "step": 75712 }, { "epoch": 0.1494664906253736, "grad_norm": 0.11968197673559189, "learning_rate": 8.539846328811962e-05, "loss": 1.6826, "step": 75744 }, { "epoch": 0.14952963658677004, "grad_norm": 0.10264799743890762, "learning_rate": 8.539212306375493e-05, "loss": 1.6844, "step": 75776 }, { "epoch": 0.14959278254816646, "grad_norm": 0.11237389594316483, "learning_rate": 8.538578283939023e-05, "loss": 1.7027, "step": 75808 }, { "epoch": 0.1496559285095629, "grad_norm": 0.10504571348428726, "learning_rate": 8.537944261502553e-05, "loss": 1.6617, "step": 75840 }, { "epoch": 0.14971907447095936, "grad_norm": 0.11485075205564499, "learning_rate": 8.537310239066085e-05, "loss": 1.6814, "step": 75872 }, { "epoch": 0.14978222043235578, "grad_norm": 0.09677886962890625, "learning_rate": 8.536676216629616e-05, "loss": 1.6867, "step": 75904 }, { "epoch": 0.14984536639375223, "grad_norm": 0.0998137816786766, "learning_rate": 8.536042194193148e-05, "loss": 1.6789, "step": 75936 }, { "epoch": 0.14990851235514868, "grad_norm": 0.10647508502006531, "learning_rate": 8.535408171756678e-05, "loss": 1.668, "step": 75968 }, { "epoch": 0.1499716583165451, "grad_norm": 0.09866531938314438, "learning_rate": 8.534774149320209e-05, "loss": 1.675, "step": 76000 }, { "epoch": 0.15003480427794155, "grad_norm": 0.10397178679704666, "learning_rate": 8.534140126883741e-05, "loss": 1.6773, "step": 76032 }, { "epoch": 0.150097950239338, "grad_norm": 0.10752294212579727, "learning_rate": 8.533506104447272e-05, "loss": 1.676, "step": 76064 }, { "epoch": 0.15016109620073442, "grad_norm": 0.10332654416561127, "learning_rate": 8.532872082010802e-05, "loss": 1.6925, "step": 76096 }, { "epoch": 0.15022424216213087, "grad_norm": 0.10872095078229904, "learning_rate": 8.532238059574334e-05, "loss": 1.6749, "step": 76128 }, { "epoch": 0.15028738812352732, "grad_norm": 0.1017443910241127, "learning_rate": 8.531604037137865e-05, "loss": 1.6711, "step": 76160 }, { "epoch": 0.15035053408492374, "grad_norm": 0.09586425870656967, "learning_rate": 8.530970014701397e-05, "loss": 1.6746, "step": 76192 }, { "epoch": 0.1504136800463202, "grad_norm": 0.10036095231771469, "learning_rate": 8.530335992264927e-05, "loss": 1.6751, "step": 76224 }, { "epoch": 0.15047682600771664, "grad_norm": 0.09590496122837067, "learning_rate": 8.529701969828457e-05, "loss": 1.6907, "step": 76256 }, { "epoch": 0.15053997196911306, "grad_norm": 0.10554616898298264, "learning_rate": 8.529067947391988e-05, "loss": 1.6753, "step": 76288 }, { "epoch": 0.1506031179305095, "grad_norm": 0.1041039377450943, "learning_rate": 8.52843392495552e-05, "loss": 1.6745, "step": 76320 }, { "epoch": 0.15066626389190596, "grad_norm": 0.10178229212760925, "learning_rate": 8.52779990251905e-05, "loss": 1.6791, "step": 76352 }, { "epoch": 0.15072940985330238, "grad_norm": 0.09858375042676926, "learning_rate": 8.527165880082581e-05, "loss": 1.681, "step": 76384 }, { "epoch": 0.15079255581469883, "grad_norm": 0.10965598374605179, "learning_rate": 8.526531857646113e-05, "loss": 1.6801, "step": 76416 }, { "epoch": 0.15085570177609528, "grad_norm": 0.11225444823503494, "learning_rate": 8.525897835209644e-05, "loss": 1.6729, "step": 76448 }, { "epoch": 0.1509188477374917, "grad_norm": 0.10393206775188446, "learning_rate": 8.525263812773176e-05, "loss": 1.6569, "step": 76480 }, { "epoch": 0.15098199369888815, "grad_norm": 0.10081935673952103, "learning_rate": 8.524629790336706e-05, "loss": 1.6748, "step": 76512 }, { "epoch": 0.1510451396602846, "grad_norm": 0.10383206605911255, "learning_rate": 8.523995767900237e-05, "loss": 1.6951, "step": 76544 }, { "epoch": 0.15110828562168102, "grad_norm": 0.09918275475502014, "learning_rate": 8.523361745463769e-05, "loss": 1.6797, "step": 76576 }, { "epoch": 0.15117143158307747, "grad_norm": 0.10046989470720291, "learning_rate": 8.5227277230273e-05, "loss": 1.6592, "step": 76608 }, { "epoch": 0.15123457754447392, "grad_norm": 0.1048884242773056, "learning_rate": 8.52209370059083e-05, "loss": 1.6787, "step": 76640 }, { "epoch": 0.15129772350587034, "grad_norm": 0.10680338740348816, "learning_rate": 8.52145967815436e-05, "loss": 1.6862, "step": 76672 }, { "epoch": 0.1513608694672668, "grad_norm": 0.09893324971199036, "learning_rate": 8.520825655717892e-05, "loss": 1.6653, "step": 76704 }, { "epoch": 0.15142401542866324, "grad_norm": 0.10264407098293304, "learning_rate": 8.520191633281423e-05, "loss": 1.7012, "step": 76736 }, { "epoch": 0.1514871613900597, "grad_norm": 0.1042848452925682, "learning_rate": 8.519557610844953e-05, "loss": 1.6781, "step": 76768 }, { "epoch": 0.1515503073514561, "grad_norm": 0.10645639896392822, "learning_rate": 8.518923588408485e-05, "loss": 1.6829, "step": 76800 }, { "epoch": 0.15161345331285256, "grad_norm": 0.10583385825157166, "learning_rate": 8.518289565972016e-05, "loss": 1.6711, "step": 76832 }, { "epoch": 0.151676599274249, "grad_norm": 0.10256838798522949, "learning_rate": 8.517655543535548e-05, "loss": 1.6751, "step": 76864 }, { "epoch": 0.15173974523564543, "grad_norm": 0.09623630344867706, "learning_rate": 8.517021521099078e-05, "loss": 1.6767, "step": 76896 }, { "epoch": 0.15180289119704188, "grad_norm": 0.09691829979419708, "learning_rate": 8.516387498662609e-05, "loss": 1.6617, "step": 76928 }, { "epoch": 0.15186603715843833, "grad_norm": 0.09584102779626846, "learning_rate": 8.515753476226141e-05, "loss": 1.6815, "step": 76960 }, { "epoch": 0.15192918311983475, "grad_norm": 0.10497911274433136, "learning_rate": 8.515119453789672e-05, "loss": 1.6784, "step": 76992 }, { "epoch": 0.1519923290812312, "grad_norm": 0.10860483348369598, "learning_rate": 8.514485431353202e-05, "loss": 1.675, "step": 77024 }, { "epoch": 0.15205547504262765, "grad_norm": 0.10359463095664978, "learning_rate": 8.513851408916734e-05, "loss": 1.6742, "step": 77056 }, { "epoch": 0.15211862100402407, "grad_norm": 0.1057921051979065, "learning_rate": 8.513217386480264e-05, "loss": 1.685, "step": 77088 }, { "epoch": 0.15218176696542052, "grad_norm": 0.09901981055736542, "learning_rate": 8.512583364043795e-05, "loss": 1.6793, "step": 77120 }, { "epoch": 0.15224491292681697, "grad_norm": 0.09833051264286041, "learning_rate": 8.511949341607327e-05, "loss": 1.6694, "step": 77152 }, { "epoch": 0.1523080588882134, "grad_norm": 0.099812813103199, "learning_rate": 8.511315319170857e-05, "loss": 1.6693, "step": 77184 }, { "epoch": 0.15237120484960984, "grad_norm": 0.09430838376283646, "learning_rate": 8.510681296734388e-05, "loss": 1.6628, "step": 77216 }, { "epoch": 0.1524343508110063, "grad_norm": 0.10515926033258438, "learning_rate": 8.51004727429792e-05, "loss": 1.6719, "step": 77248 }, { "epoch": 0.1524974967724027, "grad_norm": 0.10583945363759995, "learning_rate": 8.509413251861451e-05, "loss": 1.6751, "step": 77280 }, { "epoch": 0.15256064273379916, "grad_norm": 0.12096399068832397, "learning_rate": 8.508779229424981e-05, "loss": 1.6875, "step": 77312 }, { "epoch": 0.1526237886951956, "grad_norm": 0.10812398046255112, "learning_rate": 8.508145206988513e-05, "loss": 1.6746, "step": 77344 }, { "epoch": 0.15268693465659203, "grad_norm": 0.10630808025598526, "learning_rate": 8.507511184552044e-05, "loss": 1.6723, "step": 77376 }, { "epoch": 0.15275008061798848, "grad_norm": 0.1007491871714592, "learning_rate": 8.506877162115576e-05, "loss": 1.6876, "step": 77408 }, { "epoch": 0.15281322657938493, "grad_norm": 0.09950321167707443, "learning_rate": 8.506243139679106e-05, "loss": 1.6703, "step": 77440 }, { "epoch": 0.15287637254078135, "grad_norm": 0.11235208809375763, "learning_rate": 8.505609117242637e-05, "loss": 1.6802, "step": 77472 }, { "epoch": 0.1529395185021778, "grad_norm": 0.10004584491252899, "learning_rate": 8.504975094806167e-05, "loss": 1.6815, "step": 77504 }, { "epoch": 0.15300266446357424, "grad_norm": 0.12749536335468292, "learning_rate": 8.504341072369699e-05, "loss": 1.6744, "step": 77536 }, { "epoch": 0.15306581042497067, "grad_norm": 0.0963485911488533, "learning_rate": 8.503707049933229e-05, "loss": 1.661, "step": 77568 }, { "epoch": 0.15312895638636712, "grad_norm": 0.11256939172744751, "learning_rate": 8.50307302749676e-05, "loss": 1.6712, "step": 77600 }, { "epoch": 0.15319210234776356, "grad_norm": 0.10460944473743439, "learning_rate": 8.502439005060292e-05, "loss": 1.6841, "step": 77632 }, { "epoch": 0.15325524830915999, "grad_norm": 0.09804650396108627, "learning_rate": 8.501804982623823e-05, "loss": 1.6739, "step": 77664 }, { "epoch": 0.15331839427055644, "grad_norm": 0.09925612807273865, "learning_rate": 8.501170960187353e-05, "loss": 1.6678, "step": 77696 }, { "epoch": 0.15338154023195288, "grad_norm": 0.0983809307217598, "learning_rate": 8.500536937750885e-05, "loss": 1.675, "step": 77728 }, { "epoch": 0.1534446861933493, "grad_norm": 0.09494610130786896, "learning_rate": 8.499902915314416e-05, "loss": 1.6699, "step": 77760 }, { "epoch": 0.15350783215474575, "grad_norm": 0.10970768332481384, "learning_rate": 8.499268892877948e-05, "loss": 1.6911, "step": 77792 }, { "epoch": 0.1535709781161422, "grad_norm": 0.09740414470434189, "learning_rate": 8.498634870441479e-05, "loss": 1.6793, "step": 77824 }, { "epoch": 0.15363412407753863, "grad_norm": 0.10191775858402252, "learning_rate": 8.49800084800501e-05, "loss": 1.6697, "step": 77856 }, { "epoch": 0.15369727003893507, "grad_norm": 0.10068080574274063, "learning_rate": 8.497366825568541e-05, "loss": 1.6671, "step": 77888 }, { "epoch": 0.15376041600033152, "grad_norm": 0.10468345135450363, "learning_rate": 8.496732803132071e-05, "loss": 1.6843, "step": 77920 }, { "epoch": 0.15382356196172794, "grad_norm": 0.10640867799520493, "learning_rate": 8.496098780695602e-05, "loss": 1.6702, "step": 77952 }, { "epoch": 0.1538867079231244, "grad_norm": 0.09602980315685272, "learning_rate": 8.495464758259132e-05, "loss": 1.6641, "step": 77984 }, { "epoch": 0.15394985388452084, "grad_norm": 0.09832219779491425, "learning_rate": 8.494830735822664e-05, "loss": 1.6727, "step": 78016 }, { "epoch": 0.15401299984591726, "grad_norm": 0.09909886121749878, "learning_rate": 8.494196713386195e-05, "loss": 1.65, "step": 78048 }, { "epoch": 0.1540761458073137, "grad_norm": 0.10727988183498383, "learning_rate": 8.493562690949727e-05, "loss": 1.6878, "step": 78080 }, { "epoch": 0.15413929176871016, "grad_norm": 0.09718786180019379, "learning_rate": 8.492928668513257e-05, "loss": 1.686, "step": 78112 }, { "epoch": 0.15420243773010658, "grad_norm": 0.10660644620656967, "learning_rate": 8.492294646076788e-05, "loss": 1.6685, "step": 78144 }, { "epoch": 0.15426558369150303, "grad_norm": 0.09951602667570114, "learning_rate": 8.49166062364032e-05, "loss": 1.6774, "step": 78176 }, { "epoch": 0.15432872965289948, "grad_norm": 0.10155228525400162, "learning_rate": 8.491026601203851e-05, "loss": 1.6757, "step": 78208 }, { "epoch": 0.1543918756142959, "grad_norm": 0.099421925842762, "learning_rate": 8.490392578767381e-05, "loss": 1.673, "step": 78240 }, { "epoch": 0.15445502157569235, "grad_norm": 0.10386461764574051, "learning_rate": 8.489758556330913e-05, "loss": 1.6877, "step": 78272 }, { "epoch": 0.1545181675370888, "grad_norm": 0.09559035301208496, "learning_rate": 8.489124533894444e-05, "loss": 1.6781, "step": 78304 }, { "epoch": 0.15458131349848522, "grad_norm": 0.10044359415769577, "learning_rate": 8.488490511457974e-05, "loss": 1.6683, "step": 78336 }, { "epoch": 0.15464445945988167, "grad_norm": 0.09693678468465805, "learning_rate": 8.487856489021506e-05, "loss": 1.6837, "step": 78368 }, { "epoch": 0.15470760542127812, "grad_norm": 0.10030453652143478, "learning_rate": 8.487222466585036e-05, "loss": 1.6698, "step": 78400 }, { "epoch": 0.15477075138267457, "grad_norm": 0.10268690437078476, "learning_rate": 8.486588444148567e-05, "loss": 1.6572, "step": 78432 }, { "epoch": 0.154833897344071, "grad_norm": 0.09646078199148178, "learning_rate": 8.485954421712099e-05, "loss": 1.682, "step": 78464 }, { "epoch": 0.15489704330546744, "grad_norm": 0.10658205300569534, "learning_rate": 8.48532039927563e-05, "loss": 1.673, "step": 78496 }, { "epoch": 0.1549601892668639, "grad_norm": 0.09471545368432999, "learning_rate": 8.48468637683916e-05, "loss": 1.6803, "step": 78528 }, { "epoch": 0.1550233352282603, "grad_norm": 0.10045652091503143, "learning_rate": 8.484052354402692e-05, "loss": 1.6719, "step": 78560 }, { "epoch": 0.15508648118965676, "grad_norm": 0.09457363188266754, "learning_rate": 8.483418331966223e-05, "loss": 1.6612, "step": 78592 }, { "epoch": 0.1551496271510532, "grad_norm": 0.09976271539926529, "learning_rate": 8.482784309529755e-05, "loss": 1.6803, "step": 78624 }, { "epoch": 0.15521277311244963, "grad_norm": 0.09982792288064957, "learning_rate": 8.482150287093285e-05, "loss": 1.6812, "step": 78656 }, { "epoch": 0.15527591907384608, "grad_norm": 0.10151281207799911, "learning_rate": 8.481516264656816e-05, "loss": 1.6768, "step": 78688 }, { "epoch": 0.15533906503524253, "grad_norm": 0.10529918968677521, "learning_rate": 8.480882242220346e-05, "loss": 1.6673, "step": 78720 }, { "epoch": 0.15540221099663895, "grad_norm": 0.09730803221464157, "learning_rate": 8.480248219783878e-05, "loss": 1.6841, "step": 78752 }, { "epoch": 0.1554653569580354, "grad_norm": 0.10061978548765182, "learning_rate": 8.479614197347408e-05, "loss": 1.6829, "step": 78784 }, { "epoch": 0.15552850291943185, "grad_norm": 0.1007780134677887, "learning_rate": 8.47898017491094e-05, "loss": 1.6852, "step": 78816 }, { "epoch": 0.15559164888082827, "grad_norm": 0.10080109536647797, "learning_rate": 8.478346152474471e-05, "loss": 1.6831, "step": 78848 }, { "epoch": 0.15565479484222472, "grad_norm": 0.1095278188586235, "learning_rate": 8.477712130038002e-05, "loss": 1.6578, "step": 78880 }, { "epoch": 0.15571794080362117, "grad_norm": 0.11861991137266159, "learning_rate": 8.477078107601532e-05, "loss": 1.6692, "step": 78912 }, { "epoch": 0.1557810867650176, "grad_norm": 0.10310599207878113, "learning_rate": 8.476444085165064e-05, "loss": 1.6588, "step": 78944 }, { "epoch": 0.15584423272641404, "grad_norm": 0.11733296513557434, "learning_rate": 8.475810062728595e-05, "loss": 1.6691, "step": 78976 }, { "epoch": 0.1559073786878105, "grad_norm": 0.10434328764677048, "learning_rate": 8.475176040292127e-05, "loss": 1.6661, "step": 79008 }, { "epoch": 0.1559705246492069, "grad_norm": 0.10890328139066696, "learning_rate": 8.474542017855658e-05, "loss": 1.6771, "step": 79040 }, { "epoch": 0.15603367061060336, "grad_norm": 0.09577399492263794, "learning_rate": 8.473907995419188e-05, "loss": 1.6596, "step": 79072 }, { "epoch": 0.1560968165719998, "grad_norm": 0.10378000140190125, "learning_rate": 8.47327397298272e-05, "loss": 1.6549, "step": 79104 }, { "epoch": 0.15615996253339623, "grad_norm": 0.1202513799071312, "learning_rate": 8.47263995054625e-05, "loss": 1.669, "step": 79136 }, { "epoch": 0.15622310849479268, "grad_norm": 0.10733233392238617, "learning_rate": 8.472005928109781e-05, "loss": 1.6779, "step": 79168 }, { "epoch": 0.15628625445618913, "grad_norm": 0.10416159778833389, "learning_rate": 8.471371905673312e-05, "loss": 1.6851, "step": 79200 }, { "epoch": 0.15634940041758555, "grad_norm": 0.09278441220521927, "learning_rate": 8.470737883236843e-05, "loss": 1.6622, "step": 79232 }, { "epoch": 0.156412546378982, "grad_norm": 0.10320501774549484, "learning_rate": 8.470103860800374e-05, "loss": 1.6763, "step": 79264 }, { "epoch": 0.15647569234037845, "grad_norm": 0.11157459765672684, "learning_rate": 8.469469838363906e-05, "loss": 1.6868, "step": 79296 }, { "epoch": 0.15653883830177487, "grad_norm": 0.09679748862981796, "learning_rate": 8.468835815927436e-05, "loss": 1.6767, "step": 79328 }, { "epoch": 0.15660198426317132, "grad_norm": 0.10498563945293427, "learning_rate": 8.468201793490967e-05, "loss": 1.6637, "step": 79360 }, { "epoch": 0.15666513022456777, "grad_norm": 0.0998353511095047, "learning_rate": 8.467567771054499e-05, "loss": 1.6701, "step": 79392 }, { "epoch": 0.1567282761859642, "grad_norm": 0.10383282601833344, "learning_rate": 8.46693374861803e-05, "loss": 1.6921, "step": 79424 }, { "epoch": 0.15679142214736064, "grad_norm": 0.09842722117900848, "learning_rate": 8.46629972618156e-05, "loss": 1.6815, "step": 79456 }, { "epoch": 0.1568545681087571, "grad_norm": 0.11832182109355927, "learning_rate": 8.465665703745092e-05, "loss": 1.6766, "step": 79488 }, { "epoch": 0.1569177140701535, "grad_norm": 0.0950767919421196, "learning_rate": 8.465031681308623e-05, "loss": 1.6702, "step": 79520 }, { "epoch": 0.15698086003154996, "grad_norm": 0.09939881414175034, "learning_rate": 8.464397658872153e-05, "loss": 1.6637, "step": 79552 }, { "epoch": 0.1570440059929464, "grad_norm": 0.09540250152349472, "learning_rate": 8.463763636435685e-05, "loss": 1.6622, "step": 79584 }, { "epoch": 0.15710715195434283, "grad_norm": 0.11049431562423706, "learning_rate": 8.463129613999215e-05, "loss": 1.6683, "step": 79616 }, { "epoch": 0.15717029791573928, "grad_norm": 0.10123123973608017, "learning_rate": 8.462495591562746e-05, "loss": 1.6642, "step": 79648 }, { "epoch": 0.15723344387713573, "grad_norm": 0.09708897024393082, "learning_rate": 8.461861569126278e-05, "loss": 1.6746, "step": 79680 }, { "epoch": 0.15729658983853215, "grad_norm": 0.1010143831372261, "learning_rate": 8.46122754668981e-05, "loss": 1.6707, "step": 79712 }, { "epoch": 0.1573597357999286, "grad_norm": 0.10873035341501236, "learning_rate": 8.46059352425334e-05, "loss": 1.6878, "step": 79744 }, { "epoch": 0.15742288176132505, "grad_norm": 0.11764772236347198, "learning_rate": 8.459959501816871e-05, "loss": 1.6702, "step": 79776 }, { "epoch": 0.15748602772272147, "grad_norm": 0.10337798297405243, "learning_rate": 8.459325479380402e-05, "loss": 1.6721, "step": 79808 }, { "epoch": 0.15754917368411792, "grad_norm": 0.09046148508787155, "learning_rate": 8.458691456943934e-05, "loss": 1.6639, "step": 79840 }, { "epoch": 0.15761231964551436, "grad_norm": 0.10244788229465485, "learning_rate": 8.458057434507464e-05, "loss": 1.6601, "step": 79872 }, { "epoch": 0.1576754656069108, "grad_norm": 0.09827053546905518, "learning_rate": 8.457423412070995e-05, "loss": 1.6807, "step": 79904 }, { "epoch": 0.15773861156830724, "grad_norm": 0.10390309244394302, "learning_rate": 8.456789389634527e-05, "loss": 1.6795, "step": 79936 }, { "epoch": 0.15780175752970368, "grad_norm": 0.10280296206474304, "learning_rate": 8.456155367198057e-05, "loss": 1.6539, "step": 79968 }, { "epoch": 0.1578649034911001, "grad_norm": 0.09721670299768448, "learning_rate": 8.455521344761587e-05, "loss": 1.6642, "step": 80000 }, { "epoch": 0.15792804945249656, "grad_norm": 0.09536055475473404, "learning_rate": 8.454887322325119e-05, "loss": 1.6592, "step": 80032 }, { "epoch": 0.157991195413893, "grad_norm": 0.09576831758022308, "learning_rate": 8.45425329988865e-05, "loss": 1.6741, "step": 80064 }, { "epoch": 0.15805434137528945, "grad_norm": 0.10265585035085678, "learning_rate": 8.453619277452181e-05, "loss": 1.6604, "step": 80096 }, { "epoch": 0.15811748733668587, "grad_norm": 0.09348326921463013, "learning_rate": 8.452985255015712e-05, "loss": 1.6843, "step": 80128 }, { "epoch": 0.15818063329808232, "grad_norm": 0.09281941503286362, "learning_rate": 8.452351232579243e-05, "loss": 1.6637, "step": 80160 }, { "epoch": 0.15824377925947877, "grad_norm": 0.09963396936655045, "learning_rate": 8.451717210142774e-05, "loss": 1.6616, "step": 80192 }, { "epoch": 0.1583069252208752, "grad_norm": 0.10201126337051392, "learning_rate": 8.451083187706306e-05, "loss": 1.6703, "step": 80224 }, { "epoch": 0.15837007118227164, "grad_norm": 0.09980650991201401, "learning_rate": 8.450449165269836e-05, "loss": 1.6725, "step": 80256 }, { "epoch": 0.1584332171436681, "grad_norm": 0.11375914514064789, "learning_rate": 8.449815142833367e-05, "loss": 1.6625, "step": 80288 }, { "epoch": 0.1584963631050645, "grad_norm": 0.11290016770362854, "learning_rate": 8.449181120396899e-05, "loss": 1.6745, "step": 80320 }, { "epoch": 0.15855950906646096, "grad_norm": 0.0976036787033081, "learning_rate": 8.44854709796043e-05, "loss": 1.673, "step": 80352 }, { "epoch": 0.1586226550278574, "grad_norm": 0.10798491537570953, "learning_rate": 8.44791307552396e-05, "loss": 1.6888, "step": 80384 }, { "epoch": 0.15868580098925383, "grad_norm": 0.0982152670621872, "learning_rate": 8.44727905308749e-05, "loss": 1.6695, "step": 80416 }, { "epoch": 0.15874894695065028, "grad_norm": 0.1046973317861557, "learning_rate": 8.446645030651022e-05, "loss": 1.6588, "step": 80448 }, { "epoch": 0.15881209291204673, "grad_norm": 0.1027870625257492, "learning_rate": 8.446011008214554e-05, "loss": 1.6746, "step": 80480 }, { "epoch": 0.15887523887344315, "grad_norm": 0.09794238954782486, "learning_rate": 8.445376985778085e-05, "loss": 1.6704, "step": 80512 }, { "epoch": 0.1589383848348396, "grad_norm": 0.11949475109577179, "learning_rate": 8.444742963341615e-05, "loss": 1.6639, "step": 80544 }, { "epoch": 0.15900153079623605, "grad_norm": 0.11027876287698746, "learning_rate": 8.444108940905147e-05, "loss": 1.6652, "step": 80576 }, { "epoch": 0.15906467675763247, "grad_norm": 0.10230275243520737, "learning_rate": 8.443474918468678e-05, "loss": 1.6739, "step": 80608 }, { "epoch": 0.15912782271902892, "grad_norm": 0.09737065434455872, "learning_rate": 8.44284089603221e-05, "loss": 1.666, "step": 80640 }, { "epoch": 0.15919096868042537, "grad_norm": 0.10272245109081268, "learning_rate": 8.44220687359574e-05, "loss": 1.6733, "step": 80672 }, { "epoch": 0.1592541146418218, "grad_norm": 0.10037832707166672, "learning_rate": 8.441572851159271e-05, "loss": 1.6661, "step": 80704 }, { "epoch": 0.15931726060321824, "grad_norm": 0.1051454022526741, "learning_rate": 8.440938828722802e-05, "loss": 1.6788, "step": 80736 }, { "epoch": 0.1593804065646147, "grad_norm": 0.10609681904315948, "learning_rate": 8.440304806286334e-05, "loss": 1.6648, "step": 80768 }, { "epoch": 0.1594435525260111, "grad_norm": 0.09505396336317062, "learning_rate": 8.439670783849864e-05, "loss": 1.6644, "step": 80800 }, { "epoch": 0.15950669848740756, "grad_norm": 0.09724284708499908, "learning_rate": 8.439036761413394e-05, "loss": 1.6721, "step": 80832 }, { "epoch": 0.159569844448804, "grad_norm": 0.10502568632364273, "learning_rate": 8.438402738976926e-05, "loss": 1.6718, "step": 80864 }, { "epoch": 0.15963299041020043, "grad_norm": 0.10341182351112366, "learning_rate": 8.437768716540457e-05, "loss": 1.6497, "step": 80896 }, { "epoch": 0.15969613637159688, "grad_norm": 0.10201447457075119, "learning_rate": 8.437134694103987e-05, "loss": 1.6765, "step": 80928 }, { "epoch": 0.15975928233299333, "grad_norm": 0.1063053160905838, "learning_rate": 8.436500671667519e-05, "loss": 1.6829, "step": 80960 }, { "epoch": 0.15982242829438975, "grad_norm": 0.11010798811912537, "learning_rate": 8.43586664923105e-05, "loss": 1.6729, "step": 80992 }, { "epoch": 0.1598855742557862, "grad_norm": 0.1061517745256424, "learning_rate": 8.435232626794581e-05, "loss": 1.6761, "step": 81024 }, { "epoch": 0.15994872021718265, "grad_norm": 0.10358405858278275, "learning_rate": 8.434598604358113e-05, "loss": 1.6689, "step": 81056 }, { "epoch": 0.16001186617857907, "grad_norm": 0.09589546918869019, "learning_rate": 8.433964581921643e-05, "loss": 1.6787, "step": 81088 }, { "epoch": 0.16007501213997552, "grad_norm": 0.1017046868801117, "learning_rate": 8.433330559485175e-05, "loss": 1.6678, "step": 81120 }, { "epoch": 0.16013815810137197, "grad_norm": 0.0994737520813942, "learning_rate": 8.432696537048706e-05, "loss": 1.657, "step": 81152 }, { "epoch": 0.1602013040627684, "grad_norm": 0.10340292751789093, "learning_rate": 8.432062514612236e-05, "loss": 1.6601, "step": 81184 }, { "epoch": 0.16026445002416484, "grad_norm": 0.09561502188444138, "learning_rate": 8.431428492175768e-05, "loss": 1.6774, "step": 81216 }, { "epoch": 0.1603275959855613, "grad_norm": 0.10668741166591644, "learning_rate": 8.430794469739298e-05, "loss": 1.6667, "step": 81248 }, { "epoch": 0.1603907419469577, "grad_norm": 0.09333831816911697, "learning_rate": 8.430160447302829e-05, "loss": 1.6738, "step": 81280 }, { "epoch": 0.16045388790835416, "grad_norm": 0.10256980359554291, "learning_rate": 8.42952642486636e-05, "loss": 1.6556, "step": 81312 }, { "epoch": 0.1605170338697506, "grad_norm": 0.09653441607952118, "learning_rate": 8.42889240242989e-05, "loss": 1.6774, "step": 81344 }, { "epoch": 0.16058017983114703, "grad_norm": 0.10778200626373291, "learning_rate": 8.428258379993422e-05, "loss": 1.6588, "step": 81376 }, { "epoch": 0.16064332579254348, "grad_norm": 0.10164060443639755, "learning_rate": 8.427624357556954e-05, "loss": 1.6669, "step": 81408 }, { "epoch": 0.16070647175393993, "grad_norm": 0.10250438004732132, "learning_rate": 8.426990335120485e-05, "loss": 1.6779, "step": 81440 }, { "epoch": 0.16076961771533635, "grad_norm": 0.09784379601478577, "learning_rate": 8.426356312684015e-05, "loss": 1.6629, "step": 81472 }, { "epoch": 0.1608327636767328, "grad_norm": 0.10251688212156296, "learning_rate": 8.425722290247547e-05, "loss": 1.671, "step": 81504 }, { "epoch": 0.16089590963812925, "grad_norm": 0.09898275136947632, "learning_rate": 8.425088267811078e-05, "loss": 1.6728, "step": 81536 }, { "epoch": 0.16095905559952567, "grad_norm": 0.10107841342687607, "learning_rate": 8.42445424537461e-05, "loss": 1.6674, "step": 81568 }, { "epoch": 0.16102220156092212, "grad_norm": 0.11603739857673645, "learning_rate": 8.42382022293814e-05, "loss": 1.6776, "step": 81600 }, { "epoch": 0.16108534752231857, "grad_norm": 0.10540154576301575, "learning_rate": 8.423186200501671e-05, "loss": 1.6736, "step": 81632 }, { "epoch": 0.161148493483715, "grad_norm": 0.09439444541931152, "learning_rate": 8.422552178065201e-05, "loss": 1.6667, "step": 81664 }, { "epoch": 0.16121163944511144, "grad_norm": 0.10692646354436874, "learning_rate": 8.421918155628733e-05, "loss": 1.6667, "step": 81696 }, { "epoch": 0.1612747854065079, "grad_norm": 0.10389810055494308, "learning_rate": 8.421284133192264e-05, "loss": 1.6669, "step": 81728 }, { "epoch": 0.16133793136790434, "grad_norm": 0.10967563092708588, "learning_rate": 8.420650110755794e-05, "loss": 1.6598, "step": 81760 }, { "epoch": 0.16140107732930076, "grad_norm": 0.1001477912068367, "learning_rate": 8.420016088319326e-05, "loss": 1.6773, "step": 81792 }, { "epoch": 0.1614642232906972, "grad_norm": 0.1112506166100502, "learning_rate": 8.419382065882857e-05, "loss": 1.6747, "step": 81824 }, { "epoch": 0.16152736925209366, "grad_norm": 0.10486098378896713, "learning_rate": 8.418748043446389e-05, "loss": 1.6714, "step": 81856 }, { "epoch": 0.16159051521349008, "grad_norm": 0.10470446199178696, "learning_rate": 8.418114021009919e-05, "loss": 1.6786, "step": 81888 }, { "epoch": 0.16165366117488653, "grad_norm": 0.09796930849552155, "learning_rate": 8.41747999857345e-05, "loss": 1.6629, "step": 81920 }, { "epoch": 0.16171680713628298, "grad_norm": 0.10023059695959091, "learning_rate": 8.416845976136982e-05, "loss": 1.6488, "step": 81952 }, { "epoch": 0.1617799530976794, "grad_norm": 0.10906977206468582, "learning_rate": 8.416211953700513e-05, "loss": 1.6728, "step": 81984 }, { "epoch": 0.16184309905907585, "grad_norm": 0.09570545703172684, "learning_rate": 8.415577931264043e-05, "loss": 1.6658, "step": 82016 }, { "epoch": 0.1619062450204723, "grad_norm": 0.10845465958118439, "learning_rate": 8.414943908827575e-05, "loss": 1.6776, "step": 82048 }, { "epoch": 0.16196939098186872, "grad_norm": 0.10419274866580963, "learning_rate": 8.414309886391105e-05, "loss": 1.6665, "step": 82080 }, { "epoch": 0.16203253694326517, "grad_norm": 0.10235957056283951, "learning_rate": 8.413675863954636e-05, "loss": 1.6595, "step": 82112 }, { "epoch": 0.16209568290466161, "grad_norm": 0.10116744041442871, "learning_rate": 8.413041841518166e-05, "loss": 1.6545, "step": 82144 }, { "epoch": 0.16215882886605804, "grad_norm": 0.10140691697597504, "learning_rate": 8.412407819081698e-05, "loss": 1.6705, "step": 82176 }, { "epoch": 0.16222197482745448, "grad_norm": 0.1144220307469368, "learning_rate": 8.411773796645229e-05, "loss": 1.6673, "step": 82208 }, { "epoch": 0.16228512078885093, "grad_norm": 0.10034649074077606, "learning_rate": 8.41113977420876e-05, "loss": 1.6582, "step": 82240 }, { "epoch": 0.16234826675024736, "grad_norm": 0.10167563706636429, "learning_rate": 8.41050575177229e-05, "loss": 1.6585, "step": 82272 }, { "epoch": 0.1624114127116438, "grad_norm": 0.10308732092380524, "learning_rate": 8.409871729335822e-05, "loss": 1.6816, "step": 82304 }, { "epoch": 0.16247455867304025, "grad_norm": 0.10687581449747086, "learning_rate": 8.409237706899354e-05, "loss": 1.662, "step": 82336 }, { "epoch": 0.16253770463443667, "grad_norm": 0.09884197264909744, "learning_rate": 8.408603684462885e-05, "loss": 1.6751, "step": 82368 }, { "epoch": 0.16260085059583312, "grad_norm": 0.10333535820245743, "learning_rate": 8.407969662026416e-05, "loss": 1.6728, "step": 82400 }, { "epoch": 0.16266399655722957, "grad_norm": 0.10113651305437088, "learning_rate": 8.407335639589947e-05, "loss": 1.6579, "step": 82432 }, { "epoch": 0.162727142518626, "grad_norm": 0.10472423583269119, "learning_rate": 8.406701617153478e-05, "loss": 1.6762, "step": 82464 }, { "epoch": 0.16279028848002244, "grad_norm": 0.0948818027973175, "learning_rate": 8.406067594717008e-05, "loss": 1.6623, "step": 82496 }, { "epoch": 0.1628534344414189, "grad_norm": 0.10804177820682526, "learning_rate": 8.40543357228054e-05, "loss": 1.6655, "step": 82528 }, { "epoch": 0.16291658040281531, "grad_norm": 0.11333528161048889, "learning_rate": 8.40479954984407e-05, "loss": 1.6641, "step": 82560 }, { "epoch": 0.16297972636421176, "grad_norm": 0.10604579001665115, "learning_rate": 8.404165527407601e-05, "loss": 1.6605, "step": 82592 }, { "epoch": 0.1630428723256082, "grad_norm": 0.09933172911405563, "learning_rate": 8.403531504971133e-05, "loss": 1.694, "step": 82624 }, { "epoch": 0.16310601828700463, "grad_norm": 0.10318920016288757, "learning_rate": 8.402897482534664e-05, "loss": 1.6812, "step": 82656 }, { "epoch": 0.16316916424840108, "grad_norm": 0.09602219611406326, "learning_rate": 8.402263460098194e-05, "loss": 1.6704, "step": 82688 }, { "epoch": 0.16323231020979753, "grad_norm": 0.10676294565200806, "learning_rate": 8.401629437661726e-05, "loss": 1.6566, "step": 82720 }, { "epoch": 0.16329545617119395, "grad_norm": 0.10345187038183212, "learning_rate": 8.400995415225257e-05, "loss": 1.6609, "step": 82752 }, { "epoch": 0.1633586021325904, "grad_norm": 0.10429804027080536, "learning_rate": 8.400361392788789e-05, "loss": 1.6608, "step": 82784 }, { "epoch": 0.16342174809398685, "grad_norm": 0.10219980031251907, "learning_rate": 8.399727370352319e-05, "loss": 1.6531, "step": 82816 }, { "epoch": 0.16348489405538327, "grad_norm": 0.0946764349937439, "learning_rate": 8.39909334791585e-05, "loss": 1.6587, "step": 82848 }, { "epoch": 0.16354804001677972, "grad_norm": 0.10328608751296997, "learning_rate": 8.39845932547938e-05, "loss": 1.6666, "step": 82880 }, { "epoch": 0.16361118597817617, "grad_norm": 0.10191641002893448, "learning_rate": 8.397825303042912e-05, "loss": 1.657, "step": 82912 }, { "epoch": 0.1636743319395726, "grad_norm": 0.10231659561395645, "learning_rate": 8.397191280606442e-05, "loss": 1.6588, "step": 82944 }, { "epoch": 0.16373747790096904, "grad_norm": 0.10225328803062439, "learning_rate": 8.396557258169973e-05, "loss": 1.6701, "step": 82976 }, { "epoch": 0.1638006238623655, "grad_norm": 0.1010192334651947, "learning_rate": 8.395923235733505e-05, "loss": 1.6781, "step": 83008 }, { "epoch": 0.1638637698237619, "grad_norm": 0.09910804033279419, "learning_rate": 8.395289213297036e-05, "loss": 1.6547, "step": 83040 }, { "epoch": 0.16392691578515836, "grad_norm": 0.10391703993082047, "learning_rate": 8.394655190860568e-05, "loss": 1.6599, "step": 83072 }, { "epoch": 0.1639900617465548, "grad_norm": 0.10021750628948212, "learning_rate": 8.394021168424098e-05, "loss": 1.6714, "step": 83104 }, { "epoch": 0.16405320770795123, "grad_norm": 0.10518373548984528, "learning_rate": 8.393387145987629e-05, "loss": 1.6654, "step": 83136 }, { "epoch": 0.16411635366934768, "grad_norm": 0.09555603563785553, "learning_rate": 8.39275312355116e-05, "loss": 1.66, "step": 83168 }, { "epoch": 0.16417949963074413, "grad_norm": 0.10554922372102737, "learning_rate": 8.392119101114692e-05, "loss": 1.6524, "step": 83200 }, { "epoch": 0.16424264559214055, "grad_norm": 0.10174940526485443, "learning_rate": 8.391485078678222e-05, "loss": 1.6525, "step": 83232 }, { "epoch": 0.164305791553537, "grad_norm": 0.11020279675722122, "learning_rate": 8.390851056241754e-05, "loss": 1.66, "step": 83264 }, { "epoch": 0.16436893751493345, "grad_norm": 0.10660487413406372, "learning_rate": 8.390217033805284e-05, "loss": 1.6656, "step": 83296 }, { "epoch": 0.16443208347632987, "grad_norm": 0.11338844895362854, "learning_rate": 8.389583011368815e-05, "loss": 1.6686, "step": 83328 }, { "epoch": 0.16449522943772632, "grad_norm": 0.11645433306694031, "learning_rate": 8.388948988932345e-05, "loss": 1.6536, "step": 83360 }, { "epoch": 0.16455837539912277, "grad_norm": 0.1066615954041481, "learning_rate": 8.388314966495877e-05, "loss": 1.6455, "step": 83392 }, { "epoch": 0.1646215213605192, "grad_norm": 0.09756056219339371, "learning_rate": 8.387680944059408e-05, "loss": 1.6617, "step": 83424 }, { "epoch": 0.16468466732191564, "grad_norm": 0.10661061108112335, "learning_rate": 8.38704692162294e-05, "loss": 1.6584, "step": 83456 }, { "epoch": 0.1647478132833121, "grad_norm": 0.10062295198440552, "learning_rate": 8.38641289918647e-05, "loss": 1.6647, "step": 83488 }, { "epoch": 0.16481095924470854, "grad_norm": 0.10491617769002914, "learning_rate": 8.385778876750001e-05, "loss": 1.6738, "step": 83520 }, { "epoch": 0.16487410520610496, "grad_norm": 0.10141460597515106, "learning_rate": 8.385144854313533e-05, "loss": 1.6648, "step": 83552 }, { "epoch": 0.1649372511675014, "grad_norm": 0.09942572563886642, "learning_rate": 8.384510831877064e-05, "loss": 1.6558, "step": 83584 }, { "epoch": 0.16500039712889786, "grad_norm": 0.10586068034172058, "learning_rate": 8.383876809440594e-05, "loss": 1.6622, "step": 83616 }, { "epoch": 0.16506354309029428, "grad_norm": 0.10023793578147888, "learning_rate": 8.383242787004126e-05, "loss": 1.6782, "step": 83648 }, { "epoch": 0.16512668905169073, "grad_norm": 0.09988317638635635, "learning_rate": 8.382608764567657e-05, "loss": 1.6652, "step": 83680 }, { "epoch": 0.16518983501308718, "grad_norm": 0.09722260385751724, "learning_rate": 8.381974742131187e-05, "loss": 1.6531, "step": 83712 }, { "epoch": 0.1652529809744836, "grad_norm": 0.09963898360729218, "learning_rate": 8.381340719694719e-05, "loss": 1.6614, "step": 83744 }, { "epoch": 0.16531612693588005, "grad_norm": 0.10098837316036224, "learning_rate": 8.380706697258249e-05, "loss": 1.6596, "step": 83776 }, { "epoch": 0.1653792728972765, "grad_norm": 0.10564694553613663, "learning_rate": 8.38007267482178e-05, "loss": 1.6697, "step": 83808 }, { "epoch": 0.16544241885867292, "grad_norm": 0.10261000692844391, "learning_rate": 8.379438652385312e-05, "loss": 1.6735, "step": 83840 }, { "epoch": 0.16550556482006937, "grad_norm": 0.09658705443143845, "learning_rate": 8.378804629948843e-05, "loss": 1.6668, "step": 83872 }, { "epoch": 0.16556871078146582, "grad_norm": 0.09841986000537872, "learning_rate": 8.378170607512373e-05, "loss": 1.6716, "step": 83904 }, { "epoch": 0.16563185674286224, "grad_norm": 0.09860815852880478, "learning_rate": 8.377536585075905e-05, "loss": 1.6633, "step": 83936 }, { "epoch": 0.1656950027042587, "grad_norm": 0.1066829040646553, "learning_rate": 8.376902562639436e-05, "loss": 1.6799, "step": 83968 }, { "epoch": 0.16575814866565514, "grad_norm": 0.10175075381994247, "learning_rate": 8.376268540202968e-05, "loss": 1.662, "step": 84000 }, { "epoch": 0.16582129462705156, "grad_norm": 0.09894043207168579, "learning_rate": 8.375634517766498e-05, "loss": 1.6349, "step": 84032 }, { "epoch": 0.165884440588448, "grad_norm": 0.10034500062465668, "learning_rate": 8.375000495330029e-05, "loss": 1.66, "step": 84064 }, { "epoch": 0.16594758654984446, "grad_norm": 0.10211653262376785, "learning_rate": 8.37436647289356e-05, "loss": 1.6538, "step": 84096 }, { "epoch": 0.16601073251124088, "grad_norm": 0.0991155281662941, "learning_rate": 8.373732450457091e-05, "loss": 1.6637, "step": 84128 }, { "epoch": 0.16607387847263733, "grad_norm": 0.10942571610212326, "learning_rate": 8.373098428020621e-05, "loss": 1.6692, "step": 84160 }, { "epoch": 0.16613702443403378, "grad_norm": 0.11085399240255356, "learning_rate": 8.372464405584152e-05, "loss": 1.6748, "step": 84192 }, { "epoch": 0.1662001703954302, "grad_norm": 0.10118217766284943, "learning_rate": 8.371830383147684e-05, "loss": 1.6611, "step": 84224 }, { "epoch": 0.16626331635682665, "grad_norm": 0.09579329937696457, "learning_rate": 8.371196360711215e-05, "loss": 1.6676, "step": 84256 }, { "epoch": 0.1663264623182231, "grad_norm": 0.09783437103033066, "learning_rate": 8.370562338274745e-05, "loss": 1.6649, "step": 84288 }, { "epoch": 0.16638960827961952, "grad_norm": 0.10741367936134338, "learning_rate": 8.369928315838277e-05, "loss": 1.6542, "step": 84320 }, { "epoch": 0.16645275424101597, "grad_norm": 0.10241501778364182, "learning_rate": 8.369294293401808e-05, "loss": 1.6619, "step": 84352 }, { "epoch": 0.16651590020241241, "grad_norm": 0.10578735917806625, "learning_rate": 8.36866027096534e-05, "loss": 1.6591, "step": 84384 }, { "epoch": 0.16657904616380884, "grad_norm": 0.09896484762430191, "learning_rate": 8.368026248528871e-05, "loss": 1.646, "step": 84416 }, { "epoch": 0.16664219212520529, "grad_norm": 0.10287652909755707, "learning_rate": 8.367392226092401e-05, "loss": 1.6564, "step": 84448 }, { "epoch": 0.16670533808660173, "grad_norm": 0.10269158333539963, "learning_rate": 8.366758203655933e-05, "loss": 1.6732, "step": 84480 }, { "epoch": 0.16676848404799816, "grad_norm": 0.1048140749335289, "learning_rate": 8.366124181219464e-05, "loss": 1.6784, "step": 84512 }, { "epoch": 0.1668316300093946, "grad_norm": 0.09970742464065552, "learning_rate": 8.365490158782994e-05, "loss": 1.6595, "step": 84544 }, { "epoch": 0.16689477597079105, "grad_norm": 0.10886450111865997, "learning_rate": 8.364856136346524e-05, "loss": 1.653, "step": 84576 }, { "epoch": 0.16695792193218748, "grad_norm": 0.09977026283740997, "learning_rate": 8.364222113910056e-05, "loss": 1.6516, "step": 84608 }, { "epoch": 0.16702106789358392, "grad_norm": 0.09926028549671173, "learning_rate": 8.363588091473587e-05, "loss": 1.6632, "step": 84640 }, { "epoch": 0.16708421385498037, "grad_norm": 0.1021278128027916, "learning_rate": 8.362954069037119e-05, "loss": 1.6649, "step": 84672 }, { "epoch": 0.1671473598163768, "grad_norm": 0.1026868224143982, "learning_rate": 8.362320046600649e-05, "loss": 1.6732, "step": 84704 }, { "epoch": 0.16721050577777324, "grad_norm": 0.10374830663204193, "learning_rate": 8.36168602416418e-05, "loss": 1.6543, "step": 84736 }, { "epoch": 0.1672736517391697, "grad_norm": 0.10258857160806656, "learning_rate": 8.361052001727712e-05, "loss": 1.6708, "step": 84768 }, { "epoch": 0.16733679770056611, "grad_norm": 0.0977817177772522, "learning_rate": 8.360417979291243e-05, "loss": 1.6613, "step": 84800 }, { "epoch": 0.16739994366196256, "grad_norm": 0.10027941316366196, "learning_rate": 8.359783956854773e-05, "loss": 1.6462, "step": 84832 }, { "epoch": 0.167463089623359, "grad_norm": 0.10649436712265015, "learning_rate": 8.359149934418305e-05, "loss": 1.6552, "step": 84864 }, { "epoch": 0.16752623558475543, "grad_norm": 0.09952892363071442, "learning_rate": 8.358515911981836e-05, "loss": 1.6554, "step": 84896 }, { "epoch": 0.16758938154615188, "grad_norm": 0.1102275550365448, "learning_rate": 8.357881889545368e-05, "loss": 1.6701, "step": 84928 }, { "epoch": 0.16765252750754833, "grad_norm": 0.10210400074720383, "learning_rate": 8.357247867108898e-05, "loss": 1.6791, "step": 84960 }, { "epoch": 0.16771567346894475, "grad_norm": 0.09926417469978333, "learning_rate": 8.356613844672428e-05, "loss": 1.6625, "step": 84992 }, { "epoch": 0.1677788194303412, "grad_norm": 0.09948869794607162, "learning_rate": 8.355979822235959e-05, "loss": 1.6543, "step": 85024 }, { "epoch": 0.16784196539173765, "grad_norm": 0.10216159373521805, "learning_rate": 8.355345799799491e-05, "loss": 1.6558, "step": 85056 }, { "epoch": 0.16790511135313407, "grad_norm": 0.10084868222475052, "learning_rate": 8.354711777363022e-05, "loss": 1.6678, "step": 85088 }, { "epoch": 0.16796825731453052, "grad_norm": 0.10927193611860275, "learning_rate": 8.354077754926552e-05, "loss": 1.6493, "step": 85120 }, { "epoch": 0.16803140327592697, "grad_norm": 0.10268424451351166, "learning_rate": 8.353443732490084e-05, "loss": 1.6592, "step": 85152 }, { "epoch": 0.16809454923732342, "grad_norm": 0.09640819579362869, "learning_rate": 8.352809710053615e-05, "loss": 1.6541, "step": 85184 }, { "epoch": 0.16815769519871984, "grad_norm": 0.09388112276792526, "learning_rate": 8.352175687617147e-05, "loss": 1.6603, "step": 85216 }, { "epoch": 0.1682208411601163, "grad_norm": 0.09656675904989243, "learning_rate": 8.351541665180677e-05, "loss": 1.6556, "step": 85248 }, { "epoch": 0.16828398712151274, "grad_norm": 0.10099435597658157, "learning_rate": 8.350907642744208e-05, "loss": 1.6662, "step": 85280 }, { "epoch": 0.16834713308290916, "grad_norm": 0.10132823139429092, "learning_rate": 8.35027362030774e-05, "loss": 1.6738, "step": 85312 }, { "epoch": 0.1684102790443056, "grad_norm": 0.10148778557777405, "learning_rate": 8.34963959787127e-05, "loss": 1.6613, "step": 85344 }, { "epoch": 0.16847342500570206, "grad_norm": 0.10271482914686203, "learning_rate": 8.349005575434801e-05, "loss": 1.6661, "step": 85376 }, { "epoch": 0.16853657096709848, "grad_norm": 0.10997402667999268, "learning_rate": 8.348371552998331e-05, "loss": 1.6614, "step": 85408 }, { "epoch": 0.16859971692849493, "grad_norm": 0.0976165235042572, "learning_rate": 8.347737530561863e-05, "loss": 1.6626, "step": 85440 }, { "epoch": 0.16866286288989138, "grad_norm": 0.10330215841531754, "learning_rate": 8.347103508125394e-05, "loss": 1.6656, "step": 85472 }, { "epoch": 0.1687260088512878, "grad_norm": 0.10073370486497879, "learning_rate": 8.346469485688924e-05, "loss": 1.6602, "step": 85504 }, { "epoch": 0.16878915481268425, "grad_norm": 0.10443852841854095, "learning_rate": 8.345835463252456e-05, "loss": 1.6633, "step": 85536 }, { "epoch": 0.1688523007740807, "grad_norm": 0.10332836210727692, "learning_rate": 8.345201440815987e-05, "loss": 1.6622, "step": 85568 }, { "epoch": 0.16891544673547712, "grad_norm": 0.09791575372219086, "learning_rate": 8.344567418379519e-05, "loss": 1.6719, "step": 85600 }, { "epoch": 0.16897859269687357, "grad_norm": 0.09709091484546661, "learning_rate": 8.343933395943049e-05, "loss": 1.6531, "step": 85632 }, { "epoch": 0.16904173865827002, "grad_norm": 0.10924059897661209, "learning_rate": 8.34329937350658e-05, "loss": 1.6542, "step": 85664 }, { "epoch": 0.16910488461966644, "grad_norm": 0.10236849635839462, "learning_rate": 8.342665351070112e-05, "loss": 1.6637, "step": 85696 }, { "epoch": 0.1691680305810629, "grad_norm": 0.10873173177242279, "learning_rate": 8.342031328633643e-05, "loss": 1.6609, "step": 85728 }, { "epoch": 0.16923117654245934, "grad_norm": 0.10035929828882217, "learning_rate": 8.341397306197173e-05, "loss": 1.6609, "step": 85760 }, { "epoch": 0.16929432250385576, "grad_norm": 0.10346246510744095, "learning_rate": 8.340763283760705e-05, "loss": 1.6717, "step": 85792 }, { "epoch": 0.1693574684652522, "grad_norm": 0.10286826640367508, "learning_rate": 8.340129261324235e-05, "loss": 1.6653, "step": 85824 }, { "epoch": 0.16942061442664866, "grad_norm": 0.10834146291017532, "learning_rate": 8.339495238887766e-05, "loss": 1.6583, "step": 85856 }, { "epoch": 0.16948376038804508, "grad_norm": 0.10519353300333023, "learning_rate": 8.338861216451298e-05, "loss": 1.6515, "step": 85888 }, { "epoch": 0.16954690634944153, "grad_norm": 0.09746549278497696, "learning_rate": 8.338227194014828e-05, "loss": 1.6484, "step": 85920 }, { "epoch": 0.16961005231083798, "grad_norm": 0.10242948681116104, "learning_rate": 8.33759317157836e-05, "loss": 1.6576, "step": 85952 }, { "epoch": 0.1696731982722344, "grad_norm": 0.10463360697031021, "learning_rate": 8.336959149141891e-05, "loss": 1.6571, "step": 85984 }, { "epoch": 0.16973634423363085, "grad_norm": 0.09747793525457382, "learning_rate": 8.336325126705422e-05, "loss": 1.6421, "step": 86016 }, { "epoch": 0.1697994901950273, "grad_norm": 0.09792450070381165, "learning_rate": 8.335691104268952e-05, "loss": 1.6697, "step": 86048 }, { "epoch": 0.16986263615642372, "grad_norm": 0.10239899158477783, "learning_rate": 8.335057081832484e-05, "loss": 1.6641, "step": 86080 }, { "epoch": 0.16992578211782017, "grad_norm": 0.10340501368045807, "learning_rate": 8.334423059396015e-05, "loss": 1.6655, "step": 86112 }, { "epoch": 0.16998892807921662, "grad_norm": 0.10833476483821869, "learning_rate": 8.333789036959547e-05, "loss": 1.6686, "step": 86144 }, { "epoch": 0.17005207404061304, "grad_norm": 0.1196788027882576, "learning_rate": 8.333155014523077e-05, "loss": 1.6523, "step": 86176 }, { "epoch": 0.1701152200020095, "grad_norm": 0.1082020178437233, "learning_rate": 8.332520992086608e-05, "loss": 1.658, "step": 86208 }, { "epoch": 0.17017836596340594, "grad_norm": 0.10265053063631058, "learning_rate": 8.331886969650138e-05, "loss": 1.665, "step": 86240 }, { "epoch": 0.17024151192480236, "grad_norm": 0.10248216241598129, "learning_rate": 8.33125294721367e-05, "loss": 1.6588, "step": 86272 }, { "epoch": 0.1703046578861988, "grad_norm": 0.10473192483186722, "learning_rate": 8.3306189247772e-05, "loss": 1.6745, "step": 86304 }, { "epoch": 0.17036780384759526, "grad_norm": 0.10582578927278519, "learning_rate": 8.329984902340731e-05, "loss": 1.6676, "step": 86336 }, { "epoch": 0.17043094980899168, "grad_norm": 0.100953109562397, "learning_rate": 8.329350879904263e-05, "loss": 1.6838, "step": 86368 }, { "epoch": 0.17049409577038813, "grad_norm": 0.09963128715753555, "learning_rate": 8.328716857467794e-05, "loss": 1.6503, "step": 86400 }, { "epoch": 0.17055724173178458, "grad_norm": 0.10131825506687164, "learning_rate": 8.328082835031326e-05, "loss": 1.6704, "step": 86432 }, { "epoch": 0.170620387693181, "grad_norm": 0.10538109391927719, "learning_rate": 8.327448812594856e-05, "loss": 1.6662, "step": 86464 }, { "epoch": 0.17068353365457745, "grad_norm": 0.09289366006851196, "learning_rate": 8.326814790158387e-05, "loss": 1.6533, "step": 86496 }, { "epoch": 0.1707466796159739, "grad_norm": 0.09712038189172745, "learning_rate": 8.326180767721919e-05, "loss": 1.6626, "step": 86528 }, { "epoch": 0.17080982557737032, "grad_norm": 0.10773643851280212, "learning_rate": 8.32554674528545e-05, "loss": 1.6691, "step": 86560 }, { "epoch": 0.17087297153876677, "grad_norm": 0.11154456436634064, "learning_rate": 8.32491272284898e-05, "loss": 1.6543, "step": 86592 }, { "epoch": 0.17093611750016321, "grad_norm": 0.10421478003263474, "learning_rate": 8.324278700412512e-05, "loss": 1.6655, "step": 86624 }, { "epoch": 0.17099926346155964, "grad_norm": 0.09560660272836685, "learning_rate": 8.323644677976042e-05, "loss": 1.6557, "step": 86656 }, { "epoch": 0.17106240942295609, "grad_norm": 0.09936953336000443, "learning_rate": 8.323010655539573e-05, "loss": 1.6489, "step": 86688 }, { "epoch": 0.17112555538435253, "grad_norm": 0.11029977351427078, "learning_rate": 8.322376633103103e-05, "loss": 1.6391, "step": 86720 }, { "epoch": 0.17118870134574896, "grad_norm": 0.09954982995986938, "learning_rate": 8.321742610666635e-05, "loss": 1.6586, "step": 86752 }, { "epoch": 0.1712518473071454, "grad_norm": 0.10885919630527496, "learning_rate": 8.321108588230166e-05, "loss": 1.6642, "step": 86784 }, { "epoch": 0.17131499326854185, "grad_norm": 0.10321956872940063, "learning_rate": 8.320474565793698e-05, "loss": 1.6514, "step": 86816 }, { "epoch": 0.1713781392299383, "grad_norm": 0.09847011417150497, "learning_rate": 8.319840543357228e-05, "loss": 1.6589, "step": 86848 }, { "epoch": 0.17144128519133472, "grad_norm": 0.1070474162697792, "learning_rate": 8.31920652092076e-05, "loss": 1.6661, "step": 86880 }, { "epoch": 0.17150443115273117, "grad_norm": 0.10999678820371628, "learning_rate": 8.318572498484291e-05, "loss": 1.6527, "step": 86912 }, { "epoch": 0.17156757711412762, "grad_norm": 0.09914568066596985, "learning_rate": 8.317938476047822e-05, "loss": 1.6672, "step": 86944 }, { "epoch": 0.17163072307552404, "grad_norm": 0.10314370691776276, "learning_rate": 8.317304453611352e-05, "loss": 1.6613, "step": 86976 }, { "epoch": 0.1716938690369205, "grad_norm": 0.10294325649738312, "learning_rate": 8.316670431174884e-05, "loss": 1.6444, "step": 87008 }, { "epoch": 0.17175701499831694, "grad_norm": 0.10406816005706787, "learning_rate": 8.316036408738414e-05, "loss": 1.6573, "step": 87040 }, { "epoch": 0.17182016095971336, "grad_norm": 0.10395383089780807, "learning_rate": 8.315402386301945e-05, "loss": 1.6482, "step": 87072 }, { "epoch": 0.1718833069211098, "grad_norm": 0.10011985152959824, "learning_rate": 8.314768363865477e-05, "loss": 1.6689, "step": 87104 }, { "epoch": 0.17194645288250626, "grad_norm": 0.11181795597076416, "learning_rate": 8.314134341429007e-05, "loss": 1.6703, "step": 87136 }, { "epoch": 0.17200959884390268, "grad_norm": 0.09706149995326996, "learning_rate": 8.313500318992538e-05, "loss": 1.6728, "step": 87168 }, { "epoch": 0.17207274480529913, "grad_norm": 0.10702026635408401, "learning_rate": 8.31286629655607e-05, "loss": 1.6594, "step": 87200 }, { "epoch": 0.17213589076669558, "grad_norm": 0.10278090834617615, "learning_rate": 8.312232274119601e-05, "loss": 1.6631, "step": 87232 }, { "epoch": 0.172199036728092, "grad_norm": 0.10035611689090729, "learning_rate": 8.311598251683131e-05, "loss": 1.6727, "step": 87264 }, { "epoch": 0.17226218268948845, "grad_norm": 0.1062588095664978, "learning_rate": 8.310964229246663e-05, "loss": 1.6535, "step": 87296 }, { "epoch": 0.1723253286508849, "grad_norm": 0.1101216971874237, "learning_rate": 8.310330206810194e-05, "loss": 1.6557, "step": 87328 }, { "epoch": 0.17238847461228132, "grad_norm": 0.11037938296794891, "learning_rate": 8.309696184373726e-05, "loss": 1.6762, "step": 87360 }, { "epoch": 0.17245162057367777, "grad_norm": 0.0970001369714737, "learning_rate": 8.309062161937256e-05, "loss": 1.6585, "step": 87392 }, { "epoch": 0.17251476653507422, "grad_norm": 0.10286374390125275, "learning_rate": 8.308428139500787e-05, "loss": 1.6728, "step": 87424 }, { "epoch": 0.17257791249647064, "grad_norm": 0.09633680433034897, "learning_rate": 8.307794117064317e-05, "loss": 1.6608, "step": 87456 }, { "epoch": 0.1726410584578671, "grad_norm": 0.10094623267650604, "learning_rate": 8.307160094627849e-05, "loss": 1.6587, "step": 87488 }, { "epoch": 0.17270420441926354, "grad_norm": 0.09444501996040344, "learning_rate": 8.306526072191379e-05, "loss": 1.6594, "step": 87520 }, { "epoch": 0.17276735038065996, "grad_norm": 0.10603666305541992, "learning_rate": 8.30589204975491e-05, "loss": 1.6687, "step": 87552 }, { "epoch": 0.1728304963420564, "grad_norm": 0.10152776539325714, "learning_rate": 8.305258027318442e-05, "loss": 1.666, "step": 87584 }, { "epoch": 0.17289364230345286, "grad_norm": 0.1043393611907959, "learning_rate": 8.304624004881973e-05, "loss": 1.6549, "step": 87616 }, { "epoch": 0.17295678826484928, "grad_norm": 0.10143306851387024, "learning_rate": 8.303989982445503e-05, "loss": 1.6486, "step": 87648 }, { "epoch": 0.17301993422624573, "grad_norm": 0.09123295545578003, "learning_rate": 8.303355960009035e-05, "loss": 1.664, "step": 87680 }, { "epoch": 0.17308308018764218, "grad_norm": 0.10808151215314865, "learning_rate": 8.302721937572566e-05, "loss": 1.6567, "step": 87712 }, { "epoch": 0.1731462261490386, "grad_norm": 0.10542468726634979, "learning_rate": 8.302087915136098e-05, "loss": 1.6639, "step": 87744 }, { "epoch": 0.17320937211043505, "grad_norm": 0.10240886360406876, "learning_rate": 8.301453892699629e-05, "loss": 1.6557, "step": 87776 }, { "epoch": 0.1732725180718315, "grad_norm": 0.11001458019018173, "learning_rate": 8.30081987026316e-05, "loss": 1.6639, "step": 87808 }, { "epoch": 0.17333566403322792, "grad_norm": 0.09899230301380157, "learning_rate": 8.300185847826691e-05, "loss": 1.6707, "step": 87840 }, { "epoch": 0.17339880999462437, "grad_norm": 0.09759450703859329, "learning_rate": 8.299551825390221e-05, "loss": 1.651, "step": 87872 }, { "epoch": 0.17346195595602082, "grad_norm": 0.11124710738658905, "learning_rate": 8.298917802953752e-05, "loss": 1.6525, "step": 87904 }, { "epoch": 0.17352510191741724, "grad_norm": 0.10308229178190231, "learning_rate": 8.298283780517283e-05, "loss": 1.6621, "step": 87936 }, { "epoch": 0.1735882478788137, "grad_norm": 0.10963082313537598, "learning_rate": 8.297649758080814e-05, "loss": 1.6491, "step": 87968 }, { "epoch": 0.17365139384021014, "grad_norm": 0.0992642194032669, "learning_rate": 8.297015735644345e-05, "loss": 1.6526, "step": 88000 }, { "epoch": 0.17371453980160656, "grad_norm": 0.10322271287441254, "learning_rate": 8.296381713207877e-05, "loss": 1.6607, "step": 88032 }, { "epoch": 0.173777685763003, "grad_norm": 0.10885316878557205, "learning_rate": 8.295747690771407e-05, "loss": 1.6594, "step": 88064 }, { "epoch": 0.17384083172439946, "grad_norm": 0.10019826889038086, "learning_rate": 8.295113668334938e-05, "loss": 1.6461, "step": 88096 }, { "epoch": 0.17390397768579588, "grad_norm": 0.10750409960746765, "learning_rate": 8.29447964589847e-05, "loss": 1.6511, "step": 88128 }, { "epoch": 0.17396712364719233, "grad_norm": 0.11462727934122086, "learning_rate": 8.293845623462001e-05, "loss": 1.6696, "step": 88160 }, { "epoch": 0.17403026960858878, "grad_norm": 0.108080193400383, "learning_rate": 8.293211601025531e-05, "loss": 1.6668, "step": 88192 }, { "epoch": 0.1740934155699852, "grad_norm": 0.10029991716146469, "learning_rate": 8.292577578589063e-05, "loss": 1.655, "step": 88224 }, { "epoch": 0.17415656153138165, "grad_norm": 0.109747976064682, "learning_rate": 8.291943556152594e-05, "loss": 1.6587, "step": 88256 }, { "epoch": 0.1742197074927781, "grad_norm": 0.1071368008852005, "learning_rate": 8.291309533716124e-05, "loss": 1.6532, "step": 88288 }, { "epoch": 0.17428285345417452, "grad_norm": 0.09972008317708969, "learning_rate": 8.290675511279656e-05, "loss": 1.6526, "step": 88320 }, { "epoch": 0.17434599941557097, "grad_norm": 0.09849822521209717, "learning_rate": 8.290041488843186e-05, "loss": 1.655, "step": 88352 }, { "epoch": 0.17440914537696742, "grad_norm": 0.10606779903173447, "learning_rate": 8.289407466406717e-05, "loss": 1.653, "step": 88384 }, { "epoch": 0.17447229133836384, "grad_norm": 0.10402847826480865, "learning_rate": 8.288773443970249e-05, "loss": 1.6592, "step": 88416 }, { "epoch": 0.1745354372997603, "grad_norm": 0.10727513581514359, "learning_rate": 8.28813942153378e-05, "loss": 1.6612, "step": 88448 }, { "epoch": 0.17459858326115674, "grad_norm": 0.10152298957109451, "learning_rate": 8.28750539909731e-05, "loss": 1.6444, "step": 88480 }, { "epoch": 0.17466172922255319, "grad_norm": 0.10337173193693161, "learning_rate": 8.286871376660842e-05, "loss": 1.6538, "step": 88512 }, { "epoch": 0.1747248751839496, "grad_norm": 0.10739100724458694, "learning_rate": 8.286237354224373e-05, "loss": 1.6705, "step": 88544 }, { "epoch": 0.17478802114534606, "grad_norm": 0.10928995162248611, "learning_rate": 8.285603331787905e-05, "loss": 1.6643, "step": 88576 }, { "epoch": 0.1748511671067425, "grad_norm": 0.09856017678976059, "learning_rate": 8.284969309351435e-05, "loss": 1.6558, "step": 88608 }, { "epoch": 0.17491431306813893, "grad_norm": 0.1065458208322525, "learning_rate": 8.284335286914966e-05, "loss": 1.6535, "step": 88640 }, { "epoch": 0.17497745902953538, "grad_norm": 0.10433677583932877, "learning_rate": 8.283701264478498e-05, "loss": 1.6422, "step": 88672 }, { "epoch": 0.17504060499093183, "grad_norm": 0.11038319021463394, "learning_rate": 8.283067242042028e-05, "loss": 1.6613, "step": 88704 }, { "epoch": 0.17510375095232825, "grad_norm": 0.10992168635129929, "learning_rate": 8.282433219605558e-05, "loss": 1.6723, "step": 88736 }, { "epoch": 0.1751668969137247, "grad_norm": 0.10349271446466446, "learning_rate": 8.28179919716909e-05, "loss": 1.6745, "step": 88768 }, { "epoch": 0.17523004287512114, "grad_norm": 0.10459519922733307, "learning_rate": 8.281165174732621e-05, "loss": 1.6475, "step": 88800 }, { "epoch": 0.17529318883651757, "grad_norm": 0.10314363241195679, "learning_rate": 8.280531152296152e-05, "loss": 1.6568, "step": 88832 }, { "epoch": 0.17535633479791402, "grad_norm": 0.10256712138652802, "learning_rate": 8.279897129859683e-05, "loss": 1.6794, "step": 88864 }, { "epoch": 0.17541948075931046, "grad_norm": 0.09958847612142563, "learning_rate": 8.279263107423214e-05, "loss": 1.6492, "step": 88896 }, { "epoch": 0.17548262672070689, "grad_norm": 0.0999009907245636, "learning_rate": 8.278629084986745e-05, "loss": 1.6571, "step": 88928 }, { "epoch": 0.17554577268210333, "grad_norm": 0.09854394942522049, "learning_rate": 8.277995062550277e-05, "loss": 1.6457, "step": 88960 }, { "epoch": 0.17560891864349978, "grad_norm": 0.10845145583152771, "learning_rate": 8.277361040113808e-05, "loss": 1.6399, "step": 88992 }, { "epoch": 0.1756720646048962, "grad_norm": 0.1146244928240776, "learning_rate": 8.276727017677338e-05, "loss": 1.6602, "step": 89024 }, { "epoch": 0.17573521056629265, "grad_norm": 0.11173972487449646, "learning_rate": 8.27609299524087e-05, "loss": 1.654, "step": 89056 }, { "epoch": 0.1757983565276891, "grad_norm": 0.10688459128141403, "learning_rate": 8.275458972804401e-05, "loss": 1.6638, "step": 89088 }, { "epoch": 0.17586150248908552, "grad_norm": 0.09491217881441116, "learning_rate": 8.274824950367932e-05, "loss": 1.6491, "step": 89120 }, { "epoch": 0.17592464845048197, "grad_norm": 0.10553118586540222, "learning_rate": 8.274190927931462e-05, "loss": 1.6748, "step": 89152 }, { "epoch": 0.17598779441187842, "grad_norm": 0.10010967403650284, "learning_rate": 8.273556905494993e-05, "loss": 1.6575, "step": 89184 }, { "epoch": 0.17605094037327484, "grad_norm": 0.10946615785360336, "learning_rate": 8.272922883058525e-05, "loss": 1.6682, "step": 89216 }, { "epoch": 0.1761140863346713, "grad_norm": 0.09834275394678116, "learning_rate": 8.272288860622056e-05, "loss": 1.6632, "step": 89248 }, { "epoch": 0.17617723229606774, "grad_norm": 0.09829218685626984, "learning_rate": 8.271654838185586e-05, "loss": 1.6543, "step": 89280 }, { "epoch": 0.17624037825746416, "grad_norm": 0.10909516364336014, "learning_rate": 8.271020815749118e-05, "loss": 1.6566, "step": 89312 }, { "epoch": 0.1763035242188606, "grad_norm": 0.09997043758630753, "learning_rate": 8.270386793312649e-05, "loss": 1.6619, "step": 89344 }, { "epoch": 0.17636667018025706, "grad_norm": 0.09957852214574814, "learning_rate": 8.26975277087618e-05, "loss": 1.6583, "step": 89376 }, { "epoch": 0.17642981614165348, "grad_norm": 0.10366418957710266, "learning_rate": 8.26911874843971e-05, "loss": 1.6503, "step": 89408 }, { "epoch": 0.17649296210304993, "grad_norm": 0.10896145552396774, "learning_rate": 8.268484726003242e-05, "loss": 1.6603, "step": 89440 }, { "epoch": 0.17655610806444638, "grad_norm": 0.1021653488278389, "learning_rate": 8.267850703566773e-05, "loss": 1.671, "step": 89472 }, { "epoch": 0.1766192540258428, "grad_norm": 0.0990075170993805, "learning_rate": 8.267216681130304e-05, "loss": 1.6483, "step": 89504 }, { "epoch": 0.17668239998723925, "grad_norm": 0.09864112734794617, "learning_rate": 8.266582658693835e-05, "loss": 1.6608, "step": 89536 }, { "epoch": 0.1767455459486357, "grad_norm": 0.10186667740345001, "learning_rate": 8.265948636257365e-05, "loss": 1.6636, "step": 89568 }, { "epoch": 0.17680869191003212, "grad_norm": 0.10535477846860886, "learning_rate": 8.265314613820897e-05, "loss": 1.6502, "step": 89600 }, { "epoch": 0.17687183787142857, "grad_norm": 0.10260170698165894, "learning_rate": 8.264680591384428e-05, "loss": 1.6501, "step": 89632 }, { "epoch": 0.17693498383282502, "grad_norm": 0.10581181943416595, "learning_rate": 8.26404656894796e-05, "loss": 1.6527, "step": 89664 }, { "epoch": 0.17699812979422144, "grad_norm": 0.10850777477025986, "learning_rate": 8.26341254651149e-05, "loss": 1.64, "step": 89696 }, { "epoch": 0.1770612757556179, "grad_norm": 0.11619777232408524, "learning_rate": 8.262778524075021e-05, "loss": 1.6551, "step": 89728 }, { "epoch": 0.17712442171701434, "grad_norm": 0.09978790581226349, "learning_rate": 8.262144501638552e-05, "loss": 1.6633, "step": 89760 }, { "epoch": 0.17718756767841076, "grad_norm": 0.10603735595941544, "learning_rate": 8.261510479202084e-05, "loss": 1.6648, "step": 89792 }, { "epoch": 0.1772507136398072, "grad_norm": 0.10047462582588196, "learning_rate": 8.260876456765614e-05, "loss": 1.6599, "step": 89824 }, { "epoch": 0.17731385960120366, "grad_norm": 0.0955331027507782, "learning_rate": 8.260242434329146e-05, "loss": 1.6526, "step": 89856 }, { "epoch": 0.17737700556260008, "grad_norm": 0.10489872097969055, "learning_rate": 8.259608411892677e-05, "loss": 1.6587, "step": 89888 }, { "epoch": 0.17744015152399653, "grad_norm": 0.09369321912527084, "learning_rate": 8.258974389456207e-05, "loss": 1.6498, "step": 89920 }, { "epoch": 0.17750329748539298, "grad_norm": 0.10730299353599548, "learning_rate": 8.258340367019739e-05, "loss": 1.6617, "step": 89952 }, { "epoch": 0.1775664434467894, "grad_norm": 0.0987553671002388, "learning_rate": 8.257706344583269e-05, "loss": 1.6505, "step": 89984 }, { "epoch": 0.17762958940818585, "grad_norm": 0.09921061247587204, "learning_rate": 8.2570723221468e-05, "loss": 1.6546, "step": 90016 }, { "epoch": 0.1776927353695823, "grad_norm": 0.10513906925916672, "learning_rate": 8.256438299710332e-05, "loss": 1.672, "step": 90048 }, { "epoch": 0.17775588133097872, "grad_norm": 0.09985551983118057, "learning_rate": 8.255804277273862e-05, "loss": 1.6495, "step": 90080 }, { "epoch": 0.17781902729237517, "grad_norm": 0.10031275451183319, "learning_rate": 8.255170254837393e-05, "loss": 1.6528, "step": 90112 }, { "epoch": 0.17788217325377162, "grad_norm": 0.10402368754148483, "learning_rate": 8.254536232400925e-05, "loss": 1.6631, "step": 90144 }, { "epoch": 0.17794531921516804, "grad_norm": 0.10302762687206268, "learning_rate": 8.253902209964456e-05, "loss": 1.6448, "step": 90176 }, { "epoch": 0.1780084651765645, "grad_norm": 0.09971826523542404, "learning_rate": 8.253268187527986e-05, "loss": 1.6439, "step": 90208 }, { "epoch": 0.17807161113796094, "grad_norm": 0.10372737050056458, "learning_rate": 8.252634165091518e-05, "loss": 1.654, "step": 90240 }, { "epoch": 0.1781347570993574, "grad_norm": 0.10416607558727264, "learning_rate": 8.252000142655049e-05, "loss": 1.6666, "step": 90272 }, { "epoch": 0.1781979030607538, "grad_norm": 0.10966816544532776, "learning_rate": 8.25136612021858e-05, "loss": 1.6614, "step": 90304 }, { "epoch": 0.17826104902215026, "grad_norm": 0.10601796954870224, "learning_rate": 8.25073209778211e-05, "loss": 1.6616, "step": 90336 }, { "epoch": 0.1783241949835467, "grad_norm": 0.10499513894319534, "learning_rate": 8.250098075345642e-05, "loss": 1.647, "step": 90368 }, { "epoch": 0.17838734094494313, "grad_norm": 0.10670746117830276, "learning_rate": 8.249464052909172e-05, "loss": 1.6515, "step": 90400 }, { "epoch": 0.17845048690633958, "grad_norm": 0.10835777968168259, "learning_rate": 8.248830030472704e-05, "loss": 1.6452, "step": 90432 }, { "epoch": 0.17851363286773603, "grad_norm": 0.10320121049880981, "learning_rate": 8.248196008036235e-05, "loss": 1.6503, "step": 90464 }, { "epoch": 0.17857677882913245, "grad_norm": 0.09848032146692276, "learning_rate": 8.247561985599765e-05, "loss": 1.6581, "step": 90496 }, { "epoch": 0.1786399247905289, "grad_norm": 0.10776699334383011, "learning_rate": 8.246927963163297e-05, "loss": 1.6595, "step": 90528 }, { "epoch": 0.17870307075192535, "grad_norm": 0.10353954136371613, "learning_rate": 8.246293940726828e-05, "loss": 1.6547, "step": 90560 }, { "epoch": 0.17876621671332177, "grad_norm": 0.10759712755680084, "learning_rate": 8.24565991829036e-05, "loss": 1.6559, "step": 90592 }, { "epoch": 0.17882936267471822, "grad_norm": 0.09907542914152145, "learning_rate": 8.24502589585389e-05, "loss": 1.6543, "step": 90624 }, { "epoch": 0.17889250863611467, "grad_norm": 0.11011697351932526, "learning_rate": 8.244391873417421e-05, "loss": 1.6623, "step": 90656 }, { "epoch": 0.1789556545975111, "grad_norm": 0.09947836399078369, "learning_rate": 8.243757850980953e-05, "loss": 1.6707, "step": 90688 }, { "epoch": 0.17901880055890754, "grad_norm": 0.10960274189710617, "learning_rate": 8.243123828544484e-05, "loss": 1.6604, "step": 90720 }, { "epoch": 0.179081946520304, "grad_norm": 0.10730334371328354, "learning_rate": 8.242489806108014e-05, "loss": 1.6465, "step": 90752 }, { "epoch": 0.1791450924817004, "grad_norm": 0.0949479416012764, "learning_rate": 8.241855783671546e-05, "loss": 1.6613, "step": 90784 }, { "epoch": 0.17920823844309686, "grad_norm": 0.09434281289577484, "learning_rate": 8.241221761235076e-05, "loss": 1.6629, "step": 90816 }, { "epoch": 0.1792713844044933, "grad_norm": 0.10668272525072098, "learning_rate": 8.240587738798607e-05, "loss": 1.6484, "step": 90848 }, { "epoch": 0.17933453036588973, "grad_norm": 0.104609914124012, "learning_rate": 8.239953716362137e-05, "loss": 1.6483, "step": 90880 }, { "epoch": 0.17939767632728618, "grad_norm": 0.10034120082855225, "learning_rate": 8.239319693925669e-05, "loss": 1.6598, "step": 90912 }, { "epoch": 0.17946082228868263, "grad_norm": 0.10214868187904358, "learning_rate": 8.2386856714892e-05, "loss": 1.6564, "step": 90944 }, { "epoch": 0.17952396825007905, "grad_norm": 0.10459335148334503, "learning_rate": 8.238051649052732e-05, "loss": 1.6404, "step": 90976 }, { "epoch": 0.1795871142114755, "grad_norm": 0.09940114617347717, "learning_rate": 8.237417626616263e-05, "loss": 1.6498, "step": 91008 }, { "epoch": 0.17965026017287195, "grad_norm": 0.10567575693130493, "learning_rate": 8.236783604179793e-05, "loss": 1.6629, "step": 91040 }, { "epoch": 0.17971340613426837, "grad_norm": 0.10702252388000488, "learning_rate": 8.236149581743325e-05, "loss": 1.6512, "step": 91072 }, { "epoch": 0.17977655209566482, "grad_norm": 0.10372652113437653, "learning_rate": 8.235515559306856e-05, "loss": 1.6443, "step": 91104 }, { "epoch": 0.17983969805706126, "grad_norm": 0.10476701706647873, "learning_rate": 8.234881536870387e-05, "loss": 1.655, "step": 91136 }, { "epoch": 0.17990284401845769, "grad_norm": 0.10643497109413147, "learning_rate": 8.234247514433918e-05, "loss": 1.6516, "step": 91168 }, { "epoch": 0.17996598997985414, "grad_norm": 0.10193724185228348, "learning_rate": 8.233613491997448e-05, "loss": 1.6629, "step": 91200 }, { "epoch": 0.18002913594125058, "grad_norm": 0.10508453100919724, "learning_rate": 8.232979469560979e-05, "loss": 1.6526, "step": 91232 }, { "epoch": 0.180092281902647, "grad_norm": 0.09920556098222733, "learning_rate": 8.23234544712451e-05, "loss": 1.6447, "step": 91264 }, { "epoch": 0.18015542786404345, "grad_norm": 0.10268726199865341, "learning_rate": 8.231711424688041e-05, "loss": 1.651, "step": 91296 }, { "epoch": 0.1802185738254399, "grad_norm": 0.10527651011943817, "learning_rate": 8.231077402251572e-05, "loss": 1.6539, "step": 91328 }, { "epoch": 0.18028171978683633, "grad_norm": 0.10914020985364914, "learning_rate": 8.230443379815104e-05, "loss": 1.6589, "step": 91360 }, { "epoch": 0.18034486574823277, "grad_norm": 0.10070778429508209, "learning_rate": 8.229809357378635e-05, "loss": 1.6443, "step": 91392 }, { "epoch": 0.18040801170962922, "grad_norm": 0.09839003533124924, "learning_rate": 8.229175334942165e-05, "loss": 1.6563, "step": 91424 }, { "epoch": 0.18047115767102564, "grad_norm": 0.10623380541801453, "learning_rate": 8.228541312505697e-05, "loss": 1.6433, "step": 91456 }, { "epoch": 0.1805343036324221, "grad_norm": 0.1055963784456253, "learning_rate": 8.227907290069228e-05, "loss": 1.649, "step": 91488 }, { "epoch": 0.18059744959381854, "grad_norm": 0.10425406694412231, "learning_rate": 8.22727326763276e-05, "loss": 1.6502, "step": 91520 }, { "epoch": 0.18066059555521496, "grad_norm": 0.10616173595190048, "learning_rate": 8.22663924519629e-05, "loss": 1.6495, "step": 91552 }, { "epoch": 0.1807237415166114, "grad_norm": 0.09970536828041077, "learning_rate": 8.226005222759821e-05, "loss": 1.6535, "step": 91584 }, { "epoch": 0.18078688747800786, "grad_norm": 0.10234661400318146, "learning_rate": 8.225371200323351e-05, "loss": 1.6435, "step": 91616 }, { "epoch": 0.18085003343940428, "grad_norm": 0.10297203809022903, "learning_rate": 8.224737177886883e-05, "loss": 1.6538, "step": 91648 }, { "epoch": 0.18091317940080073, "grad_norm": 0.10357316583395004, "learning_rate": 8.224103155450414e-05, "loss": 1.6466, "step": 91680 }, { "epoch": 0.18097632536219718, "grad_norm": 0.10353853553533554, "learning_rate": 8.223469133013944e-05, "loss": 1.6504, "step": 91712 }, { "epoch": 0.1810394713235936, "grad_norm": 0.10125371068716049, "learning_rate": 8.222835110577476e-05, "loss": 1.6519, "step": 91744 }, { "epoch": 0.18110261728499005, "grad_norm": 0.1040688008069992, "learning_rate": 8.222201088141007e-05, "loss": 1.6606, "step": 91776 }, { "epoch": 0.1811657632463865, "grad_norm": 0.11518832296133041, "learning_rate": 8.221567065704539e-05, "loss": 1.6491, "step": 91808 }, { "epoch": 0.18122890920778292, "grad_norm": 0.09601999074220657, "learning_rate": 8.220933043268069e-05, "loss": 1.6473, "step": 91840 }, { "epoch": 0.18129205516917937, "grad_norm": 0.10341492295265198, "learning_rate": 8.2202990208316e-05, "loss": 1.6492, "step": 91872 }, { "epoch": 0.18135520113057582, "grad_norm": 0.10092576593160629, "learning_rate": 8.219664998395132e-05, "loss": 1.6522, "step": 91904 }, { "epoch": 0.18141834709197227, "grad_norm": 0.09618775546550751, "learning_rate": 8.219030975958663e-05, "loss": 1.6568, "step": 91936 }, { "epoch": 0.1814814930533687, "grad_norm": 0.10186668485403061, "learning_rate": 8.218396953522193e-05, "loss": 1.6578, "step": 91968 }, { "epoch": 0.18154463901476514, "grad_norm": 0.1145521029829979, "learning_rate": 8.217762931085725e-05, "loss": 1.6568, "step": 92000 }, { "epoch": 0.1816077849761616, "grad_norm": 0.09803149849176407, "learning_rate": 8.217128908649255e-05, "loss": 1.6666, "step": 92032 }, { "epoch": 0.181670930937558, "grad_norm": 0.09554032236337662, "learning_rate": 8.216494886212786e-05, "loss": 1.6577, "step": 92064 }, { "epoch": 0.18173407689895446, "grad_norm": 0.11648745089769363, "learning_rate": 8.215860863776316e-05, "loss": 1.6508, "step": 92096 }, { "epoch": 0.1817972228603509, "grad_norm": 0.10111765563488007, "learning_rate": 8.215226841339848e-05, "loss": 1.653, "step": 92128 }, { "epoch": 0.18186036882174733, "grad_norm": 0.10026899725198746, "learning_rate": 8.214592818903379e-05, "loss": 1.637, "step": 92160 }, { "epoch": 0.18192351478314378, "grad_norm": 0.10087400674819946, "learning_rate": 8.21395879646691e-05, "loss": 1.6597, "step": 92192 }, { "epoch": 0.18198666074454023, "grad_norm": 0.10220714658498764, "learning_rate": 8.213324774030441e-05, "loss": 1.6447, "step": 92224 }, { "epoch": 0.18204980670593665, "grad_norm": 0.10861620306968689, "learning_rate": 8.212690751593972e-05, "loss": 1.6596, "step": 92256 }, { "epoch": 0.1821129526673331, "grad_norm": 0.11353089660406113, "learning_rate": 8.212056729157504e-05, "loss": 1.6526, "step": 92288 }, { "epoch": 0.18217609862872955, "grad_norm": 0.1047775149345398, "learning_rate": 8.211422706721035e-05, "loss": 1.649, "step": 92320 }, { "epoch": 0.18223924459012597, "grad_norm": 0.10837597399950027, "learning_rate": 8.210788684284567e-05, "loss": 1.6404, "step": 92352 }, { "epoch": 0.18230239055152242, "grad_norm": 0.10726919025182724, "learning_rate": 8.210154661848097e-05, "loss": 1.6497, "step": 92384 }, { "epoch": 0.18236553651291887, "grad_norm": 0.09631171822547913, "learning_rate": 8.209520639411628e-05, "loss": 1.6521, "step": 92416 }, { "epoch": 0.1824286824743153, "grad_norm": 0.10086216777563095, "learning_rate": 8.208886616975158e-05, "loss": 1.6444, "step": 92448 }, { "epoch": 0.18249182843571174, "grad_norm": 0.10379354655742645, "learning_rate": 8.20825259453869e-05, "loss": 1.64, "step": 92480 }, { "epoch": 0.1825549743971082, "grad_norm": 0.1042555496096611, "learning_rate": 8.20761857210222e-05, "loss": 1.662, "step": 92512 }, { "epoch": 0.1826181203585046, "grad_norm": 0.09675726294517517, "learning_rate": 8.206984549665751e-05, "loss": 1.6483, "step": 92544 }, { "epoch": 0.18268126631990106, "grad_norm": 0.10146626085042953, "learning_rate": 8.206350527229283e-05, "loss": 1.6493, "step": 92576 }, { "epoch": 0.1827444122812975, "grad_norm": 0.10710596293210983, "learning_rate": 8.205716504792814e-05, "loss": 1.6514, "step": 92608 }, { "epoch": 0.18280755824269393, "grad_norm": 0.10241590440273285, "learning_rate": 8.205082482356344e-05, "loss": 1.6461, "step": 92640 }, { "epoch": 0.18287070420409038, "grad_norm": 0.10124043375253677, "learning_rate": 8.204448459919876e-05, "loss": 1.6521, "step": 92672 }, { "epoch": 0.18293385016548683, "grad_norm": 0.10410934686660767, "learning_rate": 8.203814437483407e-05, "loss": 1.655, "step": 92704 }, { "epoch": 0.18299699612688325, "grad_norm": 0.10063810646533966, "learning_rate": 8.203180415046939e-05, "loss": 1.6452, "step": 92736 }, { "epoch": 0.1830601420882797, "grad_norm": 0.10326386988162994, "learning_rate": 8.202546392610469e-05, "loss": 1.6671, "step": 92768 }, { "epoch": 0.18312328804967615, "grad_norm": 0.10978447645902634, "learning_rate": 8.201912370174e-05, "loss": 1.6435, "step": 92800 }, { "epoch": 0.18318643401107257, "grad_norm": 0.10418623685836792, "learning_rate": 8.201278347737532e-05, "loss": 1.6475, "step": 92832 }, { "epoch": 0.18324957997246902, "grad_norm": 0.10795113444328308, "learning_rate": 8.200644325301062e-05, "loss": 1.6503, "step": 92864 }, { "epoch": 0.18331272593386547, "grad_norm": 0.10588126629590988, "learning_rate": 8.200010302864592e-05, "loss": 1.6523, "step": 92896 }, { "epoch": 0.1833758718952619, "grad_norm": 0.1011577844619751, "learning_rate": 8.199376280428123e-05, "loss": 1.6487, "step": 92928 }, { "epoch": 0.18343901785665834, "grad_norm": 0.1028437688946724, "learning_rate": 8.198742257991655e-05, "loss": 1.6524, "step": 92960 }, { "epoch": 0.1835021638180548, "grad_norm": 0.1025761291384697, "learning_rate": 8.198108235555186e-05, "loss": 1.6411, "step": 92992 }, { "epoch": 0.1835653097794512, "grad_norm": 0.10055080056190491, "learning_rate": 8.197474213118718e-05, "loss": 1.6431, "step": 93024 }, { "epoch": 0.18362845574084766, "grad_norm": 0.1111232340335846, "learning_rate": 8.196840190682248e-05, "loss": 1.6582, "step": 93056 }, { "epoch": 0.1836916017022441, "grad_norm": 0.10614980012178421, "learning_rate": 8.196206168245779e-05, "loss": 1.6341, "step": 93088 }, { "epoch": 0.18375474766364053, "grad_norm": 0.09853687882423401, "learning_rate": 8.195572145809311e-05, "loss": 1.6498, "step": 93120 }, { "epoch": 0.18381789362503698, "grad_norm": 0.10568626970052719, "learning_rate": 8.194938123372842e-05, "loss": 1.6558, "step": 93152 }, { "epoch": 0.18388103958643343, "grad_norm": 0.1024804636836052, "learning_rate": 8.194304100936372e-05, "loss": 1.6557, "step": 93184 }, { "epoch": 0.18394418554782985, "grad_norm": 0.10066686570644379, "learning_rate": 8.193670078499904e-05, "loss": 1.6621, "step": 93216 }, { "epoch": 0.1840073315092263, "grad_norm": 0.10116623342037201, "learning_rate": 8.193036056063435e-05, "loss": 1.6483, "step": 93248 }, { "epoch": 0.18407047747062275, "grad_norm": 0.09657549113035202, "learning_rate": 8.192402033626965e-05, "loss": 1.6482, "step": 93280 }, { "epoch": 0.18413362343201917, "grad_norm": 0.09711366891860962, "learning_rate": 8.191768011190495e-05, "loss": 1.6507, "step": 93312 }, { "epoch": 0.18419676939341562, "grad_norm": 0.10164396464824677, "learning_rate": 8.191133988754027e-05, "loss": 1.6527, "step": 93344 }, { "epoch": 0.18425991535481206, "grad_norm": 0.10205285996198654, "learning_rate": 8.190499966317558e-05, "loss": 1.6543, "step": 93376 }, { "epoch": 0.1843230613162085, "grad_norm": 0.09781279414892197, "learning_rate": 8.18986594388109e-05, "loss": 1.6514, "step": 93408 }, { "epoch": 0.18438620727760494, "grad_norm": 0.10196979343891144, "learning_rate": 8.18923192144462e-05, "loss": 1.6664, "step": 93440 }, { "epoch": 0.18444935323900138, "grad_norm": 0.10308445990085602, "learning_rate": 8.188597899008151e-05, "loss": 1.6408, "step": 93472 }, { "epoch": 0.1845124992003978, "grad_norm": 0.11130218207836151, "learning_rate": 8.187963876571683e-05, "loss": 1.6604, "step": 93504 }, { "epoch": 0.18457564516179426, "grad_norm": 0.10474035143852234, "learning_rate": 8.187329854135214e-05, "loss": 1.6585, "step": 93536 }, { "epoch": 0.1846387911231907, "grad_norm": 0.09855648875236511, "learning_rate": 8.186695831698744e-05, "loss": 1.651, "step": 93568 }, { "epoch": 0.18470193708458715, "grad_norm": 0.1026657447218895, "learning_rate": 8.186061809262276e-05, "loss": 1.6564, "step": 93600 }, { "epoch": 0.18476508304598357, "grad_norm": 0.1057642474770546, "learning_rate": 8.185427786825807e-05, "loss": 1.6538, "step": 93632 }, { "epoch": 0.18482822900738002, "grad_norm": 0.09877024590969086, "learning_rate": 8.184793764389339e-05, "loss": 1.6652, "step": 93664 }, { "epoch": 0.18489137496877647, "grad_norm": 0.10119491070508957, "learning_rate": 8.184159741952869e-05, "loss": 1.656, "step": 93696 }, { "epoch": 0.1849545209301729, "grad_norm": 0.1023765280842781, "learning_rate": 8.183525719516399e-05, "loss": 1.6511, "step": 93728 }, { "epoch": 0.18501766689156934, "grad_norm": 0.10401959717273712, "learning_rate": 8.18289169707993e-05, "loss": 1.6505, "step": 93760 }, { "epoch": 0.1850808128529658, "grad_norm": 0.10199481248855591, "learning_rate": 8.182257674643462e-05, "loss": 1.6528, "step": 93792 }, { "epoch": 0.18514395881436221, "grad_norm": 0.10202162712812424, "learning_rate": 8.181623652206993e-05, "loss": 1.6555, "step": 93824 }, { "epoch": 0.18520710477575866, "grad_norm": 0.10206763446331024, "learning_rate": 8.180989629770523e-05, "loss": 1.6496, "step": 93856 }, { "epoch": 0.1852702507371551, "grad_norm": 0.11224686354398727, "learning_rate": 8.180355607334055e-05, "loss": 1.6544, "step": 93888 }, { "epoch": 0.18533339669855153, "grad_norm": 0.09769473224878311, "learning_rate": 8.179721584897586e-05, "loss": 1.6499, "step": 93920 }, { "epoch": 0.18539654265994798, "grad_norm": 0.10412193089723587, "learning_rate": 8.179087562461118e-05, "loss": 1.6426, "step": 93952 }, { "epoch": 0.18545968862134443, "grad_norm": 0.09697689861059189, "learning_rate": 8.178453540024648e-05, "loss": 1.6478, "step": 93984 }, { "epoch": 0.18552283458274085, "grad_norm": 0.10029570013284683, "learning_rate": 8.177819517588179e-05, "loss": 1.6487, "step": 94016 }, { "epoch": 0.1855859805441373, "grad_norm": 0.10300135612487793, "learning_rate": 8.177185495151711e-05, "loss": 1.6592, "step": 94048 }, { "epoch": 0.18564912650553375, "grad_norm": 0.09986173361539841, "learning_rate": 8.176551472715241e-05, "loss": 1.6507, "step": 94080 }, { "epoch": 0.18571227246693017, "grad_norm": 0.10211902111768723, "learning_rate": 8.175917450278772e-05, "loss": 1.6558, "step": 94112 }, { "epoch": 0.18577541842832662, "grad_norm": 0.1036137118935585, "learning_rate": 8.175283427842302e-05, "loss": 1.6543, "step": 94144 }, { "epoch": 0.18583856438972307, "grad_norm": 0.09529624879360199, "learning_rate": 8.174649405405834e-05, "loss": 1.6417, "step": 94176 }, { "epoch": 0.1859017103511195, "grad_norm": 0.10255629569292068, "learning_rate": 8.174015382969365e-05, "loss": 1.639, "step": 94208 }, { "epoch": 0.18596485631251594, "grad_norm": 0.10615867376327515, "learning_rate": 8.173381360532895e-05, "loss": 1.6408, "step": 94240 }, { "epoch": 0.1860280022739124, "grad_norm": 0.1062152087688446, "learning_rate": 8.172747338096427e-05, "loss": 1.6544, "step": 94272 }, { "epoch": 0.1860911482353088, "grad_norm": 0.0993291586637497, "learning_rate": 8.172113315659958e-05, "loss": 1.6549, "step": 94304 }, { "epoch": 0.18615429419670526, "grad_norm": 0.10131142288446426, "learning_rate": 8.17147929322349e-05, "loss": 1.656, "step": 94336 }, { "epoch": 0.1862174401581017, "grad_norm": 0.11048464477062225, "learning_rate": 8.170845270787021e-05, "loss": 1.6445, "step": 94368 }, { "epoch": 0.18628058611949813, "grad_norm": 0.10709990561008453, "learning_rate": 8.170211248350551e-05, "loss": 1.6411, "step": 94400 }, { "epoch": 0.18634373208089458, "grad_norm": 0.10426360368728638, "learning_rate": 8.169577225914083e-05, "loss": 1.6504, "step": 94432 }, { "epoch": 0.18640687804229103, "grad_norm": 0.10206352174282074, "learning_rate": 8.168943203477614e-05, "loss": 1.6369, "step": 94464 }, { "epoch": 0.18647002400368745, "grad_norm": 0.11030671745538712, "learning_rate": 8.168309181041144e-05, "loss": 1.6452, "step": 94496 }, { "epoch": 0.1865331699650839, "grad_norm": 0.100029356777668, "learning_rate": 8.167675158604676e-05, "loss": 1.6405, "step": 94528 }, { "epoch": 0.18659631592648035, "grad_norm": 0.10335873812437057, "learning_rate": 8.167041136168206e-05, "loss": 1.657, "step": 94560 }, { "epoch": 0.18665946188787677, "grad_norm": 0.10152248293161392, "learning_rate": 8.166407113731737e-05, "loss": 1.6485, "step": 94592 }, { "epoch": 0.18672260784927322, "grad_norm": 0.09640410542488098, "learning_rate": 8.165773091295269e-05, "loss": 1.6314, "step": 94624 }, { "epoch": 0.18678575381066967, "grad_norm": 0.10848820954561234, "learning_rate": 8.165139068858799e-05, "loss": 1.6466, "step": 94656 }, { "epoch": 0.1868488997720661, "grad_norm": 0.10458540171384811, "learning_rate": 8.16450504642233e-05, "loss": 1.6482, "step": 94688 }, { "epoch": 0.18691204573346254, "grad_norm": 0.10645361989736557, "learning_rate": 8.163871023985862e-05, "loss": 1.6547, "step": 94720 }, { "epoch": 0.186975191694859, "grad_norm": 0.10136350989341736, "learning_rate": 8.163237001549393e-05, "loss": 1.6428, "step": 94752 }, { "epoch": 0.1870383376562554, "grad_norm": 0.10477007925510406, "learning_rate": 8.162602979112923e-05, "loss": 1.6424, "step": 94784 }, { "epoch": 0.18710148361765186, "grad_norm": 0.11905611306428909, "learning_rate": 8.161968956676455e-05, "loss": 1.6503, "step": 94816 }, { "epoch": 0.1871646295790483, "grad_norm": 0.09771967679262161, "learning_rate": 8.161334934239986e-05, "loss": 1.6336, "step": 94848 }, { "epoch": 0.18722777554044473, "grad_norm": 0.10688287019729614, "learning_rate": 8.160700911803518e-05, "loss": 1.6517, "step": 94880 }, { "epoch": 0.18729092150184118, "grad_norm": 0.09580983966588974, "learning_rate": 8.160066889367048e-05, "loss": 1.6467, "step": 94912 }, { "epoch": 0.18735406746323763, "grad_norm": 0.09686318039894104, "learning_rate": 8.159432866930579e-05, "loss": 1.6563, "step": 94944 }, { "epoch": 0.18741721342463405, "grad_norm": 0.09595602750778198, "learning_rate": 8.15879884449411e-05, "loss": 1.6466, "step": 94976 }, { "epoch": 0.1874803593860305, "grad_norm": 0.1114514172077179, "learning_rate": 8.158164822057641e-05, "loss": 1.638, "step": 95008 }, { "epoch": 0.18754350534742695, "grad_norm": 0.10126730054616928, "learning_rate": 8.157530799621172e-05, "loss": 1.6645, "step": 95040 }, { "epoch": 0.18760665130882337, "grad_norm": 0.10387317091226578, "learning_rate": 8.156896777184702e-05, "loss": 1.636, "step": 95072 }, { "epoch": 0.18766979727021982, "grad_norm": 0.10348210483789444, "learning_rate": 8.156262754748234e-05, "loss": 1.651, "step": 95104 }, { "epoch": 0.18773294323161627, "grad_norm": 0.09826581925153732, "learning_rate": 8.155628732311765e-05, "loss": 1.6471, "step": 95136 }, { "epoch": 0.1877960891930127, "grad_norm": 0.10650411248207092, "learning_rate": 8.154994709875297e-05, "loss": 1.6481, "step": 95168 }, { "epoch": 0.18785923515440914, "grad_norm": 0.09995251148939133, "learning_rate": 8.154360687438827e-05, "loss": 1.6544, "step": 95200 }, { "epoch": 0.1879223811158056, "grad_norm": 0.10455673933029175, "learning_rate": 8.153726665002358e-05, "loss": 1.6416, "step": 95232 }, { "epoch": 0.18798552707720204, "grad_norm": 0.10279633849859238, "learning_rate": 8.15309264256589e-05, "loss": 1.6414, "step": 95264 }, { "epoch": 0.18804867303859846, "grad_norm": 0.10274940729141235, "learning_rate": 8.152458620129421e-05, "loss": 1.6594, "step": 95296 }, { "epoch": 0.1881118189999949, "grad_norm": 0.10336265712976456, "learning_rate": 8.151824597692951e-05, "loss": 1.6567, "step": 95328 }, { "epoch": 0.18817496496139136, "grad_norm": 0.10061810910701752, "learning_rate": 8.151190575256481e-05, "loss": 1.6391, "step": 95360 }, { "epoch": 0.18823811092278778, "grad_norm": 0.10806973278522491, "learning_rate": 8.150556552820013e-05, "loss": 1.6399, "step": 95392 }, { "epoch": 0.18830125688418423, "grad_norm": 0.10195987671613693, "learning_rate": 8.149922530383544e-05, "loss": 1.6512, "step": 95424 }, { "epoch": 0.18836440284558068, "grad_norm": 0.10850729048252106, "learning_rate": 8.149288507947074e-05, "loss": 1.6606, "step": 95456 }, { "epoch": 0.1884275488069771, "grad_norm": 0.10320477932691574, "learning_rate": 8.148654485510606e-05, "loss": 1.6406, "step": 95488 }, { "epoch": 0.18849069476837355, "grad_norm": 0.10265101492404938, "learning_rate": 8.148020463074137e-05, "loss": 1.6553, "step": 95520 }, { "epoch": 0.18855384072977, "grad_norm": 0.1066662147641182, "learning_rate": 8.147386440637669e-05, "loss": 1.654, "step": 95552 }, { "epoch": 0.18861698669116642, "grad_norm": 0.09863730520009995, "learning_rate": 8.146752418201199e-05, "loss": 1.6564, "step": 95584 }, { "epoch": 0.18868013265256287, "grad_norm": 0.10677376389503479, "learning_rate": 8.14611839576473e-05, "loss": 1.6508, "step": 95616 }, { "epoch": 0.18874327861395931, "grad_norm": 0.1141144335269928, "learning_rate": 8.145484373328262e-05, "loss": 1.6624, "step": 95648 }, { "epoch": 0.18880642457535574, "grad_norm": 0.10073503851890564, "learning_rate": 8.144850350891793e-05, "loss": 1.6435, "step": 95680 }, { "epoch": 0.18886957053675218, "grad_norm": 0.10807400941848755, "learning_rate": 8.144216328455325e-05, "loss": 1.6561, "step": 95712 }, { "epoch": 0.18893271649814863, "grad_norm": 0.10667506605386734, "learning_rate": 8.143582306018855e-05, "loss": 1.6476, "step": 95744 }, { "epoch": 0.18899586245954506, "grad_norm": 0.10062408447265625, "learning_rate": 8.142948283582385e-05, "loss": 1.6498, "step": 95776 }, { "epoch": 0.1890590084209415, "grad_norm": 0.09419935196638107, "learning_rate": 8.142314261145916e-05, "loss": 1.6497, "step": 95808 }, { "epoch": 0.18912215438233795, "grad_norm": 0.10771109163761139, "learning_rate": 8.141680238709448e-05, "loss": 1.6449, "step": 95840 }, { "epoch": 0.18918530034373437, "grad_norm": 0.1017397940158844, "learning_rate": 8.141046216272978e-05, "loss": 1.6556, "step": 95872 }, { "epoch": 0.18924844630513082, "grad_norm": 0.10358379781246185, "learning_rate": 8.14041219383651e-05, "loss": 1.6353, "step": 95904 }, { "epoch": 0.18931159226652727, "grad_norm": 0.10079692304134369, "learning_rate": 8.139778171400041e-05, "loss": 1.6432, "step": 95936 }, { "epoch": 0.1893747382279237, "grad_norm": 0.10643357038497925, "learning_rate": 8.139144148963572e-05, "loss": 1.6492, "step": 95968 }, { "epoch": 0.18943788418932014, "grad_norm": 0.10666496306657791, "learning_rate": 8.138510126527102e-05, "loss": 1.6451, "step": 96000 }, { "epoch": 0.1895010301507166, "grad_norm": 0.09939305484294891, "learning_rate": 8.137876104090634e-05, "loss": 1.6441, "step": 96032 }, { "epoch": 0.18956417611211301, "grad_norm": 0.10081891715526581, "learning_rate": 8.137242081654165e-05, "loss": 1.6482, "step": 96064 }, { "epoch": 0.18962732207350946, "grad_norm": 0.10111203044652939, "learning_rate": 8.136608059217697e-05, "loss": 1.6519, "step": 96096 }, { "epoch": 0.1896904680349059, "grad_norm": 0.10509985685348511, "learning_rate": 8.135974036781227e-05, "loss": 1.6562, "step": 96128 }, { "epoch": 0.18975361399630233, "grad_norm": 0.10449133068323135, "learning_rate": 8.135340014344758e-05, "loss": 1.6607, "step": 96160 }, { "epoch": 0.18981675995769878, "grad_norm": 0.10193048417568207, "learning_rate": 8.134705991908288e-05, "loss": 1.6527, "step": 96192 }, { "epoch": 0.18987990591909523, "grad_norm": 0.10266821831464767, "learning_rate": 8.13407196947182e-05, "loss": 1.6349, "step": 96224 }, { "epoch": 0.18994305188049165, "grad_norm": 0.10425172746181488, "learning_rate": 8.13343794703535e-05, "loss": 1.6547, "step": 96256 }, { "epoch": 0.1900061978418881, "grad_norm": 0.097136490046978, "learning_rate": 8.132803924598881e-05, "loss": 1.6369, "step": 96288 }, { "epoch": 0.19006934380328455, "grad_norm": 0.10471000522375107, "learning_rate": 8.132169902162413e-05, "loss": 1.6486, "step": 96320 }, { "epoch": 0.19013248976468097, "grad_norm": 0.10519730299711227, "learning_rate": 8.131535879725944e-05, "loss": 1.6568, "step": 96352 }, { "epoch": 0.19019563572607742, "grad_norm": 0.1161370649933815, "learning_rate": 8.130901857289476e-05, "loss": 1.6442, "step": 96384 }, { "epoch": 0.19025878168747387, "grad_norm": 0.10171066969633102, "learning_rate": 8.130267834853006e-05, "loss": 1.6534, "step": 96416 }, { "epoch": 0.1903219276488703, "grad_norm": 0.10538149625062943, "learning_rate": 8.129633812416537e-05, "loss": 1.645, "step": 96448 }, { "epoch": 0.19038507361026674, "grad_norm": 0.10869000107049942, "learning_rate": 8.128999789980069e-05, "loss": 1.6618, "step": 96480 }, { "epoch": 0.1904482195716632, "grad_norm": 0.10533742606639862, "learning_rate": 8.1283657675436e-05, "loss": 1.6469, "step": 96512 }, { "epoch": 0.1905113655330596, "grad_norm": 0.10774150490760803, "learning_rate": 8.12773174510713e-05, "loss": 1.6399, "step": 96544 }, { "epoch": 0.19057451149445606, "grad_norm": 0.09870371967554092, "learning_rate": 8.127097722670662e-05, "loss": 1.6704, "step": 96576 }, { "epoch": 0.1906376574558525, "grad_norm": 0.11601744592189789, "learning_rate": 8.126463700234192e-05, "loss": 1.6337, "step": 96608 }, { "epoch": 0.19070080341724893, "grad_norm": 0.09750094264745712, "learning_rate": 8.125829677797723e-05, "loss": 1.6564, "step": 96640 }, { "epoch": 0.19076394937864538, "grad_norm": 0.10632169246673584, "learning_rate": 8.125195655361254e-05, "loss": 1.6354, "step": 96672 }, { "epoch": 0.19082709534004183, "grad_norm": 0.10485920310020447, "learning_rate": 8.124561632924785e-05, "loss": 1.6384, "step": 96704 }, { "epoch": 0.19089024130143825, "grad_norm": 0.10848258435726166, "learning_rate": 8.123927610488316e-05, "loss": 1.6351, "step": 96736 }, { "epoch": 0.1909533872628347, "grad_norm": 0.09728173911571503, "learning_rate": 8.123293588051848e-05, "loss": 1.6388, "step": 96768 }, { "epoch": 0.19101653322423115, "grad_norm": 0.10131725668907166, "learning_rate": 8.122659565615378e-05, "loss": 1.6474, "step": 96800 }, { "epoch": 0.19107967918562757, "grad_norm": 0.10624504834413528, "learning_rate": 8.12202554317891e-05, "loss": 1.6605, "step": 96832 }, { "epoch": 0.19114282514702402, "grad_norm": 0.10106166452169418, "learning_rate": 8.121391520742441e-05, "loss": 1.6469, "step": 96864 }, { "epoch": 0.19120597110842047, "grad_norm": 0.10225090384483337, "learning_rate": 8.120757498305972e-05, "loss": 1.6605, "step": 96896 }, { "epoch": 0.19126911706981692, "grad_norm": 0.11132453382015228, "learning_rate": 8.120123475869502e-05, "loss": 1.6549, "step": 96928 }, { "epoch": 0.19133226303121334, "grad_norm": 0.10704923421144485, "learning_rate": 8.119489453433034e-05, "loss": 1.6479, "step": 96960 }, { "epoch": 0.1913954089926098, "grad_norm": 0.09917685389518738, "learning_rate": 8.118855430996565e-05, "loss": 1.6501, "step": 96992 }, { "epoch": 0.19145855495400624, "grad_norm": 0.1037558913230896, "learning_rate": 8.118221408560095e-05, "loss": 1.6623, "step": 97024 }, { "epoch": 0.19152170091540266, "grad_norm": 0.10285740345716476, "learning_rate": 8.117587386123627e-05, "loss": 1.652, "step": 97056 }, { "epoch": 0.1915848468767991, "grad_norm": 0.11165329068899155, "learning_rate": 8.116953363687157e-05, "loss": 1.6458, "step": 97088 }, { "epoch": 0.19164799283819556, "grad_norm": 0.09738541394472122, "learning_rate": 8.116319341250689e-05, "loss": 1.6244, "step": 97120 }, { "epoch": 0.19171113879959198, "grad_norm": 0.1026107519865036, "learning_rate": 8.11568531881422e-05, "loss": 1.6272, "step": 97152 }, { "epoch": 0.19177428476098843, "grad_norm": 0.09903274476528168, "learning_rate": 8.115051296377751e-05, "loss": 1.6434, "step": 97184 }, { "epoch": 0.19183743072238488, "grad_norm": 0.10325904935598373, "learning_rate": 8.114417273941282e-05, "loss": 1.6486, "step": 97216 }, { "epoch": 0.1919005766837813, "grad_norm": 0.11294377595186234, "learning_rate": 8.113783251504813e-05, "loss": 1.6392, "step": 97248 }, { "epoch": 0.19196372264517775, "grad_norm": 0.09898438304662704, "learning_rate": 8.113149229068344e-05, "loss": 1.6489, "step": 97280 }, { "epoch": 0.1920268686065742, "grad_norm": 0.1110379546880722, "learning_rate": 8.112515206631876e-05, "loss": 1.6338, "step": 97312 }, { "epoch": 0.19209001456797062, "grad_norm": 0.10507860779762268, "learning_rate": 8.111881184195406e-05, "loss": 1.6593, "step": 97344 }, { "epoch": 0.19215316052936707, "grad_norm": 0.1059076115489006, "learning_rate": 8.111247161758937e-05, "loss": 1.6513, "step": 97376 }, { "epoch": 0.19221630649076352, "grad_norm": 0.10829482972621918, "learning_rate": 8.110613139322469e-05, "loss": 1.6439, "step": 97408 }, { "epoch": 0.19227945245215994, "grad_norm": 0.10728301107883453, "learning_rate": 8.109979116885999e-05, "loss": 1.6492, "step": 97440 }, { "epoch": 0.1923425984135564, "grad_norm": 0.1041484922170639, "learning_rate": 8.109345094449529e-05, "loss": 1.6485, "step": 97472 }, { "epoch": 0.19240574437495284, "grad_norm": 0.11218147724866867, "learning_rate": 8.10871107201306e-05, "loss": 1.6417, "step": 97504 }, { "epoch": 0.19246889033634926, "grad_norm": 0.09830255061388016, "learning_rate": 8.108077049576592e-05, "loss": 1.6525, "step": 97536 }, { "epoch": 0.1925320362977457, "grad_norm": 0.10048898309469223, "learning_rate": 8.107443027140123e-05, "loss": 1.6614, "step": 97568 }, { "epoch": 0.19259518225914216, "grad_norm": 0.11010058224201202, "learning_rate": 8.106809004703654e-05, "loss": 1.6481, "step": 97600 }, { "epoch": 0.19265832822053858, "grad_norm": 0.09566009044647217, "learning_rate": 8.106174982267185e-05, "loss": 1.6518, "step": 97632 }, { "epoch": 0.19272147418193503, "grad_norm": 0.11182036995887756, "learning_rate": 8.105540959830716e-05, "loss": 1.6405, "step": 97664 }, { "epoch": 0.19278462014333148, "grad_norm": 0.10775816440582275, "learning_rate": 8.104906937394248e-05, "loss": 1.6487, "step": 97696 }, { "epoch": 0.1928477661047279, "grad_norm": 0.1047726571559906, "learning_rate": 8.10427291495778e-05, "loss": 1.6442, "step": 97728 }, { "epoch": 0.19291091206612435, "grad_norm": 0.09861469268798828, "learning_rate": 8.10363889252131e-05, "loss": 1.6592, "step": 97760 }, { "epoch": 0.1929740580275208, "grad_norm": 0.10144228488206863, "learning_rate": 8.103004870084841e-05, "loss": 1.6347, "step": 97792 }, { "epoch": 0.19303720398891722, "grad_norm": 0.10166819393634796, "learning_rate": 8.102370847648372e-05, "loss": 1.6351, "step": 97824 }, { "epoch": 0.19310034995031367, "grad_norm": 0.09624498337507248, "learning_rate": 8.101736825211903e-05, "loss": 1.6453, "step": 97856 }, { "epoch": 0.19316349591171011, "grad_norm": 0.10432087630033493, "learning_rate": 8.101102802775433e-05, "loss": 1.6373, "step": 97888 }, { "epoch": 0.19322664187310654, "grad_norm": 0.09928113222122192, "learning_rate": 8.100468780338964e-05, "loss": 1.6521, "step": 97920 }, { "epoch": 0.19328978783450299, "grad_norm": 0.10615003108978271, "learning_rate": 8.099834757902496e-05, "loss": 1.6345, "step": 97952 }, { "epoch": 0.19335293379589943, "grad_norm": 0.11123167723417282, "learning_rate": 8.099200735466027e-05, "loss": 1.6516, "step": 97984 }, { "epoch": 0.19341607975729586, "grad_norm": 0.10558531433343887, "learning_rate": 8.098566713029557e-05, "loss": 1.6458, "step": 98016 }, { "epoch": 0.1934792257186923, "grad_norm": 0.1033475399017334, "learning_rate": 8.097932690593089e-05, "loss": 1.648, "step": 98048 }, { "epoch": 0.19354237168008875, "grad_norm": 0.10164621472358704, "learning_rate": 8.09729866815662e-05, "loss": 1.634, "step": 98080 }, { "epoch": 0.19360551764148518, "grad_norm": 0.11083465069532394, "learning_rate": 8.096664645720151e-05, "loss": 1.6413, "step": 98112 }, { "epoch": 0.19366866360288162, "grad_norm": 0.10140354931354523, "learning_rate": 8.096030623283682e-05, "loss": 1.653, "step": 98144 }, { "epoch": 0.19373180956427807, "grad_norm": 0.10933768004179001, "learning_rate": 8.095396600847213e-05, "loss": 1.6367, "step": 98176 }, { "epoch": 0.1937949555256745, "grad_norm": 0.10203826427459717, "learning_rate": 8.094762578410744e-05, "loss": 1.6484, "step": 98208 }, { "epoch": 0.19385810148707094, "grad_norm": 0.11817627400159836, "learning_rate": 8.094128555974275e-05, "loss": 1.6633, "step": 98240 }, { "epoch": 0.1939212474484674, "grad_norm": 0.10960616916418076, "learning_rate": 8.093494533537806e-05, "loss": 1.642, "step": 98272 }, { "epoch": 0.19398439340986381, "grad_norm": 0.10867169499397278, "learning_rate": 8.092860511101336e-05, "loss": 1.6415, "step": 98304 }, { "epoch": 0.19404753937126026, "grad_norm": 0.10847428441047668, "learning_rate": 8.092226488664868e-05, "loss": 1.6447, "step": 98336 }, { "epoch": 0.1941106853326567, "grad_norm": 0.11210865527391434, "learning_rate": 8.091592466228399e-05, "loss": 1.6458, "step": 98368 }, { "epoch": 0.19417383129405313, "grad_norm": 0.0990021750330925, "learning_rate": 8.09095844379193e-05, "loss": 1.6528, "step": 98400 }, { "epoch": 0.19423697725544958, "grad_norm": 0.10736454278230667, "learning_rate": 8.09032442135546e-05, "loss": 1.6336, "step": 98432 }, { "epoch": 0.19430012321684603, "grad_norm": 0.11273426562547684, "learning_rate": 8.089690398918992e-05, "loss": 1.6453, "step": 98464 }, { "epoch": 0.19436326917824245, "grad_norm": 0.09996658563613892, "learning_rate": 8.089056376482524e-05, "loss": 1.6415, "step": 98496 }, { "epoch": 0.1944264151396389, "grad_norm": 0.10359546542167664, "learning_rate": 8.088422354046055e-05, "loss": 1.6481, "step": 98528 }, { "epoch": 0.19448956110103535, "grad_norm": 0.10674504190683365, "learning_rate": 8.087788331609585e-05, "loss": 1.6489, "step": 98560 }, { "epoch": 0.19455270706243177, "grad_norm": 0.11016551405191422, "learning_rate": 8.087154309173117e-05, "loss": 1.6447, "step": 98592 }, { "epoch": 0.19461585302382822, "grad_norm": 0.10759218037128448, "learning_rate": 8.086520286736648e-05, "loss": 1.6423, "step": 98624 }, { "epoch": 0.19467899898522467, "grad_norm": 0.10278622806072235, "learning_rate": 8.085886264300178e-05, "loss": 1.637, "step": 98656 }, { "epoch": 0.19474214494662112, "grad_norm": 0.10291390120983124, "learning_rate": 8.08525224186371e-05, "loss": 1.6439, "step": 98688 }, { "epoch": 0.19480529090801754, "grad_norm": 0.10670579224824905, "learning_rate": 8.08461821942724e-05, "loss": 1.6431, "step": 98720 }, { "epoch": 0.194868436869414, "grad_norm": 0.10281068831682205, "learning_rate": 8.083984196990771e-05, "loss": 1.652, "step": 98752 }, { "epoch": 0.19493158283081044, "grad_norm": 0.10091810673475266, "learning_rate": 8.083350174554303e-05, "loss": 1.6324, "step": 98784 }, { "epoch": 0.19499472879220686, "grad_norm": 0.10356218367815018, "learning_rate": 8.082716152117833e-05, "loss": 1.6405, "step": 98816 }, { "epoch": 0.1950578747536033, "grad_norm": 0.11054695397615433, "learning_rate": 8.082082129681364e-05, "loss": 1.6401, "step": 98848 }, { "epoch": 0.19512102071499976, "grad_norm": 0.10458224266767502, "learning_rate": 8.081448107244896e-05, "loss": 1.6454, "step": 98880 }, { "epoch": 0.19518416667639618, "grad_norm": 0.09834691882133484, "learning_rate": 8.080814084808427e-05, "loss": 1.637, "step": 98912 }, { "epoch": 0.19524731263779263, "grad_norm": 0.10757139325141907, "learning_rate": 8.080180062371958e-05, "loss": 1.6443, "step": 98944 }, { "epoch": 0.19531045859918908, "grad_norm": 0.10837163031101227, "learning_rate": 8.079546039935489e-05, "loss": 1.6451, "step": 98976 }, { "epoch": 0.1953736045605855, "grad_norm": 0.10756513476371765, "learning_rate": 8.07891201749902e-05, "loss": 1.6424, "step": 99008 }, { "epoch": 0.19543675052198195, "grad_norm": 0.11152730137109756, "learning_rate": 8.078277995062551e-05, "loss": 1.6454, "step": 99040 }, { "epoch": 0.1954998964833784, "grad_norm": 0.11061135679483414, "learning_rate": 8.077643972626082e-05, "loss": 1.633, "step": 99072 }, { "epoch": 0.19556304244477482, "grad_norm": 0.11073538661003113, "learning_rate": 8.077009950189613e-05, "loss": 1.6572, "step": 99104 }, { "epoch": 0.19562618840617127, "grad_norm": 0.10706045478582382, "learning_rate": 8.076375927753143e-05, "loss": 1.6454, "step": 99136 }, { "epoch": 0.19568933436756772, "grad_norm": 0.10636785626411438, "learning_rate": 8.075741905316675e-05, "loss": 1.6424, "step": 99168 }, { "epoch": 0.19575248032896414, "grad_norm": 0.10608632117509842, "learning_rate": 8.075107882880206e-05, "loss": 1.6418, "step": 99200 }, { "epoch": 0.1958156262903606, "grad_norm": 0.10915641486644745, "learning_rate": 8.074473860443736e-05, "loss": 1.6452, "step": 99232 }, { "epoch": 0.19587877225175704, "grad_norm": 0.11602920293807983, "learning_rate": 8.073839838007268e-05, "loss": 1.6507, "step": 99264 }, { "epoch": 0.19594191821315346, "grad_norm": 0.10733691602945328, "learning_rate": 8.073205815570799e-05, "loss": 1.6323, "step": 99296 }, { "epoch": 0.1960050641745499, "grad_norm": 0.10154245048761368, "learning_rate": 8.07257179313433e-05, "loss": 1.628, "step": 99328 }, { "epoch": 0.19606821013594636, "grad_norm": 0.1138070598244667, "learning_rate": 8.07193777069786e-05, "loss": 1.6535, "step": 99360 }, { "epoch": 0.19613135609734278, "grad_norm": 0.10772701352834702, "learning_rate": 8.071303748261392e-05, "loss": 1.6271, "step": 99392 }, { "epoch": 0.19619450205873923, "grad_norm": 0.1014992892742157, "learning_rate": 8.070669725824924e-05, "loss": 1.6415, "step": 99424 }, { "epoch": 0.19625764802013568, "grad_norm": 0.10349030047655106, "learning_rate": 8.070035703388455e-05, "loss": 1.6422, "step": 99456 }, { "epoch": 0.1963207939815321, "grad_norm": 0.10261155664920807, "learning_rate": 8.069401680951985e-05, "loss": 1.6474, "step": 99488 }, { "epoch": 0.19638393994292855, "grad_norm": 0.1023111417889595, "learning_rate": 8.068767658515515e-05, "loss": 1.644, "step": 99520 }, { "epoch": 0.196447085904325, "grad_norm": 0.10375040024518967, "learning_rate": 8.068133636079047e-05, "loss": 1.6466, "step": 99552 }, { "epoch": 0.19651023186572142, "grad_norm": 0.10253901034593582, "learning_rate": 8.067499613642578e-05, "loss": 1.6493, "step": 99584 }, { "epoch": 0.19657337782711787, "grad_norm": 0.09769316762685776, "learning_rate": 8.06686559120611e-05, "loss": 1.6492, "step": 99616 }, { "epoch": 0.19663652378851432, "grad_norm": 0.09973352402448654, "learning_rate": 8.06623156876964e-05, "loss": 1.6459, "step": 99648 }, { "epoch": 0.19669966974991074, "grad_norm": 0.09752755612134933, "learning_rate": 8.065597546333171e-05, "loss": 1.6495, "step": 99680 }, { "epoch": 0.1967628157113072, "grad_norm": 0.10267551243305206, "learning_rate": 8.064963523896703e-05, "loss": 1.6457, "step": 99712 }, { "epoch": 0.19682596167270364, "grad_norm": 0.10137288272380829, "learning_rate": 8.064329501460234e-05, "loss": 1.6338, "step": 99744 }, { "epoch": 0.19688910763410006, "grad_norm": 0.10639408975839615, "learning_rate": 8.063695479023764e-05, "loss": 1.6423, "step": 99776 }, { "epoch": 0.1969522535954965, "grad_norm": 0.10027466714382172, "learning_rate": 8.063061456587296e-05, "loss": 1.6394, "step": 99808 }, { "epoch": 0.19701539955689296, "grad_norm": 0.10378637909889221, "learning_rate": 8.062427434150827e-05, "loss": 1.6373, "step": 99840 }, { "epoch": 0.19707854551828938, "grad_norm": 0.10521262139081955, "learning_rate": 8.061793411714359e-05, "loss": 1.6414, "step": 99872 }, { "epoch": 0.19714169147968583, "grad_norm": 0.108073890209198, "learning_rate": 8.061159389277889e-05, "loss": 1.6466, "step": 99904 }, { "epoch": 0.19720483744108228, "grad_norm": 0.10325919836759567, "learning_rate": 8.060525366841419e-05, "loss": 1.6486, "step": 99936 }, { "epoch": 0.1972679834024787, "grad_norm": 0.10606803745031357, "learning_rate": 8.05989134440495e-05, "loss": 1.6402, "step": 99968 }, { "epoch": 0.19733112936387515, "grad_norm": 0.10333341360092163, "learning_rate": 8.059257321968482e-05, "loss": 1.649, "step": 100000 }, { "epoch": 0.1973942753252716, "grad_norm": 0.09961511939764023, "learning_rate": 8.058623299532012e-05, "loss": 1.6369, "step": 100032 }, { "epoch": 0.19745742128666802, "grad_norm": 0.10387607663869858, "learning_rate": 8.057989277095543e-05, "loss": 1.6453, "step": 100064 }, { "epoch": 0.19752056724806447, "grad_norm": 0.10061222314834595, "learning_rate": 8.057355254659075e-05, "loss": 1.638, "step": 100096 }, { "epoch": 0.19758371320946091, "grad_norm": 0.1025899276137352, "learning_rate": 8.056721232222606e-05, "loss": 1.6349, "step": 100128 }, { "epoch": 0.19764685917085734, "grad_norm": 0.10175150632858276, "learning_rate": 8.056087209786136e-05, "loss": 1.6466, "step": 100160 }, { "epoch": 0.19771000513225379, "grad_norm": 0.10213925689458847, "learning_rate": 8.055453187349668e-05, "loss": 1.6409, "step": 100192 }, { "epoch": 0.19777315109365023, "grad_norm": 0.10493280738592148, "learning_rate": 8.054819164913199e-05, "loss": 1.6476, "step": 100224 }, { "epoch": 0.19783629705504666, "grad_norm": 0.10523036867380142, "learning_rate": 8.05418514247673e-05, "loss": 1.65, "step": 100256 }, { "epoch": 0.1978994430164431, "grad_norm": 0.10567599534988403, "learning_rate": 8.053551120040262e-05, "loss": 1.631, "step": 100288 }, { "epoch": 0.19796258897783955, "grad_norm": 0.10249998420476913, "learning_rate": 8.052917097603792e-05, "loss": 1.6429, "step": 100320 }, { "epoch": 0.198025734939236, "grad_norm": 0.10519619286060333, "learning_rate": 8.052283075167322e-05, "loss": 1.6386, "step": 100352 }, { "epoch": 0.19808888090063242, "grad_norm": 0.1096009612083435, "learning_rate": 8.051649052730854e-05, "loss": 1.6612, "step": 100384 }, { "epoch": 0.19815202686202887, "grad_norm": 0.10052002221345901, "learning_rate": 8.051015030294385e-05, "loss": 1.6266, "step": 100416 }, { "epoch": 0.19821517282342532, "grad_norm": 0.10516545921564102, "learning_rate": 8.050381007857915e-05, "loss": 1.6359, "step": 100448 }, { "epoch": 0.19827831878482174, "grad_norm": 0.11198201030492783, "learning_rate": 8.049746985421447e-05, "loss": 1.6374, "step": 100480 }, { "epoch": 0.1983414647462182, "grad_norm": 0.10974428802728653, "learning_rate": 8.049112962984978e-05, "loss": 1.6302, "step": 100512 }, { "epoch": 0.19840461070761464, "grad_norm": 0.106208935379982, "learning_rate": 8.04847894054851e-05, "loss": 1.6484, "step": 100544 }, { "epoch": 0.19846775666901106, "grad_norm": 0.10340795665979385, "learning_rate": 8.04784491811204e-05, "loss": 1.6364, "step": 100576 }, { "epoch": 0.1985309026304075, "grad_norm": 0.10531339049339294, "learning_rate": 8.047210895675571e-05, "loss": 1.6321, "step": 100608 }, { "epoch": 0.19859404859180396, "grad_norm": 0.10440301895141602, "learning_rate": 8.046576873239103e-05, "loss": 1.6376, "step": 100640 }, { "epoch": 0.19865719455320038, "grad_norm": 0.10433050245046616, "learning_rate": 8.045942850802634e-05, "loss": 1.6453, "step": 100672 }, { "epoch": 0.19872034051459683, "grad_norm": 0.11418783664703369, "learning_rate": 8.045308828366164e-05, "loss": 1.6548, "step": 100704 }, { "epoch": 0.19878348647599328, "grad_norm": 0.10041820257902145, "learning_rate": 8.044674805929696e-05, "loss": 1.6455, "step": 100736 }, { "epoch": 0.1988466324373897, "grad_norm": 0.10225223749876022, "learning_rate": 8.044040783493226e-05, "loss": 1.6588, "step": 100768 }, { "epoch": 0.19890977839878615, "grad_norm": 0.10490626841783524, "learning_rate": 8.043406761056757e-05, "loss": 1.6358, "step": 100800 }, { "epoch": 0.1989729243601826, "grad_norm": 0.09783979505300522, "learning_rate": 8.042772738620287e-05, "loss": 1.6426, "step": 100832 }, { "epoch": 0.19903607032157902, "grad_norm": 0.1107398271560669, "learning_rate": 8.042138716183819e-05, "loss": 1.6348, "step": 100864 }, { "epoch": 0.19909921628297547, "grad_norm": 0.10240809619426727, "learning_rate": 8.04150469374735e-05, "loss": 1.637, "step": 100896 }, { "epoch": 0.19916236224437192, "grad_norm": 0.10322824865579605, "learning_rate": 8.040870671310882e-05, "loss": 1.636, "step": 100928 }, { "epoch": 0.19922550820576834, "grad_norm": 0.10999355465173721, "learning_rate": 8.040236648874413e-05, "loss": 1.6548, "step": 100960 }, { "epoch": 0.1992886541671648, "grad_norm": 0.11258241534233093, "learning_rate": 8.039602626437943e-05, "loss": 1.6374, "step": 100992 }, { "epoch": 0.19935180012856124, "grad_norm": 0.10544874519109726, "learning_rate": 8.038968604001475e-05, "loss": 1.6326, "step": 101024 }, { "epoch": 0.19941494608995766, "grad_norm": 0.10838599503040314, "learning_rate": 8.038334581565006e-05, "loss": 1.6444, "step": 101056 }, { "epoch": 0.1994780920513541, "grad_norm": 0.10652278363704681, "learning_rate": 8.037700559128538e-05, "loss": 1.634, "step": 101088 }, { "epoch": 0.19954123801275056, "grad_norm": 0.09899964183568954, "learning_rate": 8.037066536692068e-05, "loss": 1.6305, "step": 101120 }, { "epoch": 0.19960438397414698, "grad_norm": 0.09979450702667236, "learning_rate": 8.036432514255599e-05, "loss": 1.6409, "step": 101152 }, { "epoch": 0.19966752993554343, "grad_norm": 0.1164548322558403, "learning_rate": 8.035798491819129e-05, "loss": 1.6286, "step": 101184 }, { "epoch": 0.19973067589693988, "grad_norm": 0.10702712833881378, "learning_rate": 8.035164469382661e-05, "loss": 1.6503, "step": 101216 }, { "epoch": 0.1997938218583363, "grad_norm": 0.11386249959468842, "learning_rate": 8.034530446946191e-05, "loss": 1.6249, "step": 101248 }, { "epoch": 0.19985696781973275, "grad_norm": 0.11760639399290085, "learning_rate": 8.033896424509722e-05, "loss": 1.6389, "step": 101280 }, { "epoch": 0.1999201137811292, "grad_norm": 0.11131103336811066, "learning_rate": 8.033262402073254e-05, "loss": 1.6484, "step": 101312 }, { "epoch": 0.19998325974252562, "grad_norm": 0.10473434627056122, "learning_rate": 8.032628379636785e-05, "loss": 1.6389, "step": 101344 }, { "epoch": 0.20004640570392207, "grad_norm": 0.09926243871450424, "learning_rate": 8.031994357200315e-05, "loss": 1.6279, "step": 101376 }, { "epoch": 0.20010955166531852, "grad_norm": 0.10468669980764389, "learning_rate": 8.031360334763847e-05, "loss": 1.6538, "step": 101408 }, { "epoch": 0.20017269762671494, "grad_norm": 0.10861735045909882, "learning_rate": 8.030726312327378e-05, "loss": 1.6387, "step": 101440 }, { "epoch": 0.2002358435881114, "grad_norm": 0.10439712554216385, "learning_rate": 8.03009228989091e-05, "loss": 1.6422, "step": 101472 }, { "epoch": 0.20029898954950784, "grad_norm": 0.10245823860168457, "learning_rate": 8.02945826745444e-05, "loss": 1.6305, "step": 101504 }, { "epoch": 0.20036213551090426, "grad_norm": 0.10010068118572235, "learning_rate": 8.028824245017971e-05, "loss": 1.6429, "step": 101536 }, { "epoch": 0.2004252814723007, "grad_norm": 0.11026593297719955, "learning_rate": 8.028190222581503e-05, "loss": 1.6346, "step": 101568 }, { "epoch": 0.20048842743369716, "grad_norm": 0.09516335278749466, "learning_rate": 8.027556200145033e-05, "loss": 1.6437, "step": 101600 }, { "epoch": 0.20055157339509358, "grad_norm": 0.10414741933345795, "learning_rate": 8.026922177708564e-05, "loss": 1.6344, "step": 101632 }, { "epoch": 0.20061471935649003, "grad_norm": 0.09960043430328369, "learning_rate": 8.026288155272094e-05, "loss": 1.6429, "step": 101664 }, { "epoch": 0.20067786531788648, "grad_norm": 0.1058763787150383, "learning_rate": 8.025654132835626e-05, "loss": 1.6422, "step": 101696 }, { "epoch": 0.2007410112792829, "grad_norm": 0.10479961335659027, "learning_rate": 8.025020110399157e-05, "loss": 1.6385, "step": 101728 }, { "epoch": 0.20080415724067935, "grad_norm": 0.10158281028270721, "learning_rate": 8.024386087962689e-05, "loss": 1.6297, "step": 101760 }, { "epoch": 0.2008673032020758, "grad_norm": 0.10812821984291077, "learning_rate": 8.023752065526219e-05, "loss": 1.6454, "step": 101792 }, { "epoch": 0.20093044916347222, "grad_norm": 0.10637827217578888, "learning_rate": 8.02311804308975e-05, "loss": 1.6391, "step": 101824 }, { "epoch": 0.20099359512486867, "grad_norm": 0.09711770713329315, "learning_rate": 8.022484020653282e-05, "loss": 1.6454, "step": 101856 }, { "epoch": 0.20105674108626512, "grad_norm": 0.10218976438045502, "learning_rate": 8.021849998216813e-05, "loss": 1.6299, "step": 101888 }, { "epoch": 0.20111988704766154, "grad_norm": 0.10815039277076721, "learning_rate": 8.021215975780343e-05, "loss": 1.6494, "step": 101920 }, { "epoch": 0.201183033009058, "grad_norm": 0.1069837212562561, "learning_rate": 8.020581953343875e-05, "loss": 1.6388, "step": 101952 }, { "epoch": 0.20124617897045444, "grad_norm": 0.10718067735433578, "learning_rate": 8.019947930907406e-05, "loss": 1.6392, "step": 101984 }, { "epoch": 0.20130932493185089, "grad_norm": 0.10629121214151382, "learning_rate": 8.019313908470936e-05, "loss": 1.6318, "step": 102016 }, { "epoch": 0.2013724708932473, "grad_norm": 0.10247395187616348, "learning_rate": 8.018679886034466e-05, "loss": 1.6257, "step": 102048 }, { "epoch": 0.20143561685464376, "grad_norm": 0.10103968530893326, "learning_rate": 8.018045863597998e-05, "loss": 1.6292, "step": 102080 }, { "epoch": 0.2014987628160402, "grad_norm": 0.1039213314652443, "learning_rate": 8.017411841161529e-05, "loss": 1.6437, "step": 102112 }, { "epoch": 0.20156190877743663, "grad_norm": 0.10181664675474167, "learning_rate": 8.016777818725061e-05, "loss": 1.6263, "step": 102144 }, { "epoch": 0.20162505473883308, "grad_norm": 0.10486700385808945, "learning_rate": 8.016143796288591e-05, "loss": 1.6438, "step": 102176 }, { "epoch": 0.20168820070022953, "grad_norm": 0.10240813344717026, "learning_rate": 8.015509773852122e-05, "loss": 1.6496, "step": 102208 }, { "epoch": 0.20175134666162595, "grad_norm": 0.10481508076190948, "learning_rate": 8.014875751415654e-05, "loss": 1.6448, "step": 102240 }, { "epoch": 0.2018144926230224, "grad_norm": 0.10472988337278366, "learning_rate": 8.014241728979185e-05, "loss": 1.6536, "step": 102272 }, { "epoch": 0.20187763858441884, "grad_norm": 0.10133840888738632, "learning_rate": 8.013607706542717e-05, "loss": 1.6377, "step": 102304 }, { "epoch": 0.20194078454581527, "grad_norm": 0.10089903324842453, "learning_rate": 8.012973684106247e-05, "loss": 1.6298, "step": 102336 }, { "epoch": 0.20200393050721172, "grad_norm": 0.09654972702264786, "learning_rate": 8.012339661669778e-05, "loss": 1.6373, "step": 102368 }, { "epoch": 0.20206707646860816, "grad_norm": 0.11219391226768494, "learning_rate": 8.011705639233308e-05, "loss": 1.6376, "step": 102400 }, { "epoch": 0.20213022243000459, "grad_norm": 0.0997326597571373, "learning_rate": 8.01107161679684e-05, "loss": 1.6414, "step": 102432 }, { "epoch": 0.20219336839140103, "grad_norm": 0.10251238942146301, "learning_rate": 8.01043759436037e-05, "loss": 1.6398, "step": 102464 }, { "epoch": 0.20225651435279748, "grad_norm": 0.10804726928472519, "learning_rate": 8.009803571923901e-05, "loss": 1.6321, "step": 102496 }, { "epoch": 0.2023196603141939, "grad_norm": 0.10709121823310852, "learning_rate": 8.009169549487433e-05, "loss": 1.6401, "step": 102528 }, { "epoch": 0.20238280627559035, "grad_norm": 0.10653090476989746, "learning_rate": 8.008535527050964e-05, "loss": 1.6388, "step": 102560 }, { "epoch": 0.2024459522369868, "grad_norm": 0.10393144935369492, "learning_rate": 8.007901504614494e-05, "loss": 1.6322, "step": 102592 }, { "epoch": 0.20250909819838323, "grad_norm": 0.10259247571229935, "learning_rate": 8.007267482178026e-05, "loss": 1.6508, "step": 102624 }, { "epoch": 0.20257224415977967, "grad_norm": 0.09982217848300934, "learning_rate": 8.006633459741557e-05, "loss": 1.633, "step": 102656 }, { "epoch": 0.20263539012117612, "grad_norm": 0.10510411113500595, "learning_rate": 8.005999437305089e-05, "loss": 1.6486, "step": 102688 }, { "epoch": 0.20269853608257254, "grad_norm": 0.10338443517684937, "learning_rate": 8.005365414868619e-05, "loss": 1.6382, "step": 102720 }, { "epoch": 0.202761682043969, "grad_norm": 0.10875093191862106, "learning_rate": 8.00473139243215e-05, "loss": 1.6319, "step": 102752 }, { "epoch": 0.20282482800536544, "grad_norm": 0.10368429124355316, "learning_rate": 8.004097369995682e-05, "loss": 1.642, "step": 102784 }, { "epoch": 0.20288797396676186, "grad_norm": 0.10730479657649994, "learning_rate": 8.003463347559212e-05, "loss": 1.6423, "step": 102816 }, { "epoch": 0.2029511199281583, "grad_norm": 0.10140582174062729, "learning_rate": 8.002829325122743e-05, "loss": 1.6465, "step": 102848 }, { "epoch": 0.20301426588955476, "grad_norm": 0.10043517500162125, "learning_rate": 8.002195302686273e-05, "loss": 1.6381, "step": 102880 }, { "epoch": 0.20307741185095118, "grad_norm": 0.10014060139656067, "learning_rate": 8.001561280249805e-05, "loss": 1.6382, "step": 102912 }, { "epoch": 0.20314055781234763, "grad_norm": 0.10281931608915329, "learning_rate": 8.000927257813336e-05, "loss": 1.6392, "step": 102944 }, { "epoch": 0.20320370377374408, "grad_norm": 0.10233409702777863, "learning_rate": 8.000293235376868e-05, "loss": 1.6471, "step": 102976 }, { "epoch": 0.2032668497351405, "grad_norm": 0.10273443162441254, "learning_rate": 7.999659212940398e-05, "loss": 1.6392, "step": 103008 }, { "epoch": 0.20332999569653695, "grad_norm": 0.11665486544370651, "learning_rate": 7.999025190503929e-05, "loss": 1.6329, "step": 103040 }, { "epoch": 0.2033931416579334, "grad_norm": 0.10916301608085632, "learning_rate": 7.998391168067461e-05, "loss": 1.6349, "step": 103072 }, { "epoch": 0.20345628761932982, "grad_norm": 0.11598243564367294, "learning_rate": 7.997757145630992e-05, "loss": 1.6355, "step": 103104 }, { "epoch": 0.20351943358072627, "grad_norm": 0.09926288574934006, "learning_rate": 7.997123123194522e-05, "loss": 1.6362, "step": 103136 }, { "epoch": 0.20358257954212272, "grad_norm": 0.1062379777431488, "learning_rate": 7.996489100758054e-05, "loss": 1.6415, "step": 103168 }, { "epoch": 0.20364572550351914, "grad_norm": 0.10139545798301697, "learning_rate": 7.995855078321585e-05, "loss": 1.6482, "step": 103200 }, { "epoch": 0.2037088714649156, "grad_norm": 0.1033335030078888, "learning_rate": 7.995221055885115e-05, "loss": 1.6377, "step": 103232 }, { "epoch": 0.20377201742631204, "grad_norm": 0.09786981344223022, "learning_rate": 7.994587033448647e-05, "loss": 1.6401, "step": 103264 }, { "epoch": 0.20383516338770846, "grad_norm": 0.10496127605438232, "learning_rate": 7.993953011012177e-05, "loss": 1.6311, "step": 103296 }, { "epoch": 0.2038983093491049, "grad_norm": 0.10922686010599136, "learning_rate": 7.993318988575708e-05, "loss": 1.6566, "step": 103328 }, { "epoch": 0.20396145531050136, "grad_norm": 0.10109837353229523, "learning_rate": 7.99268496613924e-05, "loss": 1.6406, "step": 103360 }, { "epoch": 0.20402460127189778, "grad_norm": 0.10708053410053253, "learning_rate": 7.99205094370277e-05, "loss": 1.6286, "step": 103392 }, { "epoch": 0.20408774723329423, "grad_norm": 0.10658697038888931, "learning_rate": 7.991416921266301e-05, "loss": 1.6402, "step": 103424 }, { "epoch": 0.20415089319469068, "grad_norm": 0.1058441624045372, "learning_rate": 7.990782898829833e-05, "loss": 1.6323, "step": 103456 }, { "epoch": 0.2042140391560871, "grad_norm": 0.10291415452957153, "learning_rate": 7.990148876393364e-05, "loss": 1.652, "step": 103488 }, { "epoch": 0.20427718511748355, "grad_norm": 0.10548091679811478, "learning_rate": 7.989514853956894e-05, "loss": 1.6589, "step": 103520 }, { "epoch": 0.20434033107888, "grad_norm": 0.10049108415842056, "learning_rate": 7.988880831520426e-05, "loss": 1.6392, "step": 103552 }, { "epoch": 0.20440347704027642, "grad_norm": 0.10302495956420898, "learning_rate": 7.988246809083957e-05, "loss": 1.6507, "step": 103584 }, { "epoch": 0.20446662300167287, "grad_norm": 0.11602891236543655, "learning_rate": 7.987612786647489e-05, "loss": 1.6406, "step": 103616 }, { "epoch": 0.20452976896306932, "grad_norm": 0.11603625118732452, "learning_rate": 7.986978764211019e-05, "loss": 1.6338, "step": 103648 }, { "epoch": 0.20459291492446577, "grad_norm": 0.1035018116235733, "learning_rate": 7.986344741774549e-05, "loss": 1.6365, "step": 103680 }, { "epoch": 0.2046560608858622, "grad_norm": 0.10047438740730286, "learning_rate": 7.98571071933808e-05, "loss": 1.6358, "step": 103712 }, { "epoch": 0.20471920684725864, "grad_norm": 0.10895229876041412, "learning_rate": 7.985076696901612e-05, "loss": 1.6266, "step": 103744 }, { "epoch": 0.2047823528086551, "grad_norm": 0.10601717233657837, "learning_rate": 7.984442674465143e-05, "loss": 1.6484, "step": 103776 }, { "epoch": 0.2048454987700515, "grad_norm": 0.10165352374315262, "learning_rate": 7.983808652028673e-05, "loss": 1.6343, "step": 103808 }, { "epoch": 0.20490864473144796, "grad_norm": 0.10823352634906769, "learning_rate": 7.983174629592205e-05, "loss": 1.6422, "step": 103840 }, { "epoch": 0.2049717906928444, "grad_norm": 0.1027594804763794, "learning_rate": 7.982540607155736e-05, "loss": 1.6399, "step": 103872 }, { "epoch": 0.20503493665424083, "grad_norm": 0.10999977588653564, "learning_rate": 7.981906584719268e-05, "loss": 1.6346, "step": 103904 }, { "epoch": 0.20509808261563728, "grad_norm": 0.11509554833173752, "learning_rate": 7.981272562282798e-05, "loss": 1.6308, "step": 103936 }, { "epoch": 0.20516122857703373, "grad_norm": 0.1042904406785965, "learning_rate": 7.98063853984633e-05, "loss": 1.6328, "step": 103968 }, { "epoch": 0.20522437453843015, "grad_norm": 0.10567998886108398, "learning_rate": 7.980004517409861e-05, "loss": 1.642, "step": 104000 }, { "epoch": 0.2052875204998266, "grad_norm": 0.1036311537027359, "learning_rate": 7.979370494973392e-05, "loss": 1.6318, "step": 104032 }, { "epoch": 0.20535066646122305, "grad_norm": 0.11015824228525162, "learning_rate": 7.978736472536922e-05, "loss": 1.6422, "step": 104064 }, { "epoch": 0.20541381242261947, "grad_norm": 0.10671118646860123, "learning_rate": 7.978102450100452e-05, "loss": 1.6493, "step": 104096 }, { "epoch": 0.20547695838401592, "grad_norm": 0.10612434893846512, "learning_rate": 7.977468427663984e-05, "loss": 1.6368, "step": 104128 }, { "epoch": 0.20554010434541237, "grad_norm": 0.11015817523002625, "learning_rate": 7.976834405227515e-05, "loss": 1.6262, "step": 104160 }, { "epoch": 0.2056032503068088, "grad_norm": 0.10126221179962158, "learning_rate": 7.976200382791045e-05, "loss": 1.623, "step": 104192 }, { "epoch": 0.20566639626820524, "grad_norm": 0.10176030546426773, "learning_rate": 7.975566360354577e-05, "loss": 1.6464, "step": 104224 }, { "epoch": 0.2057295422296017, "grad_norm": 0.09650059789419174, "learning_rate": 7.974932337918108e-05, "loss": 1.654, "step": 104256 }, { "epoch": 0.2057926881909981, "grad_norm": 0.11535985767841339, "learning_rate": 7.97429831548164e-05, "loss": 1.629, "step": 104288 }, { "epoch": 0.20585583415239456, "grad_norm": 0.1117384284734726, "learning_rate": 7.973664293045171e-05, "loss": 1.638, "step": 104320 }, { "epoch": 0.205918980113791, "grad_norm": 0.10364539176225662, "learning_rate": 7.973030270608701e-05, "loss": 1.6406, "step": 104352 }, { "epoch": 0.20598212607518743, "grad_norm": 0.11449093371629715, "learning_rate": 7.972396248172233e-05, "loss": 1.6387, "step": 104384 }, { "epoch": 0.20604527203658388, "grad_norm": 0.10183209180831909, "learning_rate": 7.971762225735764e-05, "loss": 1.6382, "step": 104416 }, { "epoch": 0.20610841799798033, "grad_norm": 0.10846957564353943, "learning_rate": 7.971128203299296e-05, "loss": 1.642, "step": 104448 }, { "epoch": 0.20617156395937675, "grad_norm": 0.10982728749513626, "learning_rate": 7.970494180862826e-05, "loss": 1.6434, "step": 104480 }, { "epoch": 0.2062347099207732, "grad_norm": 0.10392747819423676, "learning_rate": 7.969860158426356e-05, "loss": 1.6378, "step": 104512 }, { "epoch": 0.20629785588216965, "grad_norm": 0.1094764694571495, "learning_rate": 7.969226135989887e-05, "loss": 1.6288, "step": 104544 }, { "epoch": 0.20636100184356607, "grad_norm": 0.10573791712522507, "learning_rate": 7.968592113553419e-05, "loss": 1.6465, "step": 104576 }, { "epoch": 0.20642414780496252, "grad_norm": 0.10767697542905807, "learning_rate": 7.967958091116949e-05, "loss": 1.6346, "step": 104608 }, { "epoch": 0.20648729376635896, "grad_norm": 0.10751378536224365, "learning_rate": 7.96732406868048e-05, "loss": 1.6503, "step": 104640 }, { "epoch": 0.20655043972775539, "grad_norm": 0.10429476201534271, "learning_rate": 7.966690046244012e-05, "loss": 1.628, "step": 104672 }, { "epoch": 0.20661358568915184, "grad_norm": 0.10951858758926392, "learning_rate": 7.966056023807543e-05, "loss": 1.6357, "step": 104704 }, { "epoch": 0.20667673165054828, "grad_norm": 0.11229857802391052, "learning_rate": 7.965422001371073e-05, "loss": 1.6276, "step": 104736 }, { "epoch": 0.2067398776119447, "grad_norm": 0.10861329734325409, "learning_rate": 7.964787978934605e-05, "loss": 1.6384, "step": 104768 }, { "epoch": 0.20680302357334115, "grad_norm": 0.11048752069473267, "learning_rate": 7.964153956498136e-05, "loss": 1.6302, "step": 104800 }, { "epoch": 0.2068661695347376, "grad_norm": 0.10989987105131149, "learning_rate": 7.963519934061668e-05, "loss": 1.6341, "step": 104832 }, { "epoch": 0.20692931549613403, "grad_norm": 0.11207330226898193, "learning_rate": 7.962885911625198e-05, "loss": 1.6355, "step": 104864 }, { "epoch": 0.20699246145753047, "grad_norm": 0.1130172461271286, "learning_rate": 7.96225188918873e-05, "loss": 1.6352, "step": 104896 }, { "epoch": 0.20705560741892692, "grad_norm": 0.10822995752096176, "learning_rate": 7.96161786675226e-05, "loss": 1.6302, "step": 104928 }, { "epoch": 0.20711875338032334, "grad_norm": 0.10452887415885925, "learning_rate": 7.960983844315791e-05, "loss": 1.6229, "step": 104960 }, { "epoch": 0.2071818993417198, "grad_norm": 0.10353318601846695, "learning_rate": 7.960349821879322e-05, "loss": 1.6546, "step": 104992 }, { "epoch": 0.20724504530311624, "grad_norm": 0.10682032257318497, "learning_rate": 7.959715799442852e-05, "loss": 1.6504, "step": 105024 }, { "epoch": 0.20730819126451266, "grad_norm": 0.1023780032992363, "learning_rate": 7.959081777006384e-05, "loss": 1.6447, "step": 105056 }, { "epoch": 0.2073713372259091, "grad_norm": 0.1099579930305481, "learning_rate": 7.958447754569915e-05, "loss": 1.6346, "step": 105088 }, { "epoch": 0.20743448318730556, "grad_norm": 0.10792119055986404, "learning_rate": 7.957813732133447e-05, "loss": 1.6371, "step": 105120 }, { "epoch": 0.20749762914870198, "grad_norm": 0.1058160662651062, "learning_rate": 7.957179709696977e-05, "loss": 1.6322, "step": 105152 }, { "epoch": 0.20756077511009843, "grad_norm": 0.10008879750967026, "learning_rate": 7.956545687260508e-05, "loss": 1.6285, "step": 105184 }, { "epoch": 0.20762392107149488, "grad_norm": 0.10394812375307083, "learning_rate": 7.95591166482404e-05, "loss": 1.6568, "step": 105216 }, { "epoch": 0.2076870670328913, "grad_norm": 0.103737473487854, "learning_rate": 7.955277642387571e-05, "loss": 1.6271, "step": 105248 }, { "epoch": 0.20775021299428775, "grad_norm": 0.09900253266096115, "learning_rate": 7.954643619951101e-05, "loss": 1.6324, "step": 105280 }, { "epoch": 0.2078133589556842, "grad_norm": 0.10779475420713425, "learning_rate": 7.954009597514633e-05, "loss": 1.6382, "step": 105312 }, { "epoch": 0.20787650491708062, "grad_norm": 0.10209625959396362, "learning_rate": 7.953375575078163e-05, "loss": 1.6425, "step": 105344 }, { "epoch": 0.20793965087847707, "grad_norm": 0.09763719141483307, "learning_rate": 7.952741552641694e-05, "loss": 1.6378, "step": 105376 }, { "epoch": 0.20800279683987352, "grad_norm": 0.10402107238769531, "learning_rate": 7.952107530205225e-05, "loss": 1.6357, "step": 105408 }, { "epoch": 0.20806594280126997, "grad_norm": 0.10512713342905045, "learning_rate": 7.951473507768756e-05, "loss": 1.6365, "step": 105440 }, { "epoch": 0.2081290887626664, "grad_norm": 0.09336667507886887, "learning_rate": 7.950839485332287e-05, "loss": 1.6368, "step": 105472 }, { "epoch": 0.20819223472406284, "grad_norm": 0.11189181357622147, "learning_rate": 7.950205462895819e-05, "loss": 1.6453, "step": 105504 }, { "epoch": 0.2082553806854593, "grad_norm": 0.10604702681303024, "learning_rate": 7.949571440459349e-05, "loss": 1.6456, "step": 105536 }, { "epoch": 0.2083185266468557, "grad_norm": 0.1117272675037384, "learning_rate": 7.94893741802288e-05, "loss": 1.647, "step": 105568 }, { "epoch": 0.20838167260825216, "grad_norm": 0.10491900891065598, "learning_rate": 7.948303395586412e-05, "loss": 1.6448, "step": 105600 }, { "epoch": 0.2084448185696486, "grad_norm": 0.10880821198225021, "learning_rate": 7.947669373149943e-05, "loss": 1.6403, "step": 105632 }, { "epoch": 0.20850796453104503, "grad_norm": 0.10090239346027374, "learning_rate": 7.947035350713475e-05, "loss": 1.6436, "step": 105664 }, { "epoch": 0.20857111049244148, "grad_norm": 0.1004345566034317, "learning_rate": 7.946401328277005e-05, "loss": 1.6449, "step": 105696 }, { "epoch": 0.20863425645383793, "grad_norm": 0.10114094614982605, "learning_rate": 7.945767305840536e-05, "loss": 1.6206, "step": 105728 }, { "epoch": 0.20869740241523435, "grad_norm": 0.1005459651350975, "learning_rate": 7.945133283404066e-05, "loss": 1.6432, "step": 105760 }, { "epoch": 0.2087605483766308, "grad_norm": 0.1044689491391182, "learning_rate": 7.944499260967598e-05, "loss": 1.6372, "step": 105792 }, { "epoch": 0.20882369433802725, "grad_norm": 0.10328377783298492, "learning_rate": 7.943865238531128e-05, "loss": 1.6316, "step": 105824 }, { "epoch": 0.20888684029942367, "grad_norm": 0.10217892378568649, "learning_rate": 7.94323121609466e-05, "loss": 1.6456, "step": 105856 }, { "epoch": 0.20894998626082012, "grad_norm": 0.10909678786993027, "learning_rate": 7.942597193658191e-05, "loss": 1.6392, "step": 105888 }, { "epoch": 0.20901313222221657, "grad_norm": 0.10893984884023666, "learning_rate": 7.941963171221722e-05, "loss": 1.6347, "step": 105920 }, { "epoch": 0.209076278183613, "grad_norm": 0.10438256710767746, "learning_rate": 7.941329148785253e-05, "loss": 1.6432, "step": 105952 }, { "epoch": 0.20913942414500944, "grad_norm": 0.10000976175069809, "learning_rate": 7.940695126348784e-05, "loss": 1.6315, "step": 105984 }, { "epoch": 0.2092025701064059, "grad_norm": 0.10432066768407822, "learning_rate": 7.940061103912315e-05, "loss": 1.6335, "step": 106016 }, { "epoch": 0.2092657160678023, "grad_norm": 0.10000544786453247, "learning_rate": 7.939427081475847e-05, "loss": 1.647, "step": 106048 }, { "epoch": 0.20932886202919876, "grad_norm": 0.10180745273828506, "learning_rate": 7.938793059039377e-05, "loss": 1.6445, "step": 106080 }, { "epoch": 0.2093920079905952, "grad_norm": 0.10191638767719269, "learning_rate": 7.938159036602908e-05, "loss": 1.633, "step": 106112 }, { "epoch": 0.20945515395199163, "grad_norm": 0.10588212311267853, "learning_rate": 7.93752501416644e-05, "loss": 1.633, "step": 106144 }, { "epoch": 0.20951829991338808, "grad_norm": 0.10806088894605637, "learning_rate": 7.93689099172997e-05, "loss": 1.6169, "step": 106176 }, { "epoch": 0.20958144587478453, "grad_norm": 0.10362087190151215, "learning_rate": 7.9362569692935e-05, "loss": 1.6265, "step": 106208 }, { "epoch": 0.20964459183618095, "grad_norm": 0.11316730082035065, "learning_rate": 7.935622946857032e-05, "loss": 1.6222, "step": 106240 }, { "epoch": 0.2097077377975774, "grad_norm": 0.09931106120347977, "learning_rate": 7.934988924420563e-05, "loss": 1.6397, "step": 106272 }, { "epoch": 0.20977088375897385, "grad_norm": 0.10171699523925781, "learning_rate": 7.934354901984094e-05, "loss": 1.6344, "step": 106304 }, { "epoch": 0.20983402972037027, "grad_norm": 0.09845674782991409, "learning_rate": 7.933720879547626e-05, "loss": 1.6288, "step": 106336 }, { "epoch": 0.20989717568176672, "grad_norm": 0.10100769996643066, "learning_rate": 7.933086857111156e-05, "loss": 1.6289, "step": 106368 }, { "epoch": 0.20996032164316317, "grad_norm": 0.10174597054719925, "learning_rate": 7.932452834674687e-05, "loss": 1.6276, "step": 106400 }, { "epoch": 0.2100234676045596, "grad_norm": 0.1099524050951004, "learning_rate": 7.931818812238219e-05, "loss": 1.6393, "step": 106432 }, { "epoch": 0.21008661356595604, "grad_norm": 0.10879639536142349, "learning_rate": 7.93118478980175e-05, "loss": 1.6305, "step": 106464 }, { "epoch": 0.2101497595273525, "grad_norm": 0.09787270426750183, "learning_rate": 7.93055076736528e-05, "loss": 1.6328, "step": 106496 }, { "epoch": 0.2102129054887489, "grad_norm": 0.11112847924232483, "learning_rate": 7.929916744928812e-05, "loss": 1.6346, "step": 106528 }, { "epoch": 0.21027605145014536, "grad_norm": 0.10521311312913895, "learning_rate": 7.929282722492342e-05, "loss": 1.6384, "step": 106560 }, { "epoch": 0.2103391974115418, "grad_norm": 0.10209573060274124, "learning_rate": 7.928648700055874e-05, "loss": 1.6377, "step": 106592 }, { "epoch": 0.21040234337293823, "grad_norm": 0.10402011126279831, "learning_rate": 7.928014677619404e-05, "loss": 1.624, "step": 106624 }, { "epoch": 0.21046548933433468, "grad_norm": 0.10467496514320374, "learning_rate": 7.927380655182935e-05, "loss": 1.6284, "step": 106656 }, { "epoch": 0.21052863529573113, "grad_norm": 0.10447464883327484, "learning_rate": 7.926746632746467e-05, "loss": 1.6357, "step": 106688 }, { "epoch": 0.21059178125712755, "grad_norm": 0.11272741109132767, "learning_rate": 7.926112610309998e-05, "loss": 1.6341, "step": 106720 }, { "epoch": 0.210654927218524, "grad_norm": 0.10509485751390457, "learning_rate": 7.925478587873528e-05, "loss": 1.6352, "step": 106752 }, { "epoch": 0.21071807317992045, "grad_norm": 0.10457901656627655, "learning_rate": 7.92484456543706e-05, "loss": 1.6272, "step": 106784 }, { "epoch": 0.21078121914131687, "grad_norm": 0.10728678107261658, "learning_rate": 7.924210543000591e-05, "loss": 1.6277, "step": 106816 }, { "epoch": 0.21084436510271332, "grad_norm": 0.09993508458137512, "learning_rate": 7.923576520564122e-05, "loss": 1.6368, "step": 106848 }, { "epoch": 0.21090751106410977, "grad_norm": 0.10373237729072571, "learning_rate": 7.922942498127653e-05, "loss": 1.631, "step": 106880 }, { "epoch": 0.2109706570255062, "grad_norm": 0.0996611937880516, "learning_rate": 7.922308475691184e-05, "loss": 1.649, "step": 106912 }, { "epoch": 0.21103380298690264, "grad_norm": 0.10735130310058594, "learning_rate": 7.921674453254715e-05, "loss": 1.6348, "step": 106944 }, { "epoch": 0.21109694894829908, "grad_norm": 0.10662022233009338, "learning_rate": 7.921040430818246e-05, "loss": 1.6241, "step": 106976 }, { "epoch": 0.2111600949096955, "grad_norm": 0.10203149914741516, "learning_rate": 7.920406408381777e-05, "loss": 1.6451, "step": 107008 }, { "epoch": 0.21122324087109196, "grad_norm": 0.10294393450021744, "learning_rate": 7.919772385945307e-05, "loss": 1.6302, "step": 107040 }, { "epoch": 0.2112863868324884, "grad_norm": 0.11116877943277359, "learning_rate": 7.919138363508839e-05, "loss": 1.6367, "step": 107072 }, { "epoch": 0.21134953279388485, "grad_norm": 0.10466013103723526, "learning_rate": 7.91850434107237e-05, "loss": 1.6338, "step": 107104 }, { "epoch": 0.21141267875528127, "grad_norm": 0.09852197021245956, "learning_rate": 7.917870318635901e-05, "loss": 1.6211, "step": 107136 }, { "epoch": 0.21147582471667772, "grad_norm": 0.10259540379047394, "learning_rate": 7.917236296199432e-05, "loss": 1.6187, "step": 107168 }, { "epoch": 0.21153897067807417, "grad_norm": 0.10774032771587372, "learning_rate": 7.916602273762963e-05, "loss": 1.6416, "step": 107200 }, { "epoch": 0.2116021166394706, "grad_norm": 0.1024177297949791, "learning_rate": 7.915968251326495e-05, "loss": 1.6322, "step": 107232 }, { "epoch": 0.21166526260086704, "grad_norm": 0.0955820307135582, "learning_rate": 7.915334228890026e-05, "loss": 1.6275, "step": 107264 }, { "epoch": 0.2117284085622635, "grad_norm": 0.10093463957309723, "learning_rate": 7.914700206453556e-05, "loss": 1.6226, "step": 107296 }, { "epoch": 0.21179155452365991, "grad_norm": 0.10453131794929504, "learning_rate": 7.914066184017088e-05, "loss": 1.6311, "step": 107328 }, { "epoch": 0.21185470048505636, "grad_norm": 0.11554372310638428, "learning_rate": 7.913432161580619e-05, "loss": 1.6197, "step": 107360 }, { "epoch": 0.2119178464464528, "grad_norm": 0.1025693267583847, "learning_rate": 7.912798139144149e-05, "loss": 1.6342, "step": 107392 }, { "epoch": 0.21198099240784923, "grad_norm": 0.11289840191602707, "learning_rate": 7.91216411670768e-05, "loss": 1.6404, "step": 107424 }, { "epoch": 0.21204413836924568, "grad_norm": 0.10061497986316681, "learning_rate": 7.91153009427121e-05, "loss": 1.6438, "step": 107456 }, { "epoch": 0.21210728433064213, "grad_norm": 0.09793902188539505, "learning_rate": 7.910896071834742e-05, "loss": 1.6358, "step": 107488 }, { "epoch": 0.21217043029203855, "grad_norm": 0.11443709582090378, "learning_rate": 7.910262049398274e-05, "loss": 1.6305, "step": 107520 }, { "epoch": 0.212233576253435, "grad_norm": 0.10491108149290085, "learning_rate": 7.909628026961804e-05, "loss": 1.6145, "step": 107552 }, { "epoch": 0.21229672221483145, "grad_norm": 0.09725751727819443, "learning_rate": 7.908994004525335e-05, "loss": 1.6182, "step": 107584 }, { "epoch": 0.21235986817622787, "grad_norm": 0.11422659456729889, "learning_rate": 7.908359982088867e-05, "loss": 1.6441, "step": 107616 }, { "epoch": 0.21242301413762432, "grad_norm": 0.1142466589808464, "learning_rate": 7.907725959652398e-05, "loss": 1.6341, "step": 107648 }, { "epoch": 0.21248616009902077, "grad_norm": 0.09683635085821152, "learning_rate": 7.90709193721593e-05, "loss": 1.6293, "step": 107680 }, { "epoch": 0.2125493060604172, "grad_norm": 0.10389380156993866, "learning_rate": 7.90645791477946e-05, "loss": 1.6246, "step": 107712 }, { "epoch": 0.21261245202181364, "grad_norm": 0.10858426988124847, "learning_rate": 7.905823892342991e-05, "loss": 1.6392, "step": 107744 }, { "epoch": 0.2126755979832101, "grad_norm": 0.10831128060817719, "learning_rate": 7.905189869906522e-05, "loss": 1.6263, "step": 107776 }, { "epoch": 0.2127387439446065, "grad_norm": 0.1085173562169075, "learning_rate": 7.904555847470053e-05, "loss": 1.6341, "step": 107808 }, { "epoch": 0.21280188990600296, "grad_norm": 0.11627600342035294, "learning_rate": 7.903921825033583e-05, "loss": 1.618, "step": 107840 }, { "epoch": 0.2128650358673994, "grad_norm": 0.10929540544748306, "learning_rate": 7.903287802597114e-05, "loss": 1.6351, "step": 107872 }, { "epoch": 0.21292818182879583, "grad_norm": 0.10462719947099686, "learning_rate": 7.902653780160646e-05, "loss": 1.656, "step": 107904 }, { "epoch": 0.21299132779019228, "grad_norm": 0.10402841120958328, "learning_rate": 7.902019757724177e-05, "loss": 1.6345, "step": 107936 }, { "epoch": 0.21305447375158873, "grad_norm": 0.0970005989074707, "learning_rate": 7.901385735287707e-05, "loss": 1.6419, "step": 107968 }, { "epoch": 0.21311761971298515, "grad_norm": 0.10343700647354126, "learning_rate": 7.900751712851239e-05, "loss": 1.6301, "step": 108000 }, { "epoch": 0.2131807656743816, "grad_norm": 0.10491510480642319, "learning_rate": 7.90011769041477e-05, "loss": 1.6357, "step": 108032 }, { "epoch": 0.21324391163577805, "grad_norm": 0.10545338690280914, "learning_rate": 7.899483667978302e-05, "loss": 1.6268, "step": 108064 }, { "epoch": 0.21330705759717447, "grad_norm": 0.10435418784618378, "learning_rate": 7.898849645541832e-05, "loss": 1.6279, "step": 108096 }, { "epoch": 0.21337020355857092, "grad_norm": 0.10306353867053986, "learning_rate": 7.898215623105363e-05, "loss": 1.6348, "step": 108128 }, { "epoch": 0.21343334951996737, "grad_norm": 0.1135459765791893, "learning_rate": 7.897581600668895e-05, "loss": 1.6312, "step": 108160 }, { "epoch": 0.2134964954813638, "grad_norm": 0.10249552875757217, "learning_rate": 7.896947578232426e-05, "loss": 1.6269, "step": 108192 }, { "epoch": 0.21355964144276024, "grad_norm": 0.1019309014081955, "learning_rate": 7.896313555795956e-05, "loss": 1.6337, "step": 108224 }, { "epoch": 0.2136227874041567, "grad_norm": 0.1040746346116066, "learning_rate": 7.895679533359486e-05, "loss": 1.6297, "step": 108256 }, { "epoch": 0.2136859333655531, "grad_norm": 0.10757636278867722, "learning_rate": 7.895045510923018e-05, "loss": 1.6263, "step": 108288 }, { "epoch": 0.21374907932694956, "grad_norm": 0.10473567992448807, "learning_rate": 7.894411488486549e-05, "loss": 1.6301, "step": 108320 }, { "epoch": 0.213812225288346, "grad_norm": 0.09779185801744461, "learning_rate": 7.89377746605008e-05, "loss": 1.6464, "step": 108352 }, { "epoch": 0.21387537124974243, "grad_norm": 0.10053712874650955, "learning_rate": 7.89314344361361e-05, "loss": 1.6334, "step": 108384 }, { "epoch": 0.21393851721113888, "grad_norm": 0.10482043027877808, "learning_rate": 7.892509421177142e-05, "loss": 1.6208, "step": 108416 }, { "epoch": 0.21400166317253533, "grad_norm": 0.10508330166339874, "learning_rate": 7.891875398740674e-05, "loss": 1.6293, "step": 108448 }, { "epoch": 0.21406480913393175, "grad_norm": 0.09850221127271652, "learning_rate": 7.891241376304205e-05, "loss": 1.6223, "step": 108480 }, { "epoch": 0.2141279550953282, "grad_norm": 0.098767951130867, "learning_rate": 7.890607353867735e-05, "loss": 1.6353, "step": 108512 }, { "epoch": 0.21419110105672465, "grad_norm": 0.10548055917024612, "learning_rate": 7.889973331431267e-05, "loss": 1.637, "step": 108544 }, { "epoch": 0.21425424701812107, "grad_norm": 0.102800153195858, "learning_rate": 7.889339308994798e-05, "loss": 1.6297, "step": 108576 }, { "epoch": 0.21431739297951752, "grad_norm": 0.1027311161160469, "learning_rate": 7.88870528655833e-05, "loss": 1.6205, "step": 108608 }, { "epoch": 0.21438053894091397, "grad_norm": 0.09942883998155594, "learning_rate": 7.88807126412186e-05, "loss": 1.6228, "step": 108640 }, { "epoch": 0.2144436849023104, "grad_norm": 0.11116909980773926, "learning_rate": 7.88743724168539e-05, "loss": 1.6206, "step": 108672 }, { "epoch": 0.21450683086370684, "grad_norm": 0.10296850651502609, "learning_rate": 7.886803219248921e-05, "loss": 1.6278, "step": 108704 }, { "epoch": 0.2145699768251033, "grad_norm": 0.10577276349067688, "learning_rate": 7.886169196812453e-05, "loss": 1.6245, "step": 108736 }, { "epoch": 0.21463312278649974, "grad_norm": 0.1063087061047554, "learning_rate": 7.885535174375983e-05, "loss": 1.623, "step": 108768 }, { "epoch": 0.21469626874789616, "grad_norm": 0.11270727962255478, "learning_rate": 7.884901151939514e-05, "loss": 1.6391, "step": 108800 }, { "epoch": 0.2147594147092926, "grad_norm": 0.1048850566148758, "learning_rate": 7.884267129503046e-05, "loss": 1.6455, "step": 108832 }, { "epoch": 0.21482256067068906, "grad_norm": 0.11248733103275299, "learning_rate": 7.883633107066577e-05, "loss": 1.6375, "step": 108864 }, { "epoch": 0.21488570663208548, "grad_norm": 0.10962484031915665, "learning_rate": 7.882999084630109e-05, "loss": 1.6284, "step": 108896 }, { "epoch": 0.21494885259348193, "grad_norm": 0.10210379213094711, "learning_rate": 7.882365062193639e-05, "loss": 1.6452, "step": 108928 }, { "epoch": 0.21501199855487838, "grad_norm": 0.1018432229757309, "learning_rate": 7.88173103975717e-05, "loss": 1.6334, "step": 108960 }, { "epoch": 0.2150751445162748, "grad_norm": 0.1035958006978035, "learning_rate": 7.881097017320702e-05, "loss": 1.6233, "step": 108992 }, { "epoch": 0.21513829047767125, "grad_norm": 0.10939712077379227, "learning_rate": 7.880462994884233e-05, "loss": 1.6263, "step": 109024 }, { "epoch": 0.2152014364390677, "grad_norm": 0.10484683513641357, "learning_rate": 7.879828972447763e-05, "loss": 1.6209, "step": 109056 }, { "epoch": 0.21526458240046412, "grad_norm": 0.10833864659070969, "learning_rate": 7.879194950011293e-05, "loss": 1.624, "step": 109088 }, { "epoch": 0.21532772836186057, "grad_norm": 0.1055784523487091, "learning_rate": 7.878560927574825e-05, "loss": 1.641, "step": 109120 }, { "epoch": 0.21539087432325701, "grad_norm": 0.11066529899835587, "learning_rate": 7.877926905138356e-05, "loss": 1.6356, "step": 109152 }, { "epoch": 0.21545402028465344, "grad_norm": 0.09954646229743958, "learning_rate": 7.877292882701886e-05, "loss": 1.6374, "step": 109184 }, { "epoch": 0.21551716624604988, "grad_norm": 0.09930533170700073, "learning_rate": 7.876658860265418e-05, "loss": 1.633, "step": 109216 }, { "epoch": 0.21558031220744633, "grad_norm": 0.11131240427494049, "learning_rate": 7.876024837828949e-05, "loss": 1.6155, "step": 109248 }, { "epoch": 0.21564345816884276, "grad_norm": 0.09750407934188843, "learning_rate": 7.87539081539248e-05, "loss": 1.6244, "step": 109280 }, { "epoch": 0.2157066041302392, "grad_norm": 0.10745207220315933, "learning_rate": 7.874756792956011e-05, "loss": 1.6204, "step": 109312 }, { "epoch": 0.21576975009163565, "grad_norm": 0.10814482718706131, "learning_rate": 7.874122770519542e-05, "loss": 1.6231, "step": 109344 }, { "epoch": 0.21583289605303208, "grad_norm": 0.1093771904706955, "learning_rate": 7.873488748083074e-05, "loss": 1.6354, "step": 109376 }, { "epoch": 0.21589604201442852, "grad_norm": 0.10462377965450287, "learning_rate": 7.872854725646605e-05, "loss": 1.6178, "step": 109408 }, { "epoch": 0.21595918797582497, "grad_norm": 0.1065559834241867, "learning_rate": 7.872220703210135e-05, "loss": 1.6292, "step": 109440 }, { "epoch": 0.2160223339372214, "grad_norm": 0.0998968705534935, "learning_rate": 7.871586680773667e-05, "loss": 1.6084, "step": 109472 }, { "epoch": 0.21608547989861784, "grad_norm": 0.10516436398029327, "learning_rate": 7.870952658337197e-05, "loss": 1.6281, "step": 109504 }, { "epoch": 0.2161486258600143, "grad_norm": 0.1136736273765564, "learning_rate": 7.870318635900728e-05, "loss": 1.6293, "step": 109536 }, { "epoch": 0.21621177182141071, "grad_norm": 0.10644941031932831, "learning_rate": 7.86968461346426e-05, "loss": 1.6367, "step": 109568 }, { "epoch": 0.21627491778280716, "grad_norm": 0.09942994266748428, "learning_rate": 7.86905059102779e-05, "loss": 1.6251, "step": 109600 }, { "epoch": 0.2163380637442036, "grad_norm": 0.10130082815885544, "learning_rate": 7.868416568591321e-05, "loss": 1.633, "step": 109632 }, { "epoch": 0.21640120970560003, "grad_norm": 0.11468770354986191, "learning_rate": 7.867782546154853e-05, "loss": 1.6193, "step": 109664 }, { "epoch": 0.21646435566699648, "grad_norm": 0.10606314241886139, "learning_rate": 7.867148523718384e-05, "loss": 1.6291, "step": 109696 }, { "epoch": 0.21652750162839293, "grad_norm": 0.10423869639635086, "learning_rate": 7.866514501281914e-05, "loss": 1.6407, "step": 109728 }, { "epoch": 0.21659064758978935, "grad_norm": 0.10519355535507202, "learning_rate": 7.865880478845446e-05, "loss": 1.6379, "step": 109760 }, { "epoch": 0.2166537935511858, "grad_norm": 0.10765162855386734, "learning_rate": 7.865246456408977e-05, "loss": 1.6475, "step": 109792 }, { "epoch": 0.21671693951258225, "grad_norm": 0.10810939222574234, "learning_rate": 7.864612433972509e-05, "loss": 1.6257, "step": 109824 }, { "epoch": 0.21678008547397867, "grad_norm": 0.10260454565286636, "learning_rate": 7.863978411536039e-05, "loss": 1.6276, "step": 109856 }, { "epoch": 0.21684323143537512, "grad_norm": 0.10651948302984238, "learning_rate": 7.86334438909957e-05, "loss": 1.6372, "step": 109888 }, { "epoch": 0.21690637739677157, "grad_norm": 0.11261123418807983, "learning_rate": 7.8627103666631e-05, "loss": 1.6366, "step": 109920 }, { "epoch": 0.216969523358168, "grad_norm": 0.09782645851373672, "learning_rate": 7.862076344226632e-05, "loss": 1.6174, "step": 109952 }, { "epoch": 0.21703266931956444, "grad_norm": 0.1075483039021492, "learning_rate": 7.861442321790162e-05, "loss": 1.6262, "step": 109984 }, { "epoch": 0.2170958152809609, "grad_norm": 0.11073613166809082, "learning_rate": 7.860808299353693e-05, "loss": 1.6423, "step": 110016 }, { "epoch": 0.2171589612423573, "grad_norm": 0.10925047099590302, "learning_rate": 7.860174276917225e-05, "loss": 1.6297, "step": 110048 }, { "epoch": 0.21722210720375376, "grad_norm": 0.10803259164094925, "learning_rate": 7.859540254480756e-05, "loss": 1.635, "step": 110080 }, { "epoch": 0.2172852531651502, "grad_norm": 0.10276775062084198, "learning_rate": 7.858906232044286e-05, "loss": 1.6267, "step": 110112 }, { "epoch": 0.21734839912654663, "grad_norm": 0.1135217472910881, "learning_rate": 7.858272209607818e-05, "loss": 1.6354, "step": 110144 }, { "epoch": 0.21741154508794308, "grad_norm": 0.10434950888156891, "learning_rate": 7.857638187171349e-05, "loss": 1.6338, "step": 110176 }, { "epoch": 0.21747469104933953, "grad_norm": 0.10705443471670151, "learning_rate": 7.85700416473488e-05, "loss": 1.6225, "step": 110208 }, { "epoch": 0.21753783701073595, "grad_norm": 0.10687093436717987, "learning_rate": 7.856370142298412e-05, "loss": 1.6357, "step": 110240 }, { "epoch": 0.2176009829721324, "grad_norm": 0.10969510674476624, "learning_rate": 7.855736119861942e-05, "loss": 1.6346, "step": 110272 }, { "epoch": 0.21766412893352885, "grad_norm": 0.10750909894704819, "learning_rate": 7.855102097425474e-05, "loss": 1.6319, "step": 110304 }, { "epoch": 0.21772727489492527, "grad_norm": 0.10937602072954178, "learning_rate": 7.854468074989004e-05, "loss": 1.6322, "step": 110336 }, { "epoch": 0.21779042085632172, "grad_norm": 0.10048750042915344, "learning_rate": 7.853834052552535e-05, "loss": 1.6354, "step": 110368 }, { "epoch": 0.21785356681771817, "grad_norm": 0.10500896722078323, "learning_rate": 7.853200030116065e-05, "loss": 1.6237, "step": 110400 }, { "epoch": 0.21791671277911462, "grad_norm": 0.10095417499542236, "learning_rate": 7.852566007679597e-05, "loss": 1.6419, "step": 110432 }, { "epoch": 0.21797985874051104, "grad_norm": 0.10962829738855362, "learning_rate": 7.851931985243128e-05, "loss": 1.6383, "step": 110464 }, { "epoch": 0.2180430047019075, "grad_norm": 0.10402845591306686, "learning_rate": 7.85129796280666e-05, "loss": 1.6309, "step": 110496 }, { "epoch": 0.21810615066330394, "grad_norm": 0.09595850855112076, "learning_rate": 7.85066394037019e-05, "loss": 1.639, "step": 110528 }, { "epoch": 0.21816929662470036, "grad_norm": 0.10197676718235016, "learning_rate": 7.850029917933721e-05, "loss": 1.6458, "step": 110560 }, { "epoch": 0.2182324425860968, "grad_norm": 0.10455411672592163, "learning_rate": 7.849395895497253e-05, "loss": 1.6337, "step": 110592 }, { "epoch": 0.21829558854749326, "grad_norm": 0.10910439491271973, "learning_rate": 7.848761873060784e-05, "loss": 1.637, "step": 110624 }, { "epoch": 0.21835873450888968, "grad_norm": 0.09921760857105255, "learning_rate": 7.848127850624314e-05, "loss": 1.6142, "step": 110656 }, { "epoch": 0.21842188047028613, "grad_norm": 0.11171793192625046, "learning_rate": 7.847493828187846e-05, "loss": 1.6172, "step": 110688 }, { "epoch": 0.21848502643168258, "grad_norm": 0.10553435236215591, "learning_rate": 7.846859805751376e-05, "loss": 1.6285, "step": 110720 }, { "epoch": 0.218548172393079, "grad_norm": 0.1026960238814354, "learning_rate": 7.846225783314907e-05, "loss": 1.6174, "step": 110752 }, { "epoch": 0.21861131835447545, "grad_norm": 0.11526110023260117, "learning_rate": 7.845591760878437e-05, "loss": 1.6267, "step": 110784 }, { "epoch": 0.2186744643158719, "grad_norm": 0.10280951112508774, "learning_rate": 7.844957738441969e-05, "loss": 1.6265, "step": 110816 }, { "epoch": 0.21873761027726832, "grad_norm": 0.10278615355491638, "learning_rate": 7.8443237160055e-05, "loss": 1.6355, "step": 110848 }, { "epoch": 0.21880075623866477, "grad_norm": 0.10710864514112473, "learning_rate": 7.843689693569032e-05, "loss": 1.6314, "step": 110880 }, { "epoch": 0.21886390220006122, "grad_norm": 0.10066132247447968, "learning_rate": 7.843055671132563e-05, "loss": 1.6355, "step": 110912 }, { "epoch": 0.21892704816145764, "grad_norm": 0.09862487763166428, "learning_rate": 7.842421648696093e-05, "loss": 1.6373, "step": 110944 }, { "epoch": 0.2189901941228541, "grad_norm": 0.11181669682264328, "learning_rate": 7.841787626259625e-05, "loss": 1.632, "step": 110976 }, { "epoch": 0.21905334008425054, "grad_norm": 0.10781589895486832, "learning_rate": 7.841153603823156e-05, "loss": 1.6292, "step": 111008 }, { "epoch": 0.21911648604564696, "grad_norm": 0.10374114662408829, "learning_rate": 7.840519581386688e-05, "loss": 1.6212, "step": 111040 }, { "epoch": 0.2191796320070434, "grad_norm": 0.10779701173305511, "learning_rate": 7.839885558950218e-05, "loss": 1.6298, "step": 111072 }, { "epoch": 0.21924277796843986, "grad_norm": 0.10612470656633377, "learning_rate": 7.839251536513749e-05, "loss": 1.6366, "step": 111104 }, { "epoch": 0.21930592392983628, "grad_norm": 0.1071191355586052, "learning_rate": 7.838617514077279e-05, "loss": 1.6401, "step": 111136 }, { "epoch": 0.21936906989123273, "grad_norm": 0.1048671081662178, "learning_rate": 7.837983491640811e-05, "loss": 1.6349, "step": 111168 }, { "epoch": 0.21943221585262918, "grad_norm": 0.10103890299797058, "learning_rate": 7.837349469204341e-05, "loss": 1.6188, "step": 111200 }, { "epoch": 0.2194953618140256, "grad_norm": 0.10815919935703278, "learning_rate": 7.836715446767872e-05, "loss": 1.6303, "step": 111232 }, { "epoch": 0.21955850777542205, "grad_norm": 0.10404631495475769, "learning_rate": 7.836081424331404e-05, "loss": 1.6282, "step": 111264 }, { "epoch": 0.2196216537368185, "grad_norm": 0.1171780377626419, "learning_rate": 7.835447401894935e-05, "loss": 1.6413, "step": 111296 }, { "epoch": 0.21968479969821492, "grad_norm": 0.10239430516958237, "learning_rate": 7.834813379458465e-05, "loss": 1.632, "step": 111328 }, { "epoch": 0.21974794565961137, "grad_norm": 0.10737922787666321, "learning_rate": 7.834179357021997e-05, "loss": 1.6254, "step": 111360 }, { "epoch": 0.21981109162100781, "grad_norm": 0.10107067227363586, "learning_rate": 7.833545334585528e-05, "loss": 1.6256, "step": 111392 }, { "epoch": 0.21987423758240424, "grad_norm": 0.1047099232673645, "learning_rate": 7.83291131214906e-05, "loss": 1.6514, "step": 111424 }, { "epoch": 0.21993738354380069, "grad_norm": 0.10276010632514954, "learning_rate": 7.83227728971259e-05, "loss": 1.6278, "step": 111456 }, { "epoch": 0.22000052950519713, "grad_norm": 0.10510295629501343, "learning_rate": 7.831643267276121e-05, "loss": 1.6251, "step": 111488 }, { "epoch": 0.22006367546659356, "grad_norm": 0.10550041496753693, "learning_rate": 7.831009244839653e-05, "loss": 1.6351, "step": 111520 }, { "epoch": 0.22012682142799, "grad_norm": 0.10613173246383667, "learning_rate": 7.830375222403183e-05, "loss": 1.6296, "step": 111552 }, { "epoch": 0.22018996738938645, "grad_norm": 0.12164752185344696, "learning_rate": 7.829741199966714e-05, "loss": 1.627, "step": 111584 }, { "epoch": 0.22025311335078288, "grad_norm": 0.10672342777252197, "learning_rate": 7.829107177530244e-05, "loss": 1.6196, "step": 111616 }, { "epoch": 0.22031625931217932, "grad_norm": 0.10044343769550323, "learning_rate": 7.828473155093776e-05, "loss": 1.6281, "step": 111648 }, { "epoch": 0.22037940527357577, "grad_norm": 0.11962555348873138, "learning_rate": 7.827839132657307e-05, "loss": 1.6275, "step": 111680 }, { "epoch": 0.2204425512349722, "grad_norm": 0.10664345324039459, "learning_rate": 7.827205110220839e-05, "loss": 1.6387, "step": 111712 }, { "epoch": 0.22050569719636864, "grad_norm": 0.10825929790735245, "learning_rate": 7.826571087784369e-05, "loss": 1.6404, "step": 111744 }, { "epoch": 0.2205688431577651, "grad_norm": 0.10334636270999908, "learning_rate": 7.8259370653479e-05, "loss": 1.6308, "step": 111776 }, { "epoch": 0.22063198911916151, "grad_norm": 0.11190655827522278, "learning_rate": 7.825303042911432e-05, "loss": 1.629, "step": 111808 }, { "epoch": 0.22069513508055796, "grad_norm": 0.1063610389828682, "learning_rate": 7.824669020474963e-05, "loss": 1.633, "step": 111840 }, { "epoch": 0.2207582810419544, "grad_norm": 0.1004890576004982, "learning_rate": 7.824034998038493e-05, "loss": 1.6394, "step": 111872 }, { "epoch": 0.22082142700335083, "grad_norm": 0.10761027038097382, "learning_rate": 7.823400975602025e-05, "loss": 1.6454, "step": 111904 }, { "epoch": 0.22088457296474728, "grad_norm": 0.12183315306901932, "learning_rate": 7.822766953165556e-05, "loss": 1.6354, "step": 111936 }, { "epoch": 0.22094771892614373, "grad_norm": 0.11122620850801468, "learning_rate": 7.822132930729086e-05, "loss": 1.6235, "step": 111968 }, { "epoch": 0.22101086488754015, "grad_norm": 0.10745202004909515, "learning_rate": 7.821498908292616e-05, "loss": 1.6342, "step": 112000 }, { "epoch": 0.2210740108489366, "grad_norm": 0.10377126932144165, "learning_rate": 7.820864885856148e-05, "loss": 1.6305, "step": 112032 }, { "epoch": 0.22113715681033305, "grad_norm": 0.10169932246208191, "learning_rate": 7.82023086341968e-05, "loss": 1.6444, "step": 112064 }, { "epoch": 0.2212003027717295, "grad_norm": 0.0997363030910492, "learning_rate": 7.819596840983211e-05, "loss": 1.6336, "step": 112096 }, { "epoch": 0.22126344873312592, "grad_norm": 0.10159129649400711, "learning_rate": 7.818962818546741e-05, "loss": 1.6231, "step": 112128 }, { "epoch": 0.22132659469452237, "grad_norm": 0.10715489089488983, "learning_rate": 7.818328796110272e-05, "loss": 1.6213, "step": 112160 }, { "epoch": 0.22138974065591882, "grad_norm": 0.10581538081169128, "learning_rate": 7.817694773673804e-05, "loss": 1.6162, "step": 112192 }, { "epoch": 0.22145288661731524, "grad_norm": 0.10206440836191177, "learning_rate": 7.817060751237335e-05, "loss": 1.6133, "step": 112224 }, { "epoch": 0.2215160325787117, "grad_norm": 0.1013316810131073, "learning_rate": 7.816426728800867e-05, "loss": 1.622, "step": 112256 }, { "epoch": 0.22157917854010814, "grad_norm": 0.11109724640846252, "learning_rate": 7.815792706364397e-05, "loss": 1.624, "step": 112288 }, { "epoch": 0.22164232450150456, "grad_norm": 0.10483244806528091, "learning_rate": 7.815158683927928e-05, "loss": 1.6306, "step": 112320 }, { "epoch": 0.221705470462901, "grad_norm": 0.11185155063867569, "learning_rate": 7.81452466149146e-05, "loss": 1.6205, "step": 112352 }, { "epoch": 0.22176861642429746, "grad_norm": 0.104886494576931, "learning_rate": 7.81389063905499e-05, "loss": 1.6258, "step": 112384 }, { "epoch": 0.22183176238569388, "grad_norm": 0.12286294251680374, "learning_rate": 7.81325661661852e-05, "loss": 1.632, "step": 112416 }, { "epoch": 0.22189490834709033, "grad_norm": 0.11867167800664902, "learning_rate": 7.812622594182051e-05, "loss": 1.6258, "step": 112448 }, { "epoch": 0.22195805430848678, "grad_norm": 0.10756305605173111, "learning_rate": 7.811988571745583e-05, "loss": 1.6264, "step": 112480 }, { "epoch": 0.2220212002698832, "grad_norm": 0.09912282973527908, "learning_rate": 7.811354549309114e-05, "loss": 1.6366, "step": 112512 }, { "epoch": 0.22208434623127965, "grad_norm": 0.10603366047143936, "learning_rate": 7.810720526872644e-05, "loss": 1.6315, "step": 112544 }, { "epoch": 0.2221474921926761, "grad_norm": 0.10342442989349365, "learning_rate": 7.810086504436176e-05, "loss": 1.6292, "step": 112576 }, { "epoch": 0.22221063815407252, "grad_norm": 0.10729510337114334, "learning_rate": 7.809452481999707e-05, "loss": 1.6243, "step": 112608 }, { "epoch": 0.22227378411546897, "grad_norm": 0.10366231203079224, "learning_rate": 7.808818459563239e-05, "loss": 1.627, "step": 112640 }, { "epoch": 0.22233693007686542, "grad_norm": 0.11111960560083389, "learning_rate": 7.808184437126769e-05, "loss": 1.6344, "step": 112672 }, { "epoch": 0.22240007603826184, "grad_norm": 0.10580338537693024, "learning_rate": 7.8075504146903e-05, "loss": 1.6203, "step": 112704 }, { "epoch": 0.2224632219996583, "grad_norm": 0.11580169200897217, "learning_rate": 7.806916392253832e-05, "loss": 1.6272, "step": 112736 }, { "epoch": 0.22252636796105474, "grad_norm": 0.10675030201673508, "learning_rate": 7.806282369817363e-05, "loss": 1.6122, "step": 112768 }, { "epoch": 0.22258951392245116, "grad_norm": 0.11238189786672592, "learning_rate": 7.805648347380893e-05, "loss": 1.6018, "step": 112800 }, { "epoch": 0.2226526598838476, "grad_norm": 0.09811877459287643, "learning_rate": 7.805014324944423e-05, "loss": 1.6356, "step": 112832 }, { "epoch": 0.22271580584524406, "grad_norm": 0.11010882258415222, "learning_rate": 7.804380302507955e-05, "loss": 1.6277, "step": 112864 }, { "epoch": 0.22277895180664048, "grad_norm": 0.10838333517313004, "learning_rate": 7.803746280071486e-05, "loss": 1.6393, "step": 112896 }, { "epoch": 0.22284209776803693, "grad_norm": 0.1051255315542221, "learning_rate": 7.803112257635018e-05, "loss": 1.6095, "step": 112928 }, { "epoch": 0.22290524372943338, "grad_norm": 0.103900246322155, "learning_rate": 7.802478235198548e-05, "loss": 1.6386, "step": 112960 }, { "epoch": 0.2229683896908298, "grad_norm": 0.10753915458917618, "learning_rate": 7.80184421276208e-05, "loss": 1.6295, "step": 112992 }, { "epoch": 0.22303153565222625, "grad_norm": 0.10351253300905228, "learning_rate": 7.801210190325611e-05, "loss": 1.629, "step": 113024 }, { "epoch": 0.2230946816136227, "grad_norm": 0.1029263287782669, "learning_rate": 7.800576167889142e-05, "loss": 1.6203, "step": 113056 }, { "epoch": 0.22315782757501912, "grad_norm": 0.1170961931347847, "learning_rate": 7.799942145452672e-05, "loss": 1.6383, "step": 113088 }, { "epoch": 0.22322097353641557, "grad_norm": 0.10815365612506866, "learning_rate": 7.799308123016204e-05, "loss": 1.6331, "step": 113120 }, { "epoch": 0.22328411949781202, "grad_norm": 0.10970927774906158, "learning_rate": 7.798674100579735e-05, "loss": 1.6354, "step": 113152 }, { "epoch": 0.22334726545920844, "grad_norm": 0.10320336371660233, "learning_rate": 7.798040078143267e-05, "loss": 1.6152, "step": 113184 }, { "epoch": 0.2234104114206049, "grad_norm": 0.1083802804350853, "learning_rate": 7.797406055706797e-05, "loss": 1.629, "step": 113216 }, { "epoch": 0.22347355738200134, "grad_norm": 0.11191002279520035, "learning_rate": 7.796772033270327e-05, "loss": 1.6214, "step": 113248 }, { "epoch": 0.22353670334339776, "grad_norm": 0.10184124857187271, "learning_rate": 7.796138010833858e-05, "loss": 1.6162, "step": 113280 }, { "epoch": 0.2235998493047942, "grad_norm": 0.10529950261116028, "learning_rate": 7.79550398839739e-05, "loss": 1.627, "step": 113312 }, { "epoch": 0.22366299526619066, "grad_norm": 0.10743270069360733, "learning_rate": 7.79486996596092e-05, "loss": 1.6362, "step": 113344 }, { "epoch": 0.22372614122758708, "grad_norm": 0.0962747111916542, "learning_rate": 7.794235943524451e-05, "loss": 1.6191, "step": 113376 }, { "epoch": 0.22378928718898353, "grad_norm": 0.11665450781583786, "learning_rate": 7.793601921087983e-05, "loss": 1.6256, "step": 113408 }, { "epoch": 0.22385243315037998, "grad_norm": 0.09606826305389404, "learning_rate": 7.792967898651514e-05, "loss": 1.6178, "step": 113440 }, { "epoch": 0.2239155791117764, "grad_norm": 0.10067195445299149, "learning_rate": 7.792333876215044e-05, "loss": 1.6176, "step": 113472 }, { "epoch": 0.22397872507317285, "grad_norm": 0.1075863465666771, "learning_rate": 7.791699853778576e-05, "loss": 1.629, "step": 113504 }, { "epoch": 0.2240418710345693, "grad_norm": 0.11087308079004288, "learning_rate": 7.791065831342107e-05, "loss": 1.6161, "step": 113536 }, { "epoch": 0.22410501699596572, "grad_norm": 0.10315633565187454, "learning_rate": 7.790431808905639e-05, "loss": 1.6276, "step": 113568 }, { "epoch": 0.22416816295736217, "grad_norm": 0.10783594846725464, "learning_rate": 7.789797786469169e-05, "loss": 1.6371, "step": 113600 }, { "epoch": 0.22423130891875862, "grad_norm": 0.110627181828022, "learning_rate": 7.7891637640327e-05, "loss": 1.6214, "step": 113632 }, { "epoch": 0.22429445488015504, "grad_norm": 0.11503355205059052, "learning_rate": 7.78852974159623e-05, "loss": 1.6195, "step": 113664 }, { "epoch": 0.22435760084155149, "grad_norm": 0.11128552258014679, "learning_rate": 7.787895719159762e-05, "loss": 1.6152, "step": 113696 }, { "epoch": 0.22442074680294793, "grad_norm": 0.10582909733057022, "learning_rate": 7.787261696723293e-05, "loss": 1.6315, "step": 113728 }, { "epoch": 0.22448389276434436, "grad_norm": 0.10193505883216858, "learning_rate": 7.786627674286823e-05, "loss": 1.6284, "step": 113760 }, { "epoch": 0.2245470387257408, "grad_norm": 0.10276355594396591, "learning_rate": 7.785993651850355e-05, "loss": 1.6116, "step": 113792 }, { "epoch": 0.22461018468713725, "grad_norm": 0.10170938074588776, "learning_rate": 7.785359629413886e-05, "loss": 1.6322, "step": 113824 }, { "epoch": 0.2246733306485337, "grad_norm": 0.10109400749206543, "learning_rate": 7.784725606977418e-05, "loss": 1.6193, "step": 113856 }, { "epoch": 0.22473647660993012, "grad_norm": 0.11129441112279892, "learning_rate": 7.784091584540948e-05, "loss": 1.6155, "step": 113888 }, { "epoch": 0.22479962257132657, "grad_norm": 0.10492438077926636, "learning_rate": 7.78345756210448e-05, "loss": 1.6357, "step": 113920 }, { "epoch": 0.22486276853272302, "grad_norm": 0.10089823603630066, "learning_rate": 7.782823539668011e-05, "loss": 1.6277, "step": 113952 }, { "epoch": 0.22492591449411944, "grad_norm": 0.10287986695766449, "learning_rate": 7.782189517231542e-05, "loss": 1.6342, "step": 113984 }, { "epoch": 0.2249890604555159, "grad_norm": 0.10186152905225754, "learning_rate": 7.781555494795072e-05, "loss": 1.6243, "step": 114016 }, { "epoch": 0.22505220641691234, "grad_norm": 0.1056632474064827, "learning_rate": 7.780921472358604e-05, "loss": 1.6346, "step": 114048 }, { "epoch": 0.22511535237830876, "grad_norm": 0.1158563420176506, "learning_rate": 7.780287449922134e-05, "loss": 1.6201, "step": 114080 }, { "epoch": 0.2251784983397052, "grad_norm": 0.11432074010372162, "learning_rate": 7.779653427485665e-05, "loss": 1.6165, "step": 114112 }, { "epoch": 0.22524164430110166, "grad_norm": 0.10810406506061554, "learning_rate": 7.779019405049196e-05, "loss": 1.6169, "step": 114144 }, { "epoch": 0.22530479026249808, "grad_norm": 0.10560592263936996, "learning_rate": 7.778385382612727e-05, "loss": 1.6383, "step": 114176 }, { "epoch": 0.22536793622389453, "grad_norm": 0.10545812547206879, "learning_rate": 7.777751360176258e-05, "loss": 1.631, "step": 114208 }, { "epoch": 0.22543108218529098, "grad_norm": 0.1102619618177414, "learning_rate": 7.77711733773979e-05, "loss": 1.6376, "step": 114240 }, { "epoch": 0.2254942281466874, "grad_norm": 0.10669015347957611, "learning_rate": 7.776483315303321e-05, "loss": 1.6094, "step": 114272 }, { "epoch": 0.22555737410808385, "grad_norm": 0.10579013079404831, "learning_rate": 7.775849292866851e-05, "loss": 1.615, "step": 114304 }, { "epoch": 0.2256205200694803, "grad_norm": 0.10540979355573654, "learning_rate": 7.775215270430383e-05, "loss": 1.623, "step": 114336 }, { "epoch": 0.22568366603087672, "grad_norm": 0.11036309599876404, "learning_rate": 7.774581247993914e-05, "loss": 1.6198, "step": 114368 }, { "epoch": 0.22574681199227317, "grad_norm": 0.11022590100765228, "learning_rate": 7.773947225557446e-05, "loss": 1.6219, "step": 114400 }, { "epoch": 0.22580995795366962, "grad_norm": 0.10671652853488922, "learning_rate": 7.773313203120976e-05, "loss": 1.6306, "step": 114432 }, { "epoch": 0.22587310391506604, "grad_norm": 0.10119830816984177, "learning_rate": 7.772679180684507e-05, "loss": 1.619, "step": 114464 }, { "epoch": 0.2259362498764625, "grad_norm": 0.11654257774353027, "learning_rate": 7.772045158248037e-05, "loss": 1.636, "step": 114496 }, { "epoch": 0.22599939583785894, "grad_norm": 0.10148677229881287, "learning_rate": 7.771411135811569e-05, "loss": 1.6217, "step": 114528 }, { "epoch": 0.22606254179925536, "grad_norm": 0.10720031708478928, "learning_rate": 7.770777113375099e-05, "loss": 1.6238, "step": 114560 }, { "epoch": 0.2261256877606518, "grad_norm": 0.10637203603982925, "learning_rate": 7.77014309093863e-05, "loss": 1.6256, "step": 114592 }, { "epoch": 0.22618883372204826, "grad_norm": 0.1099848672747612, "learning_rate": 7.769509068502162e-05, "loss": 1.644, "step": 114624 }, { "epoch": 0.22625197968344468, "grad_norm": 0.10704099386930466, "learning_rate": 7.768875046065693e-05, "loss": 1.619, "step": 114656 }, { "epoch": 0.22631512564484113, "grad_norm": 0.10641255229711533, "learning_rate": 7.768241023629224e-05, "loss": 1.6262, "step": 114688 }, { "epoch": 0.22637827160623758, "grad_norm": 0.11466799676418304, "learning_rate": 7.767607001192755e-05, "loss": 1.6193, "step": 114720 }, { "epoch": 0.226441417567634, "grad_norm": 0.11430511623620987, "learning_rate": 7.766972978756286e-05, "loss": 1.6166, "step": 114752 }, { "epoch": 0.22650456352903045, "grad_norm": 0.10588062554597855, "learning_rate": 7.766338956319818e-05, "loss": 1.6203, "step": 114784 }, { "epoch": 0.2265677094904269, "grad_norm": 0.1109849363565445, "learning_rate": 7.765704933883348e-05, "loss": 1.6167, "step": 114816 }, { "epoch": 0.22663085545182332, "grad_norm": 0.11077682673931122, "learning_rate": 7.76507091144688e-05, "loss": 1.636, "step": 114848 }, { "epoch": 0.22669400141321977, "grad_norm": 0.10223282128572464, "learning_rate": 7.76443688901041e-05, "loss": 1.6335, "step": 114880 }, { "epoch": 0.22675714737461622, "grad_norm": 0.11035159230232239, "learning_rate": 7.763802866573941e-05, "loss": 1.6218, "step": 114912 }, { "epoch": 0.22682029333601264, "grad_norm": 0.11320916563272476, "learning_rate": 7.763168844137472e-05, "loss": 1.6266, "step": 114944 }, { "epoch": 0.2268834392974091, "grad_norm": 0.1096683070063591, "learning_rate": 7.762534821701003e-05, "loss": 1.6222, "step": 114976 }, { "epoch": 0.22694658525880554, "grad_norm": 0.10047774016857147, "learning_rate": 7.761900799264534e-05, "loss": 1.6443, "step": 115008 }, { "epoch": 0.22700973122020196, "grad_norm": 0.11166009306907654, "learning_rate": 7.761266776828065e-05, "loss": 1.6109, "step": 115040 }, { "epoch": 0.2270728771815984, "grad_norm": 0.10214067250490189, "learning_rate": 7.760632754391597e-05, "loss": 1.6226, "step": 115072 }, { "epoch": 0.22713602314299486, "grad_norm": 0.1106981560587883, "learning_rate": 7.759998731955127e-05, "loss": 1.6232, "step": 115104 }, { "epoch": 0.22719916910439128, "grad_norm": 0.10561151057481766, "learning_rate": 7.759364709518658e-05, "loss": 1.6325, "step": 115136 }, { "epoch": 0.22726231506578773, "grad_norm": 0.10548792034387589, "learning_rate": 7.75873068708219e-05, "loss": 1.6204, "step": 115168 }, { "epoch": 0.22732546102718418, "grad_norm": 0.1109824851155281, "learning_rate": 7.758096664645721e-05, "loss": 1.6164, "step": 115200 }, { "epoch": 0.2273886069885806, "grad_norm": 0.1021735891699791, "learning_rate": 7.757462642209252e-05, "loss": 1.6226, "step": 115232 }, { "epoch": 0.22745175294997705, "grad_norm": 0.09949398040771484, "learning_rate": 7.756828619772783e-05, "loss": 1.6191, "step": 115264 }, { "epoch": 0.2275148989113735, "grad_norm": 0.10839954018592834, "learning_rate": 7.756194597336313e-05, "loss": 1.618, "step": 115296 }, { "epoch": 0.22757804487276992, "grad_norm": 0.10837170481681824, "learning_rate": 7.755560574899845e-05, "loss": 1.6243, "step": 115328 }, { "epoch": 0.22764119083416637, "grad_norm": 0.10774082690477371, "learning_rate": 7.754926552463375e-05, "loss": 1.6252, "step": 115360 }, { "epoch": 0.22770433679556282, "grad_norm": 0.12567077577114105, "learning_rate": 7.754292530026906e-05, "loss": 1.6376, "step": 115392 }, { "epoch": 0.22776748275695924, "grad_norm": 0.10726109892129898, "learning_rate": 7.753658507590438e-05, "loss": 1.6287, "step": 115424 }, { "epoch": 0.2278306287183557, "grad_norm": 0.10645340383052826, "learning_rate": 7.753024485153969e-05, "loss": 1.6353, "step": 115456 }, { "epoch": 0.22789377467975214, "grad_norm": 0.10601025074720383, "learning_rate": 7.752390462717499e-05, "loss": 1.6215, "step": 115488 }, { "epoch": 0.22795692064114859, "grad_norm": 0.10729113221168518, "learning_rate": 7.75175644028103e-05, "loss": 1.6231, "step": 115520 }, { "epoch": 0.228020066602545, "grad_norm": 0.11095377057790756, "learning_rate": 7.751122417844562e-05, "loss": 1.6328, "step": 115552 }, { "epoch": 0.22808321256394146, "grad_norm": 0.10204273462295532, "learning_rate": 7.750488395408093e-05, "loss": 1.6416, "step": 115584 }, { "epoch": 0.2281463585253379, "grad_norm": 0.1039181798696518, "learning_rate": 7.749854372971625e-05, "loss": 1.6196, "step": 115616 }, { "epoch": 0.22820950448673433, "grad_norm": 0.09892261773347855, "learning_rate": 7.749220350535155e-05, "loss": 1.6329, "step": 115648 }, { "epoch": 0.22827265044813078, "grad_norm": 0.10598491132259369, "learning_rate": 7.748586328098686e-05, "loss": 1.6269, "step": 115680 }, { "epoch": 0.22833579640952723, "grad_norm": 0.11424053460359573, "learning_rate": 7.747952305662217e-05, "loss": 1.6298, "step": 115712 }, { "epoch": 0.22839894237092365, "grad_norm": 0.1030338779091835, "learning_rate": 7.747318283225748e-05, "loss": 1.6294, "step": 115744 }, { "epoch": 0.2284620883323201, "grad_norm": 0.10474897176027298, "learning_rate": 7.746684260789278e-05, "loss": 1.6263, "step": 115776 }, { "epoch": 0.22852523429371654, "grad_norm": 0.10928239673376083, "learning_rate": 7.74605023835281e-05, "loss": 1.6134, "step": 115808 }, { "epoch": 0.22858838025511297, "grad_norm": 0.10650389641523361, "learning_rate": 7.745416215916341e-05, "loss": 1.6332, "step": 115840 }, { "epoch": 0.22865152621650942, "grad_norm": 0.10185643285512924, "learning_rate": 7.744782193479872e-05, "loss": 1.6155, "step": 115872 }, { "epoch": 0.22871467217790586, "grad_norm": 0.11756762862205505, "learning_rate": 7.744148171043403e-05, "loss": 1.6167, "step": 115904 }, { "epoch": 0.22877781813930229, "grad_norm": 0.10846003890037537, "learning_rate": 7.743514148606934e-05, "loss": 1.6309, "step": 115936 }, { "epoch": 0.22884096410069873, "grad_norm": 0.10317414999008179, "learning_rate": 7.742880126170466e-05, "loss": 1.6248, "step": 115968 }, { "epoch": 0.22890411006209518, "grad_norm": 0.10486005246639252, "learning_rate": 7.742246103733997e-05, "loss": 1.6118, "step": 116000 }, { "epoch": 0.2289672560234916, "grad_norm": 0.10296592116355896, "learning_rate": 7.741612081297527e-05, "loss": 1.639, "step": 116032 }, { "epoch": 0.22903040198488805, "grad_norm": 0.09826012700796127, "learning_rate": 7.740978058861059e-05, "loss": 1.6255, "step": 116064 }, { "epoch": 0.2290935479462845, "grad_norm": 0.11436039954423904, "learning_rate": 7.74034403642459e-05, "loss": 1.6207, "step": 116096 }, { "epoch": 0.22915669390768093, "grad_norm": 0.10209383815526962, "learning_rate": 7.73971001398812e-05, "loss": 1.6264, "step": 116128 }, { "epoch": 0.22921983986907737, "grad_norm": 0.10097664594650269, "learning_rate": 7.73907599155165e-05, "loss": 1.6191, "step": 116160 }, { "epoch": 0.22928298583047382, "grad_norm": 0.10899578779935837, "learning_rate": 7.738441969115182e-05, "loss": 1.6348, "step": 116192 }, { "epoch": 0.22934613179187024, "grad_norm": 0.10263419896364212, "learning_rate": 7.737807946678713e-05, "loss": 1.6136, "step": 116224 }, { "epoch": 0.2294092777532667, "grad_norm": 0.10021056234836578, "learning_rate": 7.737173924242245e-05, "loss": 1.6207, "step": 116256 }, { "epoch": 0.22947242371466314, "grad_norm": 0.11048859357833862, "learning_rate": 7.736539901805776e-05, "loss": 1.6295, "step": 116288 }, { "epoch": 0.22953556967605956, "grad_norm": 0.10607375204563141, "learning_rate": 7.735905879369306e-05, "loss": 1.6272, "step": 116320 }, { "epoch": 0.229598715637456, "grad_norm": 0.10816348344087601, "learning_rate": 7.735271856932838e-05, "loss": 1.6261, "step": 116352 }, { "epoch": 0.22966186159885246, "grad_norm": 0.1096382662653923, "learning_rate": 7.734637834496369e-05, "loss": 1.6177, "step": 116384 }, { "epoch": 0.22972500756024888, "grad_norm": 0.10628510266542435, "learning_rate": 7.7340038120599e-05, "loss": 1.6317, "step": 116416 }, { "epoch": 0.22978815352164533, "grad_norm": 0.10687847435474396, "learning_rate": 7.73336978962343e-05, "loss": 1.6353, "step": 116448 }, { "epoch": 0.22985129948304178, "grad_norm": 0.10397474467754364, "learning_rate": 7.732735767186962e-05, "loss": 1.6378, "step": 116480 }, { "epoch": 0.2299144454444382, "grad_norm": 0.10762056708335876, "learning_rate": 7.732101744750493e-05, "loss": 1.6124, "step": 116512 }, { "epoch": 0.22997759140583465, "grad_norm": 0.10956263542175293, "learning_rate": 7.731467722314024e-05, "loss": 1.624, "step": 116544 }, { "epoch": 0.2300407373672311, "grad_norm": 0.10909552872180939, "learning_rate": 7.730833699877554e-05, "loss": 1.6212, "step": 116576 }, { "epoch": 0.23010388332862752, "grad_norm": 0.11167658120393753, "learning_rate": 7.730199677441085e-05, "loss": 1.623, "step": 116608 }, { "epoch": 0.23016702929002397, "grad_norm": 0.10431547462940216, "learning_rate": 7.729565655004617e-05, "loss": 1.6174, "step": 116640 }, { "epoch": 0.23023017525142042, "grad_norm": 0.10333403944969177, "learning_rate": 7.728931632568148e-05, "loss": 1.6385, "step": 116672 }, { "epoch": 0.23029332121281684, "grad_norm": 0.10513138025999069, "learning_rate": 7.728297610131678e-05, "loss": 1.6286, "step": 116704 }, { "epoch": 0.2303564671742133, "grad_norm": 0.10103452205657959, "learning_rate": 7.72766358769521e-05, "loss": 1.6222, "step": 116736 }, { "epoch": 0.23041961313560974, "grad_norm": 0.10250106453895569, "learning_rate": 7.727029565258741e-05, "loss": 1.6283, "step": 116768 }, { "epoch": 0.23048275909700616, "grad_norm": 0.10892458260059357, "learning_rate": 7.726395542822273e-05, "loss": 1.6353, "step": 116800 }, { "epoch": 0.2305459050584026, "grad_norm": 0.09996946901082993, "learning_rate": 7.725761520385803e-05, "loss": 1.6132, "step": 116832 }, { "epoch": 0.23060905101979906, "grad_norm": 0.11390671879053116, "learning_rate": 7.725127497949334e-05, "loss": 1.635, "step": 116864 }, { "epoch": 0.23067219698119548, "grad_norm": 0.099349744617939, "learning_rate": 7.724493475512866e-05, "loss": 1.6217, "step": 116896 }, { "epoch": 0.23073534294259193, "grad_norm": 0.1030801460146904, "learning_rate": 7.723859453076397e-05, "loss": 1.6336, "step": 116928 }, { "epoch": 0.23079848890398838, "grad_norm": 0.10645098239183426, "learning_rate": 7.723225430639927e-05, "loss": 1.6161, "step": 116960 }, { "epoch": 0.2308616348653848, "grad_norm": 0.103032186627388, "learning_rate": 7.722591408203457e-05, "loss": 1.6203, "step": 116992 }, { "epoch": 0.23092478082678125, "grad_norm": 0.10170614719390869, "learning_rate": 7.721957385766989e-05, "loss": 1.6201, "step": 117024 }, { "epoch": 0.2309879267881777, "grad_norm": 0.10592218488454819, "learning_rate": 7.72132336333052e-05, "loss": 1.6107, "step": 117056 }, { "epoch": 0.23105107274957412, "grad_norm": 0.10595889389514923, "learning_rate": 7.720689340894052e-05, "loss": 1.6207, "step": 117088 }, { "epoch": 0.23111421871097057, "grad_norm": 0.10585210472345352, "learning_rate": 7.720055318457582e-05, "loss": 1.6333, "step": 117120 }, { "epoch": 0.23117736467236702, "grad_norm": 0.11004262417554855, "learning_rate": 7.719421296021113e-05, "loss": 1.6279, "step": 117152 }, { "epoch": 0.23124051063376347, "grad_norm": 0.10163483768701553, "learning_rate": 7.718787273584645e-05, "loss": 1.6141, "step": 117184 }, { "epoch": 0.2313036565951599, "grad_norm": 0.11740569025278091, "learning_rate": 7.718153251148176e-05, "loss": 1.6342, "step": 117216 }, { "epoch": 0.23136680255655634, "grad_norm": 0.10180219262838364, "learning_rate": 7.717519228711706e-05, "loss": 1.6163, "step": 117248 }, { "epoch": 0.2314299485179528, "grad_norm": 0.10720863193273544, "learning_rate": 7.716885206275238e-05, "loss": 1.6184, "step": 117280 }, { "epoch": 0.2314930944793492, "grad_norm": 0.11184300482273102, "learning_rate": 7.716251183838769e-05, "loss": 1.6138, "step": 117312 }, { "epoch": 0.23155624044074566, "grad_norm": 0.1033792644739151, "learning_rate": 7.7156171614023e-05, "loss": 1.6305, "step": 117344 }, { "epoch": 0.2316193864021421, "grad_norm": 0.10217909514904022, "learning_rate": 7.71498313896583e-05, "loss": 1.6233, "step": 117376 }, { "epoch": 0.23168253236353853, "grad_norm": 0.10086463391780853, "learning_rate": 7.714349116529361e-05, "loss": 1.6145, "step": 117408 }, { "epoch": 0.23174567832493498, "grad_norm": 0.10798216611146927, "learning_rate": 7.713715094092892e-05, "loss": 1.6145, "step": 117440 }, { "epoch": 0.23180882428633143, "grad_norm": 0.10482317954301834, "learning_rate": 7.713081071656424e-05, "loss": 1.6309, "step": 117472 }, { "epoch": 0.23187197024772785, "grad_norm": 0.09853389859199524, "learning_rate": 7.712447049219954e-05, "loss": 1.6266, "step": 117504 }, { "epoch": 0.2319351162091243, "grad_norm": 0.11287181079387665, "learning_rate": 7.711813026783485e-05, "loss": 1.6198, "step": 117536 }, { "epoch": 0.23199826217052075, "grad_norm": 0.10568402707576752, "learning_rate": 7.711179004347017e-05, "loss": 1.6265, "step": 117568 }, { "epoch": 0.23206140813191717, "grad_norm": 0.1097228080034256, "learning_rate": 7.710544981910548e-05, "loss": 1.6193, "step": 117600 }, { "epoch": 0.23212455409331362, "grad_norm": 0.10588759183883667, "learning_rate": 7.70991095947408e-05, "loss": 1.6299, "step": 117632 }, { "epoch": 0.23218770005471007, "grad_norm": 0.10230513662099838, "learning_rate": 7.70927693703761e-05, "loss": 1.6201, "step": 117664 }, { "epoch": 0.2322508460161065, "grad_norm": 0.10774840414524078, "learning_rate": 7.708642914601141e-05, "loss": 1.626, "step": 117696 }, { "epoch": 0.23231399197750294, "grad_norm": 0.10887067019939423, "learning_rate": 7.708008892164673e-05, "loss": 1.612, "step": 117728 }, { "epoch": 0.2323771379388994, "grad_norm": 0.10515390336513519, "learning_rate": 7.707374869728203e-05, "loss": 1.6159, "step": 117760 }, { "epoch": 0.2324402839002958, "grad_norm": 0.10412167757749557, "learning_rate": 7.706740847291734e-05, "loss": 1.616, "step": 117792 }, { "epoch": 0.23250342986169226, "grad_norm": 0.11328978836536407, "learning_rate": 7.706106824855264e-05, "loss": 1.6289, "step": 117824 }, { "epoch": 0.2325665758230887, "grad_norm": 0.10668544471263885, "learning_rate": 7.705472802418796e-05, "loss": 1.6341, "step": 117856 }, { "epoch": 0.23262972178448513, "grad_norm": 0.1112147718667984, "learning_rate": 7.704838779982327e-05, "loss": 1.6227, "step": 117888 }, { "epoch": 0.23269286774588158, "grad_norm": 0.10626817494630814, "learning_rate": 7.704204757545857e-05, "loss": 1.6235, "step": 117920 }, { "epoch": 0.23275601370727803, "grad_norm": 0.09796631336212158, "learning_rate": 7.703570735109389e-05, "loss": 1.6276, "step": 117952 }, { "epoch": 0.23281915966867445, "grad_norm": 0.10077675431966782, "learning_rate": 7.70293671267292e-05, "loss": 1.6252, "step": 117984 }, { "epoch": 0.2328823056300709, "grad_norm": 0.11095286905765533, "learning_rate": 7.702302690236452e-05, "loss": 1.6266, "step": 118016 }, { "epoch": 0.23294545159146735, "grad_norm": 0.09941935539245605, "learning_rate": 7.701668667799982e-05, "loss": 1.6307, "step": 118048 }, { "epoch": 0.23300859755286377, "grad_norm": 0.10696975886821747, "learning_rate": 7.701034645363513e-05, "loss": 1.6205, "step": 118080 }, { "epoch": 0.23307174351426022, "grad_norm": 0.10444318503141403, "learning_rate": 7.700400622927045e-05, "loss": 1.6321, "step": 118112 }, { "epoch": 0.23313488947565666, "grad_norm": 0.12032336741685867, "learning_rate": 7.699766600490576e-05, "loss": 1.628, "step": 118144 }, { "epoch": 0.2331980354370531, "grad_norm": 0.10192475467920303, "learning_rate": 7.699132578054106e-05, "loss": 1.6122, "step": 118176 }, { "epoch": 0.23326118139844954, "grad_norm": 0.10944230854511261, "learning_rate": 7.698498555617638e-05, "loss": 1.6268, "step": 118208 }, { "epoch": 0.23332432735984598, "grad_norm": 0.10616450756788254, "learning_rate": 7.697864533181168e-05, "loss": 1.6141, "step": 118240 }, { "epoch": 0.2333874733212424, "grad_norm": 0.10260586440563202, "learning_rate": 7.697230510744699e-05, "loss": 1.6184, "step": 118272 }, { "epoch": 0.23345061928263885, "grad_norm": 0.10613703727722168, "learning_rate": 7.69659648830823e-05, "loss": 1.6197, "step": 118304 }, { "epoch": 0.2335137652440353, "grad_norm": 0.10300738364458084, "learning_rate": 7.695962465871761e-05, "loss": 1.6113, "step": 118336 }, { "epoch": 0.23357691120543173, "grad_norm": 0.10916262120008469, "learning_rate": 7.695328443435292e-05, "loss": 1.6207, "step": 118368 }, { "epoch": 0.23364005716682817, "grad_norm": 0.10659836232662201, "learning_rate": 7.694694420998824e-05, "loss": 1.6212, "step": 118400 }, { "epoch": 0.23370320312822462, "grad_norm": 0.1065870001912117, "learning_rate": 7.694060398562355e-05, "loss": 1.6312, "step": 118432 }, { "epoch": 0.23376634908962104, "grad_norm": 0.11020667850971222, "learning_rate": 7.693426376125885e-05, "loss": 1.6279, "step": 118464 }, { "epoch": 0.2338294950510175, "grad_norm": 0.10344437509775162, "learning_rate": 7.692792353689417e-05, "loss": 1.6178, "step": 118496 }, { "epoch": 0.23389264101241394, "grad_norm": 0.11212826520204544, "learning_rate": 7.692158331252948e-05, "loss": 1.6237, "step": 118528 }, { "epoch": 0.23395578697381036, "grad_norm": 0.10625287145376205, "learning_rate": 7.69152430881648e-05, "loss": 1.6182, "step": 118560 }, { "epoch": 0.2340189329352068, "grad_norm": 0.10378042608499527, "learning_rate": 7.69089028638001e-05, "loss": 1.616, "step": 118592 }, { "epoch": 0.23408207889660326, "grad_norm": 0.10604571551084518, "learning_rate": 7.690256263943541e-05, "loss": 1.635, "step": 118624 }, { "epoch": 0.23414522485799968, "grad_norm": 0.10951421409845352, "learning_rate": 7.689622241507071e-05, "loss": 1.6033, "step": 118656 }, { "epoch": 0.23420837081939613, "grad_norm": 0.10516619682312012, "learning_rate": 7.688988219070603e-05, "loss": 1.6222, "step": 118688 }, { "epoch": 0.23427151678079258, "grad_norm": 0.10432136058807373, "learning_rate": 7.688354196634133e-05, "loss": 1.6281, "step": 118720 }, { "epoch": 0.234334662742189, "grad_norm": 0.10414012521505356, "learning_rate": 7.687720174197664e-05, "loss": 1.6334, "step": 118752 }, { "epoch": 0.23439780870358545, "grad_norm": 0.10413164645433426, "learning_rate": 7.687086151761196e-05, "loss": 1.6199, "step": 118784 }, { "epoch": 0.2344609546649819, "grad_norm": 0.11605276167392731, "learning_rate": 7.686452129324727e-05, "loss": 1.6143, "step": 118816 }, { "epoch": 0.23452410062637835, "grad_norm": 0.10303189605474472, "learning_rate": 7.685818106888259e-05, "loss": 1.6195, "step": 118848 }, { "epoch": 0.23458724658777477, "grad_norm": 0.10710170120000839, "learning_rate": 7.685184084451789e-05, "loss": 1.6238, "step": 118880 }, { "epoch": 0.23465039254917122, "grad_norm": 0.10300502181053162, "learning_rate": 7.68455006201532e-05, "loss": 1.6176, "step": 118912 }, { "epoch": 0.23471353851056767, "grad_norm": 0.11010536551475525, "learning_rate": 7.683916039578852e-05, "loss": 1.6148, "step": 118944 }, { "epoch": 0.2347766844719641, "grad_norm": 0.10205256938934326, "learning_rate": 7.683282017142383e-05, "loss": 1.6227, "step": 118976 }, { "epoch": 0.23483983043336054, "grad_norm": 0.10697031766176224, "learning_rate": 7.682647994705913e-05, "loss": 1.6195, "step": 119008 }, { "epoch": 0.234902976394757, "grad_norm": 0.09970673173666, "learning_rate": 7.682013972269443e-05, "loss": 1.6236, "step": 119040 }, { "epoch": 0.2349661223561534, "grad_norm": 0.10200951248407364, "learning_rate": 7.681379949832975e-05, "loss": 1.6167, "step": 119072 }, { "epoch": 0.23502926831754986, "grad_norm": 0.10827691853046417, "learning_rate": 7.680745927396506e-05, "loss": 1.6206, "step": 119104 }, { "epoch": 0.2350924142789463, "grad_norm": 0.1034129336476326, "learning_rate": 7.680111904960036e-05, "loss": 1.6108, "step": 119136 }, { "epoch": 0.23515556024034273, "grad_norm": 0.10959687829017639, "learning_rate": 7.679477882523568e-05, "loss": 1.6319, "step": 119168 }, { "epoch": 0.23521870620173918, "grad_norm": 0.11860638856887817, "learning_rate": 7.678843860087099e-05, "loss": 1.6121, "step": 119200 }, { "epoch": 0.23528185216313563, "grad_norm": 0.1124686673283577, "learning_rate": 7.678209837650631e-05, "loss": 1.6297, "step": 119232 }, { "epoch": 0.23534499812453205, "grad_norm": 0.10974572598934174, "learning_rate": 7.677575815214161e-05, "loss": 1.6156, "step": 119264 }, { "epoch": 0.2354081440859285, "grad_norm": 0.10951638966798782, "learning_rate": 7.676941792777692e-05, "loss": 1.6235, "step": 119296 }, { "epoch": 0.23547129004732495, "grad_norm": 0.11050344258546829, "learning_rate": 7.676307770341224e-05, "loss": 1.6151, "step": 119328 }, { "epoch": 0.23553443600872137, "grad_norm": 0.1017633005976677, "learning_rate": 7.675673747904755e-05, "loss": 1.6199, "step": 119360 }, { "epoch": 0.23559758197011782, "grad_norm": 0.109011210501194, "learning_rate": 7.675039725468285e-05, "loss": 1.6324, "step": 119392 }, { "epoch": 0.23566072793151427, "grad_norm": 0.11013071238994598, "learning_rate": 7.674405703031817e-05, "loss": 1.6052, "step": 119424 }, { "epoch": 0.2357238738929107, "grad_norm": 0.10388562083244324, "learning_rate": 7.673771680595347e-05, "loss": 1.6241, "step": 119456 }, { "epoch": 0.23578701985430714, "grad_norm": 0.1053258553147316, "learning_rate": 7.673137658158878e-05, "loss": 1.6252, "step": 119488 }, { "epoch": 0.2358501658157036, "grad_norm": 0.10670289397239685, "learning_rate": 7.67250363572241e-05, "loss": 1.6118, "step": 119520 }, { "epoch": 0.2359133117771, "grad_norm": 0.1083621084690094, "learning_rate": 7.67186961328594e-05, "loss": 1.6155, "step": 119552 }, { "epoch": 0.23597645773849646, "grad_norm": 0.10241899639368057, "learning_rate": 7.671235590849471e-05, "loss": 1.6313, "step": 119584 }, { "epoch": 0.2360396036998929, "grad_norm": 0.10821599513292313, "learning_rate": 7.670601568413003e-05, "loss": 1.6246, "step": 119616 }, { "epoch": 0.23610274966128933, "grad_norm": 0.10203195363283157, "learning_rate": 7.669967545976534e-05, "loss": 1.6148, "step": 119648 }, { "epoch": 0.23616589562268578, "grad_norm": 0.09916464984416962, "learning_rate": 7.669333523540064e-05, "loss": 1.6168, "step": 119680 }, { "epoch": 0.23622904158408223, "grad_norm": 0.10602273792028427, "learning_rate": 7.668699501103596e-05, "loss": 1.6138, "step": 119712 }, { "epoch": 0.23629218754547865, "grad_norm": 0.10990050435066223, "learning_rate": 7.668065478667127e-05, "loss": 1.607, "step": 119744 }, { "epoch": 0.2363553335068751, "grad_norm": 0.10659512877464294, "learning_rate": 7.667431456230659e-05, "loss": 1.6323, "step": 119776 }, { "epoch": 0.23641847946827155, "grad_norm": 0.10102102905511856, "learning_rate": 7.666797433794189e-05, "loss": 1.6218, "step": 119808 }, { "epoch": 0.23648162542966797, "grad_norm": 0.10293369740247726, "learning_rate": 7.66616341135772e-05, "loss": 1.6224, "step": 119840 }, { "epoch": 0.23654477139106442, "grad_norm": 0.11067891120910645, "learning_rate": 7.66552938892125e-05, "loss": 1.625, "step": 119872 }, { "epoch": 0.23660791735246087, "grad_norm": 0.1084989532828331, "learning_rate": 7.664895366484782e-05, "loss": 1.6255, "step": 119904 }, { "epoch": 0.2366710633138573, "grad_norm": 0.10479579865932465, "learning_rate": 7.664261344048312e-05, "loss": 1.6412, "step": 119936 }, { "epoch": 0.23673420927525374, "grad_norm": 0.10684318095445633, "learning_rate": 7.663627321611843e-05, "loss": 1.628, "step": 119968 }, { "epoch": 0.2367973552366502, "grad_norm": 0.10191134363412857, "learning_rate": 7.662993299175375e-05, "loss": 1.6146, "step": 120000 }, { "epoch": 0.2368605011980466, "grad_norm": 0.10358243435621262, "learning_rate": 7.662359276738906e-05, "loss": 1.6172, "step": 120032 }, { "epoch": 0.23692364715944306, "grad_norm": 0.1038525253534317, "learning_rate": 7.661725254302436e-05, "loss": 1.6192, "step": 120064 }, { "epoch": 0.2369867931208395, "grad_norm": 0.1099800243973732, "learning_rate": 7.661091231865968e-05, "loss": 1.6291, "step": 120096 }, { "epoch": 0.23704993908223593, "grad_norm": 0.10635606944561005, "learning_rate": 7.660457209429499e-05, "loss": 1.6295, "step": 120128 }, { "epoch": 0.23711308504363238, "grad_norm": 0.10405123233795166, "learning_rate": 7.659823186993031e-05, "loss": 1.632, "step": 120160 }, { "epoch": 0.23717623100502883, "grad_norm": 0.10176390409469604, "learning_rate": 7.659189164556562e-05, "loss": 1.6195, "step": 120192 }, { "epoch": 0.23723937696642525, "grad_norm": 0.11222195625305176, "learning_rate": 7.658555142120092e-05, "loss": 1.6242, "step": 120224 }, { "epoch": 0.2373025229278217, "grad_norm": 0.11476761847734451, "learning_rate": 7.657921119683624e-05, "loss": 1.6232, "step": 120256 }, { "epoch": 0.23736566888921815, "grad_norm": 0.10762138664722443, "learning_rate": 7.657287097247154e-05, "loss": 1.6143, "step": 120288 }, { "epoch": 0.23742881485061457, "grad_norm": 0.10720644146203995, "learning_rate": 7.656653074810685e-05, "loss": 1.6152, "step": 120320 }, { "epoch": 0.23749196081201102, "grad_norm": 0.1090712621808052, "learning_rate": 7.656019052374215e-05, "loss": 1.639, "step": 120352 }, { "epoch": 0.23755510677340747, "grad_norm": 0.10782972723245621, "learning_rate": 7.655385029937747e-05, "loss": 1.6135, "step": 120384 }, { "epoch": 0.2376182527348039, "grad_norm": 0.10414179414510727, "learning_rate": 7.654751007501278e-05, "loss": 1.6198, "step": 120416 }, { "epoch": 0.23768139869620034, "grad_norm": 0.10828063637018204, "learning_rate": 7.65411698506481e-05, "loss": 1.6209, "step": 120448 }, { "epoch": 0.23774454465759678, "grad_norm": 0.10708325356245041, "learning_rate": 7.65348296262834e-05, "loss": 1.6269, "step": 120480 }, { "epoch": 0.2378076906189932, "grad_norm": 0.10508102923631668, "learning_rate": 7.652848940191871e-05, "loss": 1.6306, "step": 120512 }, { "epoch": 0.23787083658038966, "grad_norm": 0.11044111102819443, "learning_rate": 7.652214917755403e-05, "loss": 1.6009, "step": 120544 }, { "epoch": 0.2379339825417861, "grad_norm": 0.12157759815454483, "learning_rate": 7.651580895318934e-05, "loss": 1.6292, "step": 120576 }, { "epoch": 0.23799712850318255, "grad_norm": 0.1084010899066925, "learning_rate": 7.650946872882464e-05, "loss": 1.6169, "step": 120608 }, { "epoch": 0.23806027446457897, "grad_norm": 0.10739398747682571, "learning_rate": 7.650312850445996e-05, "loss": 1.6183, "step": 120640 }, { "epoch": 0.23812342042597542, "grad_norm": 0.11482581496238708, "learning_rate": 7.649678828009527e-05, "loss": 1.6293, "step": 120672 }, { "epoch": 0.23818656638737187, "grad_norm": 0.10567828267812729, "learning_rate": 7.649044805573057e-05, "loss": 1.6363, "step": 120704 }, { "epoch": 0.2382497123487683, "grad_norm": 0.10498349368572235, "learning_rate": 7.648410783136587e-05, "loss": 1.6093, "step": 120736 }, { "epoch": 0.23831285831016474, "grad_norm": 0.10828934609889984, "learning_rate": 7.647776760700119e-05, "loss": 1.6326, "step": 120768 }, { "epoch": 0.2383760042715612, "grad_norm": 0.11244882643222809, "learning_rate": 7.64714273826365e-05, "loss": 1.6256, "step": 120800 }, { "epoch": 0.23843915023295761, "grad_norm": 0.12038005143404007, "learning_rate": 7.646508715827182e-05, "loss": 1.6235, "step": 120832 }, { "epoch": 0.23850229619435406, "grad_norm": 0.09573744982481003, "learning_rate": 7.645874693390713e-05, "loss": 1.6046, "step": 120864 }, { "epoch": 0.2385654421557505, "grad_norm": 0.10723771899938583, "learning_rate": 7.645240670954243e-05, "loss": 1.6188, "step": 120896 }, { "epoch": 0.23862858811714693, "grad_norm": 0.11568645387887955, "learning_rate": 7.644606648517775e-05, "loss": 1.6096, "step": 120928 }, { "epoch": 0.23869173407854338, "grad_norm": 0.1048082485795021, "learning_rate": 7.643972626081306e-05, "loss": 1.6276, "step": 120960 }, { "epoch": 0.23875488003993983, "grad_norm": 0.10420051217079163, "learning_rate": 7.643338603644838e-05, "loss": 1.627, "step": 120992 }, { "epoch": 0.23881802600133625, "grad_norm": 0.10620584338903427, "learning_rate": 7.642704581208368e-05, "loss": 1.6226, "step": 121024 }, { "epoch": 0.2388811719627327, "grad_norm": 0.10347410291433334, "learning_rate": 7.642070558771899e-05, "loss": 1.6274, "step": 121056 }, { "epoch": 0.23894431792412915, "grad_norm": 0.09775253385305405, "learning_rate": 7.641436536335431e-05, "loss": 1.6168, "step": 121088 }, { "epoch": 0.23900746388552557, "grad_norm": 0.11735594272613525, "learning_rate": 7.640802513898961e-05, "loss": 1.637, "step": 121120 }, { "epoch": 0.23907060984692202, "grad_norm": 0.10499858111143112, "learning_rate": 7.640168491462491e-05, "loss": 1.6231, "step": 121152 }, { "epoch": 0.23913375580831847, "grad_norm": 0.10351528227329254, "learning_rate": 7.639534469026022e-05, "loss": 1.6211, "step": 121184 }, { "epoch": 0.2391969017697149, "grad_norm": 0.10469400137662888, "learning_rate": 7.638900446589554e-05, "loss": 1.6216, "step": 121216 }, { "epoch": 0.23926004773111134, "grad_norm": 0.10248269885778427, "learning_rate": 7.638266424153085e-05, "loss": 1.6173, "step": 121248 }, { "epoch": 0.2393231936925078, "grad_norm": 0.10857024043798447, "learning_rate": 7.637632401716615e-05, "loss": 1.6248, "step": 121280 }, { "epoch": 0.2393863396539042, "grad_norm": 0.1071888655424118, "learning_rate": 7.636998379280147e-05, "loss": 1.6234, "step": 121312 }, { "epoch": 0.23944948561530066, "grad_norm": 0.11180950701236725, "learning_rate": 7.636364356843678e-05, "loss": 1.6264, "step": 121344 }, { "epoch": 0.2395126315766971, "grad_norm": 0.10684220492839813, "learning_rate": 7.63573033440721e-05, "loss": 1.6198, "step": 121376 }, { "epoch": 0.23957577753809353, "grad_norm": 0.11148609220981598, "learning_rate": 7.63509631197074e-05, "loss": 1.6185, "step": 121408 }, { "epoch": 0.23963892349948998, "grad_norm": 0.09844603389501572, "learning_rate": 7.634462289534271e-05, "loss": 1.6092, "step": 121440 }, { "epoch": 0.23970206946088643, "grad_norm": 0.10835153609514236, "learning_rate": 7.633828267097803e-05, "loss": 1.6345, "step": 121472 }, { "epoch": 0.23976521542228285, "grad_norm": 0.10451914370059967, "learning_rate": 7.633194244661334e-05, "loss": 1.6291, "step": 121504 }, { "epoch": 0.2398283613836793, "grad_norm": 0.11012529581785202, "learning_rate": 7.632560222224864e-05, "loss": 1.6177, "step": 121536 }, { "epoch": 0.23989150734507575, "grad_norm": 0.10615980625152588, "learning_rate": 7.631926199788394e-05, "loss": 1.6299, "step": 121568 }, { "epoch": 0.23995465330647217, "grad_norm": 0.10427898913621902, "learning_rate": 7.631292177351926e-05, "loss": 1.6097, "step": 121600 }, { "epoch": 0.24001779926786862, "grad_norm": 0.11751525104045868, "learning_rate": 7.630658154915457e-05, "loss": 1.6162, "step": 121632 }, { "epoch": 0.24008094522926507, "grad_norm": 0.10239632427692413, "learning_rate": 7.630024132478989e-05, "loss": 1.618, "step": 121664 }, { "epoch": 0.2401440911906615, "grad_norm": 0.10828424245119095, "learning_rate": 7.629390110042519e-05, "loss": 1.6073, "step": 121696 }, { "epoch": 0.24020723715205794, "grad_norm": 0.10856421291828156, "learning_rate": 7.62875608760605e-05, "loss": 1.6267, "step": 121728 }, { "epoch": 0.2402703831134544, "grad_norm": 0.11210472881793976, "learning_rate": 7.628122065169582e-05, "loss": 1.6184, "step": 121760 }, { "epoch": 0.2403335290748508, "grad_norm": 0.11003146320581436, "learning_rate": 7.627488042733113e-05, "loss": 1.6172, "step": 121792 }, { "epoch": 0.24039667503624726, "grad_norm": 0.10011272132396698, "learning_rate": 7.626854020296643e-05, "loss": 1.6215, "step": 121824 }, { "epoch": 0.2404598209976437, "grad_norm": 0.1020115315914154, "learning_rate": 7.626219997860175e-05, "loss": 1.6154, "step": 121856 }, { "epoch": 0.24052296695904013, "grad_norm": 0.11512121558189392, "learning_rate": 7.625585975423706e-05, "loss": 1.6212, "step": 121888 }, { "epoch": 0.24058611292043658, "grad_norm": 0.10439430177211761, "learning_rate": 7.624951952987236e-05, "loss": 1.6301, "step": 121920 }, { "epoch": 0.24064925888183303, "grad_norm": 0.10639136284589767, "learning_rate": 7.624317930550768e-05, "loss": 1.6083, "step": 121952 }, { "epoch": 0.24071240484322945, "grad_norm": 0.10879534482955933, "learning_rate": 7.623683908114298e-05, "loss": 1.6088, "step": 121984 }, { "epoch": 0.2407755508046259, "grad_norm": 0.10547618567943573, "learning_rate": 7.62304988567783e-05, "loss": 1.6125, "step": 122016 }, { "epoch": 0.24083869676602235, "grad_norm": 0.11240384727716446, "learning_rate": 7.622415863241361e-05, "loss": 1.6153, "step": 122048 }, { "epoch": 0.24090184272741877, "grad_norm": 0.10646174848079681, "learning_rate": 7.621781840804891e-05, "loss": 1.6095, "step": 122080 }, { "epoch": 0.24096498868881522, "grad_norm": 0.10142052918672562, "learning_rate": 7.621147818368422e-05, "loss": 1.6192, "step": 122112 }, { "epoch": 0.24102813465021167, "grad_norm": 0.11388655006885529, "learning_rate": 7.620513795931954e-05, "loss": 1.6336, "step": 122144 }, { "epoch": 0.2410912806116081, "grad_norm": 0.11353683471679688, "learning_rate": 7.619879773495485e-05, "loss": 1.6259, "step": 122176 }, { "epoch": 0.24115442657300454, "grad_norm": 0.10896036028862, "learning_rate": 7.619245751059017e-05, "loss": 1.6274, "step": 122208 }, { "epoch": 0.241217572534401, "grad_norm": 0.1057293489575386, "learning_rate": 7.618611728622547e-05, "loss": 1.6136, "step": 122240 }, { "epoch": 0.24128071849579744, "grad_norm": 0.10727423429489136, "learning_rate": 7.617977706186078e-05, "loss": 1.6189, "step": 122272 }, { "epoch": 0.24134386445719386, "grad_norm": 0.10312186926603317, "learning_rate": 7.61734368374961e-05, "loss": 1.6172, "step": 122304 }, { "epoch": 0.2414070104185903, "grad_norm": 0.10512793064117432, "learning_rate": 7.61670966131314e-05, "loss": 1.6157, "step": 122336 }, { "epoch": 0.24147015637998676, "grad_norm": 0.10570508241653442, "learning_rate": 7.616075638876671e-05, "loss": 1.6266, "step": 122368 }, { "epoch": 0.24153330234138318, "grad_norm": 0.1021624356508255, "learning_rate": 7.615441616440201e-05, "loss": 1.6125, "step": 122400 }, { "epoch": 0.24159644830277963, "grad_norm": 0.10386671870946884, "learning_rate": 7.614807594003733e-05, "loss": 1.6195, "step": 122432 }, { "epoch": 0.24165959426417608, "grad_norm": 0.10835690051317215, "learning_rate": 7.614173571567264e-05, "loss": 1.6215, "step": 122464 }, { "epoch": 0.2417227402255725, "grad_norm": 0.10276900976896286, "learning_rate": 7.613539549130794e-05, "loss": 1.6162, "step": 122496 }, { "epoch": 0.24178588618696895, "grad_norm": 0.10647771507501602, "learning_rate": 7.612905526694326e-05, "loss": 1.6083, "step": 122528 }, { "epoch": 0.2418490321483654, "grad_norm": 0.10218511521816254, "learning_rate": 7.612271504257857e-05, "loss": 1.6139, "step": 122560 }, { "epoch": 0.24191217810976182, "grad_norm": 0.11111932247877121, "learning_rate": 7.611637481821389e-05, "loss": 1.6239, "step": 122592 }, { "epoch": 0.24197532407115827, "grad_norm": 0.11231356114149094, "learning_rate": 7.611003459384919e-05, "loss": 1.6143, "step": 122624 }, { "epoch": 0.24203847003255471, "grad_norm": 0.10202392935752869, "learning_rate": 7.61036943694845e-05, "loss": 1.6149, "step": 122656 }, { "epoch": 0.24210161599395114, "grad_norm": 0.10742413997650146, "learning_rate": 7.609735414511982e-05, "loss": 1.6176, "step": 122688 }, { "epoch": 0.24216476195534758, "grad_norm": 0.10752253234386444, "learning_rate": 7.609101392075513e-05, "loss": 1.607, "step": 122720 }, { "epoch": 0.24222790791674403, "grad_norm": 0.10861130803823471, "learning_rate": 7.608467369639043e-05, "loss": 1.6263, "step": 122752 }, { "epoch": 0.24229105387814046, "grad_norm": 0.10310644656419754, "learning_rate": 7.607833347202575e-05, "loss": 1.6253, "step": 122784 }, { "epoch": 0.2423541998395369, "grad_norm": 0.10626522451639175, "learning_rate": 7.607199324766105e-05, "loss": 1.6209, "step": 122816 }, { "epoch": 0.24241734580093335, "grad_norm": 0.10614936053752899, "learning_rate": 7.606565302329636e-05, "loss": 1.6251, "step": 122848 }, { "epoch": 0.24248049176232978, "grad_norm": 0.10486698895692825, "learning_rate": 7.605931279893168e-05, "loss": 1.614, "step": 122880 }, { "epoch": 0.24254363772372622, "grad_norm": 0.10899936407804489, "learning_rate": 7.605297257456698e-05, "loss": 1.6314, "step": 122912 }, { "epoch": 0.24260678368512267, "grad_norm": 0.11536278575658798, "learning_rate": 7.60466323502023e-05, "loss": 1.6183, "step": 122944 }, { "epoch": 0.2426699296465191, "grad_norm": 0.10541034489870071, "learning_rate": 7.604029212583761e-05, "loss": 1.6166, "step": 122976 }, { "epoch": 0.24273307560791554, "grad_norm": 0.10871303826570511, "learning_rate": 7.603395190147292e-05, "loss": 1.623, "step": 123008 }, { "epoch": 0.242796221569312, "grad_norm": 0.10295688360929489, "learning_rate": 7.602761167710822e-05, "loss": 1.6119, "step": 123040 }, { "epoch": 0.24285936753070841, "grad_norm": 0.10653199255466461, "learning_rate": 7.602127145274354e-05, "loss": 1.6075, "step": 123072 }, { "epoch": 0.24292251349210486, "grad_norm": 0.10827381908893585, "learning_rate": 7.601493122837885e-05, "loss": 1.6183, "step": 123104 }, { "epoch": 0.2429856594535013, "grad_norm": 0.11906562000513077, "learning_rate": 7.600859100401417e-05, "loss": 1.6161, "step": 123136 }, { "epoch": 0.24304880541489773, "grad_norm": 0.1009439006447792, "learning_rate": 7.600225077964947e-05, "loss": 1.6167, "step": 123168 }, { "epoch": 0.24311195137629418, "grad_norm": 0.1109309047460556, "learning_rate": 7.599591055528477e-05, "loss": 1.6144, "step": 123200 }, { "epoch": 0.24317509733769063, "grad_norm": 0.10044911503791809, "learning_rate": 7.598957033092009e-05, "loss": 1.6068, "step": 123232 }, { "epoch": 0.24323824329908705, "grad_norm": 0.11914646625518799, "learning_rate": 7.59832301065554e-05, "loss": 1.6164, "step": 123264 }, { "epoch": 0.2433013892604835, "grad_norm": 0.09595836699008942, "learning_rate": 7.59768898821907e-05, "loss": 1.6242, "step": 123296 }, { "epoch": 0.24336453522187995, "grad_norm": 0.10583670437335968, "learning_rate": 7.597054965782602e-05, "loss": 1.6196, "step": 123328 }, { "epoch": 0.24342768118327637, "grad_norm": 0.10334964841604233, "learning_rate": 7.596420943346133e-05, "loss": 1.6191, "step": 123360 }, { "epoch": 0.24349082714467282, "grad_norm": 0.11159883439540863, "learning_rate": 7.595786920909664e-05, "loss": 1.6159, "step": 123392 }, { "epoch": 0.24355397310606927, "grad_norm": 0.1027933657169342, "learning_rate": 7.595152898473195e-05, "loss": 1.631, "step": 123424 }, { "epoch": 0.2436171190674657, "grad_norm": 0.11988910287618637, "learning_rate": 7.594518876036726e-05, "loss": 1.628, "step": 123456 }, { "epoch": 0.24368026502886214, "grad_norm": 0.10404536873102188, "learning_rate": 7.593884853600257e-05, "loss": 1.6239, "step": 123488 }, { "epoch": 0.2437434109902586, "grad_norm": 0.10335434973239899, "learning_rate": 7.593250831163789e-05, "loss": 1.6106, "step": 123520 }, { "epoch": 0.243806556951655, "grad_norm": 0.10373678803443909, "learning_rate": 7.59261680872732e-05, "loss": 1.6208, "step": 123552 }, { "epoch": 0.24386970291305146, "grad_norm": 0.10799238830804825, "learning_rate": 7.59198278629085e-05, "loss": 1.6094, "step": 123584 }, { "epoch": 0.2439328488744479, "grad_norm": 0.1039934903383255, "learning_rate": 7.59134876385438e-05, "loss": 1.6169, "step": 123616 }, { "epoch": 0.24399599483584433, "grad_norm": 0.09921015053987503, "learning_rate": 7.590714741417912e-05, "loss": 1.6056, "step": 123648 }, { "epoch": 0.24405914079724078, "grad_norm": 0.10938850045204163, "learning_rate": 7.590080718981443e-05, "loss": 1.6197, "step": 123680 }, { "epoch": 0.24412228675863723, "grad_norm": 0.10939442366361618, "learning_rate": 7.589446696544974e-05, "loss": 1.6115, "step": 123712 }, { "epoch": 0.24418543272003365, "grad_norm": 0.10520564764738083, "learning_rate": 7.588812674108505e-05, "loss": 1.6093, "step": 123744 }, { "epoch": 0.2442485786814301, "grad_norm": 0.10615213960409164, "learning_rate": 7.588178651672036e-05, "loss": 1.6184, "step": 123776 }, { "epoch": 0.24431172464282655, "grad_norm": 0.11066926270723343, "learning_rate": 7.587544629235568e-05, "loss": 1.6177, "step": 123808 }, { "epoch": 0.24437487060422297, "grad_norm": 0.1180395632982254, "learning_rate": 7.586910606799098e-05, "loss": 1.6358, "step": 123840 }, { "epoch": 0.24443801656561942, "grad_norm": 0.11051355302333832, "learning_rate": 7.58627658436263e-05, "loss": 1.6385, "step": 123872 }, { "epoch": 0.24450116252701587, "grad_norm": 0.12290903180837631, "learning_rate": 7.585642561926161e-05, "loss": 1.6114, "step": 123904 }, { "epoch": 0.24456430848841232, "grad_norm": 0.10706045478582382, "learning_rate": 7.585008539489692e-05, "loss": 1.624, "step": 123936 }, { "epoch": 0.24462745444980874, "grad_norm": 0.10720265656709671, "learning_rate": 7.584374517053223e-05, "loss": 1.6209, "step": 123968 }, { "epoch": 0.2446906004112052, "grad_norm": 0.10642603784799576, "learning_rate": 7.583740494616754e-05, "loss": 1.6215, "step": 124000 }, { "epoch": 0.24475374637260164, "grad_norm": 0.10877221822738647, "learning_rate": 7.583106472180284e-05, "loss": 1.6178, "step": 124032 }, { "epoch": 0.24481689233399806, "grad_norm": 0.11477097868919373, "learning_rate": 7.582472449743816e-05, "loss": 1.6281, "step": 124064 }, { "epoch": 0.2448800382953945, "grad_norm": 0.10735027492046356, "learning_rate": 7.581838427307346e-05, "loss": 1.6287, "step": 124096 }, { "epoch": 0.24494318425679096, "grad_norm": 0.11125703155994415, "learning_rate": 7.581204404870877e-05, "loss": 1.6065, "step": 124128 }, { "epoch": 0.24500633021818738, "grad_norm": 0.11185038089752197, "learning_rate": 7.580570382434409e-05, "loss": 1.6221, "step": 124160 }, { "epoch": 0.24506947617958383, "grad_norm": 0.10352326184511185, "learning_rate": 7.57993635999794e-05, "loss": 1.6224, "step": 124192 }, { "epoch": 0.24513262214098028, "grad_norm": 0.1192038357257843, "learning_rate": 7.579302337561471e-05, "loss": 1.6145, "step": 124224 }, { "epoch": 0.2451957681023767, "grad_norm": 0.11741012334823608, "learning_rate": 7.578668315125002e-05, "loss": 1.6174, "step": 124256 }, { "epoch": 0.24525891406377315, "grad_norm": 0.10417269915342331, "learning_rate": 7.578034292688533e-05, "loss": 1.6176, "step": 124288 }, { "epoch": 0.2453220600251696, "grad_norm": 0.10842493921518326, "learning_rate": 7.577400270252064e-05, "loss": 1.624, "step": 124320 }, { "epoch": 0.24538520598656602, "grad_norm": 0.10870698094367981, "learning_rate": 7.576766247815596e-05, "loss": 1.6218, "step": 124352 }, { "epoch": 0.24544835194796247, "grad_norm": 0.11051846295595169, "learning_rate": 7.576132225379126e-05, "loss": 1.6166, "step": 124384 }, { "epoch": 0.24551149790935892, "grad_norm": 0.1120854839682579, "learning_rate": 7.575498202942657e-05, "loss": 1.6255, "step": 124416 }, { "epoch": 0.24557464387075534, "grad_norm": 0.10920020937919617, "learning_rate": 7.574864180506188e-05, "loss": 1.6147, "step": 124448 }, { "epoch": 0.2456377898321518, "grad_norm": 0.10512637346982956, "learning_rate": 7.574230158069719e-05, "loss": 1.6308, "step": 124480 }, { "epoch": 0.24570093579354824, "grad_norm": 0.10023616254329681, "learning_rate": 7.573596135633249e-05, "loss": 1.6312, "step": 124512 }, { "epoch": 0.24576408175494466, "grad_norm": 0.1029682457447052, "learning_rate": 7.57296211319678e-05, "loss": 1.6175, "step": 124544 }, { "epoch": 0.2458272277163411, "grad_norm": 0.10144051909446716, "learning_rate": 7.572328090760312e-05, "loss": 1.6243, "step": 124576 }, { "epoch": 0.24589037367773756, "grad_norm": 0.10957455635070801, "learning_rate": 7.571694068323844e-05, "loss": 1.631, "step": 124608 }, { "epoch": 0.24595351963913398, "grad_norm": 0.1120717003941536, "learning_rate": 7.571060045887374e-05, "loss": 1.6076, "step": 124640 }, { "epoch": 0.24601666560053043, "grad_norm": 0.11887475103139877, "learning_rate": 7.570426023450905e-05, "loss": 1.6283, "step": 124672 }, { "epoch": 0.24607981156192688, "grad_norm": 0.10277330875396729, "learning_rate": 7.569792001014437e-05, "loss": 1.6106, "step": 124704 }, { "epoch": 0.2461429575233233, "grad_norm": 0.10494889318943024, "learning_rate": 7.569157978577968e-05, "loss": 1.626, "step": 124736 }, { "epoch": 0.24620610348471975, "grad_norm": 0.10465250164270401, "learning_rate": 7.568523956141498e-05, "loss": 1.6173, "step": 124768 }, { "epoch": 0.2462692494461162, "grad_norm": 0.10026372969150543, "learning_rate": 7.56788993370503e-05, "loss": 1.626, "step": 124800 }, { "epoch": 0.24633239540751262, "grad_norm": 0.11129790544509888, "learning_rate": 7.567255911268561e-05, "loss": 1.6253, "step": 124832 }, { "epoch": 0.24639554136890907, "grad_norm": 0.10483391582965851, "learning_rate": 7.566621888832091e-05, "loss": 1.6145, "step": 124864 }, { "epoch": 0.24645868733030551, "grad_norm": 0.10571115463972092, "learning_rate": 7.565987866395623e-05, "loss": 1.6224, "step": 124896 }, { "epoch": 0.24652183329170194, "grad_norm": 0.1026662290096283, "learning_rate": 7.565353843959153e-05, "loss": 1.6161, "step": 124928 }, { "epoch": 0.24658497925309839, "grad_norm": 0.10826900601387024, "learning_rate": 7.564719821522684e-05, "loss": 1.6068, "step": 124960 }, { "epoch": 0.24664812521449483, "grad_norm": 0.10200421512126923, "learning_rate": 7.564085799086216e-05, "loss": 1.6164, "step": 124992 }, { "epoch": 0.24671127117589126, "grad_norm": 0.10194798558950424, "learning_rate": 7.563451776649747e-05, "loss": 1.6234, "step": 125024 }, { "epoch": 0.2467744171372877, "grad_norm": 0.10575179755687714, "learning_rate": 7.562817754213277e-05, "loss": 1.6135, "step": 125056 }, { "epoch": 0.24683756309868415, "grad_norm": 0.11280045658349991, "learning_rate": 7.562183731776809e-05, "loss": 1.6241, "step": 125088 }, { "epoch": 0.24690070906008058, "grad_norm": 0.10755287855863571, "learning_rate": 7.56154970934034e-05, "loss": 1.6103, "step": 125120 }, { "epoch": 0.24696385502147702, "grad_norm": 0.10866934806108475, "learning_rate": 7.560915686903871e-05, "loss": 1.6105, "step": 125152 }, { "epoch": 0.24702700098287347, "grad_norm": 0.10039102286100388, "learning_rate": 7.560281664467402e-05, "loss": 1.6229, "step": 125184 }, { "epoch": 0.2470901469442699, "grad_norm": 0.10975603759288788, "learning_rate": 7.559647642030933e-05, "loss": 1.6343, "step": 125216 }, { "epoch": 0.24715329290566634, "grad_norm": 0.11331671476364136, "learning_rate": 7.559013619594464e-05, "loss": 1.617, "step": 125248 }, { "epoch": 0.2472164388670628, "grad_norm": 0.10674507170915604, "learning_rate": 7.558379597157995e-05, "loss": 1.6151, "step": 125280 }, { "epoch": 0.24727958482845921, "grad_norm": 0.10739534348249435, "learning_rate": 7.557745574721525e-05, "loss": 1.6082, "step": 125312 }, { "epoch": 0.24734273078985566, "grad_norm": 0.10259276628494263, "learning_rate": 7.557111552285056e-05, "loss": 1.6138, "step": 125344 }, { "epoch": 0.2474058767512521, "grad_norm": 0.1094990149140358, "learning_rate": 7.556477529848588e-05, "loss": 1.6166, "step": 125376 }, { "epoch": 0.24746902271264853, "grad_norm": 0.10647404193878174, "learning_rate": 7.555843507412119e-05, "loss": 1.6132, "step": 125408 }, { "epoch": 0.24753216867404498, "grad_norm": 0.10291962325572968, "learning_rate": 7.555209484975649e-05, "loss": 1.6062, "step": 125440 }, { "epoch": 0.24759531463544143, "grad_norm": 0.1110311895608902, "learning_rate": 7.55457546253918e-05, "loss": 1.6241, "step": 125472 }, { "epoch": 0.24765846059683785, "grad_norm": 0.1133258193731308, "learning_rate": 7.553941440102712e-05, "loss": 1.6159, "step": 125504 }, { "epoch": 0.2477216065582343, "grad_norm": 0.10049762576818466, "learning_rate": 7.553307417666244e-05, "loss": 1.5989, "step": 125536 }, { "epoch": 0.24778475251963075, "grad_norm": 0.10998736321926117, "learning_rate": 7.552673395229775e-05, "loss": 1.6183, "step": 125568 }, { "epoch": 0.2478478984810272, "grad_norm": 0.10934124886989594, "learning_rate": 7.552039372793305e-05, "loss": 1.6179, "step": 125600 }, { "epoch": 0.24791104444242362, "grad_norm": 0.10284886509180069, "learning_rate": 7.551405350356837e-05, "loss": 1.599, "step": 125632 }, { "epoch": 0.24797419040382007, "grad_norm": 0.104775071144104, "learning_rate": 7.550771327920368e-05, "loss": 1.6121, "step": 125664 }, { "epoch": 0.24803733636521652, "grad_norm": 0.10824383795261383, "learning_rate": 7.550137305483898e-05, "loss": 1.6299, "step": 125696 }, { "epoch": 0.24810048232661294, "grad_norm": 0.10084754228591919, "learning_rate": 7.549503283047428e-05, "loss": 1.6213, "step": 125728 }, { "epoch": 0.2481636282880094, "grad_norm": 0.11840032786130905, "learning_rate": 7.54886926061096e-05, "loss": 1.6227, "step": 125760 }, { "epoch": 0.24822677424940584, "grad_norm": 0.10971036553382874, "learning_rate": 7.548235238174491e-05, "loss": 1.6164, "step": 125792 }, { "epoch": 0.24828992021080226, "grad_norm": 0.10688149183988571, "learning_rate": 7.547601215738023e-05, "loss": 1.6259, "step": 125824 }, { "epoch": 0.2483530661721987, "grad_norm": 0.10770287364721298, "learning_rate": 7.546967193301553e-05, "loss": 1.6299, "step": 125856 }, { "epoch": 0.24841621213359516, "grad_norm": 0.1067945659160614, "learning_rate": 7.546333170865084e-05, "loss": 1.6107, "step": 125888 }, { "epoch": 0.24847935809499158, "grad_norm": 0.10258124768733978, "learning_rate": 7.545699148428616e-05, "loss": 1.6125, "step": 125920 }, { "epoch": 0.24854250405638803, "grad_norm": 0.11382295191287994, "learning_rate": 7.545065125992147e-05, "loss": 1.607, "step": 125952 }, { "epoch": 0.24860565001778448, "grad_norm": 0.10734063386917114, "learning_rate": 7.544431103555677e-05, "loss": 1.616, "step": 125984 }, { "epoch": 0.2486687959791809, "grad_norm": 0.10081788152456284, "learning_rate": 7.543797081119209e-05, "loss": 1.6169, "step": 126016 }, { "epoch": 0.24873194194057735, "grad_norm": 0.10751203447580338, "learning_rate": 7.54316305868274e-05, "loss": 1.6135, "step": 126048 }, { "epoch": 0.2487950879019738, "grad_norm": 0.10247769206762314, "learning_rate": 7.54252903624627e-05, "loss": 1.6079, "step": 126080 }, { "epoch": 0.24885823386337022, "grad_norm": 0.10922478139400482, "learning_rate": 7.541895013809802e-05, "loss": 1.6103, "step": 126112 }, { "epoch": 0.24892137982476667, "grad_norm": 0.10713105648756027, "learning_rate": 7.541260991373332e-05, "loss": 1.6267, "step": 126144 }, { "epoch": 0.24898452578616312, "grad_norm": 0.10570314526557922, "learning_rate": 7.540626968936863e-05, "loss": 1.6226, "step": 126176 }, { "epoch": 0.24904767174755954, "grad_norm": 0.11996707320213318, "learning_rate": 7.539992946500395e-05, "loss": 1.6128, "step": 126208 }, { "epoch": 0.249110817708956, "grad_norm": 0.10169323533773422, "learning_rate": 7.539358924063926e-05, "loss": 1.6204, "step": 126240 }, { "epoch": 0.24917396367035244, "grad_norm": 0.10233422368764877, "learning_rate": 7.538724901627456e-05, "loss": 1.6225, "step": 126272 }, { "epoch": 0.24923710963174886, "grad_norm": 0.10392436385154724, "learning_rate": 7.538090879190988e-05, "loss": 1.6096, "step": 126304 }, { "epoch": 0.2493002555931453, "grad_norm": 0.10452143847942352, "learning_rate": 7.537456856754519e-05, "loss": 1.6239, "step": 126336 }, { "epoch": 0.24936340155454176, "grad_norm": 0.10403753817081451, "learning_rate": 7.53682283431805e-05, "loss": 1.6102, "step": 126368 }, { "epoch": 0.24942654751593818, "grad_norm": 0.10682003945112228, "learning_rate": 7.53618881188158e-05, "loss": 1.6267, "step": 126400 }, { "epoch": 0.24948969347733463, "grad_norm": 0.11028899252414703, "learning_rate": 7.535554789445112e-05, "loss": 1.6113, "step": 126432 }, { "epoch": 0.24955283943873108, "grad_norm": 0.10885583609342575, "learning_rate": 7.534920767008644e-05, "loss": 1.6332, "step": 126464 }, { "epoch": 0.2496159854001275, "grad_norm": 0.11095141619443893, "learning_rate": 7.534286744572174e-05, "loss": 1.6187, "step": 126496 }, { "epoch": 0.24967913136152395, "grad_norm": 0.10756413638591766, "learning_rate": 7.533652722135705e-05, "loss": 1.6119, "step": 126528 }, { "epoch": 0.2497422773229204, "grad_norm": 0.11318307369947433, "learning_rate": 7.533018699699235e-05, "loss": 1.6234, "step": 126560 }, { "epoch": 0.24980542328431682, "grad_norm": 0.11176694184541702, "learning_rate": 7.532384677262767e-05, "loss": 1.615, "step": 126592 }, { "epoch": 0.24986856924571327, "grad_norm": 0.09958535432815552, "learning_rate": 7.531750654826298e-05, "loss": 1.6073, "step": 126624 }, { "epoch": 0.24993171520710972, "grad_norm": 0.09965774416923523, "learning_rate": 7.531116632389828e-05, "loss": 1.6066, "step": 126656 }, { "epoch": 0.24999486116850614, "grad_norm": 0.11404179781675339, "learning_rate": 7.53048260995336e-05, "loss": 1.6205, "step": 126688 }, { "epoch": 0.2500580071299026, "grad_norm": 0.10913736373186111, "learning_rate": 7.529848587516891e-05, "loss": 1.5992, "step": 126720 }, { "epoch": 0.250121153091299, "grad_norm": 0.10473071783781052, "learning_rate": 7.529214565080423e-05, "loss": 1.6016, "step": 126752 }, { "epoch": 0.25018429905269546, "grad_norm": 0.1100083440542221, "learning_rate": 7.528580542643953e-05, "loss": 1.6142, "step": 126784 }, { "epoch": 0.2502474450140919, "grad_norm": 0.11019501835107803, "learning_rate": 7.527946520207484e-05, "loss": 1.6168, "step": 126816 }, { "epoch": 0.25031059097548836, "grad_norm": 0.10891743749380112, "learning_rate": 7.527312497771016e-05, "loss": 1.6129, "step": 126848 }, { "epoch": 0.2503737369368848, "grad_norm": 0.10849962383508682, "learning_rate": 7.526678475334547e-05, "loss": 1.6029, "step": 126880 }, { "epoch": 0.25043688289828125, "grad_norm": 0.10497497767210007, "learning_rate": 7.526044452898077e-05, "loss": 1.6155, "step": 126912 }, { "epoch": 0.25050002885967765, "grad_norm": 0.10756964236497879, "learning_rate": 7.525410430461609e-05, "loss": 1.6181, "step": 126944 }, { "epoch": 0.2505631748210741, "grad_norm": 0.10947880893945694, "learning_rate": 7.524776408025139e-05, "loss": 1.6008, "step": 126976 }, { "epoch": 0.25062632078247055, "grad_norm": 0.11128494143486023, "learning_rate": 7.52414238558867e-05, "loss": 1.6238, "step": 127008 }, { "epoch": 0.250689466743867, "grad_norm": 0.10451777279376984, "learning_rate": 7.523508363152202e-05, "loss": 1.6196, "step": 127040 }, { "epoch": 0.25075261270526344, "grad_norm": 0.11082389205694199, "learning_rate": 7.522874340715732e-05, "loss": 1.6312, "step": 127072 }, { "epoch": 0.2508157586666599, "grad_norm": 0.10895415395498276, "learning_rate": 7.522240318279263e-05, "loss": 1.6101, "step": 127104 }, { "epoch": 0.2508789046280563, "grad_norm": 0.11142008006572723, "learning_rate": 7.521606295842795e-05, "loss": 1.6148, "step": 127136 }, { "epoch": 0.25094205058945274, "grad_norm": 0.10830048471689224, "learning_rate": 7.520972273406326e-05, "loss": 1.6213, "step": 127168 }, { "epoch": 0.2510051965508492, "grad_norm": 0.10632851719856262, "learning_rate": 7.520338250969856e-05, "loss": 1.6179, "step": 127200 }, { "epoch": 0.25106834251224563, "grad_norm": 0.1038040891289711, "learning_rate": 7.519704228533388e-05, "loss": 1.6056, "step": 127232 }, { "epoch": 0.2511314884736421, "grad_norm": 0.11971843987703323, "learning_rate": 7.519070206096919e-05, "loss": 1.6144, "step": 127264 }, { "epoch": 0.25119463443503853, "grad_norm": 0.10849187523126602, "learning_rate": 7.51843618366045e-05, "loss": 1.6181, "step": 127296 }, { "epoch": 0.2512577803964349, "grad_norm": 0.1097431480884552, "learning_rate": 7.517802161223981e-05, "loss": 1.5983, "step": 127328 }, { "epoch": 0.2513209263578314, "grad_norm": 0.11160177737474442, "learning_rate": 7.517168138787511e-05, "loss": 1.6177, "step": 127360 }, { "epoch": 0.2513840723192278, "grad_norm": 0.10210192203521729, "learning_rate": 7.516534116351042e-05, "loss": 1.624, "step": 127392 }, { "epoch": 0.2514472182806243, "grad_norm": 0.10520271211862564, "learning_rate": 7.515900093914574e-05, "loss": 1.6267, "step": 127424 }, { "epoch": 0.2515103642420207, "grad_norm": 0.1065123975276947, "learning_rate": 7.515266071478104e-05, "loss": 1.6208, "step": 127456 }, { "epoch": 0.25157351020341717, "grad_norm": 0.11302441358566284, "learning_rate": 7.514632049041635e-05, "loss": 1.6248, "step": 127488 }, { "epoch": 0.25163665616481357, "grad_norm": 0.1007496640086174, "learning_rate": 7.513998026605167e-05, "loss": 1.6311, "step": 127520 }, { "epoch": 0.25169980212621, "grad_norm": 0.10568457096815109, "learning_rate": 7.513364004168698e-05, "loss": 1.6139, "step": 127552 }, { "epoch": 0.25176294808760646, "grad_norm": 0.1054898202419281, "learning_rate": 7.51272998173223e-05, "loss": 1.6233, "step": 127584 }, { "epoch": 0.2518260940490029, "grad_norm": 0.10831870883703232, "learning_rate": 7.51209595929576e-05, "loss": 1.6114, "step": 127616 }, { "epoch": 0.25188924001039936, "grad_norm": 0.1130576878786087, "learning_rate": 7.511461936859291e-05, "loss": 1.6112, "step": 127648 }, { "epoch": 0.2519523859717958, "grad_norm": 0.1042439192533493, "learning_rate": 7.510827914422823e-05, "loss": 1.6252, "step": 127680 }, { "epoch": 0.2520155319331922, "grad_norm": 0.10637133568525314, "learning_rate": 7.510193891986354e-05, "loss": 1.6173, "step": 127712 }, { "epoch": 0.25207867789458865, "grad_norm": 0.10836733877658844, "learning_rate": 7.509559869549884e-05, "loss": 1.6305, "step": 127744 }, { "epoch": 0.2521418238559851, "grad_norm": 0.10669766366481781, "learning_rate": 7.508925847113414e-05, "loss": 1.616, "step": 127776 }, { "epoch": 0.25220496981738155, "grad_norm": 0.111818328499794, "learning_rate": 7.508291824676946e-05, "loss": 1.6267, "step": 127808 }, { "epoch": 0.252268115778778, "grad_norm": 0.11270266771316528, "learning_rate": 7.507657802240477e-05, "loss": 1.603, "step": 127840 }, { "epoch": 0.25233126174017445, "grad_norm": 0.10993698984384537, "learning_rate": 7.507023779804007e-05, "loss": 1.6291, "step": 127872 }, { "epoch": 0.25239440770157084, "grad_norm": 0.09985803812742233, "learning_rate": 7.506389757367539e-05, "loss": 1.6218, "step": 127904 }, { "epoch": 0.2524575536629673, "grad_norm": 0.10485244542360306, "learning_rate": 7.50575573493107e-05, "loss": 1.634, "step": 127936 }, { "epoch": 0.25252069962436374, "grad_norm": 0.10270264744758606, "learning_rate": 7.505121712494602e-05, "loss": 1.6053, "step": 127968 }, { "epoch": 0.2525838455857602, "grad_norm": 0.10258591920137405, "learning_rate": 7.504487690058132e-05, "loss": 1.6209, "step": 128000 }, { "epoch": 0.25264699154715664, "grad_norm": 0.10999858379364014, "learning_rate": 7.503853667621663e-05, "loss": 1.6182, "step": 128032 }, { "epoch": 0.2527101375085531, "grad_norm": 0.10758821666240692, "learning_rate": 7.503219645185195e-05, "loss": 1.6074, "step": 128064 }, { "epoch": 0.2527732834699495, "grad_norm": 0.10051969438791275, "learning_rate": 7.502585622748726e-05, "loss": 1.6216, "step": 128096 }, { "epoch": 0.25283642943134593, "grad_norm": 0.10678692907094955, "learning_rate": 7.501951600312256e-05, "loss": 1.5979, "step": 128128 }, { "epoch": 0.2528995753927424, "grad_norm": 0.09789399802684784, "learning_rate": 7.501317577875788e-05, "loss": 1.5982, "step": 128160 }, { "epoch": 0.25296272135413883, "grad_norm": 0.10215232521295547, "learning_rate": 7.500683555439318e-05, "loss": 1.614, "step": 128192 }, { "epoch": 0.2530258673155353, "grad_norm": 0.10916431248188019, "learning_rate": 7.500049533002849e-05, "loss": 1.6092, "step": 128224 }, { "epoch": 0.25308901327693173, "grad_norm": 0.1081867516040802, "learning_rate": 7.499415510566381e-05, "loss": 1.6237, "step": 128256 }, { "epoch": 0.2531521592383282, "grad_norm": 0.11384835094213486, "learning_rate": 7.498781488129911e-05, "loss": 1.6185, "step": 128288 }, { "epoch": 0.25321530519972457, "grad_norm": 0.10165699571371078, "learning_rate": 7.498147465693442e-05, "loss": 1.6294, "step": 128320 }, { "epoch": 0.253278451161121, "grad_norm": 0.10673161596059799, "learning_rate": 7.497513443256974e-05, "loss": 1.6227, "step": 128352 }, { "epoch": 0.25334159712251747, "grad_norm": 0.1099945604801178, "learning_rate": 7.496879420820505e-05, "loss": 1.6248, "step": 128384 }, { "epoch": 0.2534047430839139, "grad_norm": 0.10666900873184204, "learning_rate": 7.496245398384035e-05, "loss": 1.6355, "step": 128416 }, { "epoch": 0.25346788904531037, "grad_norm": 0.10574255883693695, "learning_rate": 7.495611375947567e-05, "loss": 1.6118, "step": 128448 }, { "epoch": 0.2535310350067068, "grad_norm": 0.10493651032447815, "learning_rate": 7.494977353511098e-05, "loss": 1.5986, "step": 128480 }, { "epoch": 0.2535941809681032, "grad_norm": 0.1075049415230751, "learning_rate": 7.49434333107463e-05, "loss": 1.6079, "step": 128512 }, { "epoch": 0.25365732692949966, "grad_norm": 0.131600484251976, "learning_rate": 7.49370930863816e-05, "loss": 1.6193, "step": 128544 }, { "epoch": 0.2537204728908961, "grad_norm": 0.1113981083035469, "learning_rate": 7.493075286201691e-05, "loss": 1.6224, "step": 128576 }, { "epoch": 0.25378361885229256, "grad_norm": 0.10733971744775772, "learning_rate": 7.492441263765221e-05, "loss": 1.6104, "step": 128608 }, { "epoch": 0.253846764813689, "grad_norm": 0.10786531865596771, "learning_rate": 7.491807241328753e-05, "loss": 1.5924, "step": 128640 }, { "epoch": 0.25390991077508546, "grad_norm": 0.10711334645748138, "learning_rate": 7.491173218892283e-05, "loss": 1.6181, "step": 128672 }, { "epoch": 0.25397305673648185, "grad_norm": 0.10400371253490448, "learning_rate": 7.490539196455814e-05, "loss": 1.6096, "step": 128704 }, { "epoch": 0.2540362026978783, "grad_norm": 0.10419753938913345, "learning_rate": 7.489905174019346e-05, "loss": 1.6029, "step": 128736 }, { "epoch": 0.25409934865927475, "grad_norm": 0.11558572947978973, "learning_rate": 7.489271151582877e-05, "loss": 1.6147, "step": 128768 }, { "epoch": 0.2541624946206712, "grad_norm": 0.10490277409553528, "learning_rate": 7.488637129146409e-05, "loss": 1.6276, "step": 128800 }, { "epoch": 0.25422564058206765, "grad_norm": 0.11033342778682709, "learning_rate": 7.488003106709939e-05, "loss": 1.605, "step": 128832 }, { "epoch": 0.2542887865434641, "grad_norm": 0.11046944558620453, "learning_rate": 7.48736908427347e-05, "loss": 1.6261, "step": 128864 }, { "epoch": 0.2543519325048605, "grad_norm": 0.108438640832901, "learning_rate": 7.486735061837002e-05, "loss": 1.6084, "step": 128896 }, { "epoch": 0.25441507846625694, "grad_norm": 0.10542812943458557, "learning_rate": 7.486101039400533e-05, "loss": 1.5991, "step": 128928 }, { "epoch": 0.2544782244276534, "grad_norm": 0.10477355122566223, "learning_rate": 7.485467016964063e-05, "loss": 1.6154, "step": 128960 }, { "epoch": 0.25454137038904984, "grad_norm": 0.10619034618139267, "learning_rate": 7.484832994527595e-05, "loss": 1.6184, "step": 128992 }, { "epoch": 0.2546045163504463, "grad_norm": 0.09891405701637268, "learning_rate": 7.484198972091125e-05, "loss": 1.6136, "step": 129024 }, { "epoch": 0.25466766231184274, "grad_norm": 0.10589949041604996, "learning_rate": 7.483564949654656e-05, "loss": 1.6279, "step": 129056 }, { "epoch": 0.25473080827323913, "grad_norm": 0.11763922870159149, "learning_rate": 7.482930927218186e-05, "loss": 1.6233, "step": 129088 }, { "epoch": 0.2547939542346356, "grad_norm": 0.104377880692482, "learning_rate": 7.482296904781718e-05, "loss": 1.6032, "step": 129120 }, { "epoch": 0.254857100196032, "grad_norm": 0.10092160105705261, "learning_rate": 7.481662882345249e-05, "loss": 1.6197, "step": 129152 }, { "epoch": 0.2549202461574285, "grad_norm": 0.10802380740642548, "learning_rate": 7.481028859908781e-05, "loss": 1.6222, "step": 129184 }, { "epoch": 0.2549833921188249, "grad_norm": 0.11016335338354111, "learning_rate": 7.480394837472311e-05, "loss": 1.6021, "step": 129216 }, { "epoch": 0.2550465380802214, "grad_norm": 0.10332827270030975, "learning_rate": 7.479760815035842e-05, "loss": 1.6129, "step": 129248 }, { "epoch": 0.25510968404161777, "grad_norm": 0.10060596466064453, "learning_rate": 7.479126792599374e-05, "loss": 1.606, "step": 129280 }, { "epoch": 0.2551728300030142, "grad_norm": 0.10596133768558502, "learning_rate": 7.478492770162905e-05, "loss": 1.6114, "step": 129312 }, { "epoch": 0.25523597596441067, "grad_norm": 0.1052638590335846, "learning_rate": 7.477858747726435e-05, "loss": 1.6143, "step": 129344 }, { "epoch": 0.2552991219258071, "grad_norm": 0.09929129481315613, "learning_rate": 7.477224725289967e-05, "loss": 1.6181, "step": 129376 }, { "epoch": 0.25536226788720356, "grad_norm": 0.10516052693128586, "learning_rate": 7.476590702853498e-05, "loss": 1.6074, "step": 129408 }, { "epoch": 0.2554254138486, "grad_norm": 0.10188374668359756, "learning_rate": 7.475956680417028e-05, "loss": 1.6095, "step": 129440 }, { "epoch": 0.2554885598099964, "grad_norm": 0.10364662110805511, "learning_rate": 7.47532265798056e-05, "loss": 1.6175, "step": 129472 }, { "epoch": 0.25555170577139286, "grad_norm": 0.11409908533096313, "learning_rate": 7.47468863554409e-05, "loss": 1.6104, "step": 129504 }, { "epoch": 0.2556148517327893, "grad_norm": 0.11226414144039154, "learning_rate": 7.474054613107621e-05, "loss": 1.6139, "step": 129536 }, { "epoch": 0.25567799769418575, "grad_norm": 0.10771206766366959, "learning_rate": 7.473420590671153e-05, "loss": 1.6138, "step": 129568 }, { "epoch": 0.2557411436555822, "grad_norm": 0.10493512451648712, "learning_rate": 7.472786568234684e-05, "loss": 1.6201, "step": 129600 }, { "epoch": 0.25580428961697865, "grad_norm": 0.12275021523237228, "learning_rate": 7.472152545798214e-05, "loss": 1.6149, "step": 129632 }, { "epoch": 0.25586743557837505, "grad_norm": 0.1040772870182991, "learning_rate": 7.471518523361746e-05, "loss": 1.6217, "step": 129664 }, { "epoch": 0.2559305815397715, "grad_norm": 0.11437197029590607, "learning_rate": 7.470884500925277e-05, "loss": 1.6207, "step": 129696 }, { "epoch": 0.25599372750116794, "grad_norm": 0.12347360700368881, "learning_rate": 7.470250478488809e-05, "loss": 1.6151, "step": 129728 }, { "epoch": 0.2560568734625644, "grad_norm": 0.10722575336694717, "learning_rate": 7.469616456052339e-05, "loss": 1.6343, "step": 129760 }, { "epoch": 0.25612001942396084, "grad_norm": 0.10563173145055771, "learning_rate": 7.46898243361587e-05, "loss": 1.6104, "step": 129792 }, { "epoch": 0.2561831653853573, "grad_norm": 0.09876292943954468, "learning_rate": 7.468348411179402e-05, "loss": 1.6121, "step": 129824 }, { "epoch": 0.25624631134675374, "grad_norm": 0.11074431985616684, "learning_rate": 7.467714388742932e-05, "loss": 1.6155, "step": 129856 }, { "epoch": 0.25630945730815013, "grad_norm": 0.10697343945503235, "learning_rate": 7.467080366306462e-05, "loss": 1.6122, "step": 129888 }, { "epoch": 0.2563726032695466, "grad_norm": 0.10713440924882889, "learning_rate": 7.466446343869993e-05, "loss": 1.6095, "step": 129920 }, { "epoch": 0.25643574923094303, "grad_norm": 0.10320103168487549, "learning_rate": 7.465812321433525e-05, "loss": 1.591, "step": 129952 }, { "epoch": 0.2564988951923395, "grad_norm": 0.10894148051738739, "learning_rate": 7.465178298997056e-05, "loss": 1.6067, "step": 129984 }, { "epoch": 0.25656204115373593, "grad_norm": 0.10409074276685715, "learning_rate": 7.464544276560586e-05, "loss": 1.5955, "step": 130016 }, { "epoch": 0.2566251871151324, "grad_norm": 0.11294655501842499, "learning_rate": 7.463910254124118e-05, "loss": 1.6076, "step": 130048 }, { "epoch": 0.2566883330765288, "grad_norm": 0.10786151140928268, "learning_rate": 7.46327623168765e-05, "loss": 1.6136, "step": 130080 }, { "epoch": 0.2567514790379252, "grad_norm": 0.10605989396572113, "learning_rate": 7.462642209251181e-05, "loss": 1.6146, "step": 130112 }, { "epoch": 0.2568146249993217, "grad_norm": 0.11035019159317017, "learning_rate": 7.462008186814712e-05, "loss": 1.611, "step": 130144 }, { "epoch": 0.2568777709607181, "grad_norm": 0.11705031245946884, "learning_rate": 7.461374164378242e-05, "loss": 1.6104, "step": 130176 }, { "epoch": 0.25694091692211457, "grad_norm": 0.10468898713588715, "learning_rate": 7.460740141941774e-05, "loss": 1.6111, "step": 130208 }, { "epoch": 0.257004062883511, "grad_norm": 0.10858344286680222, "learning_rate": 7.460106119505304e-05, "loss": 1.6115, "step": 130240 }, { "epoch": 0.2570672088449074, "grad_norm": 0.10601144284009933, "learning_rate": 7.459472097068835e-05, "loss": 1.6158, "step": 130272 }, { "epoch": 0.25713035480630386, "grad_norm": 0.10758187621831894, "learning_rate": 7.458838074632365e-05, "loss": 1.6208, "step": 130304 }, { "epoch": 0.2571935007677003, "grad_norm": 0.10547227412462234, "learning_rate": 7.458204052195897e-05, "loss": 1.6207, "step": 130336 }, { "epoch": 0.25725664672909676, "grad_norm": 0.10425049066543579, "learning_rate": 7.457570029759428e-05, "loss": 1.6184, "step": 130368 }, { "epoch": 0.2573197926904932, "grad_norm": 0.10619667172431946, "learning_rate": 7.45693600732296e-05, "loss": 1.6157, "step": 130400 }, { "epoch": 0.25738293865188966, "grad_norm": 0.11281777918338776, "learning_rate": 7.45630198488649e-05, "loss": 1.6193, "step": 130432 }, { "epoch": 0.25744608461328605, "grad_norm": 0.10736407339572906, "learning_rate": 7.455667962450021e-05, "loss": 1.6158, "step": 130464 }, { "epoch": 0.2575092305746825, "grad_norm": 0.10818787664175034, "learning_rate": 7.455033940013553e-05, "loss": 1.6045, "step": 130496 }, { "epoch": 0.25757237653607895, "grad_norm": 0.11346203088760376, "learning_rate": 7.454399917577084e-05, "loss": 1.6309, "step": 130528 }, { "epoch": 0.2576355224974754, "grad_norm": 0.105833500623703, "learning_rate": 7.453765895140614e-05, "loss": 1.6085, "step": 130560 }, { "epoch": 0.25769866845887185, "grad_norm": 0.09954508394002914, "learning_rate": 7.453131872704146e-05, "loss": 1.6079, "step": 130592 }, { "epoch": 0.2577618144202683, "grad_norm": 0.11163540929555893, "learning_rate": 7.452497850267677e-05, "loss": 1.6159, "step": 130624 }, { "epoch": 0.2578249603816647, "grad_norm": 0.12326701730489731, "learning_rate": 7.451863827831207e-05, "loss": 1.6338, "step": 130656 }, { "epoch": 0.25788810634306114, "grad_norm": 0.10490484535694122, "learning_rate": 7.451229805394739e-05, "loss": 1.6089, "step": 130688 }, { "epoch": 0.2579512523044576, "grad_norm": 0.11521980166435242, "learning_rate": 7.450595782958269e-05, "loss": 1.6072, "step": 130720 }, { "epoch": 0.25801439826585404, "grad_norm": 0.11303827166557312, "learning_rate": 7.4499617605218e-05, "loss": 1.6084, "step": 130752 }, { "epoch": 0.2580775442272505, "grad_norm": 0.1141236275434494, "learning_rate": 7.449327738085332e-05, "loss": 1.6111, "step": 130784 }, { "epoch": 0.25814069018864694, "grad_norm": 0.11162339895963669, "learning_rate": 7.448693715648863e-05, "loss": 1.6252, "step": 130816 }, { "epoch": 0.25820383615004333, "grad_norm": 0.10696367919445038, "learning_rate": 7.448059693212393e-05, "loss": 1.6133, "step": 130848 }, { "epoch": 0.2582669821114398, "grad_norm": 0.1008777990937233, "learning_rate": 7.447425670775925e-05, "loss": 1.6059, "step": 130880 }, { "epoch": 0.25833012807283623, "grad_norm": 0.11081548780202866, "learning_rate": 7.446791648339456e-05, "loss": 1.6105, "step": 130912 }, { "epoch": 0.2583932740342327, "grad_norm": 0.11868808418512344, "learning_rate": 7.446157625902988e-05, "loss": 1.5982, "step": 130944 }, { "epoch": 0.2584564199956291, "grad_norm": 0.10667791962623596, "learning_rate": 7.445523603466518e-05, "loss": 1.6054, "step": 130976 }, { "epoch": 0.2585195659570256, "grad_norm": 0.1071515679359436, "learning_rate": 7.44488958103005e-05, "loss": 1.627, "step": 131008 }, { "epoch": 0.25858271191842197, "grad_norm": 0.10900339484214783, "learning_rate": 7.444255558593581e-05, "loss": 1.6205, "step": 131040 }, { "epoch": 0.2586458578798184, "grad_norm": 0.10290917754173279, "learning_rate": 7.443621536157111e-05, "loss": 1.6222, "step": 131072 }, { "epoch": 0.25870900384121487, "grad_norm": 0.1087384894490242, "learning_rate": 7.442987513720642e-05, "loss": 1.61, "step": 131104 }, { "epoch": 0.2587721498026113, "grad_norm": 0.11124587059020996, "learning_rate": 7.442353491284172e-05, "loss": 1.6075, "step": 131136 }, { "epoch": 0.25883529576400777, "grad_norm": 0.11455298215150833, "learning_rate": 7.441719468847704e-05, "loss": 1.6155, "step": 131168 }, { "epoch": 0.2588984417254042, "grad_norm": 0.10650830715894699, "learning_rate": 7.441085446411235e-05, "loss": 1.5983, "step": 131200 }, { "epoch": 0.2589615876868006, "grad_norm": 0.11324498057365417, "learning_rate": 7.440451423974766e-05, "loss": 1.6035, "step": 131232 }, { "epoch": 0.25902473364819706, "grad_norm": 0.10617034137248993, "learning_rate": 7.439817401538297e-05, "loss": 1.6124, "step": 131264 }, { "epoch": 0.2590878796095935, "grad_norm": 0.11221248656511307, "learning_rate": 7.439183379101828e-05, "loss": 1.6133, "step": 131296 }, { "epoch": 0.25915102557098996, "grad_norm": 0.11007510870695114, "learning_rate": 7.43854935666536e-05, "loss": 1.6133, "step": 131328 }, { "epoch": 0.2592141715323864, "grad_norm": 0.10629601776599884, "learning_rate": 7.43791533422889e-05, "loss": 1.6237, "step": 131360 }, { "epoch": 0.25927731749378286, "grad_norm": 0.11356817185878754, "learning_rate": 7.437281311792421e-05, "loss": 1.593, "step": 131392 }, { "epoch": 0.25934046345517925, "grad_norm": 0.10988583415746689, "learning_rate": 7.436647289355953e-05, "loss": 1.6087, "step": 131424 }, { "epoch": 0.2594036094165757, "grad_norm": 0.10281365364789963, "learning_rate": 7.436013266919484e-05, "loss": 1.6085, "step": 131456 }, { "epoch": 0.25946675537797215, "grad_norm": 0.1106758564710617, "learning_rate": 7.435379244483014e-05, "loss": 1.6023, "step": 131488 }, { "epoch": 0.2595299013393686, "grad_norm": 0.1066455990076065, "learning_rate": 7.434745222046545e-05, "loss": 1.6162, "step": 131520 }, { "epoch": 0.25959304730076505, "grad_norm": 0.11346118897199631, "learning_rate": 7.434111199610076e-05, "loss": 1.6166, "step": 131552 }, { "epoch": 0.2596561932621615, "grad_norm": 0.10770457983016968, "learning_rate": 7.433477177173607e-05, "loss": 1.6093, "step": 131584 }, { "epoch": 0.25971933922355794, "grad_norm": 0.1185644268989563, "learning_rate": 7.432843154737139e-05, "loss": 1.609, "step": 131616 }, { "epoch": 0.25978248518495434, "grad_norm": 0.11142583191394806, "learning_rate": 7.432209132300669e-05, "loss": 1.6096, "step": 131648 }, { "epoch": 0.2598456311463508, "grad_norm": 0.11487526446580887, "learning_rate": 7.4315751098642e-05, "loss": 1.6182, "step": 131680 }, { "epoch": 0.25990877710774724, "grad_norm": 0.11717316508293152, "learning_rate": 7.430941087427732e-05, "loss": 1.6111, "step": 131712 }, { "epoch": 0.2599719230691437, "grad_norm": 0.11668983846902847, "learning_rate": 7.430307064991263e-05, "loss": 1.6116, "step": 131744 }, { "epoch": 0.26003506903054013, "grad_norm": 0.12166266143321991, "learning_rate": 7.429673042554793e-05, "loss": 1.6101, "step": 131776 }, { "epoch": 0.2600982149919366, "grad_norm": 0.10517725348472595, "learning_rate": 7.429039020118325e-05, "loss": 1.6167, "step": 131808 }, { "epoch": 0.260161360953333, "grad_norm": 0.11283927410840988, "learning_rate": 7.428404997681856e-05, "loss": 1.6091, "step": 131840 }, { "epoch": 0.2602245069147294, "grad_norm": 0.10576330870389938, "learning_rate": 7.427770975245388e-05, "loss": 1.6239, "step": 131872 }, { "epoch": 0.2602876528761259, "grad_norm": 0.10896219313144684, "learning_rate": 7.427136952808918e-05, "loss": 1.6061, "step": 131904 }, { "epoch": 0.2603507988375223, "grad_norm": 0.10660555958747864, "learning_rate": 7.426502930372448e-05, "loss": 1.6274, "step": 131936 }, { "epoch": 0.2604139447989188, "grad_norm": 0.11188357323408127, "learning_rate": 7.42586890793598e-05, "loss": 1.6155, "step": 131968 }, { "epoch": 0.2604770907603152, "grad_norm": 0.10984139889478683, "learning_rate": 7.425234885499511e-05, "loss": 1.6091, "step": 132000 }, { "epoch": 0.2605402367217116, "grad_norm": 0.10720211267471313, "learning_rate": 7.424600863063041e-05, "loss": 1.6065, "step": 132032 }, { "epoch": 0.26060338268310806, "grad_norm": 0.11262527108192444, "learning_rate": 7.423966840626573e-05, "loss": 1.6231, "step": 132064 }, { "epoch": 0.2606665286445045, "grad_norm": 0.1069030910730362, "learning_rate": 7.423332818190104e-05, "loss": 1.6108, "step": 132096 }, { "epoch": 0.26072967460590096, "grad_norm": 0.10583042353391647, "learning_rate": 7.422698795753635e-05, "loss": 1.6128, "step": 132128 }, { "epoch": 0.2607928205672974, "grad_norm": 0.10477813333272934, "learning_rate": 7.422064773317167e-05, "loss": 1.6224, "step": 132160 }, { "epoch": 0.26085596652869386, "grad_norm": 0.10200940817594528, "learning_rate": 7.421430750880697e-05, "loss": 1.6075, "step": 132192 }, { "epoch": 0.26091911249009025, "grad_norm": 0.1078220009803772, "learning_rate": 7.420796728444228e-05, "loss": 1.618, "step": 132224 }, { "epoch": 0.2609822584514867, "grad_norm": 0.10805245488882065, "learning_rate": 7.42016270600776e-05, "loss": 1.6199, "step": 132256 }, { "epoch": 0.26104540441288315, "grad_norm": 0.10207384824752808, "learning_rate": 7.419528683571291e-05, "loss": 1.6153, "step": 132288 }, { "epoch": 0.2611085503742796, "grad_norm": 0.10227125883102417, "learning_rate": 7.418894661134821e-05, "loss": 1.6254, "step": 132320 }, { "epoch": 0.26117169633567605, "grad_norm": 0.10752616822719574, "learning_rate": 7.418260638698352e-05, "loss": 1.6081, "step": 132352 }, { "epoch": 0.2612348422970725, "grad_norm": 0.11380825936794281, "learning_rate": 7.417626616261883e-05, "loss": 1.5994, "step": 132384 }, { "epoch": 0.2612979882584689, "grad_norm": 0.11722514778375626, "learning_rate": 7.416992593825414e-05, "loss": 1.6214, "step": 132416 }, { "epoch": 0.26136113421986534, "grad_norm": 0.09964317083358765, "learning_rate": 7.416358571388945e-05, "loss": 1.6121, "step": 132448 }, { "epoch": 0.2614242801812618, "grad_norm": 0.10399201512336731, "learning_rate": 7.415724548952476e-05, "loss": 1.5978, "step": 132480 }, { "epoch": 0.26148742614265824, "grad_norm": 0.1094483882188797, "learning_rate": 7.415090526516007e-05, "loss": 1.6134, "step": 132512 }, { "epoch": 0.2615505721040547, "grad_norm": 0.11518104374408722, "learning_rate": 7.414456504079539e-05, "loss": 1.6098, "step": 132544 }, { "epoch": 0.26161371806545114, "grad_norm": 0.111690953373909, "learning_rate": 7.413822481643069e-05, "loss": 1.6153, "step": 132576 }, { "epoch": 0.26167686402684753, "grad_norm": 0.11402498185634613, "learning_rate": 7.4131884592066e-05, "loss": 1.6175, "step": 132608 }, { "epoch": 0.261740009988244, "grad_norm": 0.10443051159381866, "learning_rate": 7.412554436770132e-05, "loss": 1.6043, "step": 132640 }, { "epoch": 0.26180315594964043, "grad_norm": 0.10866302996873856, "learning_rate": 7.411920414333663e-05, "loss": 1.613, "step": 132672 }, { "epoch": 0.2618663019110369, "grad_norm": 0.10197242349386215, "learning_rate": 7.411286391897194e-05, "loss": 1.6144, "step": 132704 }, { "epoch": 0.26192944787243333, "grad_norm": 0.11655410379171371, "learning_rate": 7.410652369460725e-05, "loss": 1.627, "step": 132736 }, { "epoch": 0.2619925938338298, "grad_norm": 0.10013627260923386, "learning_rate": 7.410018347024255e-05, "loss": 1.6099, "step": 132768 }, { "epoch": 0.2620557397952262, "grad_norm": 0.10514095425605774, "learning_rate": 7.409384324587787e-05, "loss": 1.6214, "step": 132800 }, { "epoch": 0.2621188857566226, "grad_norm": 0.10498439520597458, "learning_rate": 7.408750302151318e-05, "loss": 1.6091, "step": 132832 }, { "epoch": 0.26218203171801907, "grad_norm": 0.10498408228158951, "learning_rate": 7.408116279714848e-05, "loss": 1.608, "step": 132864 }, { "epoch": 0.2622451776794155, "grad_norm": 0.10708972811698914, "learning_rate": 7.40748225727838e-05, "loss": 1.6168, "step": 132896 }, { "epoch": 0.26230832364081197, "grad_norm": 0.10591749101877213, "learning_rate": 7.406848234841911e-05, "loss": 1.6106, "step": 132928 }, { "epoch": 0.2623714696022084, "grad_norm": 0.11292807012796402, "learning_rate": 7.406214212405442e-05, "loss": 1.6064, "step": 132960 }, { "epoch": 0.2624346155636048, "grad_norm": 0.1074332743883133, "learning_rate": 7.405580189968973e-05, "loss": 1.6287, "step": 132992 }, { "epoch": 0.26249776152500126, "grad_norm": 0.10224854201078415, "learning_rate": 7.404946167532504e-05, "loss": 1.6019, "step": 133024 }, { "epoch": 0.2625609074863977, "grad_norm": 0.11157242208719254, "learning_rate": 7.404312145096035e-05, "loss": 1.6122, "step": 133056 }, { "epoch": 0.26262405344779416, "grad_norm": 0.10429177433252335, "learning_rate": 7.403678122659567e-05, "loss": 1.6218, "step": 133088 }, { "epoch": 0.2626871994091906, "grad_norm": 0.10565786808729172, "learning_rate": 7.403044100223097e-05, "loss": 1.6105, "step": 133120 }, { "epoch": 0.26275034537058706, "grad_norm": 0.1053301990032196, "learning_rate": 7.402410077786628e-05, "loss": 1.6069, "step": 133152 }, { "epoch": 0.2628134913319835, "grad_norm": 0.10900627821683884, "learning_rate": 7.401776055350159e-05, "loss": 1.6003, "step": 133184 }, { "epoch": 0.2628766372933799, "grad_norm": 0.10631921142339706, "learning_rate": 7.40114203291369e-05, "loss": 1.6104, "step": 133216 }, { "epoch": 0.26293978325477635, "grad_norm": 0.10570721328258514, "learning_rate": 7.40050801047722e-05, "loss": 1.6092, "step": 133248 }, { "epoch": 0.2630029292161728, "grad_norm": 0.11719290167093277, "learning_rate": 7.399873988040752e-05, "loss": 1.6074, "step": 133280 }, { "epoch": 0.26306607517756925, "grad_norm": 0.11129261553287506, "learning_rate": 7.399239965604283e-05, "loss": 1.6005, "step": 133312 }, { "epoch": 0.2631292211389657, "grad_norm": 0.10646923631429672, "learning_rate": 7.398605943167815e-05, "loss": 1.616, "step": 133344 }, { "epoch": 0.26319236710036215, "grad_norm": 0.10687491297721863, "learning_rate": 7.397971920731345e-05, "loss": 1.6168, "step": 133376 }, { "epoch": 0.26325551306175854, "grad_norm": 0.1165340393781662, "learning_rate": 7.397337898294876e-05, "loss": 1.6099, "step": 133408 }, { "epoch": 0.263318659023155, "grad_norm": 0.10726451873779297, "learning_rate": 7.396703875858408e-05, "loss": 1.6035, "step": 133440 }, { "epoch": 0.26338180498455144, "grad_norm": 0.10097071528434753, "learning_rate": 7.396069853421939e-05, "loss": 1.6208, "step": 133472 }, { "epoch": 0.2634449509459479, "grad_norm": 0.10605879127979279, "learning_rate": 7.39543583098547e-05, "loss": 1.6055, "step": 133504 }, { "epoch": 0.26350809690734434, "grad_norm": 0.10595313459634781, "learning_rate": 7.394801808549e-05, "loss": 1.6149, "step": 133536 }, { "epoch": 0.2635712428687408, "grad_norm": 0.1006612628698349, "learning_rate": 7.394167786112532e-05, "loss": 1.6089, "step": 133568 }, { "epoch": 0.2636343888301372, "grad_norm": 0.10585619509220123, "learning_rate": 7.393533763676062e-05, "loss": 1.6101, "step": 133600 }, { "epoch": 0.2636975347915336, "grad_norm": 0.10954471677541733, "learning_rate": 7.392899741239594e-05, "loss": 1.6046, "step": 133632 }, { "epoch": 0.2637606807529301, "grad_norm": 0.11476067453622818, "learning_rate": 7.392265718803124e-05, "loss": 1.6099, "step": 133664 }, { "epoch": 0.2638238267143265, "grad_norm": 0.11173353344202042, "learning_rate": 7.391631696366655e-05, "loss": 1.6041, "step": 133696 }, { "epoch": 0.263886972675723, "grad_norm": 0.11101987957954407, "learning_rate": 7.390997673930187e-05, "loss": 1.6081, "step": 133728 }, { "epoch": 0.2639501186371194, "grad_norm": 0.10786333680152893, "learning_rate": 7.390363651493718e-05, "loss": 1.6137, "step": 133760 }, { "epoch": 0.2640132645985158, "grad_norm": 0.10600059479475021, "learning_rate": 7.389729629057248e-05, "loss": 1.6069, "step": 133792 }, { "epoch": 0.26407641055991227, "grad_norm": 0.10257264226675034, "learning_rate": 7.38909560662078e-05, "loss": 1.6047, "step": 133824 }, { "epoch": 0.2641395565213087, "grad_norm": 0.10793018341064453, "learning_rate": 7.388461584184311e-05, "loss": 1.6041, "step": 133856 }, { "epoch": 0.26420270248270517, "grad_norm": 0.1037888303399086, "learning_rate": 7.387827561747842e-05, "loss": 1.6145, "step": 133888 }, { "epoch": 0.2642658484441016, "grad_norm": 0.10750699788331985, "learning_rate": 7.387193539311373e-05, "loss": 1.603, "step": 133920 }, { "epoch": 0.26432899440549806, "grad_norm": 0.10909721255302429, "learning_rate": 7.386559516874904e-05, "loss": 1.609, "step": 133952 }, { "epoch": 0.26439214036689446, "grad_norm": 0.10319609194993973, "learning_rate": 7.385925494438436e-05, "loss": 1.6222, "step": 133984 }, { "epoch": 0.2644552863282909, "grad_norm": 0.10609913617372513, "learning_rate": 7.385291472001966e-05, "loss": 1.608, "step": 134016 }, { "epoch": 0.26451843228968736, "grad_norm": 0.1000935509800911, "learning_rate": 7.384657449565496e-05, "loss": 1.6163, "step": 134048 }, { "epoch": 0.2645815782510838, "grad_norm": 0.10459282994270325, "learning_rate": 7.384023427129027e-05, "loss": 1.5992, "step": 134080 }, { "epoch": 0.26464472421248025, "grad_norm": 0.10995563119649887, "learning_rate": 7.383389404692559e-05, "loss": 1.6024, "step": 134112 }, { "epoch": 0.2647078701738767, "grad_norm": 0.1150684803724289, "learning_rate": 7.38275538225609e-05, "loss": 1.6127, "step": 134144 }, { "epoch": 0.2647710161352731, "grad_norm": 0.1176714152097702, "learning_rate": 7.382121359819622e-05, "loss": 1.5986, "step": 134176 }, { "epoch": 0.26483416209666955, "grad_norm": 0.11197208613157272, "learning_rate": 7.381487337383152e-05, "loss": 1.6088, "step": 134208 }, { "epoch": 0.264897308058066, "grad_norm": 0.11053857952356339, "learning_rate": 7.380853314946683e-05, "loss": 1.6074, "step": 134240 }, { "epoch": 0.26496045401946244, "grad_norm": 0.10849561542272568, "learning_rate": 7.380219292510215e-05, "loss": 1.6053, "step": 134272 }, { "epoch": 0.2650235999808589, "grad_norm": 0.10623572766780853, "learning_rate": 7.379585270073746e-05, "loss": 1.6164, "step": 134304 }, { "epoch": 0.26508674594225534, "grad_norm": 0.11052551865577698, "learning_rate": 7.378951247637276e-05, "loss": 1.6037, "step": 134336 }, { "epoch": 0.26514989190365174, "grad_norm": 0.11220145970582962, "learning_rate": 7.378317225200808e-05, "loss": 1.6159, "step": 134368 }, { "epoch": 0.2652130378650482, "grad_norm": 0.10774993151426315, "learning_rate": 7.377683202764338e-05, "loss": 1.6097, "step": 134400 }, { "epoch": 0.26527618382644463, "grad_norm": 0.1051446795463562, "learning_rate": 7.377049180327869e-05, "loss": 1.6172, "step": 134432 }, { "epoch": 0.2653393297878411, "grad_norm": 0.11249789595603943, "learning_rate": 7.376415157891399e-05, "loss": 1.6108, "step": 134464 }, { "epoch": 0.26540247574923753, "grad_norm": 0.10095071792602539, "learning_rate": 7.37578113545493e-05, "loss": 1.593, "step": 134496 }, { "epoch": 0.265465621710634, "grad_norm": 0.10578959435224533, "learning_rate": 7.375147113018462e-05, "loss": 1.6181, "step": 134528 }, { "epoch": 0.2655287676720304, "grad_norm": 0.10583876073360443, "learning_rate": 7.374513090581994e-05, "loss": 1.6035, "step": 134560 }, { "epoch": 0.2655919136334268, "grad_norm": 0.1076938733458519, "learning_rate": 7.373879068145524e-05, "loss": 1.6161, "step": 134592 }, { "epoch": 0.2656550595948233, "grad_norm": 0.1081429272890091, "learning_rate": 7.373245045709055e-05, "loss": 1.6117, "step": 134624 }, { "epoch": 0.2657182055562197, "grad_norm": 0.10474961996078491, "learning_rate": 7.372611023272587e-05, "loss": 1.6064, "step": 134656 }, { "epoch": 0.26578135151761617, "grad_norm": 0.10079970210790634, "learning_rate": 7.371977000836118e-05, "loss": 1.61, "step": 134688 }, { "epoch": 0.2658444974790126, "grad_norm": 0.10435830056667328, "learning_rate": 7.371342978399648e-05, "loss": 1.6155, "step": 134720 }, { "epoch": 0.265907643440409, "grad_norm": 0.10284986346960068, "learning_rate": 7.37070895596318e-05, "loss": 1.6114, "step": 134752 }, { "epoch": 0.26597078940180546, "grad_norm": 0.10612160712480545, "learning_rate": 7.370074933526711e-05, "loss": 1.6122, "step": 134784 }, { "epoch": 0.2660339353632019, "grad_norm": 0.10711552947759628, "learning_rate": 7.369440911090241e-05, "loss": 1.6051, "step": 134816 }, { "epoch": 0.26609708132459836, "grad_norm": 0.11176897585391998, "learning_rate": 7.368806888653773e-05, "loss": 1.6114, "step": 134848 }, { "epoch": 0.2661602272859948, "grad_norm": 0.10485158860683441, "learning_rate": 7.368172866217303e-05, "loss": 1.6178, "step": 134880 }, { "epoch": 0.26622337324739126, "grad_norm": 0.11469212919473648, "learning_rate": 7.367538843780834e-05, "loss": 1.6081, "step": 134912 }, { "epoch": 0.2662865192087877, "grad_norm": 0.10416357219219208, "learning_rate": 7.366904821344366e-05, "loss": 1.6269, "step": 134944 }, { "epoch": 0.2663496651701841, "grad_norm": 0.11275526881217957, "learning_rate": 7.366270798907897e-05, "loss": 1.6018, "step": 134976 }, { "epoch": 0.26641281113158055, "grad_norm": 0.1164131686091423, "learning_rate": 7.365636776471427e-05, "loss": 1.5981, "step": 135008 }, { "epoch": 0.266475957092977, "grad_norm": 0.10406443476676941, "learning_rate": 7.365002754034959e-05, "loss": 1.6087, "step": 135040 }, { "epoch": 0.26653910305437345, "grad_norm": 0.10632601380348206, "learning_rate": 7.36436873159849e-05, "loss": 1.6069, "step": 135072 }, { "epoch": 0.2666022490157699, "grad_norm": 0.10499055683612823, "learning_rate": 7.363734709162022e-05, "loss": 1.5964, "step": 135104 }, { "epoch": 0.26666539497716635, "grad_norm": 0.11918951570987701, "learning_rate": 7.363100686725552e-05, "loss": 1.6104, "step": 135136 }, { "epoch": 0.26672854093856274, "grad_norm": 0.11259063333272934, "learning_rate": 7.362466664289083e-05, "loss": 1.6144, "step": 135168 }, { "epoch": 0.2667916868999592, "grad_norm": 0.10633876919746399, "learning_rate": 7.361832641852615e-05, "loss": 1.6119, "step": 135200 }, { "epoch": 0.26685483286135564, "grad_norm": 0.1007336750626564, "learning_rate": 7.361198619416145e-05, "loss": 1.597, "step": 135232 }, { "epoch": 0.2669179788227521, "grad_norm": 0.09957956522703171, "learning_rate": 7.360564596979676e-05, "loss": 1.6002, "step": 135264 }, { "epoch": 0.26698112478414854, "grad_norm": 0.1052396297454834, "learning_rate": 7.359930574543206e-05, "loss": 1.5996, "step": 135296 }, { "epoch": 0.267044270745545, "grad_norm": 0.10251233726739883, "learning_rate": 7.359296552106738e-05, "loss": 1.602, "step": 135328 }, { "epoch": 0.2671074167069414, "grad_norm": 0.10895755887031555, "learning_rate": 7.358662529670269e-05, "loss": 1.6117, "step": 135360 }, { "epoch": 0.26717056266833783, "grad_norm": 0.09954611957073212, "learning_rate": 7.358028507233799e-05, "loss": 1.6074, "step": 135392 }, { "epoch": 0.2672337086297343, "grad_norm": 0.11349955946207047, "learning_rate": 7.357394484797331e-05, "loss": 1.6193, "step": 135424 }, { "epoch": 0.26729685459113073, "grad_norm": 0.10204102098941803, "learning_rate": 7.356760462360862e-05, "loss": 1.6173, "step": 135456 }, { "epoch": 0.2673600005525272, "grad_norm": 0.1054556742310524, "learning_rate": 7.356126439924394e-05, "loss": 1.6119, "step": 135488 }, { "epoch": 0.2674231465139236, "grad_norm": 0.10490527749061584, "learning_rate": 7.355492417487925e-05, "loss": 1.6047, "step": 135520 }, { "epoch": 0.26748629247532, "grad_norm": 0.10282700508832932, "learning_rate": 7.354858395051455e-05, "loss": 1.6068, "step": 135552 }, { "epoch": 0.26754943843671647, "grad_norm": 0.10347724705934525, "learning_rate": 7.354224372614987e-05, "loss": 1.5943, "step": 135584 }, { "epoch": 0.2676125843981129, "grad_norm": 0.11144138127565384, "learning_rate": 7.353590350178518e-05, "loss": 1.6076, "step": 135616 }, { "epoch": 0.26767573035950937, "grad_norm": 0.11879058182239532, "learning_rate": 7.352956327742048e-05, "loss": 1.6028, "step": 135648 }, { "epoch": 0.2677388763209058, "grad_norm": 0.10743587464094162, "learning_rate": 7.35232230530558e-05, "loss": 1.609, "step": 135680 }, { "epoch": 0.26780202228230227, "grad_norm": 0.1044100821018219, "learning_rate": 7.35168828286911e-05, "loss": 1.609, "step": 135712 }, { "epoch": 0.26786516824369866, "grad_norm": 0.10664186626672745, "learning_rate": 7.351054260432641e-05, "loss": 1.6088, "step": 135744 }, { "epoch": 0.2679283142050951, "grad_norm": 0.10664991289377213, "learning_rate": 7.350420237996173e-05, "loss": 1.6039, "step": 135776 }, { "epoch": 0.26799146016649156, "grad_norm": 0.10977598279714584, "learning_rate": 7.349786215559703e-05, "loss": 1.6142, "step": 135808 }, { "epoch": 0.268054606127888, "grad_norm": 0.10533903539180756, "learning_rate": 7.349152193123234e-05, "loss": 1.6086, "step": 135840 }, { "epoch": 0.26811775208928446, "grad_norm": 0.10877023637294769, "learning_rate": 7.348518170686766e-05, "loss": 1.6084, "step": 135872 }, { "epoch": 0.2681808980506809, "grad_norm": 0.11798279732465744, "learning_rate": 7.347884148250297e-05, "loss": 1.6036, "step": 135904 }, { "epoch": 0.2682440440120773, "grad_norm": 0.1045432984828949, "learning_rate": 7.347250125813827e-05, "loss": 1.6115, "step": 135936 }, { "epoch": 0.26830718997347375, "grad_norm": 0.115162692964077, "learning_rate": 7.346616103377359e-05, "loss": 1.6136, "step": 135968 }, { "epoch": 0.2683703359348702, "grad_norm": 0.10045327991247177, "learning_rate": 7.34598208094089e-05, "loss": 1.6187, "step": 136000 }, { "epoch": 0.26843348189626665, "grad_norm": 0.10821972787380219, "learning_rate": 7.345348058504422e-05, "loss": 1.6134, "step": 136032 }, { "epoch": 0.2684966278576631, "grad_norm": 0.11383935809135437, "learning_rate": 7.344714036067952e-05, "loss": 1.6132, "step": 136064 }, { "epoch": 0.26855977381905954, "grad_norm": 0.09907317161560059, "learning_rate": 7.344080013631482e-05, "loss": 1.6038, "step": 136096 }, { "epoch": 0.26862291978045594, "grad_norm": 0.10620388388633728, "learning_rate": 7.343445991195013e-05, "loss": 1.6052, "step": 136128 }, { "epoch": 0.2686860657418524, "grad_norm": 0.10281985998153687, "learning_rate": 7.342811968758545e-05, "loss": 1.6005, "step": 136160 }, { "epoch": 0.26874921170324884, "grad_norm": 0.1163906455039978, "learning_rate": 7.342177946322076e-05, "loss": 1.5965, "step": 136192 }, { "epoch": 0.2688123576646453, "grad_norm": 0.10981487482786179, "learning_rate": 7.341543923885606e-05, "loss": 1.6019, "step": 136224 }, { "epoch": 0.26887550362604173, "grad_norm": 0.11343888938426971, "learning_rate": 7.340909901449138e-05, "loss": 1.6161, "step": 136256 }, { "epoch": 0.2689386495874382, "grad_norm": 0.11012143641710281, "learning_rate": 7.340275879012669e-05, "loss": 1.6021, "step": 136288 }, { "epoch": 0.2690017955488346, "grad_norm": 0.12245097756385803, "learning_rate": 7.3396418565762e-05, "loss": 1.5999, "step": 136320 }, { "epoch": 0.269064941510231, "grad_norm": 0.10646633803844452, "learning_rate": 7.339007834139731e-05, "loss": 1.6145, "step": 136352 }, { "epoch": 0.2691280874716275, "grad_norm": 0.10820487141609192, "learning_rate": 7.338373811703262e-05, "loss": 1.6093, "step": 136384 }, { "epoch": 0.2691912334330239, "grad_norm": 0.10841498523950577, "learning_rate": 7.337739789266794e-05, "loss": 1.6067, "step": 136416 }, { "epoch": 0.2692543793944204, "grad_norm": 0.12096355110406876, "learning_rate": 7.337105766830325e-05, "loss": 1.6078, "step": 136448 }, { "epoch": 0.2693175253558168, "grad_norm": 0.11072580516338348, "learning_rate": 7.336471744393855e-05, "loss": 1.6083, "step": 136480 }, { "epoch": 0.2693806713172132, "grad_norm": 0.11263925582170486, "learning_rate": 7.335837721957385e-05, "loss": 1.6112, "step": 136512 }, { "epoch": 0.26944381727860967, "grad_norm": 0.11243830621242523, "learning_rate": 7.335203699520917e-05, "loss": 1.6184, "step": 136544 }, { "epoch": 0.2695069632400061, "grad_norm": 0.10702415555715561, "learning_rate": 7.334569677084448e-05, "loss": 1.603, "step": 136576 }, { "epoch": 0.26957010920140256, "grad_norm": 0.09745298326015472, "learning_rate": 7.333935654647978e-05, "loss": 1.606, "step": 136608 }, { "epoch": 0.269633255162799, "grad_norm": 0.11158228665590286, "learning_rate": 7.33330163221151e-05, "loss": 1.6055, "step": 136640 }, { "epoch": 0.26969640112419546, "grad_norm": 0.11331474781036377, "learning_rate": 7.332667609775041e-05, "loss": 1.6143, "step": 136672 }, { "epoch": 0.2697595470855919, "grad_norm": 0.1052720844745636, "learning_rate": 7.332033587338573e-05, "loss": 1.6072, "step": 136704 }, { "epoch": 0.2698226930469883, "grad_norm": 0.10362829267978668, "learning_rate": 7.331399564902103e-05, "loss": 1.6147, "step": 136736 }, { "epoch": 0.26988583900838475, "grad_norm": 0.1109706461429596, "learning_rate": 7.330765542465634e-05, "loss": 1.5882, "step": 136768 }, { "epoch": 0.2699489849697812, "grad_norm": 0.10616286098957062, "learning_rate": 7.330131520029166e-05, "loss": 1.6123, "step": 136800 }, { "epoch": 0.27001213093117765, "grad_norm": 0.11944980174303055, "learning_rate": 7.329497497592697e-05, "loss": 1.5988, "step": 136832 }, { "epoch": 0.2700752768925741, "grad_norm": 0.11348874121904373, "learning_rate": 7.328863475156229e-05, "loss": 1.6108, "step": 136864 }, { "epoch": 0.27013842285397055, "grad_norm": 0.10996314883232117, "learning_rate": 7.328229452719759e-05, "loss": 1.6109, "step": 136896 }, { "epoch": 0.27020156881536694, "grad_norm": 0.11196240037679672, "learning_rate": 7.327595430283289e-05, "loss": 1.6144, "step": 136928 }, { "epoch": 0.2702647147767634, "grad_norm": 0.10875514894723892, "learning_rate": 7.32696140784682e-05, "loss": 1.6022, "step": 136960 }, { "epoch": 0.27032786073815984, "grad_norm": 0.10737422108650208, "learning_rate": 7.326327385410352e-05, "loss": 1.6134, "step": 136992 }, { "epoch": 0.2703910066995563, "grad_norm": 0.1196058839559555, "learning_rate": 7.325693362973882e-05, "loss": 1.6186, "step": 137024 }, { "epoch": 0.27045415266095274, "grad_norm": 0.11066177487373352, "learning_rate": 7.325059340537413e-05, "loss": 1.6142, "step": 137056 }, { "epoch": 0.2705172986223492, "grad_norm": 0.10822766274213791, "learning_rate": 7.324425318100945e-05, "loss": 1.6053, "step": 137088 }, { "epoch": 0.2705804445837456, "grad_norm": 0.10514150559902191, "learning_rate": 7.323791295664476e-05, "loss": 1.6163, "step": 137120 }, { "epoch": 0.27064359054514203, "grad_norm": 0.10512989014387131, "learning_rate": 7.323157273228006e-05, "loss": 1.6124, "step": 137152 }, { "epoch": 0.2707067365065385, "grad_norm": 0.10496227443218231, "learning_rate": 7.322523250791538e-05, "loss": 1.5942, "step": 137184 }, { "epoch": 0.27076988246793493, "grad_norm": 0.11064368486404419, "learning_rate": 7.321889228355069e-05, "loss": 1.6046, "step": 137216 }, { "epoch": 0.2708330284293314, "grad_norm": 0.11531023681163788, "learning_rate": 7.3212552059186e-05, "loss": 1.6056, "step": 137248 }, { "epoch": 0.27089617439072783, "grad_norm": 0.11127570271492004, "learning_rate": 7.320621183482131e-05, "loss": 1.6244, "step": 137280 }, { "epoch": 0.2709593203521242, "grad_norm": 0.10632797330617905, "learning_rate": 7.319987161045662e-05, "loss": 1.5886, "step": 137312 }, { "epoch": 0.27102246631352067, "grad_norm": 0.10544941574335098, "learning_rate": 7.319353138609192e-05, "loss": 1.6034, "step": 137344 }, { "epoch": 0.2710856122749171, "grad_norm": 0.11425535380840302, "learning_rate": 7.318719116172724e-05, "loss": 1.6073, "step": 137376 }, { "epoch": 0.27114875823631357, "grad_norm": 0.10999225080013275, "learning_rate": 7.318085093736254e-05, "loss": 1.5998, "step": 137408 }, { "epoch": 0.27121190419771, "grad_norm": 0.10617484897375107, "learning_rate": 7.317451071299785e-05, "loss": 1.6185, "step": 137440 }, { "epoch": 0.27127505015910647, "grad_norm": 0.10912901908159256, "learning_rate": 7.316817048863317e-05, "loss": 1.6149, "step": 137472 }, { "epoch": 0.27133819612050286, "grad_norm": 0.11024027317762375, "learning_rate": 7.316183026426848e-05, "loss": 1.602, "step": 137504 }, { "epoch": 0.2714013420818993, "grad_norm": 0.11293243616819382, "learning_rate": 7.31554900399038e-05, "loss": 1.6069, "step": 137536 }, { "epoch": 0.27146448804329576, "grad_norm": 0.11466856300830841, "learning_rate": 7.31491498155391e-05, "loss": 1.6005, "step": 137568 }, { "epoch": 0.2715276340046922, "grad_norm": 0.10746665298938751, "learning_rate": 7.314280959117441e-05, "loss": 1.6253, "step": 137600 }, { "epoch": 0.27159077996608866, "grad_norm": 0.1084543839097023, "learning_rate": 7.313646936680973e-05, "loss": 1.6148, "step": 137632 }, { "epoch": 0.2716539259274851, "grad_norm": 0.10356723517179489, "learning_rate": 7.313012914244504e-05, "loss": 1.6069, "step": 137664 }, { "epoch": 0.2717170718888815, "grad_norm": 0.1094108298420906, "learning_rate": 7.312378891808034e-05, "loss": 1.6046, "step": 137696 }, { "epoch": 0.27178021785027795, "grad_norm": 0.10308845341205597, "learning_rate": 7.311744869371566e-05, "loss": 1.5995, "step": 137728 }, { "epoch": 0.2718433638116744, "grad_norm": 0.10088953375816345, "learning_rate": 7.311110846935096e-05, "loss": 1.6166, "step": 137760 }, { "epoch": 0.27190650977307085, "grad_norm": 0.10615668445825577, "learning_rate": 7.310476824498627e-05, "loss": 1.6138, "step": 137792 }, { "epoch": 0.2719696557344673, "grad_norm": 0.11264284700155258, "learning_rate": 7.309842802062157e-05, "loss": 1.6274, "step": 137824 }, { "epoch": 0.27203280169586375, "grad_norm": 0.1094990149140358, "learning_rate": 7.309208779625689e-05, "loss": 1.6043, "step": 137856 }, { "epoch": 0.27209594765726014, "grad_norm": 0.10782024264335632, "learning_rate": 7.30857475718922e-05, "loss": 1.6034, "step": 137888 }, { "epoch": 0.2721590936186566, "grad_norm": 0.10610847920179367, "learning_rate": 7.307940734752752e-05, "loss": 1.5993, "step": 137920 }, { "epoch": 0.27222223958005304, "grad_norm": 0.1120687946677208, "learning_rate": 7.307306712316282e-05, "loss": 1.612, "step": 137952 }, { "epoch": 0.2722853855414495, "grad_norm": 0.10741465538740158, "learning_rate": 7.306672689879813e-05, "loss": 1.6156, "step": 137984 }, { "epoch": 0.27234853150284594, "grad_norm": 0.101494200527668, "learning_rate": 7.306038667443345e-05, "loss": 1.6064, "step": 138016 }, { "epoch": 0.2724116774642424, "grad_norm": 0.10113751888275146, "learning_rate": 7.305404645006876e-05, "loss": 1.6121, "step": 138048 }, { "epoch": 0.2724748234256388, "grad_norm": 0.12258037179708481, "learning_rate": 7.304770622570406e-05, "loss": 1.6035, "step": 138080 }, { "epoch": 0.27253796938703523, "grad_norm": 0.10584273189306259, "learning_rate": 7.304136600133938e-05, "loss": 1.6019, "step": 138112 }, { "epoch": 0.2726011153484317, "grad_norm": 0.10857692360877991, "learning_rate": 7.303502577697469e-05, "loss": 1.5972, "step": 138144 }, { "epoch": 0.2726642613098281, "grad_norm": 0.10216984152793884, "learning_rate": 7.302868555261e-05, "loss": 1.6103, "step": 138176 }, { "epoch": 0.2727274072712246, "grad_norm": 0.1066039502620697, "learning_rate": 7.302234532824531e-05, "loss": 1.5957, "step": 138208 }, { "epoch": 0.272790553232621, "grad_norm": 0.10498004406690598, "learning_rate": 7.301600510388061e-05, "loss": 1.5859, "step": 138240 }, { "epoch": 0.2728536991940175, "grad_norm": 0.10656807571649551, "learning_rate": 7.300966487951592e-05, "loss": 1.6159, "step": 138272 }, { "epoch": 0.27291684515541387, "grad_norm": 0.11368874460458755, "learning_rate": 7.300332465515124e-05, "loss": 1.6122, "step": 138304 }, { "epoch": 0.2729799911168103, "grad_norm": 0.10525816679000854, "learning_rate": 7.299698443078655e-05, "loss": 1.6103, "step": 138336 }, { "epoch": 0.27304313707820677, "grad_norm": 0.11544210463762283, "learning_rate": 7.299064420642185e-05, "loss": 1.6156, "step": 138368 }, { "epoch": 0.2731062830396032, "grad_norm": 0.10521316528320312, "learning_rate": 7.298430398205717e-05, "loss": 1.5968, "step": 138400 }, { "epoch": 0.27316942900099966, "grad_norm": 0.10763830691576004, "learning_rate": 7.297796375769248e-05, "loss": 1.6112, "step": 138432 }, { "epoch": 0.2732325749623961, "grad_norm": 0.1082969531416893, "learning_rate": 7.29716235333278e-05, "loss": 1.6161, "step": 138464 }, { "epoch": 0.2732957209237925, "grad_norm": 0.11077774316072464, "learning_rate": 7.29652833089631e-05, "loss": 1.632, "step": 138496 }, { "epoch": 0.27335886688518896, "grad_norm": 0.11002691835165024, "learning_rate": 7.295894308459841e-05, "loss": 1.6073, "step": 138528 }, { "epoch": 0.2734220128465854, "grad_norm": 0.1076987013220787, "learning_rate": 7.295260286023371e-05, "loss": 1.6163, "step": 138560 }, { "epoch": 0.27348515880798185, "grad_norm": 0.11410069465637207, "learning_rate": 7.294626263586903e-05, "loss": 1.6108, "step": 138592 }, { "epoch": 0.2735483047693783, "grad_norm": 0.1137956976890564, "learning_rate": 7.293992241150433e-05, "loss": 1.6132, "step": 138624 }, { "epoch": 0.27361145073077475, "grad_norm": 0.10526256263256073, "learning_rate": 7.293358218713964e-05, "loss": 1.6099, "step": 138656 }, { "epoch": 0.27367459669217115, "grad_norm": 0.10719609260559082, "learning_rate": 7.292724196277496e-05, "loss": 1.6187, "step": 138688 }, { "epoch": 0.2737377426535676, "grad_norm": 0.10259390622377396, "learning_rate": 7.292090173841027e-05, "loss": 1.6231, "step": 138720 }, { "epoch": 0.27380088861496404, "grad_norm": 0.10427749902009964, "learning_rate": 7.291456151404559e-05, "loss": 1.5988, "step": 138752 }, { "epoch": 0.2738640345763605, "grad_norm": 0.10385967791080475, "learning_rate": 7.290822128968089e-05, "loss": 1.6039, "step": 138784 }, { "epoch": 0.27392718053775694, "grad_norm": 0.10709089785814285, "learning_rate": 7.29018810653162e-05, "loss": 1.6031, "step": 138816 }, { "epoch": 0.2739903264991534, "grad_norm": 0.10972519218921661, "learning_rate": 7.289554084095152e-05, "loss": 1.6081, "step": 138848 }, { "epoch": 0.2740534724605498, "grad_norm": 0.1053602397441864, "learning_rate": 7.288920061658683e-05, "loss": 1.6014, "step": 138880 }, { "epoch": 0.27411661842194623, "grad_norm": 0.1116967722773552, "learning_rate": 7.288286039222213e-05, "loss": 1.602, "step": 138912 }, { "epoch": 0.2741797643833427, "grad_norm": 0.11738558113574982, "learning_rate": 7.287652016785745e-05, "loss": 1.5972, "step": 138944 }, { "epoch": 0.27424291034473913, "grad_norm": 0.10538122057914734, "learning_rate": 7.287017994349275e-05, "loss": 1.6196, "step": 138976 }, { "epoch": 0.2743060563061356, "grad_norm": 0.10250070691108704, "learning_rate": 7.286383971912806e-05, "loss": 1.6146, "step": 139008 }, { "epoch": 0.27436920226753203, "grad_norm": 0.10746382921934128, "learning_rate": 7.285749949476336e-05, "loss": 1.61, "step": 139040 }, { "epoch": 0.2744323482289284, "grad_norm": 0.1076064258813858, "learning_rate": 7.285115927039868e-05, "loss": 1.6043, "step": 139072 }, { "epoch": 0.2744954941903249, "grad_norm": 0.10753529518842697, "learning_rate": 7.2844819046034e-05, "loss": 1.6063, "step": 139104 }, { "epoch": 0.2745586401517213, "grad_norm": 0.11315683275461197, "learning_rate": 7.283847882166931e-05, "loss": 1.6136, "step": 139136 }, { "epoch": 0.27462178611311777, "grad_norm": 0.11320410668849945, "learning_rate": 7.283213859730461e-05, "loss": 1.6001, "step": 139168 }, { "epoch": 0.2746849320745142, "grad_norm": 0.10548532009124756, "learning_rate": 7.282579837293992e-05, "loss": 1.608, "step": 139200 }, { "epoch": 0.27474807803591067, "grad_norm": 0.09956170618534088, "learning_rate": 7.281945814857524e-05, "loss": 1.6043, "step": 139232 }, { "epoch": 0.27481122399730706, "grad_norm": 0.10983692854642868, "learning_rate": 7.281311792421055e-05, "loss": 1.6079, "step": 139264 }, { "epoch": 0.2748743699587035, "grad_norm": 0.10724955797195435, "learning_rate": 7.280677769984585e-05, "loss": 1.6105, "step": 139296 }, { "epoch": 0.27493751592009996, "grad_norm": 0.10298347473144531, "learning_rate": 7.280043747548117e-05, "loss": 1.592, "step": 139328 }, { "epoch": 0.2750006618814964, "grad_norm": 0.11434242874383926, "learning_rate": 7.279409725111648e-05, "loss": 1.5987, "step": 139360 }, { "epoch": 0.27506380784289286, "grad_norm": 0.10061415284872055, "learning_rate": 7.278775702675178e-05, "loss": 1.6016, "step": 139392 }, { "epoch": 0.2751269538042893, "grad_norm": 0.11188656836748123, "learning_rate": 7.27814168023871e-05, "loss": 1.6159, "step": 139424 }, { "epoch": 0.2751900997656857, "grad_norm": 0.11809300631284714, "learning_rate": 7.27750765780224e-05, "loss": 1.6029, "step": 139456 }, { "epoch": 0.27525324572708215, "grad_norm": 0.11942886561155319, "learning_rate": 7.276873635365771e-05, "loss": 1.6032, "step": 139488 }, { "epoch": 0.2753163916884786, "grad_norm": 0.11183831840753555, "learning_rate": 7.276239612929303e-05, "loss": 1.6125, "step": 139520 }, { "epoch": 0.27537953764987505, "grad_norm": 0.11295828968286514, "learning_rate": 7.275605590492834e-05, "loss": 1.6269, "step": 139552 }, { "epoch": 0.2754426836112715, "grad_norm": 0.10338342934846878, "learning_rate": 7.274971568056364e-05, "loss": 1.6063, "step": 139584 }, { "epoch": 0.27550582957266795, "grad_norm": 0.10053700953722, "learning_rate": 7.274337545619896e-05, "loss": 1.6102, "step": 139616 }, { "epoch": 0.27556897553406434, "grad_norm": 0.10031751543283463, "learning_rate": 7.273703523183427e-05, "loss": 1.6153, "step": 139648 }, { "epoch": 0.2756321214954608, "grad_norm": 0.10287375003099442, "learning_rate": 7.273069500746959e-05, "loss": 1.6033, "step": 139680 }, { "epoch": 0.27569526745685724, "grad_norm": 0.11539004743099213, "learning_rate": 7.272435478310489e-05, "loss": 1.5963, "step": 139712 }, { "epoch": 0.2757584134182537, "grad_norm": 0.10440555214881897, "learning_rate": 7.27180145587402e-05, "loss": 1.6039, "step": 139744 }, { "epoch": 0.27582155937965014, "grad_norm": 0.11029437929391861, "learning_rate": 7.271167433437552e-05, "loss": 1.6046, "step": 139776 }, { "epoch": 0.2758847053410466, "grad_norm": 0.10828778147697449, "learning_rate": 7.270533411001082e-05, "loss": 1.5994, "step": 139808 }, { "epoch": 0.275947851302443, "grad_norm": 0.11068389564752579, "learning_rate": 7.269899388564613e-05, "loss": 1.6034, "step": 139840 }, { "epoch": 0.27601099726383943, "grad_norm": 0.10843534767627716, "learning_rate": 7.269265366128143e-05, "loss": 1.6079, "step": 139872 }, { "epoch": 0.2760741432252359, "grad_norm": 0.10967250168323517, "learning_rate": 7.268631343691675e-05, "loss": 1.6179, "step": 139904 }, { "epoch": 0.27613728918663233, "grad_norm": 0.10577895492315292, "learning_rate": 7.267997321255206e-05, "loss": 1.6053, "step": 139936 }, { "epoch": 0.2762004351480288, "grad_norm": 0.10573165118694305, "learning_rate": 7.267363298818737e-05, "loss": 1.5949, "step": 139968 }, { "epoch": 0.2762635811094252, "grad_norm": 0.10600164532661438, "learning_rate": 7.266729276382268e-05, "loss": 1.6017, "step": 140000 }, { "epoch": 0.2763267270708217, "grad_norm": 0.11545202881097794, "learning_rate": 7.2660952539458e-05, "loss": 1.6059, "step": 140032 }, { "epoch": 0.27638987303221807, "grad_norm": 0.10129143297672272, "learning_rate": 7.265461231509331e-05, "loss": 1.6038, "step": 140064 }, { "epoch": 0.2764530189936145, "grad_norm": 0.11578719317913055, "learning_rate": 7.264827209072862e-05, "loss": 1.6169, "step": 140096 }, { "epoch": 0.27651616495501097, "grad_norm": 0.10591530054807663, "learning_rate": 7.264193186636392e-05, "loss": 1.6087, "step": 140128 }, { "epoch": 0.2765793109164074, "grad_norm": 0.10603789985179901, "learning_rate": 7.263559164199924e-05, "loss": 1.6054, "step": 140160 }, { "epoch": 0.27664245687780387, "grad_norm": 0.10827932506799698, "learning_rate": 7.262925141763455e-05, "loss": 1.6216, "step": 140192 }, { "epoch": 0.2767056028392003, "grad_norm": 0.11001746356487274, "learning_rate": 7.262291119326985e-05, "loss": 1.6215, "step": 140224 }, { "epoch": 0.2767687488005967, "grad_norm": 0.10563153028488159, "learning_rate": 7.261657096890516e-05, "loss": 1.6043, "step": 140256 }, { "epoch": 0.27683189476199316, "grad_norm": 0.10176856070756912, "learning_rate": 7.261023074454047e-05, "loss": 1.6057, "step": 140288 }, { "epoch": 0.2768950407233896, "grad_norm": 0.10194772481918335, "learning_rate": 7.260389052017578e-05, "loss": 1.6102, "step": 140320 }, { "epoch": 0.27695818668478606, "grad_norm": 0.10760031640529633, "learning_rate": 7.25975502958111e-05, "loss": 1.5999, "step": 140352 }, { "epoch": 0.2770213326461825, "grad_norm": 0.11413019150495529, "learning_rate": 7.25912100714464e-05, "loss": 1.6143, "step": 140384 }, { "epoch": 0.27708447860757895, "grad_norm": 0.11079232394695282, "learning_rate": 7.258486984708171e-05, "loss": 1.5972, "step": 140416 }, { "epoch": 0.27714762456897535, "grad_norm": 0.11735320836305618, "learning_rate": 7.257852962271703e-05, "loss": 1.6214, "step": 140448 }, { "epoch": 0.2772107705303718, "grad_norm": 0.11206310242414474, "learning_rate": 7.257218939835234e-05, "loss": 1.5947, "step": 140480 }, { "epoch": 0.27727391649176825, "grad_norm": 0.10074267536401749, "learning_rate": 7.256584917398764e-05, "loss": 1.6058, "step": 140512 }, { "epoch": 0.2773370624531647, "grad_norm": 0.10533620417118073, "learning_rate": 7.255950894962296e-05, "loss": 1.614, "step": 140544 }, { "epoch": 0.27740020841456114, "grad_norm": 0.10960860550403595, "learning_rate": 7.255316872525827e-05, "loss": 1.6131, "step": 140576 }, { "epoch": 0.2774633543759576, "grad_norm": 0.10648467391729355, "learning_rate": 7.254682850089359e-05, "loss": 1.614, "step": 140608 }, { "epoch": 0.277526500337354, "grad_norm": 0.1147274300456047, "learning_rate": 7.254048827652889e-05, "loss": 1.6094, "step": 140640 }, { "epoch": 0.27758964629875044, "grad_norm": 0.11199229210615158, "learning_rate": 7.253414805216419e-05, "loss": 1.6028, "step": 140672 }, { "epoch": 0.2776527922601469, "grad_norm": 0.09860625863075256, "learning_rate": 7.25278078277995e-05, "loss": 1.6023, "step": 140704 }, { "epoch": 0.27771593822154333, "grad_norm": 0.10627433657646179, "learning_rate": 7.252146760343482e-05, "loss": 1.5919, "step": 140736 }, { "epoch": 0.2777790841829398, "grad_norm": 0.11265649646520615, "learning_rate": 7.251512737907013e-05, "loss": 1.6077, "step": 140768 }, { "epoch": 0.27784223014433623, "grad_norm": 0.11177830398082733, "learning_rate": 7.250878715470544e-05, "loss": 1.5975, "step": 140800 }, { "epoch": 0.2779053761057326, "grad_norm": 0.10543101280927658, "learning_rate": 7.250244693034075e-05, "loss": 1.6101, "step": 140832 }, { "epoch": 0.2779685220671291, "grad_norm": 0.1085662916302681, "learning_rate": 7.249610670597606e-05, "loss": 1.602, "step": 140864 }, { "epoch": 0.2780316680285255, "grad_norm": 0.10149335116147995, "learning_rate": 7.248976648161138e-05, "loss": 1.5982, "step": 140896 }, { "epoch": 0.278094813989922, "grad_norm": 0.10375605523586273, "learning_rate": 7.248342625724668e-05, "loss": 1.6002, "step": 140928 }, { "epoch": 0.2781579599513184, "grad_norm": 0.10206574946641922, "learning_rate": 7.2477086032882e-05, "loss": 1.587, "step": 140960 }, { "epoch": 0.27822110591271487, "grad_norm": 0.10288719832897186, "learning_rate": 7.247074580851731e-05, "loss": 1.5879, "step": 140992 }, { "epoch": 0.27828425187411127, "grad_norm": 0.10083068162202835, "learning_rate": 7.246440558415262e-05, "loss": 1.616, "step": 141024 }, { "epoch": 0.2783473978355077, "grad_norm": 0.10225941240787506, "learning_rate": 7.245806535978792e-05, "loss": 1.6111, "step": 141056 }, { "epoch": 0.27841054379690416, "grad_norm": 0.1042669340968132, "learning_rate": 7.245172513542323e-05, "loss": 1.6184, "step": 141088 }, { "epoch": 0.2784736897583006, "grad_norm": 0.10268131643533707, "learning_rate": 7.244538491105854e-05, "loss": 1.6174, "step": 141120 }, { "epoch": 0.27853683571969706, "grad_norm": 0.10237900912761688, "learning_rate": 7.243904468669385e-05, "loss": 1.6018, "step": 141152 }, { "epoch": 0.2785999816810935, "grad_norm": 0.1135939434170723, "learning_rate": 7.243270446232916e-05, "loss": 1.6032, "step": 141184 }, { "epoch": 0.2786631276424899, "grad_norm": 0.10880962759256363, "learning_rate": 7.242636423796447e-05, "loss": 1.594, "step": 141216 }, { "epoch": 0.27872627360388635, "grad_norm": 0.10294250398874283, "learning_rate": 7.242002401359978e-05, "loss": 1.6094, "step": 141248 }, { "epoch": 0.2787894195652828, "grad_norm": 0.10812443494796753, "learning_rate": 7.24136837892351e-05, "loss": 1.5994, "step": 141280 }, { "epoch": 0.27885256552667925, "grad_norm": 0.1082243099808693, "learning_rate": 7.24073435648704e-05, "loss": 1.6011, "step": 141312 }, { "epoch": 0.2789157114880757, "grad_norm": 0.10394982248544693, "learning_rate": 7.240100334050572e-05, "loss": 1.607, "step": 141344 }, { "epoch": 0.27897885744947215, "grad_norm": 0.10719755291938782, "learning_rate": 7.239466311614103e-05, "loss": 1.6069, "step": 141376 }, { "epoch": 0.27904200341086854, "grad_norm": 0.11045152693986893, "learning_rate": 7.238832289177634e-05, "loss": 1.6176, "step": 141408 }, { "epoch": 0.279105149372265, "grad_norm": 0.11409204453229904, "learning_rate": 7.238198266741165e-05, "loss": 1.6184, "step": 141440 }, { "epoch": 0.27916829533366144, "grad_norm": 0.11152160912752151, "learning_rate": 7.237564244304696e-05, "loss": 1.6048, "step": 141472 }, { "epoch": 0.2792314412950579, "grad_norm": 0.11334708333015442, "learning_rate": 7.236930221868226e-05, "loss": 1.6048, "step": 141504 }, { "epoch": 0.27929458725645434, "grad_norm": 0.11197617650032043, "learning_rate": 7.236296199431758e-05, "loss": 1.6035, "step": 141536 }, { "epoch": 0.2793577332178508, "grad_norm": 0.11193860322237015, "learning_rate": 7.235662176995289e-05, "loss": 1.6022, "step": 141568 }, { "epoch": 0.27942087917924724, "grad_norm": 0.12336414307355881, "learning_rate": 7.235028154558819e-05, "loss": 1.6061, "step": 141600 }, { "epoch": 0.27948402514064363, "grad_norm": 0.10348925739526749, "learning_rate": 7.23439413212235e-05, "loss": 1.6045, "step": 141632 }, { "epoch": 0.2795471711020401, "grad_norm": 0.10907229036092758, "learning_rate": 7.233760109685882e-05, "loss": 1.6184, "step": 141664 }, { "epoch": 0.27961031706343653, "grad_norm": 0.11674027144908905, "learning_rate": 7.233126087249413e-05, "loss": 1.6027, "step": 141696 }, { "epoch": 0.279673463024833, "grad_norm": 0.10845894366502762, "learning_rate": 7.232492064812944e-05, "loss": 1.6114, "step": 141728 }, { "epoch": 0.27973660898622943, "grad_norm": 0.1105903759598732, "learning_rate": 7.231858042376475e-05, "loss": 1.6059, "step": 141760 }, { "epoch": 0.2797997549476259, "grad_norm": 0.11820506304502487, "learning_rate": 7.231224019940006e-05, "loss": 1.6071, "step": 141792 }, { "epoch": 0.27986290090902227, "grad_norm": 0.11597861349582672, "learning_rate": 7.230589997503538e-05, "loss": 1.6018, "step": 141824 }, { "epoch": 0.2799260468704187, "grad_norm": 0.10703887045383453, "learning_rate": 7.229955975067068e-05, "loss": 1.6101, "step": 141856 }, { "epoch": 0.27998919283181517, "grad_norm": 0.10526274144649506, "learning_rate": 7.2293219526306e-05, "loss": 1.5902, "step": 141888 }, { "epoch": 0.2800523387932116, "grad_norm": 0.10980308800935745, "learning_rate": 7.22868793019413e-05, "loss": 1.6085, "step": 141920 }, { "epoch": 0.28011548475460807, "grad_norm": 0.11822338402271271, "learning_rate": 7.228053907757661e-05, "loss": 1.6007, "step": 141952 }, { "epoch": 0.2801786307160045, "grad_norm": 0.10629415512084961, "learning_rate": 7.227419885321191e-05, "loss": 1.6061, "step": 141984 }, { "epoch": 0.2802417766774009, "grad_norm": 0.10353866219520569, "learning_rate": 7.226785862884723e-05, "loss": 1.5906, "step": 142016 }, { "epoch": 0.28030492263879736, "grad_norm": 0.1042291447520256, "learning_rate": 7.226151840448254e-05, "loss": 1.6029, "step": 142048 }, { "epoch": 0.2803680686001938, "grad_norm": 0.11149253696203232, "learning_rate": 7.225517818011786e-05, "loss": 1.6022, "step": 142080 }, { "epoch": 0.28043121456159026, "grad_norm": 0.10171899199485779, "learning_rate": 7.224883795575317e-05, "loss": 1.6227, "step": 142112 }, { "epoch": 0.2804943605229867, "grad_norm": 0.1131385862827301, "learning_rate": 7.224249773138847e-05, "loss": 1.5984, "step": 142144 }, { "epoch": 0.28055750648438316, "grad_norm": 0.11076127737760544, "learning_rate": 7.223615750702379e-05, "loss": 1.603, "step": 142176 }, { "epoch": 0.28062065244577955, "grad_norm": 0.10894398391246796, "learning_rate": 7.22298172826591e-05, "loss": 1.6074, "step": 142208 }, { "epoch": 0.280683798407176, "grad_norm": 0.107597716152668, "learning_rate": 7.222347705829441e-05, "loss": 1.6097, "step": 142240 }, { "epoch": 0.28074694436857245, "grad_norm": 0.10396246612071991, "learning_rate": 7.221713683392972e-05, "loss": 1.5811, "step": 142272 }, { "epoch": 0.2808100903299689, "grad_norm": 0.11265064775943756, "learning_rate": 7.221079660956503e-05, "loss": 1.6068, "step": 142304 }, { "epoch": 0.28087323629136535, "grad_norm": 0.11541104316711426, "learning_rate": 7.220445638520033e-05, "loss": 1.6007, "step": 142336 }, { "epoch": 0.2809363822527618, "grad_norm": 0.10701020807027817, "learning_rate": 7.219811616083565e-05, "loss": 1.6005, "step": 142368 }, { "epoch": 0.2809995282141582, "grad_norm": 0.10639555752277374, "learning_rate": 7.219177593647095e-05, "loss": 1.59, "step": 142400 }, { "epoch": 0.28106267417555464, "grad_norm": 0.11335942149162292, "learning_rate": 7.218543571210626e-05, "loss": 1.6057, "step": 142432 }, { "epoch": 0.2811258201369511, "grad_norm": 0.10396453738212585, "learning_rate": 7.217909548774158e-05, "loss": 1.6, "step": 142464 }, { "epoch": 0.28118896609834754, "grad_norm": 0.11395416408777237, "learning_rate": 7.217275526337689e-05, "loss": 1.6118, "step": 142496 }, { "epoch": 0.281252112059744, "grad_norm": 0.10711883008480072, "learning_rate": 7.216641503901219e-05, "loss": 1.6113, "step": 142528 }, { "epoch": 0.28131525802114044, "grad_norm": 0.11413022875785828, "learning_rate": 7.21600748146475e-05, "loss": 1.6212, "step": 142560 }, { "epoch": 0.28137840398253683, "grad_norm": 0.11210522055625916, "learning_rate": 7.215373459028282e-05, "loss": 1.6146, "step": 142592 }, { "epoch": 0.2814415499439333, "grad_norm": 0.10961081087589264, "learning_rate": 7.214739436591813e-05, "loss": 1.6007, "step": 142624 }, { "epoch": 0.2815046959053297, "grad_norm": 0.10824732482433319, "learning_rate": 7.214105414155344e-05, "loss": 1.5992, "step": 142656 }, { "epoch": 0.2815678418667262, "grad_norm": 0.10795754194259644, "learning_rate": 7.213471391718875e-05, "loss": 1.6049, "step": 142688 }, { "epoch": 0.2816309878281226, "grad_norm": 0.10979624837636948, "learning_rate": 7.212837369282405e-05, "loss": 1.6099, "step": 142720 }, { "epoch": 0.2816941337895191, "grad_norm": 0.10198965668678284, "learning_rate": 7.212203346845937e-05, "loss": 1.6163, "step": 142752 }, { "epoch": 0.28175727975091547, "grad_norm": 0.1076868399977684, "learning_rate": 7.211569324409468e-05, "loss": 1.6022, "step": 142784 }, { "epoch": 0.2818204257123119, "grad_norm": 0.10252309590578079, "learning_rate": 7.210935301972998e-05, "loss": 1.6058, "step": 142816 }, { "epoch": 0.28188357167370837, "grad_norm": 0.10857546329498291, "learning_rate": 7.21030127953653e-05, "loss": 1.6054, "step": 142848 }, { "epoch": 0.2819467176351048, "grad_norm": 0.10896912962198257, "learning_rate": 7.209667257100061e-05, "loss": 1.6055, "step": 142880 }, { "epoch": 0.28200986359650126, "grad_norm": 0.1093268096446991, "learning_rate": 7.209033234663593e-05, "loss": 1.6093, "step": 142912 }, { "epoch": 0.2820730095578977, "grad_norm": 0.10696156322956085, "learning_rate": 7.208399212227123e-05, "loss": 1.5884, "step": 142944 }, { "epoch": 0.2821361555192941, "grad_norm": 0.1142810732126236, "learning_rate": 7.207765189790654e-05, "loss": 1.6073, "step": 142976 }, { "epoch": 0.28219930148069056, "grad_norm": 0.1061597615480423, "learning_rate": 7.207131167354186e-05, "loss": 1.6091, "step": 143008 }, { "epoch": 0.282262447442087, "grad_norm": 0.11574750393629074, "learning_rate": 7.206497144917717e-05, "loss": 1.596, "step": 143040 }, { "epoch": 0.28232559340348345, "grad_norm": 0.10675003379583359, "learning_rate": 7.205863122481247e-05, "loss": 1.6057, "step": 143072 }, { "epoch": 0.2823887393648799, "grad_norm": 0.11051331460475922, "learning_rate": 7.205229100044779e-05, "loss": 1.6074, "step": 143104 }, { "epoch": 0.28245188532627635, "grad_norm": 0.10394987463951111, "learning_rate": 7.204595077608309e-05, "loss": 1.6022, "step": 143136 }, { "epoch": 0.28251503128767275, "grad_norm": 0.10684893280267715, "learning_rate": 7.20396105517184e-05, "loss": 1.5979, "step": 143168 }, { "epoch": 0.2825781772490692, "grad_norm": 0.11293821036815643, "learning_rate": 7.20332703273537e-05, "loss": 1.609, "step": 143200 }, { "epoch": 0.28264132321046564, "grad_norm": 0.10789318382740021, "learning_rate": 7.202693010298902e-05, "loss": 1.6146, "step": 143232 }, { "epoch": 0.2827044691718621, "grad_norm": 0.0996130034327507, "learning_rate": 7.202058987862433e-05, "loss": 1.5966, "step": 143264 }, { "epoch": 0.28276761513325854, "grad_norm": 0.1046484187245369, "learning_rate": 7.201424965425965e-05, "loss": 1.6021, "step": 143296 }, { "epoch": 0.282830761094655, "grad_norm": 0.1035534217953682, "learning_rate": 7.200790942989495e-05, "loss": 1.5998, "step": 143328 }, { "epoch": 0.28289390705605144, "grad_norm": 0.1182725727558136, "learning_rate": 7.200156920553026e-05, "loss": 1.5969, "step": 143360 }, { "epoch": 0.28295705301744783, "grad_norm": 0.11813196539878845, "learning_rate": 7.199522898116558e-05, "loss": 1.5962, "step": 143392 }, { "epoch": 0.2830201989788443, "grad_norm": 0.11393732577562332, "learning_rate": 7.198888875680089e-05, "loss": 1.607, "step": 143424 }, { "epoch": 0.28308334494024073, "grad_norm": 0.11106444895267487, "learning_rate": 7.19825485324362e-05, "loss": 1.5973, "step": 143456 }, { "epoch": 0.2831464909016372, "grad_norm": 0.10667029768228531, "learning_rate": 7.19762083080715e-05, "loss": 1.6212, "step": 143488 }, { "epoch": 0.28320963686303363, "grad_norm": 0.10621143877506256, "learning_rate": 7.196986808370682e-05, "loss": 1.5952, "step": 143520 }, { "epoch": 0.2832727828244301, "grad_norm": 0.1070103719830513, "learning_rate": 7.196352785934212e-05, "loss": 1.6074, "step": 143552 }, { "epoch": 0.2833359287858265, "grad_norm": 0.10600332170724869, "learning_rate": 7.195718763497744e-05, "loss": 1.5992, "step": 143584 }, { "epoch": 0.2833990747472229, "grad_norm": 0.12146126478910446, "learning_rate": 7.195084741061274e-05, "loss": 1.5951, "step": 143616 }, { "epoch": 0.2834622207086194, "grad_norm": 0.104829341173172, "learning_rate": 7.194450718624805e-05, "loss": 1.604, "step": 143648 }, { "epoch": 0.2835253666700158, "grad_norm": 0.10839908570051193, "learning_rate": 7.193816696188337e-05, "loss": 1.6097, "step": 143680 }, { "epoch": 0.28358851263141227, "grad_norm": 0.11826279014348984, "learning_rate": 7.193182673751868e-05, "loss": 1.6124, "step": 143712 }, { "epoch": 0.2836516585928087, "grad_norm": 0.10221344232559204, "learning_rate": 7.192548651315398e-05, "loss": 1.6053, "step": 143744 }, { "epoch": 0.2837148045542051, "grad_norm": 0.1202864870429039, "learning_rate": 7.19191462887893e-05, "loss": 1.6076, "step": 143776 }, { "epoch": 0.28377795051560156, "grad_norm": 0.10724340379238129, "learning_rate": 7.191280606442461e-05, "loss": 1.5984, "step": 143808 }, { "epoch": 0.283841096476998, "grad_norm": 0.1065857782959938, "learning_rate": 7.190646584005993e-05, "loss": 1.6075, "step": 143840 }, { "epoch": 0.28390424243839446, "grad_norm": 0.10594381392002106, "learning_rate": 7.190012561569523e-05, "loss": 1.6038, "step": 143872 }, { "epoch": 0.2839673883997909, "grad_norm": 0.10046811401844025, "learning_rate": 7.189378539133054e-05, "loss": 1.6021, "step": 143904 }, { "epoch": 0.28403053436118736, "grad_norm": 0.10927850008010864, "learning_rate": 7.188744516696586e-05, "loss": 1.606, "step": 143936 }, { "epoch": 0.28409368032258375, "grad_norm": 0.11072222888469696, "learning_rate": 7.188110494260116e-05, "loss": 1.6049, "step": 143968 }, { "epoch": 0.2841568262839802, "grad_norm": 0.10642096400260925, "learning_rate": 7.187476471823647e-05, "loss": 1.6096, "step": 144000 }, { "epoch": 0.28421997224537665, "grad_norm": 0.11058561503887177, "learning_rate": 7.186842449387177e-05, "loss": 1.6056, "step": 144032 }, { "epoch": 0.2842831182067731, "grad_norm": 0.10767220705747604, "learning_rate": 7.186208426950709e-05, "loss": 1.6095, "step": 144064 }, { "epoch": 0.28434626416816955, "grad_norm": 0.10795776546001434, "learning_rate": 7.18557440451424e-05, "loss": 1.6033, "step": 144096 }, { "epoch": 0.284409410129566, "grad_norm": 0.10739440470933914, "learning_rate": 7.184940382077772e-05, "loss": 1.6001, "step": 144128 }, { "epoch": 0.2844725560909624, "grad_norm": 0.1105915904045105, "learning_rate": 7.184306359641302e-05, "loss": 1.6063, "step": 144160 }, { "epoch": 0.28453570205235884, "grad_norm": 0.10494209825992584, "learning_rate": 7.183672337204833e-05, "loss": 1.6111, "step": 144192 }, { "epoch": 0.2845988480137553, "grad_norm": 0.10192414373159409, "learning_rate": 7.183038314768365e-05, "loss": 1.6148, "step": 144224 }, { "epoch": 0.28466199397515174, "grad_norm": 0.11192287504673004, "learning_rate": 7.182404292331896e-05, "loss": 1.6066, "step": 144256 }, { "epoch": 0.2847251399365482, "grad_norm": 0.10883938521146774, "learning_rate": 7.181770269895426e-05, "loss": 1.5984, "step": 144288 }, { "epoch": 0.28478828589794464, "grad_norm": 0.10139583051204681, "learning_rate": 7.181136247458958e-05, "loss": 1.6042, "step": 144320 }, { "epoch": 0.28485143185934103, "grad_norm": 0.1154075488448143, "learning_rate": 7.180502225022489e-05, "loss": 1.6124, "step": 144352 }, { "epoch": 0.2849145778207375, "grad_norm": 0.10956396162509918, "learning_rate": 7.179868202586019e-05, "loss": 1.605, "step": 144384 }, { "epoch": 0.28497772378213393, "grad_norm": 0.11622713506221771, "learning_rate": 7.179234180149549e-05, "loss": 1.597, "step": 144416 }, { "epoch": 0.2850408697435304, "grad_norm": 0.11885629594326019, "learning_rate": 7.178600157713081e-05, "loss": 1.606, "step": 144448 }, { "epoch": 0.2851040157049268, "grad_norm": 0.11188899725675583, "learning_rate": 7.177966135276612e-05, "loss": 1.6088, "step": 144480 }, { "epoch": 0.2851671616663233, "grad_norm": 0.11456351727247238, "learning_rate": 7.177332112840144e-05, "loss": 1.5963, "step": 144512 }, { "epoch": 0.28523030762771967, "grad_norm": 0.11649195104837418, "learning_rate": 7.176698090403674e-05, "loss": 1.6048, "step": 144544 }, { "epoch": 0.2852934535891161, "grad_norm": 0.10673061013221741, "learning_rate": 7.176064067967205e-05, "loss": 1.5998, "step": 144576 }, { "epoch": 0.28535659955051257, "grad_norm": 0.11773225665092468, "learning_rate": 7.175430045530737e-05, "loss": 1.6071, "step": 144608 }, { "epoch": 0.285419745511909, "grad_norm": 0.11230260878801346, "learning_rate": 7.174796023094268e-05, "loss": 1.6029, "step": 144640 }, { "epoch": 0.28548289147330547, "grad_norm": 0.11644361168146133, "learning_rate": 7.174162000657798e-05, "loss": 1.5975, "step": 144672 }, { "epoch": 0.2855460374347019, "grad_norm": 0.11056544631719589, "learning_rate": 7.17352797822133e-05, "loss": 1.5879, "step": 144704 }, { "epoch": 0.2856091833960983, "grad_norm": 0.11167342960834503, "learning_rate": 7.172893955784861e-05, "loss": 1.5982, "step": 144736 }, { "epoch": 0.28567232935749476, "grad_norm": 0.10501691699028015, "learning_rate": 7.172259933348393e-05, "loss": 1.5986, "step": 144768 }, { "epoch": 0.2857354753188912, "grad_norm": 0.11405222117900848, "learning_rate": 7.171625910911923e-05, "loss": 1.6024, "step": 144800 }, { "epoch": 0.28579862128028766, "grad_norm": 0.10829059779644012, "learning_rate": 7.170991888475453e-05, "loss": 1.6031, "step": 144832 }, { "epoch": 0.2858617672416841, "grad_norm": 0.12299740314483643, "learning_rate": 7.170357866038984e-05, "loss": 1.5998, "step": 144864 }, { "epoch": 0.28592491320308056, "grad_norm": 0.10545315593481064, "learning_rate": 7.169723843602516e-05, "loss": 1.6091, "step": 144896 }, { "epoch": 0.28598805916447695, "grad_norm": 0.10883430391550064, "learning_rate": 7.169089821166047e-05, "loss": 1.5987, "step": 144928 }, { "epoch": 0.2860512051258734, "grad_norm": 0.10413713753223419, "learning_rate": 7.168455798729577e-05, "loss": 1.609, "step": 144960 }, { "epoch": 0.28611435108726985, "grad_norm": 0.1098276749253273, "learning_rate": 7.167821776293109e-05, "loss": 1.6083, "step": 144992 }, { "epoch": 0.2861774970486663, "grad_norm": 0.10545022785663605, "learning_rate": 7.16718775385664e-05, "loss": 1.6115, "step": 145024 }, { "epoch": 0.28624064301006275, "grad_norm": 0.11426562070846558, "learning_rate": 7.166553731420172e-05, "loss": 1.6015, "step": 145056 }, { "epoch": 0.2863037889714592, "grad_norm": 0.11089000105857849, "learning_rate": 7.165919708983702e-05, "loss": 1.6013, "step": 145088 }, { "epoch": 0.28636693493285564, "grad_norm": 0.1029604896903038, "learning_rate": 7.165285686547233e-05, "loss": 1.6078, "step": 145120 }, { "epoch": 0.28643008089425204, "grad_norm": 0.11236855387687683, "learning_rate": 7.164651664110765e-05, "loss": 1.5883, "step": 145152 }, { "epoch": 0.2864932268556485, "grad_norm": 0.10302947461605072, "learning_rate": 7.164017641674296e-05, "loss": 1.6048, "step": 145184 }, { "epoch": 0.28655637281704494, "grad_norm": 0.10683345049619675, "learning_rate": 7.163383619237826e-05, "loss": 1.6015, "step": 145216 }, { "epoch": 0.2866195187784414, "grad_norm": 0.1122296154499054, "learning_rate": 7.162749596801356e-05, "loss": 1.6076, "step": 145248 }, { "epoch": 0.28668266473983783, "grad_norm": 0.1146034374833107, "learning_rate": 7.162115574364888e-05, "loss": 1.6012, "step": 145280 }, { "epoch": 0.2867458107012343, "grad_norm": 0.10625258833169937, "learning_rate": 7.161481551928419e-05, "loss": 1.6087, "step": 145312 }, { "epoch": 0.2868089566626307, "grad_norm": 0.1057293638586998, "learning_rate": 7.16084752949195e-05, "loss": 1.5994, "step": 145344 }, { "epoch": 0.2868721026240271, "grad_norm": 0.1120283454656601, "learning_rate": 7.160213507055481e-05, "loss": 1.596, "step": 145376 }, { "epoch": 0.2869352485854236, "grad_norm": 0.10851830989122391, "learning_rate": 7.159579484619012e-05, "loss": 1.6061, "step": 145408 }, { "epoch": 0.28699839454682, "grad_norm": 0.11628100275993347, "learning_rate": 7.158945462182544e-05, "loss": 1.6189, "step": 145440 }, { "epoch": 0.2870615405082165, "grad_norm": 0.11050492525100708, "learning_rate": 7.158311439746075e-05, "loss": 1.6009, "step": 145472 }, { "epoch": 0.2871246864696129, "grad_norm": 0.10678663849830627, "learning_rate": 7.157677417309605e-05, "loss": 1.6039, "step": 145504 }, { "epoch": 0.2871878324310093, "grad_norm": 0.10617556422948837, "learning_rate": 7.157043394873137e-05, "loss": 1.6002, "step": 145536 }, { "epoch": 0.28725097839240576, "grad_norm": 0.1099933609366417, "learning_rate": 7.156409372436668e-05, "loss": 1.5951, "step": 145568 }, { "epoch": 0.2873141243538022, "grad_norm": 0.10714271664619446, "learning_rate": 7.155775350000198e-05, "loss": 1.5954, "step": 145600 }, { "epoch": 0.28737727031519866, "grad_norm": 0.1074344664812088, "learning_rate": 7.15514132756373e-05, "loss": 1.5993, "step": 145632 }, { "epoch": 0.2874404162765951, "grad_norm": 0.10610081255435944, "learning_rate": 7.15450730512726e-05, "loss": 1.6006, "step": 145664 }, { "epoch": 0.28750356223799156, "grad_norm": 0.11171463876962662, "learning_rate": 7.153873282690791e-05, "loss": 1.6037, "step": 145696 }, { "epoch": 0.28756670819938795, "grad_norm": 0.10531670600175858, "learning_rate": 7.153239260254323e-05, "loss": 1.5984, "step": 145728 }, { "epoch": 0.2876298541607844, "grad_norm": 0.11367079615592957, "learning_rate": 7.152605237817853e-05, "loss": 1.6137, "step": 145760 }, { "epoch": 0.28769300012218085, "grad_norm": 0.11154003441333771, "learning_rate": 7.151971215381384e-05, "loss": 1.5993, "step": 145792 }, { "epoch": 0.2877561460835773, "grad_norm": 0.12410751730203629, "learning_rate": 7.151337192944916e-05, "loss": 1.6105, "step": 145824 }, { "epoch": 0.28781929204497375, "grad_norm": 0.1111689880490303, "learning_rate": 7.150703170508447e-05, "loss": 1.5901, "step": 145856 }, { "epoch": 0.2878824380063702, "grad_norm": 0.1172381192445755, "learning_rate": 7.150069148071977e-05, "loss": 1.6027, "step": 145888 }, { "epoch": 0.2879455839677666, "grad_norm": 0.11613024026155472, "learning_rate": 7.149435125635509e-05, "loss": 1.6001, "step": 145920 }, { "epoch": 0.28800872992916304, "grad_norm": 0.11064592748880386, "learning_rate": 7.14880110319904e-05, "loss": 1.606, "step": 145952 }, { "epoch": 0.2880718758905595, "grad_norm": 0.11777277290821075, "learning_rate": 7.148167080762572e-05, "loss": 1.6022, "step": 145984 }, { "epoch": 0.28813502185195594, "grad_norm": 0.10947234183549881, "learning_rate": 7.147533058326102e-05, "loss": 1.6143, "step": 146016 }, { "epoch": 0.2881981678133524, "grad_norm": 0.10328909754753113, "learning_rate": 7.146899035889633e-05, "loss": 1.6007, "step": 146048 }, { "epoch": 0.28826131377474884, "grad_norm": 0.11179718375205994, "learning_rate": 7.146265013453163e-05, "loss": 1.6088, "step": 146080 }, { "epoch": 0.28832445973614523, "grad_norm": 0.11844056099653244, "learning_rate": 7.145630991016695e-05, "loss": 1.6014, "step": 146112 }, { "epoch": 0.2883876056975417, "grad_norm": 0.12668026983737946, "learning_rate": 7.144996968580226e-05, "loss": 1.62, "step": 146144 }, { "epoch": 0.28845075165893813, "grad_norm": 0.10801265388727188, "learning_rate": 7.144362946143756e-05, "loss": 1.6127, "step": 146176 }, { "epoch": 0.2885138976203346, "grad_norm": 0.1138705238699913, "learning_rate": 7.143728923707288e-05, "loss": 1.6062, "step": 146208 }, { "epoch": 0.28857704358173103, "grad_norm": 0.11289960891008377, "learning_rate": 7.143094901270819e-05, "loss": 1.6037, "step": 146240 }, { "epoch": 0.2886401895431275, "grad_norm": 0.11056394875049591, "learning_rate": 7.142460878834351e-05, "loss": 1.6001, "step": 146272 }, { "epoch": 0.2887033355045239, "grad_norm": 0.10921726375818253, "learning_rate": 7.141826856397881e-05, "loss": 1.619, "step": 146304 }, { "epoch": 0.2887664814659203, "grad_norm": 0.10466153174638748, "learning_rate": 7.141192833961412e-05, "loss": 1.5916, "step": 146336 }, { "epoch": 0.28882962742731677, "grad_norm": 0.1139112189412117, "learning_rate": 7.140558811524944e-05, "loss": 1.5999, "step": 146368 }, { "epoch": 0.2888927733887132, "grad_norm": 0.10813932865858078, "learning_rate": 7.139924789088475e-05, "loss": 1.6015, "step": 146400 }, { "epoch": 0.28895591935010967, "grad_norm": 0.1114303320646286, "learning_rate": 7.139290766652005e-05, "loss": 1.606, "step": 146432 }, { "epoch": 0.2890190653115061, "grad_norm": 0.10804945975542068, "learning_rate": 7.138656744215537e-05, "loss": 1.5917, "step": 146464 }, { "epoch": 0.2890822112729025, "grad_norm": 0.10753102600574493, "learning_rate": 7.138022721779067e-05, "loss": 1.5981, "step": 146496 }, { "epoch": 0.28914535723429896, "grad_norm": 0.10849519073963165, "learning_rate": 7.137388699342598e-05, "loss": 1.6072, "step": 146528 }, { "epoch": 0.2892085031956954, "grad_norm": 0.10747122019529343, "learning_rate": 7.136754676906128e-05, "loss": 1.5937, "step": 146560 }, { "epoch": 0.28927164915709186, "grad_norm": 0.10553805530071259, "learning_rate": 7.13612065446966e-05, "loss": 1.5914, "step": 146592 }, { "epoch": 0.2893347951184883, "grad_norm": 0.11188548803329468, "learning_rate": 7.135486632033191e-05, "loss": 1.6041, "step": 146624 }, { "epoch": 0.28939794107988476, "grad_norm": 0.10540486872196198, "learning_rate": 7.134852609596723e-05, "loss": 1.5981, "step": 146656 }, { "epoch": 0.2894610870412812, "grad_norm": 0.1163511872291565, "learning_rate": 7.134218587160253e-05, "loss": 1.6033, "step": 146688 }, { "epoch": 0.2895242330026776, "grad_norm": 0.10686226189136505, "learning_rate": 7.133584564723784e-05, "loss": 1.6045, "step": 146720 }, { "epoch": 0.28958737896407405, "grad_norm": 0.10692430287599564, "learning_rate": 7.132950542287316e-05, "loss": 1.586, "step": 146752 }, { "epoch": 0.2896505249254705, "grad_norm": 0.10165756940841675, "learning_rate": 7.132316519850847e-05, "loss": 1.6093, "step": 146784 }, { "epoch": 0.28971367088686695, "grad_norm": 0.11457328498363495, "learning_rate": 7.131682497414379e-05, "loss": 1.6065, "step": 146816 }, { "epoch": 0.2897768168482634, "grad_norm": 0.1179886907339096, "learning_rate": 7.131048474977909e-05, "loss": 1.5993, "step": 146848 }, { "epoch": 0.28983996280965985, "grad_norm": 0.12959076464176178, "learning_rate": 7.13041445254144e-05, "loss": 1.5865, "step": 146880 }, { "epoch": 0.28990310877105624, "grad_norm": 0.10887438803911209, "learning_rate": 7.12978043010497e-05, "loss": 1.5928, "step": 146912 }, { "epoch": 0.2899662547324527, "grad_norm": 0.11446130275726318, "learning_rate": 7.129146407668502e-05, "loss": 1.6029, "step": 146944 }, { "epoch": 0.29002940069384914, "grad_norm": 0.10503789782524109, "learning_rate": 7.128512385232032e-05, "loss": 1.5972, "step": 146976 }, { "epoch": 0.2900925466552456, "grad_norm": 0.11018822342157364, "learning_rate": 7.127878362795563e-05, "loss": 1.5938, "step": 147008 }, { "epoch": 0.29015569261664204, "grad_norm": 0.10738013684749603, "learning_rate": 7.127244340359095e-05, "loss": 1.5985, "step": 147040 }, { "epoch": 0.2902188385780385, "grad_norm": 0.11055608093738556, "learning_rate": 7.126610317922626e-05, "loss": 1.6051, "step": 147072 }, { "epoch": 0.2902819845394349, "grad_norm": 0.10610676556825638, "learning_rate": 7.125976295486156e-05, "loss": 1.5933, "step": 147104 }, { "epoch": 0.2903451305008313, "grad_norm": 0.10864364355802536, "learning_rate": 7.125342273049688e-05, "loss": 1.593, "step": 147136 }, { "epoch": 0.2904082764622278, "grad_norm": 0.10153425484895706, "learning_rate": 7.124708250613219e-05, "loss": 1.6011, "step": 147168 }, { "epoch": 0.2904714224236242, "grad_norm": 0.1137988343834877, "learning_rate": 7.124074228176751e-05, "loss": 1.6008, "step": 147200 }, { "epoch": 0.2905345683850207, "grad_norm": 0.10674472898244858, "learning_rate": 7.123440205740281e-05, "loss": 1.6047, "step": 147232 }, { "epoch": 0.2905977143464171, "grad_norm": 0.10434059798717499, "learning_rate": 7.122806183303812e-05, "loss": 1.6037, "step": 147264 }, { "epoch": 0.2906608603078135, "grad_norm": 0.11284546554088593, "learning_rate": 7.122172160867342e-05, "loss": 1.6025, "step": 147296 }, { "epoch": 0.29072400626920997, "grad_norm": 0.1067948043346405, "learning_rate": 7.121538138430874e-05, "loss": 1.612, "step": 147328 }, { "epoch": 0.2907871522306064, "grad_norm": 0.11101321876049042, "learning_rate": 7.120904115994404e-05, "loss": 1.6022, "step": 147360 }, { "epoch": 0.29085029819200287, "grad_norm": 0.1162412166595459, "learning_rate": 7.120270093557935e-05, "loss": 1.6005, "step": 147392 }, { "epoch": 0.2909134441533993, "grad_norm": 0.10932154208421707, "learning_rate": 7.119636071121467e-05, "loss": 1.6055, "step": 147424 }, { "epoch": 0.29097659011479576, "grad_norm": 0.1102326512336731, "learning_rate": 7.119002048684998e-05, "loss": 1.6023, "step": 147456 }, { "epoch": 0.29103973607619216, "grad_norm": 0.1088930070400238, "learning_rate": 7.11836802624853e-05, "loss": 1.605, "step": 147488 }, { "epoch": 0.2911028820375886, "grad_norm": 0.10465831309556961, "learning_rate": 7.11773400381206e-05, "loss": 1.6082, "step": 147520 }, { "epoch": 0.29116602799898506, "grad_norm": 0.1064116582274437, "learning_rate": 7.117099981375591e-05, "loss": 1.6036, "step": 147552 }, { "epoch": 0.2912291739603815, "grad_norm": 0.11028715968132019, "learning_rate": 7.116465958939123e-05, "loss": 1.595, "step": 147584 }, { "epoch": 0.29129231992177795, "grad_norm": 0.10586348921060562, "learning_rate": 7.115831936502654e-05, "loss": 1.5992, "step": 147616 }, { "epoch": 0.2913554658831744, "grad_norm": 0.10846854746341705, "learning_rate": 7.115197914066184e-05, "loss": 1.5942, "step": 147648 }, { "epoch": 0.2914186118445708, "grad_norm": 0.11915256083011627, "learning_rate": 7.114563891629716e-05, "loss": 1.5939, "step": 147680 }, { "epoch": 0.29148175780596725, "grad_norm": 0.10486552119255066, "learning_rate": 7.113929869193246e-05, "loss": 1.6025, "step": 147712 }, { "epoch": 0.2915449037673637, "grad_norm": 0.10865342617034912, "learning_rate": 7.113295846756777e-05, "loss": 1.6031, "step": 147744 }, { "epoch": 0.29160804972876014, "grad_norm": 0.11034132540225983, "learning_rate": 7.112661824320307e-05, "loss": 1.5948, "step": 147776 }, { "epoch": 0.2916711956901566, "grad_norm": 0.10538811981678009, "learning_rate": 7.112027801883839e-05, "loss": 1.6005, "step": 147808 }, { "epoch": 0.29173434165155304, "grad_norm": 0.10983075946569443, "learning_rate": 7.11139377944737e-05, "loss": 1.6124, "step": 147840 }, { "epoch": 0.29179748761294944, "grad_norm": 0.11809743940830231, "learning_rate": 7.110759757010902e-05, "loss": 1.6116, "step": 147872 }, { "epoch": 0.2918606335743459, "grad_norm": 0.11948984861373901, "learning_rate": 7.110125734574432e-05, "loss": 1.615, "step": 147904 }, { "epoch": 0.29192377953574233, "grad_norm": 0.11344622820615768, "learning_rate": 7.109491712137963e-05, "loss": 1.596, "step": 147936 }, { "epoch": 0.2919869254971388, "grad_norm": 0.10580223053693771, "learning_rate": 7.108857689701495e-05, "loss": 1.6069, "step": 147968 }, { "epoch": 0.29205007145853523, "grad_norm": 0.10495791584253311, "learning_rate": 7.108223667265026e-05, "loss": 1.6072, "step": 148000 }, { "epoch": 0.2921132174199317, "grad_norm": 0.10190922766923904, "learning_rate": 7.107589644828556e-05, "loss": 1.6053, "step": 148032 }, { "epoch": 0.2921763633813281, "grad_norm": 0.12029528617858887, "learning_rate": 7.106955622392088e-05, "loss": 1.6044, "step": 148064 }, { "epoch": 0.2922395093427245, "grad_norm": 0.11414968967437744, "learning_rate": 7.10632159995562e-05, "loss": 1.6161, "step": 148096 }, { "epoch": 0.292302655304121, "grad_norm": 0.10950406640768051, "learning_rate": 7.10568757751915e-05, "loss": 1.6007, "step": 148128 }, { "epoch": 0.2923658012655174, "grad_norm": 0.11942359805107117, "learning_rate": 7.105053555082681e-05, "loss": 1.5998, "step": 148160 }, { "epoch": 0.29242894722691387, "grad_norm": 0.10723134875297546, "learning_rate": 7.104419532646211e-05, "loss": 1.5952, "step": 148192 }, { "epoch": 0.2924920931883103, "grad_norm": 0.10833689570426941, "learning_rate": 7.103785510209742e-05, "loss": 1.6096, "step": 148224 }, { "epoch": 0.2925552391497067, "grad_norm": 0.11473509669303894, "learning_rate": 7.103151487773274e-05, "loss": 1.6043, "step": 148256 }, { "epoch": 0.29261838511110316, "grad_norm": 0.11084617674350739, "learning_rate": 7.102517465336805e-05, "loss": 1.5959, "step": 148288 }, { "epoch": 0.2926815310724996, "grad_norm": 0.1038217842578888, "learning_rate": 7.101883442900335e-05, "loss": 1.6055, "step": 148320 }, { "epoch": 0.29274467703389606, "grad_norm": 0.1097683310508728, "learning_rate": 7.101249420463867e-05, "loss": 1.5946, "step": 148352 }, { "epoch": 0.2928078229952925, "grad_norm": 0.1091427281498909, "learning_rate": 7.100615398027398e-05, "loss": 1.6058, "step": 148384 }, { "epoch": 0.29287096895668896, "grad_norm": 0.11220090836286545, "learning_rate": 7.09998137559093e-05, "loss": 1.5975, "step": 148416 }, { "epoch": 0.2929341149180854, "grad_norm": 0.10755281895399094, "learning_rate": 7.09934735315446e-05, "loss": 1.5954, "step": 148448 }, { "epoch": 0.2929972608794818, "grad_norm": 0.1037331148982048, "learning_rate": 7.098713330717991e-05, "loss": 1.5875, "step": 148480 }, { "epoch": 0.29306040684087825, "grad_norm": 0.10771684348583221, "learning_rate": 7.098079308281523e-05, "loss": 1.6043, "step": 148512 }, { "epoch": 0.2931235528022747, "grad_norm": 0.10596349835395813, "learning_rate": 7.097445285845053e-05, "loss": 1.6006, "step": 148544 }, { "epoch": 0.29318669876367115, "grad_norm": 0.10761568695306778, "learning_rate": 7.096811263408583e-05, "loss": 1.6281, "step": 148576 }, { "epoch": 0.2932498447250676, "grad_norm": 0.09952478110790253, "learning_rate": 7.096177240972115e-05, "loss": 1.5996, "step": 148608 }, { "epoch": 0.29331299068646405, "grad_norm": 0.10850663483142853, "learning_rate": 7.095543218535646e-05, "loss": 1.5776, "step": 148640 }, { "epoch": 0.29337613664786044, "grad_norm": 0.10966360569000244, "learning_rate": 7.094909196099177e-05, "loss": 1.5965, "step": 148672 }, { "epoch": 0.2934392826092569, "grad_norm": 0.10936962068080902, "learning_rate": 7.094275173662708e-05, "loss": 1.613, "step": 148704 }, { "epoch": 0.29350242857065334, "grad_norm": 0.11115074157714844, "learning_rate": 7.093641151226239e-05, "loss": 1.6183, "step": 148736 }, { "epoch": 0.2935655745320498, "grad_norm": 0.1090892106294632, "learning_rate": 7.09300712878977e-05, "loss": 1.582, "step": 148768 }, { "epoch": 0.29362872049344624, "grad_norm": 0.1073753759264946, "learning_rate": 7.092373106353302e-05, "loss": 1.5966, "step": 148800 }, { "epoch": 0.2936918664548427, "grad_norm": 0.10955362021923065, "learning_rate": 7.091739083916833e-05, "loss": 1.6111, "step": 148832 }, { "epoch": 0.2937550124162391, "grad_norm": 0.10309655219316483, "learning_rate": 7.091105061480363e-05, "loss": 1.6129, "step": 148864 }, { "epoch": 0.29381815837763553, "grad_norm": 0.11600329726934433, "learning_rate": 7.090471039043895e-05, "loss": 1.6014, "step": 148896 }, { "epoch": 0.293881304339032, "grad_norm": 0.10746400058269501, "learning_rate": 7.089837016607426e-05, "loss": 1.5834, "step": 148928 }, { "epoch": 0.29394445030042843, "grad_norm": 0.11148227006196976, "learning_rate": 7.089202994170956e-05, "loss": 1.5993, "step": 148960 }, { "epoch": 0.2940075962618249, "grad_norm": 0.10850510746240616, "learning_rate": 7.088568971734487e-05, "loss": 1.5943, "step": 148992 }, { "epoch": 0.2940707422232213, "grad_norm": 0.10176557302474976, "learning_rate": 7.087934949298018e-05, "loss": 1.5972, "step": 149024 }, { "epoch": 0.2941338881846177, "grad_norm": 0.11594806611537933, "learning_rate": 7.08730092686155e-05, "loss": 1.5986, "step": 149056 }, { "epoch": 0.29419703414601417, "grad_norm": 0.110724076628685, "learning_rate": 7.086666904425081e-05, "loss": 1.6094, "step": 149088 }, { "epoch": 0.2942601801074106, "grad_norm": 0.11467071622610092, "learning_rate": 7.086032881988611e-05, "loss": 1.597, "step": 149120 }, { "epoch": 0.29432332606880707, "grad_norm": 0.11094608902931213, "learning_rate": 7.085398859552142e-05, "loss": 1.5972, "step": 149152 }, { "epoch": 0.2943864720302035, "grad_norm": 0.11065344512462616, "learning_rate": 7.084764837115674e-05, "loss": 1.5825, "step": 149184 }, { "epoch": 0.29444961799159997, "grad_norm": 0.11137869209051132, "learning_rate": 7.084130814679205e-05, "loss": 1.6025, "step": 149216 }, { "epoch": 0.29451276395299636, "grad_norm": 0.10236819833517075, "learning_rate": 7.083496792242735e-05, "loss": 1.5962, "step": 149248 }, { "epoch": 0.2945759099143928, "grad_norm": 0.1143043264746666, "learning_rate": 7.082862769806267e-05, "loss": 1.6085, "step": 149280 }, { "epoch": 0.29463905587578926, "grad_norm": 0.10390573740005493, "learning_rate": 7.082228747369798e-05, "loss": 1.5948, "step": 149312 }, { "epoch": 0.2947022018371857, "grad_norm": 0.11138554662466049, "learning_rate": 7.08159472493333e-05, "loss": 1.5891, "step": 149344 }, { "epoch": 0.29476534779858216, "grad_norm": 0.1055331900715828, "learning_rate": 7.08096070249686e-05, "loss": 1.6031, "step": 149376 }, { "epoch": 0.2948284937599786, "grad_norm": 0.10739952325820923, "learning_rate": 7.08032668006039e-05, "loss": 1.5874, "step": 149408 }, { "epoch": 0.294891639721375, "grad_norm": 0.1127571314573288, "learning_rate": 7.079692657623922e-05, "loss": 1.598, "step": 149440 }, { "epoch": 0.29495478568277145, "grad_norm": 0.10634210705757141, "learning_rate": 7.079058635187453e-05, "loss": 1.6079, "step": 149472 }, { "epoch": 0.2950179316441679, "grad_norm": 0.11272866278886795, "learning_rate": 7.078424612750984e-05, "loss": 1.6142, "step": 149504 }, { "epoch": 0.29508107760556435, "grad_norm": 0.1091655045747757, "learning_rate": 7.077790590314515e-05, "loss": 1.6018, "step": 149536 }, { "epoch": 0.2951442235669608, "grad_norm": 0.11715688556432724, "learning_rate": 7.077156567878046e-05, "loss": 1.6003, "step": 149568 }, { "epoch": 0.29520736952835724, "grad_norm": 0.10742654651403427, "learning_rate": 7.076522545441577e-05, "loss": 1.6068, "step": 149600 }, { "epoch": 0.29527051548975364, "grad_norm": 0.10077372193336487, "learning_rate": 7.075888523005109e-05, "loss": 1.6075, "step": 149632 }, { "epoch": 0.2953336614511501, "grad_norm": 0.11272387951612473, "learning_rate": 7.075254500568639e-05, "loss": 1.5946, "step": 149664 }, { "epoch": 0.29539680741254654, "grad_norm": 0.11600866168737411, "learning_rate": 7.07462047813217e-05, "loss": 1.6153, "step": 149696 }, { "epoch": 0.295459953373943, "grad_norm": 0.11558971554040909, "learning_rate": 7.073986455695702e-05, "loss": 1.5928, "step": 149728 }, { "epoch": 0.29552309933533943, "grad_norm": 0.10414761304855347, "learning_rate": 7.073352433259232e-05, "loss": 1.5996, "step": 149760 }, { "epoch": 0.2955862452967359, "grad_norm": 0.11273051053285599, "learning_rate": 7.072718410822763e-05, "loss": 1.5946, "step": 149792 }, { "epoch": 0.2956493912581323, "grad_norm": 0.11486554890871048, "learning_rate": 7.072084388386294e-05, "loss": 1.5996, "step": 149824 }, { "epoch": 0.2957125372195287, "grad_norm": 0.10669771581888199, "learning_rate": 7.071450365949825e-05, "loss": 1.5841, "step": 149856 }, { "epoch": 0.2957756831809252, "grad_norm": 0.11150334775447845, "learning_rate": 7.070816343513356e-05, "loss": 1.6167, "step": 149888 }, { "epoch": 0.2958388291423216, "grad_norm": 0.11866706609725952, "learning_rate": 7.070182321076887e-05, "loss": 1.6098, "step": 149920 }, { "epoch": 0.2959019751037181, "grad_norm": 0.10640261322259903, "learning_rate": 7.069548298640418e-05, "loss": 1.5874, "step": 149952 }, { "epoch": 0.2959651210651145, "grad_norm": 0.10304625332355499, "learning_rate": 7.06891427620395e-05, "loss": 1.595, "step": 149984 }, { "epoch": 0.2960282670265109, "grad_norm": 0.11005029082298279, "learning_rate": 7.068280253767481e-05, "loss": 1.5901, "step": 150016 }, { "epoch": 0.29609141298790737, "grad_norm": 0.10300908982753754, "learning_rate": 7.067646231331012e-05, "loss": 1.6064, "step": 150048 }, { "epoch": 0.2961545589493038, "grad_norm": 0.10868167132139206, "learning_rate": 7.067012208894543e-05, "loss": 1.591, "step": 150080 }, { "epoch": 0.29621770491070026, "grad_norm": 0.10578823834657669, "learning_rate": 7.066378186458074e-05, "loss": 1.5964, "step": 150112 }, { "epoch": 0.2962808508720967, "grad_norm": 0.11526907980442047, "learning_rate": 7.065744164021605e-05, "loss": 1.603, "step": 150144 }, { "epoch": 0.29634399683349316, "grad_norm": 0.1077684685587883, "learning_rate": 7.065110141585136e-05, "loss": 1.6009, "step": 150176 }, { "epoch": 0.2964071427948896, "grad_norm": 0.10519464313983917, "learning_rate": 7.064476119148667e-05, "loss": 1.6029, "step": 150208 }, { "epoch": 0.296470288756286, "grad_norm": 0.1093609631061554, "learning_rate": 7.063842096712197e-05, "loss": 1.5931, "step": 150240 }, { "epoch": 0.29653343471768245, "grad_norm": 0.11715901643037796, "learning_rate": 7.063208074275729e-05, "loss": 1.6072, "step": 150272 }, { "epoch": 0.2965965806790789, "grad_norm": 0.1089400202035904, "learning_rate": 7.06257405183926e-05, "loss": 1.5947, "step": 150304 }, { "epoch": 0.29665972664047535, "grad_norm": 0.11363311111927032, "learning_rate": 7.06194002940279e-05, "loss": 1.6053, "step": 150336 }, { "epoch": 0.2967228726018718, "grad_norm": 0.10174848884344101, "learning_rate": 7.061306006966322e-05, "loss": 1.6003, "step": 150368 }, { "epoch": 0.29678601856326825, "grad_norm": 0.10809173434972763, "learning_rate": 7.060671984529853e-05, "loss": 1.6008, "step": 150400 }, { "epoch": 0.29684916452466464, "grad_norm": 0.11010542511940002, "learning_rate": 7.060037962093384e-05, "loss": 1.6032, "step": 150432 }, { "epoch": 0.2969123104860611, "grad_norm": 0.11021538823843002, "learning_rate": 7.059403939656915e-05, "loss": 1.5926, "step": 150464 }, { "epoch": 0.29697545644745754, "grad_norm": 0.10456708073616028, "learning_rate": 7.058769917220446e-05, "loss": 1.5978, "step": 150496 }, { "epoch": 0.297038602408854, "grad_norm": 0.11134756356477737, "learning_rate": 7.058135894783977e-05, "loss": 1.597, "step": 150528 }, { "epoch": 0.29710174837025044, "grad_norm": 0.10560545325279236, "learning_rate": 7.057501872347509e-05, "loss": 1.6003, "step": 150560 }, { "epoch": 0.2971648943316469, "grad_norm": 0.11020217835903168, "learning_rate": 7.056867849911039e-05, "loss": 1.6011, "step": 150592 }, { "epoch": 0.2972280402930433, "grad_norm": 0.10672367364168167, "learning_rate": 7.05623382747457e-05, "loss": 1.5971, "step": 150624 }, { "epoch": 0.29729118625443973, "grad_norm": 0.10412941128015518, "learning_rate": 7.0555998050381e-05, "loss": 1.5943, "step": 150656 }, { "epoch": 0.2973543322158362, "grad_norm": 0.1048407033085823, "learning_rate": 7.054965782601632e-05, "loss": 1.5907, "step": 150688 }, { "epoch": 0.29741747817723263, "grad_norm": 0.10758503526449203, "learning_rate": 7.054331760165164e-05, "loss": 1.6024, "step": 150720 }, { "epoch": 0.2974806241386291, "grad_norm": 0.10499800741672516, "learning_rate": 7.053697737728694e-05, "loss": 1.5926, "step": 150752 }, { "epoch": 0.29754377010002553, "grad_norm": 0.10877201706171036, "learning_rate": 7.053063715292225e-05, "loss": 1.6196, "step": 150784 }, { "epoch": 0.2976069160614219, "grad_norm": 0.11338555812835693, "learning_rate": 7.052429692855757e-05, "loss": 1.5973, "step": 150816 }, { "epoch": 0.29767006202281837, "grad_norm": 0.11133240908384323, "learning_rate": 7.051795670419288e-05, "loss": 1.6117, "step": 150848 }, { "epoch": 0.2977332079842148, "grad_norm": 0.1142796203494072, "learning_rate": 7.051161647982818e-05, "loss": 1.6013, "step": 150880 }, { "epoch": 0.29779635394561127, "grad_norm": 0.12019713968038559, "learning_rate": 7.05052762554635e-05, "loss": 1.5995, "step": 150912 }, { "epoch": 0.2978594999070077, "grad_norm": 0.11198505759239197, "learning_rate": 7.049893603109881e-05, "loss": 1.6067, "step": 150944 }, { "epoch": 0.29792264586840417, "grad_norm": 0.11616820842027664, "learning_rate": 7.049259580673412e-05, "loss": 1.5935, "step": 150976 }, { "epoch": 0.29798579182980056, "grad_norm": 0.10942426323890686, "learning_rate": 7.048625558236943e-05, "loss": 1.6111, "step": 151008 }, { "epoch": 0.298048937791197, "grad_norm": 0.10377158969640732, "learning_rate": 7.047991535800474e-05, "loss": 1.5991, "step": 151040 }, { "epoch": 0.29811208375259346, "grad_norm": 0.10230506956577301, "learning_rate": 7.047357513364004e-05, "loss": 1.6147, "step": 151072 }, { "epoch": 0.2981752297139899, "grad_norm": 0.1174510195851326, "learning_rate": 7.046723490927536e-05, "loss": 1.6124, "step": 151104 }, { "epoch": 0.29823837567538636, "grad_norm": 0.10018105804920197, "learning_rate": 7.046089468491066e-05, "loss": 1.5959, "step": 151136 }, { "epoch": 0.2983015216367828, "grad_norm": 0.10968267917633057, "learning_rate": 7.045455446054597e-05, "loss": 1.6065, "step": 151168 }, { "epoch": 0.2983646675981792, "grad_norm": 0.106751449406147, "learning_rate": 7.044821423618129e-05, "loss": 1.6052, "step": 151200 }, { "epoch": 0.29842781355957565, "grad_norm": 0.10615841299295425, "learning_rate": 7.04418740118166e-05, "loss": 1.6045, "step": 151232 }, { "epoch": 0.2984909595209721, "grad_norm": 0.10571028292179108, "learning_rate": 7.04355337874519e-05, "loss": 1.5928, "step": 151264 }, { "epoch": 0.29855410548236855, "grad_norm": 0.11286832392215729, "learning_rate": 7.042919356308722e-05, "loss": 1.5954, "step": 151296 }, { "epoch": 0.298617251443765, "grad_norm": 0.11962977796792984, "learning_rate": 7.042285333872253e-05, "loss": 1.5935, "step": 151328 }, { "epoch": 0.29868039740516145, "grad_norm": 0.10393813252449036, "learning_rate": 7.041651311435785e-05, "loss": 1.591, "step": 151360 }, { "epoch": 0.29874354336655784, "grad_norm": 0.11043057590723038, "learning_rate": 7.041017288999316e-05, "loss": 1.5955, "step": 151392 }, { "epoch": 0.2988066893279543, "grad_norm": 0.11294026672840118, "learning_rate": 7.040383266562846e-05, "loss": 1.5969, "step": 151424 }, { "epoch": 0.29886983528935074, "grad_norm": 0.10951151698827744, "learning_rate": 7.039749244126376e-05, "loss": 1.6099, "step": 151456 }, { "epoch": 0.2989329812507472, "grad_norm": 0.11363144218921661, "learning_rate": 7.039115221689908e-05, "loss": 1.6062, "step": 151488 }, { "epoch": 0.29899612721214364, "grad_norm": 0.10769317299127579, "learning_rate": 7.038481199253439e-05, "loss": 1.6066, "step": 151520 }, { "epoch": 0.2990592731735401, "grad_norm": 0.10310589522123337, "learning_rate": 7.037847176816969e-05, "loss": 1.5996, "step": 151552 }, { "epoch": 0.2991224191349365, "grad_norm": 0.10610410571098328, "learning_rate": 7.0372131543805e-05, "loss": 1.5932, "step": 151584 }, { "epoch": 0.29918556509633293, "grad_norm": 0.11068467795848846, "learning_rate": 7.036579131944032e-05, "loss": 1.6125, "step": 151616 }, { "epoch": 0.2992487110577294, "grad_norm": 0.10848487168550491, "learning_rate": 7.035945109507564e-05, "loss": 1.5865, "step": 151648 }, { "epoch": 0.2993118570191258, "grad_norm": 0.10966099798679352, "learning_rate": 7.035311087071094e-05, "loss": 1.5881, "step": 151680 }, { "epoch": 0.2993750029805223, "grad_norm": 0.10770150274038315, "learning_rate": 7.034677064634625e-05, "loss": 1.6034, "step": 151712 }, { "epoch": 0.2994381489419187, "grad_norm": 0.10797891765832901, "learning_rate": 7.034043042198157e-05, "loss": 1.5958, "step": 151744 }, { "epoch": 0.2995012949033152, "grad_norm": 0.11336114257574081, "learning_rate": 7.033409019761688e-05, "loss": 1.6104, "step": 151776 }, { "epoch": 0.29956444086471157, "grad_norm": 0.10820356011390686, "learning_rate": 7.032774997325218e-05, "loss": 1.6075, "step": 151808 }, { "epoch": 0.299627586826108, "grad_norm": 0.10891455411911011, "learning_rate": 7.03214097488875e-05, "loss": 1.6026, "step": 151840 }, { "epoch": 0.29969073278750447, "grad_norm": 0.10543946176767349, "learning_rate": 7.03150695245228e-05, "loss": 1.5985, "step": 151872 }, { "epoch": 0.2997538787489009, "grad_norm": 0.1041947528719902, "learning_rate": 7.030872930015811e-05, "loss": 1.5802, "step": 151904 }, { "epoch": 0.29981702471029736, "grad_norm": 0.10467489063739777, "learning_rate": 7.030238907579341e-05, "loss": 1.5884, "step": 151936 }, { "epoch": 0.2998801706716938, "grad_norm": 0.11704521626234055, "learning_rate": 7.029604885142873e-05, "loss": 1.5988, "step": 151968 }, { "epoch": 0.2999433166330902, "grad_norm": 0.10920777171850204, "learning_rate": 7.028970862706404e-05, "loss": 1.5922, "step": 152000 }, { "epoch": 0.30000646259448666, "grad_norm": 0.10438364744186401, "learning_rate": 7.028336840269936e-05, "loss": 1.5932, "step": 152032 }, { "epoch": 0.3000696085558831, "grad_norm": 0.1106170192360878, "learning_rate": 7.027702817833467e-05, "loss": 1.601, "step": 152064 }, { "epoch": 0.30013275451727955, "grad_norm": 0.10635095089673996, "learning_rate": 7.027068795396997e-05, "loss": 1.6003, "step": 152096 }, { "epoch": 0.300195900478676, "grad_norm": 0.11621138453483582, "learning_rate": 7.026434772960529e-05, "loss": 1.6044, "step": 152128 }, { "epoch": 0.30025904644007245, "grad_norm": 0.11256194114685059, "learning_rate": 7.02580075052406e-05, "loss": 1.6098, "step": 152160 }, { "epoch": 0.30032219240146885, "grad_norm": 0.11211389303207397, "learning_rate": 7.025166728087592e-05, "loss": 1.5969, "step": 152192 }, { "epoch": 0.3003853383628653, "grad_norm": 0.10912995040416718, "learning_rate": 7.024532705651122e-05, "loss": 1.606, "step": 152224 }, { "epoch": 0.30044848432426174, "grad_norm": 0.12073302268981934, "learning_rate": 7.023898683214653e-05, "loss": 1.5912, "step": 152256 }, { "epoch": 0.3005116302856582, "grad_norm": 0.11587313562631607, "learning_rate": 7.023264660778183e-05, "loss": 1.6032, "step": 152288 }, { "epoch": 0.30057477624705464, "grad_norm": 0.11150822043418884, "learning_rate": 7.022630638341715e-05, "loss": 1.5906, "step": 152320 }, { "epoch": 0.3006379222084511, "grad_norm": 0.11763504892587662, "learning_rate": 7.021996615905245e-05, "loss": 1.5941, "step": 152352 }, { "epoch": 0.3007010681698475, "grad_norm": 0.10895249992609024, "learning_rate": 7.021362593468776e-05, "loss": 1.6021, "step": 152384 }, { "epoch": 0.30076421413124393, "grad_norm": 0.10470119118690491, "learning_rate": 7.020728571032308e-05, "loss": 1.6011, "step": 152416 }, { "epoch": 0.3008273600926404, "grad_norm": 0.11427845060825348, "learning_rate": 7.020094548595839e-05, "loss": 1.6003, "step": 152448 }, { "epoch": 0.30089050605403683, "grad_norm": 0.10430949926376343, "learning_rate": 7.019460526159369e-05, "loss": 1.6075, "step": 152480 }, { "epoch": 0.3009536520154333, "grad_norm": 0.11266857385635376, "learning_rate": 7.0188265037229e-05, "loss": 1.6047, "step": 152512 }, { "epoch": 0.30101679797682973, "grad_norm": 0.1131991520524025, "learning_rate": 7.018192481286432e-05, "loss": 1.6033, "step": 152544 }, { "epoch": 0.3010799439382261, "grad_norm": 0.11302390694618225, "learning_rate": 7.017558458849964e-05, "loss": 1.6041, "step": 152576 }, { "epoch": 0.3011430898996226, "grad_norm": 0.11738061904907227, "learning_rate": 7.016924436413494e-05, "loss": 1.6032, "step": 152608 }, { "epoch": 0.301206235861019, "grad_norm": 0.10957682877779007, "learning_rate": 7.016290413977025e-05, "loss": 1.5962, "step": 152640 }, { "epoch": 0.30126938182241547, "grad_norm": 0.11225590854883194, "learning_rate": 7.015656391540557e-05, "loss": 1.5921, "step": 152672 }, { "epoch": 0.3013325277838119, "grad_norm": 0.11029886454343796, "learning_rate": 7.015022369104087e-05, "loss": 1.6007, "step": 152704 }, { "epoch": 0.30139567374520837, "grad_norm": 0.1058514267206192, "learning_rate": 7.014388346667618e-05, "loss": 1.6044, "step": 152736 }, { "epoch": 0.30145881970660476, "grad_norm": 0.10820958018302917, "learning_rate": 7.013754324231148e-05, "loss": 1.6066, "step": 152768 }, { "epoch": 0.3015219656680012, "grad_norm": 0.10125933587551117, "learning_rate": 7.01312030179468e-05, "loss": 1.5946, "step": 152800 }, { "epoch": 0.30158511162939766, "grad_norm": 0.109490305185318, "learning_rate": 7.012486279358211e-05, "loss": 1.6039, "step": 152832 }, { "epoch": 0.3016482575907941, "grad_norm": 0.1123470664024353, "learning_rate": 7.011852256921743e-05, "loss": 1.6029, "step": 152864 }, { "epoch": 0.30171140355219056, "grad_norm": 0.10927826911211014, "learning_rate": 7.011218234485273e-05, "loss": 1.6048, "step": 152896 }, { "epoch": 0.301774549513587, "grad_norm": 0.10237983614206314, "learning_rate": 7.010584212048804e-05, "loss": 1.594, "step": 152928 }, { "epoch": 0.3018376954749834, "grad_norm": 0.11437680572271347, "learning_rate": 7.009950189612336e-05, "loss": 1.5965, "step": 152960 }, { "epoch": 0.30190084143637985, "grad_norm": 0.10745229572057724, "learning_rate": 7.009316167175867e-05, "loss": 1.6127, "step": 152992 }, { "epoch": 0.3019639873977763, "grad_norm": 0.10919062793254852, "learning_rate": 7.008682144739397e-05, "loss": 1.5869, "step": 153024 }, { "epoch": 0.30202713335917275, "grad_norm": 0.1046941876411438, "learning_rate": 7.008048122302929e-05, "loss": 1.5856, "step": 153056 }, { "epoch": 0.3020902793205692, "grad_norm": 0.10780448466539383, "learning_rate": 7.00741409986646e-05, "loss": 1.5898, "step": 153088 }, { "epoch": 0.30215342528196565, "grad_norm": 0.10609892755746841, "learning_rate": 7.00678007742999e-05, "loss": 1.5981, "step": 153120 }, { "epoch": 0.30221657124336204, "grad_norm": 0.10885103791952133, "learning_rate": 7.00614605499352e-05, "loss": 1.5931, "step": 153152 }, { "epoch": 0.3022797172047585, "grad_norm": 0.10555577278137207, "learning_rate": 7.005512032557052e-05, "loss": 1.5907, "step": 153184 }, { "epoch": 0.30234286316615494, "grad_norm": 0.10858432948589325, "learning_rate": 7.004878010120583e-05, "loss": 1.5931, "step": 153216 }, { "epoch": 0.3024060091275514, "grad_norm": 0.12543374300003052, "learning_rate": 7.004243987684115e-05, "loss": 1.5937, "step": 153248 }, { "epoch": 0.30246915508894784, "grad_norm": 0.10381802171468735, "learning_rate": 7.003609965247645e-05, "loss": 1.5938, "step": 153280 }, { "epoch": 0.3025323010503443, "grad_norm": 0.12593108415603638, "learning_rate": 7.002975942811176e-05, "loss": 1.5886, "step": 153312 }, { "epoch": 0.3025954470117407, "grad_norm": 0.12112244218587875, "learning_rate": 7.002341920374708e-05, "loss": 1.6135, "step": 153344 }, { "epoch": 0.30265859297313713, "grad_norm": 0.10223555564880371, "learning_rate": 7.001707897938239e-05, "loss": 1.5847, "step": 153376 }, { "epoch": 0.3027217389345336, "grad_norm": 0.11031569540500641, "learning_rate": 7.00107387550177e-05, "loss": 1.5877, "step": 153408 }, { "epoch": 0.30278488489593003, "grad_norm": 0.10166694968938828, "learning_rate": 7.000439853065301e-05, "loss": 1.6019, "step": 153440 }, { "epoch": 0.3028480308573265, "grad_norm": 0.10694776475429535, "learning_rate": 6.999805830628832e-05, "loss": 1.6039, "step": 153472 }, { "epoch": 0.3029111768187229, "grad_norm": 0.11236537992954254, "learning_rate": 6.999171808192364e-05, "loss": 1.602, "step": 153504 }, { "epoch": 0.3029743227801194, "grad_norm": 0.11643679440021515, "learning_rate": 6.998537785755894e-05, "loss": 1.6087, "step": 153536 }, { "epoch": 0.30303746874151577, "grad_norm": 0.1105060875415802, "learning_rate": 6.997903763319424e-05, "loss": 1.6024, "step": 153568 }, { "epoch": 0.3031006147029122, "grad_norm": 0.11364857852458954, "learning_rate": 6.997269740882955e-05, "loss": 1.6041, "step": 153600 }, { "epoch": 0.30316376066430867, "grad_norm": 0.11282741278409958, "learning_rate": 6.996635718446487e-05, "loss": 1.6065, "step": 153632 }, { "epoch": 0.3032269066257051, "grad_norm": 0.11782099306583405, "learning_rate": 6.996001696010018e-05, "loss": 1.6092, "step": 153664 }, { "epoch": 0.30329005258710157, "grad_norm": 0.10855941474437714, "learning_rate": 6.995367673573548e-05, "loss": 1.6017, "step": 153696 }, { "epoch": 0.303353198548498, "grad_norm": 0.11252662539482117, "learning_rate": 6.99473365113708e-05, "loss": 1.6067, "step": 153728 }, { "epoch": 0.3034163445098944, "grad_norm": 0.11697684228420258, "learning_rate": 6.994099628700611e-05, "loss": 1.5947, "step": 153760 }, { "epoch": 0.30347949047129086, "grad_norm": 0.10977371037006378, "learning_rate": 6.993465606264143e-05, "loss": 1.6086, "step": 153792 }, { "epoch": 0.3035426364326873, "grad_norm": 0.10438380390405655, "learning_rate": 6.992831583827673e-05, "loss": 1.5985, "step": 153824 }, { "epoch": 0.30360578239408376, "grad_norm": 0.11717565357685089, "learning_rate": 6.992197561391204e-05, "loss": 1.607, "step": 153856 }, { "epoch": 0.3036689283554802, "grad_norm": 0.11341336369514465, "learning_rate": 6.991563538954736e-05, "loss": 1.5939, "step": 153888 }, { "epoch": 0.30373207431687665, "grad_norm": 0.10679785162210464, "learning_rate": 6.990929516518266e-05, "loss": 1.5883, "step": 153920 }, { "epoch": 0.30379522027827305, "grad_norm": 0.100877545773983, "learning_rate": 6.990295494081797e-05, "loss": 1.5899, "step": 153952 }, { "epoch": 0.3038583662396695, "grad_norm": 0.10690692812204361, "learning_rate": 6.989661471645327e-05, "loss": 1.614, "step": 153984 }, { "epoch": 0.30392151220106595, "grad_norm": 0.10297846049070358, "learning_rate": 6.989027449208859e-05, "loss": 1.6168, "step": 154016 }, { "epoch": 0.3039846581624624, "grad_norm": 0.10769422352313995, "learning_rate": 6.98839342677239e-05, "loss": 1.5998, "step": 154048 }, { "epoch": 0.30404780412385884, "grad_norm": 0.11395012587308884, "learning_rate": 6.987759404335922e-05, "loss": 1.5858, "step": 154080 }, { "epoch": 0.3041109500852553, "grad_norm": 0.110221728682518, "learning_rate": 6.987125381899452e-05, "loss": 1.6014, "step": 154112 }, { "epoch": 0.3041740960466517, "grad_norm": 0.10333923995494843, "learning_rate": 6.986491359462983e-05, "loss": 1.5963, "step": 154144 }, { "epoch": 0.30423724200804814, "grad_norm": 0.11066845059394836, "learning_rate": 6.985857337026515e-05, "loss": 1.5952, "step": 154176 }, { "epoch": 0.3043003879694446, "grad_norm": 0.1154382973909378, "learning_rate": 6.985223314590046e-05, "loss": 1.5841, "step": 154208 }, { "epoch": 0.30436353393084103, "grad_norm": 0.10902144014835358, "learning_rate": 6.984589292153576e-05, "loss": 1.5901, "step": 154240 }, { "epoch": 0.3044266798922375, "grad_norm": 0.1055220514535904, "learning_rate": 6.983955269717108e-05, "loss": 1.6, "step": 154272 }, { "epoch": 0.30448982585363393, "grad_norm": 0.10117088258266449, "learning_rate": 6.983321247280639e-05, "loss": 1.5832, "step": 154304 }, { "epoch": 0.3045529718150303, "grad_norm": 0.12125889211893082, "learning_rate": 6.982687224844169e-05, "loss": 1.5974, "step": 154336 }, { "epoch": 0.3046161177764268, "grad_norm": 0.11177989840507507, "learning_rate": 6.982053202407701e-05, "loss": 1.5952, "step": 154368 }, { "epoch": 0.3046792637378232, "grad_norm": 0.10721558332443237, "learning_rate": 6.981419179971231e-05, "loss": 1.6077, "step": 154400 }, { "epoch": 0.3047424096992197, "grad_norm": 0.11649288237094879, "learning_rate": 6.980785157534762e-05, "loss": 1.6024, "step": 154432 }, { "epoch": 0.3048055556606161, "grad_norm": 0.11103370040655136, "learning_rate": 6.980151135098294e-05, "loss": 1.5923, "step": 154464 }, { "epoch": 0.3048687016220126, "grad_norm": 0.10063661634922028, "learning_rate": 6.979517112661824e-05, "loss": 1.6024, "step": 154496 }, { "epoch": 0.30493184758340897, "grad_norm": 0.10404430329799652, "learning_rate": 6.978883090225355e-05, "loss": 1.6124, "step": 154528 }, { "epoch": 0.3049949935448054, "grad_norm": 0.11656454205513, "learning_rate": 6.978249067788887e-05, "loss": 1.6003, "step": 154560 }, { "epoch": 0.30505813950620186, "grad_norm": 0.10750927776098251, "learning_rate": 6.977615045352418e-05, "loss": 1.5853, "step": 154592 }, { "epoch": 0.3051212854675983, "grad_norm": 0.11315322667360306, "learning_rate": 6.976981022915948e-05, "loss": 1.5985, "step": 154624 }, { "epoch": 0.30518443142899476, "grad_norm": 0.10713580995798111, "learning_rate": 6.97634700047948e-05, "loss": 1.5903, "step": 154656 }, { "epoch": 0.3052475773903912, "grad_norm": 0.10717027634382248, "learning_rate": 6.975712978043011e-05, "loss": 1.5922, "step": 154688 }, { "epoch": 0.3053107233517876, "grad_norm": 0.11209490150213242, "learning_rate": 6.975078955606543e-05, "loss": 1.5918, "step": 154720 }, { "epoch": 0.30537386931318405, "grad_norm": 0.10829471796751022, "learning_rate": 6.974444933170073e-05, "loss": 1.5994, "step": 154752 }, { "epoch": 0.3054370152745805, "grad_norm": 0.11069749295711517, "learning_rate": 6.973810910733604e-05, "loss": 1.6018, "step": 154784 }, { "epoch": 0.30550016123597695, "grad_norm": 0.11267156153917313, "learning_rate": 6.973176888297134e-05, "loss": 1.5934, "step": 154816 }, { "epoch": 0.3055633071973734, "grad_norm": 0.11649084091186523, "learning_rate": 6.972542865860666e-05, "loss": 1.5759, "step": 154848 }, { "epoch": 0.30562645315876985, "grad_norm": 0.1033872663974762, "learning_rate": 6.971908843424197e-05, "loss": 1.5999, "step": 154880 }, { "epoch": 0.30568959912016624, "grad_norm": 0.10910879075527191, "learning_rate": 6.971274820987727e-05, "loss": 1.591, "step": 154912 }, { "epoch": 0.3057527450815627, "grad_norm": 0.1053755134344101, "learning_rate": 6.970640798551259e-05, "loss": 1.5868, "step": 154944 }, { "epoch": 0.30581589104295914, "grad_norm": 0.11260037869215012, "learning_rate": 6.97000677611479e-05, "loss": 1.6118, "step": 154976 }, { "epoch": 0.3058790370043556, "grad_norm": 0.10938689112663269, "learning_rate": 6.969372753678322e-05, "loss": 1.6104, "step": 155008 }, { "epoch": 0.30594218296575204, "grad_norm": 0.10422463715076447, "learning_rate": 6.968738731241852e-05, "loss": 1.598, "step": 155040 }, { "epoch": 0.3060053289271485, "grad_norm": 0.10344479233026505, "learning_rate": 6.968104708805383e-05, "loss": 1.6079, "step": 155072 }, { "epoch": 0.30606847488854494, "grad_norm": 0.11008403450250626, "learning_rate": 6.967470686368915e-05, "loss": 1.6025, "step": 155104 }, { "epoch": 0.30613162084994133, "grad_norm": 0.11175358295440674, "learning_rate": 6.966836663932446e-05, "loss": 1.5771, "step": 155136 }, { "epoch": 0.3061947668113378, "grad_norm": 0.10845853388309479, "learning_rate": 6.966202641495976e-05, "loss": 1.5986, "step": 155168 }, { "epoch": 0.30625791277273423, "grad_norm": 0.11384795606136322, "learning_rate": 6.965568619059508e-05, "loss": 1.5969, "step": 155200 }, { "epoch": 0.3063210587341307, "grad_norm": 0.11259113997220993, "learning_rate": 6.964934596623038e-05, "loss": 1.5955, "step": 155232 }, { "epoch": 0.30638420469552713, "grad_norm": 0.10678324103355408, "learning_rate": 6.964300574186569e-05, "loss": 1.598, "step": 155264 }, { "epoch": 0.3064473506569236, "grad_norm": 0.11580517888069153, "learning_rate": 6.9636665517501e-05, "loss": 1.5881, "step": 155296 }, { "epoch": 0.30651049661831997, "grad_norm": 0.1062786802649498, "learning_rate": 6.963032529313631e-05, "loss": 1.5901, "step": 155328 }, { "epoch": 0.3065736425797164, "grad_norm": 0.1118519976735115, "learning_rate": 6.962398506877162e-05, "loss": 1.5993, "step": 155360 }, { "epoch": 0.30663678854111287, "grad_norm": 0.10496975481510162, "learning_rate": 6.961764484440694e-05, "loss": 1.5917, "step": 155392 }, { "epoch": 0.3066999345025093, "grad_norm": 0.11165449023246765, "learning_rate": 6.961130462004225e-05, "loss": 1.5978, "step": 155424 }, { "epoch": 0.30676308046390577, "grad_norm": 0.11271575838327408, "learning_rate": 6.960496439567755e-05, "loss": 1.5939, "step": 155456 }, { "epoch": 0.3068262264253022, "grad_norm": 0.11157065629959106, "learning_rate": 6.959862417131287e-05, "loss": 1.5995, "step": 155488 }, { "epoch": 0.3068893723866986, "grad_norm": 0.10466542840003967, "learning_rate": 6.959228394694818e-05, "loss": 1.6123, "step": 155520 }, { "epoch": 0.30695251834809506, "grad_norm": 0.10684055089950562, "learning_rate": 6.95859437225835e-05, "loss": 1.5835, "step": 155552 }, { "epoch": 0.3070156643094915, "grad_norm": 0.11516595631837845, "learning_rate": 6.95796034982188e-05, "loss": 1.6044, "step": 155584 }, { "epoch": 0.30707881027088796, "grad_norm": 0.10631965845823288, "learning_rate": 6.95732632738541e-05, "loss": 1.6077, "step": 155616 }, { "epoch": 0.3071419562322844, "grad_norm": 0.10585325956344604, "learning_rate": 6.956692304948941e-05, "loss": 1.5988, "step": 155648 }, { "epoch": 0.30720510219368086, "grad_norm": 0.10833197087049484, "learning_rate": 6.956058282512473e-05, "loss": 1.5951, "step": 155680 }, { "epoch": 0.30726824815507725, "grad_norm": 0.11436755955219269, "learning_rate": 6.955424260076003e-05, "loss": 1.6001, "step": 155712 }, { "epoch": 0.3073313941164737, "grad_norm": 0.10563050210475922, "learning_rate": 6.954790237639534e-05, "loss": 1.5977, "step": 155744 }, { "epoch": 0.30739454007787015, "grad_norm": 0.10954467207193375, "learning_rate": 6.954156215203066e-05, "loss": 1.5903, "step": 155776 }, { "epoch": 0.3074576860392666, "grad_norm": 0.1087346225976944, "learning_rate": 6.953522192766597e-05, "loss": 1.6036, "step": 155808 }, { "epoch": 0.30752083200066305, "grad_norm": 0.1167093887925148, "learning_rate": 6.952888170330127e-05, "loss": 1.5952, "step": 155840 }, { "epoch": 0.3075839779620595, "grad_norm": 0.11237960308790207, "learning_rate": 6.952254147893659e-05, "loss": 1.597, "step": 155872 }, { "epoch": 0.3076471239234559, "grad_norm": 0.10673242807388306, "learning_rate": 6.95162012545719e-05, "loss": 1.5953, "step": 155904 }, { "epoch": 0.30771026988485234, "grad_norm": 0.11010044068098068, "learning_rate": 6.950986103020722e-05, "loss": 1.5848, "step": 155936 }, { "epoch": 0.3077734158462488, "grad_norm": 0.1044807955622673, "learning_rate": 6.950352080584252e-05, "loss": 1.5741, "step": 155968 }, { "epoch": 0.30783656180764524, "grad_norm": 0.10707549750804901, "learning_rate": 6.949718058147783e-05, "loss": 1.5924, "step": 156000 }, { "epoch": 0.3078997077690417, "grad_norm": 0.10744640976190567, "learning_rate": 6.949084035711313e-05, "loss": 1.6009, "step": 156032 }, { "epoch": 0.30796285373043814, "grad_norm": 0.10549584776163101, "learning_rate": 6.948450013274845e-05, "loss": 1.5933, "step": 156064 }, { "epoch": 0.30802599969183453, "grad_norm": 0.1125851422548294, "learning_rate": 6.947815990838376e-05, "loss": 1.5983, "step": 156096 }, { "epoch": 0.308089145653231, "grad_norm": 0.11948680132627487, "learning_rate": 6.947181968401906e-05, "loss": 1.597, "step": 156128 }, { "epoch": 0.3081522916146274, "grad_norm": 0.10488571226596832, "learning_rate": 6.946547945965438e-05, "loss": 1.5984, "step": 156160 }, { "epoch": 0.3082154375760239, "grad_norm": 0.10445652157068253, "learning_rate": 6.94591392352897e-05, "loss": 1.5849, "step": 156192 }, { "epoch": 0.3082785835374203, "grad_norm": 0.11123156547546387, "learning_rate": 6.945279901092501e-05, "loss": 1.5971, "step": 156224 }, { "epoch": 0.3083417294988168, "grad_norm": 0.1111505851149559, "learning_rate": 6.944645878656031e-05, "loss": 1.5985, "step": 156256 }, { "epoch": 0.30840487546021317, "grad_norm": 0.10809691250324249, "learning_rate": 6.944011856219562e-05, "loss": 1.5918, "step": 156288 }, { "epoch": 0.3084680214216096, "grad_norm": 0.10672581195831299, "learning_rate": 6.943377833783094e-05, "loss": 1.5942, "step": 156320 }, { "epoch": 0.30853116738300607, "grad_norm": 0.10128645598888397, "learning_rate": 6.942743811346625e-05, "loss": 1.5874, "step": 156352 }, { "epoch": 0.3085943133444025, "grad_norm": 0.1070328876376152, "learning_rate": 6.942109788910155e-05, "loss": 1.5951, "step": 156384 }, { "epoch": 0.30865745930579896, "grad_norm": 0.1111120730638504, "learning_rate": 6.941475766473687e-05, "loss": 1.6038, "step": 156416 }, { "epoch": 0.3087206052671954, "grad_norm": 0.10825317353010178, "learning_rate": 6.940841744037217e-05, "loss": 1.6056, "step": 156448 }, { "epoch": 0.3087837512285918, "grad_norm": 0.11818309873342514, "learning_rate": 6.940207721600748e-05, "loss": 1.5906, "step": 156480 }, { "epoch": 0.30884689718998826, "grad_norm": 0.11527826637029648, "learning_rate": 6.939573699164278e-05, "loss": 1.5933, "step": 156512 }, { "epoch": 0.3089100431513847, "grad_norm": 0.11291716992855072, "learning_rate": 6.93893967672781e-05, "loss": 1.5826, "step": 156544 }, { "epoch": 0.30897318911278115, "grad_norm": 0.10666730254888535, "learning_rate": 6.938305654291341e-05, "loss": 1.586, "step": 156576 }, { "epoch": 0.3090363350741776, "grad_norm": 0.10332287847995758, "learning_rate": 6.937671631854873e-05, "loss": 1.595, "step": 156608 }, { "epoch": 0.30909948103557405, "grad_norm": 0.1033509150147438, "learning_rate": 6.937037609418403e-05, "loss": 1.6067, "step": 156640 }, { "epoch": 0.30916262699697045, "grad_norm": 0.10919076949357986, "learning_rate": 6.936403586981934e-05, "loss": 1.604, "step": 156672 }, { "epoch": 0.3092257729583669, "grad_norm": 0.10804831236600876, "learning_rate": 6.935769564545466e-05, "loss": 1.5966, "step": 156704 }, { "epoch": 0.30928891891976334, "grad_norm": 0.11538102477788925, "learning_rate": 6.935135542108997e-05, "loss": 1.5889, "step": 156736 }, { "epoch": 0.3093520648811598, "grad_norm": 0.10929667949676514, "learning_rate": 6.934501519672529e-05, "loss": 1.6088, "step": 156768 }, { "epoch": 0.30941521084255624, "grad_norm": 0.10778364539146423, "learning_rate": 6.933867497236059e-05, "loss": 1.6032, "step": 156800 }, { "epoch": 0.3094783568039527, "grad_norm": 0.12172455340623856, "learning_rate": 6.93323347479959e-05, "loss": 1.5986, "step": 156832 }, { "epoch": 0.30954150276534914, "grad_norm": 0.10607221722602844, "learning_rate": 6.93259945236312e-05, "loss": 1.6037, "step": 156864 }, { "epoch": 0.30960464872674553, "grad_norm": 0.10583367943763733, "learning_rate": 6.931965429926652e-05, "loss": 1.5868, "step": 156896 }, { "epoch": 0.309667794688142, "grad_norm": 0.10794432461261749, "learning_rate": 6.931331407490182e-05, "loss": 1.5992, "step": 156928 }, { "epoch": 0.30973094064953843, "grad_norm": 0.10778047889471054, "learning_rate": 6.930697385053713e-05, "loss": 1.5858, "step": 156960 }, { "epoch": 0.3097940866109349, "grad_norm": 0.11203970015048981, "learning_rate": 6.930063362617245e-05, "loss": 1.596, "step": 156992 }, { "epoch": 0.30985723257233133, "grad_norm": 0.11195708066225052, "learning_rate": 6.929429340180776e-05, "loss": 1.6011, "step": 157024 }, { "epoch": 0.3099203785337278, "grad_norm": 0.11102274805307388, "learning_rate": 6.928795317744306e-05, "loss": 1.596, "step": 157056 }, { "epoch": 0.3099835244951242, "grad_norm": 0.1033940240740776, "learning_rate": 6.928161295307838e-05, "loss": 1.5889, "step": 157088 }, { "epoch": 0.3100466704565206, "grad_norm": 0.11119741946458817, "learning_rate": 6.92752727287137e-05, "loss": 1.5916, "step": 157120 }, { "epoch": 0.3101098164179171, "grad_norm": 0.11409356445074081, "learning_rate": 6.926893250434901e-05, "loss": 1.6053, "step": 157152 }, { "epoch": 0.3101729623793135, "grad_norm": 0.1127903163433075, "learning_rate": 6.926259227998431e-05, "loss": 1.5831, "step": 157184 }, { "epoch": 0.31023610834070997, "grad_norm": 0.11782166361808777, "learning_rate": 6.925625205561962e-05, "loss": 1.595, "step": 157216 }, { "epoch": 0.3102992543021064, "grad_norm": 0.11755773425102234, "learning_rate": 6.924991183125494e-05, "loss": 1.6002, "step": 157248 }, { "epoch": 0.3103624002635028, "grad_norm": 0.10300610214471817, "learning_rate": 6.924357160689024e-05, "loss": 1.5965, "step": 157280 }, { "epoch": 0.31042554622489926, "grad_norm": 0.10954927653074265, "learning_rate": 6.923723138252554e-05, "loss": 1.5879, "step": 157312 }, { "epoch": 0.3104886921862957, "grad_norm": 0.10760588198900223, "learning_rate": 6.923089115816086e-05, "loss": 1.5973, "step": 157344 }, { "epoch": 0.31055183814769216, "grad_norm": 0.1067931205034256, "learning_rate": 6.922455093379617e-05, "loss": 1.5779, "step": 157376 }, { "epoch": 0.3106149841090886, "grad_norm": 0.1142549142241478, "learning_rate": 6.921821070943148e-05, "loss": 1.5802, "step": 157408 }, { "epoch": 0.31067813007048506, "grad_norm": 0.11532670259475708, "learning_rate": 6.92118704850668e-05, "loss": 1.6029, "step": 157440 }, { "epoch": 0.31074127603188145, "grad_norm": 0.10625465214252472, "learning_rate": 6.92055302607021e-05, "loss": 1.5931, "step": 157472 }, { "epoch": 0.3108044219932779, "grad_norm": 0.10834839940071106, "learning_rate": 6.919919003633741e-05, "loss": 1.6007, "step": 157504 }, { "epoch": 0.31086756795467435, "grad_norm": 0.11619570106267929, "learning_rate": 6.919284981197273e-05, "loss": 1.5911, "step": 157536 }, { "epoch": 0.3109307139160708, "grad_norm": 0.1049533560872078, "learning_rate": 6.918650958760804e-05, "loss": 1.5836, "step": 157568 }, { "epoch": 0.31099385987746725, "grad_norm": 0.10432977974414825, "learning_rate": 6.918016936324334e-05, "loss": 1.5901, "step": 157600 }, { "epoch": 0.3110570058388637, "grad_norm": 0.11452938616275787, "learning_rate": 6.917382913887866e-05, "loss": 1.6011, "step": 157632 }, { "epoch": 0.3111201518002601, "grad_norm": 0.10731777548789978, "learning_rate": 6.916748891451397e-05, "loss": 1.5998, "step": 157664 }, { "epoch": 0.31118329776165654, "grad_norm": 0.1118592843413353, "learning_rate": 6.916114869014927e-05, "loss": 1.6049, "step": 157696 }, { "epoch": 0.311246443723053, "grad_norm": 0.10823763161897659, "learning_rate": 6.915480846578458e-05, "loss": 1.596, "step": 157728 }, { "epoch": 0.31130958968444944, "grad_norm": 0.11033618450164795, "learning_rate": 6.914846824141989e-05, "loss": 1.5786, "step": 157760 }, { "epoch": 0.3113727356458459, "grad_norm": 0.11134444177150726, "learning_rate": 6.91421280170552e-05, "loss": 1.5877, "step": 157792 }, { "epoch": 0.31143588160724234, "grad_norm": 0.10570427775382996, "learning_rate": 6.913578779269052e-05, "loss": 1.5932, "step": 157824 }, { "epoch": 0.31149902756863873, "grad_norm": 0.10497938096523285, "learning_rate": 6.912944756832582e-05, "loss": 1.5891, "step": 157856 }, { "epoch": 0.3115621735300352, "grad_norm": 0.10699564963579178, "learning_rate": 6.912310734396113e-05, "loss": 1.5875, "step": 157888 }, { "epoch": 0.31162531949143163, "grad_norm": 0.11236100643873215, "learning_rate": 6.911676711959645e-05, "loss": 1.5887, "step": 157920 }, { "epoch": 0.3116884654528281, "grad_norm": 0.12021209299564362, "learning_rate": 6.911042689523176e-05, "loss": 1.602, "step": 157952 }, { "epoch": 0.3117516114142245, "grad_norm": 0.11001028120517731, "learning_rate": 6.910408667086707e-05, "loss": 1.6039, "step": 157984 }, { "epoch": 0.311814757375621, "grad_norm": 0.114458367228508, "learning_rate": 6.909774644650238e-05, "loss": 1.6109, "step": 158016 }, { "epoch": 0.31187790333701737, "grad_norm": 0.10677354037761688, "learning_rate": 6.90914062221377e-05, "loss": 1.5957, "step": 158048 }, { "epoch": 0.3119410492984138, "grad_norm": 0.11993306130170822, "learning_rate": 6.9085065997773e-05, "loss": 1.6017, "step": 158080 }, { "epoch": 0.31200419525981027, "grad_norm": 0.11413183063268661, "learning_rate": 6.907872577340831e-05, "loss": 1.6018, "step": 158112 }, { "epoch": 0.3120673412212067, "grad_norm": 0.11328325420618057, "learning_rate": 6.907238554904361e-05, "loss": 1.597, "step": 158144 }, { "epoch": 0.31213048718260317, "grad_norm": 0.10782735794782639, "learning_rate": 6.906604532467893e-05, "loss": 1.5913, "step": 158176 }, { "epoch": 0.3121936331439996, "grad_norm": 0.11115462332963943, "learning_rate": 6.905970510031424e-05, "loss": 1.5905, "step": 158208 }, { "epoch": 0.312256779105396, "grad_norm": 0.10908707231283188, "learning_rate": 6.905336487594955e-05, "loss": 1.5783, "step": 158240 }, { "epoch": 0.31231992506679246, "grad_norm": 0.10839131474494934, "learning_rate": 6.904702465158486e-05, "loss": 1.5801, "step": 158272 }, { "epoch": 0.3123830710281889, "grad_norm": 0.10044689476490021, "learning_rate": 6.904068442722017e-05, "loss": 1.5916, "step": 158304 }, { "epoch": 0.31244621698958536, "grad_norm": 0.11105819046497345, "learning_rate": 6.903434420285548e-05, "loss": 1.5897, "step": 158336 }, { "epoch": 0.3125093629509818, "grad_norm": 0.1068510115146637, "learning_rate": 6.90280039784908e-05, "loss": 1.5842, "step": 158368 }, { "epoch": 0.31257250891237826, "grad_norm": 0.10635650157928467, "learning_rate": 6.90216637541261e-05, "loss": 1.5992, "step": 158400 }, { "epoch": 0.31263565487377465, "grad_norm": 0.10454981029033661, "learning_rate": 6.901532352976141e-05, "loss": 1.6038, "step": 158432 }, { "epoch": 0.3126988008351711, "grad_norm": 0.11232787370681763, "learning_rate": 6.900898330539673e-05, "loss": 1.6034, "step": 158464 }, { "epoch": 0.31276194679656755, "grad_norm": 0.11920122802257538, "learning_rate": 6.900264308103203e-05, "loss": 1.5945, "step": 158496 }, { "epoch": 0.312825092757964, "grad_norm": 0.11414216458797455, "learning_rate": 6.899630285666734e-05, "loss": 1.5997, "step": 158528 }, { "epoch": 0.31288823871936045, "grad_norm": 0.1111496314406395, "learning_rate": 6.898996263230265e-05, "loss": 1.594, "step": 158560 }, { "epoch": 0.3129513846807569, "grad_norm": 0.11295372247695923, "learning_rate": 6.898362240793796e-05, "loss": 1.6024, "step": 158592 }, { "epoch": 0.31301453064215334, "grad_norm": 0.10866402834653854, "learning_rate": 6.897728218357327e-05, "loss": 1.5901, "step": 158624 }, { "epoch": 0.31307767660354974, "grad_norm": 0.11278446018695831, "learning_rate": 6.897094195920858e-05, "loss": 1.5909, "step": 158656 }, { "epoch": 0.3131408225649462, "grad_norm": 0.1106196865439415, "learning_rate": 6.896460173484389e-05, "loss": 1.5888, "step": 158688 }, { "epoch": 0.31320396852634264, "grad_norm": 0.1180410087108612, "learning_rate": 6.89582615104792e-05, "loss": 1.5948, "step": 158720 }, { "epoch": 0.3132671144877391, "grad_norm": 0.10209639370441437, "learning_rate": 6.895192128611452e-05, "loss": 1.5947, "step": 158752 }, { "epoch": 0.31333026044913553, "grad_norm": 0.10905079543590546, "learning_rate": 6.894558106174983e-05, "loss": 1.59, "step": 158784 }, { "epoch": 0.313393406410532, "grad_norm": 0.10796741396188736, "learning_rate": 6.893924083738514e-05, "loss": 1.5868, "step": 158816 }, { "epoch": 0.3134565523719284, "grad_norm": 0.11838201433420181, "learning_rate": 6.893290061302045e-05, "loss": 1.584, "step": 158848 }, { "epoch": 0.3135196983333248, "grad_norm": 0.11917586624622345, "learning_rate": 6.892656038865576e-05, "loss": 1.5853, "step": 158880 }, { "epoch": 0.3135828442947213, "grad_norm": 0.10489118099212646, "learning_rate": 6.892022016429107e-05, "loss": 1.597, "step": 158912 }, { "epoch": 0.3136459902561177, "grad_norm": 0.10635361820459366, "learning_rate": 6.891387993992638e-05, "loss": 1.6046, "step": 158944 }, { "epoch": 0.3137091362175142, "grad_norm": 0.1102752611041069, "learning_rate": 6.890753971556168e-05, "loss": 1.594, "step": 158976 }, { "epoch": 0.3137722821789106, "grad_norm": 0.10674628615379333, "learning_rate": 6.8901199491197e-05, "loss": 1.5869, "step": 159008 }, { "epoch": 0.313835428140307, "grad_norm": 0.11249663680791855, "learning_rate": 6.889485926683231e-05, "loss": 1.5928, "step": 159040 }, { "epoch": 0.31389857410170346, "grad_norm": 0.11040057986974716, "learning_rate": 6.888851904246761e-05, "loss": 1.5908, "step": 159072 }, { "epoch": 0.3139617200630999, "grad_norm": 0.11433102190494537, "learning_rate": 6.888217881810293e-05, "loss": 1.5853, "step": 159104 }, { "epoch": 0.31402486602449636, "grad_norm": 0.10093247145414352, "learning_rate": 6.887583859373824e-05, "loss": 1.5982, "step": 159136 }, { "epoch": 0.3140880119858928, "grad_norm": 0.10722556710243225, "learning_rate": 6.886949836937355e-05, "loss": 1.5972, "step": 159168 }, { "epoch": 0.31415115794728926, "grad_norm": 0.11650107800960541, "learning_rate": 6.886315814500886e-05, "loss": 1.5906, "step": 159200 }, { "epoch": 0.31421430390868565, "grad_norm": 0.11217987537384033, "learning_rate": 6.885681792064417e-05, "loss": 1.6025, "step": 159232 }, { "epoch": 0.3142774498700821, "grad_norm": 0.1122429296374321, "learning_rate": 6.885047769627948e-05, "loss": 1.5926, "step": 159264 }, { "epoch": 0.31434059583147855, "grad_norm": 0.11013469845056534, "learning_rate": 6.88441374719148e-05, "loss": 1.5922, "step": 159296 }, { "epoch": 0.314403741792875, "grad_norm": 0.10739205032587051, "learning_rate": 6.88377972475501e-05, "loss": 1.609, "step": 159328 }, { "epoch": 0.31446688775427145, "grad_norm": 0.11144138872623444, "learning_rate": 6.883145702318542e-05, "loss": 1.5856, "step": 159360 }, { "epoch": 0.3145300337156679, "grad_norm": 0.11184258759021759, "learning_rate": 6.882511679882072e-05, "loss": 1.5908, "step": 159392 }, { "epoch": 0.3145931796770643, "grad_norm": 0.11355500668287277, "learning_rate": 6.881877657445603e-05, "loss": 1.5909, "step": 159424 }, { "epoch": 0.31465632563846074, "grad_norm": 0.1178479790687561, "learning_rate": 6.881243635009135e-05, "loss": 1.5941, "step": 159456 }, { "epoch": 0.3147194715998572, "grad_norm": 0.11037848144769669, "learning_rate": 6.880609612572665e-05, "loss": 1.5923, "step": 159488 }, { "epoch": 0.31478261756125364, "grad_norm": 0.10712146013975143, "learning_rate": 6.879975590136196e-05, "loss": 1.5997, "step": 159520 }, { "epoch": 0.3148457635226501, "grad_norm": 0.111323781311512, "learning_rate": 6.879341567699728e-05, "loss": 1.6061, "step": 159552 }, { "epoch": 0.31490890948404654, "grad_norm": 0.11086145788431168, "learning_rate": 6.878707545263259e-05, "loss": 1.5827, "step": 159584 }, { "epoch": 0.31497205544544293, "grad_norm": 0.10983403027057648, "learning_rate": 6.878073522826789e-05, "loss": 1.5836, "step": 159616 }, { "epoch": 0.3150352014068394, "grad_norm": 0.10668952018022537, "learning_rate": 6.87743950039032e-05, "loss": 1.5942, "step": 159648 }, { "epoch": 0.31509834736823583, "grad_norm": 0.11072690039873123, "learning_rate": 6.876805477953852e-05, "loss": 1.6003, "step": 159680 }, { "epoch": 0.3151614933296323, "grad_norm": 0.11020134389400482, "learning_rate": 6.876171455517383e-05, "loss": 1.6023, "step": 159712 }, { "epoch": 0.31522463929102873, "grad_norm": 0.11055891215801239, "learning_rate": 6.875537433080914e-05, "loss": 1.6019, "step": 159744 }, { "epoch": 0.3152877852524252, "grad_norm": 0.10114159435033798, "learning_rate": 6.874903410644444e-05, "loss": 1.5761, "step": 159776 }, { "epoch": 0.3153509312138216, "grad_norm": 0.10959020256996155, "learning_rate": 6.874269388207975e-05, "loss": 1.5921, "step": 159808 }, { "epoch": 0.315414077175218, "grad_norm": 0.112167127430439, "learning_rate": 6.873635365771507e-05, "loss": 1.594, "step": 159840 }, { "epoch": 0.31547722313661447, "grad_norm": 0.11071991175413132, "learning_rate": 6.873001343335037e-05, "loss": 1.5917, "step": 159872 }, { "epoch": 0.3155403690980109, "grad_norm": 0.11651685833930969, "learning_rate": 6.872367320898568e-05, "loss": 1.5957, "step": 159904 }, { "epoch": 0.31560351505940737, "grad_norm": 0.1150650829076767, "learning_rate": 6.8717332984621e-05, "loss": 1.5979, "step": 159936 }, { "epoch": 0.3156666610208038, "grad_norm": 0.1077534481883049, "learning_rate": 6.871099276025631e-05, "loss": 1.5877, "step": 159968 }, { "epoch": 0.3157298069822002, "grad_norm": 0.11488234251737595, "learning_rate": 6.870465253589162e-05, "loss": 1.592, "step": 160000 }, { "epoch": 0.31579295294359666, "grad_norm": 0.11051345616579056, "learning_rate": 6.869831231152693e-05, "loss": 1.592, "step": 160032 }, { "epoch": 0.3158560989049931, "grad_norm": 0.10786715149879456, "learning_rate": 6.869197208716224e-05, "loss": 1.5975, "step": 160064 }, { "epoch": 0.31591924486638956, "grad_norm": 0.11121596395969391, "learning_rate": 6.868563186279756e-05, "loss": 1.5963, "step": 160096 }, { "epoch": 0.315982390827786, "grad_norm": 0.11501624435186386, "learning_rate": 6.867929163843287e-05, "loss": 1.5988, "step": 160128 }, { "epoch": 0.31604553678918246, "grad_norm": 0.11093879491090775, "learning_rate": 6.867295141406817e-05, "loss": 1.5823, "step": 160160 }, { "epoch": 0.3161086827505789, "grad_norm": 0.1130003109574318, "learning_rate": 6.866661118970347e-05, "loss": 1.6006, "step": 160192 }, { "epoch": 0.3161718287119753, "grad_norm": 0.11832395941019058, "learning_rate": 6.866027096533879e-05, "loss": 1.5877, "step": 160224 }, { "epoch": 0.31623497467337175, "grad_norm": 0.10775455832481384, "learning_rate": 6.86539307409741e-05, "loss": 1.5882, "step": 160256 }, { "epoch": 0.3162981206347682, "grad_norm": 0.11109977215528488, "learning_rate": 6.86475905166094e-05, "loss": 1.6057, "step": 160288 }, { "epoch": 0.31636126659616465, "grad_norm": 0.11257420480251312, "learning_rate": 6.864125029224472e-05, "loss": 1.5989, "step": 160320 }, { "epoch": 0.3164244125575611, "grad_norm": 0.10840550810098648, "learning_rate": 6.863491006788003e-05, "loss": 1.5926, "step": 160352 }, { "epoch": 0.31648755851895755, "grad_norm": 0.10533870756626129, "learning_rate": 6.862856984351535e-05, "loss": 1.613, "step": 160384 }, { "epoch": 0.31655070448035394, "grad_norm": 0.1078510731458664, "learning_rate": 6.862222961915065e-05, "loss": 1.6042, "step": 160416 }, { "epoch": 0.3166138504417504, "grad_norm": 0.10418468713760376, "learning_rate": 6.861588939478596e-05, "loss": 1.5873, "step": 160448 }, { "epoch": 0.31667699640314684, "grad_norm": 0.11282748728990555, "learning_rate": 6.860954917042128e-05, "loss": 1.5813, "step": 160480 }, { "epoch": 0.3167401423645433, "grad_norm": 0.10563211143016815, "learning_rate": 6.860320894605659e-05, "loss": 1.5971, "step": 160512 }, { "epoch": 0.31680328832593974, "grad_norm": 0.10779976844787598, "learning_rate": 6.859686872169189e-05, "loss": 1.6065, "step": 160544 }, { "epoch": 0.3168664342873362, "grad_norm": 0.10460280627012253, "learning_rate": 6.85905284973272e-05, "loss": 1.5941, "step": 160576 }, { "epoch": 0.3169295802487326, "grad_norm": 0.1153113842010498, "learning_rate": 6.858418827296251e-05, "loss": 1.5903, "step": 160608 }, { "epoch": 0.316992726210129, "grad_norm": 0.10854650288820267, "learning_rate": 6.857784804859782e-05, "loss": 1.6013, "step": 160640 }, { "epoch": 0.3170558721715255, "grad_norm": 0.10419408231973648, "learning_rate": 6.857150782423314e-05, "loss": 1.5905, "step": 160672 }, { "epoch": 0.3171190181329219, "grad_norm": 0.11376214772462845, "learning_rate": 6.856516759986844e-05, "loss": 1.598, "step": 160704 }, { "epoch": 0.3171821640943184, "grad_norm": 0.11283624172210693, "learning_rate": 6.855882737550375e-05, "loss": 1.6017, "step": 160736 }, { "epoch": 0.3172453100557148, "grad_norm": 0.11211321502923965, "learning_rate": 6.855248715113907e-05, "loss": 1.593, "step": 160768 }, { "epoch": 0.3173084560171112, "grad_norm": 0.108080193400383, "learning_rate": 6.854614692677438e-05, "loss": 1.6065, "step": 160800 }, { "epoch": 0.31737160197850767, "grad_norm": 0.1091194823384285, "learning_rate": 6.853980670240968e-05, "loss": 1.5901, "step": 160832 }, { "epoch": 0.3174347479399041, "grad_norm": 0.10642543435096741, "learning_rate": 6.8533466478045e-05, "loss": 1.6039, "step": 160864 }, { "epoch": 0.31749789390130057, "grad_norm": 0.11542787402868271, "learning_rate": 6.852712625368031e-05, "loss": 1.5977, "step": 160896 }, { "epoch": 0.317561039862697, "grad_norm": 0.11110210418701172, "learning_rate": 6.852078602931563e-05, "loss": 1.5842, "step": 160928 }, { "epoch": 0.31762418582409346, "grad_norm": 0.10630090534687042, "learning_rate": 6.851444580495093e-05, "loss": 1.6028, "step": 160960 }, { "epoch": 0.31768733178548986, "grad_norm": 0.1142411008477211, "learning_rate": 6.850810558058624e-05, "loss": 1.5819, "step": 160992 }, { "epoch": 0.3177504777468863, "grad_norm": 0.10625431686639786, "learning_rate": 6.850176535622154e-05, "loss": 1.5893, "step": 161024 }, { "epoch": 0.31781362370828276, "grad_norm": 0.10537683218717575, "learning_rate": 6.849542513185686e-05, "loss": 1.5942, "step": 161056 }, { "epoch": 0.3178767696696792, "grad_norm": 0.10894153267145157, "learning_rate": 6.848908490749216e-05, "loss": 1.5887, "step": 161088 }, { "epoch": 0.31793991563107565, "grad_norm": 0.10605444759130478, "learning_rate": 6.848274468312747e-05, "loss": 1.5982, "step": 161120 }, { "epoch": 0.3180030615924721, "grad_norm": 0.11229405552148819, "learning_rate": 6.847640445876279e-05, "loss": 1.5946, "step": 161152 }, { "epoch": 0.3180662075538685, "grad_norm": 0.10472061485052109, "learning_rate": 6.84700642343981e-05, "loss": 1.6101, "step": 161184 }, { "epoch": 0.31812935351526495, "grad_norm": 0.11303604394197464, "learning_rate": 6.84637240100334e-05, "loss": 1.6002, "step": 161216 }, { "epoch": 0.3181924994766614, "grad_norm": 0.11491641402244568, "learning_rate": 6.845738378566872e-05, "loss": 1.5865, "step": 161248 }, { "epoch": 0.31825564543805784, "grad_norm": 0.11333022266626358, "learning_rate": 6.845104356130403e-05, "loss": 1.5895, "step": 161280 }, { "epoch": 0.3183187913994543, "grad_norm": 0.11232884228229523, "learning_rate": 6.844470333693935e-05, "loss": 1.5993, "step": 161312 }, { "epoch": 0.31838193736085074, "grad_norm": 0.11450444161891937, "learning_rate": 6.843836311257466e-05, "loss": 1.5887, "step": 161344 }, { "epoch": 0.31844508332224714, "grad_norm": 0.11097846180200577, "learning_rate": 6.843202288820996e-05, "loss": 1.587, "step": 161376 }, { "epoch": 0.3185082292836436, "grad_norm": 0.10028678178787231, "learning_rate": 6.842568266384528e-05, "loss": 1.5876, "step": 161408 }, { "epoch": 0.31857137524504003, "grad_norm": 0.10802214592695236, "learning_rate": 6.841934243948058e-05, "loss": 1.603, "step": 161440 }, { "epoch": 0.3186345212064365, "grad_norm": 0.10867012292146683, "learning_rate": 6.841300221511589e-05, "loss": 1.586, "step": 161472 }, { "epoch": 0.31869766716783293, "grad_norm": 0.10596240311861038, "learning_rate": 6.840666199075119e-05, "loss": 1.5819, "step": 161504 }, { "epoch": 0.3187608131292294, "grad_norm": 0.10755529999732971, "learning_rate": 6.840032176638651e-05, "loss": 1.5902, "step": 161536 }, { "epoch": 0.3188239590906258, "grad_norm": 0.11589985340833664, "learning_rate": 6.839398154202182e-05, "loss": 1.5871, "step": 161568 }, { "epoch": 0.3188871050520222, "grad_norm": 0.11852046847343445, "learning_rate": 6.838764131765714e-05, "loss": 1.5944, "step": 161600 }, { "epoch": 0.3189502510134187, "grad_norm": 0.10627137124538422, "learning_rate": 6.838130109329244e-05, "loss": 1.602, "step": 161632 }, { "epoch": 0.3190133969748151, "grad_norm": 0.10510344803333282, "learning_rate": 6.837496086892775e-05, "loss": 1.6018, "step": 161664 }, { "epoch": 0.31907654293621157, "grad_norm": 0.11405289173126221, "learning_rate": 6.836862064456307e-05, "loss": 1.586, "step": 161696 }, { "epoch": 0.319139688897608, "grad_norm": 0.12056142836809158, "learning_rate": 6.836228042019838e-05, "loss": 1.5878, "step": 161728 }, { "epoch": 0.3192028348590044, "grad_norm": 0.10971621423959732, "learning_rate": 6.835594019583368e-05, "loss": 1.5892, "step": 161760 }, { "epoch": 0.31926598082040086, "grad_norm": 0.11604329198598862, "learning_rate": 6.8349599971469e-05, "loss": 1.5927, "step": 161792 }, { "epoch": 0.3193291267817973, "grad_norm": 0.1260664165019989, "learning_rate": 6.834325974710431e-05, "loss": 1.5922, "step": 161824 }, { "epoch": 0.31939227274319376, "grad_norm": 0.10880137979984283, "learning_rate": 6.833691952273961e-05, "loss": 1.5963, "step": 161856 }, { "epoch": 0.3194554187045902, "grad_norm": 0.10119996219873428, "learning_rate": 6.833057929837491e-05, "loss": 1.5977, "step": 161888 }, { "epoch": 0.31951856466598666, "grad_norm": 0.11640535295009613, "learning_rate": 6.832423907401023e-05, "loss": 1.5963, "step": 161920 }, { "epoch": 0.3195817106273831, "grad_norm": 0.1023346483707428, "learning_rate": 6.831789884964554e-05, "loss": 1.5878, "step": 161952 }, { "epoch": 0.3196448565887795, "grad_norm": 0.1058148518204689, "learning_rate": 6.831155862528086e-05, "loss": 1.5916, "step": 161984 }, { "epoch": 0.31970800255017595, "grad_norm": 0.10869254916906357, "learning_rate": 6.830521840091617e-05, "loss": 1.5805, "step": 162016 }, { "epoch": 0.3197711485115724, "grad_norm": 0.1046665757894516, "learning_rate": 6.829887817655147e-05, "loss": 1.5931, "step": 162048 }, { "epoch": 0.31983429447296885, "grad_norm": 0.11770608276128769, "learning_rate": 6.829253795218679e-05, "loss": 1.5933, "step": 162080 }, { "epoch": 0.3198974404343653, "grad_norm": 0.11175259947776794, "learning_rate": 6.82861977278221e-05, "loss": 1.5951, "step": 162112 }, { "epoch": 0.31996058639576175, "grad_norm": 0.11490031331777573, "learning_rate": 6.827985750345742e-05, "loss": 1.5936, "step": 162144 }, { "epoch": 0.32002373235715814, "grad_norm": 0.11524728685617447, "learning_rate": 6.827351727909272e-05, "loss": 1.5938, "step": 162176 }, { "epoch": 0.3200868783185546, "grad_norm": 0.11893665790557861, "learning_rate": 6.826717705472803e-05, "loss": 1.5884, "step": 162208 }, { "epoch": 0.32015002427995104, "grad_norm": 0.11445010453462601, "learning_rate": 6.826083683036335e-05, "loss": 1.59, "step": 162240 }, { "epoch": 0.3202131702413475, "grad_norm": 0.11429999768733978, "learning_rate": 6.825449660599865e-05, "loss": 1.586, "step": 162272 }, { "epoch": 0.32027631620274394, "grad_norm": 0.10755852609872818, "learning_rate": 6.824815638163395e-05, "loss": 1.5944, "step": 162304 }, { "epoch": 0.3203394621641404, "grad_norm": 0.10976274311542511, "learning_rate": 6.824181615726926e-05, "loss": 1.5985, "step": 162336 }, { "epoch": 0.3204026081255368, "grad_norm": 0.1098981723189354, "learning_rate": 6.823547593290458e-05, "loss": 1.5919, "step": 162368 }, { "epoch": 0.32046575408693323, "grad_norm": 0.1229216530919075, "learning_rate": 6.822913570853989e-05, "loss": 1.6008, "step": 162400 }, { "epoch": 0.3205289000483297, "grad_norm": 0.11485423892736435, "learning_rate": 6.822279548417519e-05, "loss": 1.6001, "step": 162432 }, { "epoch": 0.32059204600972613, "grad_norm": 0.11030910909175873, "learning_rate": 6.821645525981051e-05, "loss": 1.5969, "step": 162464 }, { "epoch": 0.3206551919711226, "grad_norm": 0.11059672385454178, "learning_rate": 6.821011503544582e-05, "loss": 1.5781, "step": 162496 }, { "epoch": 0.320718337932519, "grad_norm": 0.11908461898565292, "learning_rate": 6.820377481108114e-05, "loss": 1.5884, "step": 162528 }, { "epoch": 0.3207814838939154, "grad_norm": 0.11256904900074005, "learning_rate": 6.819743458671644e-05, "loss": 1.5965, "step": 162560 }, { "epoch": 0.32084462985531187, "grad_norm": 0.11307980120182037, "learning_rate": 6.819109436235175e-05, "loss": 1.6062, "step": 162592 }, { "epoch": 0.3209077758167083, "grad_norm": 0.10993563383817673, "learning_rate": 6.818475413798707e-05, "loss": 1.5855, "step": 162624 }, { "epoch": 0.32097092177810477, "grad_norm": 0.10647181421518326, "learning_rate": 6.817841391362237e-05, "loss": 1.595, "step": 162656 }, { "epoch": 0.3210340677395012, "grad_norm": 0.10353179275989532, "learning_rate": 6.817207368925768e-05, "loss": 1.5861, "step": 162688 }, { "epoch": 0.32109721370089767, "grad_norm": 0.11519227176904678, "learning_rate": 6.816573346489298e-05, "loss": 1.6057, "step": 162720 }, { "epoch": 0.32116035966229406, "grad_norm": 0.10504128783941269, "learning_rate": 6.81593932405283e-05, "loss": 1.5845, "step": 162752 }, { "epoch": 0.3212235056236905, "grad_norm": 0.11948299407958984, "learning_rate": 6.815305301616361e-05, "loss": 1.5912, "step": 162784 }, { "epoch": 0.32128665158508696, "grad_norm": 0.10750807821750641, "learning_rate": 6.814671279179893e-05, "loss": 1.5895, "step": 162816 }, { "epoch": 0.3213497975464834, "grad_norm": 0.09923849254846573, "learning_rate": 6.814037256743423e-05, "loss": 1.5845, "step": 162848 }, { "epoch": 0.32141294350787986, "grad_norm": 0.10898289084434509, "learning_rate": 6.813403234306954e-05, "loss": 1.6015, "step": 162880 }, { "epoch": 0.3214760894692763, "grad_norm": 0.11153297871351242, "learning_rate": 6.812769211870486e-05, "loss": 1.5945, "step": 162912 }, { "epoch": 0.3215392354306727, "grad_norm": 0.10640190541744232, "learning_rate": 6.812135189434017e-05, "loss": 1.5921, "step": 162944 }, { "epoch": 0.32160238139206915, "grad_norm": 0.11280359327793121, "learning_rate": 6.811501166997547e-05, "loss": 1.5863, "step": 162976 }, { "epoch": 0.3216655273534656, "grad_norm": 0.1157355085015297, "learning_rate": 6.810867144561079e-05, "loss": 1.6002, "step": 163008 }, { "epoch": 0.32172867331486205, "grad_norm": 0.10953034460544586, "learning_rate": 6.81023312212461e-05, "loss": 1.6148, "step": 163040 }, { "epoch": 0.3217918192762585, "grad_norm": 0.11689390242099762, "learning_rate": 6.80959909968814e-05, "loss": 1.5786, "step": 163072 }, { "epoch": 0.32185496523765494, "grad_norm": 0.10442730039358139, "learning_rate": 6.808965077251672e-05, "loss": 1.59, "step": 163104 }, { "epoch": 0.32191811119905134, "grad_norm": 0.10704589635133743, "learning_rate": 6.808331054815202e-05, "loss": 1.5943, "step": 163136 }, { "epoch": 0.3219812571604478, "grad_norm": 0.12023435533046722, "learning_rate": 6.807697032378733e-05, "loss": 1.5835, "step": 163168 }, { "epoch": 0.32204440312184424, "grad_norm": 0.1139121726155281, "learning_rate": 6.807063009942265e-05, "loss": 1.5974, "step": 163200 }, { "epoch": 0.3221075490832407, "grad_norm": 0.11478232592344284, "learning_rate": 6.806428987505795e-05, "loss": 1.5839, "step": 163232 }, { "epoch": 0.32217069504463713, "grad_norm": 0.1022125855088234, "learning_rate": 6.805794965069326e-05, "loss": 1.5927, "step": 163264 }, { "epoch": 0.3222338410060336, "grad_norm": 0.10918080061674118, "learning_rate": 6.805160942632858e-05, "loss": 1.5942, "step": 163296 }, { "epoch": 0.32229698696743, "grad_norm": 0.11021580547094345, "learning_rate": 6.804526920196389e-05, "loss": 1.5904, "step": 163328 }, { "epoch": 0.3223601329288264, "grad_norm": 0.11104167997837067, "learning_rate": 6.803892897759921e-05, "loss": 1.6126, "step": 163360 }, { "epoch": 0.3224232788902229, "grad_norm": 0.10933711379766464, "learning_rate": 6.803258875323451e-05, "loss": 1.5954, "step": 163392 }, { "epoch": 0.3224864248516193, "grad_norm": 0.10540857166051865, "learning_rate": 6.802624852886982e-05, "loss": 1.6208, "step": 163424 }, { "epoch": 0.3225495708130158, "grad_norm": 0.11258790642023087, "learning_rate": 6.801990830450514e-05, "loss": 1.5844, "step": 163456 }, { "epoch": 0.3226127167744122, "grad_norm": 0.12042684853076935, "learning_rate": 6.801356808014044e-05, "loss": 1.5929, "step": 163488 }, { "epoch": 0.32267586273580867, "grad_norm": 0.11128998547792435, "learning_rate": 6.800722785577575e-05, "loss": 1.5868, "step": 163520 }, { "epoch": 0.32273900869720507, "grad_norm": 0.10672010481357574, "learning_rate": 6.800088763141105e-05, "loss": 1.5838, "step": 163552 }, { "epoch": 0.3228021546586015, "grad_norm": 0.1076037660241127, "learning_rate": 6.799454740704637e-05, "loss": 1.5865, "step": 163584 }, { "epoch": 0.32286530061999796, "grad_norm": 0.11323705315589905, "learning_rate": 6.798820718268168e-05, "loss": 1.5918, "step": 163616 }, { "epoch": 0.3229284465813944, "grad_norm": 0.10990415513515472, "learning_rate": 6.798186695831698e-05, "loss": 1.5998, "step": 163648 }, { "epoch": 0.32299159254279086, "grad_norm": 0.1031436026096344, "learning_rate": 6.79755267339523e-05, "loss": 1.5932, "step": 163680 }, { "epoch": 0.3230547385041873, "grad_norm": 0.11307321488857269, "learning_rate": 6.796918650958761e-05, "loss": 1.5919, "step": 163712 }, { "epoch": 0.3231178844655837, "grad_norm": 0.11065627634525299, "learning_rate": 6.796284628522293e-05, "loss": 1.592, "step": 163744 }, { "epoch": 0.32318103042698015, "grad_norm": 0.10902044922113419, "learning_rate": 6.795650606085823e-05, "loss": 1.5917, "step": 163776 }, { "epoch": 0.3232441763883766, "grad_norm": 0.10561446845531464, "learning_rate": 6.795016583649354e-05, "loss": 1.584, "step": 163808 }, { "epoch": 0.32330732234977305, "grad_norm": 0.1130950003862381, "learning_rate": 6.794382561212886e-05, "loss": 1.5859, "step": 163840 }, { "epoch": 0.3233704683111695, "grad_norm": 0.11332274973392487, "learning_rate": 6.793748538776417e-05, "loss": 1.6003, "step": 163872 }, { "epoch": 0.32343361427256595, "grad_norm": 0.10275166481733322, "learning_rate": 6.793114516339947e-05, "loss": 1.5933, "step": 163904 }, { "epoch": 0.32349676023396234, "grad_norm": 0.10680346935987473, "learning_rate": 6.792480493903477e-05, "loss": 1.5929, "step": 163936 }, { "epoch": 0.3235599061953588, "grad_norm": 0.10647042095661163, "learning_rate": 6.791846471467009e-05, "loss": 1.5995, "step": 163968 }, { "epoch": 0.32362305215675524, "grad_norm": 0.10803354531526566, "learning_rate": 6.79121244903054e-05, "loss": 1.6, "step": 164000 }, { "epoch": 0.3236861981181517, "grad_norm": 0.11125516146421432, "learning_rate": 6.790578426594072e-05, "loss": 1.5958, "step": 164032 }, { "epoch": 0.32374934407954814, "grad_norm": 0.11516519635915756, "learning_rate": 6.789944404157602e-05, "loss": 1.5857, "step": 164064 }, { "epoch": 0.3238124900409446, "grad_norm": 0.10491325706243515, "learning_rate": 6.789310381721133e-05, "loss": 1.5887, "step": 164096 }, { "epoch": 0.323875636002341, "grad_norm": 0.11010641604661942, "learning_rate": 6.788676359284665e-05, "loss": 1.5963, "step": 164128 }, { "epoch": 0.32393878196373743, "grad_norm": 0.11462292075157166, "learning_rate": 6.788042336848196e-05, "loss": 1.5922, "step": 164160 }, { "epoch": 0.3240019279251339, "grad_norm": 0.10731970518827438, "learning_rate": 6.787408314411726e-05, "loss": 1.6029, "step": 164192 }, { "epoch": 0.32406507388653033, "grad_norm": 0.10830609500408173, "learning_rate": 6.786774291975258e-05, "loss": 1.5923, "step": 164224 }, { "epoch": 0.3241282198479268, "grad_norm": 0.10924004763364792, "learning_rate": 6.786140269538789e-05, "loss": 1.5822, "step": 164256 }, { "epoch": 0.32419136580932323, "grad_norm": 0.1176849901676178, "learning_rate": 6.785506247102321e-05, "loss": 1.5892, "step": 164288 }, { "epoch": 0.3242545117707196, "grad_norm": 0.10422728210687637, "learning_rate": 6.784872224665851e-05, "loss": 1.5898, "step": 164320 }, { "epoch": 0.32431765773211607, "grad_norm": 0.11070388555526733, "learning_rate": 6.784238202229381e-05, "loss": 1.5993, "step": 164352 }, { "epoch": 0.3243808036935125, "grad_norm": 0.11063247174024582, "learning_rate": 6.783604179792912e-05, "loss": 1.5929, "step": 164384 }, { "epoch": 0.32444394965490897, "grad_norm": 0.11325111985206604, "learning_rate": 6.782970157356444e-05, "loss": 1.6053, "step": 164416 }, { "epoch": 0.3245070956163054, "grad_norm": 0.11281933635473251, "learning_rate": 6.782336134919974e-05, "loss": 1.5871, "step": 164448 }, { "epoch": 0.32457024157770187, "grad_norm": 0.10405316203832626, "learning_rate": 6.781702112483505e-05, "loss": 1.5913, "step": 164480 }, { "epoch": 0.32463338753909826, "grad_norm": 0.10825597494840622, "learning_rate": 6.781068090047037e-05, "loss": 1.5974, "step": 164512 }, { "epoch": 0.3246965335004947, "grad_norm": 0.10484517365694046, "learning_rate": 6.780434067610568e-05, "loss": 1.6022, "step": 164544 }, { "epoch": 0.32475967946189116, "grad_norm": 0.12093804031610489, "learning_rate": 6.779800045174098e-05, "loss": 1.6005, "step": 164576 }, { "epoch": 0.3248228254232876, "grad_norm": 0.11590174585580826, "learning_rate": 6.77916602273763e-05, "loss": 1.5919, "step": 164608 }, { "epoch": 0.32488597138468406, "grad_norm": 0.10578962415456772, "learning_rate": 6.778532000301161e-05, "loss": 1.5993, "step": 164640 }, { "epoch": 0.3249491173460805, "grad_norm": 0.12327004224061966, "learning_rate": 6.777897977864693e-05, "loss": 1.5802, "step": 164672 }, { "epoch": 0.3250122633074769, "grad_norm": 0.11776653677225113, "learning_rate": 6.777263955428224e-05, "loss": 1.5939, "step": 164704 }, { "epoch": 0.32507540926887335, "grad_norm": 0.1176818385720253, "learning_rate": 6.776629932991754e-05, "loss": 1.5872, "step": 164736 }, { "epoch": 0.3251385552302698, "grad_norm": 0.11193326115608215, "learning_rate": 6.775995910555284e-05, "loss": 1.5989, "step": 164768 }, { "epoch": 0.32520170119166625, "grad_norm": 0.11355994641780853, "learning_rate": 6.775361888118816e-05, "loss": 1.6054, "step": 164800 }, { "epoch": 0.3252648471530627, "grad_norm": 0.11177047342061996, "learning_rate": 6.774727865682347e-05, "loss": 1.5805, "step": 164832 }, { "epoch": 0.32532799311445915, "grad_norm": 0.11082953214645386, "learning_rate": 6.774093843245877e-05, "loss": 1.589, "step": 164864 }, { "epoch": 0.32539113907585554, "grad_norm": 0.11140895634889603, "learning_rate": 6.773459820809409e-05, "loss": 1.5966, "step": 164896 }, { "epoch": 0.325454285037252, "grad_norm": 0.10152396559715271, "learning_rate": 6.77282579837294e-05, "loss": 1.5884, "step": 164928 }, { "epoch": 0.32551743099864844, "grad_norm": 0.11770064383745193, "learning_rate": 6.772191775936472e-05, "loss": 1.6033, "step": 164960 }, { "epoch": 0.3255805769600449, "grad_norm": 0.10735750198364258, "learning_rate": 6.771557753500002e-05, "loss": 1.5941, "step": 164992 }, { "epoch": 0.32564372292144134, "grad_norm": 0.10940711200237274, "learning_rate": 6.770923731063533e-05, "loss": 1.5832, "step": 165024 }, { "epoch": 0.3257068688828378, "grad_norm": 0.10065314173698425, "learning_rate": 6.770289708627065e-05, "loss": 1.5954, "step": 165056 }, { "epoch": 0.3257700148442342, "grad_norm": 0.11828918009996414, "learning_rate": 6.769655686190596e-05, "loss": 1.581, "step": 165088 }, { "epoch": 0.32583316080563063, "grad_norm": 0.11608125269412994, "learning_rate": 6.769021663754126e-05, "loss": 1.5859, "step": 165120 }, { "epoch": 0.3258963067670271, "grad_norm": 0.10929804295301437, "learning_rate": 6.768387641317658e-05, "loss": 1.5849, "step": 165152 }, { "epoch": 0.3259594527284235, "grad_norm": 0.11840365827083588, "learning_rate": 6.767753618881188e-05, "loss": 1.5808, "step": 165184 }, { "epoch": 0.32602259868982, "grad_norm": 0.11759553104639053, "learning_rate": 6.76711959644472e-05, "loss": 1.5918, "step": 165216 }, { "epoch": 0.3260857446512164, "grad_norm": 0.11764785647392273, "learning_rate": 6.76648557400825e-05, "loss": 1.5761, "step": 165248 }, { "epoch": 0.3261488906126129, "grad_norm": 0.11398757249116898, "learning_rate": 6.765851551571781e-05, "loss": 1.5931, "step": 165280 }, { "epoch": 0.32621203657400927, "grad_norm": 0.11280125379562378, "learning_rate": 6.765217529135312e-05, "loss": 1.587, "step": 165312 }, { "epoch": 0.3262751825354057, "grad_norm": 0.113761767745018, "learning_rate": 6.764583506698844e-05, "loss": 1.588, "step": 165344 }, { "epoch": 0.32633832849680217, "grad_norm": 0.10702557861804962, "learning_rate": 6.763949484262375e-05, "loss": 1.5831, "step": 165376 }, { "epoch": 0.3264014744581986, "grad_norm": 0.1112300306558609, "learning_rate": 6.763315461825905e-05, "loss": 1.5868, "step": 165408 }, { "epoch": 0.32646462041959506, "grad_norm": 0.10162105411291122, "learning_rate": 6.762681439389437e-05, "loss": 1.5909, "step": 165440 }, { "epoch": 0.3265277663809915, "grad_norm": 0.11442627012729645, "learning_rate": 6.762047416952968e-05, "loss": 1.5919, "step": 165472 }, { "epoch": 0.3265909123423879, "grad_norm": 0.1244163066148758, "learning_rate": 6.7614133945165e-05, "loss": 1.5834, "step": 165504 }, { "epoch": 0.32665405830378436, "grad_norm": 0.11429568380117416, "learning_rate": 6.76077937208003e-05, "loss": 1.5795, "step": 165536 }, { "epoch": 0.3267172042651808, "grad_norm": 0.10941269993782043, "learning_rate": 6.760145349643561e-05, "loss": 1.5955, "step": 165568 }, { "epoch": 0.32678035022657725, "grad_norm": 0.11360456049442291, "learning_rate": 6.759511327207091e-05, "loss": 1.5839, "step": 165600 }, { "epoch": 0.3268434961879737, "grad_norm": 0.10601862519979477, "learning_rate": 6.758877304770623e-05, "loss": 1.5993, "step": 165632 }, { "epoch": 0.32690664214937015, "grad_norm": 0.10836266726255417, "learning_rate": 6.758243282334153e-05, "loss": 1.5845, "step": 165664 }, { "epoch": 0.32696978811076655, "grad_norm": 0.1072690412402153, "learning_rate": 6.757609259897684e-05, "loss": 1.5876, "step": 165696 }, { "epoch": 0.327032934072163, "grad_norm": 0.10807265341281891, "learning_rate": 6.756975237461216e-05, "loss": 1.5803, "step": 165728 }, { "epoch": 0.32709608003355944, "grad_norm": 0.10372138023376465, "learning_rate": 6.756341215024747e-05, "loss": 1.5752, "step": 165760 }, { "epoch": 0.3271592259949559, "grad_norm": 0.10341465473175049, "learning_rate": 6.755707192588277e-05, "loss": 1.5946, "step": 165792 }, { "epoch": 0.32722237195635234, "grad_norm": 0.10707486420869827, "learning_rate": 6.755073170151809e-05, "loss": 1.6083, "step": 165824 }, { "epoch": 0.3272855179177488, "grad_norm": 0.11649633944034576, "learning_rate": 6.75443914771534e-05, "loss": 1.5898, "step": 165856 }, { "epoch": 0.3273486638791452, "grad_norm": 0.1109938770532608, "learning_rate": 6.753805125278872e-05, "loss": 1.5922, "step": 165888 }, { "epoch": 0.32741180984054163, "grad_norm": 0.11330954730510712, "learning_rate": 6.753171102842402e-05, "loss": 1.5796, "step": 165920 }, { "epoch": 0.3274749558019381, "grad_norm": 0.10658450424671173, "learning_rate": 6.752537080405933e-05, "loss": 1.5737, "step": 165952 }, { "epoch": 0.32753810176333453, "grad_norm": 0.11703871190547943, "learning_rate": 6.751903057969465e-05, "loss": 1.5849, "step": 165984 }, { "epoch": 0.327601247724731, "grad_norm": 0.11008935421705246, "learning_rate": 6.751269035532995e-05, "loss": 1.5892, "step": 166016 }, { "epoch": 0.32766439368612743, "grad_norm": 0.12122257798910141, "learning_rate": 6.750635013096526e-05, "loss": 1.5914, "step": 166048 }, { "epoch": 0.3277275396475238, "grad_norm": 0.1071445420384407, "learning_rate": 6.750000990660057e-05, "loss": 1.5923, "step": 166080 }, { "epoch": 0.3277906856089203, "grad_norm": 0.09923968464136124, "learning_rate": 6.749366968223588e-05, "loss": 1.5963, "step": 166112 }, { "epoch": 0.3278538315703167, "grad_norm": 0.10645250976085663, "learning_rate": 6.74873294578712e-05, "loss": 1.5945, "step": 166144 }, { "epoch": 0.32791697753171317, "grad_norm": 0.11301776766777039, "learning_rate": 6.748098923350651e-05, "loss": 1.5893, "step": 166176 }, { "epoch": 0.3279801234931096, "grad_norm": 0.11489652097225189, "learning_rate": 6.747464900914181e-05, "loss": 1.5852, "step": 166208 }, { "epoch": 0.32804326945450607, "grad_norm": 0.10619910806417465, "learning_rate": 6.746830878477712e-05, "loss": 1.5943, "step": 166240 }, { "epoch": 0.32810641541590246, "grad_norm": 0.10980654507875443, "learning_rate": 6.746196856041244e-05, "loss": 1.5928, "step": 166272 }, { "epoch": 0.3281695613772989, "grad_norm": 0.10765120387077332, "learning_rate": 6.745562833604775e-05, "loss": 1.5987, "step": 166304 }, { "epoch": 0.32823270733869536, "grad_norm": 0.10863944888114929, "learning_rate": 6.744928811168305e-05, "loss": 1.5807, "step": 166336 }, { "epoch": 0.3282958533000918, "grad_norm": 0.11275624483823776, "learning_rate": 6.744294788731837e-05, "loss": 1.5835, "step": 166368 }, { "epoch": 0.32835899926148826, "grad_norm": 0.10429783910512924, "learning_rate": 6.743660766295368e-05, "loss": 1.5894, "step": 166400 }, { "epoch": 0.3284221452228847, "grad_norm": 0.10573472082614899, "learning_rate": 6.743026743858898e-05, "loss": 1.591, "step": 166432 }, { "epoch": 0.3284852911842811, "grad_norm": 0.11760779470205307, "learning_rate": 6.742392721422429e-05, "loss": 1.6008, "step": 166464 }, { "epoch": 0.32854843714567755, "grad_norm": 0.11851943284273148, "learning_rate": 6.74175869898596e-05, "loss": 1.5922, "step": 166496 }, { "epoch": 0.328611583107074, "grad_norm": 0.10839410871267319, "learning_rate": 6.741124676549491e-05, "loss": 1.5847, "step": 166528 }, { "epoch": 0.32867472906847045, "grad_norm": 0.10711562633514404, "learning_rate": 6.740490654113023e-05, "loss": 1.5851, "step": 166560 }, { "epoch": 0.3287378750298669, "grad_norm": 0.10874127596616745, "learning_rate": 6.739856631676553e-05, "loss": 1.5985, "step": 166592 }, { "epoch": 0.32880102099126335, "grad_norm": 0.11116418987512589, "learning_rate": 6.739222609240084e-05, "loss": 1.5868, "step": 166624 }, { "epoch": 0.32886416695265974, "grad_norm": 0.10855557769536972, "learning_rate": 6.738588586803616e-05, "loss": 1.5928, "step": 166656 }, { "epoch": 0.3289273129140562, "grad_norm": 0.10828322917222977, "learning_rate": 6.737954564367147e-05, "loss": 1.5989, "step": 166688 }, { "epoch": 0.32899045887545264, "grad_norm": 0.1115008071064949, "learning_rate": 6.737320541930679e-05, "loss": 1.5877, "step": 166720 }, { "epoch": 0.3290536048368491, "grad_norm": 0.10906339436769485, "learning_rate": 6.736686519494209e-05, "loss": 1.596, "step": 166752 }, { "epoch": 0.32911675079824554, "grad_norm": 0.11613047122955322, "learning_rate": 6.73605249705774e-05, "loss": 1.5843, "step": 166784 }, { "epoch": 0.329179896759642, "grad_norm": 0.10479158908128738, "learning_rate": 6.73541847462127e-05, "loss": 1.6021, "step": 166816 }, { "epoch": 0.3292430427210384, "grad_norm": 0.1132587343454361, "learning_rate": 6.734784452184802e-05, "loss": 1.5797, "step": 166848 }, { "epoch": 0.32930618868243483, "grad_norm": 0.1093219593167305, "learning_rate": 6.734150429748332e-05, "loss": 1.5849, "step": 166880 }, { "epoch": 0.3293693346438313, "grad_norm": 0.11402468383312225, "learning_rate": 6.733516407311864e-05, "loss": 1.592, "step": 166912 }, { "epoch": 0.32943248060522773, "grad_norm": 0.12217032164335251, "learning_rate": 6.732882384875395e-05, "loss": 1.6057, "step": 166944 }, { "epoch": 0.3294956265666242, "grad_norm": 0.1209881380200386, "learning_rate": 6.732248362438926e-05, "loss": 1.5939, "step": 166976 }, { "epoch": 0.3295587725280206, "grad_norm": 0.10653063654899597, "learning_rate": 6.731614340002457e-05, "loss": 1.5924, "step": 167008 }, { "epoch": 0.3296219184894171, "grad_norm": 0.11813493818044662, "learning_rate": 6.730980317565988e-05, "loss": 1.5961, "step": 167040 }, { "epoch": 0.32968506445081347, "grad_norm": 0.10556158423423767, "learning_rate": 6.73034629512952e-05, "loss": 1.5857, "step": 167072 }, { "epoch": 0.3297482104122099, "grad_norm": 0.10984716564416885, "learning_rate": 6.729712272693051e-05, "loss": 1.5894, "step": 167104 }, { "epoch": 0.32981135637360637, "grad_norm": 0.10736513882875443, "learning_rate": 6.729078250256581e-05, "loss": 1.5894, "step": 167136 }, { "epoch": 0.3298745023350028, "grad_norm": 0.10866238921880722, "learning_rate": 6.728444227820112e-05, "loss": 1.599, "step": 167168 }, { "epoch": 0.32993764829639927, "grad_norm": 0.11599130928516388, "learning_rate": 6.727810205383644e-05, "loss": 1.6001, "step": 167200 }, { "epoch": 0.3300007942577957, "grad_norm": 0.12034738808870316, "learning_rate": 6.727176182947174e-05, "loss": 1.5993, "step": 167232 }, { "epoch": 0.3300639402191921, "grad_norm": 0.11284483969211578, "learning_rate": 6.726542160510705e-05, "loss": 1.5889, "step": 167264 }, { "epoch": 0.33012708618058856, "grad_norm": 0.10635127872228622, "learning_rate": 6.725908138074236e-05, "loss": 1.5977, "step": 167296 }, { "epoch": 0.330190232141985, "grad_norm": 0.10802595317363739, "learning_rate": 6.725274115637767e-05, "loss": 1.5873, "step": 167328 }, { "epoch": 0.33025337810338146, "grad_norm": 0.11855754256248474, "learning_rate": 6.724640093201299e-05, "loss": 1.5866, "step": 167360 }, { "epoch": 0.3303165240647779, "grad_norm": 0.10876497626304626, "learning_rate": 6.72400607076483e-05, "loss": 1.5825, "step": 167392 }, { "epoch": 0.33037967002617435, "grad_norm": 0.11352695524692535, "learning_rate": 6.72337204832836e-05, "loss": 1.575, "step": 167424 }, { "epoch": 0.33044281598757075, "grad_norm": 0.10477931052446365, "learning_rate": 6.722738025891892e-05, "loss": 1.582, "step": 167456 }, { "epoch": 0.3305059619489672, "grad_norm": 0.11220613867044449, "learning_rate": 6.722104003455423e-05, "loss": 1.5847, "step": 167488 }, { "epoch": 0.33056910791036365, "grad_norm": 0.10531621426343918, "learning_rate": 6.721469981018954e-05, "loss": 1.5865, "step": 167520 }, { "epoch": 0.3306322538717601, "grad_norm": 0.11641617864370346, "learning_rate": 6.720835958582485e-05, "loss": 1.5926, "step": 167552 }, { "epoch": 0.33069539983315654, "grad_norm": 0.10772272944450378, "learning_rate": 6.720201936146016e-05, "loss": 1.5994, "step": 167584 }, { "epoch": 0.330758545794553, "grad_norm": 0.10645993798971176, "learning_rate": 6.719567913709547e-05, "loss": 1.59, "step": 167616 }, { "epoch": 0.3308216917559494, "grad_norm": 0.11529100686311722, "learning_rate": 6.718933891273078e-05, "loss": 1.5909, "step": 167648 }, { "epoch": 0.33088483771734584, "grad_norm": 0.11064822971820831, "learning_rate": 6.718299868836609e-05, "loss": 1.5836, "step": 167680 }, { "epoch": 0.3309479836787423, "grad_norm": 0.11568716913461685, "learning_rate": 6.717665846400139e-05, "loss": 1.5875, "step": 167712 }, { "epoch": 0.33101112964013873, "grad_norm": 0.10499885678291321, "learning_rate": 6.71703182396367e-05, "loss": 1.5965, "step": 167744 }, { "epoch": 0.3310742756015352, "grad_norm": 0.10890348255634308, "learning_rate": 6.716397801527202e-05, "loss": 1.5876, "step": 167776 }, { "epoch": 0.33113742156293163, "grad_norm": 0.111075758934021, "learning_rate": 6.715763779090732e-05, "loss": 1.5943, "step": 167808 }, { "epoch": 0.331200567524328, "grad_norm": 0.10671944916248322, "learning_rate": 6.715129756654264e-05, "loss": 1.5835, "step": 167840 }, { "epoch": 0.3312637134857245, "grad_norm": 0.11546798795461655, "learning_rate": 6.714495734217795e-05, "loss": 1.5701, "step": 167872 }, { "epoch": 0.3313268594471209, "grad_norm": 0.1191597431898117, "learning_rate": 6.713861711781326e-05, "loss": 1.5842, "step": 167904 }, { "epoch": 0.3313900054085174, "grad_norm": 0.11169959604740143, "learning_rate": 6.713227689344857e-05, "loss": 1.5893, "step": 167936 }, { "epoch": 0.3314531513699138, "grad_norm": 0.10798674821853638, "learning_rate": 6.712593666908388e-05, "loss": 1.5871, "step": 167968 }, { "epoch": 0.3315162973313103, "grad_norm": 0.11244769394397736, "learning_rate": 6.71195964447192e-05, "loss": 1.5949, "step": 168000 }, { "epoch": 0.33157944329270667, "grad_norm": 0.11217678338289261, "learning_rate": 6.711325622035451e-05, "loss": 1.5916, "step": 168032 }, { "epoch": 0.3316425892541031, "grad_norm": 0.1120830848813057, "learning_rate": 6.710691599598981e-05, "loss": 1.5848, "step": 168064 }, { "epoch": 0.33170573521549956, "grad_norm": 0.10977490246295929, "learning_rate": 6.710057577162511e-05, "loss": 1.5986, "step": 168096 }, { "epoch": 0.331768881176896, "grad_norm": 0.10606007277965546, "learning_rate": 6.709423554726043e-05, "loss": 1.5885, "step": 168128 }, { "epoch": 0.33183202713829246, "grad_norm": 0.11085110157728195, "learning_rate": 6.708789532289574e-05, "loss": 1.6018, "step": 168160 }, { "epoch": 0.3318951730996889, "grad_norm": 0.11350330710411072, "learning_rate": 6.708155509853106e-05, "loss": 1.5888, "step": 168192 }, { "epoch": 0.3319583190610853, "grad_norm": 0.10424413532018661, "learning_rate": 6.707521487416636e-05, "loss": 1.5886, "step": 168224 }, { "epoch": 0.33202146502248175, "grad_norm": 0.11231934279203415, "learning_rate": 6.706887464980167e-05, "loss": 1.5996, "step": 168256 }, { "epoch": 0.3320846109838782, "grad_norm": 0.1062673032283783, "learning_rate": 6.706253442543699e-05, "loss": 1.5897, "step": 168288 }, { "epoch": 0.33214775694527465, "grad_norm": 0.11131986230611801, "learning_rate": 6.70561942010723e-05, "loss": 1.5764, "step": 168320 }, { "epoch": 0.3322109029066711, "grad_norm": 0.11383592337369919, "learning_rate": 6.70498539767076e-05, "loss": 1.5983, "step": 168352 }, { "epoch": 0.33227404886806755, "grad_norm": 0.1052570566534996, "learning_rate": 6.704351375234292e-05, "loss": 1.593, "step": 168384 }, { "epoch": 0.33233719482946394, "grad_norm": 0.11455070972442627, "learning_rate": 6.703717352797823e-05, "loss": 1.6021, "step": 168416 }, { "epoch": 0.3324003407908604, "grad_norm": 0.11270928382873535, "learning_rate": 6.703083330361354e-05, "loss": 1.5952, "step": 168448 }, { "epoch": 0.33246348675225684, "grad_norm": 0.10765434056520462, "learning_rate": 6.702449307924885e-05, "loss": 1.6175, "step": 168480 }, { "epoch": 0.3325266327136533, "grad_norm": 0.10825809836387634, "learning_rate": 6.701815285488415e-05, "loss": 1.5961, "step": 168512 }, { "epoch": 0.33258977867504974, "grad_norm": 0.11404110491275787, "learning_rate": 6.701181263051946e-05, "loss": 1.5804, "step": 168544 }, { "epoch": 0.3326529246364462, "grad_norm": 0.10480169951915741, "learning_rate": 6.700547240615478e-05, "loss": 1.5975, "step": 168576 }, { "epoch": 0.33271607059784264, "grad_norm": 0.10455060750246048, "learning_rate": 6.699913218179008e-05, "loss": 1.5862, "step": 168608 }, { "epoch": 0.33277921655923903, "grad_norm": 0.11547906696796417, "learning_rate": 6.699279195742539e-05, "loss": 1.5853, "step": 168640 }, { "epoch": 0.3328423625206355, "grad_norm": 0.10812269151210785, "learning_rate": 6.69864517330607e-05, "loss": 1.5986, "step": 168672 }, { "epoch": 0.33290550848203193, "grad_norm": 0.10520678013563156, "learning_rate": 6.698011150869602e-05, "loss": 1.5977, "step": 168704 }, { "epoch": 0.3329686544434284, "grad_norm": 0.10478810966014862, "learning_rate": 6.697377128433134e-05, "loss": 1.5787, "step": 168736 }, { "epoch": 0.33303180040482483, "grad_norm": 0.1120506078004837, "learning_rate": 6.696743105996664e-05, "loss": 1.5932, "step": 168768 }, { "epoch": 0.3330949463662213, "grad_norm": 0.11565958708524704, "learning_rate": 6.696109083560195e-05, "loss": 1.584, "step": 168800 }, { "epoch": 0.33315809232761767, "grad_norm": 0.11421355605125427, "learning_rate": 6.695475061123727e-05, "loss": 1.595, "step": 168832 }, { "epoch": 0.3332212382890141, "grad_norm": 0.1072520837187767, "learning_rate": 6.694841038687258e-05, "loss": 1.5862, "step": 168864 }, { "epoch": 0.33328438425041057, "grad_norm": 0.10841010510921478, "learning_rate": 6.694207016250788e-05, "loss": 1.5998, "step": 168896 }, { "epoch": 0.333347530211807, "grad_norm": 0.10595608502626419, "learning_rate": 6.693572993814318e-05, "loss": 1.5926, "step": 168928 }, { "epoch": 0.33341067617320347, "grad_norm": 0.10938339680433273, "learning_rate": 6.69293897137785e-05, "loss": 1.5928, "step": 168960 }, { "epoch": 0.3334738221345999, "grad_norm": 0.12203320115804672, "learning_rate": 6.692304948941381e-05, "loss": 1.5884, "step": 168992 }, { "epoch": 0.3335369680959963, "grad_norm": 0.11459293961524963, "learning_rate": 6.691670926504911e-05, "loss": 1.5835, "step": 169024 }, { "epoch": 0.33360011405739276, "grad_norm": 0.10714321583509445, "learning_rate": 6.691036904068443e-05, "loss": 1.5894, "step": 169056 }, { "epoch": 0.3336632600187892, "grad_norm": 0.11945177614688873, "learning_rate": 6.690402881631974e-05, "loss": 1.5873, "step": 169088 }, { "epoch": 0.33372640598018566, "grad_norm": 0.10785206407308578, "learning_rate": 6.689768859195506e-05, "loss": 1.5944, "step": 169120 }, { "epoch": 0.3337895519415821, "grad_norm": 0.11360948532819748, "learning_rate": 6.689134836759036e-05, "loss": 1.579, "step": 169152 }, { "epoch": 0.33385269790297856, "grad_norm": 0.10632267594337463, "learning_rate": 6.688500814322567e-05, "loss": 1.5923, "step": 169184 }, { "epoch": 0.33391584386437495, "grad_norm": 0.11562076956033707, "learning_rate": 6.687866791886099e-05, "loss": 1.5915, "step": 169216 }, { "epoch": 0.3339789898257714, "grad_norm": 0.11135333776473999, "learning_rate": 6.68723276944963e-05, "loss": 1.589, "step": 169248 }, { "epoch": 0.33404213578716785, "grad_norm": 0.11263851821422577, "learning_rate": 6.68659874701316e-05, "loss": 1.5813, "step": 169280 }, { "epoch": 0.3341052817485643, "grad_norm": 0.10896914452314377, "learning_rate": 6.685964724576692e-05, "loss": 1.5907, "step": 169312 }, { "epoch": 0.33416842770996075, "grad_norm": 0.11386636644601822, "learning_rate": 6.685330702140222e-05, "loss": 1.5944, "step": 169344 }, { "epoch": 0.3342315736713572, "grad_norm": 0.11390720307826996, "learning_rate": 6.684696679703753e-05, "loss": 1.6007, "step": 169376 }, { "epoch": 0.3342947196327536, "grad_norm": 0.1162019595503807, "learning_rate": 6.684062657267285e-05, "loss": 1.5965, "step": 169408 }, { "epoch": 0.33435786559415004, "grad_norm": 0.10611633211374283, "learning_rate": 6.683428634830815e-05, "loss": 1.582, "step": 169440 }, { "epoch": 0.3344210115555465, "grad_norm": 0.10824020206928253, "learning_rate": 6.682794612394346e-05, "loss": 1.5698, "step": 169472 }, { "epoch": 0.33448415751694294, "grad_norm": 0.1087648794054985, "learning_rate": 6.682160589957878e-05, "loss": 1.5906, "step": 169504 }, { "epoch": 0.3345473034783394, "grad_norm": 0.10838939994573593, "learning_rate": 6.681526567521409e-05, "loss": 1.5966, "step": 169536 }, { "epoch": 0.33461044943973584, "grad_norm": 0.10725168138742447, "learning_rate": 6.680892545084939e-05, "loss": 1.5775, "step": 169568 }, { "epoch": 0.33467359540113223, "grad_norm": 0.10876808315515518, "learning_rate": 6.68025852264847e-05, "loss": 1.5843, "step": 169600 }, { "epoch": 0.3347367413625287, "grad_norm": 0.1187715157866478, "learning_rate": 6.679624500212002e-05, "loss": 1.5876, "step": 169632 }, { "epoch": 0.3347998873239251, "grad_norm": 0.11132321506738663, "learning_rate": 6.678990477775534e-05, "loss": 1.5924, "step": 169664 }, { "epoch": 0.3348630332853216, "grad_norm": 0.11269792169332504, "learning_rate": 6.678356455339064e-05, "loss": 1.5955, "step": 169696 }, { "epoch": 0.334926179246718, "grad_norm": 0.11117807775735855, "learning_rate": 6.677722432902595e-05, "loss": 1.586, "step": 169728 }, { "epoch": 0.3349893252081145, "grad_norm": 0.10820810496807098, "learning_rate": 6.677088410466125e-05, "loss": 1.5881, "step": 169760 }, { "epoch": 0.33505247116951087, "grad_norm": 0.11064624041318893, "learning_rate": 6.676454388029657e-05, "loss": 1.593, "step": 169792 }, { "epoch": 0.3351156171309073, "grad_norm": 0.1094837561249733, "learning_rate": 6.675820365593187e-05, "loss": 1.6042, "step": 169824 }, { "epoch": 0.33517876309230377, "grad_norm": 0.11262187361717224, "learning_rate": 6.675186343156718e-05, "loss": 1.6065, "step": 169856 }, { "epoch": 0.3352419090537002, "grad_norm": 0.11713772267103195, "learning_rate": 6.67455232072025e-05, "loss": 1.5779, "step": 169888 }, { "epoch": 0.33530505501509666, "grad_norm": 0.11381383240222931, "learning_rate": 6.673918298283781e-05, "loss": 1.5952, "step": 169920 }, { "epoch": 0.3353682009764931, "grad_norm": 0.09947804361581802, "learning_rate": 6.673284275847313e-05, "loss": 1.5748, "step": 169952 }, { "epoch": 0.3354313469378895, "grad_norm": 0.11649797856807709, "learning_rate": 6.672650253410843e-05, "loss": 1.5861, "step": 169984 }, { "epoch": 0.33549449289928596, "grad_norm": 0.10755553841590881, "learning_rate": 6.672016230974374e-05, "loss": 1.5873, "step": 170016 }, { "epoch": 0.3355576388606824, "grad_norm": 0.1077168732881546, "learning_rate": 6.671382208537906e-05, "loss": 1.5884, "step": 170048 }, { "epoch": 0.33562078482207885, "grad_norm": 0.11261429637670517, "learning_rate": 6.670748186101437e-05, "loss": 1.5751, "step": 170080 }, { "epoch": 0.3356839307834753, "grad_norm": 0.10956179350614548, "learning_rate": 6.670114163664967e-05, "loss": 1.5822, "step": 170112 }, { "epoch": 0.33574707674487175, "grad_norm": 0.10310094058513641, "learning_rate": 6.669480141228499e-05, "loss": 1.584, "step": 170144 }, { "epoch": 0.33581022270626815, "grad_norm": 0.10348838567733765, "learning_rate": 6.668846118792029e-05, "loss": 1.5879, "step": 170176 }, { "epoch": 0.3358733686676646, "grad_norm": 0.10774225741624832, "learning_rate": 6.66821209635556e-05, "loss": 1.5743, "step": 170208 }, { "epoch": 0.33593651462906104, "grad_norm": 0.10263609886169434, "learning_rate": 6.66757807391909e-05, "loss": 1.5811, "step": 170240 }, { "epoch": 0.3359996605904575, "grad_norm": 0.11636590957641602, "learning_rate": 6.666944051482622e-05, "loss": 1.5751, "step": 170272 }, { "epoch": 0.33606280655185394, "grad_norm": 0.11100584268569946, "learning_rate": 6.666310029046153e-05, "loss": 1.5957, "step": 170304 }, { "epoch": 0.3361259525132504, "grad_norm": 0.1203126311302185, "learning_rate": 6.665676006609685e-05, "loss": 1.5954, "step": 170336 }, { "epoch": 0.33618909847464684, "grad_norm": 0.10767781734466553, "learning_rate": 6.665041984173215e-05, "loss": 1.5838, "step": 170368 }, { "epoch": 0.33625224443604323, "grad_norm": 0.10694015026092529, "learning_rate": 6.664407961736746e-05, "loss": 1.5816, "step": 170400 }, { "epoch": 0.3363153903974397, "grad_norm": 0.10093408077955246, "learning_rate": 6.663773939300278e-05, "loss": 1.5951, "step": 170432 }, { "epoch": 0.33637853635883613, "grad_norm": 0.1062542200088501, "learning_rate": 6.663139916863809e-05, "loss": 1.5942, "step": 170464 }, { "epoch": 0.3364416823202326, "grad_norm": 0.10482421517372131, "learning_rate": 6.662505894427339e-05, "loss": 1.5658, "step": 170496 }, { "epoch": 0.33650482828162903, "grad_norm": 0.10923250019550323, "learning_rate": 6.66187187199087e-05, "loss": 1.5838, "step": 170528 }, { "epoch": 0.3365679742430255, "grad_norm": 0.11676891148090363, "learning_rate": 6.661237849554402e-05, "loss": 1.5876, "step": 170560 }, { "epoch": 0.3366311202044219, "grad_norm": 0.1030052974820137, "learning_rate": 6.660603827117932e-05, "loss": 1.5828, "step": 170592 }, { "epoch": 0.3366942661658183, "grad_norm": 0.10722127556800842, "learning_rate": 6.659969804681464e-05, "loss": 1.5908, "step": 170624 }, { "epoch": 0.3367574121272148, "grad_norm": 0.10357983410358429, "learning_rate": 6.659335782244994e-05, "loss": 1.589, "step": 170656 }, { "epoch": 0.3368205580886112, "grad_norm": 0.1128392219543457, "learning_rate": 6.658701759808525e-05, "loss": 1.5866, "step": 170688 }, { "epoch": 0.33688370405000767, "grad_norm": 0.11493240296840668, "learning_rate": 6.658067737372057e-05, "loss": 1.5916, "step": 170720 }, { "epoch": 0.3369468500114041, "grad_norm": 0.11089370399713516, "learning_rate": 6.657433714935588e-05, "loss": 1.5811, "step": 170752 }, { "epoch": 0.3370099959728005, "grad_norm": 0.11617998778820038, "learning_rate": 6.656799692499118e-05, "loss": 1.6027, "step": 170784 }, { "epoch": 0.33707314193419696, "grad_norm": 0.11355898529291153, "learning_rate": 6.65616567006265e-05, "loss": 1.5896, "step": 170816 }, { "epoch": 0.3371362878955934, "grad_norm": 0.11810681223869324, "learning_rate": 6.655531647626181e-05, "loss": 1.5916, "step": 170848 }, { "epoch": 0.33719943385698986, "grad_norm": 0.11200131475925446, "learning_rate": 6.654897625189713e-05, "loss": 1.5928, "step": 170880 }, { "epoch": 0.3372625798183863, "grad_norm": 0.10641849040985107, "learning_rate": 6.654263602753243e-05, "loss": 1.5873, "step": 170912 }, { "epoch": 0.33732572577978276, "grad_norm": 0.11036920547485352, "learning_rate": 6.653629580316774e-05, "loss": 1.5941, "step": 170944 }, { "epoch": 0.33738887174117915, "grad_norm": 0.10746380686759949, "learning_rate": 6.652995557880304e-05, "loss": 1.5897, "step": 170976 }, { "epoch": 0.3374520177025756, "grad_norm": 0.11281225830316544, "learning_rate": 6.652361535443836e-05, "loss": 1.5905, "step": 171008 }, { "epoch": 0.33751516366397205, "grad_norm": 0.10482367128133774, "learning_rate": 6.651727513007366e-05, "loss": 1.5867, "step": 171040 }, { "epoch": 0.3375783096253685, "grad_norm": 0.1079397052526474, "learning_rate": 6.651093490570897e-05, "loss": 1.5895, "step": 171072 }, { "epoch": 0.33764145558676495, "grad_norm": 0.11590129882097244, "learning_rate": 6.650459468134429e-05, "loss": 1.6014, "step": 171104 }, { "epoch": 0.3377046015481614, "grad_norm": 0.1105310395359993, "learning_rate": 6.64982544569796e-05, "loss": 1.5813, "step": 171136 }, { "epoch": 0.3377677475095578, "grad_norm": 0.11263422667980194, "learning_rate": 6.64919142326149e-05, "loss": 1.5888, "step": 171168 }, { "epoch": 0.33783089347095424, "grad_norm": 0.10923925787210464, "learning_rate": 6.648557400825022e-05, "loss": 1.5984, "step": 171200 }, { "epoch": 0.3378940394323507, "grad_norm": 0.11082915961742401, "learning_rate": 6.647923378388553e-05, "loss": 1.5982, "step": 171232 }, { "epoch": 0.33795718539374714, "grad_norm": 0.11139413714408875, "learning_rate": 6.647289355952085e-05, "loss": 1.5896, "step": 171264 }, { "epoch": 0.3380203313551436, "grad_norm": 0.1109723448753357, "learning_rate": 6.646655333515616e-05, "loss": 1.5747, "step": 171296 }, { "epoch": 0.33808347731654004, "grad_norm": 0.1085285171866417, "learning_rate": 6.646021311079146e-05, "loss": 1.5878, "step": 171328 }, { "epoch": 0.33814662327793643, "grad_norm": 0.11112835258245468, "learning_rate": 6.645387288642678e-05, "loss": 1.5816, "step": 171360 }, { "epoch": 0.3382097692393329, "grad_norm": 0.10931267589330673, "learning_rate": 6.644753266206208e-05, "loss": 1.5832, "step": 171392 }, { "epoch": 0.33827291520072933, "grad_norm": 0.1158394068479538, "learning_rate": 6.644119243769739e-05, "loss": 1.5812, "step": 171424 }, { "epoch": 0.3383360611621258, "grad_norm": 0.10848306119441986, "learning_rate": 6.64348522133327e-05, "loss": 1.5878, "step": 171456 }, { "epoch": 0.3383992071235222, "grad_norm": 0.1074216291308403, "learning_rate": 6.642851198896801e-05, "loss": 1.5807, "step": 171488 }, { "epoch": 0.3384623530849187, "grad_norm": 0.11576060950756073, "learning_rate": 6.642217176460332e-05, "loss": 1.5916, "step": 171520 }, { "epoch": 0.33852549904631507, "grad_norm": 0.11410527676343918, "learning_rate": 6.641583154023864e-05, "loss": 1.5916, "step": 171552 }, { "epoch": 0.3385886450077115, "grad_norm": 0.10908595472574234, "learning_rate": 6.640949131587394e-05, "loss": 1.588, "step": 171584 }, { "epoch": 0.33865179096910797, "grad_norm": 0.11122795194387436, "learning_rate": 6.640315109150925e-05, "loss": 1.6002, "step": 171616 }, { "epoch": 0.3387149369305044, "grad_norm": 0.10948388278484344, "learning_rate": 6.639681086714457e-05, "loss": 1.5835, "step": 171648 }, { "epoch": 0.33877808289190087, "grad_norm": 0.10557802766561508, "learning_rate": 6.639047064277988e-05, "loss": 1.5692, "step": 171680 }, { "epoch": 0.3388412288532973, "grad_norm": 0.10576324909925461, "learning_rate": 6.638413041841518e-05, "loss": 1.5869, "step": 171712 }, { "epoch": 0.3389043748146937, "grad_norm": 0.11505261808633804, "learning_rate": 6.63777901940505e-05, "loss": 1.5859, "step": 171744 }, { "epoch": 0.33896752077609016, "grad_norm": 0.11075065284967422, "learning_rate": 6.637144996968581e-05, "loss": 1.5678, "step": 171776 }, { "epoch": 0.3390306667374866, "grad_norm": 0.11918734759092331, "learning_rate": 6.636510974532111e-05, "loss": 1.5872, "step": 171808 }, { "epoch": 0.33909381269888306, "grad_norm": 0.10720392316579819, "learning_rate": 6.635876952095643e-05, "loss": 1.5761, "step": 171840 }, { "epoch": 0.3391569586602795, "grad_norm": 0.11589060723781586, "learning_rate": 6.635242929659173e-05, "loss": 1.573, "step": 171872 }, { "epoch": 0.33922010462167596, "grad_norm": 0.1120370551943779, "learning_rate": 6.634608907222704e-05, "loss": 1.6045, "step": 171904 }, { "epoch": 0.3392832505830724, "grad_norm": 0.11259763687849045, "learning_rate": 6.633974884786236e-05, "loss": 1.5857, "step": 171936 }, { "epoch": 0.3393463965444688, "grad_norm": 0.11293957382440567, "learning_rate": 6.633340862349767e-05, "loss": 1.581, "step": 171968 }, { "epoch": 0.33940954250586525, "grad_norm": 0.10410208255052567, "learning_rate": 6.632706839913297e-05, "loss": 1.6065, "step": 172000 }, { "epoch": 0.3394726884672617, "grad_norm": 0.11103110015392303, "learning_rate": 6.632072817476829e-05, "loss": 1.5928, "step": 172032 }, { "epoch": 0.33953583442865815, "grad_norm": 0.11333668231964111, "learning_rate": 6.63143879504036e-05, "loss": 1.5822, "step": 172064 }, { "epoch": 0.3395989803900546, "grad_norm": 0.11592777818441391, "learning_rate": 6.630804772603892e-05, "loss": 1.5983, "step": 172096 }, { "epoch": 0.33966212635145104, "grad_norm": 0.10337577760219574, "learning_rate": 6.630170750167422e-05, "loss": 1.5903, "step": 172128 }, { "epoch": 0.33972527231284744, "grad_norm": 0.10988391935825348, "learning_rate": 6.629536727730953e-05, "loss": 1.5935, "step": 172160 }, { "epoch": 0.3397884182742439, "grad_norm": 0.10925642400979996, "learning_rate": 6.628902705294485e-05, "loss": 1.5778, "step": 172192 }, { "epoch": 0.33985156423564034, "grad_norm": 0.11613163352012634, "learning_rate": 6.628268682858015e-05, "loss": 1.5786, "step": 172224 }, { "epoch": 0.3399147101970368, "grad_norm": 0.12016303837299347, "learning_rate": 6.627634660421545e-05, "loss": 1.5803, "step": 172256 }, { "epoch": 0.33997785615843323, "grad_norm": 0.11270086467266083, "learning_rate": 6.627000637985076e-05, "loss": 1.5938, "step": 172288 }, { "epoch": 0.3400410021198297, "grad_norm": 0.10815758258104324, "learning_rate": 6.626366615548608e-05, "loss": 1.5804, "step": 172320 }, { "epoch": 0.3401041480812261, "grad_norm": 0.11350827664136887, "learning_rate": 6.625732593112139e-05, "loss": 1.5949, "step": 172352 }, { "epoch": 0.3401672940426225, "grad_norm": 0.11088835448026657, "learning_rate": 6.62509857067567e-05, "loss": 1.5931, "step": 172384 }, { "epoch": 0.340230440004019, "grad_norm": 0.12247086316347122, "learning_rate": 6.624464548239201e-05, "loss": 1.5896, "step": 172416 }, { "epoch": 0.3402935859654154, "grad_norm": 0.1066143736243248, "learning_rate": 6.623830525802732e-05, "loss": 1.5795, "step": 172448 }, { "epoch": 0.3403567319268119, "grad_norm": 0.11791466176509857, "learning_rate": 6.623196503366264e-05, "loss": 1.5887, "step": 172480 }, { "epoch": 0.3404198778882083, "grad_norm": 0.11066970229148865, "learning_rate": 6.622562480929794e-05, "loss": 1.5897, "step": 172512 }, { "epoch": 0.3404830238496047, "grad_norm": 0.1076154038310051, "learning_rate": 6.621928458493325e-05, "loss": 1.5906, "step": 172544 }, { "epoch": 0.34054616981100116, "grad_norm": 0.11044386774301529, "learning_rate": 6.621294436056857e-05, "loss": 1.5741, "step": 172576 }, { "epoch": 0.3406093157723976, "grad_norm": 0.11065259575843811, "learning_rate": 6.620660413620388e-05, "loss": 1.5854, "step": 172608 }, { "epoch": 0.34067246173379406, "grad_norm": 0.12373436242341995, "learning_rate": 6.620026391183918e-05, "loss": 1.5941, "step": 172640 }, { "epoch": 0.3407356076951905, "grad_norm": 0.115959532558918, "learning_rate": 6.619392368747448e-05, "loss": 1.5816, "step": 172672 }, { "epoch": 0.34079875365658696, "grad_norm": 0.10721660405397415, "learning_rate": 6.61875834631098e-05, "loss": 1.586, "step": 172704 }, { "epoch": 0.34086189961798335, "grad_norm": 0.10889130085706711, "learning_rate": 6.618124323874511e-05, "loss": 1.5881, "step": 172736 }, { "epoch": 0.3409250455793798, "grad_norm": 0.10835132747888565, "learning_rate": 6.617490301438043e-05, "loss": 1.5991, "step": 172768 }, { "epoch": 0.34098819154077625, "grad_norm": 0.11941508948802948, "learning_rate": 6.616856279001573e-05, "loss": 1.575, "step": 172800 }, { "epoch": 0.3410513375021727, "grad_norm": 0.10933182388544083, "learning_rate": 6.616222256565104e-05, "loss": 1.5795, "step": 172832 }, { "epoch": 0.34111448346356915, "grad_norm": 0.1083269789814949, "learning_rate": 6.615588234128636e-05, "loss": 1.5838, "step": 172864 }, { "epoch": 0.3411776294249656, "grad_norm": 0.10989560186862946, "learning_rate": 6.614954211692167e-05, "loss": 1.5876, "step": 172896 }, { "epoch": 0.341240775386362, "grad_norm": 0.11485954374074936, "learning_rate": 6.614320189255697e-05, "loss": 1.6061, "step": 172928 }, { "epoch": 0.34130392134775844, "grad_norm": 0.10763099044561386, "learning_rate": 6.613686166819229e-05, "loss": 1.5744, "step": 172960 }, { "epoch": 0.3413670673091549, "grad_norm": 0.10989924520254135, "learning_rate": 6.61305214438276e-05, "loss": 1.584, "step": 172992 }, { "epoch": 0.34143021327055134, "grad_norm": 0.1087680235505104, "learning_rate": 6.612418121946292e-05, "loss": 1.5735, "step": 173024 }, { "epoch": 0.3414933592319478, "grad_norm": 0.10286132991313934, "learning_rate": 6.611784099509822e-05, "loss": 1.5747, "step": 173056 }, { "epoch": 0.34155650519334424, "grad_norm": 0.1099427342414856, "learning_rate": 6.611150077073352e-05, "loss": 1.5892, "step": 173088 }, { "epoch": 0.34161965115474063, "grad_norm": 0.1077076867222786, "learning_rate": 6.610516054636883e-05, "loss": 1.5825, "step": 173120 }, { "epoch": 0.3416827971161371, "grad_norm": 0.11097551882266998, "learning_rate": 6.609882032200415e-05, "loss": 1.5871, "step": 173152 }, { "epoch": 0.34174594307753353, "grad_norm": 0.10670367628335953, "learning_rate": 6.609248009763945e-05, "loss": 1.5827, "step": 173184 }, { "epoch": 0.34180908903893, "grad_norm": 0.10981249064207077, "learning_rate": 6.608613987327476e-05, "loss": 1.5912, "step": 173216 }, { "epoch": 0.34187223500032643, "grad_norm": 0.10873278230428696, "learning_rate": 6.607979964891008e-05, "loss": 1.5774, "step": 173248 }, { "epoch": 0.3419353809617229, "grad_norm": 0.11112535744905472, "learning_rate": 6.607345942454539e-05, "loss": 1.5902, "step": 173280 }, { "epoch": 0.3419985269231193, "grad_norm": 0.12162293493747711, "learning_rate": 6.606711920018071e-05, "loss": 1.5909, "step": 173312 }, { "epoch": 0.3420616728845157, "grad_norm": 0.11760684847831726, "learning_rate": 6.606077897581601e-05, "loss": 1.587, "step": 173344 }, { "epoch": 0.34212481884591217, "grad_norm": 0.10883896797895432, "learning_rate": 6.605443875145132e-05, "loss": 1.6, "step": 173376 }, { "epoch": 0.3421879648073086, "grad_norm": 0.1142784133553505, "learning_rate": 6.604809852708664e-05, "loss": 1.5971, "step": 173408 }, { "epoch": 0.34225111076870507, "grad_norm": 0.10538744181394577, "learning_rate": 6.604175830272194e-05, "loss": 1.5685, "step": 173440 }, { "epoch": 0.3423142567301015, "grad_norm": 0.11750444024801254, "learning_rate": 6.603541807835725e-05, "loss": 1.5901, "step": 173472 }, { "epoch": 0.3423774026914979, "grad_norm": 0.11520704627037048, "learning_rate": 6.602907785399255e-05, "loss": 1.5924, "step": 173504 }, { "epoch": 0.34244054865289436, "grad_norm": 0.11306174844503403, "learning_rate": 6.602273762962787e-05, "loss": 1.5865, "step": 173536 }, { "epoch": 0.3425036946142908, "grad_norm": 0.11422741413116455, "learning_rate": 6.601639740526318e-05, "loss": 1.5979, "step": 173568 }, { "epoch": 0.34256684057568726, "grad_norm": 0.10371016710996628, "learning_rate": 6.601005718089848e-05, "loss": 1.5848, "step": 173600 }, { "epoch": 0.3426299865370837, "grad_norm": 0.10575991123914719, "learning_rate": 6.60037169565338e-05, "loss": 1.5968, "step": 173632 }, { "epoch": 0.34269313249848016, "grad_norm": 0.10149675607681274, "learning_rate": 6.599737673216911e-05, "loss": 1.5909, "step": 173664 }, { "epoch": 0.3427562784598766, "grad_norm": 0.10535045713186264, "learning_rate": 6.599103650780443e-05, "loss": 1.5913, "step": 173696 }, { "epoch": 0.342819424421273, "grad_norm": 0.10517001152038574, "learning_rate": 6.598469628343973e-05, "loss": 1.589, "step": 173728 }, { "epoch": 0.34288257038266945, "grad_norm": 0.13139063119888306, "learning_rate": 6.597835605907504e-05, "loss": 1.5818, "step": 173760 }, { "epoch": 0.3429457163440659, "grad_norm": 0.1072242334485054, "learning_rate": 6.597201583471036e-05, "loss": 1.5891, "step": 173792 }, { "epoch": 0.34300886230546235, "grad_norm": 0.10862624645233154, "learning_rate": 6.596567561034567e-05, "loss": 1.5793, "step": 173824 }, { "epoch": 0.3430720082668588, "grad_norm": 0.11293728649616241, "learning_rate": 6.595933538598097e-05, "loss": 1.5897, "step": 173856 }, { "epoch": 0.34313515422825525, "grad_norm": 0.10044048726558685, "learning_rate": 6.595299516161629e-05, "loss": 1.6005, "step": 173888 }, { "epoch": 0.34319830018965164, "grad_norm": 0.11105641722679138, "learning_rate": 6.594665493725159e-05, "loss": 1.5954, "step": 173920 }, { "epoch": 0.3432614461510481, "grad_norm": 0.10460119694471359, "learning_rate": 6.59403147128869e-05, "loss": 1.5827, "step": 173952 }, { "epoch": 0.34332459211244454, "grad_norm": 0.11204526573419571, "learning_rate": 6.593397448852222e-05, "loss": 1.5883, "step": 173984 }, { "epoch": 0.343387738073841, "grad_norm": 0.11562290787696838, "learning_rate": 6.592763426415752e-05, "loss": 1.5798, "step": 174016 }, { "epoch": 0.34345088403523744, "grad_norm": 0.1139347031712532, "learning_rate": 6.592129403979283e-05, "loss": 1.5852, "step": 174048 }, { "epoch": 0.3435140299966339, "grad_norm": 0.11531013995409012, "learning_rate": 6.591495381542815e-05, "loss": 1.5861, "step": 174080 }, { "epoch": 0.3435771759580303, "grad_norm": 0.09973633289337158, "learning_rate": 6.590861359106346e-05, "loss": 1.5897, "step": 174112 }, { "epoch": 0.34364032191942673, "grad_norm": 0.11261215060949326, "learning_rate": 6.590227336669876e-05, "loss": 1.5909, "step": 174144 }, { "epoch": 0.3437034678808232, "grad_norm": 0.11859069764614105, "learning_rate": 6.589593314233408e-05, "loss": 1.5881, "step": 174176 }, { "epoch": 0.3437666138422196, "grad_norm": 0.10515476018190384, "learning_rate": 6.58895929179694e-05, "loss": 1.5974, "step": 174208 }, { "epoch": 0.3438297598036161, "grad_norm": 0.1277274787425995, "learning_rate": 6.588325269360471e-05, "loss": 1.5826, "step": 174240 }, { "epoch": 0.3438929057650125, "grad_norm": 0.11068448424339294, "learning_rate": 6.587691246924001e-05, "loss": 1.5834, "step": 174272 }, { "epoch": 0.3439560517264089, "grad_norm": 0.11146080493927002, "learning_rate": 6.587057224487532e-05, "loss": 1.5855, "step": 174304 }, { "epoch": 0.34401919768780537, "grad_norm": 0.10851216316223145, "learning_rate": 6.586423202051062e-05, "loss": 1.5915, "step": 174336 }, { "epoch": 0.3440823436492018, "grad_norm": 0.10507667809724808, "learning_rate": 6.585789179614594e-05, "loss": 1.5883, "step": 174368 }, { "epoch": 0.34414548961059827, "grad_norm": 0.11206206679344177, "learning_rate": 6.585155157178124e-05, "loss": 1.5825, "step": 174400 }, { "epoch": 0.3442086355719947, "grad_norm": 0.11833879351615906, "learning_rate": 6.584521134741655e-05, "loss": 1.5917, "step": 174432 }, { "epoch": 0.34427178153339116, "grad_norm": 0.10395336151123047, "learning_rate": 6.583887112305187e-05, "loss": 1.5918, "step": 174464 }, { "epoch": 0.34433492749478756, "grad_norm": 0.11111441254615784, "learning_rate": 6.583253089868718e-05, "loss": 1.5915, "step": 174496 }, { "epoch": 0.344398073456184, "grad_norm": 0.10616868734359741, "learning_rate": 6.582619067432248e-05, "loss": 1.5944, "step": 174528 }, { "epoch": 0.34446121941758046, "grad_norm": 0.12092773616313934, "learning_rate": 6.58198504499578e-05, "loss": 1.5807, "step": 174560 }, { "epoch": 0.3445243653789769, "grad_norm": 0.11427041888237, "learning_rate": 6.581351022559311e-05, "loss": 1.5916, "step": 174592 }, { "epoch": 0.34458751134037335, "grad_norm": 0.11512736976146698, "learning_rate": 6.580717000122843e-05, "loss": 1.5743, "step": 174624 }, { "epoch": 0.3446506573017698, "grad_norm": 0.10949216037988663, "learning_rate": 6.580082977686374e-05, "loss": 1.5849, "step": 174656 }, { "epoch": 0.3447138032631662, "grad_norm": 0.11290450394153595, "learning_rate": 6.579448955249904e-05, "loss": 1.5951, "step": 174688 }, { "epoch": 0.34477694922456265, "grad_norm": 0.1092032715678215, "learning_rate": 6.578814932813436e-05, "loss": 1.5981, "step": 174720 }, { "epoch": 0.3448400951859591, "grad_norm": 0.10919303447008133, "learning_rate": 6.578180910376966e-05, "loss": 1.5905, "step": 174752 }, { "epoch": 0.34490324114735554, "grad_norm": 0.10852492600679398, "learning_rate": 6.577546887940497e-05, "loss": 1.5897, "step": 174784 }, { "epoch": 0.344966387108752, "grad_norm": 0.10703276097774506, "learning_rate": 6.576912865504028e-05, "loss": 1.5889, "step": 174816 }, { "epoch": 0.34502953307014844, "grad_norm": 0.10765741020441055, "learning_rate": 6.576278843067559e-05, "loss": 1.5733, "step": 174848 }, { "epoch": 0.34509267903154484, "grad_norm": 0.1112956702709198, "learning_rate": 6.57564482063109e-05, "loss": 1.5889, "step": 174880 }, { "epoch": 0.3451558249929413, "grad_norm": 0.11600566655397415, "learning_rate": 6.575010798194622e-05, "loss": 1.5851, "step": 174912 }, { "epoch": 0.34521897095433773, "grad_norm": 0.10642077028751373, "learning_rate": 6.574376775758152e-05, "loss": 1.5886, "step": 174944 }, { "epoch": 0.3452821169157342, "grad_norm": 0.1070687472820282, "learning_rate": 6.573742753321683e-05, "loss": 1.5836, "step": 174976 }, { "epoch": 0.34534526287713063, "grad_norm": 0.10959191620349884, "learning_rate": 6.573108730885215e-05, "loss": 1.5849, "step": 175008 }, { "epoch": 0.3454084088385271, "grad_norm": 0.10763245075941086, "learning_rate": 6.572474708448746e-05, "loss": 1.5852, "step": 175040 }, { "epoch": 0.3454715547999235, "grad_norm": 0.10724011063575745, "learning_rate": 6.571840686012276e-05, "loss": 1.5882, "step": 175072 }, { "epoch": 0.3455347007613199, "grad_norm": 0.1093960627913475, "learning_rate": 6.571206663575808e-05, "loss": 1.5837, "step": 175104 }, { "epoch": 0.3455978467227164, "grad_norm": 0.11032553762197495, "learning_rate": 6.570572641139338e-05, "loss": 1.5919, "step": 175136 }, { "epoch": 0.3456609926841128, "grad_norm": 0.11518137156963348, "learning_rate": 6.56993861870287e-05, "loss": 1.6009, "step": 175168 }, { "epoch": 0.34572413864550927, "grad_norm": 0.10641338676214218, "learning_rate": 6.5693045962664e-05, "loss": 1.5828, "step": 175200 }, { "epoch": 0.3457872846069057, "grad_norm": 0.10659778118133545, "learning_rate": 6.568670573829931e-05, "loss": 1.5957, "step": 175232 }, { "epoch": 0.3458504305683021, "grad_norm": 0.11084144562482834, "learning_rate": 6.568036551393462e-05, "loss": 1.5882, "step": 175264 }, { "epoch": 0.34591357652969856, "grad_norm": 0.10810660570859909, "learning_rate": 6.567402528956994e-05, "loss": 1.5896, "step": 175296 }, { "epoch": 0.345976722491095, "grad_norm": 0.10464351624250412, "learning_rate": 6.566768506520525e-05, "loss": 1.5886, "step": 175328 }, { "epoch": 0.34603986845249146, "grad_norm": 0.12545694410800934, "learning_rate": 6.566134484084056e-05, "loss": 1.5837, "step": 175360 }, { "epoch": 0.3461030144138879, "grad_norm": 0.10950300842523575, "learning_rate": 6.565500461647587e-05, "loss": 1.581, "step": 175392 }, { "epoch": 0.34616616037528436, "grad_norm": 0.11077903211116791, "learning_rate": 6.564866439211118e-05, "loss": 1.5854, "step": 175424 }, { "epoch": 0.3462293063366808, "grad_norm": 0.128065824508667, "learning_rate": 6.56423241677465e-05, "loss": 1.5919, "step": 175456 }, { "epoch": 0.3462924522980772, "grad_norm": 0.11247207969427109, "learning_rate": 6.56359839433818e-05, "loss": 1.5747, "step": 175488 }, { "epoch": 0.34635559825947365, "grad_norm": 0.10944617539644241, "learning_rate": 6.562964371901711e-05, "loss": 1.5823, "step": 175520 }, { "epoch": 0.3464187442208701, "grad_norm": 0.10773425549268723, "learning_rate": 6.562330349465242e-05, "loss": 1.587, "step": 175552 }, { "epoch": 0.34648189018226655, "grad_norm": 0.11003351956605911, "learning_rate": 6.561696327028773e-05, "loss": 1.5934, "step": 175584 }, { "epoch": 0.346545036143663, "grad_norm": 0.12288950383663177, "learning_rate": 6.561062304592303e-05, "loss": 1.5897, "step": 175616 }, { "epoch": 0.34660818210505945, "grad_norm": 0.11313644051551819, "learning_rate": 6.560428282155835e-05, "loss": 1.5944, "step": 175648 }, { "epoch": 0.34667132806645584, "grad_norm": 0.10772547125816345, "learning_rate": 6.559794259719366e-05, "loss": 1.5955, "step": 175680 }, { "epoch": 0.3467344740278523, "grad_norm": 0.11855486780405045, "learning_rate": 6.559160237282897e-05, "loss": 1.5885, "step": 175712 }, { "epoch": 0.34679761998924874, "grad_norm": 0.11729282885789871, "learning_rate": 6.558526214846428e-05, "loss": 1.5808, "step": 175744 }, { "epoch": 0.3468607659506452, "grad_norm": 0.1073223426938057, "learning_rate": 6.557892192409959e-05, "loss": 1.5937, "step": 175776 }, { "epoch": 0.34692391191204164, "grad_norm": 0.10576095432043076, "learning_rate": 6.55725816997349e-05, "loss": 1.6005, "step": 175808 }, { "epoch": 0.3469870578734381, "grad_norm": 0.1085980162024498, "learning_rate": 6.556624147537022e-05, "loss": 1.59, "step": 175840 }, { "epoch": 0.3470502038348345, "grad_norm": 0.12293184548616409, "learning_rate": 6.555990125100552e-05, "loss": 1.588, "step": 175872 }, { "epoch": 0.34711334979623093, "grad_norm": 0.11466878652572632, "learning_rate": 6.555356102664083e-05, "loss": 1.5894, "step": 175904 }, { "epoch": 0.3471764957576274, "grad_norm": 0.10859908908605576, "learning_rate": 6.554722080227615e-05, "loss": 1.5913, "step": 175936 }, { "epoch": 0.34723964171902383, "grad_norm": 0.11201232671737671, "learning_rate": 6.554088057791145e-05, "loss": 1.581, "step": 175968 }, { "epoch": 0.3473027876804203, "grad_norm": 0.10304196923971176, "learning_rate": 6.553454035354676e-05, "loss": 1.5722, "step": 176000 }, { "epoch": 0.3473659336418167, "grad_norm": 0.10468000173568726, "learning_rate": 6.552820012918207e-05, "loss": 1.598, "step": 176032 }, { "epoch": 0.3474290796032131, "grad_norm": 0.10960904508829117, "learning_rate": 6.552185990481738e-05, "loss": 1.6054, "step": 176064 }, { "epoch": 0.34749222556460957, "grad_norm": 0.11137548834085464, "learning_rate": 6.55155196804527e-05, "loss": 1.5967, "step": 176096 }, { "epoch": 0.347555371526006, "grad_norm": 0.10918192565441132, "learning_rate": 6.550917945608801e-05, "loss": 1.5815, "step": 176128 }, { "epoch": 0.34761851748740247, "grad_norm": 0.11181139945983887, "learning_rate": 6.550283923172331e-05, "loss": 1.5851, "step": 176160 }, { "epoch": 0.3476816634487989, "grad_norm": 0.10973136126995087, "learning_rate": 6.549649900735863e-05, "loss": 1.5897, "step": 176192 }, { "epoch": 0.34774480941019537, "grad_norm": 0.11366064846515656, "learning_rate": 6.549015878299394e-05, "loss": 1.584, "step": 176224 }, { "epoch": 0.34780795537159176, "grad_norm": 0.1052897498011589, "learning_rate": 6.548381855862925e-05, "loss": 1.5741, "step": 176256 }, { "epoch": 0.3478711013329882, "grad_norm": 0.1174076721072197, "learning_rate": 6.547747833426456e-05, "loss": 1.5786, "step": 176288 }, { "epoch": 0.34793424729438466, "grad_norm": 0.11128125339746475, "learning_rate": 6.547113810989987e-05, "loss": 1.5902, "step": 176320 }, { "epoch": 0.3479973932557811, "grad_norm": 0.11117946356534958, "learning_rate": 6.546479788553518e-05, "loss": 1.5909, "step": 176352 }, { "epoch": 0.34806053921717756, "grad_norm": 0.1188756674528122, "learning_rate": 6.545845766117049e-05, "loss": 1.5779, "step": 176384 }, { "epoch": 0.348123685178574, "grad_norm": 0.11214076727628708, "learning_rate": 6.545211743680579e-05, "loss": 1.589, "step": 176416 }, { "epoch": 0.3481868311399704, "grad_norm": 0.11385166645050049, "learning_rate": 6.54457772124411e-05, "loss": 1.5874, "step": 176448 }, { "epoch": 0.34824997710136685, "grad_norm": 0.1099756583571434, "learning_rate": 6.543943698807642e-05, "loss": 1.5779, "step": 176480 }, { "epoch": 0.3483131230627633, "grad_norm": 0.11098700761795044, "learning_rate": 6.543309676371173e-05, "loss": 1.5763, "step": 176512 }, { "epoch": 0.34837626902415975, "grad_norm": 0.10740330815315247, "learning_rate": 6.542675653934703e-05, "loss": 1.587, "step": 176544 }, { "epoch": 0.3484394149855562, "grad_norm": 0.11414963752031326, "learning_rate": 6.542041631498235e-05, "loss": 1.5904, "step": 176576 }, { "epoch": 0.34850256094695264, "grad_norm": 0.11507175862789154, "learning_rate": 6.541407609061766e-05, "loss": 1.593, "step": 176608 }, { "epoch": 0.34856570690834904, "grad_norm": 0.10829048603773117, "learning_rate": 6.540773586625297e-05, "loss": 1.5822, "step": 176640 }, { "epoch": 0.3486288528697455, "grad_norm": 0.11237678676843643, "learning_rate": 6.540139564188829e-05, "loss": 1.5847, "step": 176672 }, { "epoch": 0.34869199883114194, "grad_norm": 0.10770285129547119, "learning_rate": 6.539505541752359e-05, "loss": 1.5895, "step": 176704 }, { "epoch": 0.3487551447925384, "grad_norm": 0.11052990704774857, "learning_rate": 6.53887151931589e-05, "loss": 1.5883, "step": 176736 }, { "epoch": 0.34881829075393483, "grad_norm": 0.10141550749540329, "learning_rate": 6.538237496879422e-05, "loss": 1.5772, "step": 176768 }, { "epoch": 0.3488814367153313, "grad_norm": 0.10469438135623932, "learning_rate": 6.537603474442952e-05, "loss": 1.5846, "step": 176800 }, { "epoch": 0.3489445826767277, "grad_norm": 0.11447334289550781, "learning_rate": 6.536969452006482e-05, "loss": 1.5869, "step": 176832 }, { "epoch": 0.3490077286381241, "grad_norm": 0.10925020277500153, "learning_rate": 6.536335429570014e-05, "loss": 1.6061, "step": 176864 }, { "epoch": 0.3490708745995206, "grad_norm": 0.10936908423900604, "learning_rate": 6.535701407133545e-05, "loss": 1.5985, "step": 176896 }, { "epoch": 0.349134020560917, "grad_norm": 0.12269753962755203, "learning_rate": 6.535067384697077e-05, "loss": 1.5808, "step": 176928 }, { "epoch": 0.3491971665223135, "grad_norm": 0.1172834113240242, "learning_rate": 6.534433362260607e-05, "loss": 1.5936, "step": 176960 }, { "epoch": 0.3492603124837099, "grad_norm": 0.1211320161819458, "learning_rate": 6.533799339824138e-05, "loss": 1.5749, "step": 176992 }, { "epoch": 0.34932345844510637, "grad_norm": 0.11673060059547424, "learning_rate": 6.53316531738767e-05, "loss": 1.5905, "step": 177024 }, { "epoch": 0.34938660440650277, "grad_norm": 0.10510196536779404, "learning_rate": 6.532531294951201e-05, "loss": 1.5782, "step": 177056 }, { "epoch": 0.3494497503678992, "grad_norm": 0.11799222975969315, "learning_rate": 6.531897272514731e-05, "loss": 1.5822, "step": 177088 }, { "epoch": 0.34951289632929566, "grad_norm": 0.102109894156456, "learning_rate": 6.531263250078263e-05, "loss": 1.5913, "step": 177120 }, { "epoch": 0.3495760422906921, "grad_norm": 0.11021828651428223, "learning_rate": 6.530629227641794e-05, "loss": 1.5971, "step": 177152 }, { "epoch": 0.34963918825208856, "grad_norm": 0.10070045292377472, "learning_rate": 6.529995205205325e-05, "loss": 1.5885, "step": 177184 }, { "epoch": 0.349702334213485, "grad_norm": 0.10902727395296097, "learning_rate": 6.529361182768856e-05, "loss": 1.5893, "step": 177216 }, { "epoch": 0.3497654801748814, "grad_norm": 0.10452146828174591, "learning_rate": 6.528727160332386e-05, "loss": 1.5858, "step": 177248 }, { "epoch": 0.34982862613627785, "grad_norm": 0.11428716033697128, "learning_rate": 6.528093137895917e-05, "loss": 1.5676, "step": 177280 }, { "epoch": 0.3498917720976743, "grad_norm": 0.10437949746847153, "learning_rate": 6.527459115459449e-05, "loss": 1.5789, "step": 177312 }, { "epoch": 0.34995491805907075, "grad_norm": 0.1125776618719101, "learning_rate": 6.52682509302298e-05, "loss": 1.5818, "step": 177344 }, { "epoch": 0.3500180640204672, "grad_norm": 0.10946209728717804, "learning_rate": 6.52619107058651e-05, "loss": 1.5825, "step": 177376 }, { "epoch": 0.35008120998186365, "grad_norm": 0.10685037821531296, "learning_rate": 6.525557048150042e-05, "loss": 1.5807, "step": 177408 }, { "epoch": 0.35014435594326004, "grad_norm": 0.11477645486593246, "learning_rate": 6.524923025713573e-05, "loss": 1.5913, "step": 177440 }, { "epoch": 0.3502075019046565, "grad_norm": 0.1101703867316246, "learning_rate": 6.524289003277105e-05, "loss": 1.5832, "step": 177472 }, { "epoch": 0.35027064786605294, "grad_norm": 0.12006646394729614, "learning_rate": 6.523654980840635e-05, "loss": 1.5885, "step": 177504 }, { "epoch": 0.3503337938274494, "grad_norm": 0.12149064242839813, "learning_rate": 6.523020958404166e-05, "loss": 1.5878, "step": 177536 }, { "epoch": 0.35039693978884584, "grad_norm": 0.10605932772159576, "learning_rate": 6.522386935967698e-05, "loss": 1.5874, "step": 177568 }, { "epoch": 0.3504600857502423, "grad_norm": 0.1071997582912445, "learning_rate": 6.521752913531229e-05, "loss": 1.5831, "step": 177600 }, { "epoch": 0.3505232317116387, "grad_norm": 0.10740258544683456, "learning_rate": 6.521118891094759e-05, "loss": 1.5896, "step": 177632 }, { "epoch": 0.35058637767303513, "grad_norm": 0.11575182527303696, "learning_rate": 6.520484868658289e-05, "loss": 1.5911, "step": 177664 }, { "epoch": 0.3506495236344316, "grad_norm": 0.11003038287162781, "learning_rate": 6.51985084622182e-05, "loss": 1.5907, "step": 177696 }, { "epoch": 0.35071266959582803, "grad_norm": 0.12044735252857208, "learning_rate": 6.519216823785352e-05, "loss": 1.5838, "step": 177728 }, { "epoch": 0.3507758155572245, "grad_norm": 0.109721340239048, "learning_rate": 6.518582801348882e-05, "loss": 1.5891, "step": 177760 }, { "epoch": 0.35083896151862093, "grad_norm": 0.11115572601556778, "learning_rate": 6.517948778912414e-05, "loss": 1.583, "step": 177792 }, { "epoch": 0.3509021074800173, "grad_norm": 0.11124666780233383, "learning_rate": 6.517314756475945e-05, "loss": 1.5853, "step": 177824 }, { "epoch": 0.35096525344141377, "grad_norm": 0.11155048757791519, "learning_rate": 6.516680734039477e-05, "loss": 1.5947, "step": 177856 }, { "epoch": 0.3510283994028102, "grad_norm": 0.11156784743070602, "learning_rate": 6.516046711603007e-05, "loss": 1.5877, "step": 177888 }, { "epoch": 0.35109154536420667, "grad_norm": 0.10897670686244965, "learning_rate": 6.515412689166538e-05, "loss": 1.5977, "step": 177920 }, { "epoch": 0.3511546913256031, "grad_norm": 0.1054021343588829, "learning_rate": 6.51477866673007e-05, "loss": 1.5833, "step": 177952 }, { "epoch": 0.35121783728699957, "grad_norm": 0.11119698733091354, "learning_rate": 6.514144644293601e-05, "loss": 1.5949, "step": 177984 }, { "epoch": 0.35128098324839596, "grad_norm": 0.10701705515384674, "learning_rate": 6.513510621857131e-05, "loss": 1.5924, "step": 178016 }, { "epoch": 0.3513441292097924, "grad_norm": 0.10838789492845535, "learning_rate": 6.512876599420663e-05, "loss": 1.5879, "step": 178048 }, { "epoch": 0.35140727517118886, "grad_norm": 0.10813632607460022, "learning_rate": 6.512242576984193e-05, "loss": 1.5943, "step": 178080 }, { "epoch": 0.3514704211325853, "grad_norm": 0.10789559781551361, "learning_rate": 6.511608554547724e-05, "loss": 1.5738, "step": 178112 }, { "epoch": 0.35153356709398176, "grad_norm": 0.11196893453598022, "learning_rate": 6.510974532111256e-05, "loss": 1.592, "step": 178144 }, { "epoch": 0.3515967130553782, "grad_norm": 0.10628174245357513, "learning_rate": 6.510340509674786e-05, "loss": 1.5911, "step": 178176 }, { "epoch": 0.3516598590167746, "grad_norm": 0.11501415073871613, "learning_rate": 6.509706487238317e-05, "loss": 1.5891, "step": 178208 }, { "epoch": 0.35172300497817105, "grad_norm": 0.10158290714025497, "learning_rate": 6.509072464801849e-05, "loss": 1.5757, "step": 178240 }, { "epoch": 0.3517861509395675, "grad_norm": 0.12369918823242188, "learning_rate": 6.50843844236538e-05, "loss": 1.5879, "step": 178272 }, { "epoch": 0.35184929690096395, "grad_norm": 0.11359665542840958, "learning_rate": 6.50780441992891e-05, "loss": 1.5913, "step": 178304 }, { "epoch": 0.3519124428623604, "grad_norm": 0.10677289217710495, "learning_rate": 6.507170397492442e-05, "loss": 1.5814, "step": 178336 }, { "epoch": 0.35197558882375685, "grad_norm": 0.11651216447353363, "learning_rate": 6.506536375055973e-05, "loss": 1.5808, "step": 178368 }, { "epoch": 0.35203873478515324, "grad_norm": 0.10844559222459793, "learning_rate": 6.505902352619505e-05, "loss": 1.5811, "step": 178400 }, { "epoch": 0.3521018807465497, "grad_norm": 0.10941294580698013, "learning_rate": 6.505268330183035e-05, "loss": 1.593, "step": 178432 }, { "epoch": 0.35216502670794614, "grad_norm": 0.111540287733078, "learning_rate": 6.504634307746566e-05, "loss": 1.5875, "step": 178464 }, { "epoch": 0.3522281726693426, "grad_norm": 0.10734125226736069, "learning_rate": 6.504000285310096e-05, "loss": 1.5803, "step": 178496 }, { "epoch": 0.35229131863073904, "grad_norm": 0.10194408893585205, "learning_rate": 6.503366262873628e-05, "loss": 1.5845, "step": 178528 }, { "epoch": 0.3523544645921355, "grad_norm": 0.12050027400255203, "learning_rate": 6.502732240437158e-05, "loss": 1.5886, "step": 178560 }, { "epoch": 0.3524176105535319, "grad_norm": 0.10722140967845917, "learning_rate": 6.502098218000689e-05, "loss": 1.5785, "step": 178592 }, { "epoch": 0.35248075651492833, "grad_norm": 0.10467955470085144, "learning_rate": 6.50146419556422e-05, "loss": 1.574, "step": 178624 }, { "epoch": 0.3525439024763248, "grad_norm": 0.10946305841207504, "learning_rate": 6.500830173127752e-05, "loss": 1.5818, "step": 178656 }, { "epoch": 0.3526070484377212, "grad_norm": 0.10749588906764984, "learning_rate": 6.500196150691284e-05, "loss": 1.574, "step": 178688 }, { "epoch": 0.3526701943991177, "grad_norm": 0.10729070752859116, "learning_rate": 6.499562128254814e-05, "loss": 1.5817, "step": 178720 }, { "epoch": 0.3527333403605141, "grad_norm": 0.11417505890130997, "learning_rate": 6.498928105818345e-05, "loss": 1.5789, "step": 178752 }, { "epoch": 0.3527964863219106, "grad_norm": 0.11822691559791565, "learning_rate": 6.498294083381877e-05, "loss": 1.5818, "step": 178784 }, { "epoch": 0.35285963228330697, "grad_norm": 0.11468329280614853, "learning_rate": 6.497660060945408e-05, "loss": 1.5808, "step": 178816 }, { "epoch": 0.3529227782447034, "grad_norm": 0.10972462594509125, "learning_rate": 6.497026038508938e-05, "loss": 1.5785, "step": 178848 }, { "epoch": 0.35298592420609987, "grad_norm": 0.11151985824108124, "learning_rate": 6.49639201607247e-05, "loss": 1.5893, "step": 178880 }, { "epoch": 0.3530490701674963, "grad_norm": 0.11051822453737259, "learning_rate": 6.495757993636e-05, "loss": 1.5987, "step": 178912 }, { "epoch": 0.35311221612889276, "grad_norm": 0.1133396327495575, "learning_rate": 6.495123971199531e-05, "loss": 1.5903, "step": 178944 }, { "epoch": 0.3531753620902892, "grad_norm": 0.11220772564411163, "learning_rate": 6.494489948763061e-05, "loss": 1.584, "step": 178976 }, { "epoch": 0.3532385080516856, "grad_norm": 0.11535008251667023, "learning_rate": 6.493855926326593e-05, "loss": 1.5819, "step": 179008 }, { "epoch": 0.35330165401308206, "grad_norm": 0.10733187198638916, "learning_rate": 6.493221903890124e-05, "loss": 1.5979, "step": 179040 }, { "epoch": 0.3533647999744785, "grad_norm": 0.10744732618331909, "learning_rate": 6.492587881453656e-05, "loss": 1.587, "step": 179072 }, { "epoch": 0.35342794593587495, "grad_norm": 0.10880184173583984, "learning_rate": 6.491953859017186e-05, "loss": 1.5961, "step": 179104 }, { "epoch": 0.3534910918972714, "grad_norm": 0.1068880558013916, "learning_rate": 6.491319836580717e-05, "loss": 1.5836, "step": 179136 }, { "epoch": 0.35355423785866785, "grad_norm": 0.11626382917165756, "learning_rate": 6.490685814144249e-05, "loss": 1.5749, "step": 179168 }, { "epoch": 0.35361738382006425, "grad_norm": 0.10463514924049377, "learning_rate": 6.49005179170778e-05, "loss": 1.5714, "step": 179200 }, { "epoch": 0.3536805297814607, "grad_norm": 0.11262864619493484, "learning_rate": 6.489417769271312e-05, "loss": 1.5738, "step": 179232 }, { "epoch": 0.35374367574285714, "grad_norm": 0.10983620584011078, "learning_rate": 6.488783746834842e-05, "loss": 1.5801, "step": 179264 }, { "epoch": 0.3538068217042536, "grad_norm": 0.11021733283996582, "learning_rate": 6.488149724398372e-05, "loss": 1.597, "step": 179296 }, { "epoch": 0.35386996766565004, "grad_norm": 0.10846909880638123, "learning_rate": 6.487515701961903e-05, "loss": 1.5888, "step": 179328 }, { "epoch": 0.3539331136270465, "grad_norm": 0.1092953160405159, "learning_rate": 6.486881679525435e-05, "loss": 1.5821, "step": 179360 }, { "epoch": 0.3539962595884429, "grad_norm": 0.11354577541351318, "learning_rate": 6.486247657088965e-05, "loss": 1.5841, "step": 179392 }, { "epoch": 0.35405940554983933, "grad_norm": 0.1108522117137909, "learning_rate": 6.485613634652496e-05, "loss": 1.5808, "step": 179424 }, { "epoch": 0.3541225515112358, "grad_norm": 0.11519111692905426, "learning_rate": 6.484979612216028e-05, "loss": 1.5882, "step": 179456 }, { "epoch": 0.35418569747263223, "grad_norm": 0.1082671582698822, "learning_rate": 6.484345589779559e-05, "loss": 1.5898, "step": 179488 }, { "epoch": 0.3542488434340287, "grad_norm": 0.11377017945051193, "learning_rate": 6.483711567343089e-05, "loss": 1.5849, "step": 179520 }, { "epoch": 0.35431198939542513, "grad_norm": 0.11119108647108078, "learning_rate": 6.483077544906621e-05, "loss": 1.5893, "step": 179552 }, { "epoch": 0.3543751353568215, "grad_norm": 0.11232598125934601, "learning_rate": 6.482443522470152e-05, "loss": 1.589, "step": 179584 }, { "epoch": 0.354438281318218, "grad_norm": 0.10830990970134735, "learning_rate": 6.481809500033684e-05, "loss": 1.5762, "step": 179616 }, { "epoch": 0.3545014272796144, "grad_norm": 0.11087454855442047, "learning_rate": 6.481175477597214e-05, "loss": 1.5785, "step": 179648 }, { "epoch": 0.35456457324101087, "grad_norm": 0.11044250428676605, "learning_rate": 6.480541455160745e-05, "loss": 1.5842, "step": 179680 }, { "epoch": 0.3546277192024073, "grad_norm": 0.11231352388858795, "learning_rate": 6.479907432724275e-05, "loss": 1.5774, "step": 179712 }, { "epoch": 0.35469086516380377, "grad_norm": 0.11182203143835068, "learning_rate": 6.479273410287807e-05, "loss": 1.578, "step": 179744 }, { "epoch": 0.35475401112520016, "grad_norm": 0.1104021817445755, "learning_rate": 6.478639387851337e-05, "loss": 1.5822, "step": 179776 }, { "epoch": 0.3548171570865966, "grad_norm": 0.11247829347848892, "learning_rate": 6.478005365414868e-05, "loss": 1.5873, "step": 179808 }, { "epoch": 0.35488030304799306, "grad_norm": 0.11255551874637604, "learning_rate": 6.4773713429784e-05, "loss": 1.5823, "step": 179840 }, { "epoch": 0.3549434490093895, "grad_norm": 0.10529974848031998, "learning_rate": 6.476737320541931e-05, "loss": 1.5871, "step": 179872 }, { "epoch": 0.35500659497078596, "grad_norm": 0.10930514335632324, "learning_rate": 6.476103298105463e-05, "loss": 1.585, "step": 179904 }, { "epoch": 0.3550697409321824, "grad_norm": 0.10999929904937744, "learning_rate": 6.475469275668993e-05, "loss": 1.5813, "step": 179936 }, { "epoch": 0.3551328868935788, "grad_norm": 0.11748002469539642, "learning_rate": 6.474835253232524e-05, "loss": 1.5888, "step": 179968 }, { "epoch": 0.35519603285497525, "grad_norm": 0.11176477372646332, "learning_rate": 6.474201230796056e-05, "loss": 1.5967, "step": 180000 }, { "epoch": 0.3552591788163717, "grad_norm": 0.11481215059757233, "learning_rate": 6.473567208359587e-05, "loss": 1.5936, "step": 180032 }, { "epoch": 0.35532232477776815, "grad_norm": 0.10812889039516449, "learning_rate": 6.472933185923117e-05, "loss": 1.5836, "step": 180064 }, { "epoch": 0.3553854707391646, "grad_norm": 0.10911864042282104, "learning_rate": 6.472299163486649e-05, "loss": 1.5993, "step": 180096 }, { "epoch": 0.35544861670056105, "grad_norm": 0.10474446415901184, "learning_rate": 6.471665141050179e-05, "loss": 1.5831, "step": 180128 }, { "epoch": 0.35551176266195744, "grad_norm": 0.11591237783432007, "learning_rate": 6.47103111861371e-05, "loss": 1.5909, "step": 180160 }, { "epoch": 0.3555749086233539, "grad_norm": 0.10848182439804077, "learning_rate": 6.47039709617724e-05, "loss": 1.5913, "step": 180192 }, { "epoch": 0.35563805458475034, "grad_norm": 0.10682623088359833, "learning_rate": 6.469763073740772e-05, "loss": 1.5791, "step": 180224 }, { "epoch": 0.3557012005461468, "grad_norm": 0.1151672974228859, "learning_rate": 6.469129051304303e-05, "loss": 1.5782, "step": 180256 }, { "epoch": 0.35576434650754324, "grad_norm": 0.11238664388656616, "learning_rate": 6.468495028867835e-05, "loss": 1.5824, "step": 180288 }, { "epoch": 0.3558274924689397, "grad_norm": 0.110641248524189, "learning_rate": 6.467861006431365e-05, "loss": 1.5814, "step": 180320 }, { "epoch": 0.3558906384303361, "grad_norm": 0.10974923521280289, "learning_rate": 6.467226983994896e-05, "loss": 1.5838, "step": 180352 }, { "epoch": 0.35595378439173253, "grad_norm": 0.1073017567396164, "learning_rate": 6.466592961558428e-05, "loss": 1.5786, "step": 180384 }, { "epoch": 0.356016930353129, "grad_norm": 0.10749690234661102, "learning_rate": 6.465958939121959e-05, "loss": 1.574, "step": 180416 }, { "epoch": 0.35608007631452543, "grad_norm": 0.10572580993175507, "learning_rate": 6.465324916685489e-05, "loss": 1.5825, "step": 180448 }, { "epoch": 0.3561432222759219, "grad_norm": 0.10467702150344849, "learning_rate": 6.464690894249021e-05, "loss": 1.5886, "step": 180480 }, { "epoch": 0.3562063682373183, "grad_norm": 0.11328021436929703, "learning_rate": 6.464056871812552e-05, "loss": 1.5801, "step": 180512 }, { "epoch": 0.3562695141987148, "grad_norm": 0.1265615075826645, "learning_rate": 6.463422849376082e-05, "loss": 1.584, "step": 180544 }, { "epoch": 0.35633266016011117, "grad_norm": 0.1107073575258255, "learning_rate": 6.462788826939614e-05, "loss": 1.5918, "step": 180576 }, { "epoch": 0.3563958061215076, "grad_norm": 0.1132175475358963, "learning_rate": 6.462154804503144e-05, "loss": 1.5858, "step": 180608 }, { "epoch": 0.35645895208290407, "grad_norm": 0.1220729649066925, "learning_rate": 6.461520782066675e-05, "loss": 1.5916, "step": 180640 }, { "epoch": 0.3565220980443005, "grad_norm": 0.10473572462797165, "learning_rate": 6.460886759630207e-05, "loss": 1.5934, "step": 180672 }, { "epoch": 0.35658524400569697, "grad_norm": 0.10555895417928696, "learning_rate": 6.460252737193738e-05, "loss": 1.5913, "step": 180704 }, { "epoch": 0.3566483899670934, "grad_norm": 0.11519496142864227, "learning_rate": 6.459618714757268e-05, "loss": 1.584, "step": 180736 }, { "epoch": 0.3567115359284898, "grad_norm": 0.10348035395145416, "learning_rate": 6.4589846923208e-05, "loss": 1.5882, "step": 180768 }, { "epoch": 0.35677468188988626, "grad_norm": 0.10846167057752609, "learning_rate": 6.458350669884331e-05, "loss": 1.5798, "step": 180800 }, { "epoch": 0.3568378278512827, "grad_norm": 0.10748612135648727, "learning_rate": 6.457716647447863e-05, "loss": 1.5933, "step": 180832 }, { "epoch": 0.35690097381267916, "grad_norm": 0.10867191851139069, "learning_rate": 6.457082625011393e-05, "loss": 1.5794, "step": 180864 }, { "epoch": 0.3569641197740756, "grad_norm": 0.10982093214988708, "learning_rate": 6.456448602574924e-05, "loss": 1.5834, "step": 180896 }, { "epoch": 0.35702726573547205, "grad_norm": 0.11110133677721024, "learning_rate": 6.455814580138456e-05, "loss": 1.5785, "step": 180928 }, { "epoch": 0.35709041169686845, "grad_norm": 0.11195295304059982, "learning_rate": 6.455180557701986e-05, "loss": 1.5856, "step": 180960 }, { "epoch": 0.3571535576582649, "grad_norm": 0.12834830582141876, "learning_rate": 6.454546535265516e-05, "loss": 1.5893, "step": 180992 }, { "epoch": 0.35721670361966135, "grad_norm": 0.10547395795583725, "learning_rate": 6.453912512829047e-05, "loss": 1.5856, "step": 181024 }, { "epoch": 0.3572798495810578, "grad_norm": 0.11166314035654068, "learning_rate": 6.453278490392579e-05, "loss": 1.5797, "step": 181056 }, { "epoch": 0.35734299554245424, "grad_norm": 0.11106620728969574, "learning_rate": 6.45264446795611e-05, "loss": 1.5852, "step": 181088 }, { "epoch": 0.3574061415038507, "grad_norm": 0.10392887145280838, "learning_rate": 6.45201044551964e-05, "loss": 1.5699, "step": 181120 }, { "epoch": 0.3574692874652471, "grad_norm": 0.11312718689441681, "learning_rate": 6.451376423083172e-05, "loss": 1.5799, "step": 181152 }, { "epoch": 0.35753243342664354, "grad_norm": 0.1205592006444931, "learning_rate": 6.450742400646703e-05, "loss": 1.5844, "step": 181184 }, { "epoch": 0.35759557938804, "grad_norm": 0.11224983632564545, "learning_rate": 6.450108378210235e-05, "loss": 1.5688, "step": 181216 }, { "epoch": 0.35765872534943643, "grad_norm": 0.11771758645772934, "learning_rate": 6.449474355773766e-05, "loss": 1.5784, "step": 181248 }, { "epoch": 0.3577218713108329, "grad_norm": 0.11228399723768234, "learning_rate": 6.448840333337296e-05, "loss": 1.5861, "step": 181280 }, { "epoch": 0.35778501727222933, "grad_norm": 0.11208807677030563, "learning_rate": 6.448206310900828e-05, "loss": 1.5612, "step": 181312 }, { "epoch": 0.3578481632336257, "grad_norm": 0.10504967719316483, "learning_rate": 6.447572288464359e-05, "loss": 1.5812, "step": 181344 }, { "epoch": 0.3579113091950222, "grad_norm": 0.10955792665481567, "learning_rate": 6.446938266027889e-05, "loss": 1.5809, "step": 181376 }, { "epoch": 0.3579744551564186, "grad_norm": 0.11303912103176117, "learning_rate": 6.44630424359142e-05, "loss": 1.5843, "step": 181408 }, { "epoch": 0.3580376011178151, "grad_norm": 0.1057058796286583, "learning_rate": 6.445670221154951e-05, "loss": 1.5789, "step": 181440 }, { "epoch": 0.3581007470792115, "grad_norm": 0.11012724041938782, "learning_rate": 6.445036198718482e-05, "loss": 1.5933, "step": 181472 }, { "epoch": 0.358163893040608, "grad_norm": 0.11209618300199509, "learning_rate": 6.444402176282014e-05, "loss": 1.589, "step": 181504 }, { "epoch": 0.35822703900200437, "grad_norm": 0.11269824951887131, "learning_rate": 6.443768153845544e-05, "loss": 1.591, "step": 181536 }, { "epoch": 0.3582901849634008, "grad_norm": 0.11304093897342682, "learning_rate": 6.443134131409075e-05, "loss": 1.609, "step": 181568 }, { "epoch": 0.35835333092479726, "grad_norm": 0.11303514242172241, "learning_rate": 6.442500108972607e-05, "loss": 1.5863, "step": 181600 }, { "epoch": 0.3584164768861937, "grad_norm": 0.10825137048959732, "learning_rate": 6.441866086536138e-05, "loss": 1.5748, "step": 181632 }, { "epoch": 0.35847962284759016, "grad_norm": 0.12855233252048492, "learning_rate": 6.441232064099668e-05, "loss": 1.588, "step": 181664 }, { "epoch": 0.3585427688089866, "grad_norm": 0.10915649682283401, "learning_rate": 6.4405980416632e-05, "loss": 1.5745, "step": 181696 }, { "epoch": 0.358605914770383, "grad_norm": 0.1154591366648674, "learning_rate": 6.439964019226731e-05, "loss": 1.5836, "step": 181728 }, { "epoch": 0.35866906073177945, "grad_norm": 0.11371373385190964, "learning_rate": 6.439329996790263e-05, "loss": 1.5863, "step": 181760 }, { "epoch": 0.3587322066931759, "grad_norm": 0.1083037257194519, "learning_rate": 6.438695974353793e-05, "loss": 1.587, "step": 181792 }, { "epoch": 0.35879535265457235, "grad_norm": 0.10600948333740234, "learning_rate": 6.438061951917323e-05, "loss": 1.585, "step": 181824 }, { "epoch": 0.3588584986159688, "grad_norm": 0.110209159553051, "learning_rate": 6.437427929480854e-05, "loss": 1.5848, "step": 181856 }, { "epoch": 0.35892164457736525, "grad_norm": 0.10848981142044067, "learning_rate": 6.436793907044386e-05, "loss": 1.5907, "step": 181888 }, { "epoch": 0.35898479053876164, "grad_norm": 0.10719979554414749, "learning_rate": 6.436159884607917e-05, "loss": 1.5727, "step": 181920 }, { "epoch": 0.3590479365001581, "grad_norm": 0.10645796358585358, "learning_rate": 6.435525862171447e-05, "loss": 1.5879, "step": 181952 }, { "epoch": 0.35911108246155454, "grad_norm": 0.1148567944765091, "learning_rate": 6.434891839734979e-05, "loss": 1.5841, "step": 181984 }, { "epoch": 0.359174228422951, "grad_norm": 0.11215532571077347, "learning_rate": 6.43425781729851e-05, "loss": 1.5902, "step": 182016 }, { "epoch": 0.35923737438434744, "grad_norm": 0.11848314106464386, "learning_rate": 6.433623794862042e-05, "loss": 1.5895, "step": 182048 }, { "epoch": 0.3593005203457439, "grad_norm": 0.12311811000108719, "learning_rate": 6.432989772425572e-05, "loss": 1.5816, "step": 182080 }, { "epoch": 0.35936366630714034, "grad_norm": 0.10764306783676147, "learning_rate": 6.432355749989103e-05, "loss": 1.5952, "step": 182112 }, { "epoch": 0.35942681226853673, "grad_norm": 0.10449346899986267, "learning_rate": 6.431721727552635e-05, "loss": 1.5855, "step": 182144 }, { "epoch": 0.3594899582299332, "grad_norm": 0.11061811447143555, "learning_rate": 6.431087705116165e-05, "loss": 1.5842, "step": 182176 }, { "epoch": 0.35955310419132963, "grad_norm": 0.1019996777176857, "learning_rate": 6.430453682679696e-05, "loss": 1.6047, "step": 182208 }, { "epoch": 0.3596162501527261, "grad_norm": 0.10651903599500656, "learning_rate": 6.429819660243226e-05, "loss": 1.5804, "step": 182240 }, { "epoch": 0.35967939611412253, "grad_norm": 0.11617961525917053, "learning_rate": 6.429185637806758e-05, "loss": 1.5903, "step": 182272 }, { "epoch": 0.359742542075519, "grad_norm": 0.1261436939239502, "learning_rate": 6.42855161537029e-05, "loss": 1.5817, "step": 182304 }, { "epoch": 0.35980568803691537, "grad_norm": 0.1021326333284378, "learning_rate": 6.42791759293382e-05, "loss": 1.5903, "step": 182336 }, { "epoch": 0.3598688339983118, "grad_norm": 0.1128995269536972, "learning_rate": 6.427283570497351e-05, "loss": 1.5831, "step": 182368 }, { "epoch": 0.35993197995970827, "grad_norm": 0.10895900428295135, "learning_rate": 6.426649548060882e-05, "loss": 1.5926, "step": 182400 }, { "epoch": 0.3599951259211047, "grad_norm": 0.11161261051893234, "learning_rate": 6.426015525624414e-05, "loss": 1.5923, "step": 182432 }, { "epoch": 0.36005827188250117, "grad_norm": 0.12293332815170288, "learning_rate": 6.425381503187944e-05, "loss": 1.5899, "step": 182464 }, { "epoch": 0.3601214178438976, "grad_norm": 0.11045447736978531, "learning_rate": 6.424747480751475e-05, "loss": 1.585, "step": 182496 }, { "epoch": 0.360184563805294, "grad_norm": 0.11175920814275742, "learning_rate": 6.424113458315007e-05, "loss": 1.5801, "step": 182528 }, { "epoch": 0.36024770976669046, "grad_norm": 0.11152239143848419, "learning_rate": 6.423479435878538e-05, "loss": 1.5836, "step": 182560 }, { "epoch": 0.3603108557280869, "grad_norm": 0.10802054405212402, "learning_rate": 6.422845413442068e-05, "loss": 1.576, "step": 182592 }, { "epoch": 0.36037400168948336, "grad_norm": 0.10338658094406128, "learning_rate": 6.4222113910056e-05, "loss": 1.5804, "step": 182624 }, { "epoch": 0.3604371476508798, "grad_norm": 0.11012491583824158, "learning_rate": 6.42157736856913e-05, "loss": 1.5894, "step": 182656 }, { "epoch": 0.36050029361227626, "grad_norm": 0.10463359206914902, "learning_rate": 6.420943346132661e-05, "loss": 1.5779, "step": 182688 }, { "epoch": 0.36056343957367265, "grad_norm": 0.10699468106031418, "learning_rate": 6.420309323696193e-05, "loss": 1.5997, "step": 182720 }, { "epoch": 0.3606265855350691, "grad_norm": 0.11880798637866974, "learning_rate": 6.419675301259723e-05, "loss": 1.5779, "step": 182752 }, { "epoch": 0.36068973149646555, "grad_norm": 0.1022857204079628, "learning_rate": 6.419041278823254e-05, "loss": 1.5948, "step": 182784 }, { "epoch": 0.360752877457862, "grad_norm": 0.11548960953950882, "learning_rate": 6.418407256386786e-05, "loss": 1.5934, "step": 182816 }, { "epoch": 0.36081602341925845, "grad_norm": 0.11352226883172989, "learning_rate": 6.417773233950317e-05, "loss": 1.5802, "step": 182848 }, { "epoch": 0.3608791693806549, "grad_norm": 0.12137927860021591, "learning_rate": 6.417139211513847e-05, "loss": 1.5766, "step": 182880 }, { "epoch": 0.3609423153420513, "grad_norm": 0.11009381711483002, "learning_rate": 6.416505189077379e-05, "loss": 1.5922, "step": 182912 }, { "epoch": 0.36100546130344774, "grad_norm": 0.10775876045227051, "learning_rate": 6.41587116664091e-05, "loss": 1.58, "step": 182944 }, { "epoch": 0.3610686072648442, "grad_norm": 0.10785428434610367, "learning_rate": 6.415237144204442e-05, "loss": 1.5828, "step": 182976 }, { "epoch": 0.36113175322624064, "grad_norm": 0.1145273894071579, "learning_rate": 6.414603121767972e-05, "loss": 1.5927, "step": 183008 }, { "epoch": 0.3611948991876371, "grad_norm": 0.10673095285892487, "learning_rate": 6.413969099331503e-05, "loss": 1.5785, "step": 183040 }, { "epoch": 0.36125804514903354, "grad_norm": 0.11440155655145645, "learning_rate": 6.413335076895033e-05, "loss": 1.5913, "step": 183072 }, { "epoch": 0.36132119111042993, "grad_norm": 0.11792333424091339, "learning_rate": 6.412701054458565e-05, "loss": 1.5923, "step": 183104 }, { "epoch": 0.3613843370718264, "grad_norm": 0.10590378940105438, "learning_rate": 6.412067032022095e-05, "loss": 1.5758, "step": 183136 }, { "epoch": 0.3614474830332228, "grad_norm": 0.10517742484807968, "learning_rate": 6.411433009585626e-05, "loss": 1.5954, "step": 183168 }, { "epoch": 0.3615106289946193, "grad_norm": 0.11259836703538895, "learning_rate": 6.410798987149158e-05, "loss": 1.5927, "step": 183200 }, { "epoch": 0.3615737749560157, "grad_norm": 0.10897098481655121, "learning_rate": 6.41016496471269e-05, "loss": 1.5876, "step": 183232 }, { "epoch": 0.3616369209174122, "grad_norm": 0.10789348930120468, "learning_rate": 6.409530942276221e-05, "loss": 1.5839, "step": 183264 }, { "epoch": 0.36170006687880857, "grad_norm": 0.10714706778526306, "learning_rate": 6.408896919839751e-05, "loss": 1.5846, "step": 183296 }, { "epoch": 0.361763212840205, "grad_norm": 0.11360534280538559, "learning_rate": 6.408262897403282e-05, "loss": 1.5952, "step": 183328 }, { "epoch": 0.36182635880160147, "grad_norm": 0.11198006570339203, "learning_rate": 6.407628874966814e-05, "loss": 1.5902, "step": 183360 }, { "epoch": 0.3618895047629979, "grad_norm": 0.11150224506855011, "learning_rate": 6.406994852530345e-05, "loss": 1.5863, "step": 183392 }, { "epoch": 0.36195265072439436, "grad_norm": 0.11451698839664459, "learning_rate": 6.406360830093875e-05, "loss": 1.5852, "step": 183424 }, { "epoch": 0.3620157966857908, "grad_norm": 0.11168801784515381, "learning_rate": 6.405726807657406e-05, "loss": 1.5928, "step": 183456 }, { "epoch": 0.3620789426471872, "grad_norm": 0.11182107776403427, "learning_rate": 6.405092785220937e-05, "loss": 1.5982, "step": 183488 }, { "epoch": 0.36214208860858366, "grad_norm": 0.11775576323270798, "learning_rate": 6.404458762784468e-05, "loss": 1.5818, "step": 183520 }, { "epoch": 0.3622052345699801, "grad_norm": 0.10620637983083725, "learning_rate": 6.403824740347999e-05, "loss": 1.5794, "step": 183552 }, { "epoch": 0.36226838053137655, "grad_norm": 0.11593019962310791, "learning_rate": 6.40319071791153e-05, "loss": 1.5875, "step": 183584 }, { "epoch": 0.362331526492773, "grad_norm": 0.11101772636175156, "learning_rate": 6.402556695475061e-05, "loss": 1.5867, "step": 183616 }, { "epoch": 0.36239467245416945, "grad_norm": 0.11641628295183182, "learning_rate": 6.401922673038593e-05, "loss": 1.5832, "step": 183648 }, { "epoch": 0.36245781841556585, "grad_norm": 0.10823922604322433, "learning_rate": 6.401288650602123e-05, "loss": 1.5849, "step": 183680 }, { "epoch": 0.3625209643769623, "grad_norm": 0.10185832530260086, "learning_rate": 6.400654628165654e-05, "loss": 1.5705, "step": 183712 }, { "epoch": 0.36258411033835874, "grad_norm": 0.1149238720536232, "learning_rate": 6.400020605729186e-05, "loss": 1.5872, "step": 183744 }, { "epoch": 0.3626472562997552, "grad_norm": 0.10884139686822891, "learning_rate": 6.399386583292717e-05, "loss": 1.5804, "step": 183776 }, { "epoch": 0.36271040226115164, "grad_norm": 0.11320940405130386, "learning_rate": 6.398752560856247e-05, "loss": 1.5896, "step": 183808 }, { "epoch": 0.3627735482225481, "grad_norm": 0.12907955050468445, "learning_rate": 6.398118538419779e-05, "loss": 1.5802, "step": 183840 }, { "epoch": 0.36283669418394454, "grad_norm": 0.11195775121450424, "learning_rate": 6.397484515983309e-05, "loss": 1.5783, "step": 183872 }, { "epoch": 0.36289984014534094, "grad_norm": 0.11155863851308823, "learning_rate": 6.39685049354684e-05, "loss": 1.5717, "step": 183904 }, { "epoch": 0.3629629861067374, "grad_norm": 0.11714331060647964, "learning_rate": 6.396216471110372e-05, "loss": 1.5828, "step": 183936 }, { "epoch": 0.36302613206813383, "grad_norm": 0.1108788549900055, "learning_rate": 6.395582448673902e-05, "loss": 1.5884, "step": 183968 }, { "epoch": 0.3630892780295303, "grad_norm": 0.11498186737298965, "learning_rate": 6.394948426237433e-05, "loss": 1.5828, "step": 184000 }, { "epoch": 0.36315242399092673, "grad_norm": 0.10536820441484451, "learning_rate": 6.394314403800965e-05, "loss": 1.5842, "step": 184032 }, { "epoch": 0.3632155699523232, "grad_norm": 0.11589545011520386, "learning_rate": 6.393680381364496e-05, "loss": 1.5773, "step": 184064 }, { "epoch": 0.3632787159137196, "grad_norm": 0.11279499530792236, "learning_rate": 6.393046358928027e-05, "loss": 1.5928, "step": 184096 }, { "epoch": 0.363341861875116, "grad_norm": 0.10008393973112106, "learning_rate": 6.392412336491558e-05, "loss": 1.5799, "step": 184128 }, { "epoch": 0.3634050078365125, "grad_norm": 0.10640986263751984, "learning_rate": 6.39177831405509e-05, "loss": 1.5927, "step": 184160 }, { "epoch": 0.3634681537979089, "grad_norm": 0.11636866629123688, "learning_rate": 6.391144291618621e-05, "loss": 1.5905, "step": 184192 }, { "epoch": 0.36353129975930537, "grad_norm": 0.11361801624298096, "learning_rate": 6.390510269182151e-05, "loss": 1.5733, "step": 184224 }, { "epoch": 0.3635944457207018, "grad_norm": 0.113465316593647, "learning_rate": 6.389876246745682e-05, "loss": 1.5875, "step": 184256 }, { "epoch": 0.3636575916820982, "grad_norm": 0.10863377153873444, "learning_rate": 6.389242224309213e-05, "loss": 1.5766, "step": 184288 }, { "epoch": 0.36372073764349466, "grad_norm": 0.11061983555555344, "learning_rate": 6.388608201872744e-05, "loss": 1.5831, "step": 184320 }, { "epoch": 0.3637838836048911, "grad_norm": 0.1096784844994545, "learning_rate": 6.387974179436274e-05, "loss": 1.5817, "step": 184352 }, { "epoch": 0.36384702956628756, "grad_norm": 0.10557596385478973, "learning_rate": 6.387340156999806e-05, "loss": 1.5825, "step": 184384 }, { "epoch": 0.363910175527684, "grad_norm": 0.11243075132369995, "learning_rate": 6.386706134563337e-05, "loss": 1.597, "step": 184416 }, { "epoch": 0.36397332148908046, "grad_norm": 0.11064084619283676, "learning_rate": 6.386072112126868e-05, "loss": 1.5931, "step": 184448 }, { "epoch": 0.36403646745047685, "grad_norm": 0.10731770843267441, "learning_rate": 6.385438089690399e-05, "loss": 1.5895, "step": 184480 }, { "epoch": 0.3640996134118733, "grad_norm": 0.11211124062538147, "learning_rate": 6.38480406725393e-05, "loss": 1.5773, "step": 184512 }, { "epoch": 0.36416275937326975, "grad_norm": 0.11388519406318665, "learning_rate": 6.384170044817461e-05, "loss": 1.5924, "step": 184544 }, { "epoch": 0.3642259053346662, "grad_norm": 0.12159216403961182, "learning_rate": 6.383536022380993e-05, "loss": 1.5875, "step": 184576 }, { "epoch": 0.36428905129606265, "grad_norm": 0.11033236235380173, "learning_rate": 6.382901999944524e-05, "loss": 1.5909, "step": 184608 }, { "epoch": 0.3643521972574591, "grad_norm": 0.10566152632236481, "learning_rate": 6.382267977508054e-05, "loss": 1.5676, "step": 184640 }, { "epoch": 0.3644153432188555, "grad_norm": 0.1056530699133873, "learning_rate": 6.381633955071586e-05, "loss": 1.6003, "step": 184672 }, { "epoch": 0.36447848918025194, "grad_norm": 0.10893242806196213, "learning_rate": 6.380999932635116e-05, "loss": 1.5796, "step": 184704 }, { "epoch": 0.3645416351416484, "grad_norm": 0.10994032770395279, "learning_rate": 6.380365910198648e-05, "loss": 1.5853, "step": 184736 }, { "epoch": 0.36460478110304484, "grad_norm": 0.11297830194234848, "learning_rate": 6.379731887762178e-05, "loss": 1.5887, "step": 184768 }, { "epoch": 0.3646679270644413, "grad_norm": 0.11538960784673691, "learning_rate": 6.379097865325709e-05, "loss": 1.5807, "step": 184800 }, { "epoch": 0.36473107302583774, "grad_norm": 0.11083732545375824, "learning_rate": 6.37846384288924e-05, "loss": 1.5869, "step": 184832 }, { "epoch": 0.36479421898723413, "grad_norm": 0.11004041135311127, "learning_rate": 6.377829820452772e-05, "loss": 1.5857, "step": 184864 }, { "epoch": 0.3648573649486306, "grad_norm": 0.10743048787117004, "learning_rate": 6.377195798016302e-05, "loss": 1.5787, "step": 184896 }, { "epoch": 0.36492051091002703, "grad_norm": 0.1123712956905365, "learning_rate": 6.376561775579834e-05, "loss": 1.583, "step": 184928 }, { "epoch": 0.3649836568714235, "grad_norm": 0.11037390679121017, "learning_rate": 6.375927753143365e-05, "loss": 1.5869, "step": 184960 }, { "epoch": 0.3650468028328199, "grad_norm": 0.10735499858856201, "learning_rate": 6.375293730706896e-05, "loss": 1.5662, "step": 184992 }, { "epoch": 0.3651099487942164, "grad_norm": 0.10350576043128967, "learning_rate": 6.374659708270427e-05, "loss": 1.5813, "step": 185024 }, { "epoch": 0.36517309475561277, "grad_norm": 0.11766477674245834, "learning_rate": 6.374025685833958e-05, "loss": 1.5849, "step": 185056 }, { "epoch": 0.3652362407170092, "grad_norm": 0.11200088262557983, "learning_rate": 6.37339166339749e-05, "loss": 1.5904, "step": 185088 }, { "epoch": 0.36529938667840567, "grad_norm": 0.1167105957865715, "learning_rate": 6.37275764096102e-05, "loss": 1.5947, "step": 185120 }, { "epoch": 0.3653625326398021, "grad_norm": 0.11037057638168335, "learning_rate": 6.37212361852455e-05, "loss": 1.5628, "step": 185152 }, { "epoch": 0.36542567860119857, "grad_norm": 0.1177527904510498, "learning_rate": 6.371489596088081e-05, "loss": 1.5811, "step": 185184 }, { "epoch": 0.365488824562595, "grad_norm": 0.10527502745389938, "learning_rate": 6.370855573651613e-05, "loss": 1.5818, "step": 185216 }, { "epoch": 0.3655519705239914, "grad_norm": 0.10623817145824432, "learning_rate": 6.370221551215144e-05, "loss": 1.5764, "step": 185248 }, { "epoch": 0.36561511648538786, "grad_norm": 0.10605397820472717, "learning_rate": 6.369587528778675e-05, "loss": 1.5891, "step": 185280 }, { "epoch": 0.3656782624467843, "grad_norm": 0.11802637577056885, "learning_rate": 6.368953506342206e-05, "loss": 1.5709, "step": 185312 }, { "epoch": 0.36574140840818076, "grad_norm": 0.11343847215175629, "learning_rate": 6.368319483905737e-05, "loss": 1.5856, "step": 185344 }, { "epoch": 0.3658045543695772, "grad_norm": 0.10883905738592148, "learning_rate": 6.367685461469268e-05, "loss": 1.589, "step": 185376 }, { "epoch": 0.36586770033097366, "grad_norm": 0.10828329622745514, "learning_rate": 6.3670514390328e-05, "loss": 1.5672, "step": 185408 }, { "epoch": 0.3659308462923701, "grad_norm": 0.11625433713197708, "learning_rate": 6.36641741659633e-05, "loss": 1.5741, "step": 185440 }, { "epoch": 0.3659939922537665, "grad_norm": 0.10105549544095993, "learning_rate": 6.365783394159862e-05, "loss": 1.5741, "step": 185472 }, { "epoch": 0.36605713821516295, "grad_norm": 0.10638256371021271, "learning_rate": 6.365149371723393e-05, "loss": 1.5937, "step": 185504 }, { "epoch": 0.3661202841765594, "grad_norm": 0.10467677563428879, "learning_rate": 6.364515349286923e-05, "loss": 1.584, "step": 185536 }, { "epoch": 0.36618343013795585, "grad_norm": 0.11417409032583237, "learning_rate": 6.363881326850453e-05, "loss": 1.595, "step": 185568 }, { "epoch": 0.3662465760993523, "grad_norm": 0.10955195873975754, "learning_rate": 6.363247304413985e-05, "loss": 1.5834, "step": 185600 }, { "epoch": 0.36630972206074874, "grad_norm": 0.11689722537994385, "learning_rate": 6.362613281977516e-05, "loss": 1.5864, "step": 185632 }, { "epoch": 0.36637286802214514, "grad_norm": 0.11240193992853165, "learning_rate": 6.361979259541048e-05, "loss": 1.5856, "step": 185664 }, { "epoch": 0.3664360139835416, "grad_norm": 0.10780593752861023, "learning_rate": 6.361345237104578e-05, "loss": 1.5942, "step": 185696 }, { "epoch": 0.36649915994493804, "grad_norm": 0.11237487941980362, "learning_rate": 6.360711214668109e-05, "loss": 1.5855, "step": 185728 }, { "epoch": 0.3665623059063345, "grad_norm": 0.11068807542324066, "learning_rate": 6.36007719223164e-05, "loss": 1.5895, "step": 185760 }, { "epoch": 0.36662545186773093, "grad_norm": 0.11342378705739975, "learning_rate": 6.359443169795172e-05, "loss": 1.5798, "step": 185792 }, { "epoch": 0.3666885978291274, "grad_norm": 0.10851754993200302, "learning_rate": 6.358809147358702e-05, "loss": 1.5823, "step": 185824 }, { "epoch": 0.3667517437905238, "grad_norm": 0.10945183783769608, "learning_rate": 6.358175124922234e-05, "loss": 1.5779, "step": 185856 }, { "epoch": 0.3668148897519202, "grad_norm": 0.11009562760591507, "learning_rate": 6.357541102485765e-05, "loss": 1.5762, "step": 185888 }, { "epoch": 0.3668780357133167, "grad_norm": 0.10616668313741684, "learning_rate": 6.356907080049296e-05, "loss": 1.5855, "step": 185920 }, { "epoch": 0.3669411816747131, "grad_norm": 0.11221147328615189, "learning_rate": 6.356273057612827e-05, "loss": 1.5763, "step": 185952 }, { "epoch": 0.3670043276361096, "grad_norm": 0.12302812188863754, "learning_rate": 6.355639035176357e-05, "loss": 1.5915, "step": 185984 }, { "epoch": 0.367067473597506, "grad_norm": 0.10927576571702957, "learning_rate": 6.355005012739888e-05, "loss": 1.5831, "step": 186016 }, { "epoch": 0.3671306195589024, "grad_norm": 0.11248224228620529, "learning_rate": 6.35437099030342e-05, "loss": 1.5675, "step": 186048 }, { "epoch": 0.36719376552029886, "grad_norm": 0.1288052648305893, "learning_rate": 6.353736967866951e-05, "loss": 1.5896, "step": 186080 }, { "epoch": 0.3672569114816953, "grad_norm": 0.1102360412478447, "learning_rate": 6.353102945430481e-05, "loss": 1.5953, "step": 186112 }, { "epoch": 0.36732005744309176, "grad_norm": 0.10656482726335526, "learning_rate": 6.352468922994013e-05, "loss": 1.5756, "step": 186144 }, { "epoch": 0.3673832034044882, "grad_norm": 0.108738012611866, "learning_rate": 6.351834900557544e-05, "loss": 1.5688, "step": 186176 }, { "epoch": 0.36744634936588466, "grad_norm": 0.1082652285695076, "learning_rate": 6.351200878121076e-05, "loss": 1.577, "step": 186208 }, { "epoch": 0.36750949532728105, "grad_norm": 0.11207770556211472, "learning_rate": 6.350566855684606e-05, "loss": 1.5827, "step": 186240 }, { "epoch": 0.3675726412886775, "grad_norm": 0.11149799823760986, "learning_rate": 6.349932833248137e-05, "loss": 1.5718, "step": 186272 }, { "epoch": 0.36763578725007395, "grad_norm": 0.12550869584083557, "learning_rate": 6.349298810811669e-05, "loss": 1.5937, "step": 186304 }, { "epoch": 0.3676989332114704, "grad_norm": 0.10630583763122559, "learning_rate": 6.348664788375199e-05, "loss": 1.5784, "step": 186336 }, { "epoch": 0.36776207917286685, "grad_norm": 0.11240078508853912, "learning_rate": 6.34803076593873e-05, "loss": 1.576, "step": 186368 }, { "epoch": 0.3678252251342633, "grad_norm": 0.11415791511535645, "learning_rate": 6.34739674350226e-05, "loss": 1.5774, "step": 186400 }, { "epoch": 0.3678883710956597, "grad_norm": 0.11179355531930923, "learning_rate": 6.346762721065792e-05, "loss": 1.5878, "step": 186432 }, { "epoch": 0.36795151705705614, "grad_norm": 0.10920584201812744, "learning_rate": 6.346128698629323e-05, "loss": 1.5845, "step": 186464 }, { "epoch": 0.3680146630184526, "grad_norm": 0.10972471535205841, "learning_rate": 6.345494676192853e-05, "loss": 1.583, "step": 186496 }, { "epoch": 0.36807780897984904, "grad_norm": 0.10917947441339493, "learning_rate": 6.344860653756385e-05, "loss": 1.5843, "step": 186528 }, { "epoch": 0.3681409549412455, "grad_norm": 0.10343483090400696, "learning_rate": 6.344226631319916e-05, "loss": 1.5913, "step": 186560 }, { "epoch": 0.36820410090264194, "grad_norm": 0.11454430967569351, "learning_rate": 6.343592608883448e-05, "loss": 1.5826, "step": 186592 }, { "epoch": 0.36826724686403833, "grad_norm": 0.1080147922039032, "learning_rate": 6.342958586446979e-05, "loss": 1.5873, "step": 186624 }, { "epoch": 0.3683303928254348, "grad_norm": 0.11471916735172272, "learning_rate": 6.342324564010509e-05, "loss": 1.5831, "step": 186656 }, { "epoch": 0.36839353878683123, "grad_norm": 0.10837042331695557, "learning_rate": 6.34169054157404e-05, "loss": 1.5866, "step": 186688 }, { "epoch": 0.3684566847482277, "grad_norm": 0.10801560431718826, "learning_rate": 6.341056519137572e-05, "loss": 1.6044, "step": 186720 }, { "epoch": 0.36851983070962413, "grad_norm": 0.11287467181682587, "learning_rate": 6.340422496701102e-05, "loss": 1.5897, "step": 186752 }, { "epoch": 0.3685829766710206, "grad_norm": 0.10864336788654327, "learning_rate": 6.339788474264634e-05, "loss": 1.5779, "step": 186784 }, { "epoch": 0.368646122632417, "grad_norm": 0.11014921218156815, "learning_rate": 6.339154451828164e-05, "loss": 1.5901, "step": 186816 }, { "epoch": 0.3687092685938134, "grad_norm": 0.11572349071502686, "learning_rate": 6.338520429391695e-05, "loss": 1.5711, "step": 186848 }, { "epoch": 0.36877241455520987, "grad_norm": 0.1091943010687828, "learning_rate": 6.337886406955227e-05, "loss": 1.5754, "step": 186880 }, { "epoch": 0.3688355605166063, "grad_norm": 0.11174189299345016, "learning_rate": 6.337252384518757e-05, "loss": 1.5924, "step": 186912 }, { "epoch": 0.36889870647800277, "grad_norm": 0.11803366988897324, "learning_rate": 6.336618362082288e-05, "loss": 1.5802, "step": 186944 }, { "epoch": 0.3689618524393992, "grad_norm": 0.112164206802845, "learning_rate": 6.33598433964582e-05, "loss": 1.5938, "step": 186976 }, { "epoch": 0.3690249984007956, "grad_norm": 0.1092149019241333, "learning_rate": 6.335350317209351e-05, "loss": 1.5906, "step": 187008 }, { "epoch": 0.36908814436219206, "grad_norm": 0.10716573894023895, "learning_rate": 6.334716294772881e-05, "loss": 1.5869, "step": 187040 }, { "epoch": 0.3691512903235885, "grad_norm": 0.12248466908931732, "learning_rate": 6.334082272336413e-05, "loss": 1.5761, "step": 187072 }, { "epoch": 0.36921443628498496, "grad_norm": 0.10569100081920624, "learning_rate": 6.333448249899944e-05, "loss": 1.5766, "step": 187104 }, { "epoch": 0.3692775822463814, "grad_norm": 0.11047903448343277, "learning_rate": 6.332814227463476e-05, "loss": 1.577, "step": 187136 }, { "epoch": 0.36934072820777786, "grad_norm": 0.11852037161588669, "learning_rate": 6.332180205027006e-05, "loss": 1.5805, "step": 187168 }, { "epoch": 0.3694038741691743, "grad_norm": 0.11280177533626556, "learning_rate": 6.331546182590537e-05, "loss": 1.5888, "step": 187200 }, { "epoch": 0.3694670201305707, "grad_norm": 0.11293172836303711, "learning_rate": 6.330912160154067e-05, "loss": 1.5788, "step": 187232 }, { "epoch": 0.36953016609196715, "grad_norm": 0.10566996783018112, "learning_rate": 6.330278137717599e-05, "loss": 1.5906, "step": 187264 }, { "epoch": 0.3695933120533636, "grad_norm": 0.1050875261425972, "learning_rate": 6.32964411528113e-05, "loss": 1.5784, "step": 187296 }, { "epoch": 0.36965645801476005, "grad_norm": 0.11344988644123077, "learning_rate": 6.32901009284466e-05, "loss": 1.5813, "step": 187328 }, { "epoch": 0.3697196039761565, "grad_norm": 0.10801728069782257, "learning_rate": 6.328376070408192e-05, "loss": 1.5862, "step": 187360 }, { "epoch": 0.36978274993755295, "grad_norm": 0.11555488407611847, "learning_rate": 6.327742047971723e-05, "loss": 1.5682, "step": 187392 }, { "epoch": 0.36984589589894934, "grad_norm": 0.1096230298280716, "learning_rate": 6.327108025535255e-05, "loss": 1.5892, "step": 187424 }, { "epoch": 0.3699090418603458, "grad_norm": 0.10769311338663101, "learning_rate": 6.326474003098785e-05, "loss": 1.5902, "step": 187456 }, { "epoch": 0.36997218782174224, "grad_norm": 0.10758940875530243, "learning_rate": 6.325839980662316e-05, "loss": 1.5808, "step": 187488 }, { "epoch": 0.3700353337831387, "grad_norm": 0.11722041666507721, "learning_rate": 6.325205958225848e-05, "loss": 1.5754, "step": 187520 }, { "epoch": 0.37009847974453514, "grad_norm": 0.10478208214044571, "learning_rate": 6.324571935789379e-05, "loss": 1.5898, "step": 187552 }, { "epoch": 0.3701616257059316, "grad_norm": 0.11724715679883957, "learning_rate": 6.323937913352909e-05, "loss": 1.5778, "step": 187584 }, { "epoch": 0.370224771667328, "grad_norm": 0.11673936247825623, "learning_rate": 6.323303890916439e-05, "loss": 1.5776, "step": 187616 }, { "epoch": 0.37028791762872443, "grad_norm": 0.11095519363880157, "learning_rate": 6.322669868479971e-05, "loss": 1.5778, "step": 187648 }, { "epoch": 0.3703510635901209, "grad_norm": 0.1110343262553215, "learning_rate": 6.322035846043502e-05, "loss": 1.5813, "step": 187680 }, { "epoch": 0.3704142095515173, "grad_norm": 0.11634550988674164, "learning_rate": 6.321401823607032e-05, "loss": 1.5733, "step": 187712 }, { "epoch": 0.3704773555129138, "grad_norm": 0.10587144643068314, "learning_rate": 6.320767801170564e-05, "loss": 1.5924, "step": 187744 }, { "epoch": 0.3705405014743102, "grad_norm": 0.11117944121360779, "learning_rate": 6.320133778734095e-05, "loss": 1.579, "step": 187776 }, { "epoch": 0.3706036474357066, "grad_norm": 0.11027168482542038, "learning_rate": 6.319499756297627e-05, "loss": 1.5792, "step": 187808 }, { "epoch": 0.37066679339710307, "grad_norm": 0.11100541800260544, "learning_rate": 6.318865733861157e-05, "loss": 1.5791, "step": 187840 }, { "epoch": 0.3707299393584995, "grad_norm": 0.11651395261287689, "learning_rate": 6.318231711424688e-05, "loss": 1.5806, "step": 187872 }, { "epoch": 0.37079308531989597, "grad_norm": 0.1091759130358696, "learning_rate": 6.31759768898822e-05, "loss": 1.5852, "step": 187904 }, { "epoch": 0.3708562312812924, "grad_norm": 0.11268169432878494, "learning_rate": 6.316963666551751e-05, "loss": 1.5655, "step": 187936 }, { "epoch": 0.37091937724268886, "grad_norm": 0.10827192664146423, "learning_rate": 6.316329644115283e-05, "loss": 1.5883, "step": 187968 }, { "epoch": 0.37098252320408526, "grad_norm": 0.1223548874258995, "learning_rate": 6.315695621678813e-05, "loss": 1.5881, "step": 188000 }, { "epoch": 0.3710456691654817, "grad_norm": 0.11432972550392151, "learning_rate": 6.315061599242343e-05, "loss": 1.5763, "step": 188032 }, { "epoch": 0.37110881512687816, "grad_norm": 0.11161147058010101, "learning_rate": 6.314427576805874e-05, "loss": 1.5721, "step": 188064 }, { "epoch": 0.3711719610882746, "grad_norm": 0.10936208069324493, "learning_rate": 6.313793554369406e-05, "loss": 1.5879, "step": 188096 }, { "epoch": 0.37123510704967105, "grad_norm": 0.10863802582025528, "learning_rate": 6.313159531932936e-05, "loss": 1.5788, "step": 188128 }, { "epoch": 0.3712982530110675, "grad_norm": 0.1166510283946991, "learning_rate": 6.312525509496467e-05, "loss": 1.5797, "step": 188160 }, { "epoch": 0.3713613989724639, "grad_norm": 0.11168581247329712, "learning_rate": 6.311891487059999e-05, "loss": 1.5825, "step": 188192 }, { "epoch": 0.37142454493386035, "grad_norm": 0.10761485248804092, "learning_rate": 6.31125746462353e-05, "loss": 1.5883, "step": 188224 }, { "epoch": 0.3714876908952568, "grad_norm": 0.12380202114582062, "learning_rate": 6.31062344218706e-05, "loss": 1.5718, "step": 188256 }, { "epoch": 0.37155083685665324, "grad_norm": 0.10772494971752167, "learning_rate": 6.309989419750592e-05, "loss": 1.5724, "step": 188288 }, { "epoch": 0.3716139828180497, "grad_norm": 0.11718533933162689, "learning_rate": 6.309355397314123e-05, "loss": 1.5922, "step": 188320 }, { "epoch": 0.37167712877944614, "grad_norm": 0.10808006674051285, "learning_rate": 6.308721374877655e-05, "loss": 1.5777, "step": 188352 }, { "epoch": 0.37174027474084254, "grad_norm": 0.11395537108182907, "learning_rate": 6.308087352441185e-05, "loss": 1.5815, "step": 188384 }, { "epoch": 0.371803420702239, "grad_norm": 0.11253679543733597, "learning_rate": 6.307453330004716e-05, "loss": 1.5827, "step": 188416 }, { "epoch": 0.37186656666363543, "grad_norm": 0.1086331456899643, "learning_rate": 6.306819307568246e-05, "loss": 1.5627, "step": 188448 }, { "epoch": 0.3719297126250319, "grad_norm": 0.1119273379445076, "learning_rate": 6.306185285131778e-05, "loss": 1.5649, "step": 188480 }, { "epoch": 0.37199285858642833, "grad_norm": 0.11097158491611481, "learning_rate": 6.305551262695308e-05, "loss": 1.578, "step": 188512 }, { "epoch": 0.3720560045478248, "grad_norm": 0.11210909485816956, "learning_rate": 6.304917240258839e-05, "loss": 1.5931, "step": 188544 }, { "epoch": 0.3721191505092212, "grad_norm": 0.10717438906431198, "learning_rate": 6.304283217822371e-05, "loss": 1.5813, "step": 188576 }, { "epoch": 0.3721822964706176, "grad_norm": 0.10911908000707626, "learning_rate": 6.303649195385902e-05, "loss": 1.5902, "step": 188608 }, { "epoch": 0.3722454424320141, "grad_norm": 0.11350075900554657, "learning_rate": 6.303015172949434e-05, "loss": 1.5821, "step": 188640 }, { "epoch": 0.3723085883934105, "grad_norm": 0.10877273231744766, "learning_rate": 6.302381150512964e-05, "loss": 1.5749, "step": 188672 }, { "epoch": 0.37237173435480697, "grad_norm": 0.10497533529996872, "learning_rate": 6.301747128076495e-05, "loss": 1.5787, "step": 188704 }, { "epoch": 0.3724348803162034, "grad_norm": 0.10719005018472672, "learning_rate": 6.301113105640027e-05, "loss": 1.5939, "step": 188736 }, { "epoch": 0.3724980262775998, "grad_norm": 0.11375787109136581, "learning_rate": 6.300479083203558e-05, "loss": 1.5854, "step": 188768 }, { "epoch": 0.37256117223899626, "grad_norm": 0.11353506147861481, "learning_rate": 6.299845060767088e-05, "loss": 1.5841, "step": 188800 }, { "epoch": 0.3726243182003927, "grad_norm": 0.11330796778202057, "learning_rate": 6.29921103833062e-05, "loss": 1.5835, "step": 188832 }, { "epoch": 0.37268746416178916, "grad_norm": 0.10765306651592255, "learning_rate": 6.29857701589415e-05, "loss": 1.5716, "step": 188864 }, { "epoch": 0.3727506101231856, "grad_norm": 0.115536168217659, "learning_rate": 6.297942993457681e-05, "loss": 1.5767, "step": 188896 }, { "epoch": 0.37281375608458206, "grad_norm": 0.11861121654510498, "learning_rate": 6.297308971021211e-05, "loss": 1.5825, "step": 188928 }, { "epoch": 0.3728769020459785, "grad_norm": 0.10668471455574036, "learning_rate": 6.296674948584743e-05, "loss": 1.5745, "step": 188960 }, { "epoch": 0.3729400480073749, "grad_norm": 0.1124548688530922, "learning_rate": 6.296040926148274e-05, "loss": 1.5855, "step": 188992 }, { "epoch": 0.37300319396877135, "grad_norm": 0.11227509379386902, "learning_rate": 6.295406903711806e-05, "loss": 1.5819, "step": 189024 }, { "epoch": 0.3730663399301678, "grad_norm": 0.12284498661756516, "learning_rate": 6.294772881275336e-05, "loss": 1.5829, "step": 189056 }, { "epoch": 0.37312948589156425, "grad_norm": 0.11107189208269119, "learning_rate": 6.294138858838867e-05, "loss": 1.5859, "step": 189088 }, { "epoch": 0.3731926318529607, "grad_norm": 0.10985364764928818, "learning_rate": 6.293504836402399e-05, "loss": 1.5772, "step": 189120 }, { "epoch": 0.37325577781435715, "grad_norm": 0.10888254642486572, "learning_rate": 6.29287081396593e-05, "loss": 1.5831, "step": 189152 }, { "epoch": 0.37331892377575354, "grad_norm": 0.1114615648984909, "learning_rate": 6.29223679152946e-05, "loss": 1.5767, "step": 189184 }, { "epoch": 0.37338206973715, "grad_norm": 0.11195087432861328, "learning_rate": 6.291602769092992e-05, "loss": 1.5908, "step": 189216 }, { "epoch": 0.37344521569854644, "grad_norm": 0.11618414521217346, "learning_rate": 6.290968746656523e-05, "loss": 1.5838, "step": 189248 }, { "epoch": 0.3735083616599429, "grad_norm": 0.11566731333732605, "learning_rate": 6.290334724220053e-05, "loss": 1.5942, "step": 189280 }, { "epoch": 0.37357150762133934, "grad_norm": 0.11618366837501526, "learning_rate": 6.289700701783585e-05, "loss": 1.584, "step": 189312 }, { "epoch": 0.3736346535827358, "grad_norm": 0.11000082641839981, "learning_rate": 6.289066679347115e-05, "loss": 1.5809, "step": 189344 }, { "epoch": 0.3736977995441322, "grad_norm": 0.11952618509531021, "learning_rate": 6.288432656910646e-05, "loss": 1.578, "step": 189376 }, { "epoch": 0.37376094550552863, "grad_norm": 0.10670780390501022, "learning_rate": 6.287798634474178e-05, "loss": 1.5737, "step": 189408 }, { "epoch": 0.3738240914669251, "grad_norm": 0.11222748458385468, "learning_rate": 6.287164612037709e-05, "loss": 1.591, "step": 189440 }, { "epoch": 0.37388723742832153, "grad_norm": 0.11076555401086807, "learning_rate": 6.28653058960124e-05, "loss": 1.5819, "step": 189472 }, { "epoch": 0.373950383389718, "grad_norm": 0.11821553856134415, "learning_rate": 6.285896567164771e-05, "loss": 1.5873, "step": 189504 }, { "epoch": 0.3740135293511144, "grad_norm": 0.12025181949138641, "learning_rate": 6.285262544728302e-05, "loss": 1.5672, "step": 189536 }, { "epoch": 0.3740766753125108, "grad_norm": 0.1173589751124382, "learning_rate": 6.284628522291834e-05, "loss": 1.5903, "step": 189568 }, { "epoch": 0.37413982127390727, "grad_norm": 0.11068599671125412, "learning_rate": 6.283994499855364e-05, "loss": 1.5772, "step": 189600 }, { "epoch": 0.3742029672353037, "grad_norm": 0.10940337926149368, "learning_rate": 6.283360477418895e-05, "loss": 1.5761, "step": 189632 }, { "epoch": 0.37426611319670017, "grad_norm": 0.1058943122625351, "learning_rate": 6.282726454982427e-05, "loss": 1.5879, "step": 189664 }, { "epoch": 0.3743292591580966, "grad_norm": 0.11430080980062485, "learning_rate": 6.282092432545957e-05, "loss": 1.5982, "step": 189696 }, { "epoch": 0.37439240511949307, "grad_norm": 0.11159449815750122, "learning_rate": 6.281458410109487e-05, "loss": 1.5769, "step": 189728 }, { "epoch": 0.37445555108088946, "grad_norm": 0.11083097755908966, "learning_rate": 6.280824387673018e-05, "loss": 1.5755, "step": 189760 }, { "epoch": 0.3745186970422859, "grad_norm": 0.11005783826112747, "learning_rate": 6.28019036523655e-05, "loss": 1.5858, "step": 189792 }, { "epoch": 0.37458184300368236, "grad_norm": 0.106862373650074, "learning_rate": 6.279556342800081e-05, "loss": 1.5724, "step": 189824 }, { "epoch": 0.3746449889650788, "grad_norm": 0.11401025205850601, "learning_rate": 6.278922320363613e-05, "loss": 1.5833, "step": 189856 }, { "epoch": 0.37470813492647526, "grad_norm": 0.11320473253726959, "learning_rate": 6.278288297927143e-05, "loss": 1.5728, "step": 189888 }, { "epoch": 0.3747712808878717, "grad_norm": 0.11194812506437302, "learning_rate": 6.277654275490674e-05, "loss": 1.6002, "step": 189920 }, { "epoch": 0.3748344268492681, "grad_norm": 0.10899345576763153, "learning_rate": 6.277020253054206e-05, "loss": 1.5887, "step": 189952 }, { "epoch": 0.37489757281066455, "grad_norm": 0.10808474570512772, "learning_rate": 6.276386230617737e-05, "loss": 1.5686, "step": 189984 }, { "epoch": 0.374960718772061, "grad_norm": 0.11248473078012466, "learning_rate": 6.275752208181267e-05, "loss": 1.5782, "step": 190016 }, { "epoch": 0.37502386473345745, "grad_norm": 0.1137518584728241, "learning_rate": 6.275118185744799e-05, "loss": 1.586, "step": 190048 }, { "epoch": 0.3750870106948539, "grad_norm": 0.1211237832903862, "learning_rate": 6.27448416330833e-05, "loss": 1.5788, "step": 190080 }, { "epoch": 0.37515015665625034, "grad_norm": 0.11008920520544052, "learning_rate": 6.27385014087186e-05, "loss": 1.5796, "step": 190112 }, { "epoch": 0.37521330261764674, "grad_norm": 0.10260678082704544, "learning_rate": 6.27321611843539e-05, "loss": 1.5874, "step": 190144 }, { "epoch": 0.3752764485790432, "grad_norm": 0.11161653697490692, "learning_rate": 6.272582095998922e-05, "loss": 1.577, "step": 190176 }, { "epoch": 0.37533959454043964, "grad_norm": 0.12450781464576721, "learning_rate": 6.271948073562453e-05, "loss": 1.5882, "step": 190208 }, { "epoch": 0.3754027405018361, "grad_norm": 0.11262410134077072, "learning_rate": 6.271314051125985e-05, "loss": 1.5839, "step": 190240 }, { "epoch": 0.37546588646323253, "grad_norm": 0.11589303612709045, "learning_rate": 6.270680028689515e-05, "loss": 1.5746, "step": 190272 }, { "epoch": 0.375529032424629, "grad_norm": 0.11190151423215866, "learning_rate": 6.270046006253046e-05, "loss": 1.5815, "step": 190304 }, { "epoch": 0.3755921783860254, "grad_norm": 0.1065768450498581, "learning_rate": 6.269411983816578e-05, "loss": 1.5759, "step": 190336 }, { "epoch": 0.3756553243474218, "grad_norm": 0.11990329623222351, "learning_rate": 6.268777961380109e-05, "loss": 1.5816, "step": 190368 }, { "epoch": 0.3757184703088183, "grad_norm": 0.10311850905418396, "learning_rate": 6.26814393894364e-05, "loss": 1.5719, "step": 190400 }, { "epoch": 0.3757816162702147, "grad_norm": 0.10978825390338898, "learning_rate": 6.267509916507171e-05, "loss": 1.5798, "step": 190432 }, { "epoch": 0.3758447622316112, "grad_norm": 0.11228978633880615, "learning_rate": 6.266875894070702e-05, "loss": 1.5829, "step": 190464 }, { "epoch": 0.3759079081930076, "grad_norm": 0.10573288053274155, "learning_rate": 6.266241871634232e-05, "loss": 1.592, "step": 190496 }, { "epoch": 0.37597105415440407, "grad_norm": 0.11256791651248932, "learning_rate": 6.265607849197764e-05, "loss": 1.5982, "step": 190528 }, { "epoch": 0.37603420011580047, "grad_norm": 0.10375548154115677, "learning_rate": 6.264973826761294e-05, "loss": 1.5888, "step": 190560 }, { "epoch": 0.3760973460771969, "grad_norm": 0.10288132727146149, "learning_rate": 6.264339804324825e-05, "loss": 1.5796, "step": 190592 }, { "epoch": 0.37616049203859336, "grad_norm": 0.1160019040107727, "learning_rate": 6.263705781888357e-05, "loss": 1.5684, "step": 190624 }, { "epoch": 0.3762236379999898, "grad_norm": 0.11431177705526352, "learning_rate": 6.263071759451888e-05, "loss": 1.5761, "step": 190656 }, { "epoch": 0.37628678396138626, "grad_norm": 0.11775610595941544, "learning_rate": 6.262437737015418e-05, "loss": 1.5926, "step": 190688 }, { "epoch": 0.3763499299227827, "grad_norm": 0.11620460450649261, "learning_rate": 6.26180371457895e-05, "loss": 1.5806, "step": 190720 }, { "epoch": 0.3764130758841791, "grad_norm": 0.11400526762008667, "learning_rate": 6.261169692142481e-05, "loss": 1.5613, "step": 190752 }, { "epoch": 0.37647622184557555, "grad_norm": 0.11397536098957062, "learning_rate": 6.260535669706013e-05, "loss": 1.5769, "step": 190784 }, { "epoch": 0.376539367806972, "grad_norm": 0.11190059036016464, "learning_rate": 6.259901647269543e-05, "loss": 1.5664, "step": 190816 }, { "epoch": 0.37660251376836845, "grad_norm": 0.11497852951288223, "learning_rate": 6.259267624833074e-05, "loss": 1.5884, "step": 190848 }, { "epoch": 0.3766656597297649, "grad_norm": 0.11309961974620819, "learning_rate": 6.258633602396606e-05, "loss": 1.5801, "step": 190880 }, { "epoch": 0.37672880569116135, "grad_norm": 0.1095379963517189, "learning_rate": 6.257999579960136e-05, "loss": 1.5813, "step": 190912 }, { "epoch": 0.37679195165255774, "grad_norm": 0.11139626801013947, "learning_rate": 6.257365557523667e-05, "loss": 1.5693, "step": 190944 }, { "epoch": 0.3768550976139542, "grad_norm": 0.11057497560977936, "learning_rate": 6.256731535087197e-05, "loss": 1.5683, "step": 190976 }, { "epoch": 0.37691824357535064, "grad_norm": 0.110712930560112, "learning_rate": 6.256097512650729e-05, "loss": 1.5796, "step": 191008 }, { "epoch": 0.3769813895367471, "grad_norm": 0.11229637265205383, "learning_rate": 6.25546349021426e-05, "loss": 1.5879, "step": 191040 }, { "epoch": 0.37704453549814354, "grad_norm": 0.11921262741088867, "learning_rate": 6.25482946777779e-05, "loss": 1.5832, "step": 191072 }, { "epoch": 0.37710768145954, "grad_norm": 0.11263344436883926, "learning_rate": 6.254195445341322e-05, "loss": 1.5868, "step": 191104 }, { "epoch": 0.3771708274209364, "grad_norm": 0.11202883720397949, "learning_rate": 6.253561422904853e-05, "loss": 1.5725, "step": 191136 }, { "epoch": 0.37723397338233283, "grad_norm": 0.11726159602403641, "learning_rate": 6.252927400468385e-05, "loss": 1.5763, "step": 191168 }, { "epoch": 0.3772971193437293, "grad_norm": 0.10986708104610443, "learning_rate": 6.252293378031916e-05, "loss": 1.5753, "step": 191200 }, { "epoch": 0.37736026530512573, "grad_norm": 0.11207885295152664, "learning_rate": 6.251659355595446e-05, "loss": 1.582, "step": 191232 }, { "epoch": 0.3774234112665222, "grad_norm": 0.1060028225183487, "learning_rate": 6.251025333158978e-05, "loss": 1.5728, "step": 191264 }, { "epoch": 0.37748655722791863, "grad_norm": 0.10829560458660126, "learning_rate": 6.250391310722509e-05, "loss": 1.5865, "step": 191296 }, { "epoch": 0.377549703189315, "grad_norm": 0.11095945537090302, "learning_rate": 6.24975728828604e-05, "loss": 1.5856, "step": 191328 }, { "epoch": 0.37761284915071147, "grad_norm": 0.10997764021158218, "learning_rate": 6.249123265849571e-05, "loss": 1.5802, "step": 191360 }, { "epoch": 0.3776759951121079, "grad_norm": 0.1037118136882782, "learning_rate": 6.248489243413101e-05, "loss": 1.5782, "step": 191392 }, { "epoch": 0.37773914107350437, "grad_norm": 0.10766377300024033, "learning_rate": 6.247855220976632e-05, "loss": 1.5982, "step": 191424 }, { "epoch": 0.3778022870349008, "grad_norm": 0.10773643106222153, "learning_rate": 6.247221198540164e-05, "loss": 1.5771, "step": 191456 }, { "epoch": 0.37786543299629727, "grad_norm": 0.10769221931695938, "learning_rate": 6.246587176103694e-05, "loss": 1.5762, "step": 191488 }, { "epoch": 0.37792857895769366, "grad_norm": 0.11420951038599014, "learning_rate": 6.245953153667225e-05, "loss": 1.5734, "step": 191520 }, { "epoch": 0.3779917249190901, "grad_norm": 0.11754541844129562, "learning_rate": 6.245319131230757e-05, "loss": 1.5967, "step": 191552 }, { "epoch": 0.37805487088048656, "grad_norm": 0.11201565712690353, "learning_rate": 6.244685108794288e-05, "loss": 1.5828, "step": 191584 }, { "epoch": 0.378118016841883, "grad_norm": 0.10708951205015182, "learning_rate": 6.244051086357818e-05, "loss": 1.5724, "step": 191616 }, { "epoch": 0.37818116280327946, "grad_norm": 0.11266741156578064, "learning_rate": 6.24341706392135e-05, "loss": 1.5759, "step": 191648 }, { "epoch": 0.3782443087646759, "grad_norm": 0.11262353509664536, "learning_rate": 6.242783041484881e-05, "loss": 1.5853, "step": 191680 }, { "epoch": 0.3783074547260723, "grad_norm": 0.11155575513839722, "learning_rate": 6.242149019048413e-05, "loss": 1.5882, "step": 191712 }, { "epoch": 0.37837060068746875, "grad_norm": 0.11081994324922562, "learning_rate": 6.241514996611943e-05, "loss": 1.5734, "step": 191744 }, { "epoch": 0.3784337466488652, "grad_norm": 0.11958064883947372, "learning_rate": 6.240880974175473e-05, "loss": 1.5889, "step": 191776 }, { "epoch": 0.37849689261026165, "grad_norm": 0.1084212213754654, "learning_rate": 6.240246951739004e-05, "loss": 1.5814, "step": 191808 }, { "epoch": 0.3785600385716581, "grad_norm": 0.10623104870319366, "learning_rate": 6.239612929302536e-05, "loss": 1.5779, "step": 191840 }, { "epoch": 0.37862318453305455, "grad_norm": 0.10781396925449371, "learning_rate": 6.238978906866067e-05, "loss": 1.5822, "step": 191872 }, { "epoch": 0.37868633049445094, "grad_norm": 0.1161232590675354, "learning_rate": 6.238344884429597e-05, "loss": 1.591, "step": 191904 }, { "epoch": 0.3787494764558474, "grad_norm": 0.11555363237857819, "learning_rate": 6.237710861993129e-05, "loss": 1.5881, "step": 191936 }, { "epoch": 0.37881262241724384, "grad_norm": 0.10289894044399261, "learning_rate": 6.23707683955666e-05, "loss": 1.5781, "step": 191968 }, { "epoch": 0.3788757683786403, "grad_norm": 0.10856793075799942, "learning_rate": 6.236442817120192e-05, "loss": 1.5751, "step": 192000 }, { "epoch": 0.37893891434003674, "grad_norm": 0.11440318822860718, "learning_rate": 6.235808794683722e-05, "loss": 1.5916, "step": 192032 }, { "epoch": 0.3790020603014332, "grad_norm": 0.11252167820930481, "learning_rate": 6.235174772247253e-05, "loss": 1.5816, "step": 192064 }, { "epoch": 0.3790652062628296, "grad_norm": 0.11253858357667923, "learning_rate": 6.234540749810785e-05, "loss": 1.5755, "step": 192096 }, { "epoch": 0.37912835222422603, "grad_norm": 0.12229254841804504, "learning_rate": 6.233906727374316e-05, "loss": 1.5863, "step": 192128 }, { "epoch": 0.3791914981856225, "grad_norm": 0.10934390872716904, "learning_rate": 6.233272704937846e-05, "loss": 1.5826, "step": 192160 }, { "epoch": 0.3792546441470189, "grad_norm": 0.10436520725488663, "learning_rate": 6.232638682501377e-05, "loss": 1.5719, "step": 192192 }, { "epoch": 0.3793177901084154, "grad_norm": 0.1138714849948883, "learning_rate": 6.232004660064908e-05, "loss": 1.5733, "step": 192224 }, { "epoch": 0.3793809360698118, "grad_norm": 0.10553093254566193, "learning_rate": 6.23137063762844e-05, "loss": 1.5742, "step": 192256 }, { "epoch": 0.3794440820312083, "grad_norm": 0.10531999915838242, "learning_rate": 6.23073661519197e-05, "loss": 1.5729, "step": 192288 }, { "epoch": 0.37950722799260467, "grad_norm": 0.11378216743469238, "learning_rate": 6.230102592755501e-05, "loss": 1.5691, "step": 192320 }, { "epoch": 0.3795703739540011, "grad_norm": 0.11386200040578842, "learning_rate": 6.229468570319032e-05, "loss": 1.5916, "step": 192352 }, { "epoch": 0.37963351991539757, "grad_norm": 0.11795624345541, "learning_rate": 6.228834547882564e-05, "loss": 1.5784, "step": 192384 }, { "epoch": 0.379696665876794, "grad_norm": 0.12270095199346542, "learning_rate": 6.228200525446094e-05, "loss": 1.5816, "step": 192416 }, { "epoch": 0.37975981183819046, "grad_norm": 0.11291665583848953, "learning_rate": 6.227566503009625e-05, "loss": 1.5791, "step": 192448 }, { "epoch": 0.3798229577995869, "grad_norm": 0.11110670864582062, "learning_rate": 6.226932480573157e-05, "loss": 1.5834, "step": 192480 }, { "epoch": 0.3798861037609833, "grad_norm": 0.11037114262580872, "learning_rate": 6.226298458136688e-05, "loss": 1.583, "step": 192512 }, { "epoch": 0.37994924972237976, "grad_norm": 0.10810991376638412, "learning_rate": 6.22566443570022e-05, "loss": 1.578, "step": 192544 }, { "epoch": 0.3800123956837762, "grad_norm": 0.10720200836658478, "learning_rate": 6.22503041326375e-05, "loss": 1.5839, "step": 192576 }, { "epoch": 0.38007554164517265, "grad_norm": 0.11847103387117386, "learning_rate": 6.22439639082728e-05, "loss": 1.5784, "step": 192608 }, { "epoch": 0.3801386876065691, "grad_norm": 0.11450802534818649, "learning_rate": 6.223762368390811e-05, "loss": 1.5781, "step": 192640 }, { "epoch": 0.38020183356796555, "grad_norm": 0.11172592639923096, "learning_rate": 6.223128345954343e-05, "loss": 1.5805, "step": 192672 }, { "epoch": 0.38026497952936195, "grad_norm": 0.11719383299350739, "learning_rate": 6.222494323517873e-05, "loss": 1.5755, "step": 192704 }, { "epoch": 0.3803281254907584, "grad_norm": 0.11797864735126495, "learning_rate": 6.221860301081405e-05, "loss": 1.5758, "step": 192736 }, { "epoch": 0.38039127145215484, "grad_norm": 0.11396016925573349, "learning_rate": 6.221226278644936e-05, "loss": 1.5732, "step": 192768 }, { "epoch": 0.3804544174135513, "grad_norm": 0.11699241399765015, "learning_rate": 6.220592256208467e-05, "loss": 1.5837, "step": 192800 }, { "epoch": 0.38051756337494774, "grad_norm": 0.11038757860660553, "learning_rate": 6.219958233771998e-05, "loss": 1.5879, "step": 192832 }, { "epoch": 0.3805807093363442, "grad_norm": 0.11245683580636978, "learning_rate": 6.219324211335529e-05, "loss": 1.5719, "step": 192864 }, { "epoch": 0.3806438552977406, "grad_norm": 0.11117320507764816, "learning_rate": 6.21869018889906e-05, "loss": 1.5674, "step": 192896 }, { "epoch": 0.38070700125913703, "grad_norm": 0.10423646867275238, "learning_rate": 6.218056166462592e-05, "loss": 1.5692, "step": 192928 }, { "epoch": 0.3807701472205335, "grad_norm": 0.11134912818670273, "learning_rate": 6.217422144026122e-05, "loss": 1.5853, "step": 192960 }, { "epoch": 0.38083329318192993, "grad_norm": 0.11681429296731949, "learning_rate": 6.216788121589653e-05, "loss": 1.5763, "step": 192992 }, { "epoch": 0.3808964391433264, "grad_norm": 0.11456853896379471, "learning_rate": 6.216154099153184e-05, "loss": 1.5776, "step": 193024 }, { "epoch": 0.38095958510472283, "grad_norm": 0.11466328799724579, "learning_rate": 6.215520076716715e-05, "loss": 1.5796, "step": 193056 }, { "epoch": 0.3810227310661192, "grad_norm": 0.10899902880191803, "learning_rate": 6.214886054280245e-05, "loss": 1.5761, "step": 193088 }, { "epoch": 0.3810858770275157, "grad_norm": 0.11018490046262741, "learning_rate": 6.214252031843777e-05, "loss": 1.5764, "step": 193120 }, { "epoch": 0.3811490229889121, "grad_norm": 0.12358193099498749, "learning_rate": 6.213618009407308e-05, "loss": 1.5704, "step": 193152 }, { "epoch": 0.38121216895030857, "grad_norm": 0.12298901379108429, "learning_rate": 6.21298398697084e-05, "loss": 1.5744, "step": 193184 }, { "epoch": 0.381275314911705, "grad_norm": 0.10603820532560349, "learning_rate": 6.212349964534371e-05, "loss": 1.572, "step": 193216 }, { "epoch": 0.38133846087310147, "grad_norm": 0.12213683873414993, "learning_rate": 6.211715942097901e-05, "loss": 1.5836, "step": 193248 }, { "epoch": 0.38140160683449786, "grad_norm": 0.1066909059882164, "learning_rate": 6.211081919661432e-05, "loss": 1.5694, "step": 193280 }, { "epoch": 0.3814647527958943, "grad_norm": 0.13556654751300812, "learning_rate": 6.210447897224964e-05, "loss": 1.5812, "step": 193312 }, { "epoch": 0.38152789875729076, "grad_norm": 0.11676943302154541, "learning_rate": 6.209813874788495e-05, "loss": 1.5741, "step": 193344 }, { "epoch": 0.3815910447186872, "grad_norm": 0.10768906027078629, "learning_rate": 6.209179852352025e-05, "loss": 1.573, "step": 193376 }, { "epoch": 0.38165419068008366, "grad_norm": 0.12190432846546173, "learning_rate": 6.208545829915557e-05, "loss": 1.5749, "step": 193408 }, { "epoch": 0.3817173366414801, "grad_norm": 0.11266548931598663, "learning_rate": 6.207911807479087e-05, "loss": 1.5922, "step": 193440 }, { "epoch": 0.3817804826028765, "grad_norm": 0.10480077564716339, "learning_rate": 6.207277785042619e-05, "loss": 1.5857, "step": 193472 }, { "epoch": 0.38184362856427295, "grad_norm": 0.10797309130430222, "learning_rate": 6.206643762606149e-05, "loss": 1.5905, "step": 193504 }, { "epoch": 0.3819067745256694, "grad_norm": 0.10765618830919266, "learning_rate": 6.20600974016968e-05, "loss": 1.5811, "step": 193536 }, { "epoch": 0.38196992048706585, "grad_norm": 0.11219711601734161, "learning_rate": 6.205375717733212e-05, "loss": 1.5647, "step": 193568 }, { "epoch": 0.3820330664484623, "grad_norm": 0.10743927210569382, "learning_rate": 6.204741695296743e-05, "loss": 1.5866, "step": 193600 }, { "epoch": 0.38209621240985875, "grad_norm": 0.11673557013273239, "learning_rate": 6.204107672860273e-05, "loss": 1.5766, "step": 193632 }, { "epoch": 0.38215935837125514, "grad_norm": 0.11123722046613693, "learning_rate": 6.203473650423805e-05, "loss": 1.579, "step": 193664 }, { "epoch": 0.3822225043326516, "grad_norm": 0.11459823697805405, "learning_rate": 6.202839627987336e-05, "loss": 1.5669, "step": 193696 }, { "epoch": 0.38228565029404804, "grad_norm": 0.11273475736379623, "learning_rate": 6.202205605550867e-05, "loss": 1.5746, "step": 193728 }, { "epoch": 0.3823487962554445, "grad_norm": 0.11307339370250702, "learning_rate": 6.201571583114398e-05, "loss": 1.597, "step": 193760 }, { "epoch": 0.38241194221684094, "grad_norm": 0.10869086533784866, "learning_rate": 6.200937560677929e-05, "loss": 1.5897, "step": 193792 }, { "epoch": 0.3824750881782374, "grad_norm": 0.10897192358970642, "learning_rate": 6.20030353824146e-05, "loss": 1.583, "step": 193824 }, { "epoch": 0.38253823413963384, "grad_norm": 0.11251529306173325, "learning_rate": 6.19966951580499e-05, "loss": 1.5786, "step": 193856 }, { "epoch": 0.38260138010103023, "grad_norm": 0.1086493730545044, "learning_rate": 6.199035493368522e-05, "loss": 1.5915, "step": 193888 }, { "epoch": 0.3826645260624267, "grad_norm": 0.10941314697265625, "learning_rate": 6.198401470932052e-05, "loss": 1.5868, "step": 193920 }, { "epoch": 0.38272767202382313, "grad_norm": 0.11620660126209259, "learning_rate": 6.197767448495584e-05, "loss": 1.5681, "step": 193952 }, { "epoch": 0.3827908179852196, "grad_norm": 0.10568537563085556, "learning_rate": 6.197133426059115e-05, "loss": 1.5756, "step": 193984 }, { "epoch": 0.382853963946616, "grad_norm": 0.114474818110466, "learning_rate": 6.196499403622646e-05, "loss": 1.5711, "step": 194016 }, { "epoch": 0.3829171099080125, "grad_norm": 0.11805200576782227, "learning_rate": 6.195865381186177e-05, "loss": 1.5777, "step": 194048 }, { "epoch": 0.38298025586940887, "grad_norm": 0.1138908788561821, "learning_rate": 6.195231358749708e-05, "loss": 1.5711, "step": 194080 }, { "epoch": 0.3830434018308053, "grad_norm": 0.11321444809436798, "learning_rate": 6.19459733631324e-05, "loss": 1.5834, "step": 194112 }, { "epoch": 0.38310654779220177, "grad_norm": 0.10935060679912567, "learning_rate": 6.193963313876771e-05, "loss": 1.567, "step": 194144 }, { "epoch": 0.3831696937535982, "grad_norm": 0.1189296543598175, "learning_rate": 6.193329291440301e-05, "loss": 1.5924, "step": 194176 }, { "epoch": 0.38323283971499467, "grad_norm": 0.11690651625394821, "learning_rate": 6.192695269003833e-05, "loss": 1.5853, "step": 194208 }, { "epoch": 0.3832959856763911, "grad_norm": 0.10909455269575119, "learning_rate": 6.192061246567364e-05, "loss": 1.5802, "step": 194240 }, { "epoch": 0.3833591316377875, "grad_norm": 0.1151445284485817, "learning_rate": 6.191427224130894e-05, "loss": 1.5769, "step": 194272 }, { "epoch": 0.38342227759918396, "grad_norm": 0.1197195053100586, "learning_rate": 6.190793201694424e-05, "loss": 1.5814, "step": 194304 }, { "epoch": 0.3834854235605804, "grad_norm": 0.11474685370922089, "learning_rate": 6.190159179257956e-05, "loss": 1.5771, "step": 194336 }, { "epoch": 0.38354856952197686, "grad_norm": 0.11637883633375168, "learning_rate": 6.189525156821487e-05, "loss": 1.5773, "step": 194368 }, { "epoch": 0.3836117154833733, "grad_norm": 0.11324753612279892, "learning_rate": 6.188891134385019e-05, "loss": 1.5862, "step": 194400 }, { "epoch": 0.38367486144476975, "grad_norm": 0.10944308340549469, "learning_rate": 6.188257111948549e-05, "loss": 1.5815, "step": 194432 }, { "epoch": 0.38373800740616615, "grad_norm": 0.1073647141456604, "learning_rate": 6.18762308951208e-05, "loss": 1.5849, "step": 194464 }, { "epoch": 0.3838011533675626, "grad_norm": 0.11327075958251953, "learning_rate": 6.186989067075612e-05, "loss": 1.5674, "step": 194496 }, { "epoch": 0.38386429932895905, "grad_norm": 0.12217191606760025, "learning_rate": 6.186355044639143e-05, "loss": 1.578, "step": 194528 }, { "epoch": 0.3839274452903555, "grad_norm": 0.11427702009677887, "learning_rate": 6.185721022202674e-05, "loss": 1.5842, "step": 194560 }, { "epoch": 0.38399059125175194, "grad_norm": 0.11438681185245514, "learning_rate": 6.185086999766205e-05, "loss": 1.5772, "step": 194592 }, { "epoch": 0.3840537372131484, "grad_norm": 0.11550986021757126, "learning_rate": 6.184452977329736e-05, "loss": 1.5699, "step": 194624 }, { "epoch": 0.3841168831745448, "grad_norm": 0.11881895363330841, "learning_rate": 6.183818954893266e-05, "loss": 1.5809, "step": 194656 }, { "epoch": 0.38418002913594124, "grad_norm": 0.11112011224031448, "learning_rate": 6.183184932456798e-05, "loss": 1.5933, "step": 194688 }, { "epoch": 0.3842431750973377, "grad_norm": 0.11052552610635757, "learning_rate": 6.182550910020328e-05, "loss": 1.5922, "step": 194720 }, { "epoch": 0.38430632105873413, "grad_norm": 0.11437926441431046, "learning_rate": 6.181916887583859e-05, "loss": 1.5738, "step": 194752 }, { "epoch": 0.3843694670201306, "grad_norm": 0.11414770036935806, "learning_rate": 6.18128286514739e-05, "loss": 1.5811, "step": 194784 }, { "epoch": 0.38443261298152703, "grad_norm": 0.10907621681690216, "learning_rate": 6.180648842710922e-05, "loss": 1.5852, "step": 194816 }, { "epoch": 0.3844957589429234, "grad_norm": 0.10684679448604584, "learning_rate": 6.180014820274452e-05, "loss": 1.585, "step": 194848 }, { "epoch": 0.3845589049043199, "grad_norm": 0.1143089011311531, "learning_rate": 6.179380797837984e-05, "loss": 1.5803, "step": 194880 }, { "epoch": 0.3846220508657163, "grad_norm": 0.11577370762825012, "learning_rate": 6.178746775401515e-05, "loss": 1.5828, "step": 194912 }, { "epoch": 0.3846851968271128, "grad_norm": 0.11089134961366653, "learning_rate": 6.178112752965047e-05, "loss": 1.5771, "step": 194944 }, { "epoch": 0.3847483427885092, "grad_norm": 0.10856077820062637, "learning_rate": 6.177478730528577e-05, "loss": 1.5656, "step": 194976 }, { "epoch": 0.3848114887499057, "grad_norm": 0.11164447665214539, "learning_rate": 6.176844708092108e-05, "loss": 1.5736, "step": 195008 }, { "epoch": 0.38487463471130207, "grad_norm": 0.10956843197345734, "learning_rate": 6.17621068565564e-05, "loss": 1.5742, "step": 195040 }, { "epoch": 0.3849377806726985, "grad_norm": 0.10592419654130936, "learning_rate": 6.17557666321917e-05, "loss": 1.5705, "step": 195072 }, { "epoch": 0.38500092663409496, "grad_norm": 0.10649097710847855, "learning_rate": 6.174942640782701e-05, "loss": 1.5725, "step": 195104 }, { "epoch": 0.3850640725954914, "grad_norm": 0.11064901202917099, "learning_rate": 6.174308618346231e-05, "loss": 1.5805, "step": 195136 }, { "epoch": 0.38512721855688786, "grad_norm": 0.11852394044399261, "learning_rate": 6.173674595909763e-05, "loss": 1.5839, "step": 195168 }, { "epoch": 0.3851903645182843, "grad_norm": 0.11115214228630066, "learning_rate": 6.173040573473294e-05, "loss": 1.582, "step": 195200 }, { "epoch": 0.3852535104796807, "grad_norm": 0.11130701750516891, "learning_rate": 6.172406551036826e-05, "loss": 1.5936, "step": 195232 }, { "epoch": 0.38531665644107715, "grad_norm": 0.1142987310886383, "learning_rate": 6.171772528600356e-05, "loss": 1.5739, "step": 195264 }, { "epoch": 0.3853798024024736, "grad_norm": 0.11486975848674774, "learning_rate": 6.171138506163887e-05, "loss": 1.5883, "step": 195296 }, { "epoch": 0.38544294836387005, "grad_norm": 0.10437566041946411, "learning_rate": 6.170504483727419e-05, "loss": 1.5796, "step": 195328 }, { "epoch": 0.3855060943252665, "grad_norm": 0.10351017117500305, "learning_rate": 6.16987046129095e-05, "loss": 1.5845, "step": 195360 }, { "epoch": 0.38556924028666295, "grad_norm": 0.11035379767417908, "learning_rate": 6.16923643885448e-05, "loss": 1.5936, "step": 195392 }, { "epoch": 0.38563238624805934, "grad_norm": 0.11144288629293442, "learning_rate": 6.168602416418012e-05, "loss": 1.5755, "step": 195424 }, { "epoch": 0.3856955322094558, "grad_norm": 0.10670211166143417, "learning_rate": 6.167968393981543e-05, "loss": 1.5687, "step": 195456 }, { "epoch": 0.38575867817085224, "grad_norm": 0.11042036861181259, "learning_rate": 6.167334371545073e-05, "loss": 1.5799, "step": 195488 }, { "epoch": 0.3858218241322487, "grad_norm": 0.11746933311223984, "learning_rate": 6.166700349108605e-05, "loss": 1.5903, "step": 195520 }, { "epoch": 0.38588497009364514, "grad_norm": 0.11040772497653961, "learning_rate": 6.166066326672135e-05, "loss": 1.5742, "step": 195552 }, { "epoch": 0.3859481160550416, "grad_norm": 0.11326251924037933, "learning_rate": 6.165432304235666e-05, "loss": 1.5886, "step": 195584 }, { "epoch": 0.38601126201643804, "grad_norm": 0.11498472094535828, "learning_rate": 6.164798281799198e-05, "loss": 1.5862, "step": 195616 }, { "epoch": 0.38607440797783443, "grad_norm": 0.10761165618896484, "learning_rate": 6.164164259362728e-05, "loss": 1.5752, "step": 195648 }, { "epoch": 0.3861375539392309, "grad_norm": 0.1089029312133789, "learning_rate": 6.163530236926259e-05, "loss": 1.588, "step": 195680 }, { "epoch": 0.38620069990062733, "grad_norm": 0.10804031044244766, "learning_rate": 6.16289621448979e-05, "loss": 1.5642, "step": 195712 }, { "epoch": 0.3862638458620238, "grad_norm": 0.10668666660785675, "learning_rate": 6.162262192053322e-05, "loss": 1.5716, "step": 195744 }, { "epoch": 0.38632699182342023, "grad_norm": 0.11377915740013123, "learning_rate": 6.161628169616852e-05, "loss": 1.5715, "step": 195776 }, { "epoch": 0.3863901377848167, "grad_norm": 0.11925654113292694, "learning_rate": 6.160994147180384e-05, "loss": 1.5834, "step": 195808 }, { "epoch": 0.38645328374621307, "grad_norm": 0.11272826790809631, "learning_rate": 6.160360124743915e-05, "loss": 1.5727, "step": 195840 }, { "epoch": 0.3865164297076095, "grad_norm": 0.11087898910045624, "learning_rate": 6.159726102307447e-05, "loss": 1.5846, "step": 195872 }, { "epoch": 0.38657957566900597, "grad_norm": 0.1085660457611084, "learning_rate": 6.159092079870977e-05, "loss": 1.5751, "step": 195904 }, { "epoch": 0.3866427216304024, "grad_norm": 0.11042071133852005, "learning_rate": 6.158458057434507e-05, "loss": 1.5622, "step": 195936 }, { "epoch": 0.38670586759179887, "grad_norm": 0.12418434768915176, "learning_rate": 6.157824034998038e-05, "loss": 1.5807, "step": 195968 }, { "epoch": 0.3867690135531953, "grad_norm": 0.11331064999103546, "learning_rate": 6.15719001256157e-05, "loss": 1.5893, "step": 196000 }, { "epoch": 0.3868321595145917, "grad_norm": 0.11731262505054474, "learning_rate": 6.156555990125101e-05, "loss": 1.5804, "step": 196032 }, { "epoch": 0.38689530547598816, "grad_norm": 0.10680776834487915, "learning_rate": 6.155921967688631e-05, "loss": 1.5781, "step": 196064 }, { "epoch": 0.3869584514373846, "grad_norm": 0.11508776247501373, "learning_rate": 6.155287945252163e-05, "loss": 1.5825, "step": 196096 }, { "epoch": 0.38702159739878106, "grad_norm": 0.11240911483764648, "learning_rate": 6.154653922815694e-05, "loss": 1.5853, "step": 196128 }, { "epoch": 0.3870847433601775, "grad_norm": 0.10966435819864273, "learning_rate": 6.154019900379226e-05, "loss": 1.5899, "step": 196160 }, { "epoch": 0.38714788932157396, "grad_norm": 0.10919499397277832, "learning_rate": 6.153385877942756e-05, "loss": 1.5804, "step": 196192 }, { "epoch": 0.38721103528297035, "grad_norm": 0.11427536606788635, "learning_rate": 6.152751855506287e-05, "loss": 1.5607, "step": 196224 }, { "epoch": 0.3872741812443668, "grad_norm": 0.11667704582214355, "learning_rate": 6.152117833069819e-05, "loss": 1.5761, "step": 196256 }, { "epoch": 0.38733732720576325, "grad_norm": 0.10493740439414978, "learning_rate": 6.15148381063335e-05, "loss": 1.5779, "step": 196288 }, { "epoch": 0.3874004731671597, "grad_norm": 0.11263034492731094, "learning_rate": 6.15084978819688e-05, "loss": 1.5686, "step": 196320 }, { "epoch": 0.38746361912855615, "grad_norm": 0.11960586905479431, "learning_rate": 6.15021576576041e-05, "loss": 1.5873, "step": 196352 }, { "epoch": 0.3875267650899526, "grad_norm": 0.12207693606615067, "learning_rate": 6.149581743323942e-05, "loss": 1.5862, "step": 196384 }, { "epoch": 0.387589911051349, "grad_norm": 0.11116329580545425, "learning_rate": 6.148947720887473e-05, "loss": 1.5743, "step": 196416 }, { "epoch": 0.38765305701274544, "grad_norm": 0.10941682010889053, "learning_rate": 6.148313698451003e-05, "loss": 1.5865, "step": 196448 }, { "epoch": 0.3877162029741419, "grad_norm": 0.11591745167970657, "learning_rate": 6.147679676014535e-05, "loss": 1.5834, "step": 196480 }, { "epoch": 0.38777934893553834, "grad_norm": 0.11391013115644455, "learning_rate": 6.147045653578066e-05, "loss": 1.577, "step": 196512 }, { "epoch": 0.3878424948969348, "grad_norm": 0.10873373597860336, "learning_rate": 6.146411631141598e-05, "loss": 1.5747, "step": 196544 }, { "epoch": 0.38790564085833124, "grad_norm": 0.11320281773805618, "learning_rate": 6.145777608705129e-05, "loss": 1.5858, "step": 196576 }, { "epoch": 0.38796878681972763, "grad_norm": 0.10731580853462219, "learning_rate": 6.145143586268659e-05, "loss": 1.5817, "step": 196608 }, { "epoch": 0.3880319327811241, "grad_norm": 0.10921253263950348, "learning_rate": 6.14450956383219e-05, "loss": 1.5692, "step": 196640 }, { "epoch": 0.3880950787425205, "grad_norm": 0.10596117377281189, "learning_rate": 6.143875541395722e-05, "loss": 1.5832, "step": 196672 }, { "epoch": 0.388158224703917, "grad_norm": 0.11492400616407394, "learning_rate": 6.143241518959254e-05, "loss": 1.5813, "step": 196704 }, { "epoch": 0.3882213706653134, "grad_norm": 0.11025400459766388, "learning_rate": 6.142607496522784e-05, "loss": 1.5814, "step": 196736 }, { "epoch": 0.3882845166267099, "grad_norm": 0.10787409543991089, "learning_rate": 6.141973474086314e-05, "loss": 1.5634, "step": 196768 }, { "epoch": 0.38834766258810627, "grad_norm": 0.1080102026462555, "learning_rate": 6.141339451649845e-05, "loss": 1.582, "step": 196800 }, { "epoch": 0.3884108085495027, "grad_norm": 0.10854236781597137, "learning_rate": 6.140705429213377e-05, "loss": 1.5647, "step": 196832 }, { "epoch": 0.38847395451089917, "grad_norm": 0.1165924221277237, "learning_rate": 6.140071406776907e-05, "loss": 1.5821, "step": 196864 }, { "epoch": 0.3885371004722956, "grad_norm": 0.11356721818447113, "learning_rate": 6.139437384340438e-05, "loss": 1.5894, "step": 196896 }, { "epoch": 0.38860024643369206, "grad_norm": 0.11147555708885193, "learning_rate": 6.13880336190397e-05, "loss": 1.5829, "step": 196928 }, { "epoch": 0.3886633923950885, "grad_norm": 0.11362142115831375, "learning_rate": 6.138169339467501e-05, "loss": 1.5818, "step": 196960 }, { "epoch": 0.3887265383564849, "grad_norm": 0.10542314499616623, "learning_rate": 6.137535317031031e-05, "loss": 1.5767, "step": 196992 }, { "epoch": 0.38878968431788136, "grad_norm": 0.11809996515512466, "learning_rate": 6.136901294594563e-05, "loss": 1.5883, "step": 197024 }, { "epoch": 0.3888528302792778, "grad_norm": 0.1132158488035202, "learning_rate": 6.136267272158094e-05, "loss": 1.579, "step": 197056 }, { "epoch": 0.38891597624067425, "grad_norm": 0.1142975464463234, "learning_rate": 6.135633249721626e-05, "loss": 1.574, "step": 197088 }, { "epoch": 0.3889791222020707, "grad_norm": 0.1133769080042839, "learning_rate": 6.134999227285156e-05, "loss": 1.5874, "step": 197120 }, { "epoch": 0.38904226816346715, "grad_norm": 0.11044921725988388, "learning_rate": 6.134365204848687e-05, "loss": 1.598, "step": 197152 }, { "epoch": 0.38910541412486355, "grad_norm": 0.10900735855102539, "learning_rate": 6.133731182412217e-05, "loss": 1.5843, "step": 197184 }, { "epoch": 0.38916856008626, "grad_norm": 0.10551530122756958, "learning_rate": 6.133097159975749e-05, "loss": 1.5755, "step": 197216 }, { "epoch": 0.38923170604765644, "grad_norm": 0.11120200157165527, "learning_rate": 6.13246313753928e-05, "loss": 1.5646, "step": 197248 }, { "epoch": 0.3892948520090529, "grad_norm": 0.11255236715078354, "learning_rate": 6.13182911510281e-05, "loss": 1.5818, "step": 197280 }, { "epoch": 0.38935799797044934, "grad_norm": 0.10892856866121292, "learning_rate": 6.131195092666342e-05, "loss": 1.5796, "step": 197312 }, { "epoch": 0.3894211439318458, "grad_norm": 0.11646037548780441, "learning_rate": 6.130561070229873e-05, "loss": 1.591, "step": 197344 }, { "epoch": 0.38948428989324224, "grad_norm": 0.12235072255134583, "learning_rate": 6.129927047793405e-05, "loss": 1.5769, "step": 197376 }, { "epoch": 0.38954743585463864, "grad_norm": 0.10207653045654297, "learning_rate": 6.129293025356935e-05, "loss": 1.5696, "step": 197408 }, { "epoch": 0.3896105818160351, "grad_norm": 0.11503254622220993, "learning_rate": 6.128659002920466e-05, "loss": 1.5717, "step": 197440 }, { "epoch": 0.38967372777743153, "grad_norm": 0.10136708617210388, "learning_rate": 6.128024980483998e-05, "loss": 1.564, "step": 197472 }, { "epoch": 0.389736873738828, "grad_norm": 0.11082805693149567, "learning_rate": 6.127390958047529e-05, "loss": 1.587, "step": 197504 }, { "epoch": 0.38980001970022443, "grad_norm": 0.12247280776500702, "learning_rate": 6.126756935611059e-05, "loss": 1.5784, "step": 197536 }, { "epoch": 0.3898631656616209, "grad_norm": 0.10812677443027496, "learning_rate": 6.126122913174591e-05, "loss": 1.5669, "step": 197568 }, { "epoch": 0.3899263116230173, "grad_norm": 0.10772112756967545, "learning_rate": 6.125488890738121e-05, "loss": 1.5817, "step": 197600 }, { "epoch": 0.3899894575844137, "grad_norm": 0.10378552973270416, "learning_rate": 6.124854868301652e-05, "loss": 1.5782, "step": 197632 }, { "epoch": 0.3900526035458102, "grad_norm": 0.10763803124427795, "learning_rate": 6.124220845865182e-05, "loss": 1.584, "step": 197664 }, { "epoch": 0.3901157495072066, "grad_norm": 0.11229921877384186, "learning_rate": 6.123586823428714e-05, "loss": 1.5807, "step": 197696 }, { "epoch": 0.39017889546860307, "grad_norm": 0.12197394669055939, "learning_rate": 6.122952800992245e-05, "loss": 1.5701, "step": 197728 }, { "epoch": 0.3902420414299995, "grad_norm": 0.11499131470918655, "learning_rate": 6.122318778555777e-05, "loss": 1.5824, "step": 197760 }, { "epoch": 0.3903051873913959, "grad_norm": 0.11056692153215408, "learning_rate": 6.121684756119307e-05, "loss": 1.579, "step": 197792 }, { "epoch": 0.39036833335279236, "grad_norm": 0.1076459214091301, "learning_rate": 6.121050733682838e-05, "loss": 1.5627, "step": 197824 }, { "epoch": 0.3904314793141888, "grad_norm": 0.11634511500597, "learning_rate": 6.12041671124637e-05, "loss": 1.5706, "step": 197856 }, { "epoch": 0.39049462527558526, "grad_norm": 0.11486385017633438, "learning_rate": 6.119782688809901e-05, "loss": 1.5729, "step": 197888 }, { "epoch": 0.3905577712369817, "grad_norm": 0.11045890301465988, "learning_rate": 6.119148666373433e-05, "loss": 1.5867, "step": 197920 }, { "epoch": 0.39062091719837816, "grad_norm": 0.10614673793315887, "learning_rate": 6.118514643936963e-05, "loss": 1.573, "step": 197952 }, { "epoch": 0.39068406315977455, "grad_norm": 0.12246210873126984, "learning_rate": 6.117880621500494e-05, "loss": 1.5937, "step": 197984 }, { "epoch": 0.390747209121171, "grad_norm": 0.11638857424259186, "learning_rate": 6.117246599064024e-05, "loss": 1.5845, "step": 198016 }, { "epoch": 0.39081035508256745, "grad_norm": 0.1108914241194725, "learning_rate": 6.116612576627556e-05, "loss": 1.5858, "step": 198048 }, { "epoch": 0.3908735010439639, "grad_norm": 0.11220531910657883, "learning_rate": 6.115978554191086e-05, "loss": 1.5848, "step": 198080 }, { "epoch": 0.39093664700536035, "grad_norm": 0.1126028373837471, "learning_rate": 6.115344531754617e-05, "loss": 1.5653, "step": 198112 }, { "epoch": 0.3909997929667568, "grad_norm": 0.10485746711492538, "learning_rate": 6.114710509318149e-05, "loss": 1.5839, "step": 198144 }, { "epoch": 0.3910629389281532, "grad_norm": 0.11530809104442596, "learning_rate": 6.11407648688168e-05, "loss": 1.5815, "step": 198176 }, { "epoch": 0.39112608488954964, "grad_norm": 0.11372833698987961, "learning_rate": 6.11344246444521e-05, "loss": 1.5826, "step": 198208 }, { "epoch": 0.3911892308509461, "grad_norm": 0.11040119081735611, "learning_rate": 6.112808442008742e-05, "loss": 1.5814, "step": 198240 }, { "epoch": 0.39125237681234254, "grad_norm": 0.11814737319946289, "learning_rate": 6.112174419572273e-05, "loss": 1.5835, "step": 198272 }, { "epoch": 0.391315522773739, "grad_norm": 0.11235001683235168, "learning_rate": 6.111540397135805e-05, "loss": 1.5842, "step": 198304 }, { "epoch": 0.39137866873513544, "grad_norm": 0.11039021611213684, "learning_rate": 6.110906374699335e-05, "loss": 1.5902, "step": 198336 }, { "epoch": 0.39144181469653183, "grad_norm": 0.12517079710960388, "learning_rate": 6.110272352262866e-05, "loss": 1.5619, "step": 198368 }, { "epoch": 0.3915049606579283, "grad_norm": 0.1051490530371666, "learning_rate": 6.109638329826398e-05, "loss": 1.5728, "step": 198400 }, { "epoch": 0.39156810661932473, "grad_norm": 0.11648889631032944, "learning_rate": 6.109004307389928e-05, "loss": 1.5799, "step": 198432 }, { "epoch": 0.3916312525807212, "grad_norm": 0.11079149693250656, "learning_rate": 6.108370284953458e-05, "loss": 1.5703, "step": 198464 }, { "epoch": 0.3916943985421176, "grad_norm": 0.11616569012403488, "learning_rate": 6.10773626251699e-05, "loss": 1.5915, "step": 198496 }, { "epoch": 0.3917575445035141, "grad_norm": 0.11501801013946533, "learning_rate": 6.107102240080521e-05, "loss": 1.5724, "step": 198528 }, { "epoch": 0.39182069046491047, "grad_norm": 0.11039219051599503, "learning_rate": 6.106468217644052e-05, "loss": 1.5906, "step": 198560 }, { "epoch": 0.3918838364263069, "grad_norm": 0.10678219795227051, "learning_rate": 6.105834195207584e-05, "loss": 1.5669, "step": 198592 }, { "epoch": 0.39194698238770337, "grad_norm": 0.1385871320962906, "learning_rate": 6.105200172771114e-05, "loss": 1.5865, "step": 198624 }, { "epoch": 0.3920101283490998, "grad_norm": 0.11026018857955933, "learning_rate": 6.104566150334645e-05, "loss": 1.5876, "step": 198656 }, { "epoch": 0.39207327431049627, "grad_norm": 0.10629809647798538, "learning_rate": 6.103932127898177e-05, "loss": 1.5842, "step": 198688 }, { "epoch": 0.3921364202718927, "grad_norm": 0.12065635621547699, "learning_rate": 6.1032981054617075e-05, "loss": 1.5825, "step": 198720 }, { "epoch": 0.3921995662332891, "grad_norm": 0.11541282385587692, "learning_rate": 6.1026640830252376e-05, "loss": 1.57, "step": 198752 }, { "epoch": 0.39226271219468556, "grad_norm": 0.11166813224554062, "learning_rate": 6.102030060588769e-05, "loss": 1.5764, "step": 198784 }, { "epoch": 0.392325858156082, "grad_norm": 0.11553598195314407, "learning_rate": 6.1013960381523006e-05, "loss": 1.5756, "step": 198816 }, { "epoch": 0.39238900411747846, "grad_norm": 0.11878856271505356, "learning_rate": 6.100762015715832e-05, "loss": 1.5705, "step": 198848 }, { "epoch": 0.3924521500788749, "grad_norm": 0.11386990547180176, "learning_rate": 6.100127993279362e-05, "loss": 1.5845, "step": 198880 }, { "epoch": 0.39251529604027136, "grad_norm": 0.1106158047914505, "learning_rate": 6.0994939708428936e-05, "loss": 1.5732, "step": 198912 }, { "epoch": 0.3925784420016678, "grad_norm": 0.11698216944932938, "learning_rate": 6.0988599484064243e-05, "loss": 1.57, "step": 198944 }, { "epoch": 0.3926415879630642, "grad_norm": 0.11797782778739929, "learning_rate": 6.098225925969956e-05, "loss": 1.5769, "step": 198976 }, { "epoch": 0.39270473392446065, "grad_norm": 0.10832861810922623, "learning_rate": 6.097591903533486e-05, "loss": 1.5623, "step": 199008 }, { "epoch": 0.3927678798858571, "grad_norm": 0.11208319664001465, "learning_rate": 6.0969578810970174e-05, "loss": 1.5737, "step": 199040 }, { "epoch": 0.39283102584725355, "grad_norm": 0.11608047038316727, "learning_rate": 6.096323858660549e-05, "loss": 1.5689, "step": 199072 }, { "epoch": 0.39289417180865, "grad_norm": 0.12055353820323944, "learning_rate": 6.09568983622408e-05, "loss": 1.5836, "step": 199104 }, { "epoch": 0.39295731777004644, "grad_norm": 0.11328496038913727, "learning_rate": 6.0950558137876104e-05, "loss": 1.5757, "step": 199136 }, { "epoch": 0.39302046373144284, "grad_norm": 0.10745330899953842, "learning_rate": 6.094421791351141e-05, "loss": 1.5712, "step": 199168 }, { "epoch": 0.3930836096928393, "grad_norm": 0.10427608340978622, "learning_rate": 6.0937877689146726e-05, "loss": 1.5799, "step": 199200 }, { "epoch": 0.39314675565423574, "grad_norm": 0.12024036794900894, "learning_rate": 6.093153746478204e-05, "loss": 1.5823, "step": 199232 }, { "epoch": 0.3932099016156322, "grad_norm": 0.11198412626981735, "learning_rate": 6.0925197240417355e-05, "loss": 1.5721, "step": 199264 }, { "epoch": 0.39327304757702863, "grad_norm": 0.1111232116818428, "learning_rate": 6.0918857016052656e-05, "loss": 1.5724, "step": 199296 }, { "epoch": 0.3933361935384251, "grad_norm": 0.11201640963554382, "learning_rate": 6.091251679168797e-05, "loss": 1.572, "step": 199328 }, { "epoch": 0.3933993394998215, "grad_norm": 0.10741990804672241, "learning_rate": 6.090617656732328e-05, "loss": 1.5742, "step": 199360 }, { "epoch": 0.3934624854612179, "grad_norm": 0.108777716755867, "learning_rate": 6.089983634295859e-05, "loss": 1.5682, "step": 199392 }, { "epoch": 0.3935256314226144, "grad_norm": 0.11616198718547821, "learning_rate": 6.0893496118593894e-05, "loss": 1.566, "step": 199424 }, { "epoch": 0.3935887773840108, "grad_norm": 0.11209969967603683, "learning_rate": 6.088715589422921e-05, "loss": 1.5812, "step": 199456 }, { "epoch": 0.3936519233454073, "grad_norm": 0.11173689365386963, "learning_rate": 6.088081566986452e-05, "loss": 1.5821, "step": 199488 }, { "epoch": 0.3937150693068037, "grad_norm": 0.10855965316295624, "learning_rate": 6.087447544549984e-05, "loss": 1.5813, "step": 199520 }, { "epoch": 0.3937782152682001, "grad_norm": 0.11043226718902588, "learning_rate": 6.086813522113514e-05, "loss": 1.5777, "step": 199552 }, { "epoch": 0.39384136122959656, "grad_norm": 0.11845913529396057, "learning_rate": 6.0861794996770447e-05, "loss": 1.5789, "step": 199584 }, { "epoch": 0.393904507190993, "grad_norm": 0.11043913662433624, "learning_rate": 6.085545477240576e-05, "loss": 1.591, "step": 199616 }, { "epoch": 0.39396765315238946, "grad_norm": 0.10733698308467865, "learning_rate": 6.0849114548041076e-05, "loss": 1.577, "step": 199648 }, { "epoch": 0.3940307991137859, "grad_norm": 0.11167687922716141, "learning_rate": 6.084277432367638e-05, "loss": 1.5764, "step": 199680 }, { "epoch": 0.39409394507518236, "grad_norm": 0.10581743717193604, "learning_rate": 6.083643409931169e-05, "loss": 1.5779, "step": 199712 }, { "epoch": 0.39415709103657876, "grad_norm": 0.1120852380990982, "learning_rate": 6.0830093874947006e-05, "loss": 1.5852, "step": 199744 }, { "epoch": 0.3942202369979752, "grad_norm": 0.11101929843425751, "learning_rate": 6.0823753650582314e-05, "loss": 1.5714, "step": 199776 }, { "epoch": 0.39428338295937165, "grad_norm": 0.10848644375801086, "learning_rate": 6.081741342621763e-05, "loss": 1.5767, "step": 199808 }, { "epoch": 0.3943465289207681, "grad_norm": 0.1133238673210144, "learning_rate": 6.081107320185293e-05, "loss": 1.5696, "step": 199840 }, { "epoch": 0.39440967488216455, "grad_norm": 0.11962737143039703, "learning_rate": 6.0804732977488244e-05, "loss": 1.5851, "step": 199872 }, { "epoch": 0.394472820843561, "grad_norm": 0.10968546569347382, "learning_rate": 6.079839275312356e-05, "loss": 1.5791, "step": 199904 }, { "epoch": 0.3945359668049574, "grad_norm": 0.10896339267492294, "learning_rate": 6.0792052528758866e-05, "loss": 1.5708, "step": 199936 }, { "epoch": 0.39459911276635384, "grad_norm": 0.10600137710571289, "learning_rate": 6.0785712304394174e-05, "loss": 1.5842, "step": 199968 }, { "epoch": 0.3946622587277503, "grad_norm": 0.10372083634138107, "learning_rate": 6.077937208002948e-05, "loss": 1.5675, "step": 200000 }, { "epoch": 0.39472540468914674, "grad_norm": 0.10900106281042099, "learning_rate": 6.0773031855664796e-05, "loss": 1.5709, "step": 200032 }, { "epoch": 0.3947885506505432, "grad_norm": 0.11632402241230011, "learning_rate": 6.076669163130011e-05, "loss": 1.5768, "step": 200064 }, { "epoch": 0.39485169661193964, "grad_norm": 0.10969655215740204, "learning_rate": 6.076035140693541e-05, "loss": 1.579, "step": 200096 }, { "epoch": 0.39491484257333603, "grad_norm": 0.10480445623397827, "learning_rate": 6.0754011182570726e-05, "loss": 1.5708, "step": 200128 }, { "epoch": 0.3949779885347325, "grad_norm": 0.1100243628025055, "learning_rate": 6.074767095820604e-05, "loss": 1.5842, "step": 200160 }, { "epoch": 0.39504113449612893, "grad_norm": 0.11034812033176422, "learning_rate": 6.074133073384135e-05, "loss": 1.5744, "step": 200192 }, { "epoch": 0.3951042804575254, "grad_norm": 0.1140894666314125, "learning_rate": 6.073499050947665e-05, "loss": 1.5799, "step": 200224 }, { "epoch": 0.39516742641892183, "grad_norm": 0.10401913523674011, "learning_rate": 6.0728650285111964e-05, "loss": 1.576, "step": 200256 }, { "epoch": 0.3952305723803183, "grad_norm": 0.11680079251527786, "learning_rate": 6.072231006074728e-05, "loss": 1.5888, "step": 200288 }, { "epoch": 0.3952937183417147, "grad_norm": 0.10789495706558228, "learning_rate": 6.0715969836382593e-05, "loss": 1.5715, "step": 200320 }, { "epoch": 0.3953568643031111, "grad_norm": 0.11175715923309326, "learning_rate": 6.0709629612017894e-05, "loss": 1.5688, "step": 200352 }, { "epoch": 0.39542001026450757, "grad_norm": 0.10848549008369446, "learning_rate": 6.070328938765321e-05, "loss": 1.5862, "step": 200384 }, { "epoch": 0.395483156225904, "grad_norm": 0.11460563540458679, "learning_rate": 6.069694916328852e-05, "loss": 1.5731, "step": 200416 }, { "epoch": 0.39554630218730047, "grad_norm": 0.11236392706632614, "learning_rate": 6.069060893892383e-05, "loss": 1.575, "step": 200448 }, { "epoch": 0.3956094481486969, "grad_norm": 0.11141322553157806, "learning_rate": 6.0684268714559146e-05, "loss": 1.5734, "step": 200480 }, { "epoch": 0.3956725941100933, "grad_norm": 0.12026748806238174, "learning_rate": 6.067792849019445e-05, "loss": 1.5738, "step": 200512 }, { "epoch": 0.39573574007148976, "grad_norm": 0.11951130628585815, "learning_rate": 6.067158826582976e-05, "loss": 1.5958, "step": 200544 }, { "epoch": 0.3957988860328862, "grad_norm": 0.1094803735613823, "learning_rate": 6.066524804146507e-05, "loss": 1.5785, "step": 200576 }, { "epoch": 0.39586203199428266, "grad_norm": 0.1262432336807251, "learning_rate": 6.0658907817100384e-05, "loss": 1.5771, "step": 200608 }, { "epoch": 0.3959251779556791, "grad_norm": 0.10395687818527222, "learning_rate": 6.0652567592735685e-05, "loss": 1.5709, "step": 200640 }, { "epoch": 0.39598832391707556, "grad_norm": 0.10931379348039627, "learning_rate": 6.0646227368371e-05, "loss": 1.5707, "step": 200672 }, { "epoch": 0.396051469878472, "grad_norm": 0.12008059769868851, "learning_rate": 6.0639887144006314e-05, "loss": 1.5895, "step": 200704 }, { "epoch": 0.3961146158398684, "grad_norm": 0.12564410269260406, "learning_rate": 6.063354691964163e-05, "loss": 1.5916, "step": 200736 }, { "epoch": 0.39617776180126485, "grad_norm": 0.11612212657928467, "learning_rate": 6.062720669527693e-05, "loss": 1.5876, "step": 200768 }, { "epoch": 0.3962409077626613, "grad_norm": 0.11274783313274384, "learning_rate": 6.0620866470912244e-05, "loss": 1.5669, "step": 200800 }, { "epoch": 0.39630405372405775, "grad_norm": 0.11187774688005447, "learning_rate": 6.061452624654755e-05, "loss": 1.5878, "step": 200832 }, { "epoch": 0.3963671996854542, "grad_norm": 0.10816112160682678, "learning_rate": 6.0608186022182866e-05, "loss": 1.5697, "step": 200864 }, { "epoch": 0.39643034564685065, "grad_norm": 0.10745732486248016, "learning_rate": 6.060184579781817e-05, "loss": 1.578, "step": 200896 }, { "epoch": 0.39649349160824704, "grad_norm": 0.10564573854207993, "learning_rate": 6.059550557345348e-05, "loss": 1.5656, "step": 200928 }, { "epoch": 0.3965566375696435, "grad_norm": 0.11073864251375198, "learning_rate": 6.0589165349088797e-05, "loss": 1.5802, "step": 200960 }, { "epoch": 0.39661978353103994, "grad_norm": 0.11043330281972885, "learning_rate": 6.0582825124724104e-05, "loss": 1.5776, "step": 200992 }, { "epoch": 0.3966829294924364, "grad_norm": 0.11076078563928604, "learning_rate": 6.057648490035941e-05, "loss": 1.5665, "step": 201024 }, { "epoch": 0.39674607545383284, "grad_norm": 0.11658389121294022, "learning_rate": 6.057014467599472e-05, "loss": 1.5756, "step": 201056 }, { "epoch": 0.3968092214152293, "grad_norm": 0.10756539553403854, "learning_rate": 6.0563804451630035e-05, "loss": 1.5681, "step": 201088 }, { "epoch": 0.3968723673766257, "grad_norm": 0.10969916731119156, "learning_rate": 6.055746422726535e-05, "loss": 1.5819, "step": 201120 }, { "epoch": 0.39693551333802213, "grad_norm": 0.12701939046382904, "learning_rate": 6.0551124002900664e-05, "loss": 1.5707, "step": 201152 }, { "epoch": 0.3969986592994186, "grad_norm": 0.11056146025657654, "learning_rate": 6.0544783778535965e-05, "loss": 1.5809, "step": 201184 }, { "epoch": 0.397061805260815, "grad_norm": 0.10754179954528809, "learning_rate": 6.053844355417127e-05, "loss": 1.5782, "step": 201216 }, { "epoch": 0.3971249512222115, "grad_norm": 0.1104857474565506, "learning_rate": 6.053210332980659e-05, "loss": 1.5679, "step": 201248 }, { "epoch": 0.3971880971836079, "grad_norm": 0.11548054218292236, "learning_rate": 6.05257631054419e-05, "loss": 1.5762, "step": 201280 }, { "epoch": 0.3972512431450043, "grad_norm": 0.11069951951503754, "learning_rate": 6.05194228810772e-05, "loss": 1.5914, "step": 201312 }, { "epoch": 0.39731438910640077, "grad_norm": 0.1240776926279068, "learning_rate": 6.051308265671252e-05, "loss": 1.5787, "step": 201344 }, { "epoch": 0.3973775350677972, "grad_norm": 0.11726592481136322, "learning_rate": 6.050674243234783e-05, "loss": 1.5733, "step": 201376 }, { "epoch": 0.39744068102919367, "grad_norm": 0.11535972356796265, "learning_rate": 6.050040220798314e-05, "loss": 1.5776, "step": 201408 }, { "epoch": 0.3975038269905901, "grad_norm": 0.11283685266971588, "learning_rate": 6.049406198361845e-05, "loss": 1.576, "step": 201440 }, { "epoch": 0.39756697295198656, "grad_norm": 0.11160247772932053, "learning_rate": 6.0487721759253755e-05, "loss": 1.5798, "step": 201472 }, { "epoch": 0.39763011891338296, "grad_norm": 0.11608602851629257, "learning_rate": 6.048138153488907e-05, "loss": 1.5868, "step": 201504 }, { "epoch": 0.3976932648747794, "grad_norm": 0.11454786360263824, "learning_rate": 6.0475041310524384e-05, "loss": 1.57, "step": 201536 }, { "epoch": 0.39775641083617586, "grad_norm": 0.12565377354621887, "learning_rate": 6.0468701086159685e-05, "loss": 1.5768, "step": 201568 }, { "epoch": 0.3978195567975723, "grad_norm": 0.10945910215377808, "learning_rate": 6.0462360861795e-05, "loss": 1.5659, "step": 201600 }, { "epoch": 0.39788270275896875, "grad_norm": 0.129023939371109, "learning_rate": 6.045602063743031e-05, "loss": 1.5792, "step": 201632 }, { "epoch": 0.3979458487203652, "grad_norm": 0.1132936179637909, "learning_rate": 6.044968041306562e-05, "loss": 1.5647, "step": 201664 }, { "epoch": 0.3980089946817616, "grad_norm": 0.11813409626483917, "learning_rate": 6.044334018870092e-05, "loss": 1.5677, "step": 201696 }, { "epoch": 0.39807214064315805, "grad_norm": 0.12388277053833008, "learning_rate": 6.043699996433624e-05, "loss": 1.5806, "step": 201728 }, { "epoch": 0.3981352866045545, "grad_norm": 0.10976135730743408, "learning_rate": 6.043065973997155e-05, "loss": 1.5794, "step": 201760 }, { "epoch": 0.39819843256595094, "grad_norm": 0.10981694608926773, "learning_rate": 6.042431951560687e-05, "loss": 1.5687, "step": 201792 }, { "epoch": 0.3982615785273474, "grad_norm": 0.10986041277647018, "learning_rate": 6.0417979291242175e-05, "loss": 1.5913, "step": 201824 }, { "epoch": 0.39832472448874384, "grad_norm": 0.12826699018478394, "learning_rate": 6.0411639066877476e-05, "loss": 1.578, "step": 201856 }, { "epoch": 0.39838787045014024, "grad_norm": 0.10725842416286469, "learning_rate": 6.040529884251279e-05, "loss": 1.5701, "step": 201888 }, { "epoch": 0.3984510164115367, "grad_norm": 0.1162799596786499, "learning_rate": 6.0398958618148105e-05, "loss": 1.5872, "step": 201920 }, { "epoch": 0.39851416237293313, "grad_norm": 0.11675944924354553, "learning_rate": 6.039261839378342e-05, "loss": 1.5743, "step": 201952 }, { "epoch": 0.3985773083343296, "grad_norm": 0.10371293872594833, "learning_rate": 6.038627816941872e-05, "loss": 1.5736, "step": 201984 }, { "epoch": 0.39864045429572603, "grad_norm": 0.1117289662361145, "learning_rate": 6.0379937945054035e-05, "loss": 1.5746, "step": 202016 }, { "epoch": 0.3987036002571225, "grad_norm": 0.10964484512805939, "learning_rate": 6.037359772068934e-05, "loss": 1.5646, "step": 202048 }, { "epoch": 0.3987667462185189, "grad_norm": 0.10782244801521301, "learning_rate": 6.036725749632466e-05, "loss": 1.5647, "step": 202080 }, { "epoch": 0.3988298921799153, "grad_norm": 0.1156175285577774, "learning_rate": 6.036091727195996e-05, "loss": 1.5759, "step": 202112 }, { "epoch": 0.3988930381413118, "grad_norm": 0.10407830774784088, "learning_rate": 6.035457704759527e-05, "loss": 1.5801, "step": 202144 }, { "epoch": 0.3989561841027082, "grad_norm": 0.10547314584255219, "learning_rate": 6.034823682323059e-05, "loss": 1.577, "step": 202176 }, { "epoch": 0.39901933006410467, "grad_norm": 0.1167195662856102, "learning_rate": 6.03418965988659e-05, "loss": 1.5745, "step": 202208 }, { "epoch": 0.3990824760255011, "grad_norm": 0.11346475780010223, "learning_rate": 6.03355563745012e-05, "loss": 1.5798, "step": 202240 }, { "epoch": 0.3991456219868975, "grad_norm": 0.11195581406354904, "learning_rate": 6.032921615013651e-05, "loss": 1.5648, "step": 202272 }, { "epoch": 0.39920876794829396, "grad_norm": 0.11395737528800964, "learning_rate": 6.0322875925771825e-05, "loss": 1.5743, "step": 202304 }, { "epoch": 0.3992719139096904, "grad_norm": 0.11736161261796951, "learning_rate": 6.031653570140714e-05, "loss": 1.5795, "step": 202336 }, { "epoch": 0.39933505987108686, "grad_norm": 0.13533258438110352, "learning_rate": 6.031019547704244e-05, "loss": 1.5762, "step": 202368 }, { "epoch": 0.3993982058324833, "grad_norm": 0.10975204408168793, "learning_rate": 6.0303855252677755e-05, "loss": 1.5763, "step": 202400 }, { "epoch": 0.39946135179387976, "grad_norm": 0.11502835154533386, "learning_rate": 6.029751502831307e-05, "loss": 1.5715, "step": 202432 }, { "epoch": 0.3995244977552762, "grad_norm": 0.11812707036733627, "learning_rate": 6.029117480394838e-05, "loss": 1.5798, "step": 202464 }, { "epoch": 0.3995876437166726, "grad_norm": 0.12040771543979645, "learning_rate": 6.028483457958369e-05, "loss": 1.5762, "step": 202496 }, { "epoch": 0.39965078967806905, "grad_norm": 0.11257939785718918, "learning_rate": 6.027849435521899e-05, "loss": 1.5734, "step": 202528 }, { "epoch": 0.3997139356394655, "grad_norm": 0.10618849843740463, "learning_rate": 6.027215413085431e-05, "loss": 1.579, "step": 202560 }, { "epoch": 0.39977708160086195, "grad_norm": 0.11009246110916138, "learning_rate": 6.026581390648962e-05, "loss": 1.5727, "step": 202592 }, { "epoch": 0.3998402275622584, "grad_norm": 0.10986966639757156, "learning_rate": 6.025947368212494e-05, "loss": 1.5919, "step": 202624 }, { "epoch": 0.39990337352365485, "grad_norm": 0.10643728077411652, "learning_rate": 6.025313345776024e-05, "loss": 1.585, "step": 202656 }, { "epoch": 0.39996651948505124, "grad_norm": 0.11467166244983673, "learning_rate": 6.0246793233395546e-05, "loss": 1.5767, "step": 202688 }, { "epoch": 0.4000296654464477, "grad_norm": 0.10692814737558365, "learning_rate": 6.024045300903086e-05, "loss": 1.5665, "step": 202720 }, { "epoch": 0.40009281140784414, "grad_norm": 0.11108317971229553, "learning_rate": 6.0234112784666175e-05, "loss": 1.5812, "step": 202752 }, { "epoch": 0.4001559573692406, "grad_norm": 0.11161785572767258, "learning_rate": 6.0227772560301476e-05, "loss": 1.5857, "step": 202784 }, { "epoch": 0.40021910333063704, "grad_norm": 0.11147263646125793, "learning_rate": 6.022143233593679e-05, "loss": 1.5731, "step": 202816 }, { "epoch": 0.4002822492920335, "grad_norm": 0.10793454945087433, "learning_rate": 6.0215092111572105e-05, "loss": 1.5718, "step": 202848 }, { "epoch": 0.4003453952534299, "grad_norm": 0.12187591195106506, "learning_rate": 6.020875188720741e-05, "loss": 1.566, "step": 202880 }, { "epoch": 0.40040854121482633, "grad_norm": 0.11108333617448807, "learning_rate": 6.0202411662842714e-05, "loss": 1.5865, "step": 202912 }, { "epoch": 0.4004716871762228, "grad_norm": 0.1154702678322792, "learning_rate": 6.019607143847803e-05, "loss": 1.5675, "step": 202944 }, { "epoch": 0.40053483313761923, "grad_norm": 0.10599309951066971, "learning_rate": 6.018973121411334e-05, "loss": 1.5795, "step": 202976 }, { "epoch": 0.4005979790990157, "grad_norm": 0.11422599107027054, "learning_rate": 6.018339098974866e-05, "loss": 1.5697, "step": 203008 }, { "epoch": 0.4006611250604121, "grad_norm": 0.12432745844125748, "learning_rate": 6.017705076538396e-05, "loss": 1.5739, "step": 203040 }, { "epoch": 0.4007242710218085, "grad_norm": 0.11741861701011658, "learning_rate": 6.017071054101927e-05, "loss": 1.5793, "step": 203072 }, { "epoch": 0.40078741698320497, "grad_norm": 0.11207618564367294, "learning_rate": 6.016437031665458e-05, "loss": 1.5718, "step": 203104 }, { "epoch": 0.4008505629446014, "grad_norm": 0.10925807803869247, "learning_rate": 6.0158030092289895e-05, "loss": 1.5888, "step": 203136 }, { "epoch": 0.40091370890599787, "grad_norm": 0.11147225648164749, "learning_rate": 6.015168986792521e-05, "loss": 1.5831, "step": 203168 }, { "epoch": 0.4009768548673943, "grad_norm": 0.11625019460916519, "learning_rate": 6.014534964356051e-05, "loss": 1.5781, "step": 203200 }, { "epoch": 0.40104000082879077, "grad_norm": 0.11803005635738373, "learning_rate": 6.0139009419195826e-05, "loss": 1.5738, "step": 203232 }, { "epoch": 0.40110314679018716, "grad_norm": 0.11831724643707275, "learning_rate": 6.013266919483114e-05, "loss": 1.5708, "step": 203264 }, { "epoch": 0.4011662927515836, "grad_norm": 0.11004021763801575, "learning_rate": 6.012632897046645e-05, "loss": 1.5755, "step": 203296 }, { "epoch": 0.40122943871298006, "grad_norm": 0.1137085109949112, "learning_rate": 6.011998874610175e-05, "loss": 1.5873, "step": 203328 }, { "epoch": 0.4012925846743765, "grad_norm": 0.11433461308479309, "learning_rate": 6.0113648521737064e-05, "loss": 1.5818, "step": 203360 }, { "epoch": 0.40135573063577296, "grad_norm": 0.1072268933057785, "learning_rate": 6.010730829737238e-05, "loss": 1.5763, "step": 203392 }, { "epoch": 0.4014188765971694, "grad_norm": 0.10801132768392563, "learning_rate": 6.010096807300769e-05, "loss": 1.5786, "step": 203424 }, { "epoch": 0.4014820225585658, "grad_norm": 0.11331193894147873, "learning_rate": 6.0094627848642994e-05, "loss": 1.5681, "step": 203456 }, { "epoch": 0.40154516851996225, "grad_norm": 0.11307258158922195, "learning_rate": 6.008828762427831e-05, "loss": 1.5718, "step": 203488 }, { "epoch": 0.4016083144813587, "grad_norm": 0.10479161143302917, "learning_rate": 6.0081947399913616e-05, "loss": 1.5695, "step": 203520 }, { "epoch": 0.40167146044275515, "grad_norm": 0.11359573900699615, "learning_rate": 6.007560717554893e-05, "loss": 1.5744, "step": 203552 }, { "epoch": 0.4017346064041516, "grad_norm": 0.10413649678230286, "learning_rate": 6.006926695118423e-05, "loss": 1.5608, "step": 203584 }, { "epoch": 0.40179775236554804, "grad_norm": 0.10917824506759644, "learning_rate": 6.0062926726819546e-05, "loss": 1.5695, "step": 203616 }, { "epoch": 0.40186089832694444, "grad_norm": 0.11242704838514328, "learning_rate": 6.005658650245486e-05, "loss": 1.5775, "step": 203648 }, { "epoch": 0.4019240442883409, "grad_norm": 0.129756361246109, "learning_rate": 6.0050246278090175e-05, "loss": 1.5791, "step": 203680 }, { "epoch": 0.40198719024973734, "grad_norm": 0.10912884771823883, "learning_rate": 6.0043906053725476e-05, "loss": 1.5649, "step": 203712 }, { "epoch": 0.4020503362111338, "grad_norm": 0.11269836872816086, "learning_rate": 6.0037565829360784e-05, "loss": 1.5732, "step": 203744 }, { "epoch": 0.40211348217253023, "grad_norm": 0.1197069063782692, "learning_rate": 6.00312256049961e-05, "loss": 1.5762, "step": 203776 }, { "epoch": 0.4021766281339267, "grad_norm": 0.11089413613080978, "learning_rate": 6.002488538063141e-05, "loss": 1.5678, "step": 203808 }, { "epoch": 0.4022397740953231, "grad_norm": 0.11494425684213638, "learning_rate": 6.001854515626673e-05, "loss": 1.5886, "step": 203840 }, { "epoch": 0.4023029200567195, "grad_norm": 0.11617664992809296, "learning_rate": 6.001220493190203e-05, "loss": 1.5773, "step": 203872 }, { "epoch": 0.402366066018116, "grad_norm": 0.1175367683172226, "learning_rate": 6.000586470753734e-05, "loss": 1.5683, "step": 203904 }, { "epoch": 0.4024292119795124, "grad_norm": 0.11487821489572525, "learning_rate": 5.999952448317265e-05, "loss": 1.572, "step": 203936 }, { "epoch": 0.4024923579409089, "grad_norm": 0.11277016252279282, "learning_rate": 5.9993184258807966e-05, "loss": 1.5746, "step": 203968 }, { "epoch": 0.4025555039023053, "grad_norm": 0.10945415496826172, "learning_rate": 5.998684403444327e-05, "loss": 1.5769, "step": 204000 }, { "epoch": 0.40261864986370177, "grad_norm": 0.1095406711101532, "learning_rate": 5.998050381007858e-05, "loss": 1.5735, "step": 204032 }, { "epoch": 0.40268179582509817, "grad_norm": 0.11577536910772324, "learning_rate": 5.9974163585713896e-05, "loss": 1.5988, "step": 204064 }, { "epoch": 0.4027449417864946, "grad_norm": 0.11286237090826035, "learning_rate": 5.9967823361349204e-05, "loss": 1.566, "step": 204096 }, { "epoch": 0.40280808774789106, "grad_norm": 0.1081601157784462, "learning_rate": 5.996148313698451e-05, "loss": 1.5745, "step": 204128 }, { "epoch": 0.4028712337092875, "grad_norm": 0.11012493073940277, "learning_rate": 5.995514291261982e-05, "loss": 1.5732, "step": 204160 }, { "epoch": 0.40293437967068396, "grad_norm": 0.11891575902700424, "learning_rate": 5.9948802688255134e-05, "loss": 1.569, "step": 204192 }, { "epoch": 0.4029975256320804, "grad_norm": 0.11340872943401337, "learning_rate": 5.994246246389045e-05, "loss": 1.572, "step": 204224 }, { "epoch": 0.4030606715934768, "grad_norm": 0.11022622138261795, "learning_rate": 5.993612223952575e-05, "loss": 1.5883, "step": 204256 }, { "epoch": 0.40312381755487325, "grad_norm": 0.1104617491364479, "learning_rate": 5.9929782015161064e-05, "loss": 1.5858, "step": 204288 }, { "epoch": 0.4031869635162697, "grad_norm": 0.11066514253616333, "learning_rate": 5.992344179079638e-05, "loss": 1.5836, "step": 204320 }, { "epoch": 0.40325010947766615, "grad_norm": 0.11127970367670059, "learning_rate": 5.9917101566431686e-05, "loss": 1.5755, "step": 204352 }, { "epoch": 0.4033132554390626, "grad_norm": 0.11764411628246307, "learning_rate": 5.991076134206699e-05, "loss": 1.5856, "step": 204384 }, { "epoch": 0.40337640140045905, "grad_norm": 0.11639747023582458, "learning_rate": 5.99044211177023e-05, "loss": 1.5748, "step": 204416 }, { "epoch": 0.40343954736185544, "grad_norm": 0.10433321446180344, "learning_rate": 5.9898080893337616e-05, "loss": 1.5755, "step": 204448 }, { "epoch": 0.4035026933232519, "grad_norm": 0.11697738617658615, "learning_rate": 5.989174066897293e-05, "loss": 1.5729, "step": 204480 }, { "epoch": 0.40356583928464834, "grad_norm": 0.1136200875043869, "learning_rate": 5.988540044460824e-05, "loss": 1.5765, "step": 204512 }, { "epoch": 0.4036289852460448, "grad_norm": 0.12079986184835434, "learning_rate": 5.9879060220243547e-05, "loss": 1.5886, "step": 204544 }, { "epoch": 0.40369213120744124, "grad_norm": 0.10879378020763397, "learning_rate": 5.9872719995878854e-05, "loss": 1.5786, "step": 204576 }, { "epoch": 0.4037552771688377, "grad_norm": 0.11408454924821854, "learning_rate": 5.986637977151417e-05, "loss": 1.575, "step": 204608 }, { "epoch": 0.4038184231302341, "grad_norm": 0.11341025680303574, "learning_rate": 5.9860039547149483e-05, "loss": 1.5763, "step": 204640 }, { "epoch": 0.40388156909163053, "grad_norm": 0.10993221402168274, "learning_rate": 5.9853699322784784e-05, "loss": 1.5835, "step": 204672 }, { "epoch": 0.403944715053027, "grad_norm": 0.11652026325464249, "learning_rate": 5.98473590984201e-05, "loss": 1.5699, "step": 204704 }, { "epoch": 0.40400786101442343, "grad_norm": 0.10848741233348846, "learning_rate": 5.984101887405541e-05, "loss": 1.5737, "step": 204736 }, { "epoch": 0.4040710069758199, "grad_norm": 0.11425282806158066, "learning_rate": 5.983467864969072e-05, "loss": 1.5699, "step": 204768 }, { "epoch": 0.40413415293721633, "grad_norm": 0.10717558860778809, "learning_rate": 5.982833842532602e-05, "loss": 1.5696, "step": 204800 }, { "epoch": 0.4041972988986127, "grad_norm": 0.12416622787714005, "learning_rate": 5.982199820096134e-05, "loss": 1.571, "step": 204832 }, { "epoch": 0.40426044486000917, "grad_norm": 0.10836885124444962, "learning_rate": 5.981565797659665e-05, "loss": 1.5564, "step": 204864 }, { "epoch": 0.4043235908214056, "grad_norm": 0.11000969260931015, "learning_rate": 5.9809317752231966e-05, "loss": 1.5837, "step": 204896 }, { "epoch": 0.40438673678280207, "grad_norm": 0.11707895249128342, "learning_rate": 5.980297752786727e-05, "loss": 1.5673, "step": 204928 }, { "epoch": 0.4044498827441985, "grad_norm": 0.1101837307214737, "learning_rate": 5.979663730350258e-05, "loss": 1.573, "step": 204960 }, { "epoch": 0.40451302870559497, "grad_norm": 0.11137574911117554, "learning_rate": 5.979029707913789e-05, "loss": 1.5783, "step": 204992 }, { "epoch": 0.40457617466699136, "grad_norm": 0.10906076431274414, "learning_rate": 5.9783956854773204e-05, "loss": 1.5864, "step": 205024 }, { "epoch": 0.4046393206283878, "grad_norm": 0.11517034471035004, "learning_rate": 5.9777616630408505e-05, "loss": 1.581, "step": 205056 }, { "epoch": 0.40470246658978426, "grad_norm": 0.12411621958017349, "learning_rate": 5.977127640604382e-05, "loss": 1.5717, "step": 205088 }, { "epoch": 0.4047656125511807, "grad_norm": 0.11467491835355759, "learning_rate": 5.9764936181679134e-05, "loss": 1.5776, "step": 205120 }, { "epoch": 0.40482875851257716, "grad_norm": 0.11423167586326599, "learning_rate": 5.975859595731444e-05, "loss": 1.5794, "step": 205152 }, { "epoch": 0.4048919044739736, "grad_norm": 0.11539997905492783, "learning_rate": 5.9752255732949756e-05, "loss": 1.5782, "step": 205184 }, { "epoch": 0.40495505043537, "grad_norm": 0.1092941015958786, "learning_rate": 5.974591550858506e-05, "loss": 1.5667, "step": 205216 }, { "epoch": 0.40501819639676645, "grad_norm": 0.11151275783777237, "learning_rate": 5.973957528422037e-05, "loss": 1.5782, "step": 205248 }, { "epoch": 0.4050813423581629, "grad_norm": 0.10875088721513748, "learning_rate": 5.9733235059855687e-05, "loss": 1.5535, "step": 205280 }, { "epoch": 0.40514448831955935, "grad_norm": 0.11437881737947464, "learning_rate": 5.9726894835491e-05, "loss": 1.5796, "step": 205312 }, { "epoch": 0.4052076342809558, "grad_norm": 0.11793049424886703, "learning_rate": 5.97205546111263e-05, "loss": 1.5737, "step": 205344 }, { "epoch": 0.40527078024235225, "grad_norm": 0.10634225606918335, "learning_rate": 5.971421438676161e-05, "loss": 1.5723, "step": 205376 }, { "epoch": 0.40533392620374864, "grad_norm": 0.11165452748537064, "learning_rate": 5.9707874162396924e-05, "loss": 1.5781, "step": 205408 }, { "epoch": 0.4053970721651451, "grad_norm": 0.11905699223279953, "learning_rate": 5.970153393803224e-05, "loss": 1.5718, "step": 205440 }, { "epoch": 0.40546021812654154, "grad_norm": 0.11469445377588272, "learning_rate": 5.969519371366754e-05, "loss": 1.5696, "step": 205472 }, { "epoch": 0.405523364087938, "grad_norm": 0.11540370434522629, "learning_rate": 5.9688853489302855e-05, "loss": 1.5856, "step": 205504 }, { "epoch": 0.40558651004933444, "grad_norm": 0.10299599170684814, "learning_rate": 5.968251326493817e-05, "loss": 1.5727, "step": 205536 }, { "epoch": 0.4056496560107309, "grad_norm": 0.11487219482660294, "learning_rate": 5.967617304057348e-05, "loss": 1.5939, "step": 205568 }, { "epoch": 0.4057128019721273, "grad_norm": 0.11906348168849945, "learning_rate": 5.9669832816208785e-05, "loss": 1.5934, "step": 205600 }, { "epoch": 0.40577594793352373, "grad_norm": 0.10434137284755707, "learning_rate": 5.966349259184409e-05, "loss": 1.5803, "step": 205632 }, { "epoch": 0.4058390938949202, "grad_norm": 0.10675708949565887, "learning_rate": 5.965715236747941e-05, "loss": 1.5797, "step": 205664 }, { "epoch": 0.4059022398563166, "grad_norm": 0.1201130822300911, "learning_rate": 5.965081214311472e-05, "loss": 1.5773, "step": 205696 }, { "epoch": 0.4059653858177131, "grad_norm": 0.11443489789962769, "learning_rate": 5.964447191875002e-05, "loss": 1.5843, "step": 205728 }, { "epoch": 0.4060285317791095, "grad_norm": 0.1270044595003128, "learning_rate": 5.963813169438534e-05, "loss": 1.5832, "step": 205760 }, { "epoch": 0.406091677740506, "grad_norm": 0.11829876154661179, "learning_rate": 5.9631791470020645e-05, "loss": 1.5638, "step": 205792 }, { "epoch": 0.40615482370190237, "grad_norm": 0.1094130277633667, "learning_rate": 5.962545124565596e-05, "loss": 1.5852, "step": 205824 }, { "epoch": 0.4062179696632988, "grad_norm": 0.10748355090618134, "learning_rate": 5.9619111021291274e-05, "loss": 1.5676, "step": 205856 }, { "epoch": 0.40628111562469527, "grad_norm": 0.11317727714776993, "learning_rate": 5.9612770796926575e-05, "loss": 1.5662, "step": 205888 }, { "epoch": 0.4063442615860917, "grad_norm": 0.1065066009759903, "learning_rate": 5.960643057256189e-05, "loss": 1.571, "step": 205920 }, { "epoch": 0.40640740754748816, "grad_norm": 0.10893092304468155, "learning_rate": 5.9600090348197204e-05, "loss": 1.5706, "step": 205952 }, { "epoch": 0.4064705535088846, "grad_norm": 0.11343265324831009, "learning_rate": 5.959375012383251e-05, "loss": 1.5589, "step": 205984 }, { "epoch": 0.406533699470281, "grad_norm": 0.11336424946784973, "learning_rate": 5.958740989946782e-05, "loss": 1.5799, "step": 206016 }, { "epoch": 0.40659684543167746, "grad_norm": 0.10973262786865234, "learning_rate": 5.958106967510313e-05, "loss": 1.5755, "step": 206048 }, { "epoch": 0.4066599913930739, "grad_norm": 0.10886547714471817, "learning_rate": 5.957472945073844e-05, "loss": 1.5775, "step": 206080 }, { "epoch": 0.40672313735447035, "grad_norm": 0.11491454392671585, "learning_rate": 5.956838922637376e-05, "loss": 1.5713, "step": 206112 }, { "epoch": 0.4067862833158668, "grad_norm": 0.1137968972325325, "learning_rate": 5.956204900200906e-05, "loss": 1.5726, "step": 206144 }, { "epoch": 0.40684942927726325, "grad_norm": 0.1123242974281311, "learning_rate": 5.955570877764437e-05, "loss": 1.571, "step": 206176 }, { "epoch": 0.40691257523865965, "grad_norm": 0.11520496755838394, "learning_rate": 5.954936855327968e-05, "loss": 1.5909, "step": 206208 }, { "epoch": 0.4069757212000561, "grad_norm": 0.11327225714921951, "learning_rate": 5.9543028328914995e-05, "loss": 1.5996, "step": 206240 }, { "epoch": 0.40703886716145254, "grad_norm": 0.11600563675165176, "learning_rate": 5.9536688104550296e-05, "loss": 1.5643, "step": 206272 }, { "epoch": 0.407102013122849, "grad_norm": 0.11213836818933487, "learning_rate": 5.953034788018561e-05, "loss": 1.5694, "step": 206304 }, { "epoch": 0.40716515908424544, "grad_norm": 0.12320289760828018, "learning_rate": 5.9524007655820925e-05, "loss": 1.5738, "step": 206336 }, { "epoch": 0.4072283050456419, "grad_norm": 0.11507931351661682, "learning_rate": 5.951766743145624e-05, "loss": 1.5807, "step": 206368 }, { "epoch": 0.4072914510070383, "grad_norm": 0.11074212193489075, "learning_rate": 5.951132720709154e-05, "loss": 1.5629, "step": 206400 }, { "epoch": 0.40735459696843473, "grad_norm": 0.10792914777994156, "learning_rate": 5.950498698272685e-05, "loss": 1.5629, "step": 206432 }, { "epoch": 0.4074177429298312, "grad_norm": 0.1168747991323471, "learning_rate": 5.949864675836216e-05, "loss": 1.5687, "step": 206464 }, { "epoch": 0.40748088889122763, "grad_norm": 0.11281394958496094, "learning_rate": 5.949230653399748e-05, "loss": 1.5668, "step": 206496 }, { "epoch": 0.4075440348526241, "grad_norm": 0.10642575472593307, "learning_rate": 5.948596630963279e-05, "loss": 1.5818, "step": 206528 }, { "epoch": 0.40760718081402053, "grad_norm": 0.11196409910917282, "learning_rate": 5.947962608526809e-05, "loss": 1.5814, "step": 206560 }, { "epoch": 0.4076703267754169, "grad_norm": 0.11565840989351273, "learning_rate": 5.947328586090341e-05, "loss": 1.5831, "step": 206592 }, { "epoch": 0.4077334727368134, "grad_norm": 0.10333291441202164, "learning_rate": 5.9466945636538715e-05, "loss": 1.5775, "step": 206624 }, { "epoch": 0.4077966186982098, "grad_norm": 0.1070685088634491, "learning_rate": 5.946060541217403e-05, "loss": 1.5732, "step": 206656 }, { "epoch": 0.40785976465960627, "grad_norm": 0.11558514833450317, "learning_rate": 5.945426518780933e-05, "loss": 1.5785, "step": 206688 }, { "epoch": 0.4079229106210027, "grad_norm": 0.10835957527160645, "learning_rate": 5.9447924963444645e-05, "loss": 1.5648, "step": 206720 }, { "epoch": 0.40798605658239917, "grad_norm": 0.11364418268203735, "learning_rate": 5.944158473907996e-05, "loss": 1.5757, "step": 206752 }, { "epoch": 0.40804920254379556, "grad_norm": 0.12189406901597977, "learning_rate": 5.9435244514715275e-05, "loss": 1.5745, "step": 206784 }, { "epoch": 0.408112348505192, "grad_norm": 0.1169697642326355, "learning_rate": 5.9428904290350576e-05, "loss": 1.5685, "step": 206816 }, { "epoch": 0.40817549446658846, "grad_norm": 0.1133742704987526, "learning_rate": 5.942256406598588e-05, "loss": 1.5802, "step": 206848 }, { "epoch": 0.4082386404279849, "grad_norm": 0.10903415083885193, "learning_rate": 5.94162238416212e-05, "loss": 1.5596, "step": 206880 }, { "epoch": 0.40830178638938136, "grad_norm": 0.11916998028755188, "learning_rate": 5.940988361725651e-05, "loss": 1.5695, "step": 206912 }, { "epoch": 0.4083649323507778, "grad_norm": 0.11026991903781891, "learning_rate": 5.9403543392891813e-05, "loss": 1.5548, "step": 206944 }, { "epoch": 0.4084280783121742, "grad_norm": 0.10510311275720596, "learning_rate": 5.939720316852713e-05, "loss": 1.573, "step": 206976 }, { "epoch": 0.40849122427357065, "grad_norm": 0.11702536791563034, "learning_rate": 5.939086294416244e-05, "loss": 1.5641, "step": 207008 }, { "epoch": 0.4085543702349671, "grad_norm": 0.1176532730460167, "learning_rate": 5.938452271979775e-05, "loss": 1.5828, "step": 207040 }, { "epoch": 0.40861751619636355, "grad_norm": 0.11026246845722198, "learning_rate": 5.937818249543305e-05, "loss": 1.577, "step": 207072 }, { "epoch": 0.40868066215776, "grad_norm": 0.10813239961862564, "learning_rate": 5.9371842271068366e-05, "loss": 1.5833, "step": 207104 }, { "epoch": 0.40874380811915645, "grad_norm": 0.11306612193584442, "learning_rate": 5.936550204670368e-05, "loss": 1.5761, "step": 207136 }, { "epoch": 0.40880695408055284, "grad_norm": 0.10613807290792465, "learning_rate": 5.9359161822338995e-05, "loss": 1.5735, "step": 207168 }, { "epoch": 0.4088701000419493, "grad_norm": 0.1124691516160965, "learning_rate": 5.935282159797431e-05, "loss": 1.579, "step": 207200 }, { "epoch": 0.40893324600334574, "grad_norm": 0.1268845945596695, "learning_rate": 5.934648137360961e-05, "loss": 1.5866, "step": 207232 }, { "epoch": 0.4089963919647422, "grad_norm": 0.10868234932422638, "learning_rate": 5.934014114924492e-05, "loss": 1.5733, "step": 207264 }, { "epoch": 0.40905953792613864, "grad_norm": 0.10653870552778244, "learning_rate": 5.933380092488023e-05, "loss": 1.5721, "step": 207296 }, { "epoch": 0.4091226838875351, "grad_norm": 0.1217452734708786, "learning_rate": 5.932746070051555e-05, "loss": 1.5902, "step": 207328 }, { "epoch": 0.40918582984893154, "grad_norm": 0.10850151628255844, "learning_rate": 5.932112047615085e-05, "loss": 1.5777, "step": 207360 }, { "epoch": 0.40924897581032793, "grad_norm": 0.11223257333040237, "learning_rate": 5.931478025178616e-05, "loss": 1.5711, "step": 207392 }, { "epoch": 0.4093121217717244, "grad_norm": 0.11708072572946548, "learning_rate": 5.930844002742148e-05, "loss": 1.5755, "step": 207424 }, { "epoch": 0.40937526773312083, "grad_norm": 0.12039277702569962, "learning_rate": 5.9302099803056785e-05, "loss": 1.5776, "step": 207456 }, { "epoch": 0.4094384136945173, "grad_norm": 0.11246675252914429, "learning_rate": 5.9295759578692086e-05, "loss": 1.5727, "step": 207488 }, { "epoch": 0.4095015596559137, "grad_norm": 0.11134649813175201, "learning_rate": 5.92894193543274e-05, "loss": 1.5809, "step": 207520 }, { "epoch": 0.4095647056173102, "grad_norm": 0.11862438917160034, "learning_rate": 5.9283079129962716e-05, "loss": 1.5921, "step": 207552 }, { "epoch": 0.40962785157870657, "grad_norm": 0.11024701595306396, "learning_rate": 5.927673890559803e-05, "loss": 1.5748, "step": 207584 }, { "epoch": 0.409690997540103, "grad_norm": 0.1241542175412178, "learning_rate": 5.927039868123333e-05, "loss": 1.5823, "step": 207616 }, { "epoch": 0.40975414350149947, "grad_norm": 0.11487919837236404, "learning_rate": 5.9264058456868646e-05, "loss": 1.5594, "step": 207648 }, { "epoch": 0.4098172894628959, "grad_norm": 0.11190209537744522, "learning_rate": 5.9257718232503954e-05, "loss": 1.5626, "step": 207680 }, { "epoch": 0.40988043542429237, "grad_norm": 0.1269836276769638, "learning_rate": 5.925137800813927e-05, "loss": 1.5773, "step": 207712 }, { "epoch": 0.4099435813856888, "grad_norm": 0.11494969576597214, "learning_rate": 5.924503778377457e-05, "loss": 1.581, "step": 207744 }, { "epoch": 0.4100067273470852, "grad_norm": 0.10899503529071808, "learning_rate": 5.9238697559409884e-05, "loss": 1.5652, "step": 207776 }, { "epoch": 0.41006987330848166, "grad_norm": 0.1093950867652893, "learning_rate": 5.92323573350452e-05, "loss": 1.5775, "step": 207808 }, { "epoch": 0.4101330192698781, "grad_norm": 0.1146049052476883, "learning_rate": 5.922601711068051e-05, "loss": 1.5834, "step": 207840 }, { "epoch": 0.41019616523127456, "grad_norm": 0.10726942867040634, "learning_rate": 5.921967688631582e-05, "loss": 1.5782, "step": 207872 }, { "epoch": 0.410259311192671, "grad_norm": 0.11150894314050674, "learning_rate": 5.921333666195112e-05, "loss": 1.5709, "step": 207904 }, { "epoch": 0.41032245715406745, "grad_norm": 0.12000482529401779, "learning_rate": 5.9206996437586436e-05, "loss": 1.5766, "step": 207936 }, { "epoch": 0.41038560311546385, "grad_norm": 0.11264950037002563, "learning_rate": 5.920065621322175e-05, "loss": 1.5626, "step": 207968 }, { "epoch": 0.4104487490768603, "grad_norm": 0.1066565290093422, "learning_rate": 5.9194315988857065e-05, "loss": 1.5745, "step": 208000 }, { "epoch": 0.41051189503825675, "grad_norm": 0.10454881191253662, "learning_rate": 5.9187975764492366e-05, "loss": 1.577, "step": 208032 }, { "epoch": 0.4105750409996532, "grad_norm": 0.1147308200597763, "learning_rate": 5.918163554012768e-05, "loss": 1.5574, "step": 208064 }, { "epoch": 0.41063818696104964, "grad_norm": 0.11452020704746246, "learning_rate": 5.917529531576299e-05, "loss": 1.5672, "step": 208096 }, { "epoch": 0.4107013329224461, "grad_norm": 0.12036538124084473, "learning_rate": 5.91689550913983e-05, "loss": 1.5739, "step": 208128 }, { "epoch": 0.4107644788838425, "grad_norm": 0.11893625557422638, "learning_rate": 5.9162614867033604e-05, "loss": 1.5759, "step": 208160 }, { "epoch": 0.41082762484523894, "grad_norm": 0.10815265029668808, "learning_rate": 5.915627464266892e-05, "loss": 1.5693, "step": 208192 }, { "epoch": 0.4108907708066354, "grad_norm": 0.11385013163089752, "learning_rate": 5.914993441830423e-05, "loss": 1.5845, "step": 208224 }, { "epoch": 0.41095391676803184, "grad_norm": 0.11224812269210815, "learning_rate": 5.914359419393954e-05, "loss": 1.5697, "step": 208256 }, { "epoch": 0.4110170627294283, "grad_norm": 0.11066325008869171, "learning_rate": 5.913725396957485e-05, "loss": 1.5756, "step": 208288 }, { "epoch": 0.41108020869082473, "grad_norm": 0.11634143441915512, "learning_rate": 5.913091374521016e-05, "loss": 1.5866, "step": 208320 }, { "epoch": 0.4111433546522211, "grad_norm": 0.10700631141662598, "learning_rate": 5.912457352084547e-05, "loss": 1.5725, "step": 208352 }, { "epoch": 0.4112065006136176, "grad_norm": 0.11063332855701447, "learning_rate": 5.9118233296480786e-05, "loss": 1.582, "step": 208384 }, { "epoch": 0.411269646575014, "grad_norm": 0.11440924555063248, "learning_rate": 5.911189307211609e-05, "loss": 1.5804, "step": 208416 }, { "epoch": 0.4113327925364105, "grad_norm": 0.11152104288339615, "learning_rate": 5.91055528477514e-05, "loss": 1.5719, "step": 208448 }, { "epoch": 0.4113959384978069, "grad_norm": 0.10765744745731354, "learning_rate": 5.9099212623386716e-05, "loss": 1.5747, "step": 208480 }, { "epoch": 0.4114590844592034, "grad_norm": 0.11994259059429169, "learning_rate": 5.9092872399022024e-05, "loss": 1.5839, "step": 208512 }, { "epoch": 0.41152223042059977, "grad_norm": 0.11146427690982819, "learning_rate": 5.908653217465734e-05, "loss": 1.5714, "step": 208544 }, { "epoch": 0.4115853763819962, "grad_norm": 0.11730946600437164, "learning_rate": 5.908019195029264e-05, "loss": 1.5753, "step": 208576 }, { "epoch": 0.41164852234339266, "grad_norm": 0.11481603980064392, "learning_rate": 5.9073851725927954e-05, "loss": 1.5837, "step": 208608 }, { "epoch": 0.4117116683047891, "grad_norm": 0.10346874594688416, "learning_rate": 5.906751150156327e-05, "loss": 1.5672, "step": 208640 }, { "epoch": 0.41177481426618556, "grad_norm": 0.11606725305318832, "learning_rate": 5.9061171277198576e-05, "loss": 1.5687, "step": 208672 }, { "epoch": 0.411837960227582, "grad_norm": 0.11690467596054077, "learning_rate": 5.9054831052833884e-05, "loss": 1.5747, "step": 208704 }, { "epoch": 0.4119011061889784, "grad_norm": 0.11952147632837296, "learning_rate": 5.904849082846919e-05, "loss": 1.559, "step": 208736 }, { "epoch": 0.41196425215037485, "grad_norm": 0.10767176002264023, "learning_rate": 5.9042150604104506e-05, "loss": 1.5723, "step": 208768 }, { "epoch": 0.4120273981117713, "grad_norm": 0.11477220058441162, "learning_rate": 5.903581037973982e-05, "loss": 1.5767, "step": 208800 }, { "epoch": 0.41209054407316775, "grad_norm": 0.1156458705663681, "learning_rate": 5.902947015537512e-05, "loss": 1.5881, "step": 208832 }, { "epoch": 0.4121536900345642, "grad_norm": 0.11916563659906387, "learning_rate": 5.9023129931010436e-05, "loss": 1.5851, "step": 208864 }, { "epoch": 0.41221683599596065, "grad_norm": 0.11724422872066498, "learning_rate": 5.9016789706645744e-05, "loss": 1.5702, "step": 208896 }, { "epoch": 0.41227998195735704, "grad_norm": 0.11063949018716812, "learning_rate": 5.901044948228106e-05, "loss": 1.5661, "step": 208928 }, { "epoch": 0.4123431279187535, "grad_norm": 0.11034804582595825, "learning_rate": 5.900410925791636e-05, "loss": 1.5789, "step": 208960 }, { "epoch": 0.41240627388014994, "grad_norm": 0.11543915420770645, "learning_rate": 5.8997769033551674e-05, "loss": 1.5768, "step": 208992 }, { "epoch": 0.4124694198415464, "grad_norm": 0.11726246029138565, "learning_rate": 5.899142880918699e-05, "loss": 1.5754, "step": 209024 }, { "epoch": 0.41253256580294284, "grad_norm": 0.12239725887775421, "learning_rate": 5.8985088584822304e-05, "loss": 1.5755, "step": 209056 }, { "epoch": 0.4125957117643393, "grad_norm": 0.11276612430810928, "learning_rate": 5.8978748360457605e-05, "loss": 1.5709, "step": 209088 }, { "epoch": 0.41265885772573574, "grad_norm": 0.11039990931749344, "learning_rate": 5.897240813609292e-05, "loss": 1.5782, "step": 209120 }, { "epoch": 0.41272200368713213, "grad_norm": 0.12096954137086868, "learning_rate": 5.896606791172823e-05, "loss": 1.5711, "step": 209152 }, { "epoch": 0.4127851496485286, "grad_norm": 0.10877064615488052, "learning_rate": 5.895972768736354e-05, "loss": 1.5918, "step": 209184 }, { "epoch": 0.41284829560992503, "grad_norm": 0.11142287403345108, "learning_rate": 5.8953387462998856e-05, "loss": 1.5739, "step": 209216 }, { "epoch": 0.4129114415713215, "grad_norm": 0.10661587119102478, "learning_rate": 5.894704723863416e-05, "loss": 1.5662, "step": 209248 }, { "epoch": 0.41297458753271793, "grad_norm": 0.11213047802448273, "learning_rate": 5.894070701426947e-05, "loss": 1.5721, "step": 209280 }, { "epoch": 0.4130377334941144, "grad_norm": 0.1153879463672638, "learning_rate": 5.893436678990478e-05, "loss": 1.5727, "step": 209312 }, { "epoch": 0.41310087945551077, "grad_norm": 0.11843599379062653, "learning_rate": 5.8928026565540094e-05, "loss": 1.5742, "step": 209344 }, { "epoch": 0.4131640254169072, "grad_norm": 0.10982224345207214, "learning_rate": 5.8921686341175395e-05, "loss": 1.5739, "step": 209376 }, { "epoch": 0.41322717137830367, "grad_norm": 0.10911291837692261, "learning_rate": 5.891534611681071e-05, "loss": 1.5661, "step": 209408 }, { "epoch": 0.4132903173397001, "grad_norm": 0.10778553783893585, "learning_rate": 5.8909005892446024e-05, "loss": 1.5761, "step": 209440 }, { "epoch": 0.41335346330109657, "grad_norm": 0.1131877675652504, "learning_rate": 5.890266566808134e-05, "loss": 1.579, "step": 209472 }, { "epoch": 0.413416609262493, "grad_norm": 0.11634394526481628, "learning_rate": 5.889632544371664e-05, "loss": 1.573, "step": 209504 }, { "epoch": 0.4134797552238894, "grad_norm": 0.10641346126794815, "learning_rate": 5.888998521935195e-05, "loss": 1.5679, "step": 209536 }, { "epoch": 0.41354290118528586, "grad_norm": 0.11223439872264862, "learning_rate": 5.888364499498726e-05, "loss": 1.5733, "step": 209568 }, { "epoch": 0.4136060471466823, "grad_norm": 0.1170460656285286, "learning_rate": 5.8877304770622577e-05, "loss": 1.5669, "step": 209600 }, { "epoch": 0.41366919310807876, "grad_norm": 0.10966886579990387, "learning_rate": 5.887096454625788e-05, "loss": 1.5727, "step": 209632 }, { "epoch": 0.4137323390694752, "grad_norm": 0.11373679339885712, "learning_rate": 5.886462432189319e-05, "loss": 1.5856, "step": 209664 }, { "epoch": 0.41379548503087166, "grad_norm": 0.11283385008573532, "learning_rate": 5.885828409752851e-05, "loss": 1.5671, "step": 209696 }, { "epoch": 0.41385863099226805, "grad_norm": 0.1171746626496315, "learning_rate": 5.8851943873163814e-05, "loss": 1.5687, "step": 209728 }, { "epoch": 0.4139217769536645, "grad_norm": 0.10731907933950424, "learning_rate": 5.884560364879913e-05, "loss": 1.5654, "step": 209760 }, { "epoch": 0.41398492291506095, "grad_norm": 0.11476921290159225, "learning_rate": 5.883926342443443e-05, "loss": 1.5884, "step": 209792 }, { "epoch": 0.4140480688764574, "grad_norm": 0.10662003606557846, "learning_rate": 5.8832923200069745e-05, "loss": 1.5797, "step": 209824 }, { "epoch": 0.41411121483785385, "grad_norm": 0.1128525584936142, "learning_rate": 5.882658297570506e-05, "loss": 1.5782, "step": 209856 }, { "epoch": 0.4141743607992503, "grad_norm": 0.11338004469871521, "learning_rate": 5.8820242751340374e-05, "loss": 1.5969, "step": 209888 }, { "epoch": 0.4142375067606467, "grad_norm": 0.11333412677049637, "learning_rate": 5.8813902526975675e-05, "loss": 1.5711, "step": 209920 }, { "epoch": 0.41430065272204314, "grad_norm": 0.11807472258806229, "learning_rate": 5.880756230261098e-05, "loss": 1.5805, "step": 209952 }, { "epoch": 0.4143637986834396, "grad_norm": 0.1206955760717392, "learning_rate": 5.88012220782463e-05, "loss": 1.5732, "step": 209984 }, { "epoch": 0.41442694464483604, "grad_norm": 0.10973561555147171, "learning_rate": 5.879488185388161e-05, "loss": 1.5799, "step": 210016 }, { "epoch": 0.4144900906062325, "grad_norm": 0.11385183781385422, "learning_rate": 5.878854162951691e-05, "loss": 1.5707, "step": 210048 }, { "epoch": 0.41455323656762894, "grad_norm": 0.1270441859960556, "learning_rate": 5.878220140515223e-05, "loss": 1.574, "step": 210080 }, { "epoch": 0.41461638252902533, "grad_norm": 0.11708077043294907, "learning_rate": 5.877586118078754e-05, "loss": 1.576, "step": 210112 }, { "epoch": 0.4146795284904218, "grad_norm": 0.10859125107526779, "learning_rate": 5.876952095642285e-05, "loss": 1.5623, "step": 210144 }, { "epoch": 0.4147426744518182, "grad_norm": 0.11469090729951859, "learning_rate": 5.876318073205816e-05, "loss": 1.5896, "step": 210176 }, { "epoch": 0.4148058204132147, "grad_norm": 0.12022719532251358, "learning_rate": 5.8756840507693465e-05, "loss": 1.5685, "step": 210208 }, { "epoch": 0.4148689663746111, "grad_norm": 0.11533736437559128, "learning_rate": 5.875050028332878e-05, "loss": 1.571, "step": 210240 }, { "epoch": 0.4149321123360076, "grad_norm": 0.11421668529510498, "learning_rate": 5.8744160058964094e-05, "loss": 1.5799, "step": 210272 }, { "epoch": 0.41499525829740397, "grad_norm": 0.10821504890918732, "learning_rate": 5.8737819834599395e-05, "loss": 1.5884, "step": 210304 }, { "epoch": 0.4150584042588004, "grad_norm": 0.11552296578884125, "learning_rate": 5.873147961023471e-05, "loss": 1.5645, "step": 210336 }, { "epoch": 0.41512155022019687, "grad_norm": 0.11359403282403946, "learning_rate": 5.872513938587002e-05, "loss": 1.5773, "step": 210368 }, { "epoch": 0.4151846961815933, "grad_norm": 0.11828986555337906, "learning_rate": 5.871879916150533e-05, "loss": 1.5743, "step": 210400 }, { "epoch": 0.41524784214298976, "grad_norm": 0.11255080252885818, "learning_rate": 5.871245893714065e-05, "loss": 1.582, "step": 210432 }, { "epoch": 0.4153109881043862, "grad_norm": 0.111783467233181, "learning_rate": 5.870611871277595e-05, "loss": 1.5702, "step": 210464 }, { "epoch": 0.4153741340657826, "grad_norm": 0.12021353840827942, "learning_rate": 5.869977848841126e-05, "loss": 1.5721, "step": 210496 }, { "epoch": 0.41543728002717906, "grad_norm": 0.11928527802228928, "learning_rate": 5.869343826404658e-05, "loss": 1.5638, "step": 210528 }, { "epoch": 0.4155004259885755, "grad_norm": 0.11085432767868042, "learning_rate": 5.8687098039681885e-05, "loss": 1.5623, "step": 210560 }, { "epoch": 0.41556357194997195, "grad_norm": 0.10697130113840103, "learning_rate": 5.8680757815317186e-05, "loss": 1.5673, "step": 210592 }, { "epoch": 0.4156267179113684, "grad_norm": 0.10721738636493683, "learning_rate": 5.86744175909525e-05, "loss": 1.5802, "step": 210624 }, { "epoch": 0.41568986387276485, "grad_norm": 0.11581702530384064, "learning_rate": 5.8668077366587815e-05, "loss": 1.5853, "step": 210656 }, { "epoch": 0.41575300983416125, "grad_norm": 0.11685071140527725, "learning_rate": 5.866173714222313e-05, "loss": 1.573, "step": 210688 }, { "epoch": 0.4158161557955577, "grad_norm": 0.12459532171487808, "learning_rate": 5.865539691785843e-05, "loss": 1.5747, "step": 210720 }, { "epoch": 0.41587930175695415, "grad_norm": 0.11517032235860825, "learning_rate": 5.8649056693493745e-05, "loss": 1.5769, "step": 210752 }, { "epoch": 0.4159424477183506, "grad_norm": 0.11076007783412933, "learning_rate": 5.864271646912905e-05, "loss": 1.5768, "step": 210784 }, { "epoch": 0.41600559367974704, "grad_norm": 0.11767956614494324, "learning_rate": 5.863637624476437e-05, "loss": 1.5665, "step": 210816 }, { "epoch": 0.4160687396411435, "grad_norm": 0.11118356883525848, "learning_rate": 5.863003602039967e-05, "loss": 1.5862, "step": 210848 }, { "epoch": 0.41613188560253994, "grad_norm": 0.11311709135770798, "learning_rate": 5.862369579603498e-05, "loss": 1.5821, "step": 210880 }, { "epoch": 0.41619503156393634, "grad_norm": 0.11672328412532806, "learning_rate": 5.86173555716703e-05, "loss": 1.5687, "step": 210912 }, { "epoch": 0.4162581775253328, "grad_norm": 0.11094388365745544, "learning_rate": 5.861101534730561e-05, "loss": 1.5781, "step": 210944 }, { "epoch": 0.41632132348672923, "grad_norm": 0.11598347127437592, "learning_rate": 5.860467512294091e-05, "loss": 1.5817, "step": 210976 }, { "epoch": 0.4163844694481257, "grad_norm": 0.1184142604470253, "learning_rate": 5.859833489857622e-05, "loss": 1.5699, "step": 211008 }, { "epoch": 0.41644761540952213, "grad_norm": 0.11470816284418106, "learning_rate": 5.8591994674211535e-05, "loss": 1.5742, "step": 211040 }, { "epoch": 0.4165107613709186, "grad_norm": 0.11247041076421738, "learning_rate": 5.858565444984685e-05, "loss": 1.5899, "step": 211072 }, { "epoch": 0.416573907332315, "grad_norm": 0.10590722411870956, "learning_rate": 5.8579314225482164e-05, "loss": 1.5901, "step": 211104 }, { "epoch": 0.4166370532937114, "grad_norm": 0.10712088644504547, "learning_rate": 5.8572974001117465e-05, "loss": 1.5613, "step": 211136 }, { "epoch": 0.4167001992551079, "grad_norm": 0.11482211202383041, "learning_rate": 5.856663377675278e-05, "loss": 1.5741, "step": 211168 }, { "epoch": 0.4167633452165043, "grad_norm": 0.11243411898612976, "learning_rate": 5.856029355238809e-05, "loss": 1.5825, "step": 211200 }, { "epoch": 0.41682649117790077, "grad_norm": 0.11756385862827301, "learning_rate": 5.85539533280234e-05, "loss": 1.5844, "step": 211232 }, { "epoch": 0.4168896371392972, "grad_norm": 0.12397975474596024, "learning_rate": 5.8547613103658703e-05, "loss": 1.5693, "step": 211264 }, { "epoch": 0.4169527831006936, "grad_norm": 0.12299605458974838, "learning_rate": 5.854127287929402e-05, "loss": 1.5804, "step": 211296 }, { "epoch": 0.41701592906209006, "grad_norm": 0.11917003244161606, "learning_rate": 5.853493265492933e-05, "loss": 1.5757, "step": 211328 }, { "epoch": 0.4170790750234865, "grad_norm": 0.10854147374629974, "learning_rate": 5.852859243056465e-05, "loss": 1.5834, "step": 211360 }, { "epoch": 0.41714222098488296, "grad_norm": 0.11584977805614471, "learning_rate": 5.852225220619995e-05, "loss": 1.5885, "step": 211392 }, { "epoch": 0.4172053669462794, "grad_norm": 0.11360142379999161, "learning_rate": 5.8515911981835256e-05, "loss": 1.5766, "step": 211424 }, { "epoch": 0.41726851290767586, "grad_norm": 0.11507585644721985, "learning_rate": 5.850957175747057e-05, "loss": 1.5673, "step": 211456 }, { "epoch": 0.41733165886907225, "grad_norm": 0.1230563074350357, "learning_rate": 5.8503231533105885e-05, "loss": 1.5702, "step": 211488 }, { "epoch": 0.4173948048304687, "grad_norm": 0.10865850746631622, "learning_rate": 5.8496891308741186e-05, "loss": 1.5718, "step": 211520 }, { "epoch": 0.41745795079186515, "grad_norm": 0.1129273921251297, "learning_rate": 5.84905510843765e-05, "loss": 1.5635, "step": 211552 }, { "epoch": 0.4175210967532616, "grad_norm": 0.10906870663166046, "learning_rate": 5.8484210860011815e-05, "loss": 1.5698, "step": 211584 }, { "epoch": 0.41758424271465805, "grad_norm": 0.11242571473121643, "learning_rate": 5.847787063564712e-05, "loss": 1.564, "step": 211616 }, { "epoch": 0.4176473886760545, "grad_norm": 0.11419042944908142, "learning_rate": 5.8471530411282424e-05, "loss": 1.5768, "step": 211648 }, { "epoch": 0.4177105346374509, "grad_norm": 0.11575905233621597, "learning_rate": 5.846519018691774e-05, "loss": 1.5718, "step": 211680 }, { "epoch": 0.41777368059884734, "grad_norm": 0.10888403654098511, "learning_rate": 5.845884996255305e-05, "loss": 1.5788, "step": 211712 }, { "epoch": 0.4178368265602438, "grad_norm": 0.11863566190004349, "learning_rate": 5.845250973818837e-05, "loss": 1.5726, "step": 211744 }, { "epoch": 0.41789997252164024, "grad_norm": 0.12143582105636597, "learning_rate": 5.8446169513823675e-05, "loss": 1.5803, "step": 211776 }, { "epoch": 0.4179631184830367, "grad_norm": 0.11661967635154724, "learning_rate": 5.843982928945898e-05, "loss": 1.57, "step": 211808 }, { "epoch": 0.41802626444443314, "grad_norm": 0.12119210511445999, "learning_rate": 5.843348906509429e-05, "loss": 1.5678, "step": 211840 }, { "epoch": 0.41808941040582953, "grad_norm": 0.10788056999444962, "learning_rate": 5.8427148840729606e-05, "loss": 1.5778, "step": 211872 }, { "epoch": 0.418152556367226, "grad_norm": 0.12212945520877838, "learning_rate": 5.842080861636492e-05, "loss": 1.5855, "step": 211904 }, { "epoch": 0.41821570232862243, "grad_norm": 0.11086183786392212, "learning_rate": 5.841446839200022e-05, "loss": 1.5835, "step": 211936 }, { "epoch": 0.4182788482900189, "grad_norm": 0.11380373686552048, "learning_rate": 5.8408128167635536e-05, "loss": 1.5733, "step": 211968 }, { "epoch": 0.41834199425141533, "grad_norm": 0.12136074900627136, "learning_rate": 5.840178794327085e-05, "loss": 1.5837, "step": 212000 }, { "epoch": 0.4184051402128118, "grad_norm": 0.11972972005605698, "learning_rate": 5.839544771890616e-05, "loss": 1.5803, "step": 212032 }, { "epoch": 0.41846828617420817, "grad_norm": 0.12095468491315842, "learning_rate": 5.838910749454146e-05, "loss": 1.5969, "step": 212064 }, { "epoch": 0.4185314321356046, "grad_norm": 0.11016728729009628, "learning_rate": 5.8382767270176774e-05, "loss": 1.5783, "step": 212096 }, { "epoch": 0.41859457809700107, "grad_norm": 0.11938399076461792, "learning_rate": 5.837642704581209e-05, "loss": 1.585, "step": 212128 }, { "epoch": 0.4186577240583975, "grad_norm": 0.11566765606403351, "learning_rate": 5.83700868214474e-05, "loss": 1.5644, "step": 212160 }, { "epoch": 0.41872087001979397, "grad_norm": 0.11348102241754532, "learning_rate": 5.8363746597082704e-05, "loss": 1.5819, "step": 212192 }, { "epoch": 0.4187840159811904, "grad_norm": 0.11203183978796005, "learning_rate": 5.835740637271802e-05, "loss": 1.5709, "step": 212224 }, { "epoch": 0.4188471619425868, "grad_norm": 0.11854086071252823, "learning_rate": 5.8351066148353326e-05, "loss": 1.5731, "step": 212256 }, { "epoch": 0.41891030790398326, "grad_norm": 0.11306841671466827, "learning_rate": 5.834472592398864e-05, "loss": 1.5692, "step": 212288 }, { "epoch": 0.4189734538653797, "grad_norm": 0.109061598777771, "learning_rate": 5.833838569962394e-05, "loss": 1.5798, "step": 212320 }, { "epoch": 0.41903659982677616, "grad_norm": 0.11595797538757324, "learning_rate": 5.8332045475259256e-05, "loss": 1.5812, "step": 212352 }, { "epoch": 0.4190997457881726, "grad_norm": 0.11549539864063263, "learning_rate": 5.832570525089457e-05, "loss": 1.5844, "step": 212384 }, { "epoch": 0.41916289174956906, "grad_norm": 0.11134572327136993, "learning_rate": 5.831936502652988e-05, "loss": 1.5708, "step": 212416 }, { "epoch": 0.4192260377109655, "grad_norm": 0.11597440391778946, "learning_rate": 5.831302480216519e-05, "loss": 1.5806, "step": 212448 }, { "epoch": 0.4192891836723619, "grad_norm": 0.11357767879962921, "learning_rate": 5.8306684577800494e-05, "loss": 1.5736, "step": 212480 }, { "epoch": 0.41935232963375835, "grad_norm": 0.11139217019081116, "learning_rate": 5.830034435343581e-05, "loss": 1.577, "step": 212512 }, { "epoch": 0.4194154755951548, "grad_norm": 0.11596512049436569, "learning_rate": 5.829400412907112e-05, "loss": 1.5669, "step": 212544 }, { "epoch": 0.41947862155655125, "grad_norm": 0.11166244000196457, "learning_rate": 5.828766390470644e-05, "loss": 1.5691, "step": 212576 }, { "epoch": 0.4195417675179477, "grad_norm": 0.10859309136867523, "learning_rate": 5.828132368034174e-05, "loss": 1.5687, "step": 212608 }, { "epoch": 0.41960491347934414, "grad_norm": 0.11687473952770233, "learning_rate": 5.8274983455977053e-05, "loss": 1.5849, "step": 212640 }, { "epoch": 0.41966805944074054, "grad_norm": 0.11086977273225784, "learning_rate": 5.826864323161236e-05, "loss": 1.5768, "step": 212672 }, { "epoch": 0.419731205402137, "grad_norm": 0.1037912368774414, "learning_rate": 5.8262303007247676e-05, "loss": 1.5645, "step": 212704 }, { "epoch": 0.41979435136353344, "grad_norm": 0.13082370162010193, "learning_rate": 5.825596278288298e-05, "loss": 1.5722, "step": 212736 }, { "epoch": 0.4198574973249299, "grad_norm": 0.11623410135507584, "learning_rate": 5.824962255851829e-05, "loss": 1.5809, "step": 212768 }, { "epoch": 0.41992064328632633, "grad_norm": 0.11510179936885834, "learning_rate": 5.8243282334153606e-05, "loss": 1.5852, "step": 212800 }, { "epoch": 0.4199837892477228, "grad_norm": 0.11736845225095749, "learning_rate": 5.8236942109788914e-05, "loss": 1.5779, "step": 212832 }, { "epoch": 0.4200469352091192, "grad_norm": 0.10826859623193741, "learning_rate": 5.823060188542422e-05, "loss": 1.5655, "step": 212864 }, { "epoch": 0.4201100811705156, "grad_norm": 0.10915494710206985, "learning_rate": 5.822426166105953e-05, "loss": 1.575, "step": 212896 }, { "epoch": 0.4201732271319121, "grad_norm": 0.11843578517436981, "learning_rate": 5.8217921436694844e-05, "loss": 1.5774, "step": 212928 }, { "epoch": 0.4202363730933085, "grad_norm": 0.10358286648988724, "learning_rate": 5.821158121233016e-05, "loss": 1.566, "step": 212960 }, { "epoch": 0.420299519054705, "grad_norm": 0.11201329529285431, "learning_rate": 5.820524098796546e-05, "loss": 1.5822, "step": 212992 }, { "epoch": 0.4203626650161014, "grad_norm": 0.11048104614019394, "learning_rate": 5.8198900763600774e-05, "loss": 1.568, "step": 213024 }, { "epoch": 0.4204258109774978, "grad_norm": 0.11986801028251648, "learning_rate": 5.819256053923608e-05, "loss": 1.5719, "step": 213056 }, { "epoch": 0.42048895693889426, "grad_norm": 0.10660749673843384, "learning_rate": 5.8186220314871396e-05, "loss": 1.5737, "step": 213088 }, { "epoch": 0.4205521029002907, "grad_norm": 0.11020977795124054, "learning_rate": 5.817988009050671e-05, "loss": 1.5593, "step": 213120 }, { "epoch": 0.42061524886168716, "grad_norm": 0.13178670406341553, "learning_rate": 5.817353986614201e-05, "loss": 1.5729, "step": 213152 }, { "epoch": 0.4206783948230836, "grad_norm": 0.11084149777889252, "learning_rate": 5.8167199641777326e-05, "loss": 1.5542, "step": 213184 }, { "epoch": 0.42074154078448006, "grad_norm": 0.11329176276922226, "learning_rate": 5.816085941741264e-05, "loss": 1.5791, "step": 213216 }, { "epoch": 0.42080468674587646, "grad_norm": 0.11462894082069397, "learning_rate": 5.815451919304795e-05, "loss": 1.5838, "step": 213248 }, { "epoch": 0.4208678327072729, "grad_norm": 0.11463163793087006, "learning_rate": 5.8148178968683257e-05, "loss": 1.5662, "step": 213280 }, { "epoch": 0.42093097866866935, "grad_norm": 0.11173997074365616, "learning_rate": 5.8141838744318564e-05, "loss": 1.5656, "step": 213312 }, { "epoch": 0.4209941246300658, "grad_norm": 0.1229645386338234, "learning_rate": 5.813549851995388e-05, "loss": 1.5708, "step": 213344 }, { "epoch": 0.42105727059146225, "grad_norm": 0.11651979386806488, "learning_rate": 5.8129158295589193e-05, "loss": 1.5783, "step": 213376 }, { "epoch": 0.4211204165528587, "grad_norm": 0.11476512998342514, "learning_rate": 5.8122818071224494e-05, "loss": 1.5689, "step": 213408 }, { "epoch": 0.4211835625142551, "grad_norm": 0.11378980427980423, "learning_rate": 5.811647784685981e-05, "loss": 1.5692, "step": 213440 }, { "epoch": 0.42124670847565154, "grad_norm": 0.1159764975309372, "learning_rate": 5.811013762249512e-05, "loss": 1.5731, "step": 213472 }, { "epoch": 0.421309854437048, "grad_norm": 0.11891423910856247, "learning_rate": 5.810379739813043e-05, "loss": 1.5789, "step": 213504 }, { "epoch": 0.42137300039844444, "grad_norm": 0.11131199449300766, "learning_rate": 5.809745717376573e-05, "loss": 1.5839, "step": 213536 }, { "epoch": 0.4214361463598409, "grad_norm": 0.11010655015707016, "learning_rate": 5.809111694940105e-05, "loss": 1.5775, "step": 213568 }, { "epoch": 0.42149929232123734, "grad_norm": 0.11034880578517914, "learning_rate": 5.808477672503636e-05, "loss": 1.5776, "step": 213600 }, { "epoch": 0.42156243828263373, "grad_norm": 0.11275182664394379, "learning_rate": 5.8078436500671676e-05, "loss": 1.5695, "step": 213632 }, { "epoch": 0.4216255842440302, "grad_norm": 0.11139434576034546, "learning_rate": 5.807209627630698e-05, "loss": 1.5716, "step": 213664 }, { "epoch": 0.42168873020542663, "grad_norm": 0.11640458554029465, "learning_rate": 5.806575605194229e-05, "loss": 1.5729, "step": 213696 }, { "epoch": 0.4217518761668231, "grad_norm": 0.11134900152683258, "learning_rate": 5.80594158275776e-05, "loss": 1.57, "step": 213728 }, { "epoch": 0.42181502212821953, "grad_norm": 0.11026263236999512, "learning_rate": 5.8053075603212914e-05, "loss": 1.5736, "step": 213760 }, { "epoch": 0.421878168089616, "grad_norm": 0.11456513404846191, "learning_rate": 5.804673537884823e-05, "loss": 1.5746, "step": 213792 }, { "epoch": 0.4219413140510124, "grad_norm": 0.11510647088289261, "learning_rate": 5.804039515448353e-05, "loss": 1.565, "step": 213824 }, { "epoch": 0.4220044600124088, "grad_norm": 0.11348488181829453, "learning_rate": 5.8034054930118844e-05, "loss": 1.5789, "step": 213856 }, { "epoch": 0.42206760597380527, "grad_norm": 0.11129774898290634, "learning_rate": 5.802771470575415e-05, "loss": 1.5756, "step": 213888 }, { "epoch": 0.4221307519352017, "grad_norm": 0.10939408093690872, "learning_rate": 5.8021374481389467e-05, "loss": 1.5751, "step": 213920 }, { "epoch": 0.42219389789659817, "grad_norm": 0.11541156470775604, "learning_rate": 5.801503425702477e-05, "loss": 1.579, "step": 213952 }, { "epoch": 0.4222570438579946, "grad_norm": 0.11404352635145187, "learning_rate": 5.800869403266008e-05, "loss": 1.5834, "step": 213984 }, { "epoch": 0.422320189819391, "grad_norm": 0.10567382723093033, "learning_rate": 5.80023538082954e-05, "loss": 1.5848, "step": 214016 }, { "epoch": 0.42238333578078746, "grad_norm": 0.1112348660826683, "learning_rate": 5.799601358393071e-05, "loss": 1.5696, "step": 214048 }, { "epoch": 0.4224464817421839, "grad_norm": 0.11303667724132538, "learning_rate": 5.798967335956601e-05, "loss": 1.572, "step": 214080 }, { "epoch": 0.42250962770358036, "grad_norm": 0.11266178637742996, "learning_rate": 5.798333313520132e-05, "loss": 1.5786, "step": 214112 }, { "epoch": 0.4225727736649768, "grad_norm": 0.111946240067482, "learning_rate": 5.7976992910836635e-05, "loss": 1.5714, "step": 214144 }, { "epoch": 0.42263591962637326, "grad_norm": 0.11566639691591263, "learning_rate": 5.797065268647195e-05, "loss": 1.5765, "step": 214176 }, { "epoch": 0.4226990655877697, "grad_norm": 0.10516596585512161, "learning_rate": 5.796431246210725e-05, "loss": 1.5638, "step": 214208 }, { "epoch": 0.4227622115491661, "grad_norm": 0.11059646308422089, "learning_rate": 5.7957972237742565e-05, "loss": 1.5635, "step": 214240 }, { "epoch": 0.42282535751056255, "grad_norm": 0.12049796432256699, "learning_rate": 5.795163201337788e-05, "loss": 1.5678, "step": 214272 }, { "epoch": 0.422888503471959, "grad_norm": 0.11232086271047592, "learning_rate": 5.794529178901319e-05, "loss": 1.5747, "step": 214304 }, { "epoch": 0.42295164943335545, "grad_norm": 0.10908524692058563, "learning_rate": 5.7938951564648495e-05, "loss": 1.5734, "step": 214336 }, { "epoch": 0.4230147953947519, "grad_norm": 0.10831624269485474, "learning_rate": 5.79326113402838e-05, "loss": 1.5613, "step": 214368 }, { "epoch": 0.42307794135614835, "grad_norm": 0.12005595862865448, "learning_rate": 5.792627111591912e-05, "loss": 1.5769, "step": 214400 }, { "epoch": 0.42314108731754474, "grad_norm": 0.10607882589101791, "learning_rate": 5.791993089155443e-05, "loss": 1.566, "step": 214432 }, { "epoch": 0.4232042332789412, "grad_norm": 0.11583924293518066, "learning_rate": 5.7913590667189746e-05, "loss": 1.5665, "step": 214464 }, { "epoch": 0.42326737924033764, "grad_norm": 0.11221151053905487, "learning_rate": 5.790725044282505e-05, "loss": 1.5691, "step": 214496 }, { "epoch": 0.4233305252017341, "grad_norm": 0.10671308636665344, "learning_rate": 5.7900910218460355e-05, "loss": 1.5757, "step": 214528 }, { "epoch": 0.42339367116313054, "grad_norm": 0.11174337565898895, "learning_rate": 5.789456999409567e-05, "loss": 1.576, "step": 214560 }, { "epoch": 0.423456817124527, "grad_norm": 0.10919161885976791, "learning_rate": 5.7888229769730984e-05, "loss": 1.5788, "step": 214592 }, { "epoch": 0.4235199630859234, "grad_norm": 0.11323666572570801, "learning_rate": 5.7881889545366285e-05, "loss": 1.5663, "step": 214624 }, { "epoch": 0.42358310904731983, "grad_norm": 0.10981086641550064, "learning_rate": 5.78755493210016e-05, "loss": 1.5776, "step": 214656 }, { "epoch": 0.4236462550087163, "grad_norm": 0.12076502293348312, "learning_rate": 5.7869209096636914e-05, "loss": 1.5684, "step": 214688 }, { "epoch": 0.4237094009701127, "grad_norm": 0.11037950217723846, "learning_rate": 5.786286887227222e-05, "loss": 1.5704, "step": 214720 }, { "epoch": 0.4237725469315092, "grad_norm": 0.10478291660547256, "learning_rate": 5.785652864790752e-05, "loss": 1.5835, "step": 214752 }, { "epoch": 0.4238356928929056, "grad_norm": 0.11317232996225357, "learning_rate": 5.785018842354284e-05, "loss": 1.5653, "step": 214784 }, { "epoch": 0.423898838854302, "grad_norm": 0.1120942011475563, "learning_rate": 5.784384819917815e-05, "loss": 1.5616, "step": 214816 }, { "epoch": 0.42396198481569847, "grad_norm": 0.11217595636844635, "learning_rate": 5.783750797481347e-05, "loss": 1.5706, "step": 214848 }, { "epoch": 0.4240251307770949, "grad_norm": 0.10993542522192001, "learning_rate": 5.783116775044877e-05, "loss": 1.5764, "step": 214880 }, { "epoch": 0.42408827673849137, "grad_norm": 0.1143922507762909, "learning_rate": 5.782482752608408e-05, "loss": 1.5797, "step": 214912 }, { "epoch": 0.4241514226998878, "grad_norm": 0.11156270653009415, "learning_rate": 5.781848730171939e-05, "loss": 1.5682, "step": 214944 }, { "epoch": 0.42421456866128426, "grad_norm": 0.1151362732052803, "learning_rate": 5.7812147077354705e-05, "loss": 1.5718, "step": 214976 }, { "epoch": 0.42427771462268066, "grad_norm": 0.13502606749534607, "learning_rate": 5.7805806852990006e-05, "loss": 1.576, "step": 215008 }, { "epoch": 0.4243408605840771, "grad_norm": 0.11473239958286285, "learning_rate": 5.779946662862532e-05, "loss": 1.5754, "step": 215040 }, { "epoch": 0.42440400654547356, "grad_norm": 0.11435826867818832, "learning_rate": 5.7793126404260635e-05, "loss": 1.5791, "step": 215072 }, { "epoch": 0.42446715250687, "grad_norm": 0.11932343244552612, "learning_rate": 5.778678617989595e-05, "loss": 1.5833, "step": 215104 }, { "epoch": 0.42453029846826645, "grad_norm": 0.10759299248456955, "learning_rate": 5.778044595553126e-05, "loss": 1.5845, "step": 215136 }, { "epoch": 0.4245934444296629, "grad_norm": 0.11628827452659607, "learning_rate": 5.777410573116656e-05, "loss": 1.5732, "step": 215168 }, { "epoch": 0.4246565903910593, "grad_norm": 0.10682403296232224, "learning_rate": 5.776776550680187e-05, "loss": 1.5689, "step": 215200 }, { "epoch": 0.42471973635245575, "grad_norm": 0.1116858646273613, "learning_rate": 5.776142528243719e-05, "loss": 1.5724, "step": 215232 }, { "epoch": 0.4247828823138522, "grad_norm": 0.11500170081853867, "learning_rate": 5.77550850580725e-05, "loss": 1.5661, "step": 215264 }, { "epoch": 0.42484602827524864, "grad_norm": 0.1105748862028122, "learning_rate": 5.77487448337078e-05, "loss": 1.5663, "step": 215296 }, { "epoch": 0.4249091742366451, "grad_norm": 0.12328539043664932, "learning_rate": 5.774240460934312e-05, "loss": 1.5587, "step": 215328 }, { "epoch": 0.42497232019804154, "grad_norm": 0.11484191566705704, "learning_rate": 5.7736064384978425e-05, "loss": 1.5744, "step": 215360 }, { "epoch": 0.42503546615943794, "grad_norm": 0.1185460016131401, "learning_rate": 5.772972416061374e-05, "loss": 1.5848, "step": 215392 }, { "epoch": 0.4250986121208344, "grad_norm": 0.10631705820560455, "learning_rate": 5.772338393624904e-05, "loss": 1.5652, "step": 215424 }, { "epoch": 0.42516175808223083, "grad_norm": 0.1144087016582489, "learning_rate": 5.7717043711884355e-05, "loss": 1.5786, "step": 215456 }, { "epoch": 0.4252249040436273, "grad_norm": 0.1128242239356041, "learning_rate": 5.771070348751967e-05, "loss": 1.5704, "step": 215488 }, { "epoch": 0.42528805000502373, "grad_norm": 0.10929341614246368, "learning_rate": 5.7704363263154985e-05, "loss": 1.56, "step": 215520 }, { "epoch": 0.4253511959664202, "grad_norm": 0.11630631238222122, "learning_rate": 5.7698023038790286e-05, "loss": 1.5629, "step": 215552 }, { "epoch": 0.4254143419278166, "grad_norm": 0.11064088344573975, "learning_rate": 5.769168281442559e-05, "loss": 1.5796, "step": 215584 }, { "epoch": 0.425477487889213, "grad_norm": 0.12014969438314438, "learning_rate": 5.768534259006091e-05, "loss": 1.5718, "step": 215616 }, { "epoch": 0.4255406338506095, "grad_norm": 0.11737433075904846, "learning_rate": 5.767900236569622e-05, "loss": 1.5749, "step": 215648 }, { "epoch": 0.4256037798120059, "grad_norm": 0.11125744879245758, "learning_rate": 5.7672662141331523e-05, "loss": 1.5777, "step": 215680 }, { "epoch": 0.42566692577340237, "grad_norm": 0.11548735201358795, "learning_rate": 5.766632191696684e-05, "loss": 1.5867, "step": 215712 }, { "epoch": 0.4257300717347988, "grad_norm": 0.11033716052770615, "learning_rate": 5.765998169260215e-05, "loss": 1.5676, "step": 215744 }, { "epoch": 0.42579321769619527, "grad_norm": 0.11361586302518845, "learning_rate": 5.765364146823746e-05, "loss": 1.5622, "step": 215776 }, { "epoch": 0.42585636365759166, "grad_norm": 0.11350362002849579, "learning_rate": 5.7647301243872775e-05, "loss": 1.5681, "step": 215808 }, { "epoch": 0.4259195096189881, "grad_norm": 0.11969402432441711, "learning_rate": 5.7640961019508076e-05, "loss": 1.579, "step": 215840 }, { "epoch": 0.42598265558038456, "grad_norm": 0.11505340039730072, "learning_rate": 5.763462079514339e-05, "loss": 1.5593, "step": 215872 }, { "epoch": 0.426045801541781, "grad_norm": 0.11213871836662292, "learning_rate": 5.7628280570778705e-05, "loss": 1.5783, "step": 215904 }, { "epoch": 0.42610894750317746, "grad_norm": 0.11875083297491074, "learning_rate": 5.762194034641401e-05, "loss": 1.5729, "step": 215936 }, { "epoch": 0.4261720934645739, "grad_norm": 0.10955949872732162, "learning_rate": 5.761560012204932e-05, "loss": 1.5589, "step": 215968 }, { "epoch": 0.4262352394259703, "grad_norm": 0.11102863401174545, "learning_rate": 5.760925989768463e-05, "loss": 1.5806, "step": 216000 }, { "epoch": 0.42629838538736675, "grad_norm": 0.10898289829492569, "learning_rate": 5.760291967331994e-05, "loss": 1.5577, "step": 216032 }, { "epoch": 0.4263615313487632, "grad_norm": 0.11119260638952255, "learning_rate": 5.759657944895526e-05, "loss": 1.5503, "step": 216064 }, { "epoch": 0.42642467731015965, "grad_norm": 0.1125345304608345, "learning_rate": 5.759023922459056e-05, "loss": 1.5698, "step": 216096 }, { "epoch": 0.4264878232715561, "grad_norm": 0.12304135411977768, "learning_rate": 5.758389900022587e-05, "loss": 1.5855, "step": 216128 }, { "epoch": 0.42655096923295255, "grad_norm": 0.10996831953525543, "learning_rate": 5.757755877586119e-05, "loss": 1.5827, "step": 216160 }, { "epoch": 0.42661411519434894, "grad_norm": 0.11370484530925751, "learning_rate": 5.7571218551496496e-05, "loss": 1.5817, "step": 216192 }, { "epoch": 0.4266772611557454, "grad_norm": 0.10931141674518585, "learning_rate": 5.7564878327131797e-05, "loss": 1.5655, "step": 216224 }, { "epoch": 0.42674040711714184, "grad_norm": 0.11213477700948715, "learning_rate": 5.755853810276711e-05, "loss": 1.5803, "step": 216256 }, { "epoch": 0.4268035530785383, "grad_norm": 0.11812477558851242, "learning_rate": 5.7552197878402426e-05, "loss": 1.5706, "step": 216288 }, { "epoch": 0.42686669903993474, "grad_norm": 0.12508949637413025, "learning_rate": 5.754585765403774e-05, "loss": 1.5781, "step": 216320 }, { "epoch": 0.4269298450013312, "grad_norm": 0.12047788500785828, "learning_rate": 5.753951742967304e-05, "loss": 1.5608, "step": 216352 }, { "epoch": 0.4269929909627276, "grad_norm": 0.11848469823598862, "learning_rate": 5.7533177205308356e-05, "loss": 1.5662, "step": 216384 }, { "epoch": 0.42705613692412403, "grad_norm": 0.1096002534031868, "learning_rate": 5.7526836980943664e-05, "loss": 1.5619, "step": 216416 }, { "epoch": 0.4271192828855205, "grad_norm": 0.11447635293006897, "learning_rate": 5.752049675657898e-05, "loss": 1.5887, "step": 216448 }, { "epoch": 0.42718242884691693, "grad_norm": 0.10712642967700958, "learning_rate": 5.751415653221429e-05, "loss": 1.5771, "step": 216480 }, { "epoch": 0.4272455748083134, "grad_norm": 0.112143874168396, "learning_rate": 5.7507816307849594e-05, "loss": 1.5499, "step": 216512 }, { "epoch": 0.4273087207697098, "grad_norm": 0.11540811508893967, "learning_rate": 5.750147608348491e-05, "loss": 1.5802, "step": 216544 }, { "epoch": 0.4273718667311062, "grad_norm": 0.1156095638871193, "learning_rate": 5.7495135859120216e-05, "loss": 1.5744, "step": 216576 }, { "epoch": 0.42743501269250267, "grad_norm": 0.10763511806726456, "learning_rate": 5.748879563475553e-05, "loss": 1.5668, "step": 216608 }, { "epoch": 0.4274981586538991, "grad_norm": 0.11237064003944397, "learning_rate": 5.748245541039083e-05, "loss": 1.5695, "step": 216640 }, { "epoch": 0.42756130461529557, "grad_norm": 0.1260167509317398, "learning_rate": 5.7476115186026146e-05, "loss": 1.577, "step": 216672 }, { "epoch": 0.427624450576692, "grad_norm": 0.120932936668396, "learning_rate": 5.746977496166146e-05, "loss": 1.5782, "step": 216704 }, { "epoch": 0.42768759653808847, "grad_norm": 0.11022268980741501, "learning_rate": 5.7463434737296775e-05, "loss": 1.5807, "step": 216736 }, { "epoch": 0.42775074249948486, "grad_norm": 0.11237247288227081, "learning_rate": 5.7457094512932076e-05, "loss": 1.5672, "step": 216768 }, { "epoch": 0.4278138884608813, "grad_norm": 0.10768884420394897, "learning_rate": 5.745075428856739e-05, "loss": 1.5711, "step": 216800 }, { "epoch": 0.42787703442227776, "grad_norm": 0.11572138965129852, "learning_rate": 5.74444140642027e-05, "loss": 1.5737, "step": 216832 }, { "epoch": 0.4279401803836742, "grad_norm": 0.11492817848920822, "learning_rate": 5.743807383983801e-05, "loss": 1.5657, "step": 216864 }, { "epoch": 0.42800332634507066, "grad_norm": 0.1095789223909378, "learning_rate": 5.7431733615473314e-05, "loss": 1.5735, "step": 216896 }, { "epoch": 0.4280664723064671, "grad_norm": 0.10906712710857391, "learning_rate": 5.742539339110863e-05, "loss": 1.5787, "step": 216928 }, { "epoch": 0.4281296182678635, "grad_norm": 0.10804124921560287, "learning_rate": 5.741905316674394e-05, "loss": 1.5786, "step": 216960 }, { "epoch": 0.42819276422925995, "grad_norm": 0.12142904847860336, "learning_rate": 5.741271294237925e-05, "loss": 1.5761, "step": 216992 }, { "epoch": 0.4282559101906564, "grad_norm": 0.11196298897266388, "learning_rate": 5.740637271801456e-05, "loss": 1.5704, "step": 217024 }, { "epoch": 0.42831905615205285, "grad_norm": 0.1128426119685173, "learning_rate": 5.740003249364987e-05, "loss": 1.5792, "step": 217056 }, { "epoch": 0.4283822021134493, "grad_norm": 0.12256114184856415, "learning_rate": 5.739369226928518e-05, "loss": 1.5865, "step": 217088 }, { "epoch": 0.42844534807484574, "grad_norm": 0.11635429412126541, "learning_rate": 5.7387352044920496e-05, "loss": 1.5817, "step": 217120 }, { "epoch": 0.42850849403624214, "grad_norm": 0.1104697734117508, "learning_rate": 5.738101182055581e-05, "loss": 1.5711, "step": 217152 }, { "epoch": 0.4285716399976386, "grad_norm": 0.111278235912323, "learning_rate": 5.737467159619111e-05, "loss": 1.5672, "step": 217184 }, { "epoch": 0.42863478595903504, "grad_norm": 0.10895587503910065, "learning_rate": 5.736833137182642e-05, "loss": 1.5732, "step": 217216 }, { "epoch": 0.4286979319204315, "grad_norm": 0.117510586977005, "learning_rate": 5.7361991147461734e-05, "loss": 1.5647, "step": 217248 }, { "epoch": 0.42876107788182793, "grad_norm": 0.10830023884773254, "learning_rate": 5.735565092309705e-05, "loss": 1.5885, "step": 217280 }, { "epoch": 0.4288242238432244, "grad_norm": 0.1161816194653511, "learning_rate": 5.734931069873235e-05, "loss": 1.563, "step": 217312 }, { "epoch": 0.4288873698046208, "grad_norm": 0.11264339834451675, "learning_rate": 5.7342970474367664e-05, "loss": 1.5744, "step": 217344 }, { "epoch": 0.4289505157660172, "grad_norm": 0.11148174107074738, "learning_rate": 5.733663025000298e-05, "loss": 1.5597, "step": 217376 }, { "epoch": 0.4290136617274137, "grad_norm": 0.11672309786081314, "learning_rate": 5.7330290025638286e-05, "loss": 1.5657, "step": 217408 }, { "epoch": 0.4290768076888101, "grad_norm": 0.1104683056473732, "learning_rate": 5.7323949801273594e-05, "loss": 1.5698, "step": 217440 }, { "epoch": 0.4291399536502066, "grad_norm": 0.109275221824646, "learning_rate": 5.73176095769089e-05, "loss": 1.5652, "step": 217472 }, { "epoch": 0.429203099611603, "grad_norm": 0.11759917438030243, "learning_rate": 5.7311269352544216e-05, "loss": 1.5744, "step": 217504 }, { "epoch": 0.42926624557299947, "grad_norm": 0.11129330098628998, "learning_rate": 5.730492912817953e-05, "loss": 1.5656, "step": 217536 }, { "epoch": 0.42932939153439587, "grad_norm": 0.11111228168010712, "learning_rate": 5.729858890381483e-05, "loss": 1.5607, "step": 217568 }, { "epoch": 0.4293925374957923, "grad_norm": 0.11939235031604767, "learning_rate": 5.7292248679450147e-05, "loss": 1.5801, "step": 217600 }, { "epoch": 0.42945568345718876, "grad_norm": 0.11422089487314224, "learning_rate": 5.7285908455085454e-05, "loss": 1.5748, "step": 217632 }, { "epoch": 0.4295188294185852, "grad_norm": 0.12160011380910873, "learning_rate": 5.727956823072077e-05, "loss": 1.572, "step": 217664 }, { "epoch": 0.42958197537998166, "grad_norm": 0.1145854964852333, "learning_rate": 5.727322800635607e-05, "loss": 1.5721, "step": 217696 }, { "epoch": 0.4296451213413781, "grad_norm": 0.1209818497300148, "learning_rate": 5.7266887781991384e-05, "loss": 1.5702, "step": 217728 }, { "epoch": 0.4297082673027745, "grad_norm": 0.11725013703107834, "learning_rate": 5.72605475576267e-05, "loss": 1.5752, "step": 217760 }, { "epoch": 0.42977141326417095, "grad_norm": 0.11543697118759155, "learning_rate": 5.7254207333262014e-05, "loss": 1.5685, "step": 217792 }, { "epoch": 0.4298345592255674, "grad_norm": 0.11653497815132141, "learning_rate": 5.724786710889732e-05, "loss": 1.5775, "step": 217824 }, { "epoch": 0.42989770518696385, "grad_norm": 0.11175195127725601, "learning_rate": 5.724152688453263e-05, "loss": 1.5761, "step": 217856 }, { "epoch": 0.4299608511483603, "grad_norm": 0.11420013755559921, "learning_rate": 5.723518666016794e-05, "loss": 1.5821, "step": 217888 }, { "epoch": 0.43002399710975675, "grad_norm": 0.12244796007871628, "learning_rate": 5.722884643580325e-05, "loss": 1.5757, "step": 217920 }, { "epoch": 0.43008714307115314, "grad_norm": 0.11154807358980179, "learning_rate": 5.7222506211438566e-05, "loss": 1.557, "step": 217952 }, { "epoch": 0.4301502890325496, "grad_norm": 0.11421848833560944, "learning_rate": 5.721616598707387e-05, "loss": 1.5815, "step": 217984 }, { "epoch": 0.43021343499394604, "grad_norm": 0.1296013742685318, "learning_rate": 5.720982576270918e-05, "loss": 1.5722, "step": 218016 }, { "epoch": 0.4302765809553425, "grad_norm": 0.12777550518512726, "learning_rate": 5.720348553834449e-05, "loss": 1.5817, "step": 218048 }, { "epoch": 0.43033972691673894, "grad_norm": 0.11427032202482224, "learning_rate": 5.7197145313979804e-05, "loss": 1.5782, "step": 218080 }, { "epoch": 0.4304028728781354, "grad_norm": 0.11090344190597534, "learning_rate": 5.7190805089615105e-05, "loss": 1.5622, "step": 218112 }, { "epoch": 0.4304660188395318, "grad_norm": 0.11822871118783951, "learning_rate": 5.718446486525042e-05, "loss": 1.5732, "step": 218144 }, { "epoch": 0.43052916480092823, "grad_norm": 0.11690302193164825, "learning_rate": 5.7178124640885734e-05, "loss": 1.5688, "step": 218176 }, { "epoch": 0.4305923107623247, "grad_norm": 0.10858042538166046, "learning_rate": 5.717178441652105e-05, "loss": 1.574, "step": 218208 }, { "epoch": 0.43065545672372113, "grad_norm": 0.1063372790813446, "learning_rate": 5.716544419215635e-05, "loss": 1.5689, "step": 218240 }, { "epoch": 0.4307186026851176, "grad_norm": 0.11257363110780716, "learning_rate": 5.715910396779166e-05, "loss": 1.5583, "step": 218272 }, { "epoch": 0.43078174864651403, "grad_norm": 0.10806793719530106, "learning_rate": 5.715276374342697e-05, "loss": 1.5576, "step": 218304 }, { "epoch": 0.4308448946079104, "grad_norm": 0.1094035729765892, "learning_rate": 5.7146423519062287e-05, "loss": 1.5715, "step": 218336 }, { "epoch": 0.43090804056930687, "grad_norm": 0.1087304875254631, "learning_rate": 5.714008329469759e-05, "loss": 1.5677, "step": 218368 }, { "epoch": 0.4309711865307033, "grad_norm": 0.11429969221353531, "learning_rate": 5.71337430703329e-05, "loss": 1.5745, "step": 218400 }, { "epoch": 0.43103433249209977, "grad_norm": 0.11125301569700241, "learning_rate": 5.712740284596822e-05, "loss": 1.5671, "step": 218432 }, { "epoch": 0.4310974784534962, "grad_norm": 0.11256994307041168, "learning_rate": 5.7121062621603525e-05, "loss": 1.5491, "step": 218464 }, { "epoch": 0.43116062441489267, "grad_norm": 0.11583176255226135, "learning_rate": 5.711472239723884e-05, "loss": 1.5792, "step": 218496 }, { "epoch": 0.43122377037628906, "grad_norm": 0.11268655955791473, "learning_rate": 5.710838217287414e-05, "loss": 1.5839, "step": 218528 }, { "epoch": 0.4312869163376855, "grad_norm": 0.1278001070022583, "learning_rate": 5.7102041948509455e-05, "loss": 1.5753, "step": 218560 }, { "epoch": 0.43135006229908196, "grad_norm": 0.11566419154405594, "learning_rate": 5.709570172414477e-05, "loss": 1.5705, "step": 218592 }, { "epoch": 0.4314132082604784, "grad_norm": 0.11827423423528671, "learning_rate": 5.7089361499780084e-05, "loss": 1.5763, "step": 218624 }, { "epoch": 0.43147635422187486, "grad_norm": 0.10856103152036667, "learning_rate": 5.7083021275415385e-05, "loss": 1.5671, "step": 218656 }, { "epoch": 0.4315395001832713, "grad_norm": 0.11640467494726181, "learning_rate": 5.707668105105069e-05, "loss": 1.5718, "step": 218688 }, { "epoch": 0.4316026461446677, "grad_norm": 0.12964989244937897, "learning_rate": 5.707034082668601e-05, "loss": 1.5773, "step": 218720 }, { "epoch": 0.43166579210606415, "grad_norm": 0.11477820575237274, "learning_rate": 5.706400060232132e-05, "loss": 1.5856, "step": 218752 }, { "epoch": 0.4317289380674606, "grad_norm": 0.11888802796602249, "learning_rate": 5.705766037795662e-05, "loss": 1.5871, "step": 218784 }, { "epoch": 0.43179208402885705, "grad_norm": 0.11207152903079987, "learning_rate": 5.705132015359194e-05, "loss": 1.5712, "step": 218816 }, { "epoch": 0.4318552299902535, "grad_norm": 0.11219317466020584, "learning_rate": 5.704497992922725e-05, "loss": 1.583, "step": 218848 }, { "epoch": 0.43191837595164995, "grad_norm": 0.10959234088659286, "learning_rate": 5.703863970486256e-05, "loss": 1.5733, "step": 218880 }, { "epoch": 0.43198152191304634, "grad_norm": 0.10953658074140549, "learning_rate": 5.703229948049786e-05, "loss": 1.5606, "step": 218912 }, { "epoch": 0.4320446678744428, "grad_norm": 0.11791291832923889, "learning_rate": 5.7025959256133175e-05, "loss": 1.5746, "step": 218944 }, { "epoch": 0.43210781383583924, "grad_norm": 0.12534630298614502, "learning_rate": 5.701961903176849e-05, "loss": 1.579, "step": 218976 }, { "epoch": 0.4321709597972357, "grad_norm": 0.119520403444767, "learning_rate": 5.7013278807403804e-05, "loss": 1.5841, "step": 219008 }, { "epoch": 0.43223410575863214, "grad_norm": 0.1143878623843193, "learning_rate": 5.7006938583039105e-05, "loss": 1.5856, "step": 219040 }, { "epoch": 0.4322972517200286, "grad_norm": 0.1126607283949852, "learning_rate": 5.700059835867442e-05, "loss": 1.5805, "step": 219072 }, { "epoch": 0.432360397681425, "grad_norm": 0.1200566217303276, "learning_rate": 5.699425813430973e-05, "loss": 1.5676, "step": 219104 }, { "epoch": 0.43242354364282143, "grad_norm": 0.10853078216314316, "learning_rate": 5.698791790994504e-05, "loss": 1.5833, "step": 219136 }, { "epoch": 0.4324866896042179, "grad_norm": 0.11355725675821304, "learning_rate": 5.698157768558036e-05, "loss": 1.5714, "step": 219168 }, { "epoch": 0.4325498355656143, "grad_norm": 0.10941006988286972, "learning_rate": 5.697523746121566e-05, "loss": 1.5691, "step": 219200 }, { "epoch": 0.4326129815270108, "grad_norm": 0.1205168291926384, "learning_rate": 5.696889723685097e-05, "loss": 1.5728, "step": 219232 }, { "epoch": 0.4326761274884072, "grad_norm": 0.1134066954255104, "learning_rate": 5.696255701248629e-05, "loss": 1.5725, "step": 219264 }, { "epoch": 0.4327392734498037, "grad_norm": 0.11044749617576599, "learning_rate": 5.6956216788121595e-05, "loss": 1.5726, "step": 219296 }, { "epoch": 0.43280241941120007, "grad_norm": 0.11439785361289978, "learning_rate": 5.6949876563756896e-05, "loss": 1.5813, "step": 219328 }, { "epoch": 0.4328655653725965, "grad_norm": 0.11201561987400055, "learning_rate": 5.694353633939221e-05, "loss": 1.5738, "step": 219360 }, { "epoch": 0.43292871133399297, "grad_norm": 0.1089145615696907, "learning_rate": 5.6937196115027525e-05, "loss": 1.5848, "step": 219392 }, { "epoch": 0.4329918572953894, "grad_norm": 0.10721936076879501, "learning_rate": 5.693085589066284e-05, "loss": 1.5625, "step": 219424 }, { "epoch": 0.43305500325678586, "grad_norm": 0.12367662787437439, "learning_rate": 5.692451566629814e-05, "loss": 1.57, "step": 219456 }, { "epoch": 0.4331181492181823, "grad_norm": 0.11392983049154282, "learning_rate": 5.6918175441933455e-05, "loss": 1.5743, "step": 219488 }, { "epoch": 0.4331812951795787, "grad_norm": 0.11306256800889969, "learning_rate": 5.691183521756876e-05, "loss": 1.5713, "step": 219520 }, { "epoch": 0.43324444114097516, "grad_norm": 0.1093650683760643, "learning_rate": 5.690549499320408e-05, "loss": 1.5758, "step": 219552 }, { "epoch": 0.4333075871023716, "grad_norm": 0.1123494878411293, "learning_rate": 5.689915476883938e-05, "loss": 1.5713, "step": 219584 }, { "epoch": 0.43337073306376805, "grad_norm": 0.1146850511431694, "learning_rate": 5.689281454447469e-05, "loss": 1.5835, "step": 219616 }, { "epoch": 0.4334338790251645, "grad_norm": 0.10450323671102524, "learning_rate": 5.688647432011001e-05, "loss": 1.5675, "step": 219648 }, { "epoch": 0.43349702498656095, "grad_norm": 0.10758733749389648, "learning_rate": 5.688013409574532e-05, "loss": 1.5722, "step": 219680 }, { "epoch": 0.43356017094795735, "grad_norm": 0.11030951887369156, "learning_rate": 5.687379387138063e-05, "loss": 1.5847, "step": 219712 }, { "epoch": 0.4336233169093538, "grad_norm": 0.11307337135076523, "learning_rate": 5.686745364701593e-05, "loss": 1.5769, "step": 219744 }, { "epoch": 0.43368646287075024, "grad_norm": 0.10753865540027618, "learning_rate": 5.6861113422651245e-05, "loss": 1.5717, "step": 219776 }, { "epoch": 0.4337496088321467, "grad_norm": 0.11488639563322067, "learning_rate": 5.685477319828656e-05, "loss": 1.5746, "step": 219808 }, { "epoch": 0.43381275479354314, "grad_norm": 0.11072096228599548, "learning_rate": 5.6848432973921875e-05, "loss": 1.5644, "step": 219840 }, { "epoch": 0.4338759007549396, "grad_norm": 0.11318612098693848, "learning_rate": 5.6842092749557176e-05, "loss": 1.5757, "step": 219872 }, { "epoch": 0.433939046716336, "grad_norm": 0.11152063310146332, "learning_rate": 5.683575252519249e-05, "loss": 1.582, "step": 219904 }, { "epoch": 0.43400219267773243, "grad_norm": 0.10941016674041748, "learning_rate": 5.68294123008278e-05, "loss": 1.5613, "step": 219936 }, { "epoch": 0.4340653386391289, "grad_norm": 0.1090482622385025, "learning_rate": 5.682307207646311e-05, "loss": 1.5801, "step": 219968 }, { "epoch": 0.43412848460052533, "grad_norm": 0.11290223151445389, "learning_rate": 5.6816731852098413e-05, "loss": 1.5793, "step": 220000 }, { "epoch": 0.4341916305619218, "grad_norm": 0.11280801147222519, "learning_rate": 5.681039162773373e-05, "loss": 1.5752, "step": 220032 }, { "epoch": 0.43425477652331823, "grad_norm": 0.1137833371758461, "learning_rate": 5.680405140336904e-05, "loss": 1.5715, "step": 220064 }, { "epoch": 0.4343179224847146, "grad_norm": 0.11288630217313766, "learning_rate": 5.679771117900435e-05, "loss": 1.5667, "step": 220096 }, { "epoch": 0.4343810684461111, "grad_norm": 0.1094576045870781, "learning_rate": 5.679137095463966e-05, "loss": 1.5807, "step": 220128 }, { "epoch": 0.4344442144075075, "grad_norm": 0.11282413452863693, "learning_rate": 5.6785030730274966e-05, "loss": 1.5823, "step": 220160 }, { "epoch": 0.43450736036890397, "grad_norm": 0.11341885477304459, "learning_rate": 5.677869050591028e-05, "loss": 1.5733, "step": 220192 }, { "epoch": 0.4345705063303004, "grad_norm": 0.11951928585767746, "learning_rate": 5.6772350281545595e-05, "loss": 1.573, "step": 220224 }, { "epoch": 0.43463365229169687, "grad_norm": 0.11937609314918518, "learning_rate": 5.6766010057180896e-05, "loss": 1.5716, "step": 220256 }, { "epoch": 0.43469679825309326, "grad_norm": 0.11358558386564255, "learning_rate": 5.675966983281621e-05, "loss": 1.5756, "step": 220288 }, { "epoch": 0.4347599442144897, "grad_norm": 0.11427044868469238, "learning_rate": 5.6753329608451525e-05, "loss": 1.5769, "step": 220320 }, { "epoch": 0.43482309017588616, "grad_norm": 0.1126299500465393, "learning_rate": 5.674698938408683e-05, "loss": 1.573, "step": 220352 }, { "epoch": 0.4348862361372826, "grad_norm": 0.12942196428775787, "learning_rate": 5.674064915972215e-05, "loss": 1.5747, "step": 220384 }, { "epoch": 0.43494938209867906, "grad_norm": 0.11733990907669067, "learning_rate": 5.673430893535745e-05, "loss": 1.5634, "step": 220416 }, { "epoch": 0.4350125280600755, "grad_norm": 0.1132250502705574, "learning_rate": 5.672796871099276e-05, "loss": 1.5759, "step": 220448 }, { "epoch": 0.4350756740214719, "grad_norm": 0.1102571189403534, "learning_rate": 5.672162848662808e-05, "loss": 1.5608, "step": 220480 }, { "epoch": 0.43513881998286835, "grad_norm": 0.11909134685993195, "learning_rate": 5.6715288262263385e-05, "loss": 1.5681, "step": 220512 }, { "epoch": 0.4352019659442648, "grad_norm": 0.10762350261211395, "learning_rate": 5.670894803789869e-05, "loss": 1.5777, "step": 220544 }, { "epoch": 0.43526511190566125, "grad_norm": 0.10702694952487946, "learning_rate": 5.6702607813534e-05, "loss": 1.5708, "step": 220576 }, { "epoch": 0.4353282578670577, "grad_norm": 0.11215417087078094, "learning_rate": 5.6696267589169316e-05, "loss": 1.5607, "step": 220608 }, { "epoch": 0.43539140382845415, "grad_norm": 0.11287208646535873, "learning_rate": 5.668992736480463e-05, "loss": 1.565, "step": 220640 }, { "epoch": 0.43545454978985054, "grad_norm": 0.11429022997617722, "learning_rate": 5.668358714043993e-05, "loss": 1.5766, "step": 220672 }, { "epoch": 0.435517695751247, "grad_norm": 0.1195773184299469, "learning_rate": 5.6677246916075246e-05, "loss": 1.5743, "step": 220704 }, { "epoch": 0.43558084171264344, "grad_norm": 0.11271505802869797, "learning_rate": 5.6670906691710554e-05, "loss": 1.5755, "step": 220736 }, { "epoch": 0.4356439876740399, "grad_norm": 0.11656443774700165, "learning_rate": 5.666456646734587e-05, "loss": 1.5646, "step": 220768 }, { "epoch": 0.43570713363543634, "grad_norm": 0.11756814271211624, "learning_rate": 5.665822624298117e-05, "loss": 1.5645, "step": 220800 }, { "epoch": 0.4357702795968328, "grad_norm": 0.11012377589941025, "learning_rate": 5.6651886018616484e-05, "loss": 1.5781, "step": 220832 }, { "epoch": 0.43583342555822924, "grad_norm": 0.11887282133102417, "learning_rate": 5.66455457942518e-05, "loss": 1.5688, "step": 220864 }, { "epoch": 0.43589657151962563, "grad_norm": 0.11284030973911285, "learning_rate": 5.663920556988711e-05, "loss": 1.5656, "step": 220896 }, { "epoch": 0.4359597174810221, "grad_norm": 0.11179853230714798, "learning_rate": 5.6632865345522414e-05, "loss": 1.5605, "step": 220928 }, { "epoch": 0.43602286344241853, "grad_norm": 0.11030498892068863, "learning_rate": 5.662652512115773e-05, "loss": 1.5778, "step": 220960 }, { "epoch": 0.436086009403815, "grad_norm": 0.11525856703519821, "learning_rate": 5.6620184896793036e-05, "loss": 1.567, "step": 220992 }, { "epoch": 0.4361491553652114, "grad_norm": 0.11659888923168182, "learning_rate": 5.661384467242835e-05, "loss": 1.5755, "step": 221024 }, { "epoch": 0.4362123013266079, "grad_norm": 0.11479858309030533, "learning_rate": 5.6607504448063665e-05, "loss": 1.5653, "step": 221056 }, { "epoch": 0.43627544728800427, "grad_norm": 0.10940715670585632, "learning_rate": 5.6601164223698966e-05, "loss": 1.5795, "step": 221088 }, { "epoch": 0.4363385932494007, "grad_norm": 0.12156342715024948, "learning_rate": 5.659482399933428e-05, "loss": 1.5707, "step": 221120 }, { "epoch": 0.43640173921079717, "grad_norm": 0.11272057890892029, "learning_rate": 5.658848377496959e-05, "loss": 1.5699, "step": 221152 }, { "epoch": 0.4364648851721936, "grad_norm": 0.10929644852876663, "learning_rate": 5.65821435506049e-05, "loss": 1.575, "step": 221184 }, { "epoch": 0.43652803113359007, "grad_norm": 0.12085824459791183, "learning_rate": 5.6575803326240204e-05, "loss": 1.5657, "step": 221216 }, { "epoch": 0.4365911770949865, "grad_norm": 0.11290782690048218, "learning_rate": 5.656946310187552e-05, "loss": 1.5715, "step": 221248 }, { "epoch": 0.4366543230563829, "grad_norm": 0.11898967623710632, "learning_rate": 5.656312287751083e-05, "loss": 1.5683, "step": 221280 }, { "epoch": 0.43671746901777936, "grad_norm": 0.1148371770977974, "learning_rate": 5.655678265314615e-05, "loss": 1.5605, "step": 221312 }, { "epoch": 0.4367806149791758, "grad_norm": 0.11350247263908386, "learning_rate": 5.655044242878145e-05, "loss": 1.5729, "step": 221344 }, { "epoch": 0.43684376094057226, "grad_norm": 0.11910045146942139, "learning_rate": 5.6544102204416763e-05, "loss": 1.5643, "step": 221376 }, { "epoch": 0.4369069069019687, "grad_norm": 0.11791671067476273, "learning_rate": 5.653776198005207e-05, "loss": 1.5709, "step": 221408 }, { "epoch": 0.43697005286336515, "grad_norm": 0.10673581808805466, "learning_rate": 5.6531421755687386e-05, "loss": 1.5751, "step": 221440 }, { "epoch": 0.43703319882476155, "grad_norm": 0.11276498436927795, "learning_rate": 5.652508153132269e-05, "loss": 1.5636, "step": 221472 }, { "epoch": 0.437096344786158, "grad_norm": 0.11368037760257721, "learning_rate": 5.6518741306958e-05, "loss": 1.5747, "step": 221504 }, { "epoch": 0.43715949074755445, "grad_norm": 0.12395352125167847, "learning_rate": 5.6512401082593316e-05, "loss": 1.5554, "step": 221536 }, { "epoch": 0.4372226367089509, "grad_norm": 0.10890449583530426, "learning_rate": 5.6506060858228624e-05, "loss": 1.5659, "step": 221568 }, { "epoch": 0.43728578267034734, "grad_norm": 0.11172203719615936, "learning_rate": 5.649972063386393e-05, "loss": 1.5746, "step": 221600 }, { "epoch": 0.4373489286317438, "grad_norm": 0.10597629845142365, "learning_rate": 5.649338040949924e-05, "loss": 1.5841, "step": 221632 }, { "epoch": 0.4374120745931402, "grad_norm": 0.11421770602464676, "learning_rate": 5.6487040185134554e-05, "loss": 1.5672, "step": 221664 }, { "epoch": 0.43747522055453664, "grad_norm": 0.12479004263877869, "learning_rate": 5.648069996076987e-05, "loss": 1.561, "step": 221696 }, { "epoch": 0.4375383665159331, "grad_norm": 0.11543096601963043, "learning_rate": 5.647435973640518e-05, "loss": 1.5819, "step": 221728 }, { "epoch": 0.43760151247732954, "grad_norm": 0.11432941257953644, "learning_rate": 5.6468019512040484e-05, "loss": 1.5856, "step": 221760 }, { "epoch": 0.437664658438726, "grad_norm": 0.11683588474988937, "learning_rate": 5.646167928767579e-05, "loss": 1.5702, "step": 221792 }, { "epoch": 0.43772780440012243, "grad_norm": 0.105549655854702, "learning_rate": 5.6455339063311106e-05, "loss": 1.5828, "step": 221824 }, { "epoch": 0.4377909503615188, "grad_norm": 0.1098296195268631, "learning_rate": 5.644899883894642e-05, "loss": 1.5715, "step": 221856 }, { "epoch": 0.4378540963229153, "grad_norm": 0.11159711331129074, "learning_rate": 5.644265861458172e-05, "loss": 1.5694, "step": 221888 }, { "epoch": 0.4379172422843117, "grad_norm": 0.10768149048089981, "learning_rate": 5.6436318390217036e-05, "loss": 1.5761, "step": 221920 }, { "epoch": 0.4379803882457082, "grad_norm": 0.1107068657875061, "learning_rate": 5.642997816585235e-05, "loss": 1.5558, "step": 221952 }, { "epoch": 0.4380435342071046, "grad_norm": 0.11191987246274948, "learning_rate": 5.642363794148766e-05, "loss": 1.5752, "step": 221984 }, { "epoch": 0.4381066801685011, "grad_norm": 0.10686592757701874, "learning_rate": 5.641729771712297e-05, "loss": 1.566, "step": 222016 }, { "epoch": 0.43816982612989747, "grad_norm": 0.1115211769938469, "learning_rate": 5.6410957492758274e-05, "loss": 1.5667, "step": 222048 }, { "epoch": 0.4382329720912939, "grad_norm": 0.11843854933977127, "learning_rate": 5.640461726839359e-05, "loss": 1.5657, "step": 222080 }, { "epoch": 0.43829611805269036, "grad_norm": 0.1118033155798912, "learning_rate": 5.6398277044028904e-05, "loss": 1.5741, "step": 222112 }, { "epoch": 0.4383592640140868, "grad_norm": 0.11383120715618134, "learning_rate": 5.6391936819664205e-05, "loss": 1.5662, "step": 222144 }, { "epoch": 0.43842240997548326, "grad_norm": 0.1308271735906601, "learning_rate": 5.638559659529952e-05, "loss": 1.5664, "step": 222176 }, { "epoch": 0.4384855559368797, "grad_norm": 0.11619142442941666, "learning_rate": 5.637925637093483e-05, "loss": 1.5571, "step": 222208 }, { "epoch": 0.4385487018982761, "grad_norm": 0.10571594536304474, "learning_rate": 5.637291614657014e-05, "loss": 1.5656, "step": 222240 }, { "epoch": 0.43861184785967255, "grad_norm": 0.11335493624210358, "learning_rate": 5.636657592220544e-05, "loss": 1.5706, "step": 222272 }, { "epoch": 0.438674993821069, "grad_norm": 0.1135411486029625, "learning_rate": 5.636023569784076e-05, "loss": 1.5781, "step": 222304 }, { "epoch": 0.43873813978246545, "grad_norm": 0.11494645476341248, "learning_rate": 5.635389547347607e-05, "loss": 1.5787, "step": 222336 }, { "epoch": 0.4388012857438619, "grad_norm": 0.11322207748889923, "learning_rate": 5.6347555249111386e-05, "loss": 1.5772, "step": 222368 }, { "epoch": 0.43886443170525835, "grad_norm": 0.11661253124475479, "learning_rate": 5.6341215024746694e-05, "loss": 1.5762, "step": 222400 }, { "epoch": 0.43892757766665474, "grad_norm": 0.11384445428848267, "learning_rate": 5.6334874800381995e-05, "loss": 1.5707, "step": 222432 }, { "epoch": 0.4389907236280512, "grad_norm": 0.10868734866380692, "learning_rate": 5.632853457601731e-05, "loss": 1.5545, "step": 222464 }, { "epoch": 0.43905386958944764, "grad_norm": 0.10969744622707367, "learning_rate": 5.6322194351652624e-05, "loss": 1.574, "step": 222496 }, { "epoch": 0.4391170155508441, "grad_norm": 0.11876323819160461, "learning_rate": 5.631585412728794e-05, "loss": 1.5576, "step": 222528 }, { "epoch": 0.43918016151224054, "grad_norm": 0.11136127263307571, "learning_rate": 5.630951390292324e-05, "loss": 1.572, "step": 222560 }, { "epoch": 0.439243307473637, "grad_norm": 0.12057178467512131, "learning_rate": 5.6303173678558554e-05, "loss": 1.5735, "step": 222592 }, { "epoch": 0.43930645343503344, "grad_norm": 0.1075592190027237, "learning_rate": 5.629683345419386e-05, "loss": 1.5741, "step": 222624 }, { "epoch": 0.43936959939642983, "grad_norm": 0.11222700774669647, "learning_rate": 5.6290493229829177e-05, "loss": 1.5753, "step": 222656 }, { "epoch": 0.4394327453578263, "grad_norm": 0.1292693316936493, "learning_rate": 5.628415300546448e-05, "loss": 1.5765, "step": 222688 }, { "epoch": 0.43949589131922273, "grad_norm": 0.12229131162166595, "learning_rate": 5.627781278109979e-05, "loss": 1.5642, "step": 222720 }, { "epoch": 0.4395590372806192, "grad_norm": 0.10908335447311401, "learning_rate": 5.627147255673511e-05, "loss": 1.5639, "step": 222752 }, { "epoch": 0.43962218324201563, "grad_norm": 0.10967562347650528, "learning_rate": 5.626513233237042e-05, "loss": 1.5617, "step": 222784 }, { "epoch": 0.4396853292034121, "grad_norm": 0.10927215963602066, "learning_rate": 5.625879210800572e-05, "loss": 1.5724, "step": 222816 }, { "epoch": 0.43974847516480847, "grad_norm": 0.12057118117809296, "learning_rate": 5.625245188364103e-05, "loss": 1.5673, "step": 222848 }, { "epoch": 0.4398116211262049, "grad_norm": 0.10720538347959518, "learning_rate": 5.6246111659276345e-05, "loss": 1.5737, "step": 222880 }, { "epoch": 0.43987476708760137, "grad_norm": 0.10852023959159851, "learning_rate": 5.623977143491166e-05, "loss": 1.5879, "step": 222912 }, { "epoch": 0.4399379130489978, "grad_norm": 0.11148054152727127, "learning_rate": 5.623343121054696e-05, "loss": 1.5604, "step": 222944 }, { "epoch": 0.44000105901039427, "grad_norm": 0.1090625748038292, "learning_rate": 5.6227090986182275e-05, "loss": 1.5623, "step": 222976 }, { "epoch": 0.4400642049717907, "grad_norm": 0.11345841735601425, "learning_rate": 5.622075076181759e-05, "loss": 1.5586, "step": 223008 }, { "epoch": 0.4401273509331871, "grad_norm": 0.1110571026802063, "learning_rate": 5.62144105374529e-05, "loss": 1.5646, "step": 223040 }, { "epoch": 0.44019049689458356, "grad_norm": 0.1142256110906601, "learning_rate": 5.620807031308821e-05, "loss": 1.5691, "step": 223072 }, { "epoch": 0.44025364285598, "grad_norm": 0.11341311782598495, "learning_rate": 5.620173008872351e-05, "loss": 1.5704, "step": 223104 }, { "epoch": 0.44031678881737646, "grad_norm": 0.1143636628985405, "learning_rate": 5.619538986435883e-05, "loss": 1.5726, "step": 223136 }, { "epoch": 0.4403799347787729, "grad_norm": 0.11442039161920547, "learning_rate": 5.618904963999414e-05, "loss": 1.5664, "step": 223168 }, { "epoch": 0.44044308074016936, "grad_norm": 0.1149403378367424, "learning_rate": 5.6182709415629456e-05, "loss": 1.5679, "step": 223200 }, { "epoch": 0.44050622670156575, "grad_norm": 0.12742525339126587, "learning_rate": 5.617636919126476e-05, "loss": 1.5689, "step": 223232 }, { "epoch": 0.4405693726629622, "grad_norm": 0.10830821096897125, "learning_rate": 5.6170028966900065e-05, "loss": 1.5683, "step": 223264 }, { "epoch": 0.44063251862435865, "grad_norm": 0.11148485541343689, "learning_rate": 5.616368874253538e-05, "loss": 1.5631, "step": 223296 }, { "epoch": 0.4406956645857551, "grad_norm": 0.11369830369949341, "learning_rate": 5.6157348518170694e-05, "loss": 1.5668, "step": 223328 }, { "epoch": 0.44075881054715155, "grad_norm": 0.11327175796031952, "learning_rate": 5.6151008293805995e-05, "loss": 1.5615, "step": 223360 }, { "epoch": 0.440821956508548, "grad_norm": 0.11536484211683273, "learning_rate": 5.614466806944131e-05, "loss": 1.5665, "step": 223392 }, { "epoch": 0.4408851024699444, "grad_norm": 0.11269336193799973, "learning_rate": 5.6138327845076624e-05, "loss": 1.5691, "step": 223424 }, { "epoch": 0.44094824843134084, "grad_norm": 0.10846231132745743, "learning_rate": 5.613198762071193e-05, "loss": 1.5747, "step": 223456 }, { "epoch": 0.4410113943927373, "grad_norm": 0.11808638274669647, "learning_rate": 5.612564739634723e-05, "loss": 1.5587, "step": 223488 }, { "epoch": 0.44107454035413374, "grad_norm": 0.10712187737226486, "learning_rate": 5.611930717198255e-05, "loss": 1.5672, "step": 223520 }, { "epoch": 0.4411376863155302, "grad_norm": 0.1203143373131752, "learning_rate": 5.611296694761786e-05, "loss": 1.5547, "step": 223552 }, { "epoch": 0.44120083227692664, "grad_norm": 0.1094353124499321, "learning_rate": 5.610662672325318e-05, "loss": 1.5624, "step": 223584 }, { "epoch": 0.44126397823832303, "grad_norm": 0.11553500592708588, "learning_rate": 5.610028649888848e-05, "loss": 1.5741, "step": 223616 }, { "epoch": 0.4413271241997195, "grad_norm": 0.11306028068065643, "learning_rate": 5.609394627452379e-05, "loss": 1.5613, "step": 223648 }, { "epoch": 0.4413902701611159, "grad_norm": 0.11836506426334381, "learning_rate": 5.60876060501591e-05, "loss": 1.5698, "step": 223680 }, { "epoch": 0.4414534161225124, "grad_norm": 0.11492486298084259, "learning_rate": 5.6081265825794415e-05, "loss": 1.5777, "step": 223712 }, { "epoch": 0.4415165620839088, "grad_norm": 0.11137628555297852, "learning_rate": 5.607492560142973e-05, "loss": 1.5652, "step": 223744 }, { "epoch": 0.4415797080453053, "grad_norm": 0.10922759771347046, "learning_rate": 5.606858537706503e-05, "loss": 1.5645, "step": 223776 }, { "epoch": 0.44164285400670167, "grad_norm": 0.12418708950281143, "learning_rate": 5.6062245152700345e-05, "loss": 1.5639, "step": 223808 }, { "epoch": 0.4417059999680981, "grad_norm": 0.11384117603302002, "learning_rate": 5.605590492833566e-05, "loss": 1.5717, "step": 223840 }, { "epoch": 0.44176914592949457, "grad_norm": 0.12185539305210114, "learning_rate": 5.604956470397097e-05, "loss": 1.5631, "step": 223872 }, { "epoch": 0.441832291890891, "grad_norm": 0.11555115133523941, "learning_rate": 5.604322447960627e-05, "loss": 1.575, "step": 223904 }, { "epoch": 0.44189543785228746, "grad_norm": 0.11330656707286835, "learning_rate": 5.603688425524158e-05, "loss": 1.5627, "step": 223936 }, { "epoch": 0.4419585838136839, "grad_norm": 0.11715026199817657, "learning_rate": 5.60305440308769e-05, "loss": 1.5715, "step": 223968 }, { "epoch": 0.4420217297750803, "grad_norm": 0.11196105182170868, "learning_rate": 5.602420380651221e-05, "loss": 1.5772, "step": 224000 }, { "epoch": 0.44208487573647676, "grad_norm": 0.11672034114599228, "learning_rate": 5.601786358214751e-05, "loss": 1.5755, "step": 224032 }, { "epoch": 0.4421480216978732, "grad_norm": 0.11318805813789368, "learning_rate": 5.601152335778283e-05, "loss": 1.5604, "step": 224064 }, { "epoch": 0.44221116765926965, "grad_norm": 0.11469605565071106, "learning_rate": 5.6005183133418135e-05, "loss": 1.5569, "step": 224096 }, { "epoch": 0.4422743136206661, "grad_norm": 0.10346221923828125, "learning_rate": 5.599884290905345e-05, "loss": 1.5855, "step": 224128 }, { "epoch": 0.44233745958206255, "grad_norm": 0.10945054143667221, "learning_rate": 5.599250268468875e-05, "loss": 1.5611, "step": 224160 }, { "epoch": 0.442400605543459, "grad_norm": 0.11730410158634186, "learning_rate": 5.5986162460324066e-05, "loss": 1.5697, "step": 224192 }, { "epoch": 0.4424637515048554, "grad_norm": 0.1142803505063057, "learning_rate": 5.597982223595938e-05, "loss": 1.5713, "step": 224224 }, { "epoch": 0.44252689746625185, "grad_norm": 0.1277397722005844, "learning_rate": 5.597348201159469e-05, "loss": 1.5721, "step": 224256 }, { "epoch": 0.4425900434276483, "grad_norm": 0.11798173189163208, "learning_rate": 5.5967141787229996e-05, "loss": 1.5756, "step": 224288 }, { "epoch": 0.44265318938904474, "grad_norm": 0.11593777686357498, "learning_rate": 5.5960801562865303e-05, "loss": 1.5678, "step": 224320 }, { "epoch": 0.4427163353504412, "grad_norm": 0.1148558184504509, "learning_rate": 5.595446133850062e-05, "loss": 1.5768, "step": 224352 }, { "epoch": 0.44277948131183764, "grad_norm": 0.11958102136850357, "learning_rate": 5.594812111413593e-05, "loss": 1.5714, "step": 224384 }, { "epoch": 0.44284262727323404, "grad_norm": 0.11051355302333832, "learning_rate": 5.594178088977125e-05, "loss": 1.5574, "step": 224416 }, { "epoch": 0.4429057732346305, "grad_norm": 0.11384843289852142, "learning_rate": 5.593544066540655e-05, "loss": 1.5627, "step": 224448 }, { "epoch": 0.44296891919602693, "grad_norm": 0.11094524711370468, "learning_rate": 5.592910044104186e-05, "loss": 1.571, "step": 224480 }, { "epoch": 0.4430320651574234, "grad_norm": 0.11181817203760147, "learning_rate": 5.592276021667717e-05, "loss": 1.5768, "step": 224512 }, { "epoch": 0.44309521111881983, "grad_norm": 0.11415065079927444, "learning_rate": 5.5916419992312485e-05, "loss": 1.5852, "step": 224544 }, { "epoch": 0.4431583570802163, "grad_norm": 0.11386341601610184, "learning_rate": 5.5910079767947786e-05, "loss": 1.5603, "step": 224576 }, { "epoch": 0.4432215030416127, "grad_norm": 0.10986695438623428, "learning_rate": 5.59037395435831e-05, "loss": 1.5747, "step": 224608 }, { "epoch": 0.4432846490030091, "grad_norm": 0.10902444273233414, "learning_rate": 5.5897399319218415e-05, "loss": 1.574, "step": 224640 }, { "epoch": 0.4433477949644056, "grad_norm": 0.11428419500589371, "learning_rate": 5.589105909485372e-05, "loss": 1.5682, "step": 224672 }, { "epoch": 0.443410940925802, "grad_norm": 0.11163070797920227, "learning_rate": 5.588471887048903e-05, "loss": 1.5745, "step": 224704 }, { "epoch": 0.44347408688719847, "grad_norm": 0.11591023206710815, "learning_rate": 5.587837864612434e-05, "loss": 1.5763, "step": 224736 }, { "epoch": 0.4435372328485949, "grad_norm": 0.11970361322164536, "learning_rate": 5.587203842175965e-05, "loss": 1.5783, "step": 224768 }, { "epoch": 0.4436003788099913, "grad_norm": 0.12001791596412659, "learning_rate": 5.586569819739497e-05, "loss": 1.5835, "step": 224800 }, { "epoch": 0.44366352477138776, "grad_norm": 0.11000420898199081, "learning_rate": 5.585935797303027e-05, "loss": 1.5723, "step": 224832 }, { "epoch": 0.4437266707327842, "grad_norm": 0.1081654354929924, "learning_rate": 5.585301774866558e-05, "loss": 1.5644, "step": 224864 }, { "epoch": 0.44378981669418066, "grad_norm": 0.1101280227303505, "learning_rate": 5.584667752430089e-05, "loss": 1.5772, "step": 224896 }, { "epoch": 0.4438529626555771, "grad_norm": 0.12191268056631088, "learning_rate": 5.5840337299936206e-05, "loss": 1.5798, "step": 224928 }, { "epoch": 0.44391610861697356, "grad_norm": 0.11502295732498169, "learning_rate": 5.5833997075571507e-05, "loss": 1.5643, "step": 224960 }, { "epoch": 0.44397925457836995, "grad_norm": 0.11195903271436691, "learning_rate": 5.582765685120682e-05, "loss": 1.5854, "step": 224992 }, { "epoch": 0.4440424005397664, "grad_norm": 0.1128184124827385, "learning_rate": 5.5821316626842136e-05, "loss": 1.5732, "step": 225024 }, { "epoch": 0.44410554650116285, "grad_norm": 0.12109333276748657, "learning_rate": 5.581497640247745e-05, "loss": 1.5761, "step": 225056 }, { "epoch": 0.4441686924625593, "grad_norm": 0.10844786465167999, "learning_rate": 5.580863617811276e-05, "loss": 1.5692, "step": 225088 }, { "epoch": 0.44423183842395575, "grad_norm": 0.11165008693933487, "learning_rate": 5.5802295953748066e-05, "loss": 1.5773, "step": 225120 }, { "epoch": 0.4442949843853522, "grad_norm": 0.11832918226718903, "learning_rate": 5.5795955729383374e-05, "loss": 1.5771, "step": 225152 }, { "epoch": 0.4443581303467486, "grad_norm": 0.12097255140542984, "learning_rate": 5.578961550501869e-05, "loss": 1.5842, "step": 225184 }, { "epoch": 0.44442127630814504, "grad_norm": 0.11156468838453293, "learning_rate": 5.5783275280654e-05, "loss": 1.5732, "step": 225216 }, { "epoch": 0.4444844222695415, "grad_norm": 0.11373218894004822, "learning_rate": 5.5776935056289304e-05, "loss": 1.5621, "step": 225248 }, { "epoch": 0.44454756823093794, "grad_norm": 0.1159541979432106, "learning_rate": 5.577059483192462e-05, "loss": 1.56, "step": 225280 }, { "epoch": 0.4446107141923344, "grad_norm": 0.10900960117578506, "learning_rate": 5.5764254607559926e-05, "loss": 1.5613, "step": 225312 }, { "epoch": 0.44467386015373084, "grad_norm": 0.10891033709049225, "learning_rate": 5.575791438319524e-05, "loss": 1.5664, "step": 225344 }, { "epoch": 0.44473700611512723, "grad_norm": 0.11245907843112946, "learning_rate": 5.575157415883054e-05, "loss": 1.5779, "step": 225376 }, { "epoch": 0.4448001520765237, "grad_norm": 0.10898572951555252, "learning_rate": 5.5745233934465856e-05, "loss": 1.5696, "step": 225408 }, { "epoch": 0.44486329803792013, "grad_norm": 0.11658014357089996, "learning_rate": 5.573889371010117e-05, "loss": 1.5656, "step": 225440 }, { "epoch": 0.4449264439993166, "grad_norm": 0.10766708850860596, "learning_rate": 5.5732553485736485e-05, "loss": 1.5705, "step": 225472 }, { "epoch": 0.44498958996071303, "grad_norm": 0.12098678946495056, "learning_rate": 5.5726213261371786e-05, "loss": 1.5676, "step": 225504 }, { "epoch": 0.4450527359221095, "grad_norm": 0.1107996255159378, "learning_rate": 5.57198730370071e-05, "loss": 1.5779, "step": 225536 }, { "epoch": 0.44511588188350587, "grad_norm": 0.11110315471887589, "learning_rate": 5.571353281264241e-05, "loss": 1.5708, "step": 225568 }, { "epoch": 0.4451790278449023, "grad_norm": 0.11498510837554932, "learning_rate": 5.570719258827772e-05, "loss": 1.5671, "step": 225600 }, { "epoch": 0.44524217380629877, "grad_norm": 0.11473532766103745, "learning_rate": 5.5700852363913024e-05, "loss": 1.5821, "step": 225632 }, { "epoch": 0.4453053197676952, "grad_norm": 0.11622277647256851, "learning_rate": 5.569451213954834e-05, "loss": 1.5739, "step": 225664 }, { "epoch": 0.44536846572909167, "grad_norm": 0.10744191706180573, "learning_rate": 5.5688171915183653e-05, "loss": 1.5738, "step": 225696 }, { "epoch": 0.4454316116904881, "grad_norm": 0.11692357063293457, "learning_rate": 5.568183169081896e-05, "loss": 1.5703, "step": 225728 }, { "epoch": 0.4454947576518845, "grad_norm": 0.10964972525835037, "learning_rate": 5.5675491466454276e-05, "loss": 1.5685, "step": 225760 }, { "epoch": 0.44555790361328096, "grad_norm": 0.11665034294128418, "learning_rate": 5.566915124208958e-05, "loss": 1.5583, "step": 225792 }, { "epoch": 0.4456210495746774, "grad_norm": 0.11089098453521729, "learning_rate": 5.566281101772489e-05, "loss": 1.5808, "step": 225824 }, { "epoch": 0.44568419553607386, "grad_norm": 0.12039243429899216, "learning_rate": 5.5656470793360206e-05, "loss": 1.5795, "step": 225856 }, { "epoch": 0.4457473414974703, "grad_norm": 0.12202098965644836, "learning_rate": 5.565013056899552e-05, "loss": 1.5655, "step": 225888 }, { "epoch": 0.44581048745886676, "grad_norm": 0.12360382825136185, "learning_rate": 5.564379034463082e-05, "loss": 1.5789, "step": 225920 }, { "epoch": 0.4458736334202632, "grad_norm": 0.10443336516618729, "learning_rate": 5.563745012026613e-05, "loss": 1.5763, "step": 225952 }, { "epoch": 0.4459367793816596, "grad_norm": 0.10910505801439285, "learning_rate": 5.5631109895901444e-05, "loss": 1.5589, "step": 225984 }, { "epoch": 0.44599992534305605, "grad_norm": 0.11002499610185623, "learning_rate": 5.562476967153676e-05, "loss": 1.5648, "step": 226016 }, { "epoch": 0.4460630713044525, "grad_norm": 0.12160021811723709, "learning_rate": 5.561842944717206e-05, "loss": 1.5655, "step": 226048 }, { "epoch": 0.44612621726584895, "grad_norm": 0.11697404831647873, "learning_rate": 5.5612089222807374e-05, "loss": 1.5684, "step": 226080 }, { "epoch": 0.4461893632272454, "grad_norm": 0.1245870590209961, "learning_rate": 5.560574899844269e-05, "loss": 1.573, "step": 226112 }, { "epoch": 0.44625250918864184, "grad_norm": 0.11575856059789658, "learning_rate": 5.5599408774077996e-05, "loss": 1.5684, "step": 226144 }, { "epoch": 0.44631565515003824, "grad_norm": 0.11695855110883713, "learning_rate": 5.5593068549713304e-05, "loss": 1.5709, "step": 226176 }, { "epoch": 0.4463788011114347, "grad_norm": 0.1154797151684761, "learning_rate": 5.558672832534861e-05, "loss": 1.5726, "step": 226208 }, { "epoch": 0.44644194707283114, "grad_norm": 0.12129583954811096, "learning_rate": 5.5580388100983926e-05, "loss": 1.5703, "step": 226240 }, { "epoch": 0.4465050930342276, "grad_norm": 0.1199704259634018, "learning_rate": 5.557404787661924e-05, "loss": 1.57, "step": 226272 }, { "epoch": 0.44656823899562403, "grad_norm": 0.11570403724908829, "learning_rate": 5.556770765225454e-05, "loss": 1.5578, "step": 226304 }, { "epoch": 0.4466313849570205, "grad_norm": 0.10727885365486145, "learning_rate": 5.5561367427889857e-05, "loss": 1.5598, "step": 226336 }, { "epoch": 0.4466945309184169, "grad_norm": 0.12253036350011826, "learning_rate": 5.5555027203525164e-05, "loss": 1.5698, "step": 226368 }, { "epoch": 0.4467576768798133, "grad_norm": 0.10797633230686188, "learning_rate": 5.554868697916048e-05, "loss": 1.5617, "step": 226400 }, { "epoch": 0.4468208228412098, "grad_norm": 0.11121440678834915, "learning_rate": 5.5542346754795794e-05, "loss": 1.563, "step": 226432 }, { "epoch": 0.4468839688026062, "grad_norm": 0.1147451102733612, "learning_rate": 5.5536006530431095e-05, "loss": 1.5654, "step": 226464 }, { "epoch": 0.4469471147640027, "grad_norm": 0.10988610237836838, "learning_rate": 5.552966630606641e-05, "loss": 1.565, "step": 226496 }, { "epoch": 0.4470102607253991, "grad_norm": 0.11073459684848785, "learning_rate": 5.5523326081701724e-05, "loss": 1.579, "step": 226528 }, { "epoch": 0.4470734066867955, "grad_norm": 0.12059857696294785, "learning_rate": 5.551698585733703e-05, "loss": 1.5728, "step": 226560 }, { "epoch": 0.44713655264819196, "grad_norm": 0.11194503307342529, "learning_rate": 5.551064563297233e-05, "loss": 1.5695, "step": 226592 }, { "epoch": 0.4471996986095884, "grad_norm": 0.11144059896469116, "learning_rate": 5.550430540860765e-05, "loss": 1.5699, "step": 226624 }, { "epoch": 0.44726284457098486, "grad_norm": 0.11113991588354111, "learning_rate": 5.549796518424296e-05, "loss": 1.5815, "step": 226656 }, { "epoch": 0.4473259905323813, "grad_norm": 0.1123872920870781, "learning_rate": 5.5491624959878276e-05, "loss": 1.5779, "step": 226688 }, { "epoch": 0.44738913649377776, "grad_norm": 0.11324728280305862, "learning_rate": 5.548528473551358e-05, "loss": 1.5608, "step": 226720 }, { "epoch": 0.44745228245517416, "grad_norm": 0.11159668117761612, "learning_rate": 5.547894451114889e-05, "loss": 1.5596, "step": 226752 }, { "epoch": 0.4475154284165706, "grad_norm": 0.10906250774860382, "learning_rate": 5.54726042867842e-05, "loss": 1.5752, "step": 226784 }, { "epoch": 0.44757857437796705, "grad_norm": 0.10427848249673843, "learning_rate": 5.5466264062419514e-05, "loss": 1.5719, "step": 226816 }, { "epoch": 0.4476417203393635, "grad_norm": 0.11987894773483276, "learning_rate": 5.5459923838054815e-05, "loss": 1.585, "step": 226848 }, { "epoch": 0.44770486630075995, "grad_norm": 0.112233005464077, "learning_rate": 5.545358361369013e-05, "loss": 1.5783, "step": 226880 }, { "epoch": 0.4477680122621564, "grad_norm": 0.11146263778209686, "learning_rate": 5.5447243389325444e-05, "loss": 1.5782, "step": 226912 }, { "epoch": 0.4478311582235528, "grad_norm": 0.10751260817050934, "learning_rate": 5.544090316496076e-05, "loss": 1.5837, "step": 226944 }, { "epoch": 0.44789430418494924, "grad_norm": 0.1107482835650444, "learning_rate": 5.543456294059606e-05, "loss": 1.5846, "step": 226976 }, { "epoch": 0.4479574501463457, "grad_norm": 0.10877012461423874, "learning_rate": 5.542822271623137e-05, "loss": 1.5757, "step": 227008 }, { "epoch": 0.44802059610774214, "grad_norm": 0.10649998486042023, "learning_rate": 5.542188249186668e-05, "loss": 1.5527, "step": 227040 }, { "epoch": 0.4480837420691386, "grad_norm": 0.11101628094911575, "learning_rate": 5.5415542267502e-05, "loss": 1.5585, "step": 227072 }, { "epoch": 0.44814688803053504, "grad_norm": 0.10834810137748718, "learning_rate": 5.540920204313731e-05, "loss": 1.5686, "step": 227104 }, { "epoch": 0.44821003399193143, "grad_norm": 0.11642777174711227, "learning_rate": 5.540286181877261e-05, "loss": 1.5727, "step": 227136 }, { "epoch": 0.4482731799533279, "grad_norm": 0.11910204589366913, "learning_rate": 5.539652159440793e-05, "loss": 1.5733, "step": 227168 }, { "epoch": 0.44833632591472433, "grad_norm": 0.12381482869386673, "learning_rate": 5.5390181370043235e-05, "loss": 1.5716, "step": 227200 }, { "epoch": 0.4483994718761208, "grad_norm": 0.112065888941288, "learning_rate": 5.538384114567855e-05, "loss": 1.5726, "step": 227232 }, { "epoch": 0.44846261783751723, "grad_norm": 0.12411680072546005, "learning_rate": 5.537750092131385e-05, "loss": 1.5627, "step": 227264 }, { "epoch": 0.4485257637989137, "grad_norm": 0.11828617006540298, "learning_rate": 5.5371160696949165e-05, "loss": 1.5684, "step": 227296 }, { "epoch": 0.4485889097603101, "grad_norm": 0.11670609563589096, "learning_rate": 5.536482047258448e-05, "loss": 1.5606, "step": 227328 }, { "epoch": 0.4486520557217065, "grad_norm": 0.10628534853458405, "learning_rate": 5.5358480248219794e-05, "loss": 1.5666, "step": 227360 }, { "epoch": 0.44871520168310297, "grad_norm": 0.11339747905731201, "learning_rate": 5.5352140023855095e-05, "loss": 1.5715, "step": 227392 }, { "epoch": 0.4487783476444994, "grad_norm": 0.1146375760436058, "learning_rate": 5.53457997994904e-05, "loss": 1.5642, "step": 227424 }, { "epoch": 0.44884149360589587, "grad_norm": 0.1137080267071724, "learning_rate": 5.533945957512572e-05, "loss": 1.5756, "step": 227456 }, { "epoch": 0.4489046395672923, "grad_norm": 0.11582580208778381, "learning_rate": 5.533311935076103e-05, "loss": 1.5603, "step": 227488 }, { "epoch": 0.4489677855286887, "grad_norm": 0.11359766870737076, "learning_rate": 5.532677912639633e-05, "loss": 1.5597, "step": 227520 }, { "epoch": 0.44903093149008516, "grad_norm": 0.11395041644573212, "learning_rate": 5.532043890203165e-05, "loss": 1.5744, "step": 227552 }, { "epoch": 0.4490940774514816, "grad_norm": 0.11050038039684296, "learning_rate": 5.531409867766696e-05, "loss": 1.5772, "step": 227584 }, { "epoch": 0.44915722341287806, "grad_norm": 0.117493636906147, "learning_rate": 5.530775845330227e-05, "loss": 1.5722, "step": 227616 }, { "epoch": 0.4492203693742745, "grad_norm": 0.11625221371650696, "learning_rate": 5.530141822893757e-05, "loss": 1.5841, "step": 227648 }, { "epoch": 0.44928351533567096, "grad_norm": 0.11636626720428467, "learning_rate": 5.5295078004572885e-05, "loss": 1.5579, "step": 227680 }, { "epoch": 0.4493466612970674, "grad_norm": 0.12009144574403763, "learning_rate": 5.52887377802082e-05, "loss": 1.5821, "step": 227712 }, { "epoch": 0.4494098072584638, "grad_norm": 0.11495005339384079, "learning_rate": 5.5282397555843514e-05, "loss": 1.5779, "step": 227744 }, { "epoch": 0.44947295321986025, "grad_norm": 0.11084603518247604, "learning_rate": 5.527605733147882e-05, "loss": 1.5736, "step": 227776 }, { "epoch": 0.4495360991812567, "grad_norm": 0.10887359082698822, "learning_rate": 5.526971710711413e-05, "loss": 1.5741, "step": 227808 }, { "epoch": 0.44959924514265315, "grad_norm": 0.114344023168087, "learning_rate": 5.526337688274944e-05, "loss": 1.5728, "step": 227840 }, { "epoch": 0.4496623911040496, "grad_norm": 0.11170511692762375, "learning_rate": 5.525703665838475e-05, "loss": 1.559, "step": 227872 }, { "epoch": 0.44972553706544605, "grad_norm": 0.11704278737306595, "learning_rate": 5.525069643402007e-05, "loss": 1.568, "step": 227904 }, { "epoch": 0.44978868302684244, "grad_norm": 0.10768473893404007, "learning_rate": 5.524435620965537e-05, "loss": 1.562, "step": 227936 }, { "epoch": 0.4498518289882389, "grad_norm": 0.12023501098155975, "learning_rate": 5.523801598529068e-05, "loss": 1.5663, "step": 227968 }, { "epoch": 0.44991497494963534, "grad_norm": 0.11995047330856323, "learning_rate": 5.5231675760926e-05, "loss": 1.5811, "step": 228000 }, { "epoch": 0.4499781209110318, "grad_norm": 0.1100674718618393, "learning_rate": 5.5225335536561305e-05, "loss": 1.5721, "step": 228032 }, { "epoch": 0.45004126687242824, "grad_norm": 0.11524344235658646, "learning_rate": 5.5218995312196606e-05, "loss": 1.5699, "step": 228064 }, { "epoch": 0.4501044128338247, "grad_norm": 0.11840048432350159, "learning_rate": 5.521265508783192e-05, "loss": 1.5662, "step": 228096 }, { "epoch": 0.4501675587952211, "grad_norm": 0.11256358027458191, "learning_rate": 5.5206314863467235e-05, "loss": 1.5577, "step": 228128 }, { "epoch": 0.45023070475661753, "grad_norm": 0.12191502004861832, "learning_rate": 5.519997463910255e-05, "loss": 1.5737, "step": 228160 }, { "epoch": 0.450293850718014, "grad_norm": 0.11852116882801056, "learning_rate": 5.519363441473785e-05, "loss": 1.5714, "step": 228192 }, { "epoch": 0.4503569966794104, "grad_norm": 0.12248721718788147, "learning_rate": 5.5187294190373165e-05, "loss": 1.5668, "step": 228224 }, { "epoch": 0.4504201426408069, "grad_norm": 0.11424346268177032, "learning_rate": 5.518095396600847e-05, "loss": 1.5646, "step": 228256 }, { "epoch": 0.4504832886022033, "grad_norm": 0.11144455522298813, "learning_rate": 5.517461374164379e-05, "loss": 1.5616, "step": 228288 }, { "epoch": 0.4505464345635997, "grad_norm": 0.11450143158435822, "learning_rate": 5.516827351727909e-05, "loss": 1.5719, "step": 228320 }, { "epoch": 0.45060958052499617, "grad_norm": 0.11503936350345612, "learning_rate": 5.51619332929144e-05, "loss": 1.5632, "step": 228352 }, { "epoch": 0.4506727264863926, "grad_norm": 0.11710304766893387, "learning_rate": 5.515559306854972e-05, "loss": 1.577, "step": 228384 }, { "epoch": 0.45073587244778907, "grad_norm": 0.1098899394273758, "learning_rate": 5.5149252844185025e-05, "loss": 1.5784, "step": 228416 }, { "epoch": 0.4507990184091855, "grad_norm": 0.10603275895118713, "learning_rate": 5.514291261982034e-05, "loss": 1.5683, "step": 228448 }, { "epoch": 0.45086216437058196, "grad_norm": 0.1132390946149826, "learning_rate": 5.513657239545564e-05, "loss": 1.5684, "step": 228480 }, { "epoch": 0.45092531033197836, "grad_norm": 0.11591226607561111, "learning_rate": 5.5130232171090955e-05, "loss": 1.5903, "step": 228512 }, { "epoch": 0.4509884562933748, "grad_norm": 0.10810161381959915, "learning_rate": 5.512389194672627e-05, "loss": 1.5749, "step": 228544 }, { "epoch": 0.45105160225477126, "grad_norm": 0.12791648507118225, "learning_rate": 5.5117551722361585e-05, "loss": 1.5773, "step": 228576 }, { "epoch": 0.4511147482161677, "grad_norm": 0.112993985414505, "learning_rate": 5.5111211497996886e-05, "loss": 1.5816, "step": 228608 }, { "epoch": 0.45117789417756415, "grad_norm": 0.11797860264778137, "learning_rate": 5.51048712736322e-05, "loss": 1.5708, "step": 228640 }, { "epoch": 0.4512410401389606, "grad_norm": 0.11705265194177628, "learning_rate": 5.509853104926751e-05, "loss": 1.5576, "step": 228672 }, { "epoch": 0.451304186100357, "grad_norm": 0.11175525188446045, "learning_rate": 5.509219082490282e-05, "loss": 1.5717, "step": 228704 }, { "epoch": 0.45136733206175345, "grad_norm": 0.11266481131315231, "learning_rate": 5.5085850600538124e-05, "loss": 1.5624, "step": 228736 }, { "epoch": 0.4514304780231499, "grad_norm": 0.11547210067510605, "learning_rate": 5.507951037617344e-05, "loss": 1.5707, "step": 228768 }, { "epoch": 0.45149362398454634, "grad_norm": 0.12393493950366974, "learning_rate": 5.507317015180875e-05, "loss": 1.5673, "step": 228800 }, { "epoch": 0.4515567699459428, "grad_norm": 0.11057010293006897, "learning_rate": 5.506682992744406e-05, "loss": 1.5623, "step": 228832 }, { "epoch": 0.45161991590733924, "grad_norm": 0.10849099606275558, "learning_rate": 5.506048970307937e-05, "loss": 1.5706, "step": 228864 }, { "epoch": 0.45168306186873564, "grad_norm": 0.11320403218269348, "learning_rate": 5.5054149478714676e-05, "loss": 1.5733, "step": 228896 }, { "epoch": 0.4517462078301321, "grad_norm": 0.11227769404649734, "learning_rate": 5.504780925434999e-05, "loss": 1.57, "step": 228928 }, { "epoch": 0.45180935379152853, "grad_norm": 0.10526099056005478, "learning_rate": 5.5041469029985305e-05, "loss": 1.5743, "step": 228960 }, { "epoch": 0.451872499752925, "grad_norm": 0.1137545257806778, "learning_rate": 5.5035128805620606e-05, "loss": 1.5719, "step": 228992 }, { "epoch": 0.45193564571432143, "grad_norm": 0.11737561225891113, "learning_rate": 5.502878858125592e-05, "loss": 1.5638, "step": 229024 }, { "epoch": 0.4519987916757179, "grad_norm": 0.11427541822195053, "learning_rate": 5.5022448356891235e-05, "loss": 1.5721, "step": 229056 }, { "epoch": 0.4520619376371143, "grad_norm": 0.10818526893854141, "learning_rate": 5.501610813252654e-05, "loss": 1.5639, "step": 229088 }, { "epoch": 0.4521250835985107, "grad_norm": 0.1136135458946228, "learning_rate": 5.500976790816186e-05, "loss": 1.5714, "step": 229120 }, { "epoch": 0.4521882295599072, "grad_norm": 0.10835583508014679, "learning_rate": 5.500342768379716e-05, "loss": 1.5579, "step": 229152 }, { "epoch": 0.4522513755213036, "grad_norm": 0.10844231396913528, "learning_rate": 5.499708745943247e-05, "loss": 1.5799, "step": 229184 }, { "epoch": 0.45231452148270007, "grad_norm": 0.11937616765499115, "learning_rate": 5.499074723506779e-05, "loss": 1.562, "step": 229216 }, { "epoch": 0.4523776674440965, "grad_norm": 0.11806692183017731, "learning_rate": 5.4984407010703096e-05, "loss": 1.5561, "step": 229248 }, { "epoch": 0.45244081340549297, "grad_norm": 0.1146462932229042, "learning_rate": 5.49780667863384e-05, "loss": 1.5676, "step": 229280 }, { "epoch": 0.45250395936688936, "grad_norm": 0.11114591360092163, "learning_rate": 5.497172656197371e-05, "loss": 1.5774, "step": 229312 }, { "epoch": 0.4525671053282858, "grad_norm": 0.11399415135383606, "learning_rate": 5.4965386337609026e-05, "loss": 1.5711, "step": 229344 }, { "epoch": 0.45263025128968226, "grad_norm": 0.11704014241695404, "learning_rate": 5.495904611324434e-05, "loss": 1.5633, "step": 229376 } ], "logging_steps": 32, "max_steps": 506762, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4096, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 5.505181661216427e+19, "train_batch_size": 15, "trial_name": null, "trial_params": null }