{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3703.703703703704, "eval_steps": 500, "global_step": 60000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.06172839506172839, "learning_rate": 5e-09, "loss": 8.604, "step": 1 }, { "epoch": 1.2345679012345678, "learning_rate": 1e-07, "loss": 8.0873, "step": 20 }, { "epoch": 2.4691358024691357, "learning_rate": 2e-07, "loss": 7.8074, "step": 40 }, { "epoch": 3.7037037037037037, "learning_rate": 3e-07, "loss": 7.0763, "step": 60 }, { "epoch": 4.938271604938271, "learning_rate": 4e-07, "loss": 6.2969, "step": 80 }, { "epoch": 6.172839506172839, "learning_rate": 5e-07, "loss": 5.9334, "step": 100 }, { "epoch": 7.407407407407407, "learning_rate": 6e-07, "loss": 5.4919, "step": 120 }, { "epoch": 8.641975308641975, "learning_rate": 7e-07, "loss": 5.0659, "step": 140 }, { "epoch": 9.876543209876543, "learning_rate": 8e-07, "loss": 4.9089, "step": 160 }, { "epoch": 11.11111111111111, "learning_rate": 9e-07, "loss": 4.7144, "step": 180 }, { "epoch": 12.345679012345679, "learning_rate": 1e-06, "loss": 4.7373, "step": 200 }, { "epoch": 13.580246913580247, "learning_rate": 9.997245010407738e-07, "loss": 4.3848, "step": 220 }, { "epoch": 14.814814814814815, "learning_rate": 9.994490020815477e-07, "loss": 4.3457, "step": 240 }, { "epoch": 16.049382716049383, "learning_rate": 9.991735031223215e-07, "loss": 4.2217, "step": 260 }, { "epoch": 17.28395061728395, "learning_rate": 9.988980041630952e-07, "loss": 4.1273, "step": 280 }, { "epoch": 18.51851851851852, "learning_rate": 9.986225052038692e-07, "loss": 4.0123, "step": 300 }, { "epoch": 19.753086419753085, "learning_rate": 9.98347006244643e-07, "loss": 4.0784, "step": 320 }, { "epoch": 20.987654320987655, "learning_rate": 9.98071507285417e-07, "loss": 3.9415, "step": 340 }, { "epoch": 22.22222222222222, "learning_rate": 9.977960083261906e-07, "loss": 3.843, "step": 360 }, { "epoch": 23.45679012345679, "learning_rate": 9.975205093669647e-07, "loss": 3.8174, "step": 380 }, { "epoch": 24.691358024691358, "learning_rate": 9.972450104077383e-07, "loss": 3.7376, "step": 400 }, { "epoch": 25.925925925925927, "learning_rate": 9.969695114485122e-07, "loss": 3.7876, "step": 420 }, { "epoch": 27.160493827160494, "learning_rate": 9.96694012489286e-07, "loss": 3.7585, "step": 440 }, { "epoch": 28.395061728395063, "learning_rate": 9.9641851353006e-07, "loss": 3.6304, "step": 460 }, { "epoch": 29.62962962962963, "learning_rate": 9.961430145708338e-07, "loss": 3.6751, "step": 480 }, { "epoch": 30.864197530864196, "learning_rate": 9.958675156116076e-07, "loss": 3.677, "step": 500 }, { "epoch": 32.098765432098766, "learning_rate": 9.955920166523815e-07, "loss": 3.6556, "step": 520 }, { "epoch": 33.333333333333336, "learning_rate": 9.953165176931551e-07, "loss": 3.5897, "step": 540 }, { "epoch": 34.5679012345679, "learning_rate": 9.950410187339292e-07, "loss": 3.7304, "step": 560 }, { "epoch": 35.80246913580247, "learning_rate": 9.94765519774703e-07, "loss": 3.568, "step": 580 }, { "epoch": 37.03703703703704, "learning_rate": 9.94490020815477e-07, "loss": 3.6207, "step": 600 }, { "epoch": 38.27160493827161, "learning_rate": 9.942145218562508e-07, "loss": 3.4759, "step": 620 }, { "epoch": 39.50617283950617, "learning_rate": 9.939390228970246e-07, "loss": 3.4513, "step": 640 }, { "epoch": 40.74074074074074, "learning_rate": 9.936635239377985e-07, "loss": 3.372, "step": 660 }, { "epoch": 41.97530864197531, "learning_rate": 9.933880249785724e-07, "loss": 3.2601, "step": 680 }, { "epoch": 43.20987654320987, "learning_rate": 9.93112526019346e-07, "loss": 3.3071, "step": 700 }, { "epoch": 44.44444444444444, "learning_rate": 9.9283702706012e-07, "loss": 3.2768, "step": 720 }, { "epoch": 45.67901234567901, "learning_rate": 9.925615281008937e-07, "loss": 3.3103, "step": 740 }, { "epoch": 46.91358024691358, "learning_rate": 9.922860291416678e-07, "loss": 3.2807, "step": 760 }, { "epoch": 48.148148148148145, "learning_rate": 9.920105301824414e-07, "loss": 3.1839, "step": 780 }, { "epoch": 49.382716049382715, "learning_rate": 9.917350312232155e-07, "loss": 3.1689, "step": 800 }, { "epoch": 50.617283950617285, "learning_rate": 9.914595322639892e-07, "loss": 3.17, "step": 820 }, { "epoch": 51.851851851851855, "learning_rate": 9.91184033304763e-07, "loss": 3.1598, "step": 840 }, { "epoch": 53.08641975308642, "learning_rate": 9.909085343455369e-07, "loss": 3.1215, "step": 860 }, { "epoch": 54.32098765432099, "learning_rate": 9.906330353863107e-07, "loss": 3.102, "step": 880 }, { "epoch": 55.55555555555556, "learning_rate": 9.903575364270846e-07, "loss": 3.0819, "step": 900 }, { "epoch": 56.79012345679013, "learning_rate": 9.900820374678584e-07, "loss": 3.0729, "step": 920 }, { "epoch": 58.02469135802469, "learning_rate": 9.898065385086323e-07, "loss": 3.0639, "step": 940 }, { "epoch": 59.25925925925926, "learning_rate": 9.89531039549406e-07, "loss": 3.0172, "step": 960 }, { "epoch": 60.49382716049383, "learning_rate": 9.8925554059018e-07, "loss": 3.0463, "step": 980 }, { "epoch": 61.72839506172839, "learning_rate": 9.889800416309537e-07, "loss": 2.9424, "step": 1000 }, { "epoch": 62.96296296296296, "learning_rate": 9.887045426717277e-07, "loss": 3.0018, "step": 1020 }, { "epoch": 64.19753086419753, "learning_rate": 9.884290437125014e-07, "loss": 3.0268, "step": 1040 }, { "epoch": 65.4320987654321, "learning_rate": 9.881535447532755e-07, "loss": 3.0157, "step": 1060 }, { "epoch": 66.66666666666667, "learning_rate": 9.878780457940491e-07, "loss": 2.9853, "step": 1080 }, { "epoch": 67.90123456790124, "learning_rate": 9.87602546834823e-07, "loss": 2.8881, "step": 1100 }, { "epoch": 69.1358024691358, "learning_rate": 9.873270478755968e-07, "loss": 2.965, "step": 1120 }, { "epoch": 70.37037037037037, "learning_rate": 9.870515489163707e-07, "loss": 2.9127, "step": 1140 }, { "epoch": 71.60493827160494, "learning_rate": 9.867760499571445e-07, "loss": 2.8881, "step": 1160 }, { "epoch": 72.8395061728395, "learning_rate": 9.865005509979184e-07, "loss": 2.8964, "step": 1180 }, { "epoch": 74.07407407407408, "learning_rate": 9.862250520386923e-07, "loss": 2.932, "step": 1200 }, { "epoch": 75.30864197530865, "learning_rate": 9.85949553079466e-07, "loss": 2.9062, "step": 1220 }, { "epoch": 76.54320987654322, "learning_rate": 9.8567405412024e-07, "loss": 2.9141, "step": 1240 }, { "epoch": 77.77777777777777, "learning_rate": 9.853985551610136e-07, "loss": 2.8072, "step": 1260 }, { "epoch": 79.01234567901234, "learning_rate": 9.851230562017877e-07, "loss": 2.8403, "step": 1280 }, { "epoch": 80.24691358024691, "learning_rate": 9.848475572425613e-07, "loss": 2.8484, "step": 1300 }, { "epoch": 81.48148148148148, "learning_rate": 9.845720582833354e-07, "loss": 2.8565, "step": 1320 }, { "epoch": 82.71604938271605, "learning_rate": 9.842965593241093e-07, "loss": 2.8241, "step": 1340 }, { "epoch": 83.95061728395062, "learning_rate": 9.840210603648831e-07, "loss": 2.8383, "step": 1360 }, { "epoch": 85.18518518518519, "learning_rate": 9.837455614056568e-07, "loss": 2.8188, "step": 1380 }, { "epoch": 86.41975308641975, "learning_rate": 9.834700624464309e-07, "loss": 2.8681, "step": 1400 }, { "epoch": 87.65432098765432, "learning_rate": 9.831945634872045e-07, "loss": 2.7453, "step": 1420 }, { "epoch": 88.88888888888889, "learning_rate": 9.829190645279786e-07, "loss": 2.7488, "step": 1440 }, { "epoch": 90.12345679012346, "learning_rate": 9.826435655687522e-07, "loss": 2.7859, "step": 1460 }, { "epoch": 91.35802469135803, "learning_rate": 9.82368066609526e-07, "loss": 2.8079, "step": 1480 }, { "epoch": 92.5925925925926, "learning_rate": 9.820925676503e-07, "loss": 2.717, "step": 1500 }, { "epoch": 93.82716049382717, "learning_rate": 9.818170686910738e-07, "loss": 2.7502, "step": 1520 }, { "epoch": 95.06172839506173, "learning_rate": 9.815415697318477e-07, "loss": 2.7395, "step": 1540 }, { "epoch": 96.29629629629629, "learning_rate": 9.812660707726215e-07, "loss": 2.7172, "step": 1560 }, { "epoch": 97.53086419753086, "learning_rate": 9.809905718133954e-07, "loss": 2.6894, "step": 1580 }, { "epoch": 98.76543209876543, "learning_rate": 9.807150728541692e-07, "loss": 2.7527, "step": 1600 }, { "epoch": 100.0, "learning_rate": 9.80439573894943e-07, "loss": 2.7461, "step": 1620 }, { "epoch": 101.23456790123457, "learning_rate": 9.801640749357167e-07, "loss": 2.782, "step": 1640 }, { "epoch": 102.46913580246914, "learning_rate": 9.798885759764908e-07, "loss": 2.7753, "step": 1660 }, { "epoch": 103.70370370370371, "learning_rate": 9.796130770172645e-07, "loss": 2.7902, "step": 1680 }, { "epoch": 104.93827160493827, "learning_rate": 9.793375780580385e-07, "loss": 2.7305, "step": 1700 }, { "epoch": 106.17283950617283, "learning_rate": 9.790620790988122e-07, "loss": 2.7476, "step": 1720 }, { "epoch": 107.4074074074074, "learning_rate": 9.787865801395862e-07, "loss": 2.715, "step": 1740 }, { "epoch": 108.64197530864197, "learning_rate": 9.785110811803599e-07, "loss": 2.6883, "step": 1760 }, { "epoch": 109.87654320987654, "learning_rate": 9.782355822211337e-07, "loss": 2.6921, "step": 1780 }, { "epoch": 111.11111111111111, "learning_rate": 9.779600832619076e-07, "loss": 2.7132, "step": 1800 }, { "epoch": 112.34567901234568, "learning_rate": 9.776845843026815e-07, "loss": 2.6779, "step": 1820 }, { "epoch": 113.58024691358025, "learning_rate": 9.774090853434553e-07, "loss": 2.6119, "step": 1840 }, { "epoch": 114.81481481481481, "learning_rate": 9.771335863842292e-07, "loss": 2.6716, "step": 1860 }, { "epoch": 116.04938271604938, "learning_rate": 9.76858087425003e-07, "loss": 2.6666, "step": 1880 }, { "epoch": 117.28395061728395, "learning_rate": 9.765825884657767e-07, "loss": 2.6476, "step": 1900 }, { "epoch": 118.51851851851852, "learning_rate": 9.763070895065508e-07, "loss": 2.6393, "step": 1920 }, { "epoch": 119.75308641975309, "learning_rate": 9.760315905473244e-07, "loss": 2.627, "step": 1940 }, { "epoch": 120.98765432098766, "learning_rate": 9.757560915880985e-07, "loss": 2.6067, "step": 1960 }, { "epoch": 122.22222222222223, "learning_rate": 9.754805926288721e-07, "loss": 2.6015, "step": 1980 }, { "epoch": 123.45679012345678, "learning_rate": 9.752050936696462e-07, "loss": 2.6428, "step": 2000 }, { "epoch": 124.69135802469135, "learning_rate": 9.749295947104198e-07, "loss": 2.6251, "step": 2020 }, { "epoch": 125.92592592592592, "learning_rate": 9.746540957511937e-07, "loss": 2.655, "step": 2040 }, { "epoch": 127.1604938271605, "learning_rate": 9.743785967919676e-07, "loss": 2.5351, "step": 2060 }, { "epoch": 128.39506172839506, "learning_rate": 9.741030978327416e-07, "loss": 2.582, "step": 2080 }, { "epoch": 129.62962962962962, "learning_rate": 9.738275988735153e-07, "loss": 2.5874, "step": 2100 }, { "epoch": 130.8641975308642, "learning_rate": 9.735520999142893e-07, "loss": 2.5603, "step": 2120 }, { "epoch": 132.09876543209876, "learning_rate": 9.73276600955063e-07, "loss": 2.5398, "step": 2140 }, { "epoch": 133.33333333333334, "learning_rate": 9.730011019958369e-07, "loss": 2.5265, "step": 2160 }, { "epoch": 134.5679012345679, "learning_rate": 9.727256030366107e-07, "loss": 2.6271, "step": 2180 }, { "epoch": 135.80246913580248, "learning_rate": 9.724501040773846e-07, "loss": 2.5626, "step": 2200 }, { "epoch": 137.03703703703704, "learning_rate": 9.721746051181584e-07, "loss": 2.5425, "step": 2220 }, { "epoch": 138.2716049382716, "learning_rate": 9.718991061589323e-07, "loss": 2.5251, "step": 2240 }, { "epoch": 139.50617283950618, "learning_rate": 9.716236071997061e-07, "loss": 2.5817, "step": 2260 }, { "epoch": 140.74074074074073, "learning_rate": 9.7134810824048e-07, "loss": 2.4888, "step": 2280 }, { "epoch": 141.97530864197532, "learning_rate": 9.710726092812539e-07, "loss": 2.5426, "step": 2300 }, { "epoch": 143.20987654320987, "learning_rate": 9.707971103220275e-07, "loss": 2.5293, "step": 2320 }, { "epoch": 144.44444444444446, "learning_rate": 9.705216113628016e-07, "loss": 2.4874, "step": 2340 }, { "epoch": 145.679012345679, "learning_rate": 9.702461124035752e-07, "loss": 2.5487, "step": 2360 }, { "epoch": 146.91358024691357, "learning_rate": 9.699706134443493e-07, "loss": 2.5153, "step": 2380 }, { "epoch": 148.14814814814815, "learning_rate": 9.69695114485123e-07, "loss": 2.4468, "step": 2400 }, { "epoch": 149.3827160493827, "learning_rate": 9.69419615525897e-07, "loss": 2.4731, "step": 2420 }, { "epoch": 150.6172839506173, "learning_rate": 9.691441165666707e-07, "loss": 2.5007, "step": 2440 }, { "epoch": 151.85185185185185, "learning_rate": 9.688686176074445e-07, "loss": 2.4948, "step": 2460 }, { "epoch": 153.08641975308643, "learning_rate": 9.685931186482184e-07, "loss": 2.4867, "step": 2480 }, { "epoch": 154.320987654321, "learning_rate": 9.683176196889922e-07, "loss": 2.4654, "step": 2500 }, { "epoch": 155.55555555555554, "learning_rate": 9.68042120729766e-07, "loss": 2.4621, "step": 2520 }, { "epoch": 156.79012345679013, "learning_rate": 9.6776662177054e-07, "loss": 2.4738, "step": 2540 }, { "epoch": 158.02469135802468, "learning_rate": 9.674911228113138e-07, "loss": 2.4562, "step": 2560 }, { "epoch": 159.25925925925927, "learning_rate": 9.672156238520875e-07, "loss": 2.4937, "step": 2580 }, { "epoch": 160.49382716049382, "learning_rate": 9.669401248928615e-07, "loss": 2.4574, "step": 2600 }, { "epoch": 161.7283950617284, "learning_rate": 9.666646259336352e-07, "loss": 2.4642, "step": 2620 }, { "epoch": 162.96296296296296, "learning_rate": 9.663891269744093e-07, "loss": 2.4736, "step": 2640 }, { "epoch": 164.19753086419752, "learning_rate": 9.66113628015183e-07, "loss": 2.4926, "step": 2660 }, { "epoch": 165.4320987654321, "learning_rate": 9.65838129055957e-07, "loss": 2.4323, "step": 2680 }, { "epoch": 166.66666666666666, "learning_rate": 9.655626300967306e-07, "loss": 2.4508, "step": 2700 }, { "epoch": 167.90123456790124, "learning_rate": 9.652871311375045e-07, "loss": 2.4295, "step": 2720 }, { "epoch": 169.1358024691358, "learning_rate": 9.650116321782783e-07, "loss": 2.4753, "step": 2740 }, { "epoch": 170.37037037037038, "learning_rate": 9.647361332190522e-07, "loss": 2.4323, "step": 2760 }, { "epoch": 171.60493827160494, "learning_rate": 9.64460634259826e-07, "loss": 2.392, "step": 2780 }, { "epoch": 172.8395061728395, "learning_rate": 9.641851353006e-07, "loss": 2.4579, "step": 2800 }, { "epoch": 174.07407407407408, "learning_rate": 9.639096363413738e-07, "loss": 2.4091, "step": 2820 }, { "epoch": 175.30864197530863, "learning_rate": 9.636341373821476e-07, "loss": 2.3802, "step": 2840 }, { "epoch": 176.54320987654322, "learning_rate": 9.633586384229215e-07, "loss": 2.3483, "step": 2860 }, { "epoch": 177.77777777777777, "learning_rate": 9.630831394636953e-07, "loss": 2.374, "step": 2880 }, { "epoch": 179.01234567901236, "learning_rate": 9.628076405044692e-07, "loss": 2.414, "step": 2900 }, { "epoch": 180.2469135802469, "learning_rate": 9.62532141545243e-07, "loss": 2.3971, "step": 2920 }, { "epoch": 181.4814814814815, "learning_rate": 9.62256642586017e-07, "loss": 2.3508, "step": 2940 }, { "epoch": 182.71604938271605, "learning_rate": 9.619811436267908e-07, "loss": 2.4072, "step": 2960 }, { "epoch": 183.9506172839506, "learning_rate": 9.617056446675646e-07, "loss": 2.3853, "step": 2980 }, { "epoch": 185.1851851851852, "learning_rate": 9.614301457083383e-07, "loss": 2.4256, "step": 3000 }, { "epoch": 186.41975308641975, "learning_rate": 9.611546467491124e-07, "loss": 2.3795, "step": 3020 }, { "epoch": 187.65432098765433, "learning_rate": 9.60879147789886e-07, "loss": 2.3763, "step": 3040 }, { "epoch": 188.88888888888889, "learning_rate": 9.6060364883066e-07, "loss": 2.3474, "step": 3060 }, { "epoch": 190.12345679012347, "learning_rate": 9.603281498714337e-07, "loss": 2.4074, "step": 3080 }, { "epoch": 191.35802469135803, "learning_rate": 9.600526509122076e-07, "loss": 2.3438, "step": 3100 }, { "epoch": 192.59259259259258, "learning_rate": 9.597771519529814e-07, "loss": 2.3209, "step": 3120 }, { "epoch": 193.82716049382717, "learning_rate": 9.595016529937553e-07, "loss": 2.3364, "step": 3140 }, { "epoch": 195.06172839506172, "learning_rate": 9.592261540345292e-07, "loss": 2.3624, "step": 3160 }, { "epoch": 196.2962962962963, "learning_rate": 9.58950655075303e-07, "loss": 2.3004, "step": 3180 }, { "epoch": 197.53086419753086, "learning_rate": 9.586751561160769e-07, "loss": 2.2904, "step": 3200 }, { "epoch": 198.76543209876544, "learning_rate": 9.583996571568507e-07, "loss": 2.2971, "step": 3220 }, { "epoch": 200.0, "learning_rate": 9.581241581976246e-07, "loss": 2.3364, "step": 3240 }, { "epoch": 201.23456790123456, "learning_rate": 9.578486592383982e-07, "loss": 2.3191, "step": 3260 }, { "epoch": 202.46913580246914, "learning_rate": 9.575731602791723e-07, "loss": 2.3663, "step": 3280 }, { "epoch": 203.7037037037037, "learning_rate": 9.57297661319946e-07, "loss": 2.3649, "step": 3300 }, { "epoch": 204.93827160493828, "learning_rate": 9.5702216236072e-07, "loss": 2.2762, "step": 3320 }, { "epoch": 206.17283950617283, "learning_rate": 9.567466634014937e-07, "loss": 2.315, "step": 3340 }, { "epoch": 207.40740740740742, "learning_rate": 9.564711644422677e-07, "loss": 2.2924, "step": 3360 }, { "epoch": 208.64197530864197, "learning_rate": 9.561956654830414e-07, "loss": 2.2383, "step": 3380 }, { "epoch": 209.87654320987653, "learning_rate": 9.559201665238153e-07, "loss": 2.2903, "step": 3400 }, { "epoch": 211.11111111111111, "learning_rate": 9.556446675645891e-07, "loss": 2.3423, "step": 3420 }, { "epoch": 212.34567901234567, "learning_rate": 9.55369168605363e-07, "loss": 2.2747, "step": 3440 }, { "epoch": 213.58024691358025, "learning_rate": 9.550936696461368e-07, "loss": 2.2997, "step": 3460 }, { "epoch": 214.8148148148148, "learning_rate": 9.548181706869107e-07, "loss": 2.2166, "step": 3480 }, { "epoch": 216.0493827160494, "learning_rate": 9.545426717276845e-07, "loss": 2.349, "step": 3500 }, { "epoch": 217.28395061728395, "learning_rate": 9.542671727684582e-07, "loss": 2.2522, "step": 3520 }, { "epoch": 218.5185185185185, "learning_rate": 9.539916738092323e-07, "loss": 2.2854, "step": 3540 }, { "epoch": 219.7530864197531, "learning_rate": 9.537161748500061e-07, "loss": 2.2806, "step": 3560 }, { "epoch": 220.98765432098764, "learning_rate": 9.5344067589078e-07, "loss": 2.233, "step": 3580 }, { "epoch": 222.22222222222223, "learning_rate": 9.531651769315538e-07, "loss": 2.2588, "step": 3600 }, { "epoch": 223.45679012345678, "learning_rate": 9.528896779723277e-07, "loss": 2.2729, "step": 3620 }, { "epoch": 224.69135802469137, "learning_rate": 9.526141790131015e-07, "loss": 2.2818, "step": 3640 }, { "epoch": 225.92592592592592, "learning_rate": 9.523386800538753e-07, "loss": 2.2291, "step": 3660 }, { "epoch": 227.1604938271605, "learning_rate": 9.520631810946491e-07, "loss": 2.2552, "step": 3680 }, { "epoch": 228.39506172839506, "learning_rate": 9.51787682135423e-07, "loss": 2.2028, "step": 3700 }, { "epoch": 229.62962962962962, "learning_rate": 9.515121831761969e-07, "loss": 2.1948, "step": 3720 }, { "epoch": 230.8641975308642, "learning_rate": 9.512366842169707e-07, "loss": 2.2981, "step": 3740 }, { "epoch": 232.09876543209876, "learning_rate": 9.509611852577446e-07, "loss": 2.2519, "step": 3760 }, { "epoch": 233.33333333333334, "learning_rate": 9.506856862985184e-07, "loss": 2.2159, "step": 3780 }, { "epoch": 234.5679012345679, "learning_rate": 9.504101873392922e-07, "loss": 2.2122, "step": 3800 }, { "epoch": 235.80246913580248, "learning_rate": 9.501346883800661e-07, "loss": 2.2165, "step": 3820 }, { "epoch": 237.03703703703704, "learning_rate": 9.498591894208399e-07, "loss": 2.2362, "step": 3840 }, { "epoch": 238.2716049382716, "learning_rate": 9.495836904616138e-07, "loss": 2.1995, "step": 3860 }, { "epoch": 239.50617283950618, "learning_rate": 9.493081915023877e-07, "loss": 2.248, "step": 3880 }, { "epoch": 240.74074074074073, "learning_rate": 9.490326925431615e-07, "loss": 2.1703, "step": 3900 }, { "epoch": 241.97530864197532, "learning_rate": 9.487571935839353e-07, "loss": 2.1987, "step": 3920 }, { "epoch": 243.20987654320987, "learning_rate": 9.484816946247091e-07, "loss": 2.2023, "step": 3940 }, { "epoch": 244.44444444444446, "learning_rate": 9.48206195665483e-07, "loss": 2.2292, "step": 3960 }, { "epoch": 245.679012345679, "learning_rate": 9.479306967062568e-07, "loss": 2.1746, "step": 3980 }, { "epoch": 246.91358024691357, "learning_rate": 9.476551977470307e-07, "loss": 2.1809, "step": 4000 }, { "epoch": 248.14814814814815, "learning_rate": 9.473796987878046e-07, "loss": 2.1631, "step": 4020 }, { "epoch": 249.3827160493827, "learning_rate": 9.471041998285784e-07, "loss": 2.1437, "step": 4040 }, { "epoch": 250.6172839506173, "learning_rate": 9.468287008693522e-07, "loss": 2.1719, "step": 4060 }, { "epoch": 251.85185185185185, "learning_rate": 9.46553201910126e-07, "loss": 2.1754, "step": 4080 }, { "epoch": 253.08641975308643, "learning_rate": 9.462777029508999e-07, "loss": 2.172, "step": 4100 }, { "epoch": 254.320987654321, "learning_rate": 9.460022039916737e-07, "loss": 2.2135, "step": 4120 }, { "epoch": 255.55555555555554, "learning_rate": 9.457267050324476e-07, "loss": 2.1143, "step": 4140 }, { "epoch": 256.7901234567901, "learning_rate": 9.454512060732215e-07, "loss": 2.1804, "step": 4160 }, { "epoch": 258.0246913580247, "learning_rate": 9.451757071139952e-07, "loss": 2.147, "step": 4180 }, { "epoch": 259.25925925925924, "learning_rate": 9.449002081547691e-07, "loss": 2.1618, "step": 4200 }, { "epoch": 260.4938271604938, "learning_rate": 9.446247091955429e-07, "loss": 2.1434, "step": 4220 }, { "epoch": 261.7283950617284, "learning_rate": 9.443492102363168e-07, "loss": 2.1535, "step": 4240 }, { "epoch": 262.962962962963, "learning_rate": 9.440737112770907e-07, "loss": 2.2116, "step": 4260 }, { "epoch": 264.1975308641975, "learning_rate": 9.437982123178645e-07, "loss": 2.1857, "step": 4280 }, { "epoch": 265.4320987654321, "learning_rate": 9.435227133586384e-07, "loss": 2.1337, "step": 4300 }, { "epoch": 266.6666666666667, "learning_rate": 9.432472143994122e-07, "loss": 2.1554, "step": 4320 }, { "epoch": 267.9012345679012, "learning_rate": 9.429717154401861e-07, "loss": 2.1565, "step": 4340 }, { "epoch": 269.1358024691358, "learning_rate": 9.4269621648096e-07, "loss": 2.168, "step": 4360 }, { "epoch": 270.3703703703704, "learning_rate": 9.424207175217338e-07, "loss": 2.1392, "step": 4380 }, { "epoch": 271.60493827160496, "learning_rate": 9.421452185625077e-07, "loss": 2.1726, "step": 4400 }, { "epoch": 272.8395061728395, "learning_rate": 9.418697196032815e-07, "loss": 2.135, "step": 4420 }, { "epoch": 274.0740740740741, "learning_rate": 9.415942206440553e-07, "loss": 2.1329, "step": 4440 }, { "epoch": 275.30864197530866, "learning_rate": 9.413187216848291e-07, "loss": 2.1334, "step": 4460 }, { "epoch": 276.5432098765432, "learning_rate": 9.41043222725603e-07, "loss": 2.1309, "step": 4480 }, { "epoch": 277.77777777777777, "learning_rate": 9.407677237663769e-07, "loss": 2.0872, "step": 4500 }, { "epoch": 279.01234567901236, "learning_rate": 9.404922248071507e-07, "loss": 2.1426, "step": 4520 }, { "epoch": 280.24691358024694, "learning_rate": 9.402167258479246e-07, "loss": 2.1331, "step": 4540 }, { "epoch": 281.48148148148147, "learning_rate": 9.399412268886984e-07, "loss": 2.0859, "step": 4560 }, { "epoch": 282.71604938271605, "learning_rate": 9.396657279294723e-07, "loss": 2.0755, "step": 4580 }, { "epoch": 283.95061728395063, "learning_rate": 9.39390228970246e-07, "loss": 2.1203, "step": 4600 }, { "epoch": 285.18518518518516, "learning_rate": 9.391147300110199e-07, "loss": 2.0846, "step": 4620 }, { "epoch": 286.41975308641975, "learning_rate": 9.388392310517938e-07, "loss": 2.1284, "step": 4640 }, { "epoch": 287.65432098765433, "learning_rate": 9.385637320925677e-07, "loss": 2.107, "step": 4660 }, { "epoch": 288.8888888888889, "learning_rate": 9.382882331333415e-07, "loss": 2.2206, "step": 4680 }, { "epoch": 290.12345679012344, "learning_rate": 9.380127341741153e-07, "loss": 2.2475, "step": 4700 }, { "epoch": 291.358024691358, "learning_rate": 9.377372352148891e-07, "loss": 2.1887, "step": 4720 }, { "epoch": 292.5925925925926, "learning_rate": 9.37461736255663e-07, "loss": 2.1352, "step": 4740 }, { "epoch": 293.82716049382714, "learning_rate": 9.371862372964368e-07, "loss": 2.1565, "step": 4760 }, { "epoch": 295.0617283950617, "learning_rate": 9.369107383372107e-07, "loss": 2.1574, "step": 4780 }, { "epoch": 296.2962962962963, "learning_rate": 9.366352393779845e-07, "loss": 2.1384, "step": 4800 }, { "epoch": 297.5308641975309, "learning_rate": 9.363597404187584e-07, "loss": 2.1534, "step": 4820 }, { "epoch": 298.7654320987654, "learning_rate": 9.360842414595322e-07, "loss": 2.0941, "step": 4840 }, { "epoch": 300.0, "learning_rate": 9.35808742500306e-07, "loss": 2.101, "step": 4860 }, { "epoch": 301.2345679012346, "learning_rate": 9.355332435410799e-07, "loss": 2.1181, "step": 4880 }, { "epoch": 302.4691358024691, "learning_rate": 9.352577445818537e-07, "loss": 2.0771, "step": 4900 }, { "epoch": 303.7037037037037, "learning_rate": 9.349822456226276e-07, "loss": 2.0886, "step": 4920 }, { "epoch": 304.9382716049383, "learning_rate": 9.347067466634014e-07, "loss": 2.0715, "step": 4940 }, { "epoch": 306.17283950617286, "learning_rate": 9.344312477041753e-07, "loss": 2.0883, "step": 4960 }, { "epoch": 307.4074074074074, "learning_rate": 9.34155748744949e-07, "loss": 2.0767, "step": 4980 }, { "epoch": 308.641975308642, "learning_rate": 9.338802497857229e-07, "loss": 2.0757, "step": 5000 }, { "epoch": 309.87654320987656, "learning_rate": 9.336047508264968e-07, "loss": 2.0775, "step": 5020 }, { "epoch": 311.1111111111111, "learning_rate": 9.333292518672707e-07, "loss": 2.0763, "step": 5040 }, { "epoch": 312.34567901234567, "learning_rate": 9.330537529080446e-07, "loss": 2.065, "step": 5060 }, { "epoch": 313.58024691358025, "learning_rate": 9.327782539488184e-07, "loss": 2.0996, "step": 5080 }, { "epoch": 314.81481481481484, "learning_rate": 9.325027549895923e-07, "loss": 2.0837, "step": 5100 }, { "epoch": 316.04938271604937, "learning_rate": 9.322272560303662e-07, "loss": 2.0985, "step": 5120 }, { "epoch": 317.28395061728395, "learning_rate": 9.319517570711399e-07, "loss": 2.0662, "step": 5140 }, { "epoch": 318.51851851851853, "learning_rate": 9.316762581119138e-07, "loss": 2.1054, "step": 5160 }, { "epoch": 319.75308641975306, "learning_rate": 9.314007591526876e-07, "loss": 2.0688, "step": 5180 }, { "epoch": 320.98765432098764, "learning_rate": 9.311252601934615e-07, "loss": 2.06, "step": 5200 }, { "epoch": 322.22222222222223, "learning_rate": 9.308497612342353e-07, "loss": 2.0608, "step": 5220 }, { "epoch": 323.4567901234568, "learning_rate": 9.305742622750092e-07, "loss": 2.0238, "step": 5240 }, { "epoch": 324.69135802469134, "learning_rate": 9.30298763315783e-07, "loss": 2.0672, "step": 5260 }, { "epoch": 325.9259259259259, "learning_rate": 9.300232643565568e-07, "loss": 2.0045, "step": 5280 }, { "epoch": 327.1604938271605, "learning_rate": 9.297477653973307e-07, "loss": 2.0297, "step": 5300 }, { "epoch": 328.39506172839504, "learning_rate": 9.294722664381045e-07, "loss": 2.0939, "step": 5320 }, { "epoch": 329.6296296296296, "learning_rate": 9.291967674788784e-07, "loss": 2.0309, "step": 5340 }, { "epoch": 330.8641975308642, "learning_rate": 9.289212685196523e-07, "loss": 2.0221, "step": 5360 }, { "epoch": 332.0987654320988, "learning_rate": 9.286457695604261e-07, "loss": 2.0629, "step": 5380 }, { "epoch": 333.3333333333333, "learning_rate": 9.283702706011999e-07, "loss": 2.0113, "step": 5400 }, { "epoch": 334.5679012345679, "learning_rate": 9.280947716419737e-07, "loss": 2.0351, "step": 5420 }, { "epoch": 335.8024691358025, "learning_rate": 9.278192726827476e-07, "loss": 2.0535, "step": 5440 }, { "epoch": 337.037037037037, "learning_rate": 9.275437737235214e-07, "loss": 2.0653, "step": 5460 }, { "epoch": 338.2716049382716, "learning_rate": 9.272682747642953e-07, "loss": 2.0103, "step": 5480 }, { "epoch": 339.5061728395062, "learning_rate": 9.269927758050692e-07, "loss": 2.0367, "step": 5500 }, { "epoch": 340.74074074074076, "learning_rate": 9.267172768458429e-07, "loss": 1.9869, "step": 5520 }, { "epoch": 341.9753086419753, "learning_rate": 9.264417778866168e-07, "loss": 2.0831, "step": 5540 }, { "epoch": 343.2098765432099, "learning_rate": 9.261662789273906e-07, "loss": 2.0198, "step": 5560 }, { "epoch": 344.44444444444446, "learning_rate": 9.258907799681645e-07, "loss": 2.0057, "step": 5580 }, { "epoch": 345.679012345679, "learning_rate": 9.256152810089383e-07, "loss": 1.9973, "step": 5600 }, { "epoch": 346.91358024691357, "learning_rate": 9.253397820497122e-07, "loss": 2.0431, "step": 5620 }, { "epoch": 348.14814814814815, "learning_rate": 9.250642830904861e-07, "loss": 2.0132, "step": 5640 }, { "epoch": 349.38271604938274, "learning_rate": 9.247887841312599e-07, "loss": 2.0589, "step": 5660 }, { "epoch": 350.61728395061726, "learning_rate": 9.245132851720337e-07, "loss": 1.9975, "step": 5680 }, { "epoch": 351.85185185185185, "learning_rate": 9.242377862128075e-07, "loss": 2.0196, "step": 5700 }, { "epoch": 353.08641975308643, "learning_rate": 9.239622872535814e-07, "loss": 1.9605, "step": 5720 }, { "epoch": 354.320987654321, "learning_rate": 9.236867882943551e-07, "loss": 2.0043, "step": 5740 }, { "epoch": 355.55555555555554, "learning_rate": 9.234112893351291e-07, "loss": 1.9835, "step": 5760 }, { "epoch": 356.7901234567901, "learning_rate": 9.23135790375903e-07, "loss": 2.0274, "step": 5780 }, { "epoch": 358.0246913580247, "learning_rate": 9.228602914166768e-07, "loss": 2.0303, "step": 5800 }, { "epoch": 359.25925925925924, "learning_rate": 9.225847924574507e-07, "loss": 1.9691, "step": 5820 }, { "epoch": 360.4938271604938, "learning_rate": 9.223092934982245e-07, "loss": 2.0049, "step": 5840 }, { "epoch": 361.7283950617284, "learning_rate": 9.220337945389984e-07, "loss": 1.9652, "step": 5860 }, { "epoch": 362.962962962963, "learning_rate": 9.217582955797723e-07, "loss": 2.019, "step": 5880 }, { "epoch": 364.1975308641975, "learning_rate": 9.214827966205461e-07, "loss": 1.9819, "step": 5900 }, { "epoch": 365.4320987654321, "learning_rate": 9.2120729766132e-07, "loss": 2.0201, "step": 5920 }, { "epoch": 366.6666666666667, "learning_rate": 9.209317987020937e-07, "loss": 1.9934, "step": 5940 }, { "epoch": 367.9012345679012, "learning_rate": 9.206562997428676e-07, "loss": 1.9566, "step": 5960 }, { "epoch": 369.1358024691358, "learning_rate": 9.203808007836415e-07, "loss": 1.9902, "step": 5980 }, { "epoch": 370.3703703703704, "learning_rate": 9.201053018244152e-07, "loss": 2.0059, "step": 6000 }, { "epoch": 371.60493827160496, "learning_rate": 9.198298028651892e-07, "loss": 1.988, "step": 6020 }, { "epoch": 372.8395061728395, "learning_rate": 9.19554303905963e-07, "loss": 1.9659, "step": 6040 }, { "epoch": 374.0740740740741, "learning_rate": 9.192788049467368e-07, "loss": 2.0319, "step": 6060 }, { "epoch": 375.30864197530866, "learning_rate": 9.190033059875106e-07, "loss": 1.9308, "step": 6080 }, { "epoch": 376.5432098765432, "learning_rate": 9.187278070282845e-07, "loss": 1.967, "step": 6100 }, { "epoch": 377.77777777777777, "learning_rate": 9.184523080690584e-07, "loss": 2.0184, "step": 6120 }, { "epoch": 379.01234567901236, "learning_rate": 9.181768091098322e-07, "loss": 1.9865, "step": 6140 }, { "epoch": 380.24691358024694, "learning_rate": 9.179013101506061e-07, "loss": 1.9415, "step": 6160 }, { "epoch": 381.48148148148147, "learning_rate": 9.176258111913799e-07, "loss": 1.9619, "step": 6180 }, { "epoch": 382.71604938271605, "learning_rate": 9.173503122321538e-07, "loss": 1.949, "step": 6200 }, { "epoch": 383.95061728395063, "learning_rate": 9.170748132729275e-07, "loss": 1.9467, "step": 6220 }, { "epoch": 385.18518518518516, "learning_rate": 9.167993143137014e-07, "loss": 1.9691, "step": 6240 }, { "epoch": 386.41975308641975, "learning_rate": 9.165238153544753e-07, "loss": 1.9643, "step": 6260 }, { "epoch": 387.65432098765433, "learning_rate": 9.162483163952491e-07, "loss": 1.942, "step": 6280 }, { "epoch": 388.8888888888889, "learning_rate": 9.15972817436023e-07, "loss": 1.9002, "step": 6300 }, { "epoch": 390.12345679012344, "learning_rate": 9.156973184767968e-07, "loss": 1.9965, "step": 6320 }, { "epoch": 391.358024691358, "learning_rate": 9.154218195175706e-07, "loss": 1.9343, "step": 6340 }, { "epoch": 392.5925925925926, "learning_rate": 9.151463205583446e-07, "loss": 1.931, "step": 6360 }, { "epoch": 393.82716049382714, "learning_rate": 9.148708215991183e-07, "loss": 1.9628, "step": 6380 }, { "epoch": 395.0617283950617, "learning_rate": 9.145953226398922e-07, "loss": 1.9752, "step": 6400 }, { "epoch": 396.2962962962963, "learning_rate": 9.14319823680666e-07, "loss": 1.9499, "step": 6420 }, { "epoch": 397.5308641975309, "learning_rate": 9.140443247214399e-07, "loss": 1.9546, "step": 6440 }, { "epoch": 398.7654320987654, "learning_rate": 9.137688257622137e-07, "loss": 1.8583, "step": 6460 }, { "epoch": 400.0, "learning_rate": 9.134933268029875e-07, "loss": 1.9405, "step": 6480 }, { "epoch": 401.2345679012346, "learning_rate": 9.132178278437614e-07, "loss": 1.941, "step": 6500 }, { "epoch": 402.4691358024691, "learning_rate": 9.129423288845352e-07, "loss": 1.9432, "step": 6520 }, { "epoch": 403.7037037037037, "learning_rate": 9.126668299253092e-07, "loss": 1.9603, "step": 6540 }, { "epoch": 404.9382716049383, "learning_rate": 9.12391330966083e-07, "loss": 1.9456, "step": 6560 }, { "epoch": 406.17283950617286, "learning_rate": 9.121158320068569e-07, "loss": 1.9488, "step": 6580 }, { "epoch": 407.4074074074074, "learning_rate": 9.118403330476307e-07, "loss": 1.9248, "step": 6600 }, { "epoch": 408.641975308642, "learning_rate": 9.115648340884045e-07, "loss": 1.8897, "step": 6620 }, { "epoch": 409.87654320987656, "learning_rate": 9.112893351291784e-07, "loss": 1.9315, "step": 6640 }, { "epoch": 411.1111111111111, "learning_rate": 9.110138361699522e-07, "loss": 1.9599, "step": 6660 }, { "epoch": 412.34567901234567, "learning_rate": 9.107383372107261e-07, "loss": 1.9441, "step": 6680 }, { "epoch": 413.58024691358025, "learning_rate": 9.104628382515e-07, "loss": 1.8974, "step": 6700 }, { "epoch": 414.81481481481484, "learning_rate": 9.101873392922738e-07, "loss": 1.9078, "step": 6720 }, { "epoch": 416.04938271604937, "learning_rate": 9.099118403330477e-07, "loss": 1.9587, "step": 6740 }, { "epoch": 417.28395061728395, "learning_rate": 9.096363413738214e-07, "loss": 2.0016, "step": 6760 }, { "epoch": 418.51851851851853, "learning_rate": 9.093608424145953e-07, "loss": 1.9557, "step": 6780 }, { "epoch": 419.75308641975306, "learning_rate": 9.090853434553691e-07, "loss": 1.9719, "step": 6800 }, { "epoch": 420.98765432098764, "learning_rate": 9.08809844496143e-07, "loss": 1.9779, "step": 6820 }, { "epoch": 422.22222222222223, "learning_rate": 9.085343455369169e-07, "loss": 1.9754, "step": 6840 }, { "epoch": 423.4567901234568, "learning_rate": 9.082588465776907e-07, "loss": 2.0078, "step": 6860 }, { "epoch": 424.69135802469134, "learning_rate": 9.079833476184645e-07, "loss": 1.9856, "step": 6880 }, { "epoch": 425.9259259259259, "learning_rate": 9.077078486592383e-07, "loss": 1.9698, "step": 6900 }, { "epoch": 427.1604938271605, "learning_rate": 9.074323497000122e-07, "loss": 1.9826, "step": 6920 }, { "epoch": 428.39506172839504, "learning_rate": 9.07156850740786e-07, "loss": 1.9513, "step": 6940 }, { "epoch": 429.6296296296296, "learning_rate": 9.068813517815599e-07, "loss": 1.93, "step": 6960 }, { "epoch": 430.8641975308642, "learning_rate": 9.066058528223339e-07, "loss": 1.9779, "step": 6980 }, { "epoch": 432.0987654320988, "learning_rate": 9.063303538631076e-07, "loss": 1.9937, "step": 7000 }, { "epoch": 433.3333333333333, "learning_rate": 9.060548549038814e-07, "loss": 1.955, "step": 7020 }, { "epoch": 434.5679012345679, "learning_rate": 9.057793559446552e-07, "loss": 1.9652, "step": 7040 }, { "epoch": 435.8024691358025, "learning_rate": 9.055038569854291e-07, "loss": 1.9568, "step": 7060 }, { "epoch": 437.037037037037, "learning_rate": 9.05228358026203e-07, "loss": 1.9787, "step": 7080 }, { "epoch": 438.2716049382716, "learning_rate": 9.049528590669768e-07, "loss": 1.9137, "step": 7100 }, { "epoch": 439.5061728395062, "learning_rate": 9.046773601077507e-07, "loss": 1.9314, "step": 7120 }, { "epoch": 440.74074074074076, "learning_rate": 9.044018611485244e-07, "loss": 1.9961, "step": 7140 }, { "epoch": 441.9753086419753, "learning_rate": 9.041263621892983e-07, "loss": 1.9303, "step": 7160 }, { "epoch": 443.2098765432099, "learning_rate": 9.038508632300721e-07, "loss": 1.982, "step": 7180 }, { "epoch": 444.44444444444446, "learning_rate": 9.03575364270846e-07, "loss": 1.9133, "step": 7200 }, { "epoch": 445.679012345679, "learning_rate": 9.032998653116199e-07, "loss": 1.9376, "step": 7220 }, { "epoch": 446.91358024691357, "learning_rate": 9.030243663523937e-07, "loss": 1.9254, "step": 7240 }, { "epoch": 448.14814814814815, "learning_rate": 9.027488673931676e-07, "loss": 1.9491, "step": 7260 }, { "epoch": 449.38271604938274, "learning_rate": 9.024733684339414e-07, "loss": 1.9455, "step": 7280 }, { "epoch": 450.61728395061726, "learning_rate": 9.021978694747153e-07, "loss": 1.9128, "step": 7300 }, { "epoch": 451.85185185185185, "learning_rate": 9.019223705154892e-07, "loss": 1.9006, "step": 7320 }, { "epoch": 453.08641975308643, "learning_rate": 9.016468715562629e-07, "loss": 1.9172, "step": 7340 }, { "epoch": 454.320987654321, "learning_rate": 9.013713725970369e-07, "loss": 1.9754, "step": 7360 }, { "epoch": 455.55555555555554, "learning_rate": 9.010958736378107e-07, "loss": 1.9759, "step": 7380 }, { "epoch": 456.7901234567901, "learning_rate": 9.008203746785846e-07, "loss": 1.9452, "step": 7400 }, { "epoch": 458.0246913580247, "learning_rate": 9.005448757193583e-07, "loss": 1.9618, "step": 7420 }, { "epoch": 459.25925925925924, "learning_rate": 9.002693767601322e-07, "loss": 1.9364, "step": 7440 }, { "epoch": 460.4938271604938, "learning_rate": 8.999938778009061e-07, "loss": 1.9513, "step": 7460 }, { "epoch": 461.7283950617284, "learning_rate": 8.997183788416799e-07, "loss": 1.9473, "step": 7480 }, { "epoch": 462.962962962963, "learning_rate": 8.994428798824538e-07, "loss": 1.9159, "step": 7500 }, { "epoch": 464.1975308641975, "learning_rate": 8.991673809232276e-07, "loss": 1.9195, "step": 7520 }, { "epoch": 465.4320987654321, "learning_rate": 8.988918819640015e-07, "loss": 1.9647, "step": 7540 }, { "epoch": 466.6666666666667, "learning_rate": 8.986163830047752e-07, "loss": 1.895, "step": 7560 }, { "epoch": 467.9012345679012, "learning_rate": 8.983408840455491e-07, "loss": 1.9408, "step": 7580 }, { "epoch": 469.1358024691358, "learning_rate": 8.98065385086323e-07, "loss": 1.978, "step": 7600 }, { "epoch": 470.3703703703704, "learning_rate": 8.977898861270968e-07, "loss": 1.9085, "step": 7620 }, { "epoch": 471.60493827160496, "learning_rate": 8.975143871678707e-07, "loss": 1.9833, "step": 7640 }, { "epoch": 472.8395061728395, "learning_rate": 8.972388882086445e-07, "loss": 1.9559, "step": 7660 }, { "epoch": 474.0740740740741, "learning_rate": 8.969633892494183e-07, "loss": 1.9333, "step": 7680 }, { "epoch": 475.30864197530866, "learning_rate": 8.966878902901923e-07, "loss": 1.9075, "step": 7700 }, { "epoch": 476.5432098765432, "learning_rate": 8.96412391330966e-07, "loss": 1.9343, "step": 7720 }, { "epoch": 477.77777777777777, "learning_rate": 8.961368923717399e-07, "loss": 1.9107, "step": 7740 }, { "epoch": 479.01234567901236, "learning_rate": 8.958613934125137e-07, "loss": 1.9344, "step": 7760 }, { "epoch": 480.24691358024694, "learning_rate": 8.955858944532876e-07, "loss": 1.9044, "step": 7780 }, { "epoch": 481.48148148148147, "learning_rate": 8.953103954940614e-07, "loss": 1.974, "step": 7800 }, { "epoch": 482.71604938271605, "learning_rate": 8.950348965348353e-07, "loss": 1.9241, "step": 7820 }, { "epoch": 483.95061728395063, "learning_rate": 8.947593975756091e-07, "loss": 1.9377, "step": 7840 }, { "epoch": 485.18518518518516, "learning_rate": 8.944838986163829e-07, "loss": 1.9326, "step": 7860 }, { "epoch": 486.41975308641975, "learning_rate": 8.942083996571568e-07, "loss": 1.9028, "step": 7880 }, { "epoch": 487.65432098765433, "learning_rate": 8.939329006979306e-07, "loss": 1.8872, "step": 7900 }, { "epoch": 488.8888888888889, "learning_rate": 8.936574017387045e-07, "loss": 1.904, "step": 7920 }, { "epoch": 490.12345679012344, "learning_rate": 8.933819027794784e-07, "loss": 1.8897, "step": 7940 }, { "epoch": 491.358024691358, "learning_rate": 8.931064038202521e-07, "loss": 1.9026, "step": 7960 }, { "epoch": 492.5925925925926, "learning_rate": 8.92830904861026e-07, "loss": 1.9287, "step": 7980 }, { "epoch": 493.82716049382714, "learning_rate": 8.925554059017998e-07, "loss": 1.8277, "step": 8000 }, { "epoch": 495.0617283950617, "learning_rate": 8.922799069425738e-07, "loss": 1.8716, "step": 8020 }, { "epoch": 496.2962962962963, "learning_rate": 8.920044079833476e-07, "loss": 1.8907, "step": 8040 }, { "epoch": 497.5308641975309, "learning_rate": 8.917289090241215e-07, "loss": 1.8754, "step": 8060 }, { "epoch": 498.7654320987654, "learning_rate": 8.914534100648954e-07, "loss": 1.8713, "step": 8080 }, { "epoch": 500.0, "learning_rate": 8.911779111056691e-07, "loss": 1.8645, "step": 8100 }, { "epoch": 501.2345679012346, "learning_rate": 8.90902412146443e-07, "loss": 1.896, "step": 8120 }, { "epoch": 502.4691358024691, "learning_rate": 8.906269131872168e-07, "loss": 1.8824, "step": 8140 }, { "epoch": 503.7037037037037, "learning_rate": 8.903514142279907e-07, "loss": 1.8612, "step": 8160 }, { "epoch": 504.9382716049383, "learning_rate": 8.900759152687646e-07, "loss": 1.8747, "step": 8180 }, { "epoch": 506.17283950617286, "learning_rate": 8.898004163095384e-07, "loss": 1.8882, "step": 8200 }, { "epoch": 507.4074074074074, "learning_rate": 8.895249173503122e-07, "loss": 1.8858, "step": 8220 }, { "epoch": 508.641975308642, "learning_rate": 8.89249418391086e-07, "loss": 1.8599, "step": 8240 }, { "epoch": 509.87654320987656, "learning_rate": 8.889739194318599e-07, "loss": 1.9073, "step": 8260 }, { "epoch": 511.1111111111111, "learning_rate": 8.886984204726337e-07, "loss": 1.9222, "step": 8280 }, { "epoch": 512.3456790123457, "learning_rate": 8.884229215134076e-07, "loss": 1.904, "step": 8300 }, { "epoch": 513.5802469135803, "learning_rate": 8.881474225541814e-07, "loss": 1.8662, "step": 8320 }, { "epoch": 514.8148148148148, "learning_rate": 8.878719235949553e-07, "loss": 1.8384, "step": 8340 }, { "epoch": 516.0493827160494, "learning_rate": 8.875964246357292e-07, "loss": 1.9238, "step": 8360 }, { "epoch": 517.283950617284, "learning_rate": 8.873209256765029e-07, "loss": 1.8626, "step": 8380 }, { "epoch": 518.5185185185185, "learning_rate": 8.870454267172768e-07, "loss": 1.9077, "step": 8400 }, { "epoch": 519.7530864197531, "learning_rate": 8.867699277580506e-07, "loss": 1.8783, "step": 8420 }, { "epoch": 520.9876543209876, "learning_rate": 8.864944287988245e-07, "loss": 1.8721, "step": 8440 }, { "epoch": 522.2222222222222, "learning_rate": 8.862189298395984e-07, "loss": 1.8961, "step": 8460 }, { "epoch": 523.4567901234568, "learning_rate": 8.859434308803721e-07, "loss": 1.8552, "step": 8480 }, { "epoch": 524.6913580246913, "learning_rate": 8.85667931921146e-07, "loss": 1.9263, "step": 8500 }, { "epoch": 525.925925925926, "learning_rate": 8.853924329619198e-07, "loss": 1.8674, "step": 8520 }, { "epoch": 527.1604938271605, "learning_rate": 8.851169340026937e-07, "loss": 1.8876, "step": 8540 }, { "epoch": 528.395061728395, "learning_rate": 8.848414350434676e-07, "loss": 1.8834, "step": 8560 }, { "epoch": 529.6296296296297, "learning_rate": 8.845659360842414e-07, "loss": 1.8467, "step": 8580 }, { "epoch": 530.8641975308642, "learning_rate": 8.842904371250153e-07, "loss": 1.8957, "step": 8600 }, { "epoch": 532.0987654320987, "learning_rate": 8.840149381657891e-07, "loss": 1.8911, "step": 8620 }, { "epoch": 533.3333333333334, "learning_rate": 8.837394392065629e-07, "loss": 1.8994, "step": 8640 }, { "epoch": 534.5679012345679, "learning_rate": 8.834639402473367e-07, "loss": 1.9276, "step": 8660 }, { "epoch": 535.8024691358024, "learning_rate": 8.831884412881107e-07, "loss": 1.8605, "step": 8680 }, { "epoch": 537.0370370370371, "learning_rate": 8.829129423288845e-07, "loss": 1.8733, "step": 8700 }, { "epoch": 538.2716049382716, "learning_rate": 8.826374433696583e-07, "loss": 1.9128, "step": 8720 }, { "epoch": 539.5061728395061, "learning_rate": 8.823619444104322e-07, "loss": 1.8563, "step": 8740 }, { "epoch": 540.7407407407408, "learning_rate": 8.82086445451206e-07, "loss": 1.8647, "step": 8760 }, { "epoch": 541.9753086419753, "learning_rate": 8.818109464919799e-07, "loss": 1.8478, "step": 8780 }, { "epoch": 543.2098765432099, "learning_rate": 8.815354475327538e-07, "loss": 1.877, "step": 8800 }, { "epoch": 544.4444444444445, "learning_rate": 8.812599485735276e-07, "loss": 1.8432, "step": 8820 }, { "epoch": 545.679012345679, "learning_rate": 8.809844496143015e-07, "loss": 1.8245, "step": 8840 }, { "epoch": 546.9135802469136, "learning_rate": 8.807089506550753e-07, "loss": 1.841, "step": 8860 }, { "epoch": 548.1481481481482, "learning_rate": 8.804334516958492e-07, "loss": 1.8609, "step": 8880 }, { "epoch": 549.3827160493827, "learning_rate": 8.80157952736623e-07, "loss": 1.8815, "step": 8900 }, { "epoch": 550.6172839506173, "learning_rate": 8.798824537773968e-07, "loss": 1.8866, "step": 8920 }, { "epoch": 551.8518518518518, "learning_rate": 8.796069548181707e-07, "loss": 1.8573, "step": 8940 }, { "epoch": 553.0864197530864, "learning_rate": 8.793314558589445e-07, "loss": 1.8837, "step": 8960 }, { "epoch": 554.320987654321, "learning_rate": 8.790559568997184e-07, "loss": 1.8875, "step": 8980 }, { "epoch": 555.5555555555555, "learning_rate": 8.787804579404922e-07, "loss": 1.934, "step": 9000 }, { "epoch": 556.7901234567901, "learning_rate": 8.78504958981266e-07, "loss": 1.9294, "step": 9020 }, { "epoch": 558.0246913580247, "learning_rate": 8.782294600220397e-07, "loss": 2.0228, "step": 9040 }, { "epoch": 559.2592592592592, "learning_rate": 8.779539610628137e-07, "loss": 2.0074, "step": 9060 }, { "epoch": 560.4938271604939, "learning_rate": 8.776784621035876e-07, "loss": 1.8819, "step": 9080 }, { "epoch": 561.7283950617284, "learning_rate": 8.774029631443614e-07, "loss": 1.8354, "step": 9100 }, { "epoch": 562.9629629629629, "learning_rate": 8.771274641851353e-07, "loss": 1.8683, "step": 9120 }, { "epoch": 564.1975308641976, "learning_rate": 8.768519652259091e-07, "loss": 1.8644, "step": 9140 }, { "epoch": 565.4320987654321, "learning_rate": 8.76576466266683e-07, "loss": 1.8822, "step": 9160 }, { "epoch": 566.6666666666666, "learning_rate": 8.763009673074568e-07, "loss": 1.8544, "step": 9180 }, { "epoch": 567.9012345679013, "learning_rate": 8.760254683482306e-07, "loss": 1.8507, "step": 9200 }, { "epoch": 569.1358024691358, "learning_rate": 8.757499693890045e-07, "loss": 1.8816, "step": 9220 }, { "epoch": 570.3703703703703, "learning_rate": 8.754744704297783e-07, "loss": 1.7993, "step": 9240 }, { "epoch": 571.604938271605, "learning_rate": 8.751989714705522e-07, "loss": 1.8231, "step": 9260 }, { "epoch": 572.8395061728395, "learning_rate": 8.74923472511326e-07, "loss": 1.8354, "step": 9280 }, { "epoch": 574.074074074074, "learning_rate": 8.746479735520998e-07, "loss": 1.8731, "step": 9300 }, { "epoch": 575.3086419753087, "learning_rate": 8.743724745928737e-07, "loss": 1.8377, "step": 9320 }, { "epoch": 576.5432098765432, "learning_rate": 8.740969756336475e-07, "loss": 1.8211, "step": 9340 }, { "epoch": 577.7777777777778, "learning_rate": 8.738214766744214e-07, "loss": 1.8321, "step": 9360 }, { "epoch": 579.0123456790124, "learning_rate": 8.735459777151952e-07, "loss": 1.8091, "step": 9380 }, { "epoch": 580.2469135802469, "learning_rate": 8.732704787559691e-07, "loss": 1.7772, "step": 9400 }, { "epoch": 581.4814814814815, "learning_rate": 8.72994979796743e-07, "loss": 1.7844, "step": 9420 }, { "epoch": 582.716049382716, "learning_rate": 8.727194808375168e-07, "loss": 1.7728, "step": 9440 }, { "epoch": 583.9506172839506, "learning_rate": 8.724439818782906e-07, "loss": 1.7777, "step": 9460 }, { "epoch": 585.1851851851852, "learning_rate": 8.721684829190644e-07, "loss": 1.7895, "step": 9480 }, { "epoch": 586.4197530864197, "learning_rate": 8.718929839598383e-07, "loss": 1.7949, "step": 9500 }, { "epoch": 587.6543209876543, "learning_rate": 8.716174850006122e-07, "loss": 1.7314, "step": 9520 }, { "epoch": 588.8888888888889, "learning_rate": 8.713419860413861e-07, "loss": 1.7573, "step": 9540 }, { "epoch": 590.1234567901234, "learning_rate": 8.710664870821599e-07, "loss": 1.7663, "step": 9560 }, { "epoch": 591.358024691358, "learning_rate": 8.707909881229337e-07, "loss": 1.8203, "step": 9580 }, { "epoch": 592.5925925925926, "learning_rate": 8.705154891637076e-07, "loss": 1.8092, "step": 9600 }, { "epoch": 593.8271604938271, "learning_rate": 8.702399902044814e-07, "loss": 1.7717, "step": 9620 }, { "epoch": 595.0617283950618, "learning_rate": 8.699644912452553e-07, "loss": 1.7992, "step": 9640 }, { "epoch": 596.2962962962963, "learning_rate": 8.69688992286029e-07, "loss": 1.7772, "step": 9660 }, { "epoch": 597.5308641975308, "learning_rate": 8.69413493326803e-07, "loss": 1.7884, "step": 9680 }, { "epoch": 598.7654320987655, "learning_rate": 8.691379943675769e-07, "loss": 1.7618, "step": 9700 }, { "epoch": 600.0, "learning_rate": 8.688624954083506e-07, "loss": 1.7789, "step": 9720 }, { "epoch": 601.2345679012345, "learning_rate": 8.685869964491245e-07, "loss": 1.7624, "step": 9740 }, { "epoch": 602.4691358024692, "learning_rate": 8.683114974898983e-07, "loss": 1.7588, "step": 9760 }, { "epoch": 603.7037037037037, "learning_rate": 8.680359985306722e-07, "loss": 1.793, "step": 9780 }, { "epoch": 604.9382716049382, "learning_rate": 8.677604995714461e-07, "loss": 1.7527, "step": 9800 }, { "epoch": 606.1728395061729, "learning_rate": 8.674850006122198e-07, "loss": 1.7472, "step": 9820 }, { "epoch": 607.4074074074074, "learning_rate": 8.672095016529937e-07, "loss": 1.8087, "step": 9840 }, { "epoch": 608.641975308642, "learning_rate": 8.669340026937675e-07, "loss": 1.7727, "step": 9860 }, { "epoch": 609.8765432098766, "learning_rate": 8.666585037345414e-07, "loss": 1.7672, "step": 9880 }, { "epoch": 611.1111111111111, "learning_rate": 8.663830047753152e-07, "loss": 1.7614, "step": 9900 }, { "epoch": 612.3456790123457, "learning_rate": 8.661075058160891e-07, "loss": 1.7775, "step": 9920 }, { "epoch": 613.5802469135803, "learning_rate": 8.65832006856863e-07, "loss": 1.7512, "step": 9940 }, { "epoch": 614.8148148148148, "learning_rate": 8.655565078976368e-07, "loss": 1.7858, "step": 9960 }, { "epoch": 616.0493827160494, "learning_rate": 8.652810089384107e-07, "loss": 1.8064, "step": 9980 }, { "epoch": 617.283950617284, "learning_rate": 8.650055099791844e-07, "loss": 1.7649, "step": 10000 }, { "epoch": 618.5185185185185, "learning_rate": 8.647300110199584e-07, "loss": 1.8184, "step": 10020 }, { "epoch": 619.7530864197531, "learning_rate": 8.644545120607322e-07, "loss": 1.7532, "step": 10040 }, { "epoch": 620.9876543209876, "learning_rate": 8.64179013101506e-07, "loss": 1.7751, "step": 10060 }, { "epoch": 622.2222222222222, "learning_rate": 8.639035141422799e-07, "loss": 1.7995, "step": 10080 }, { "epoch": 623.4567901234568, "learning_rate": 8.636280151830536e-07, "loss": 1.7737, "step": 10100 }, { "epoch": 624.6913580246913, "learning_rate": 8.633525162238275e-07, "loss": 1.7962, "step": 10120 }, { "epoch": 625.925925925926, "learning_rate": 8.630770172646013e-07, "loss": 1.8228, "step": 10140 }, { "epoch": 627.1604938271605, "learning_rate": 8.628015183053752e-07, "loss": 1.8136, "step": 10160 }, { "epoch": 628.395061728395, "learning_rate": 8.625260193461491e-07, "loss": 1.7708, "step": 10180 }, { "epoch": 629.6296296296297, "learning_rate": 8.622505203869229e-07, "loss": 1.7872, "step": 10200 }, { "epoch": 630.8641975308642, "learning_rate": 8.619750214276968e-07, "loss": 1.7869, "step": 10220 }, { "epoch": 632.0987654320987, "learning_rate": 8.616995224684706e-07, "loss": 1.8117, "step": 10240 }, { "epoch": 633.3333333333334, "learning_rate": 8.614240235092445e-07, "loss": 1.7404, "step": 10260 }, { "epoch": 634.5679012345679, "learning_rate": 8.611485245500185e-07, "loss": 1.7706, "step": 10280 }, { "epoch": 635.8024691358024, "learning_rate": 8.608730255907922e-07, "loss": 1.7781, "step": 10300 }, { "epoch": 637.0370370370371, "learning_rate": 8.605975266315661e-07, "loss": 1.798, "step": 10320 }, { "epoch": 638.2716049382716, "learning_rate": 8.603220276723399e-07, "loss": 1.7768, "step": 10340 }, { "epoch": 639.5061728395061, "learning_rate": 8.600465287131137e-07, "loss": 1.7392, "step": 10360 }, { "epoch": 640.7407407407408, "learning_rate": 8.597710297538874e-07, "loss": 1.801, "step": 10380 }, { "epoch": 641.9753086419753, "learning_rate": 8.594955307946614e-07, "loss": 1.8121, "step": 10400 }, { "epoch": 643.2098765432099, "learning_rate": 8.592200318354353e-07, "loss": 1.7606, "step": 10420 }, { "epoch": 644.4444444444445, "learning_rate": 8.589445328762091e-07, "loss": 1.8174, "step": 10440 }, { "epoch": 645.679012345679, "learning_rate": 8.58669033916983e-07, "loss": 1.7442, "step": 10460 }, { "epoch": 646.9135802469136, "learning_rate": 8.583935349577568e-07, "loss": 1.7462, "step": 10480 }, { "epoch": 648.1481481481482, "learning_rate": 8.581180359985307e-07, "loss": 1.7649, "step": 10500 }, { "epoch": 649.3827160493827, "learning_rate": 8.578425370393046e-07, "loss": 1.7519, "step": 10520 }, { "epoch": 650.6172839506173, "learning_rate": 8.575670380800783e-07, "loss": 1.7684, "step": 10540 }, { "epoch": 651.8518518518518, "learning_rate": 8.572915391208522e-07, "loss": 1.7459, "step": 10560 }, { "epoch": 653.0864197530864, "learning_rate": 8.57016040161626e-07, "loss": 1.7555, "step": 10580 }, { "epoch": 654.320987654321, "learning_rate": 8.567405412023999e-07, "loss": 1.7332, "step": 10600 }, { "epoch": 655.5555555555555, "learning_rate": 8.564650422431737e-07, "loss": 1.7313, "step": 10620 }, { "epoch": 656.7901234567901, "learning_rate": 8.561895432839475e-07, "loss": 1.7064, "step": 10640 }, { "epoch": 658.0246913580247, "learning_rate": 8.559140443247214e-07, "loss": 1.7381, "step": 10660 }, { "epoch": 659.2592592592592, "learning_rate": 8.556385453654952e-07, "loss": 1.7343, "step": 10680 }, { "epoch": 660.4938271604939, "learning_rate": 8.553630464062691e-07, "loss": 1.7368, "step": 10700 }, { "epoch": 661.7283950617284, "learning_rate": 8.550875474470429e-07, "loss": 1.727, "step": 10720 }, { "epoch": 662.9629629629629, "learning_rate": 8.548120484878168e-07, "loss": 1.7363, "step": 10740 }, { "epoch": 664.1975308641976, "learning_rate": 8.545365495285906e-07, "loss": 1.7531, "step": 10760 }, { "epoch": 665.4320987654321, "learning_rate": 8.542610505693645e-07, "loss": 1.7282, "step": 10780 }, { "epoch": 666.6666666666666, "learning_rate": 8.539855516101383e-07, "loss": 1.7276, "step": 10800 }, { "epoch": 667.9012345679013, "learning_rate": 8.537100526509121e-07, "loss": 1.7316, "step": 10820 }, { "epoch": 669.1358024691358, "learning_rate": 8.53434553691686e-07, "loss": 1.7049, "step": 10840 }, { "epoch": 670.3703703703703, "learning_rate": 8.531590547324598e-07, "loss": 1.7183, "step": 10860 }, { "epoch": 671.604938271605, "learning_rate": 8.528835557732337e-07, "loss": 1.7383, "step": 10880 }, { "epoch": 672.8395061728395, "learning_rate": 8.526080568140074e-07, "loss": 1.7376, "step": 10900 }, { "epoch": 674.074074074074, "learning_rate": 8.523325578547813e-07, "loss": 1.7909, "step": 10920 }, { "epoch": 675.3086419753087, "learning_rate": 8.520570588955552e-07, "loss": 1.7334, "step": 10940 }, { "epoch": 676.5432098765432, "learning_rate": 8.51781559936329e-07, "loss": 1.7561, "step": 10960 }, { "epoch": 677.7777777777778, "learning_rate": 8.515060609771029e-07, "loss": 1.6731, "step": 10980 }, { "epoch": 679.0123456790124, "learning_rate": 8.512305620178768e-07, "loss": 1.737, "step": 11000 }, { "epoch": 680.2469135802469, "learning_rate": 8.509550630586507e-07, "loss": 1.7584, "step": 11020 }, { "epoch": 681.4814814814815, "learning_rate": 8.506795640994246e-07, "loss": 1.7177, "step": 11040 }, { "epoch": 682.716049382716, "learning_rate": 8.504040651401984e-07, "loss": 1.7428, "step": 11060 }, { "epoch": 683.9506172839506, "learning_rate": 8.501285661809722e-07, "loss": 1.7631, "step": 11080 }, { "epoch": 685.1851851851852, "learning_rate": 8.49853067221746e-07, "loss": 1.7386, "step": 11100 }, { "epoch": 686.4197530864197, "learning_rate": 8.495775682625199e-07, "loss": 1.7397, "step": 11120 }, { "epoch": 687.6543209876543, "learning_rate": 8.493020693032938e-07, "loss": 1.748, "step": 11140 }, { "epoch": 688.8888888888889, "learning_rate": 8.490265703440676e-07, "loss": 1.7534, "step": 11160 }, { "epoch": 690.1234567901234, "learning_rate": 8.487510713848414e-07, "loss": 1.7246, "step": 11180 }, { "epoch": 691.358024691358, "learning_rate": 8.484755724256152e-07, "loss": 1.7368, "step": 11200 }, { "epoch": 692.5925925925926, "learning_rate": 8.482000734663891e-07, "loss": 1.7433, "step": 11220 }, { "epoch": 693.8271604938271, "learning_rate": 8.479245745071629e-07, "loss": 1.6926, "step": 11240 }, { "epoch": 695.0617283950618, "learning_rate": 8.476490755479368e-07, "loss": 1.7433, "step": 11260 }, { "epoch": 696.2962962962963, "learning_rate": 8.473735765887107e-07, "loss": 1.7399, "step": 11280 }, { "epoch": 697.5308641975308, "learning_rate": 8.470980776294845e-07, "loss": 1.7287, "step": 11300 }, { "epoch": 698.7654320987655, "learning_rate": 8.468225786702584e-07, "loss": 1.724, "step": 11320 }, { "epoch": 700.0, "learning_rate": 8.465470797110321e-07, "loss": 1.7264, "step": 11340 }, { "epoch": 701.2345679012345, "learning_rate": 8.462715807518059e-07, "loss": 1.6885, "step": 11360 }, { "epoch": 702.4691358024692, "learning_rate": 8.459960817925798e-07, "loss": 1.7409, "step": 11380 }, { "epoch": 703.7037037037037, "learning_rate": 8.457205828333537e-07, "loss": 1.7205, "step": 11400 }, { "epoch": 704.9382716049382, "learning_rate": 8.454450838741276e-07, "loss": 1.7151, "step": 11420 }, { "epoch": 706.1728395061729, "learning_rate": 8.451695849149013e-07, "loss": 1.7348, "step": 11440 }, { "epoch": 707.4074074074074, "learning_rate": 8.448940859556752e-07, "loss": 1.7653, "step": 11460 }, { "epoch": 708.641975308642, "learning_rate": 8.44618586996449e-07, "loss": 1.6979, "step": 11480 }, { "epoch": 709.8765432098766, "learning_rate": 8.443430880372229e-07, "loss": 1.7564, "step": 11500 }, { "epoch": 711.1111111111111, "learning_rate": 8.440675890779968e-07, "loss": 1.7528, "step": 11520 }, { "epoch": 712.3456790123457, "learning_rate": 8.437920901187706e-07, "loss": 1.7172, "step": 11540 }, { "epoch": 713.5802469135803, "learning_rate": 8.435165911595445e-07, "loss": 1.7624, "step": 11560 }, { "epoch": 714.8148148148148, "learning_rate": 8.432410922003183e-07, "loss": 1.7049, "step": 11580 }, { "epoch": 716.0493827160494, "learning_rate": 8.429655932410922e-07, "loss": 1.6822, "step": 11600 }, { "epoch": 717.283950617284, "learning_rate": 8.426900942818659e-07, "loss": 1.7359, "step": 11620 }, { "epoch": 718.5185185185185, "learning_rate": 8.424145953226398e-07, "loss": 1.6829, "step": 11640 }, { "epoch": 719.7530864197531, "learning_rate": 8.421390963634137e-07, "loss": 1.7477, "step": 11660 }, { "epoch": 720.9876543209876, "learning_rate": 8.418635974041875e-07, "loss": 1.7161, "step": 11680 }, { "epoch": 722.2222222222222, "learning_rate": 8.415880984449614e-07, "loss": 1.7509, "step": 11700 }, { "epoch": 723.4567901234568, "learning_rate": 8.413125994857352e-07, "loss": 1.7336, "step": 11720 }, { "epoch": 724.6913580246913, "learning_rate": 8.410371005265091e-07, "loss": 1.7179, "step": 11740 }, { "epoch": 725.925925925926, "learning_rate": 8.40761601567283e-07, "loss": 1.7154, "step": 11760 }, { "epoch": 727.1604938271605, "learning_rate": 8.404861026080568e-07, "loss": 1.7359, "step": 11780 }, { "epoch": 728.395061728395, "learning_rate": 8.402106036488307e-07, "loss": 1.7177, "step": 11800 }, { "epoch": 729.6296296296297, "learning_rate": 8.399351046896045e-07, "loss": 1.6956, "step": 11820 }, { "epoch": 730.8641975308642, "learning_rate": 8.396596057303784e-07, "loss": 1.7127, "step": 11840 }, { "epoch": 732.0987654320987, "learning_rate": 8.393841067711522e-07, "loss": 1.7155, "step": 11860 }, { "epoch": 733.3333333333334, "learning_rate": 8.39108607811926e-07, "loss": 1.7437, "step": 11880 }, { "epoch": 734.5679012345679, "learning_rate": 8.388331088526999e-07, "loss": 1.7857, "step": 11900 }, { "epoch": 735.8024691358024, "learning_rate": 8.385576098934737e-07, "loss": 1.7504, "step": 11920 }, { "epoch": 737.0370370370371, "learning_rate": 8.382821109342476e-07, "loss": 1.7573, "step": 11940 }, { "epoch": 738.2716049382716, "learning_rate": 8.380066119750214e-07, "loss": 1.7569, "step": 11960 }, { "epoch": 739.5061728395061, "learning_rate": 8.377311130157952e-07, "loss": 1.7422, "step": 11980 }, { "epoch": 740.7407407407408, "learning_rate": 8.37455614056569e-07, "loss": 1.7292, "step": 12000 }, { "epoch": 741.9753086419753, "learning_rate": 8.371801150973429e-07, "loss": 1.7548, "step": 12020 }, { "epoch": 743.2098765432099, "learning_rate": 8.369046161381168e-07, "loss": 1.7435, "step": 12040 }, { "epoch": 744.4444444444445, "learning_rate": 8.366291171788906e-07, "loss": 1.73, "step": 12060 }, { "epoch": 745.679012345679, "learning_rate": 8.363536182196645e-07, "loss": 1.7321, "step": 12080 }, { "epoch": 746.9135802469136, "learning_rate": 8.360781192604383e-07, "loss": 1.7358, "step": 12100 }, { "epoch": 748.1481481481482, "learning_rate": 8.358026203012122e-07, "loss": 1.7249, "step": 12120 }, { "epoch": 749.3827160493827, "learning_rate": 8.355271213419861e-07, "loss": 1.744, "step": 12140 }, { "epoch": 750.6172839506173, "learning_rate": 8.352516223827598e-07, "loss": 1.7205, "step": 12160 }, { "epoch": 751.8518518518518, "learning_rate": 8.349761234235337e-07, "loss": 1.7333, "step": 12180 }, { "epoch": 753.0864197530864, "learning_rate": 8.347006244643075e-07, "loss": 1.7223, "step": 12200 }, { "epoch": 754.320987654321, "learning_rate": 8.344251255050814e-07, "loss": 1.7959, "step": 12220 }, { "epoch": 755.5555555555555, "learning_rate": 8.341496265458553e-07, "loss": 1.7236, "step": 12240 }, { "epoch": 756.7901234567901, "learning_rate": 8.33874127586629e-07, "loss": 1.6977, "step": 12260 }, { "epoch": 758.0246913580247, "learning_rate": 8.335986286274029e-07, "loss": 1.7903, "step": 12280 }, { "epoch": 759.2592592592592, "learning_rate": 8.333231296681767e-07, "loss": 1.8314, "step": 12300 }, { "epoch": 760.4938271604939, "learning_rate": 8.330476307089506e-07, "loss": 1.7828, "step": 12320 }, { "epoch": 761.7283950617284, "learning_rate": 8.327721317497245e-07, "loss": 1.7989, "step": 12340 }, { "epoch": 762.9629629629629, "learning_rate": 8.324966327904983e-07, "loss": 1.8263, "step": 12360 }, { "epoch": 764.1975308641976, "learning_rate": 8.322211338312722e-07, "loss": 1.7722, "step": 12380 }, { "epoch": 765.4320987654321, "learning_rate": 8.31945634872046e-07, "loss": 1.7722, "step": 12400 }, { "epoch": 766.6666666666666, "learning_rate": 8.316701359128198e-07, "loss": 1.8189, "step": 12420 }, { "epoch": 767.9012345679013, "learning_rate": 8.313946369535936e-07, "loss": 1.8084, "step": 12440 }, { "epoch": 769.1358024691358, "learning_rate": 8.311191379943675e-07, "loss": 1.8459, "step": 12460 }, { "epoch": 770.3703703703703, "learning_rate": 8.308436390351413e-07, "loss": 1.8127, "step": 12480 }, { "epoch": 771.604938271605, "learning_rate": 8.305681400759153e-07, "loss": 1.8462, "step": 12500 }, { "epoch": 772.8395061728395, "learning_rate": 8.302926411166891e-07, "loss": 1.8975, "step": 12520 }, { "epoch": 774.074074074074, "learning_rate": 8.300171421574629e-07, "loss": 1.8836, "step": 12540 }, { "epoch": 775.3086419753087, "learning_rate": 8.297416431982368e-07, "loss": 1.8912, "step": 12560 }, { "epoch": 776.5432098765432, "learning_rate": 8.294661442390106e-07, "loss": 1.962, "step": 12580 }, { "epoch": 777.7777777777778, "learning_rate": 8.291906452797846e-07, "loss": 1.9474, "step": 12600 }, { "epoch": 779.0123456790124, "learning_rate": 8.289151463205584e-07, "loss": 1.9509, "step": 12620 }, { "epoch": 780.2469135802469, "learning_rate": 8.286396473613322e-07, "loss": 1.9723, "step": 12640 }, { "epoch": 781.4814814814815, "learning_rate": 8.283641484021061e-07, "loss": 2.0032, "step": 12660 }, { "epoch": 782.716049382716, "learning_rate": 8.280886494428799e-07, "loss": 2.0202, "step": 12680 }, { "epoch": 783.9506172839506, "learning_rate": 8.278131504836536e-07, "loss": 2.0187, "step": 12700 }, { "epoch": 785.1851851851852, "learning_rate": 8.275376515244275e-07, "loss": 2.0322, "step": 12720 }, { "epoch": 786.4197530864197, "learning_rate": 8.272621525652014e-07, "loss": 1.9868, "step": 12740 }, { "epoch": 787.6543209876543, "learning_rate": 8.269866536059753e-07, "loss": 2.0095, "step": 12760 }, { "epoch": 788.8888888888889, "learning_rate": 8.267111546467491e-07, "loss": 2.0626, "step": 12780 }, { "epoch": 790.1234567901234, "learning_rate": 8.264356556875229e-07, "loss": 1.9852, "step": 12800 }, { "epoch": 791.358024691358, "learning_rate": 8.261601567282967e-07, "loss": 2.1492, "step": 12820 }, { "epoch": 792.5925925925926, "learning_rate": 8.258846577690706e-07, "loss": 2.1298, "step": 12840 }, { "epoch": 793.8271604938271, "learning_rate": 8.256091588098445e-07, "loss": 2.0899, "step": 12860 }, { "epoch": 795.0617283950618, "learning_rate": 8.253336598506183e-07, "loss": 2.1285, "step": 12880 }, { "epoch": 796.2962962962963, "learning_rate": 8.250581608913922e-07, "loss": 2.1273, "step": 12900 }, { "epoch": 797.5308641975308, "learning_rate": 8.24782661932166e-07, "loss": 2.0783, "step": 12920 }, { "epoch": 798.7654320987655, "learning_rate": 8.245071629729399e-07, "loss": 2.0597, "step": 12940 }, { "epoch": 800.0, "learning_rate": 8.242316640137136e-07, "loss": 2.0814, "step": 12960 }, { "epoch": 801.2345679012345, "learning_rate": 8.239561650544875e-07, "loss": 2.0731, "step": 12980 }, { "epoch": 802.4691358024692, "learning_rate": 8.236806660952614e-07, "loss": 2.0179, "step": 13000 }, { "epoch": 803.7037037037037, "learning_rate": 8.234051671360352e-07, "loss": 2.056, "step": 13020 }, { "epoch": 804.9382716049382, "learning_rate": 8.231296681768091e-07, "loss": 1.9941, "step": 13040 }, { "epoch": 806.1728395061729, "learning_rate": 8.228541692175829e-07, "loss": 1.9576, "step": 13060 }, { "epoch": 807.4074074074074, "learning_rate": 8.225786702583567e-07, "loss": 1.9323, "step": 13080 }, { "epoch": 808.641975308642, "learning_rate": 8.223031712991305e-07, "loss": 1.9424, "step": 13100 }, { "epoch": 809.8765432098766, "learning_rate": 8.220276723399044e-07, "loss": 1.9416, "step": 13120 }, { "epoch": 811.1111111111111, "learning_rate": 8.217521733806783e-07, "loss": 1.9588, "step": 13140 }, { "epoch": 812.3456790123457, "learning_rate": 8.214766744214521e-07, "loss": 1.91, "step": 13160 }, { "epoch": 813.5802469135803, "learning_rate": 8.21201175462226e-07, "loss": 1.9721, "step": 13180 }, { "epoch": 814.8148148148148, "learning_rate": 8.209256765029998e-07, "loss": 1.912, "step": 13200 }, { "epoch": 816.0493827160494, "learning_rate": 8.206501775437737e-07, "loss": 1.9495, "step": 13220 }, { "epoch": 817.283950617284, "learning_rate": 8.203746785845476e-07, "loss": 1.9077, "step": 13240 }, { "epoch": 818.5185185185185, "learning_rate": 8.200991796253214e-07, "loss": 1.9338, "step": 13260 }, { "epoch": 819.7530864197531, "learning_rate": 8.198236806660953e-07, "loss": 1.9473, "step": 13280 }, { "epoch": 820.9876543209876, "learning_rate": 8.195481817068691e-07, "loss": 1.9889, "step": 13300 }, { "epoch": 822.2222222222222, "learning_rate": 8.19272682747643e-07, "loss": 1.9169, "step": 13320 }, { "epoch": 823.4567901234568, "learning_rate": 8.189971837884167e-07, "loss": 1.9493, "step": 13340 }, { "epoch": 824.6913580246913, "learning_rate": 8.187216848291906e-07, "loss": 1.9277, "step": 13360 }, { "epoch": 825.925925925926, "learning_rate": 8.184461858699645e-07, "loss": 1.8767, "step": 13380 }, { "epoch": 827.1604938271605, "learning_rate": 8.181706869107383e-07, "loss": 1.9362, "step": 13400 }, { "epoch": 828.395061728395, "learning_rate": 8.178951879515122e-07, "loss": 1.8912, "step": 13420 }, { "epoch": 829.6296296296297, "learning_rate": 8.17619688992286e-07, "loss": 1.9401, "step": 13440 }, { "epoch": 830.8641975308642, "learning_rate": 8.173441900330599e-07, "loss": 1.9745, "step": 13460 }, { "epoch": 832.0987654320987, "learning_rate": 8.170686910738338e-07, "loss": 1.9682, "step": 13480 }, { "epoch": 833.3333333333334, "learning_rate": 8.167931921146075e-07, "loss": 1.8886, "step": 13500 }, { "epoch": 834.5679012345679, "learning_rate": 8.165176931553814e-07, "loss": 1.858, "step": 13520 }, { "epoch": 835.8024691358024, "learning_rate": 8.162421941961552e-07, "loss": 1.8989, "step": 13540 }, { "epoch": 837.0370370370371, "learning_rate": 8.159666952369291e-07, "loss": 1.9077, "step": 13560 }, { "epoch": 838.2716049382716, "learning_rate": 8.156911962777029e-07, "loss": 1.8887, "step": 13580 }, { "epoch": 839.5061728395061, "learning_rate": 8.154156973184767e-07, "loss": 1.8853, "step": 13600 }, { "epoch": 840.7407407407408, "learning_rate": 8.151401983592506e-07, "loss": 1.8671, "step": 13620 }, { "epoch": 841.9753086419753, "learning_rate": 8.148646994000244e-07, "loss": 1.9438, "step": 13640 }, { "epoch": 843.2098765432099, "learning_rate": 8.145892004407983e-07, "loss": 1.9565, "step": 13660 }, { "epoch": 844.4444444444445, "learning_rate": 8.14313701481572e-07, "loss": 1.927, "step": 13680 }, { "epoch": 845.679012345679, "learning_rate": 8.14038202522346e-07, "loss": 1.9143, "step": 13700 }, { "epoch": 846.9135802469136, "learning_rate": 8.137627035631199e-07, "loss": 1.9466, "step": 13720 }, { "epoch": 848.1481481481482, "learning_rate": 8.134872046038937e-07, "loss": 1.9136, "step": 13740 }, { "epoch": 849.3827160493827, "learning_rate": 8.132117056446676e-07, "loss": 1.9526, "step": 13760 }, { "epoch": 850.6172839506173, "learning_rate": 8.129362066854413e-07, "loss": 1.9294, "step": 13780 }, { "epoch": 851.8518518518518, "learning_rate": 8.126607077262152e-07, "loss": 1.9162, "step": 13800 }, { "epoch": 853.0864197530864, "learning_rate": 8.12385208766989e-07, "loss": 1.9758, "step": 13820 }, { "epoch": 854.320987654321, "learning_rate": 8.121097098077629e-07, "loss": 1.9344, "step": 13840 }, { "epoch": 855.5555555555555, "learning_rate": 8.118342108485368e-07, "loss": 1.9263, "step": 13860 }, { "epoch": 856.7901234567901, "learning_rate": 8.115587118893105e-07, "loss": 1.9164, "step": 13880 }, { "epoch": 858.0246913580247, "learning_rate": 8.112832129300844e-07, "loss": 1.9291, "step": 13900 }, { "epoch": 859.2592592592592, "learning_rate": 8.110077139708582e-07, "loss": 1.9555, "step": 13920 }, { "epoch": 860.4938271604939, "learning_rate": 8.107322150116321e-07, "loss": 1.9301, "step": 13940 }, { "epoch": 861.7283950617284, "learning_rate": 8.104567160524059e-07, "loss": 1.9491, "step": 13960 }, { "epoch": 862.9629629629629, "learning_rate": 8.101812170931799e-07, "loss": 1.9694, "step": 13980 }, { "epoch": 864.1975308641976, "learning_rate": 8.099057181339538e-07, "loss": 1.9086, "step": 14000 }, { "epoch": 865.4320987654321, "learning_rate": 8.096302191747276e-07, "loss": 1.9431, "step": 14020 }, { "epoch": 866.6666666666666, "learning_rate": 8.093547202155014e-07, "loss": 1.9224, "step": 14040 }, { "epoch": 867.9012345679013, "learning_rate": 8.090792212562752e-07, "loss": 1.9335, "step": 14060 }, { "epoch": 869.1358024691358, "learning_rate": 8.088037222970491e-07, "loss": 1.9382, "step": 14080 }, { "epoch": 870.3703703703703, "learning_rate": 8.08528223337823e-07, "loss": 1.9268, "step": 14100 }, { "epoch": 871.604938271605, "learning_rate": 8.082527243785968e-07, "loss": 1.9787, "step": 14120 }, { "epoch": 872.8395061728395, "learning_rate": 8.079772254193706e-07, "loss": 1.9271, "step": 14140 }, { "epoch": 874.074074074074, "learning_rate": 8.077017264601444e-07, "loss": 1.9718, "step": 14160 }, { "epoch": 875.3086419753087, "learning_rate": 8.074262275009183e-07, "loss": 1.9799, "step": 14180 }, { "epoch": 876.5432098765432, "learning_rate": 8.071507285416921e-07, "loss": 1.9316, "step": 14200 }, { "epoch": 877.7777777777778, "learning_rate": 8.06875229582466e-07, "loss": 1.8673, "step": 14220 }, { "epoch": 879.0123456790124, "learning_rate": 8.065997306232399e-07, "loss": 1.9195, "step": 14240 }, { "epoch": 880.2469135802469, "learning_rate": 8.063242316640137e-07, "loss": 1.8892, "step": 14260 }, { "epoch": 881.4814814814815, "learning_rate": 8.060487327047876e-07, "loss": 1.9126, "step": 14280 }, { "epoch": 882.716049382716, "learning_rate": 8.057732337455613e-07, "loss": 1.8663, "step": 14300 }, { "epoch": 883.9506172839506, "learning_rate": 8.054977347863352e-07, "loss": 1.9217, "step": 14320 }, { "epoch": 885.1851851851852, "learning_rate": 8.05222235827109e-07, "loss": 1.9216, "step": 14340 }, { "epoch": 886.4197530864197, "learning_rate": 8.049467368678829e-07, "loss": 1.904, "step": 14360 }, { "epoch": 887.6543209876543, "learning_rate": 8.046712379086568e-07, "loss": 1.8645, "step": 14380 }, { "epoch": 888.8888888888889, "learning_rate": 8.043957389494306e-07, "loss": 1.8726, "step": 14400 }, { "epoch": 890.1234567901234, "learning_rate": 8.041202399902044e-07, "loss": 1.9218, "step": 14420 }, { "epoch": 891.358024691358, "learning_rate": 8.038447410309782e-07, "loss": 1.875, "step": 14440 }, { "epoch": 892.5925925925926, "learning_rate": 8.035692420717521e-07, "loss": 1.8218, "step": 14460 }, { "epoch": 893.8271604938271, "learning_rate": 8.03293743112526e-07, "loss": 1.8583, "step": 14480 }, { "epoch": 895.0617283950618, "learning_rate": 8.030182441532998e-07, "loss": 1.8696, "step": 14500 }, { "epoch": 896.2962962962963, "learning_rate": 8.027427451940737e-07, "loss": 1.883, "step": 14520 }, { "epoch": 897.5308641975308, "learning_rate": 8.024672462348475e-07, "loss": 1.9137, "step": 14540 }, { "epoch": 898.7654320987655, "learning_rate": 8.021917472756214e-07, "loss": 1.871, "step": 14560 }, { "epoch": 900.0, "learning_rate": 8.019162483163951e-07, "loss": 1.8984, "step": 14580 }, { "epoch": 901.2345679012345, "learning_rate": 8.01640749357169e-07, "loss": 1.8821, "step": 14600 }, { "epoch": 902.4691358024692, "learning_rate": 8.013652503979429e-07, "loss": 1.9334, "step": 14620 }, { "epoch": 903.7037037037037, "learning_rate": 8.010897514387167e-07, "loss": 1.9117, "step": 14640 }, { "epoch": 904.9382716049382, "learning_rate": 8.008142524794907e-07, "loss": 1.9062, "step": 14660 }, { "epoch": 906.1728395061729, "learning_rate": 8.005387535202643e-07, "loss": 1.8953, "step": 14680 }, { "epoch": 907.4074074074074, "learning_rate": 8.002632545610383e-07, "loss": 1.8603, "step": 14700 }, { "epoch": 908.641975308642, "learning_rate": 7.999877556018122e-07, "loss": 1.8817, "step": 14720 }, { "epoch": 909.8765432098766, "learning_rate": 7.99712256642586e-07, "loss": 1.8897, "step": 14740 }, { "epoch": 911.1111111111111, "learning_rate": 7.994367576833599e-07, "loss": 1.8911, "step": 14760 }, { "epoch": 912.3456790123457, "learning_rate": 7.991612587241337e-07, "loss": 1.9044, "step": 14780 }, { "epoch": 913.5802469135803, "learning_rate": 7.988857597649076e-07, "loss": 1.8823, "step": 14800 }, { "epoch": 914.8148148148148, "learning_rate": 7.986102608056815e-07, "loss": 1.8894, "step": 14820 }, { "epoch": 916.0493827160494, "learning_rate": 7.983347618464552e-07, "loss": 1.8829, "step": 14840 }, { "epoch": 917.283950617284, "learning_rate": 7.980592628872291e-07, "loss": 1.8715, "step": 14860 }, { "epoch": 918.5185185185185, "learning_rate": 7.977837639280029e-07, "loss": 1.8413, "step": 14880 }, { "epoch": 919.7530864197531, "learning_rate": 7.975082649687768e-07, "loss": 1.8672, "step": 14900 }, { "epoch": 920.9876543209876, "learning_rate": 7.972327660095507e-07, "loss": 1.9188, "step": 14920 }, { "epoch": 922.2222222222222, "learning_rate": 7.969572670503245e-07, "loss": 1.8789, "step": 14940 }, { "epoch": 923.4567901234568, "learning_rate": 7.966817680910983e-07, "loss": 1.8792, "step": 14960 }, { "epoch": 924.6913580246913, "learning_rate": 7.964062691318721e-07, "loss": 1.9256, "step": 14980 }, { "epoch": 925.925925925926, "learning_rate": 7.96130770172646e-07, "loss": 1.8604, "step": 15000 }, { "epoch": 927.1604938271605, "learning_rate": 7.958552712134197e-07, "loss": 1.8824, "step": 15020 }, { "epoch": 928.395061728395, "learning_rate": 7.955797722541937e-07, "loss": 1.845, "step": 15040 }, { "epoch": 929.6296296296297, "learning_rate": 7.953042732949675e-07, "loss": 1.8786, "step": 15060 }, { "epoch": 930.8641975308642, "learning_rate": 7.950287743357414e-07, "loss": 1.8718, "step": 15080 }, { "epoch": 932.0987654320987, "learning_rate": 7.947532753765153e-07, "loss": 1.8616, "step": 15100 }, { "epoch": 933.3333333333334, "learning_rate": 7.94477776417289e-07, "loss": 1.8385, "step": 15120 }, { "epoch": 934.5679012345679, "learning_rate": 7.942022774580629e-07, "loss": 1.8861, "step": 15140 }, { "epoch": 935.8024691358024, "learning_rate": 7.939267784988367e-07, "loss": 1.8108, "step": 15160 }, { "epoch": 937.0370370370371, "learning_rate": 7.936512795396106e-07, "loss": 1.8679, "step": 15180 }, { "epoch": 938.2716049382716, "learning_rate": 7.933757805803845e-07, "loss": 1.8664, "step": 15200 }, { "epoch": 939.5061728395061, "learning_rate": 7.931002816211582e-07, "loss": 1.8981, "step": 15220 }, { "epoch": 940.7407407407408, "learning_rate": 7.928247826619321e-07, "loss": 1.8679, "step": 15240 }, { "epoch": 941.9753086419753, "learning_rate": 7.925492837027059e-07, "loss": 1.8222, "step": 15260 }, { "epoch": 943.2098765432099, "learning_rate": 7.922737847434798e-07, "loss": 1.8878, "step": 15280 }, { "epoch": 944.4444444444445, "learning_rate": 7.919982857842536e-07, "loss": 1.9096, "step": 15300 }, { "epoch": 945.679012345679, "learning_rate": 7.917227868250275e-07, "loss": 1.9034, "step": 15320 }, { "epoch": 946.9135802469136, "learning_rate": 7.914472878658014e-07, "loss": 1.8802, "step": 15340 }, { "epoch": 948.1481481481482, "learning_rate": 7.911717889065752e-07, "loss": 1.935, "step": 15360 }, { "epoch": 949.3827160493827, "learning_rate": 7.908962899473489e-07, "loss": 1.8363, "step": 15380 }, { "epoch": 950.6172839506173, "learning_rate": 7.906207909881228e-07, "loss": 1.945, "step": 15400 }, { "epoch": 951.8518518518518, "learning_rate": 7.903452920288967e-07, "loss": 1.8431, "step": 15420 }, { "epoch": 953.0864197530864, "learning_rate": 7.900697930696705e-07, "loss": 1.9474, "step": 15440 }, { "epoch": 954.320987654321, "learning_rate": 7.897942941104444e-07, "loss": 1.8394, "step": 15460 }, { "epoch": 955.5555555555555, "learning_rate": 7.895187951512183e-07, "loss": 1.8496, "step": 15480 }, { "epoch": 956.7901234567901, "learning_rate": 7.892432961919921e-07, "loss": 1.8566, "step": 15500 }, { "epoch": 958.0246913580247, "learning_rate": 7.88967797232766e-07, "loss": 1.8255, "step": 15520 }, { "epoch": 959.2592592592592, "learning_rate": 7.886922982735398e-07, "loss": 1.8211, "step": 15540 }, { "epoch": 960.4938271604939, "learning_rate": 7.884167993143137e-07, "loss": 1.9098, "step": 15560 }, { "epoch": 961.7283950617284, "learning_rate": 7.881413003550876e-07, "loss": 1.8611, "step": 15580 }, { "epoch": 962.9629629629629, "learning_rate": 7.878658013958614e-07, "loss": 1.8489, "step": 15600 }, { "epoch": 964.1975308641976, "learning_rate": 7.875903024366353e-07, "loss": 1.8596, "step": 15620 }, { "epoch": 965.4320987654321, "learning_rate": 7.873148034774091e-07, "loss": 1.8228, "step": 15640 }, { "epoch": 966.6666666666666, "learning_rate": 7.870393045181829e-07, "loss": 1.8544, "step": 15660 }, { "epoch": 967.9012345679013, "learning_rate": 7.867638055589567e-07, "loss": 1.8485, "step": 15680 }, { "epoch": 969.1358024691358, "learning_rate": 7.864883065997306e-07, "loss": 1.8627, "step": 15700 }, { "epoch": 970.3703703703703, "learning_rate": 7.862128076405045e-07, "loss": 1.8513, "step": 15720 }, { "epoch": 971.604938271605, "learning_rate": 7.859373086812783e-07, "loss": 1.8237, "step": 15740 }, { "epoch": 972.8395061728395, "learning_rate": 7.856618097220521e-07, "loss": 1.8937, "step": 15760 }, { "epoch": 974.074074074074, "learning_rate": 7.853863107628259e-07, "loss": 1.8339, "step": 15780 }, { "epoch": 975.3086419753087, "learning_rate": 7.851108118035998e-07, "loss": 1.8441, "step": 15800 }, { "epoch": 976.5432098765432, "learning_rate": 7.848353128443737e-07, "loss": 1.8302, "step": 15820 }, { "epoch": 977.7777777777778, "learning_rate": 7.845598138851475e-07, "loss": 1.8206, "step": 15840 }, { "epoch": 979.0123456790124, "learning_rate": 7.842843149259214e-07, "loss": 1.8555, "step": 15860 }, { "epoch": 980.2469135802469, "learning_rate": 7.840088159666952e-07, "loss": 1.8069, "step": 15880 }, { "epoch": 981.4814814814815, "learning_rate": 7.837333170074691e-07, "loss": 1.8273, "step": 15900 }, { "epoch": 982.716049382716, "learning_rate": 7.834578180482428e-07, "loss": 1.7991, "step": 15920 }, { "epoch": 983.9506172839506, "learning_rate": 7.831823190890167e-07, "loss": 1.8335, "step": 15940 }, { "epoch": 985.1851851851852, "learning_rate": 7.829068201297906e-07, "loss": 1.8476, "step": 15960 }, { "epoch": 986.4197530864197, "learning_rate": 7.826313211705644e-07, "loss": 1.8351, "step": 15980 }, { "epoch": 987.6543209876543, "learning_rate": 7.823558222113382e-07, "loss": 1.8428, "step": 16000 }, { "epoch": 988.8888888888889, "learning_rate": 7.82080323252112e-07, "loss": 1.8176, "step": 16020 }, { "epoch": 990.1234567901234, "learning_rate": 7.818048242928859e-07, "loss": 1.83, "step": 16040 }, { "epoch": 991.358024691358, "learning_rate": 7.815293253336597e-07, "loss": 1.8648, "step": 16060 }, { "epoch": 992.5925925925926, "learning_rate": 7.812538263744336e-07, "loss": 1.8545, "step": 16080 }, { "epoch": 993.8271604938271, "learning_rate": 7.809783274152075e-07, "loss": 1.8217, "step": 16100 }, { "epoch": 995.0617283950618, "learning_rate": 7.807028284559813e-07, "loss": 1.7575, "step": 16120 }, { "epoch": 996.2962962962963, "learning_rate": 7.804273294967552e-07, "loss": 1.7868, "step": 16140 }, { "epoch": 997.5308641975308, "learning_rate": 7.80151830537529e-07, "loss": 1.8232, "step": 16160 }, { "epoch": 998.7654320987655, "learning_rate": 7.798763315783029e-07, "loss": 1.7882, "step": 16180 }, { "epoch": 1000.0, "learning_rate": 7.796008326190768e-07, "loss": 1.8071, "step": 16200 }, { "epoch": 1001.2345679012345, "learning_rate": 7.793253336598506e-07, "loss": 1.8153, "step": 16220 }, { "epoch": 1002.4691358024692, "learning_rate": 7.790498347006245e-07, "loss": 1.7843, "step": 16240 }, { "epoch": 1003.7037037037037, "learning_rate": 7.787743357413984e-07, "loss": 1.8265, "step": 16260 }, { "epoch": 1004.9382716049382, "learning_rate": 7.784988367821722e-07, "loss": 1.8141, "step": 16280 }, { "epoch": 1006.1728395061729, "learning_rate": 7.78223337822946e-07, "loss": 1.8397, "step": 16300 }, { "epoch": 1007.4074074074074, "learning_rate": 7.779478388637198e-07, "loss": 1.844, "step": 16320 }, { "epoch": 1008.641975308642, "learning_rate": 7.776723399044937e-07, "loss": 1.799, "step": 16340 }, { "epoch": 1009.8765432098766, "learning_rate": 7.773968409452675e-07, "loss": 1.8023, "step": 16360 }, { "epoch": 1011.1111111111111, "learning_rate": 7.771213419860414e-07, "loss": 1.8189, "step": 16380 }, { "epoch": 1012.3456790123457, "learning_rate": 7.768458430268152e-07, "loss": 1.7896, "step": 16400 }, { "epoch": 1013.5802469135803, "learning_rate": 7.765703440675891e-07, "loss": 1.7747, "step": 16420 }, { "epoch": 1014.8148148148148, "learning_rate": 7.76294845108363e-07, "loss": 1.8221, "step": 16440 }, { "epoch": 1016.0493827160494, "learning_rate": 7.760193461491367e-07, "loss": 1.8302, "step": 16460 }, { "epoch": 1017.283950617284, "learning_rate": 7.757438471899106e-07, "loss": 1.8115, "step": 16480 }, { "epoch": 1018.5185185185185, "learning_rate": 7.754683482306844e-07, "loss": 1.7948, "step": 16500 }, { "epoch": 1019.7530864197531, "learning_rate": 7.751928492714583e-07, "loss": 1.791, "step": 16520 }, { "epoch": 1020.9876543209876, "learning_rate": 7.749173503122321e-07, "loss": 1.8166, "step": 16540 }, { "epoch": 1022.2222222222222, "learning_rate": 7.746418513530059e-07, "loss": 1.8231, "step": 16560 }, { "epoch": 1023.4567901234568, "learning_rate": 7.743663523937798e-07, "loss": 1.7667, "step": 16580 }, { "epoch": 1024.6913580246915, "learning_rate": 7.740908534345536e-07, "loss": 1.8331, "step": 16600 }, { "epoch": 1025.9259259259259, "learning_rate": 7.738153544753275e-07, "loss": 1.8244, "step": 16620 }, { "epoch": 1027.1604938271605, "learning_rate": 7.735398555161013e-07, "loss": 1.8083, "step": 16640 }, { "epoch": 1028.3950617283951, "learning_rate": 7.732643565568752e-07, "loss": 1.9108, "step": 16660 }, { "epoch": 1029.6296296296296, "learning_rate": 7.729888575976491e-07, "loss": 1.8943, "step": 16680 }, { "epoch": 1030.8641975308642, "learning_rate": 7.727133586384229e-07, "loss": 1.8622, "step": 16700 }, { "epoch": 1032.0987654320988, "learning_rate": 7.724378596791968e-07, "loss": 1.8061, "step": 16720 }, { "epoch": 1033.3333333333333, "learning_rate": 7.721623607199706e-07, "loss": 1.8007, "step": 16740 }, { "epoch": 1034.567901234568, "learning_rate": 7.718868617607444e-07, "loss": 1.8463, "step": 16760 }, { "epoch": 1035.8024691358025, "learning_rate": 7.716113628015182e-07, "loss": 1.8533, "step": 16780 }, { "epoch": 1037.037037037037, "learning_rate": 7.713358638422921e-07, "loss": 1.8287, "step": 16800 }, { "epoch": 1038.2716049382716, "learning_rate": 7.710603648830659e-07, "loss": 1.8153, "step": 16820 }, { "epoch": 1039.5061728395062, "learning_rate": 7.707848659238397e-07, "loss": 1.7903, "step": 16840 }, { "epoch": 1040.7407407407406, "learning_rate": 7.705093669646136e-07, "loss": 1.8491, "step": 16860 }, { "epoch": 1041.9753086419753, "learning_rate": 7.702338680053874e-07, "loss": 1.8319, "step": 16880 }, { "epoch": 1043.20987654321, "learning_rate": 7.699583690461613e-07, "loss": 1.856, "step": 16900 }, { "epoch": 1044.4444444444443, "learning_rate": 7.696828700869351e-07, "loss": 1.8419, "step": 16920 }, { "epoch": 1045.679012345679, "learning_rate": 7.69407371127709e-07, "loss": 1.7753, "step": 16940 }, { "epoch": 1046.9135802469136, "learning_rate": 7.69131872168483e-07, "loss": 1.8129, "step": 16960 }, { "epoch": 1048.148148148148, "learning_rate": 7.688563732092568e-07, "loss": 1.7887, "step": 16980 }, { "epoch": 1049.3827160493827, "learning_rate": 7.685808742500307e-07, "loss": 1.8068, "step": 17000 }, { "epoch": 1050.6172839506173, "learning_rate": 7.683053752908044e-07, "loss": 1.8225, "step": 17020 }, { "epoch": 1051.851851851852, "learning_rate": 7.680298763315783e-07, "loss": 1.7596, "step": 17040 }, { "epoch": 1053.0864197530864, "learning_rate": 7.677543773723522e-07, "loss": 1.8343, "step": 17060 }, { "epoch": 1054.320987654321, "learning_rate": 7.674788784131259e-07, "loss": 1.7608, "step": 17080 }, { "epoch": 1055.5555555555557, "learning_rate": 7.672033794538998e-07, "loss": 1.8318, "step": 17100 }, { "epoch": 1056.79012345679, "learning_rate": 7.669278804946736e-07, "loss": 1.8261, "step": 17120 }, { "epoch": 1058.0246913580247, "learning_rate": 7.666523815354475e-07, "loss": 1.8249, "step": 17140 }, { "epoch": 1059.2592592592594, "learning_rate": 7.663768825762213e-07, "loss": 1.8174, "step": 17160 }, { "epoch": 1060.4938271604938, "learning_rate": 7.661013836169952e-07, "loss": 1.7851, "step": 17180 }, { "epoch": 1061.7283950617284, "learning_rate": 7.658258846577691e-07, "loss": 1.8058, "step": 17200 }, { "epoch": 1062.962962962963, "learning_rate": 7.655503856985429e-07, "loss": 1.8048, "step": 17220 }, { "epoch": 1064.1975308641975, "learning_rate": 7.652748867393169e-07, "loss": 1.7883, "step": 17240 }, { "epoch": 1065.432098765432, "learning_rate": 7.649993877800906e-07, "loss": 1.8055, "step": 17260 }, { "epoch": 1066.6666666666667, "learning_rate": 7.647238888208645e-07, "loss": 1.7948, "step": 17280 }, { "epoch": 1067.9012345679012, "learning_rate": 7.644483898616383e-07, "loss": 1.7925, "step": 17300 }, { "epoch": 1069.1358024691358, "learning_rate": 7.641728909024121e-07, "loss": 1.8294, "step": 17320 }, { "epoch": 1070.3703703703704, "learning_rate": 7.638973919431859e-07, "loss": 1.7673, "step": 17340 }, { "epoch": 1071.6049382716049, "learning_rate": 7.636218929839597e-07, "loss": 1.7872, "step": 17360 }, { "epoch": 1072.8395061728395, "learning_rate": 7.633463940247336e-07, "loss": 1.765, "step": 17380 }, { "epoch": 1074.0740740740741, "learning_rate": 7.630708950655074e-07, "loss": 1.7924, "step": 17400 }, { "epoch": 1075.3086419753085, "learning_rate": 7.627953961062813e-07, "loss": 1.7804, "step": 17420 }, { "epoch": 1076.5432098765432, "learning_rate": 7.625198971470552e-07, "loss": 1.7999, "step": 17440 }, { "epoch": 1077.7777777777778, "learning_rate": 7.62244398187829e-07, "loss": 1.8317, "step": 17460 }, { "epoch": 1079.0123456790122, "learning_rate": 7.619688992286029e-07, "loss": 1.7989, "step": 17480 }, { "epoch": 1080.2469135802469, "learning_rate": 7.616934002693767e-07, "loss": 1.8067, "step": 17500 }, { "epoch": 1081.4814814814815, "learning_rate": 7.614179013101506e-07, "loss": 1.798, "step": 17520 }, { "epoch": 1082.716049382716, "learning_rate": 7.611424023509245e-07, "loss": 1.7895, "step": 17540 }, { "epoch": 1083.9506172839506, "learning_rate": 7.608669033916982e-07, "loss": 1.7702, "step": 17560 }, { "epoch": 1085.1851851851852, "learning_rate": 7.605914044324721e-07, "loss": 1.7835, "step": 17580 }, { "epoch": 1086.4197530864199, "learning_rate": 7.603159054732459e-07, "loss": 1.7743, "step": 17600 }, { "epoch": 1087.6543209876543, "learning_rate": 7.600404065140197e-07, "loss": 1.835, "step": 17620 }, { "epoch": 1088.888888888889, "learning_rate": 7.597649075547935e-07, "loss": 1.7897, "step": 17640 }, { "epoch": 1090.1234567901236, "learning_rate": 7.594894085955674e-07, "loss": 1.7955, "step": 17660 }, { "epoch": 1091.358024691358, "learning_rate": 7.592139096363414e-07, "loss": 1.8113, "step": 17680 }, { "epoch": 1092.5925925925926, "learning_rate": 7.589384106771152e-07, "loss": 1.7714, "step": 17700 }, { "epoch": 1093.8271604938273, "learning_rate": 7.586629117178891e-07, "loss": 1.8433, "step": 17720 }, { "epoch": 1095.0617283950617, "learning_rate": 7.583874127586629e-07, "loss": 1.8022, "step": 17740 }, { "epoch": 1096.2962962962963, "learning_rate": 7.581119137994368e-07, "loss": 1.7671, "step": 17760 }, { "epoch": 1097.530864197531, "learning_rate": 7.578364148402107e-07, "loss": 1.7749, "step": 17780 }, { "epoch": 1098.7654320987654, "learning_rate": 7.575609158809845e-07, "loss": 1.7352, "step": 17800 }, { "epoch": 1100.0, "learning_rate": 7.572854169217584e-07, "loss": 1.7511, "step": 17820 }, { "epoch": 1101.2345679012346, "learning_rate": 7.570099179625321e-07, "loss": 1.7498, "step": 17840 }, { "epoch": 1102.469135802469, "learning_rate": 7.56734419003306e-07, "loss": 1.771, "step": 17860 }, { "epoch": 1103.7037037037037, "learning_rate": 7.564589200440798e-07, "loss": 1.7921, "step": 17880 }, { "epoch": 1104.9382716049383, "learning_rate": 7.561834210848536e-07, "loss": 1.7689, "step": 17900 }, { "epoch": 1106.1728395061727, "learning_rate": 7.559079221256275e-07, "loss": 1.7358, "step": 17920 }, { "epoch": 1107.4074074074074, "learning_rate": 7.556324231664013e-07, "loss": 1.7594, "step": 17940 }, { "epoch": 1108.641975308642, "learning_rate": 7.553569242071753e-07, "loss": 1.7568, "step": 17960 }, { "epoch": 1109.8765432098764, "learning_rate": 7.55081425247949e-07, "loss": 1.7872, "step": 17980 }, { "epoch": 1111.111111111111, "learning_rate": 7.548059262887229e-07, "loss": 1.8052, "step": 18000 }, { "epoch": 1112.3456790123457, "learning_rate": 7.545304273294968e-07, "loss": 1.757, "step": 18020 }, { "epoch": 1113.5802469135801, "learning_rate": 7.542549283702706e-07, "loss": 1.7756, "step": 18040 }, { "epoch": 1114.8148148148148, "learning_rate": 7.539794294110445e-07, "loss": 1.7773, "step": 18060 }, { "epoch": 1116.0493827160494, "learning_rate": 7.537039304518183e-07, "loss": 1.8416, "step": 18080 }, { "epoch": 1117.283950617284, "learning_rate": 7.534284314925921e-07, "loss": 1.7244, "step": 18100 }, { "epoch": 1118.5185185185185, "learning_rate": 7.531529325333659e-07, "loss": 1.7225, "step": 18120 }, { "epoch": 1119.7530864197531, "learning_rate": 7.528774335741398e-07, "loss": 1.7795, "step": 18140 }, { "epoch": 1120.9876543209878, "learning_rate": 7.526019346149136e-07, "loss": 1.768, "step": 18160 }, { "epoch": 1122.2222222222222, "learning_rate": 7.523264356556874e-07, "loss": 1.7673, "step": 18180 }, { "epoch": 1123.4567901234568, "learning_rate": 7.520509366964613e-07, "loss": 1.7372, "step": 18200 }, { "epoch": 1124.6913580246915, "learning_rate": 7.517754377372351e-07, "loss": 1.7272, "step": 18220 }, { "epoch": 1125.9259259259259, "learning_rate": 7.51499938778009e-07, "loss": 1.7696, "step": 18240 }, { "epoch": 1127.1604938271605, "learning_rate": 7.512244398187828e-07, "loss": 1.819, "step": 18260 }, { "epoch": 1128.3950617283951, "learning_rate": 7.509489408595567e-07, "loss": 1.7688, "step": 18280 }, { "epoch": 1129.6296296296296, "learning_rate": 7.506734419003306e-07, "loss": 1.7766, "step": 18300 }, { "epoch": 1130.8641975308642, "learning_rate": 7.503979429411043e-07, "loss": 1.7581, "step": 18320 }, { "epoch": 1132.0987654320988, "learning_rate": 7.501224439818783e-07, "loss": 1.7735, "step": 18340 }, { "epoch": 1133.3333333333333, "learning_rate": 7.498469450226521e-07, "loss": 1.7485, "step": 18360 }, { "epoch": 1134.567901234568, "learning_rate": 7.495714460634259e-07, "loss": 1.7472, "step": 18380 }, { "epoch": 1135.8024691358025, "learning_rate": 7.492959471041998e-07, "loss": 1.749, "step": 18400 }, { "epoch": 1137.037037037037, "learning_rate": 7.490204481449736e-07, "loss": 1.7946, "step": 18420 }, { "epoch": 1138.2716049382716, "learning_rate": 7.487449491857475e-07, "loss": 1.7932, "step": 18440 }, { "epoch": 1139.5061728395062, "learning_rate": 7.484694502265213e-07, "loss": 1.7318, "step": 18460 }, { "epoch": 1140.7407407407406, "learning_rate": 7.481939512672952e-07, "loss": 1.7591, "step": 18480 }, { "epoch": 1141.9753086419753, "learning_rate": 7.47918452308069e-07, "loss": 1.7951, "step": 18500 }, { "epoch": 1143.20987654321, "learning_rate": 7.476429533488429e-07, "loss": 1.7578, "step": 18520 }, { "epoch": 1144.4444444444443, "learning_rate": 7.473674543896168e-07, "loss": 1.7653, "step": 18540 }, { "epoch": 1145.679012345679, "learning_rate": 7.470919554303906e-07, "loss": 1.7731, "step": 18560 }, { "epoch": 1146.9135802469136, "learning_rate": 7.468164564711646e-07, "loss": 1.7214, "step": 18580 }, { "epoch": 1148.148148148148, "learning_rate": 7.465409575119383e-07, "loss": 1.775, "step": 18600 }, { "epoch": 1149.3827160493827, "learning_rate": 7.462654585527122e-07, "loss": 1.7218, "step": 18620 }, { "epoch": 1150.6172839506173, "learning_rate": 7.45989959593486e-07, "loss": 1.7347, "step": 18640 }, { "epoch": 1151.851851851852, "learning_rate": 7.457144606342598e-07, "loss": 1.7236, "step": 18660 }, { "epoch": 1153.0864197530864, "learning_rate": 7.454389616750337e-07, "loss": 1.7785, "step": 18680 }, { "epoch": 1154.320987654321, "learning_rate": 7.451634627158074e-07, "loss": 1.7863, "step": 18700 }, { "epoch": 1155.5555555555557, "learning_rate": 7.448879637565813e-07, "loss": 1.7431, "step": 18720 }, { "epoch": 1156.79012345679, "learning_rate": 7.446124647973551e-07, "loss": 1.8058, "step": 18740 }, { "epoch": 1158.0246913580247, "learning_rate": 7.44336965838129e-07, "loss": 1.7335, "step": 18760 }, { "epoch": 1159.2592592592594, "learning_rate": 7.440614668789029e-07, "loss": 1.7839, "step": 18780 }, { "epoch": 1160.4938271604938, "learning_rate": 7.437859679196767e-07, "loss": 1.7525, "step": 18800 }, { "epoch": 1161.7283950617284, "learning_rate": 7.435104689604506e-07, "loss": 1.7545, "step": 18820 }, { "epoch": 1162.962962962963, "learning_rate": 7.432349700012244e-07, "loss": 1.7494, "step": 18840 }, { "epoch": 1164.1975308641975, "learning_rate": 7.429594710419983e-07, "loss": 1.7774, "step": 18860 }, { "epoch": 1165.432098765432, "learning_rate": 7.426839720827722e-07, "loss": 1.7327, "step": 18880 }, { "epoch": 1166.6666666666667, "learning_rate": 7.424084731235459e-07, "loss": 1.7474, "step": 18900 }, { "epoch": 1167.9012345679012, "learning_rate": 7.421329741643198e-07, "loss": 1.7702, "step": 18920 }, { "epoch": 1169.1358024691358, "learning_rate": 7.418574752050936e-07, "loss": 1.7782, "step": 18940 }, { "epoch": 1170.3703703703704, "learning_rate": 7.415819762458675e-07, "loss": 1.7498, "step": 18960 }, { "epoch": 1171.6049382716049, "learning_rate": 7.413064772866412e-07, "loss": 1.7764, "step": 18980 }, { "epoch": 1172.8395061728395, "learning_rate": 7.410309783274151e-07, "loss": 1.6855, "step": 19000 }, { "epoch": 1174.0740740740741, "learning_rate": 7.40755479368189e-07, "loss": 1.764, "step": 19020 }, { "epoch": 1175.3086419753085, "learning_rate": 7.404799804089627e-07, "loss": 1.7617, "step": 19040 }, { "epoch": 1176.5432098765432, "learning_rate": 7.402044814497367e-07, "loss": 1.7556, "step": 19060 }, { "epoch": 1177.7777777777778, "learning_rate": 7.399289824905105e-07, "loss": 1.7351, "step": 19080 }, { "epoch": 1179.0123456790122, "learning_rate": 7.396534835312844e-07, "loss": 1.7619, "step": 19100 }, { "epoch": 1180.2469135802469, "learning_rate": 7.393779845720582e-07, "loss": 1.7406, "step": 19120 }, { "epoch": 1181.4814814814815, "learning_rate": 7.391024856128321e-07, "loss": 1.7334, "step": 19140 }, { "epoch": 1182.716049382716, "learning_rate": 7.38826986653606e-07, "loss": 1.7695, "step": 19160 }, { "epoch": 1183.9506172839506, "learning_rate": 7.385514876943798e-07, "loss": 1.7869, "step": 19180 }, { "epoch": 1185.1851851851852, "learning_rate": 7.382759887351537e-07, "loss": 1.7892, "step": 19200 }, { "epoch": 1186.4197530864199, "learning_rate": 7.380004897759275e-07, "loss": 1.7485, "step": 19220 }, { "epoch": 1187.6543209876543, "learning_rate": 7.377249908167013e-07, "loss": 1.7163, "step": 19240 }, { "epoch": 1188.888888888889, "learning_rate": 7.374494918574752e-07, "loss": 1.7288, "step": 19260 }, { "epoch": 1190.1234567901236, "learning_rate": 7.37173992898249e-07, "loss": 1.8203, "step": 19280 }, { "epoch": 1191.358024691358, "learning_rate": 7.36898493939023e-07, "loss": 1.7354, "step": 19300 }, { "epoch": 1192.5925925925926, "learning_rate": 7.366229949797967e-07, "loss": 1.7581, "step": 19320 }, { "epoch": 1193.8271604938273, "learning_rate": 7.363474960205706e-07, "loss": 1.7342, "step": 19340 }, { "epoch": 1195.0617283950617, "learning_rate": 7.360719970613444e-07, "loss": 1.7549, "step": 19360 }, { "epoch": 1196.2962962962963, "learning_rate": 7.357964981021183e-07, "loss": 1.7604, "step": 19380 }, { "epoch": 1197.530864197531, "learning_rate": 7.355209991428922e-07, "loss": 1.7603, "step": 19400 }, { "epoch": 1198.7654320987654, "learning_rate": 7.35245500183666e-07, "loss": 1.7071, "step": 19420 }, { "epoch": 1200.0, "learning_rate": 7.349700012244398e-07, "loss": 1.7339, "step": 19440 }, { "epoch": 1201.2345679012346, "learning_rate": 7.346945022652136e-07, "loss": 1.7343, "step": 19460 }, { "epoch": 1202.469135802469, "learning_rate": 7.344190033059875e-07, "loss": 1.7962, "step": 19480 }, { "epoch": 1203.7037037037037, "learning_rate": 7.341435043467614e-07, "loss": 1.7555, "step": 19500 }, { "epoch": 1204.9382716049383, "learning_rate": 7.338680053875351e-07, "loss": 1.7909, "step": 19520 }, { "epoch": 1206.1728395061727, "learning_rate": 7.33592506428309e-07, "loss": 1.7417, "step": 19540 }, { "epoch": 1207.4074074074074, "learning_rate": 7.333170074690828e-07, "loss": 1.7785, "step": 19560 }, { "epoch": 1208.641975308642, "learning_rate": 7.330415085098567e-07, "loss": 1.7561, "step": 19580 }, { "epoch": 1209.8765432098764, "learning_rate": 7.327660095506305e-07, "loss": 1.7791, "step": 19600 }, { "epoch": 1211.111111111111, "learning_rate": 7.324905105914044e-07, "loss": 1.7881, "step": 19620 }, { "epoch": 1212.3456790123457, "learning_rate": 7.322150116321783e-07, "loss": 1.8316, "step": 19640 }, { "epoch": 1213.5802469135801, "learning_rate": 7.31939512672952e-07, "loss": 2.3297, "step": 19660 }, { "epoch": 1214.8148148148148, "learning_rate": 7.31664013713726e-07, "loss": 3.0734, "step": 19680 }, { "epoch": 1216.0493827160494, "learning_rate": 7.313885147544998e-07, "loss": 3.3818, "step": 19700 }, { "epoch": 1217.283950617284, "learning_rate": 7.311130157952736e-07, "loss": 3.4044, "step": 19720 }, { "epoch": 1218.5185185185185, "learning_rate": 7.308375168360474e-07, "loss": 3.3076, "step": 19740 }, { "epoch": 1219.7530864197531, "learning_rate": 7.305620178768213e-07, "loss": 3.3052, "step": 19760 }, { "epoch": 1220.9876543209878, "learning_rate": 7.302865189175951e-07, "loss": 3.2461, "step": 19780 }, { "epoch": 1222.2222222222222, "learning_rate": 7.300110199583689e-07, "loss": 3.2139, "step": 19800 }, { "epoch": 1223.4567901234568, "learning_rate": 7.297355209991428e-07, "loss": 3.2006, "step": 19820 }, { "epoch": 1224.6913580246915, "learning_rate": 7.294600220399166e-07, "loss": 3.2578, "step": 19840 }, { "epoch": 1225.9259259259259, "learning_rate": 7.291845230806905e-07, "loss": 3.1139, "step": 19860 }, { "epoch": 1227.1604938271605, "learning_rate": 7.289090241214644e-07, "loss": 3.1, "step": 19880 }, { "epoch": 1228.3950617283951, "learning_rate": 7.286335251622382e-07, "loss": 3.158, "step": 19900 }, { "epoch": 1229.6296296296296, "learning_rate": 7.283580262030121e-07, "loss": 3.1806, "step": 19920 }, { "epoch": 1230.8641975308642, "learning_rate": 7.28082527243786e-07, "loss": 3.0692, "step": 19940 }, { "epoch": 1232.0987654320988, "learning_rate": 7.278070282845599e-07, "loss": 3.1239, "step": 19960 }, { "epoch": 1233.3333333333333, "learning_rate": 7.275315293253336e-07, "loss": 3.1122, "step": 19980 }, { "epoch": 1234.567901234568, "learning_rate": 7.272560303661075e-07, "loss": 3.0677, "step": 20000 }, { "epoch": 1235.8024691358025, "learning_rate": 7.269805314068814e-07, "loss": 3.1123, "step": 20020 }, { "epoch": 1237.037037037037, "learning_rate": 7.267050324476552e-07, "loss": 3.113, "step": 20040 }, { "epoch": 1238.2716049382716, "learning_rate": 7.26429533488429e-07, "loss": 3.0655, "step": 20060 }, { "epoch": 1239.5061728395062, "learning_rate": 7.261540345292028e-07, "loss": 3.1083, "step": 20080 }, { "epoch": 1240.7407407407406, "learning_rate": 7.258785355699767e-07, "loss": 3.0929, "step": 20100 }, { "epoch": 1241.9753086419753, "learning_rate": 7.256030366107506e-07, "loss": 3.0803, "step": 20120 }, { "epoch": 1243.20987654321, "learning_rate": 7.253275376515244e-07, "loss": 3.1016, "step": 20140 }, { "epoch": 1244.4444444444443, "learning_rate": 7.250520386922983e-07, "loss": 3.0868, "step": 20160 }, { "epoch": 1245.679012345679, "learning_rate": 7.247765397330721e-07, "loss": 3.053, "step": 20180 }, { "epoch": 1246.9135802469136, "learning_rate": 7.24501040773846e-07, "loss": 3.0749, "step": 20200 }, { "epoch": 1248.148148148148, "learning_rate": 7.242255418146198e-07, "loss": 3.125, "step": 20220 }, { "epoch": 1249.3827160493827, "learning_rate": 7.239500428553937e-07, "loss": 3.1073, "step": 20240 }, { "epoch": 1250.6172839506173, "learning_rate": 7.236745438961675e-07, "loss": 3.0613, "step": 20260 }, { "epoch": 1251.851851851852, "learning_rate": 7.233990449369414e-07, "loss": 3.0474, "step": 20280 }, { "epoch": 1253.0864197530864, "learning_rate": 7.231235459777152e-07, "loss": 3.0647, "step": 20300 }, { "epoch": 1254.320987654321, "learning_rate": 7.228480470184889e-07, "loss": 3.0641, "step": 20320 }, { "epoch": 1255.5555555555557, "learning_rate": 7.225725480592628e-07, "loss": 3.0684, "step": 20340 }, { "epoch": 1256.79012345679, "learning_rate": 7.222970491000366e-07, "loss": 3.0527, "step": 20360 }, { "epoch": 1258.0246913580247, "learning_rate": 7.220215501408105e-07, "loss": 3.1258, "step": 20380 }, { "epoch": 1259.2592592592594, "learning_rate": 7.217460511815844e-07, "loss": 3.0887, "step": 20400 }, { "epoch": 1260.4938271604938, "learning_rate": 7.214705522223582e-07, "loss": 3.1021, "step": 20420 }, { "epoch": 1261.7283950617284, "learning_rate": 7.211950532631321e-07, "loss": 3.034, "step": 20440 }, { "epoch": 1262.962962962963, "learning_rate": 7.209195543039059e-07, "loss": 3.0488, "step": 20460 }, { "epoch": 1264.1975308641975, "learning_rate": 7.206440553446798e-07, "loss": 3.1048, "step": 20480 }, { "epoch": 1265.432098765432, "learning_rate": 7.203685563854537e-07, "loss": 3.0026, "step": 20500 }, { "epoch": 1266.6666666666667, "learning_rate": 7.200930574262274e-07, "loss": 2.9731, "step": 20520 }, { "epoch": 1267.9012345679012, "learning_rate": 7.198175584670013e-07, "loss": 3.0784, "step": 20540 }, { "epoch": 1269.1358024691358, "learning_rate": 7.195420595077751e-07, "loss": 3.0579, "step": 20560 }, { "epoch": 1270.3703703703704, "learning_rate": 7.19266560548549e-07, "loss": 3.1232, "step": 20580 }, { "epoch": 1271.6049382716049, "learning_rate": 7.189910615893227e-07, "loss": 3.0752, "step": 20600 }, { "epoch": 1272.8395061728395, "learning_rate": 7.187155626300966e-07, "loss": 2.9859, "step": 20620 }, { "epoch": 1274.0740740740741, "learning_rate": 7.184400636708705e-07, "loss": 3.0488, "step": 20640 }, { "epoch": 1275.3086419753085, "learning_rate": 7.181645647116444e-07, "loss": 3.1021, "step": 20660 }, { "epoch": 1276.5432098765432, "learning_rate": 7.178890657524183e-07, "loss": 3.0604, "step": 20680 }, { "epoch": 1277.7777777777778, "learning_rate": 7.176135667931921e-07, "loss": 3.0458, "step": 20700 }, { "epoch": 1279.0123456790122, "learning_rate": 7.17338067833966e-07, "loss": 3.0895, "step": 20720 }, { "epoch": 1280.2469135802469, "learning_rate": 7.170625688747399e-07, "loss": 2.9886, "step": 20740 }, { "epoch": 1281.4814814814815, "learning_rate": 7.167870699155137e-07, "loss": 3.086, "step": 20760 }, { "epoch": 1282.716049382716, "learning_rate": 7.165115709562876e-07, "loss": 3.006, "step": 20780 }, { "epoch": 1283.9506172839506, "learning_rate": 7.162360719970613e-07, "loss": 3.0638, "step": 20800 }, { "epoch": 1285.1851851851852, "learning_rate": 7.159605730378352e-07, "loss": 3.0063, "step": 20820 }, { "epoch": 1286.4197530864199, "learning_rate": 7.15685074078609e-07, "loss": 3.0392, "step": 20840 }, { "epoch": 1287.6543209876543, "learning_rate": 7.154095751193828e-07, "loss": 3.0259, "step": 20860 }, { "epoch": 1288.888888888889, "learning_rate": 7.151340761601567e-07, "loss": 3.0477, "step": 20880 }, { "epoch": 1290.1234567901236, "learning_rate": 7.148585772009305e-07, "loss": 3.0454, "step": 20900 }, { "epoch": 1291.358024691358, "learning_rate": 7.145830782417044e-07, "loss": 3.009, "step": 20920 }, { "epoch": 1292.5925925925926, "learning_rate": 7.143075792824782e-07, "loss": 3.0454, "step": 20940 }, { "epoch": 1293.8271604938273, "learning_rate": 7.140320803232521e-07, "loss": 3.0392, "step": 20960 }, { "epoch": 1295.0617283950617, "learning_rate": 7.13756581364026e-07, "loss": 3.046, "step": 20980 }, { "epoch": 1296.2962962962963, "learning_rate": 7.134810824047998e-07, "loss": 3.0165, "step": 21000 }, { "epoch": 1297.530864197531, "learning_rate": 7.132055834455737e-07, "loss": 3.031, "step": 21020 }, { "epoch": 1298.7654320987654, "learning_rate": 7.129300844863475e-07, "loss": 3.0662, "step": 21040 }, { "epoch": 1300.0, "learning_rate": 7.126545855271213e-07, "loss": 3.0765, "step": 21060 }, { "epoch": 1301.2345679012346, "learning_rate": 7.123790865678951e-07, "loss": 3.014, "step": 21080 }, { "epoch": 1302.469135802469, "learning_rate": 7.12103587608669e-07, "loss": 2.9775, "step": 21100 }, { "epoch": 1303.7037037037037, "learning_rate": 7.118280886494429e-07, "loss": 3.0938, "step": 21120 }, { "epoch": 1304.9382716049383, "learning_rate": 7.115525896902166e-07, "loss": 3.0424, "step": 21140 }, { "epoch": 1306.1728395061727, "learning_rate": 7.112770907309905e-07, "loss": 3.053, "step": 21160 }, { "epoch": 1307.4074074074074, "learning_rate": 7.110015917717643e-07, "loss": 3.0423, "step": 21180 }, { "epoch": 1308.641975308642, "learning_rate": 7.107260928125382e-07, "loss": 3.003, "step": 21200 }, { "epoch": 1309.8765432098764, "learning_rate": 7.10450593853312e-07, "loss": 3.0905, "step": 21220 }, { "epoch": 1311.111111111111, "learning_rate": 7.101750948940859e-07, "loss": 3.0676, "step": 21240 }, { "epoch": 1312.3456790123457, "learning_rate": 7.098995959348598e-07, "loss": 2.9869, "step": 21260 }, { "epoch": 1313.5802469135801, "learning_rate": 7.096240969756336e-07, "loss": 3.082, "step": 21280 }, { "epoch": 1314.8148148148148, "learning_rate": 7.093485980164075e-07, "loss": 2.9242, "step": 21300 }, { "epoch": 1316.0493827160494, "learning_rate": 7.090730990571813e-07, "loss": 3.0285, "step": 21320 }, { "epoch": 1317.283950617284, "learning_rate": 7.087976000979551e-07, "loss": 3.0237, "step": 21340 }, { "epoch": 1318.5185185185185, "learning_rate": 7.085221011387288e-07, "loss": 2.9958, "step": 21360 }, { "epoch": 1319.7530864197531, "learning_rate": 7.082466021795028e-07, "loss": 2.9874, "step": 21380 }, { "epoch": 1320.9876543209878, "learning_rate": 7.079711032202767e-07, "loss": 3.0377, "step": 21400 }, { "epoch": 1322.2222222222222, "learning_rate": 7.076956042610505e-07, "loss": 3.0517, "step": 21420 }, { "epoch": 1323.4567901234568, "learning_rate": 7.074201053018244e-07, "loss": 2.9989, "step": 21440 }, { "epoch": 1324.6913580246915, "learning_rate": 7.071446063425982e-07, "loss": 3.0555, "step": 21460 }, { "epoch": 1325.9259259259259, "learning_rate": 7.068691073833721e-07, "loss": 3.0958, "step": 21480 }, { "epoch": 1327.1604938271605, "learning_rate": 7.06593608424146e-07, "loss": 3.0202, "step": 21500 }, { "epoch": 1328.3950617283951, "learning_rate": 7.063181094649198e-07, "loss": 3.0313, "step": 21520 }, { "epoch": 1329.6296296296296, "learning_rate": 7.060426105056937e-07, "loss": 3.0543, "step": 21540 }, { "epoch": 1330.8641975308642, "learning_rate": 7.057671115464675e-07, "loss": 3.0223, "step": 21560 }, { "epoch": 1332.0987654320988, "learning_rate": 7.054916125872414e-07, "loss": 2.9771, "step": 21580 }, { "epoch": 1333.3333333333333, "learning_rate": 7.052161136280152e-07, "loss": 3.0458, "step": 21600 }, { "epoch": 1334.567901234568, "learning_rate": 7.049406146687891e-07, "loss": 3.0683, "step": 21620 }, { "epoch": 1335.8024691358025, "learning_rate": 7.046651157095629e-07, "loss": 3.0293, "step": 21640 }, { "epoch": 1337.037037037037, "learning_rate": 7.043896167503367e-07, "loss": 3.0793, "step": 21660 }, { "epoch": 1338.2716049382716, "learning_rate": 7.041141177911105e-07, "loss": 3.0518, "step": 21680 }, { "epoch": 1339.5061728395062, "learning_rate": 7.038386188318843e-07, "loss": 2.9899, "step": 21700 }, { "epoch": 1340.7407407407406, "learning_rate": 7.035631198726582e-07, "loss": 3.0019, "step": 21720 }, { "epoch": 1341.9753086419753, "learning_rate": 7.032876209134321e-07, "loss": 2.99, "step": 21740 }, { "epoch": 1343.20987654321, "learning_rate": 7.030121219542059e-07, "loss": 3.0385, "step": 21760 }, { "epoch": 1344.4444444444443, "learning_rate": 7.027366229949798e-07, "loss": 3.0483, "step": 21780 }, { "epoch": 1345.679012345679, "learning_rate": 7.024611240357536e-07, "loss": 3.0463, "step": 21800 }, { "epoch": 1346.9135802469136, "learning_rate": 7.021856250765275e-07, "loss": 3.0503, "step": 21820 }, { "epoch": 1348.148148148148, "learning_rate": 7.019101261173014e-07, "loss": 2.9832, "step": 21840 }, { "epoch": 1349.3827160493827, "learning_rate": 7.016346271580752e-07, "loss": 2.9928, "step": 21860 }, { "epoch": 1350.6172839506173, "learning_rate": 7.01359128198849e-07, "loss": 3.0193, "step": 21880 }, { "epoch": 1351.851851851852, "learning_rate": 7.010836292396228e-07, "loss": 3.0242, "step": 21900 }, { "epoch": 1353.0864197530864, "learning_rate": 7.008081302803967e-07, "loss": 3.0383, "step": 21920 }, { "epoch": 1354.320987654321, "learning_rate": 7.005326313211704e-07, "loss": 3.0641, "step": 21940 }, { "epoch": 1355.5555555555557, "learning_rate": 7.002571323619443e-07, "loss": 2.9852, "step": 21960 }, { "epoch": 1356.79012345679, "learning_rate": 6.999816334027183e-07, "loss": 3.0579, "step": 21980 }, { "epoch": 1358.0246913580247, "learning_rate": 6.99706134443492e-07, "loss": 3.1143, "step": 22000 }, { "epoch": 1359.2592592592594, "learning_rate": 6.994306354842659e-07, "loss": 2.9811, "step": 22020 }, { "epoch": 1360.4938271604938, "learning_rate": 6.991551365250397e-07, "loss": 2.9968, "step": 22040 }, { "epoch": 1361.7283950617284, "learning_rate": 6.988796375658136e-07, "loss": 2.9376, "step": 22060 }, { "epoch": 1362.962962962963, "learning_rate": 6.986041386065874e-07, "loss": 2.7708, "step": 22080 }, { "epoch": 1364.1975308641975, "learning_rate": 6.983286396473613e-07, "loss": 2.7872, "step": 22100 }, { "epoch": 1365.432098765432, "learning_rate": 6.980531406881352e-07, "loss": 2.9491, "step": 22120 }, { "epoch": 1366.6666666666667, "learning_rate": 6.977776417289089e-07, "loss": 2.9494, "step": 22140 }, { "epoch": 1367.9012345679012, "learning_rate": 6.975021427696829e-07, "loss": 2.952, "step": 22160 }, { "epoch": 1369.1358024691358, "learning_rate": 6.972266438104567e-07, "loss": 2.8546, "step": 22180 }, { "epoch": 1370.3703703703704, "learning_rate": 6.969511448512306e-07, "loss": 2.9859, "step": 22200 }, { "epoch": 1371.6049382716049, "learning_rate": 6.966756458920044e-07, "loss": 3.0157, "step": 22220 }, { "epoch": 1372.8395061728395, "learning_rate": 6.964001469327782e-07, "loss": 3.0706, "step": 22240 }, { "epoch": 1374.0740740740741, "learning_rate": 6.961246479735521e-07, "loss": 3.0858, "step": 22260 }, { "epoch": 1375.3086419753085, "learning_rate": 6.958491490143259e-07, "loss": 2.9814, "step": 22280 }, { "epoch": 1376.5432098765432, "learning_rate": 6.955736500550998e-07, "loss": 3.0067, "step": 22300 }, { "epoch": 1377.7777777777778, "learning_rate": 6.952981510958736e-07, "loss": 2.993, "step": 22320 }, { "epoch": 1379.0123456790122, "learning_rate": 6.950226521366475e-07, "loss": 3.0182, "step": 22340 }, { "epoch": 1380.2469135802469, "learning_rate": 6.947471531774214e-07, "loss": 2.9773, "step": 22360 }, { "epoch": 1381.4814814814815, "learning_rate": 6.944716542181952e-07, "loss": 2.9697, "step": 22380 }, { "epoch": 1382.716049382716, "learning_rate": 6.941961552589691e-07, "loss": 3.0496, "step": 22400 }, { "epoch": 1383.9506172839506, "learning_rate": 6.939206562997428e-07, "loss": 3.0031, "step": 22420 }, { "epoch": 1385.1851851851852, "learning_rate": 6.936451573405167e-07, "loss": 3.0418, "step": 22440 }, { "epoch": 1386.4197530864199, "learning_rate": 6.933696583812906e-07, "loss": 3.0117, "step": 22460 }, { "epoch": 1387.6543209876543, "learning_rate": 6.930941594220643e-07, "loss": 2.9879, "step": 22480 }, { "epoch": 1388.888888888889, "learning_rate": 6.928186604628382e-07, "loss": 3.0156, "step": 22500 }, { "epoch": 1390.1234567901236, "learning_rate": 6.92543161503612e-07, "loss": 2.9637, "step": 22520 }, { "epoch": 1391.358024691358, "learning_rate": 6.922676625443859e-07, "loss": 3.0093, "step": 22540 }, { "epoch": 1392.5925925925926, "learning_rate": 6.919921635851597e-07, "loss": 3.0227, "step": 22560 }, { "epoch": 1393.8271604938273, "learning_rate": 6.917166646259336e-07, "loss": 3.0063, "step": 22580 }, { "epoch": 1395.0617283950617, "learning_rate": 6.914411656667076e-07, "loss": 3.0236, "step": 22600 }, { "epoch": 1396.2962962962963, "learning_rate": 6.911656667074813e-07, "loss": 2.9828, "step": 22620 }, { "epoch": 1397.530864197531, "learning_rate": 6.908901677482552e-07, "loss": 3.0113, "step": 22640 }, { "epoch": 1398.7654320987654, "learning_rate": 6.90614668789029e-07, "loss": 3.0021, "step": 22660 }, { "epoch": 1400.0, "learning_rate": 6.903391698298028e-07, "loss": 2.9998, "step": 22680 }, { "epoch": 1401.2345679012346, "learning_rate": 6.900636708705766e-07, "loss": 3.0051, "step": 22700 }, { "epoch": 1402.469135802469, "learning_rate": 6.897881719113505e-07, "loss": 3.0361, "step": 22720 }, { "epoch": 1403.7037037037037, "learning_rate": 6.895126729521244e-07, "loss": 3.0125, "step": 22740 }, { "epoch": 1404.9382716049383, "learning_rate": 6.892371739928981e-07, "loss": 2.9742, "step": 22760 }, { "epoch": 1406.1728395061727, "learning_rate": 6.88961675033672e-07, "loss": 2.9415, "step": 22780 }, { "epoch": 1407.4074074074074, "learning_rate": 6.886861760744458e-07, "loss": 2.9931, "step": 22800 }, { "epoch": 1408.641975308642, "learning_rate": 6.884106771152197e-07, "loss": 2.9967, "step": 22820 }, { "epoch": 1409.8765432098764, "learning_rate": 6.881351781559936e-07, "loss": 2.971, "step": 22840 }, { "epoch": 1411.111111111111, "learning_rate": 6.878596791967674e-07, "loss": 2.9922, "step": 22860 }, { "epoch": 1412.3456790123457, "learning_rate": 6.875841802375413e-07, "loss": 3.0975, "step": 22880 }, { "epoch": 1413.5802469135801, "learning_rate": 6.873086812783151e-07, "loss": 2.9825, "step": 22900 }, { "epoch": 1414.8148148148148, "learning_rate": 6.87033182319089e-07, "loss": 3.023, "step": 22920 }, { "epoch": 1416.0493827160494, "learning_rate": 6.86757683359863e-07, "loss": 3.0236, "step": 22940 }, { "epoch": 1417.283950617284, "learning_rate": 6.864821844006366e-07, "loss": 3.0156, "step": 22960 }, { "epoch": 1418.5185185185185, "learning_rate": 6.862066854414106e-07, "loss": 2.9708, "step": 22980 }, { "epoch": 1419.7530864197531, "learning_rate": 6.859311864821844e-07, "loss": 3.0535, "step": 23000 }, { "epoch": 1420.9876543209878, "learning_rate": 6.856556875229582e-07, "loss": 2.9992, "step": 23020 }, { "epoch": 1422.2222222222222, "learning_rate": 6.85380188563732e-07, "loss": 2.9647, "step": 23040 }, { "epoch": 1423.4567901234568, "learning_rate": 6.851046896045059e-07, "loss": 2.9628, "step": 23060 }, { "epoch": 1424.6913580246915, "learning_rate": 6.848291906452798e-07, "loss": 3.0062, "step": 23080 }, { "epoch": 1425.9259259259259, "learning_rate": 6.845536916860536e-07, "loss": 2.943, "step": 23100 }, { "epoch": 1427.1604938271605, "learning_rate": 6.842781927268275e-07, "loss": 3.0331, "step": 23120 }, { "epoch": 1428.3950617283951, "learning_rate": 6.840026937676013e-07, "loss": 3.0069, "step": 23140 }, { "epoch": 1429.6296296296296, "learning_rate": 6.837271948083752e-07, "loss": 3.1004, "step": 23160 }, { "epoch": 1430.8641975308642, "learning_rate": 6.83451695849149e-07, "loss": 2.9466, "step": 23180 }, { "epoch": 1432.0987654320988, "learning_rate": 6.831761968899229e-07, "loss": 3.0308, "step": 23200 }, { "epoch": 1433.3333333333333, "learning_rate": 6.829006979306967e-07, "loss": 3.0082, "step": 23220 }, { "epoch": 1434.567901234568, "learning_rate": 6.826251989714705e-07, "loss": 2.9983, "step": 23240 }, { "epoch": 1435.8024691358025, "learning_rate": 6.823497000122444e-07, "loss": 2.9632, "step": 23260 }, { "epoch": 1437.037037037037, "learning_rate": 6.820742010530181e-07, "loss": 2.986, "step": 23280 }, { "epoch": 1438.2716049382716, "learning_rate": 6.81798702093792e-07, "loss": 3.0113, "step": 23300 }, { "epoch": 1439.5061728395062, "learning_rate": 6.81523203134566e-07, "loss": 2.9418, "step": 23320 }, { "epoch": 1440.7407407407406, "learning_rate": 6.812477041753397e-07, "loss": 2.9326, "step": 23340 }, { "epoch": 1441.9753086419753, "learning_rate": 6.809722052161136e-07, "loss": 2.9788, "step": 23360 }, { "epoch": 1443.20987654321, "learning_rate": 6.806967062568874e-07, "loss": 3.0468, "step": 23380 }, { "epoch": 1444.4444444444443, "learning_rate": 6.804212072976613e-07, "loss": 3.0129, "step": 23400 }, { "epoch": 1445.679012345679, "learning_rate": 6.801457083384351e-07, "loss": 2.9964, "step": 23420 }, { "epoch": 1446.9135802469136, "learning_rate": 6.79870209379209e-07, "loss": 3.0094, "step": 23440 }, { "epoch": 1448.148148148148, "learning_rate": 6.795947104199829e-07, "loss": 2.9579, "step": 23460 }, { "epoch": 1449.3827160493827, "learning_rate": 6.793192114607567e-07, "loss": 3.0166, "step": 23480 }, { "epoch": 1450.6172839506173, "learning_rate": 6.790437125015305e-07, "loss": 3.0441, "step": 23500 }, { "epoch": 1451.851851851852, "learning_rate": 6.787682135423043e-07, "loss": 2.9766, "step": 23520 }, { "epoch": 1453.0864197530864, "learning_rate": 6.784927145830782e-07, "loss": 3.0122, "step": 23540 }, { "epoch": 1454.320987654321, "learning_rate": 6.782172156238519e-07, "loss": 3.0115, "step": 23560 }, { "epoch": 1455.5555555555557, "learning_rate": 6.779417166646258e-07, "loss": 3.0546, "step": 23580 }, { "epoch": 1456.79012345679, "learning_rate": 6.776662177053997e-07, "loss": 2.9454, "step": 23600 }, { "epoch": 1458.0246913580247, "learning_rate": 6.773907187461735e-07, "loss": 3.0118, "step": 23620 }, { "epoch": 1459.2592592592594, "learning_rate": 6.771152197869475e-07, "loss": 3.0519, "step": 23640 }, { "epoch": 1460.4938271604938, "learning_rate": 6.768397208277213e-07, "loss": 2.9304, "step": 23660 }, { "epoch": 1461.7283950617284, "learning_rate": 6.765642218684952e-07, "loss": 2.9727, "step": 23680 }, { "epoch": 1462.962962962963, "learning_rate": 6.762887229092691e-07, "loss": 2.9836, "step": 23700 }, { "epoch": 1464.1975308641975, "learning_rate": 6.760132239500429e-07, "loss": 3.0426, "step": 23720 }, { "epoch": 1465.432098765432, "learning_rate": 6.757377249908168e-07, "loss": 3.0034, "step": 23740 }, { "epoch": 1466.6666666666667, "learning_rate": 6.754622260315905e-07, "loss": 2.9911, "step": 23760 }, { "epoch": 1467.9012345679012, "learning_rate": 6.751867270723644e-07, "loss": 2.9945, "step": 23780 }, { "epoch": 1469.1358024691358, "learning_rate": 6.749112281131383e-07, "loss": 2.9749, "step": 23800 }, { "epoch": 1470.3703703703704, "learning_rate": 6.74635729153912e-07, "loss": 2.9992, "step": 23820 }, { "epoch": 1471.6049382716049, "learning_rate": 6.743602301946859e-07, "loss": 2.9742, "step": 23840 }, { "epoch": 1472.8395061728395, "learning_rate": 6.740847312354597e-07, "loss": 3.0011, "step": 23860 }, { "epoch": 1474.0740740740741, "learning_rate": 6.738092322762336e-07, "loss": 2.9287, "step": 23880 }, { "epoch": 1475.3086419753085, "learning_rate": 6.735337333170074e-07, "loss": 2.9665, "step": 23900 }, { "epoch": 1476.5432098765432, "learning_rate": 6.732582343577813e-07, "loss": 2.973, "step": 23920 }, { "epoch": 1477.7777777777778, "learning_rate": 6.729827353985553e-07, "loss": 2.9416, "step": 23940 }, { "epoch": 1479.0123456790122, "learning_rate": 6.72707236439329e-07, "loss": 3.0906, "step": 23960 }, { "epoch": 1480.2469135802469, "learning_rate": 6.724317374801029e-07, "loss": 2.9303, "step": 23980 }, { "epoch": 1481.4814814814815, "learning_rate": 6.721562385208767e-07, "loss": 2.9875, "step": 24000 }, { "epoch": 1482.716049382716, "learning_rate": 6.718807395616506e-07, "loss": 2.9577, "step": 24020 }, { "epoch": 1483.9506172839506, "learning_rate": 6.716052406024243e-07, "loss": 3.0099, "step": 24040 }, { "epoch": 1485.1851851851852, "learning_rate": 6.713297416431982e-07, "loss": 2.9635, "step": 24060 }, { "epoch": 1486.4197530864199, "learning_rate": 6.710542426839721e-07, "loss": 3.0368, "step": 24080 }, { "epoch": 1487.6543209876543, "learning_rate": 6.707787437247458e-07, "loss": 2.8998, "step": 24100 }, { "epoch": 1488.888888888889, "learning_rate": 6.705032447655197e-07, "loss": 2.9467, "step": 24120 }, { "epoch": 1490.1234567901236, "learning_rate": 6.702277458062935e-07, "loss": 3.0443, "step": 24140 }, { "epoch": 1491.358024691358, "learning_rate": 6.699522468470674e-07, "loss": 2.9395, "step": 24160 }, { "epoch": 1492.5925925925926, "learning_rate": 6.696767478878413e-07, "loss": 3.0041, "step": 24180 }, { "epoch": 1493.8271604938273, "learning_rate": 6.694012489286151e-07, "loss": 2.9951, "step": 24200 }, { "epoch": 1495.0617283950617, "learning_rate": 6.69125749969389e-07, "loss": 3.0352, "step": 24220 }, { "epoch": 1496.2962962962963, "learning_rate": 6.688502510101628e-07, "loss": 2.9081, "step": 24240 }, { "epoch": 1497.530864197531, "learning_rate": 6.685747520509367e-07, "loss": 3.0379, "step": 24260 }, { "epoch": 1498.7654320987654, "learning_rate": 6.682992530917105e-07, "loss": 2.9549, "step": 24280 }, { "epoch": 1500.0, "learning_rate": 6.680237541324844e-07, "loss": 3.0366, "step": 24300 }, { "epoch": 1501.2345679012346, "learning_rate": 6.677482551732582e-07, "loss": 2.9446, "step": 24320 }, { "epoch": 1502.469135802469, "learning_rate": 6.67472756214032e-07, "loss": 2.9547, "step": 24340 }, { "epoch": 1503.7037037037037, "learning_rate": 6.671972572548059e-07, "loss": 2.9982, "step": 24360 }, { "epoch": 1504.9382716049383, "learning_rate": 6.669217582955797e-07, "loss": 3.0066, "step": 24380 }, { "epoch": 1506.1728395061727, "learning_rate": 6.666462593363536e-07, "loss": 3.0048, "step": 24400 }, { "epoch": 1507.4074074074074, "learning_rate": 6.663707603771275e-07, "loss": 3.0346, "step": 24420 }, { "epoch": 1508.641975308642, "learning_rate": 6.660952614179013e-07, "loss": 2.9453, "step": 24440 }, { "epoch": 1509.8765432098764, "learning_rate": 6.658197624586752e-07, "loss": 2.9708, "step": 24460 }, { "epoch": 1511.111111111111, "learning_rate": 6.65544263499449e-07, "loss": 2.9684, "step": 24480 }, { "epoch": 1512.3456790123457, "learning_rate": 6.652687645402229e-07, "loss": 2.9751, "step": 24500 }, { "epoch": 1513.5802469135801, "learning_rate": 6.649932655809967e-07, "loss": 2.9727, "step": 24520 }, { "epoch": 1514.8148148148148, "learning_rate": 6.647177666217706e-07, "loss": 2.9699, "step": 24540 }, { "epoch": 1516.0493827160494, "learning_rate": 6.644422676625444e-07, "loss": 2.9626, "step": 24560 }, { "epoch": 1517.283950617284, "learning_rate": 6.641667687033182e-07, "loss": 3.0035, "step": 24580 }, { "epoch": 1518.5185185185185, "learning_rate": 6.638912697440921e-07, "loss": 2.9803, "step": 24600 }, { "epoch": 1519.7530864197531, "learning_rate": 6.636157707848659e-07, "loss": 2.9758, "step": 24620 }, { "epoch": 1520.9876543209878, "learning_rate": 6.633402718256397e-07, "loss": 2.9671, "step": 24640 }, { "epoch": 1522.2222222222222, "learning_rate": 6.630647728664134e-07, "loss": 3.0017, "step": 24660 }, { "epoch": 1523.4567901234568, "learning_rate": 6.627892739071874e-07, "loss": 2.9997, "step": 24680 }, { "epoch": 1524.6913580246915, "learning_rate": 6.625137749479613e-07, "loss": 2.9376, "step": 24700 }, { "epoch": 1525.9259259259259, "learning_rate": 6.622382759887351e-07, "loss": 2.9338, "step": 24720 }, { "epoch": 1527.1604938271605, "learning_rate": 6.61962777029509e-07, "loss": 3.0424, "step": 24740 }, { "epoch": 1528.3950617283951, "learning_rate": 6.616872780702828e-07, "loss": 3.0168, "step": 24760 }, { "epoch": 1529.6296296296296, "learning_rate": 6.614117791110567e-07, "loss": 2.9788, "step": 24780 }, { "epoch": 1530.8641975308642, "learning_rate": 6.611362801518306e-07, "loss": 2.968, "step": 24800 }, { "epoch": 1532.0987654320988, "learning_rate": 6.608607811926044e-07, "loss": 3.0261, "step": 24820 }, { "epoch": 1533.3333333333333, "learning_rate": 6.605852822333782e-07, "loss": 3.0065, "step": 24840 }, { "epoch": 1534.567901234568, "learning_rate": 6.60309783274152e-07, "loss": 2.9672, "step": 24860 }, { "epoch": 1535.8024691358025, "learning_rate": 6.600342843149259e-07, "loss": 2.9691, "step": 24880 }, { "epoch": 1537.037037037037, "learning_rate": 6.597587853556996e-07, "loss": 2.9562, "step": 24900 }, { "epoch": 1538.2716049382716, "learning_rate": 6.594832863964735e-07, "loss": 2.9616, "step": 24920 }, { "epoch": 1539.5061728395062, "learning_rate": 6.592077874372474e-07, "loss": 3.0074, "step": 24940 }, { "epoch": 1540.7407407407406, "learning_rate": 6.589322884780212e-07, "loss": 2.9765, "step": 24960 }, { "epoch": 1541.9753086419753, "learning_rate": 6.586567895187951e-07, "loss": 2.9252, "step": 24980 }, { "epoch": 1543.20987654321, "learning_rate": 6.583812905595689e-07, "loss": 2.9334, "step": 25000 }, { "epoch": 1544.4444444444443, "learning_rate": 6.581057916003428e-07, "loss": 2.9412, "step": 25020 }, { "epoch": 1545.679012345679, "learning_rate": 6.578302926411167e-07, "loss": 2.9712, "step": 25040 }, { "epoch": 1546.9135802469136, "learning_rate": 6.575547936818905e-07, "loss": 3.0192, "step": 25060 }, { "epoch": 1548.148148148148, "learning_rate": 6.572792947226644e-07, "loss": 3.0106, "step": 25080 }, { "epoch": 1549.3827160493827, "learning_rate": 6.570037957634381e-07, "loss": 2.9342, "step": 25100 }, { "epoch": 1550.6172839506173, "learning_rate": 6.56728296804212e-07, "loss": 2.9943, "step": 25120 }, { "epoch": 1551.851851851852, "learning_rate": 6.564527978449859e-07, "loss": 2.8885, "step": 25140 }, { "epoch": 1553.0864197530864, "learning_rate": 6.561772988857598e-07, "loss": 2.9782, "step": 25160 }, { "epoch": 1554.320987654321, "learning_rate": 6.559017999265336e-07, "loss": 2.9691, "step": 25180 }, { "epoch": 1555.5555555555557, "learning_rate": 6.556263009673074e-07, "loss": 2.9399, "step": 25200 }, { "epoch": 1556.79012345679, "learning_rate": 6.553508020080813e-07, "loss": 2.9602, "step": 25220 }, { "epoch": 1558.0246913580247, "learning_rate": 6.550753030488551e-07, "loss": 2.9456, "step": 25240 }, { "epoch": 1559.2592592592594, "learning_rate": 6.54799804089629e-07, "loss": 3.0105, "step": 25260 }, { "epoch": 1560.4938271604938, "learning_rate": 6.545243051304027e-07, "loss": 2.9561, "step": 25280 }, { "epoch": 1561.7283950617284, "learning_rate": 6.542488061711767e-07, "loss": 2.9641, "step": 25300 }, { "epoch": 1562.962962962963, "learning_rate": 6.539733072119506e-07, "loss": 3.0078, "step": 25320 }, { "epoch": 1564.1975308641975, "learning_rate": 6.536978082527244e-07, "loss": 2.9818, "step": 25340 }, { "epoch": 1565.432098765432, "learning_rate": 6.534223092934983e-07, "loss": 3.0106, "step": 25360 }, { "epoch": 1566.6666666666667, "learning_rate": 6.53146810334272e-07, "loss": 2.9646, "step": 25380 }, { "epoch": 1567.9012345679012, "learning_rate": 6.528713113750459e-07, "loss": 2.9095, "step": 25400 }, { "epoch": 1569.1358024691358, "learning_rate": 6.525958124158198e-07, "loss": 2.9554, "step": 25420 }, { "epoch": 1570.3703703703704, "learning_rate": 6.523203134565935e-07, "loss": 2.8864, "step": 25440 }, { "epoch": 1571.6049382716049, "learning_rate": 6.520448144973674e-07, "loss": 2.9818, "step": 25460 }, { "epoch": 1572.8395061728395, "learning_rate": 6.517693155381412e-07, "loss": 2.9394, "step": 25480 }, { "epoch": 1574.0740740740741, "learning_rate": 6.514938165789151e-07, "loss": 2.9811, "step": 25500 }, { "epoch": 1575.3086419753085, "learning_rate": 6.51218317619689e-07, "loss": 2.963, "step": 25520 }, { "epoch": 1576.5432098765432, "learning_rate": 6.509428186604628e-07, "loss": 2.9593, "step": 25540 }, { "epoch": 1577.7777777777778, "learning_rate": 6.506673197012367e-07, "loss": 2.9009, "step": 25560 }, { "epoch": 1579.0123456790122, "learning_rate": 6.503918207420105e-07, "loss": 2.9137, "step": 25580 }, { "epoch": 1580.2469135802469, "learning_rate": 6.501163217827844e-07, "loss": 2.8327, "step": 25600 }, { "epoch": 1581.4814814814815, "learning_rate": 6.498408228235582e-07, "loss": 2.9212, "step": 25620 }, { "epoch": 1582.716049382716, "learning_rate": 6.495653238643321e-07, "loss": 2.8549, "step": 25640 }, { "epoch": 1583.9506172839506, "learning_rate": 6.492898249051059e-07, "loss": 2.889, "step": 25660 }, { "epoch": 1585.1851851851852, "learning_rate": 6.490143259458797e-07, "loss": 2.8057, "step": 25680 }, { "epoch": 1586.4197530864199, "learning_rate": 6.487388269866536e-07, "loss": 2.7912, "step": 25700 }, { "epoch": 1587.6543209876543, "learning_rate": 6.484633280274273e-07, "loss": 2.8488, "step": 25720 }, { "epoch": 1588.888888888889, "learning_rate": 6.481878290682012e-07, "loss": 2.8975, "step": 25740 }, { "epoch": 1590.1234567901236, "learning_rate": 6.47912330108975e-07, "loss": 2.7681, "step": 25760 }, { "epoch": 1591.358024691358, "learning_rate": 6.476368311497489e-07, "loss": 2.3341, "step": 25780 }, { "epoch": 1592.5925925925926, "learning_rate": 6.473613321905228e-07, "loss": 2.1036, "step": 25800 }, { "epoch": 1593.8271604938273, "learning_rate": 6.470858332312966e-07, "loss": 2.0129, "step": 25820 }, { "epoch": 1595.0617283950617, "learning_rate": 6.468103342720705e-07, "loss": 2.0491, "step": 25840 }, { "epoch": 1596.2962962962963, "learning_rate": 6.465348353128443e-07, "loss": 2.0864, "step": 25860 }, { "epoch": 1597.530864197531, "learning_rate": 6.462593363536182e-07, "loss": 2.1008, "step": 25880 }, { "epoch": 1598.7654320987654, "learning_rate": 6.459838373943921e-07, "loss": 1.9265, "step": 25900 }, { "epoch": 1600.0, "learning_rate": 6.457083384351659e-07, "loss": 1.8548, "step": 25920 }, { "epoch": 1601.2345679012346, "learning_rate": 6.454328394759398e-07, "loss": 1.8596, "step": 25940 }, { "epoch": 1602.469135802469, "learning_rate": 6.451573405167136e-07, "loss": 1.7976, "step": 25960 }, { "epoch": 1603.7037037037037, "learning_rate": 6.448818415574874e-07, "loss": 1.7973, "step": 25980 }, { "epoch": 1604.9382716049383, "learning_rate": 6.446063425982611e-07, "loss": 1.8429, "step": 26000 }, { "epoch": 1606.1728395061727, "learning_rate": 6.443308436390351e-07, "loss": 1.8667, "step": 26020 }, { "epoch": 1607.4074074074074, "learning_rate": 6.44055344679809e-07, "loss": 1.8682, "step": 26040 }, { "epoch": 1608.641975308642, "learning_rate": 6.437798457205828e-07, "loss": 1.8548, "step": 26060 }, { "epoch": 1609.8765432098764, "learning_rate": 6.435043467613567e-07, "loss": 1.787, "step": 26080 }, { "epoch": 1611.111111111111, "learning_rate": 6.432288478021305e-07, "loss": 1.8336, "step": 26100 }, { "epoch": 1612.3456790123457, "learning_rate": 6.429533488429044e-07, "loss": 1.7849, "step": 26120 }, { "epoch": 1613.5802469135801, "learning_rate": 6.426778498836783e-07, "loss": 1.7917, "step": 26140 }, { "epoch": 1614.8148148148148, "learning_rate": 6.424023509244521e-07, "loss": 1.7625, "step": 26160 }, { "epoch": 1616.0493827160494, "learning_rate": 6.421268519652259e-07, "loss": 1.8147, "step": 26180 }, { "epoch": 1617.283950617284, "learning_rate": 6.418513530059997e-07, "loss": 1.749, "step": 26200 }, { "epoch": 1618.5185185185185, "learning_rate": 6.415758540467736e-07, "loss": 1.7624, "step": 26220 }, { "epoch": 1619.7530864197531, "learning_rate": 6.413003550875474e-07, "loss": 1.7598, "step": 26240 }, { "epoch": 1620.9876543209878, "learning_rate": 6.410248561283212e-07, "loss": 1.7928, "step": 26260 }, { "epoch": 1622.2222222222222, "learning_rate": 6.407493571690951e-07, "loss": 1.7973, "step": 26280 }, { "epoch": 1623.4567901234568, "learning_rate": 6.404738582098689e-07, "loss": 1.7961, "step": 26300 }, { "epoch": 1624.6913580246915, "learning_rate": 6.401983592506428e-07, "loss": 1.811, "step": 26320 }, { "epoch": 1625.9259259259259, "learning_rate": 6.399228602914166e-07, "loss": 1.7704, "step": 26340 }, { "epoch": 1627.1604938271605, "learning_rate": 6.396473613321905e-07, "loss": 1.7649, "step": 26360 }, { "epoch": 1628.3950617283951, "learning_rate": 6.393718623729643e-07, "loss": 1.7882, "step": 26380 }, { "epoch": 1629.6296296296296, "learning_rate": 6.390963634137382e-07, "loss": 1.7661, "step": 26400 }, { "epoch": 1630.8641975308642, "learning_rate": 6.388208644545121e-07, "loss": 1.7519, "step": 26420 }, { "epoch": 1632.0987654320988, "learning_rate": 6.385453654952859e-07, "loss": 1.7378, "step": 26440 }, { "epoch": 1633.3333333333333, "learning_rate": 6.382698665360597e-07, "loss": 1.7445, "step": 26460 }, { "epoch": 1634.567901234568, "learning_rate": 6.379943675768335e-07, "loss": 1.7006, "step": 26480 }, { "epoch": 1635.8024691358025, "learning_rate": 6.377188686176074e-07, "loss": 1.8112, "step": 26500 }, { "epoch": 1637.037037037037, "learning_rate": 6.374433696583811e-07, "loss": 1.7643, "step": 26520 }, { "epoch": 1638.2716049382716, "learning_rate": 6.37167870699155e-07, "loss": 1.7564, "step": 26540 }, { "epoch": 1639.5061728395062, "learning_rate": 6.368923717399289e-07, "loss": 1.7366, "step": 26560 }, { "epoch": 1640.7407407407406, "learning_rate": 6.366168727807027e-07, "loss": 1.7218, "step": 26580 }, { "epoch": 1641.9753086419753, "learning_rate": 6.363413738214766e-07, "loss": 1.7528, "step": 26600 }, { "epoch": 1643.20987654321, "learning_rate": 6.360658748622505e-07, "loss": 1.7528, "step": 26620 }, { "epoch": 1644.4444444444443, "learning_rate": 6.357903759030244e-07, "loss": 1.7733, "step": 26640 }, { "epoch": 1645.679012345679, "learning_rate": 6.355148769437983e-07, "loss": 1.7352, "step": 26660 }, { "epoch": 1646.9135802469136, "learning_rate": 6.352393779845721e-07, "loss": 1.7101, "step": 26680 }, { "epoch": 1648.148148148148, "learning_rate": 6.34963879025346e-07, "loss": 1.7967, "step": 26700 }, { "epoch": 1649.3827160493827, "learning_rate": 6.346883800661197e-07, "loss": 1.7317, "step": 26720 }, { "epoch": 1650.6172839506173, "learning_rate": 6.344128811068936e-07, "loss": 1.766, "step": 26740 }, { "epoch": 1651.851851851852, "learning_rate": 6.341373821476675e-07, "loss": 1.7895, "step": 26760 }, { "epoch": 1653.0864197530864, "learning_rate": 6.338618831884413e-07, "loss": 1.7515, "step": 26780 }, { "epoch": 1654.320987654321, "learning_rate": 6.335863842292151e-07, "loss": 1.7262, "step": 26800 }, { "epoch": 1655.5555555555557, "learning_rate": 6.333108852699889e-07, "loss": 1.7339, "step": 26820 }, { "epoch": 1656.79012345679, "learning_rate": 6.330353863107628e-07, "loss": 1.721, "step": 26840 }, { "epoch": 1658.0246913580247, "learning_rate": 6.327598873515366e-07, "loss": 1.757, "step": 26860 }, { "epoch": 1659.2592592592594, "learning_rate": 6.324843883923105e-07, "loss": 1.7432, "step": 26880 }, { "epoch": 1660.4938271604938, "learning_rate": 6.322088894330844e-07, "loss": 1.7564, "step": 26900 }, { "epoch": 1661.7283950617284, "learning_rate": 6.319333904738582e-07, "loss": 1.7716, "step": 26920 }, { "epoch": 1662.962962962963, "learning_rate": 6.316578915146321e-07, "loss": 1.7049, "step": 26940 }, { "epoch": 1664.1975308641975, "learning_rate": 6.313823925554059e-07, "loss": 1.7469, "step": 26960 }, { "epoch": 1665.432098765432, "learning_rate": 6.311068935961798e-07, "loss": 1.7347, "step": 26980 }, { "epoch": 1666.6666666666667, "learning_rate": 6.308313946369535e-07, "loss": 1.7357, "step": 27000 }, { "epoch": 1667.9012345679012, "learning_rate": 6.305558956777274e-07, "loss": 1.7542, "step": 27020 }, { "epoch": 1669.1358024691358, "learning_rate": 6.302803967185013e-07, "loss": 1.6961, "step": 27040 }, { "epoch": 1670.3703703703704, "learning_rate": 6.30004897759275e-07, "loss": 1.7537, "step": 27060 }, { "epoch": 1671.6049382716049, "learning_rate": 6.297293988000489e-07, "loss": 1.7612, "step": 27080 }, { "epoch": 1672.8395061728395, "learning_rate": 6.294538998408227e-07, "loss": 1.7779, "step": 27100 }, { "epoch": 1674.0740740740741, "learning_rate": 6.291784008815966e-07, "loss": 1.7238, "step": 27120 }, { "epoch": 1675.3086419753085, "learning_rate": 6.289029019223705e-07, "loss": 1.7107, "step": 27140 }, { "epoch": 1676.5432098765432, "learning_rate": 6.286274029631443e-07, "loss": 1.7065, "step": 27160 }, { "epoch": 1677.7777777777778, "learning_rate": 6.283519040039182e-07, "loss": 1.7517, "step": 27180 }, { "epoch": 1679.0123456790122, "learning_rate": 6.28076405044692e-07, "loss": 1.7725, "step": 27200 }, { "epoch": 1680.2469135802469, "learning_rate": 6.278009060854659e-07, "loss": 1.7428, "step": 27220 }, { "epoch": 1681.4814814814815, "learning_rate": 6.275254071262399e-07, "loss": 1.7686, "step": 27240 }, { "epoch": 1682.716049382716, "learning_rate": 6.272499081670135e-07, "loss": 1.7229, "step": 27260 }, { "epoch": 1683.9506172839506, "learning_rate": 6.269744092077874e-07, "loss": 1.7524, "step": 27280 }, { "epoch": 1685.1851851851852, "learning_rate": 6.266989102485612e-07, "loss": 1.7512, "step": 27300 }, { "epoch": 1686.4197530864199, "learning_rate": 6.264234112893351e-07, "loss": 1.7376, "step": 27320 }, { "epoch": 1687.6543209876543, "learning_rate": 6.261479123301089e-07, "loss": 1.7337, "step": 27340 }, { "epoch": 1688.888888888889, "learning_rate": 6.258724133708828e-07, "loss": 1.7469, "step": 27360 }, { "epoch": 1690.1234567901236, "learning_rate": 6.255969144116567e-07, "loss": 1.7249, "step": 27380 }, { "epoch": 1691.358024691358, "learning_rate": 6.253214154524305e-07, "loss": 1.728, "step": 27400 }, { "epoch": 1692.5925925925926, "learning_rate": 6.250459164932044e-07, "loss": 1.7316, "step": 27420 }, { "epoch": 1693.8271604938273, "learning_rate": 6.247704175339782e-07, "loss": 1.7038, "step": 27440 }, { "epoch": 1695.0617283950617, "learning_rate": 6.244949185747521e-07, "loss": 1.7378, "step": 27460 }, { "epoch": 1696.2962962962963, "learning_rate": 6.24219419615526e-07, "loss": 1.7121, "step": 27480 }, { "epoch": 1697.530864197531, "learning_rate": 6.239439206562998e-07, "loss": 1.7308, "step": 27500 }, { "epoch": 1698.7654320987654, "learning_rate": 6.236684216970737e-07, "loss": 1.7108, "step": 27520 }, { "epoch": 1700.0, "learning_rate": 6.233929227378474e-07, "loss": 1.7702, "step": 27540 }, { "epoch": 1701.2345679012346, "learning_rate": 6.231174237786213e-07, "loss": 1.7137, "step": 27560 }, { "epoch": 1702.469135802469, "learning_rate": 6.228419248193951e-07, "loss": 1.7127, "step": 27580 }, { "epoch": 1703.7037037037037, "learning_rate": 6.225664258601689e-07, "loss": 1.7142, "step": 27600 }, { "epoch": 1704.9382716049383, "learning_rate": 6.222909269009427e-07, "loss": 1.771, "step": 27620 }, { "epoch": 1706.1728395061727, "learning_rate": 6.220154279417166e-07, "loss": 1.7219, "step": 27640 }, { "epoch": 1707.4074074074074, "learning_rate": 6.217399289824905e-07, "loss": 1.7772, "step": 27660 }, { "epoch": 1708.641975308642, "learning_rate": 6.214644300232643e-07, "loss": 1.6827, "step": 27680 }, { "epoch": 1709.8765432098764, "learning_rate": 6.211889310640382e-07, "loss": 1.7013, "step": 27700 }, { "epoch": 1711.111111111111, "learning_rate": 6.20913432104812e-07, "loss": 1.7148, "step": 27720 }, { "epoch": 1712.3456790123457, "learning_rate": 6.206379331455859e-07, "loss": 1.7623, "step": 27740 }, { "epoch": 1713.5802469135801, "learning_rate": 6.203624341863598e-07, "loss": 1.7249, "step": 27760 }, { "epoch": 1714.8148148148148, "learning_rate": 6.200869352271336e-07, "loss": 1.7041, "step": 27780 }, { "epoch": 1716.0493827160494, "learning_rate": 6.198114362679074e-07, "loss": 1.6833, "step": 27800 }, { "epoch": 1717.283950617284, "learning_rate": 6.195359373086812e-07, "loss": 1.7555, "step": 27820 }, { "epoch": 1718.5185185185185, "learning_rate": 6.192604383494551e-07, "loss": 1.6939, "step": 27840 }, { "epoch": 1719.7530864197531, "learning_rate": 6.18984939390229e-07, "loss": 1.6779, "step": 27860 }, { "epoch": 1720.9876543209878, "learning_rate": 6.187094404310027e-07, "loss": 1.7628, "step": 27880 }, { "epoch": 1722.2222222222222, "learning_rate": 6.184339414717766e-07, "loss": 1.7266, "step": 27900 }, { "epoch": 1723.4567901234568, "learning_rate": 6.181584425125504e-07, "loss": 1.7034, "step": 27920 }, { "epoch": 1724.6913580246915, "learning_rate": 6.178829435533243e-07, "loss": 1.758, "step": 27940 }, { "epoch": 1725.9259259259259, "learning_rate": 6.176074445940982e-07, "loss": 1.7253, "step": 27960 }, { "epoch": 1727.1604938271605, "learning_rate": 6.17331945634872e-07, "loss": 1.7962, "step": 27980 }, { "epoch": 1728.3950617283951, "learning_rate": 6.170564466756459e-07, "loss": 1.7053, "step": 28000 }, { "epoch": 1729.6296296296296, "learning_rate": 6.167809477164197e-07, "loss": 1.7582, "step": 28020 }, { "epoch": 1730.8641975308642, "learning_rate": 6.165054487571936e-07, "loss": 1.7062, "step": 28040 }, { "epoch": 1732.0987654320988, "learning_rate": 6.162299497979674e-07, "loss": 1.7443, "step": 28060 }, { "epoch": 1733.3333333333333, "learning_rate": 6.159544508387412e-07, "loss": 1.7365, "step": 28080 }, { "epoch": 1734.567901234568, "learning_rate": 6.15678951879515e-07, "loss": 1.6809, "step": 28100 }, { "epoch": 1735.8024691358025, "learning_rate": 6.15403452920289e-07, "loss": 1.7113, "step": 28120 }, { "epoch": 1737.037037037037, "learning_rate": 6.151279539610628e-07, "loss": 1.7568, "step": 28140 }, { "epoch": 1738.2716049382716, "learning_rate": 6.148524550018366e-07, "loss": 1.7214, "step": 28160 }, { "epoch": 1739.5061728395062, "learning_rate": 6.145769560426105e-07, "loss": 1.6456, "step": 28180 }, { "epoch": 1740.7407407407406, "learning_rate": 6.143014570833843e-07, "loss": 1.707, "step": 28200 }, { "epoch": 1741.9753086419753, "learning_rate": 6.140259581241582e-07, "loss": 1.6937, "step": 28220 }, { "epoch": 1743.20987654321, "learning_rate": 6.137504591649321e-07, "loss": 1.7067, "step": 28240 }, { "epoch": 1744.4444444444443, "learning_rate": 6.134749602057059e-07, "loss": 1.726, "step": 28260 }, { "epoch": 1745.679012345679, "learning_rate": 6.131994612464798e-07, "loss": 1.7014, "step": 28280 }, { "epoch": 1746.9135802469136, "learning_rate": 6.129239622872536e-07, "loss": 1.7578, "step": 28300 }, { "epoch": 1748.148148148148, "learning_rate": 6.126484633280275e-07, "loss": 1.6701, "step": 28320 }, { "epoch": 1749.3827160493827, "learning_rate": 6.123729643688012e-07, "loss": 1.6948, "step": 28340 }, { "epoch": 1750.6172839506173, "learning_rate": 6.120974654095751e-07, "loss": 1.699, "step": 28360 }, { "epoch": 1751.851851851852, "learning_rate": 6.11821966450349e-07, "loss": 1.6875, "step": 28380 }, { "epoch": 1753.0864197530864, "learning_rate": 6.115464674911228e-07, "loss": 1.7007, "step": 28400 }, { "epoch": 1754.320987654321, "learning_rate": 6.112709685318966e-07, "loss": 1.7035, "step": 28420 }, { "epoch": 1755.5555555555557, "learning_rate": 6.109954695726704e-07, "loss": 1.6506, "step": 28440 }, { "epoch": 1756.79012345679, "learning_rate": 6.107199706134443e-07, "loss": 1.7308, "step": 28460 }, { "epoch": 1758.0246913580247, "learning_rate": 6.104444716542182e-07, "loss": 1.7238, "step": 28480 }, { "epoch": 1759.2592592592594, "learning_rate": 6.10168972694992e-07, "loss": 1.6749, "step": 28500 }, { "epoch": 1760.4938271604938, "learning_rate": 6.098934737357659e-07, "loss": 1.6481, "step": 28520 }, { "epoch": 1761.7283950617284, "learning_rate": 6.096179747765397e-07, "loss": 1.7056, "step": 28540 }, { "epoch": 1762.962962962963, "learning_rate": 6.093424758173136e-07, "loss": 1.7139, "step": 28560 }, { "epoch": 1764.1975308641975, "learning_rate": 6.090669768580873e-07, "loss": 1.6868, "step": 28580 }, { "epoch": 1765.432098765432, "learning_rate": 6.087914778988613e-07, "loss": 1.7287, "step": 28600 }, { "epoch": 1766.6666666666667, "learning_rate": 6.085159789396351e-07, "loss": 1.676, "step": 28620 }, { "epoch": 1767.9012345679012, "learning_rate": 6.082404799804089e-07, "loss": 1.7063, "step": 28640 }, { "epoch": 1769.1358024691358, "learning_rate": 6.079649810211828e-07, "loss": 1.7094, "step": 28660 }, { "epoch": 1770.3703703703704, "learning_rate": 6.076894820619566e-07, "loss": 1.7092, "step": 28680 }, { "epoch": 1771.6049382716049, "learning_rate": 6.074139831027304e-07, "loss": 1.7185, "step": 28700 }, { "epoch": 1772.8395061728395, "learning_rate": 6.071384841435042e-07, "loss": 1.666, "step": 28720 }, { "epoch": 1774.0740740740741, "learning_rate": 6.068629851842781e-07, "loss": 1.7145, "step": 28740 }, { "epoch": 1775.3086419753085, "learning_rate": 6.06587486225052e-07, "loss": 1.7243, "step": 28760 }, { "epoch": 1776.5432098765432, "learning_rate": 6.063119872658258e-07, "loss": 1.7053, "step": 28780 }, { "epoch": 1777.7777777777778, "learning_rate": 6.060364883065997e-07, "loss": 1.7349, "step": 28800 }, { "epoch": 1779.0123456790122, "learning_rate": 6.057609893473735e-07, "loss": 1.7217, "step": 28820 }, { "epoch": 1780.2469135802469, "learning_rate": 6.054854903881474e-07, "loss": 1.6761, "step": 28840 }, { "epoch": 1781.4814814814815, "learning_rate": 6.052099914289213e-07, "loss": 1.6951, "step": 28860 }, { "epoch": 1782.716049382716, "learning_rate": 6.049344924696951e-07, "loss": 1.7104, "step": 28880 }, { "epoch": 1783.9506172839506, "learning_rate": 6.04658993510469e-07, "loss": 1.6784, "step": 28900 }, { "epoch": 1785.1851851851852, "learning_rate": 6.043834945512428e-07, "loss": 1.6877, "step": 28920 }, { "epoch": 1786.4197530864199, "learning_rate": 6.041079955920167e-07, "loss": 1.6964, "step": 28940 }, { "epoch": 1787.6543209876543, "learning_rate": 6.038324966327904e-07, "loss": 1.6788, "step": 28960 }, { "epoch": 1788.888888888889, "learning_rate": 6.035569976735643e-07, "loss": 1.6828, "step": 28980 }, { "epoch": 1790.1234567901236, "learning_rate": 6.032814987143382e-07, "loss": 1.6915, "step": 29000 }, { "epoch": 1791.358024691358, "learning_rate": 6.03005999755112e-07, "loss": 1.6709, "step": 29020 }, { "epoch": 1792.5925925925926, "learning_rate": 6.027305007958859e-07, "loss": 1.7406, "step": 29040 }, { "epoch": 1793.8271604938273, "learning_rate": 6.024550018366597e-07, "loss": 1.6912, "step": 29060 }, { "epoch": 1795.0617283950617, "learning_rate": 6.021795028774336e-07, "loss": 1.6927, "step": 29080 }, { "epoch": 1796.2962962962963, "learning_rate": 6.019040039182075e-07, "loss": 1.725, "step": 29100 }, { "epoch": 1797.530864197531, "learning_rate": 6.016285049589813e-07, "loss": 1.6578, "step": 29120 }, { "epoch": 1798.7654320987654, "learning_rate": 6.013530059997552e-07, "loss": 1.7392, "step": 29140 }, { "epoch": 1800.0, "learning_rate": 6.010775070405289e-07, "loss": 1.6798, "step": 29160 }, { "epoch": 1801.2345679012346, "learning_rate": 6.008020080813028e-07, "loss": 1.6791, "step": 29180 }, { "epoch": 1802.469135802469, "learning_rate": 6.005265091220766e-07, "loss": 1.6692, "step": 29200 }, { "epoch": 1803.7037037037037, "learning_rate": 6.002510101628504e-07, "loss": 1.6823, "step": 29220 }, { "epoch": 1804.9382716049383, "learning_rate": 5.999755112036243e-07, "loss": 1.6822, "step": 29240 }, { "epoch": 1806.1728395061727, "learning_rate": 5.997000122443981e-07, "loss": 1.6831, "step": 29260 }, { "epoch": 1807.4074074074074, "learning_rate": 5.99424513285172e-07, "loss": 1.7249, "step": 29280 }, { "epoch": 1808.641975308642, "learning_rate": 5.991490143259457e-07, "loss": 1.6903, "step": 29300 }, { "epoch": 1809.8765432098764, "learning_rate": 5.988735153667197e-07, "loss": 1.6981, "step": 29320 }, { "epoch": 1811.111111111111, "learning_rate": 5.985980164074936e-07, "loss": 1.699, "step": 29340 }, { "epoch": 1812.3456790123457, "learning_rate": 5.983225174482674e-07, "loss": 1.6921, "step": 29360 }, { "epoch": 1813.5802469135801, "learning_rate": 5.980470184890413e-07, "loss": 1.69, "step": 29380 }, { "epoch": 1814.8148148148148, "learning_rate": 5.977715195298151e-07, "loss": 1.6803, "step": 29400 }, { "epoch": 1816.0493827160494, "learning_rate": 5.974960205705889e-07, "loss": 1.7028, "step": 29420 }, { "epoch": 1817.283950617284, "learning_rate": 5.972205216113627e-07, "loss": 1.6634, "step": 29440 }, { "epoch": 1818.5185185185185, "learning_rate": 5.969450226521366e-07, "loss": 1.6827, "step": 29460 }, { "epoch": 1819.7530864197531, "learning_rate": 5.966695236929104e-07, "loss": 1.6808, "step": 29480 }, { "epoch": 1820.9876543209878, "learning_rate": 5.963940247336842e-07, "loss": 1.6874, "step": 29500 }, { "epoch": 1822.2222222222222, "learning_rate": 5.961185257744581e-07, "loss": 1.7005, "step": 29520 }, { "epoch": 1823.4567901234568, "learning_rate": 5.958430268152319e-07, "loss": 1.7055, "step": 29540 }, { "epoch": 1824.6913580246915, "learning_rate": 5.955675278560058e-07, "loss": 1.6762, "step": 29560 }, { "epoch": 1825.9259259259259, "learning_rate": 5.952920288967796e-07, "loss": 1.6896, "step": 29580 }, { "epoch": 1827.1604938271605, "learning_rate": 5.950165299375535e-07, "loss": 1.6752, "step": 29600 }, { "epoch": 1828.3950617283951, "learning_rate": 5.947410309783275e-07, "loss": 1.712, "step": 29620 }, { "epoch": 1829.6296296296296, "learning_rate": 5.944655320191013e-07, "loss": 1.6502, "step": 29640 }, { "epoch": 1830.8641975308642, "learning_rate": 5.941900330598752e-07, "loss": 1.7099, "step": 29660 }, { "epoch": 1832.0987654320988, "learning_rate": 5.93914534100649e-07, "loss": 1.6656, "step": 29680 }, { "epoch": 1833.3333333333333, "learning_rate": 5.936390351414228e-07, "loss": 1.6611, "step": 29700 }, { "epoch": 1834.567901234568, "learning_rate": 5.933635361821967e-07, "loss": 1.7461, "step": 29720 }, { "epoch": 1835.8024691358025, "learning_rate": 5.930880372229705e-07, "loss": 1.7029, "step": 29740 }, { "epoch": 1837.037037037037, "learning_rate": 5.928125382637443e-07, "loss": 1.6585, "step": 29760 }, { "epoch": 1838.2716049382716, "learning_rate": 5.925370393045181e-07, "loss": 1.6802, "step": 29780 }, { "epoch": 1839.5061728395062, "learning_rate": 5.92261540345292e-07, "loss": 1.6939, "step": 29800 }, { "epoch": 1840.7407407407406, "learning_rate": 5.919860413860658e-07, "loss": 1.6721, "step": 29820 }, { "epoch": 1841.9753086419753, "learning_rate": 5.917105424268397e-07, "loss": 1.6698, "step": 29840 }, { "epoch": 1843.20987654321, "learning_rate": 5.914350434676136e-07, "loss": 1.7243, "step": 29860 }, { "epoch": 1844.4444444444443, "learning_rate": 5.911595445083874e-07, "loss": 1.6764, "step": 29880 }, { "epoch": 1845.679012345679, "learning_rate": 5.908840455491613e-07, "loss": 1.6299, "step": 29900 }, { "epoch": 1846.9135802469136, "learning_rate": 5.90608546589935e-07, "loss": 1.6761, "step": 29920 }, { "epoch": 1848.148148148148, "learning_rate": 5.90333047630709e-07, "loss": 1.689, "step": 29940 }, { "epoch": 1849.3827160493827, "learning_rate": 5.900575486714828e-07, "loss": 1.6907, "step": 29960 }, { "epoch": 1850.6172839506173, "learning_rate": 5.897820497122566e-07, "loss": 1.6604, "step": 29980 }, { "epoch": 1851.851851851852, "learning_rate": 5.895065507530305e-07, "loss": 1.6979, "step": 30000 }, { "epoch": 1853.0864197530864, "learning_rate": 5.892310517938041e-07, "loss": 1.6912, "step": 30020 }, { "epoch": 1854.320987654321, "learning_rate": 5.889555528345781e-07, "loss": 1.6986, "step": 30040 }, { "epoch": 1855.5555555555557, "learning_rate": 5.886800538753519e-07, "loss": 1.6842, "step": 30060 }, { "epoch": 1856.79012345679, "learning_rate": 5.884045549161258e-07, "loss": 1.653, "step": 30080 }, { "epoch": 1858.0246913580247, "learning_rate": 5.881290559568997e-07, "loss": 1.7042, "step": 30100 }, { "epoch": 1859.2592592592594, "learning_rate": 5.878535569976735e-07, "loss": 1.6651, "step": 30120 }, { "epoch": 1860.4938271604938, "learning_rate": 5.875780580384474e-07, "loss": 1.6653, "step": 30140 }, { "epoch": 1861.7283950617284, "learning_rate": 5.873025590792212e-07, "loss": 1.6434, "step": 30160 }, { "epoch": 1862.962962962963, "learning_rate": 5.870270601199951e-07, "loss": 1.6873, "step": 30180 }, { "epoch": 1864.1975308641975, "learning_rate": 5.86751561160769e-07, "loss": 1.725, "step": 30200 }, { "epoch": 1865.432098765432, "learning_rate": 5.864760622015427e-07, "loss": 1.6511, "step": 30220 }, { "epoch": 1866.6666666666667, "learning_rate": 5.862005632423166e-07, "loss": 1.6844, "step": 30240 }, { "epoch": 1867.9012345679012, "learning_rate": 5.859250642830904e-07, "loss": 1.6549, "step": 30260 }, { "epoch": 1869.1358024691358, "learning_rate": 5.856495653238644e-07, "loss": 1.6959, "step": 30280 }, { "epoch": 1870.3703703703704, "learning_rate": 5.85374066364638e-07, "loss": 1.6674, "step": 30300 }, { "epoch": 1871.6049382716049, "learning_rate": 5.85098567405412e-07, "loss": 1.6911, "step": 30320 }, { "epoch": 1872.8395061728395, "learning_rate": 5.848230684461859e-07, "loss": 1.6571, "step": 30340 }, { "epoch": 1874.0740740740741, "learning_rate": 5.845475694869597e-07, "loss": 1.7023, "step": 30360 }, { "epoch": 1875.3086419753085, "learning_rate": 5.842720705277336e-07, "loss": 1.6806, "step": 30380 }, { "epoch": 1876.5432098765432, "learning_rate": 5.839965715685074e-07, "loss": 1.6539, "step": 30400 }, { "epoch": 1877.7777777777778, "learning_rate": 5.837210726092813e-07, "loss": 1.7039, "step": 30420 }, { "epoch": 1879.0123456790122, "learning_rate": 5.834455736500552e-07, "loss": 1.6855, "step": 30440 }, { "epoch": 1880.2469135802469, "learning_rate": 5.83170074690829e-07, "loss": 1.6723, "step": 30460 }, { "epoch": 1881.4814814814815, "learning_rate": 5.828945757316029e-07, "loss": 1.6378, "step": 30480 }, { "epoch": 1882.716049382716, "learning_rate": 5.826190767723766e-07, "loss": 1.6862, "step": 30500 }, { "epoch": 1883.9506172839506, "learning_rate": 5.823435778131505e-07, "loss": 1.7127, "step": 30520 }, { "epoch": 1885.1851851851852, "learning_rate": 5.820680788539243e-07, "loss": 1.6837, "step": 30540 }, { "epoch": 1886.4197530864199, "learning_rate": 5.817925798946981e-07, "loss": 1.6474, "step": 30560 }, { "epoch": 1887.6543209876543, "learning_rate": 5.81517080935472e-07, "loss": 1.6561, "step": 30580 }, { "epoch": 1888.888888888889, "learning_rate": 5.812415819762458e-07, "loss": 1.6545, "step": 30600 }, { "epoch": 1890.1234567901236, "learning_rate": 5.809660830170197e-07, "loss": 1.6694, "step": 30620 }, { "epoch": 1891.358024691358, "learning_rate": 5.806905840577934e-07, "loss": 1.6651, "step": 30640 }, { "epoch": 1892.5925925925926, "learning_rate": 5.804150850985674e-07, "loss": 1.6752, "step": 30660 }, { "epoch": 1893.8271604938273, "learning_rate": 5.801395861393412e-07, "loss": 1.674, "step": 30680 }, { "epoch": 1895.0617283950617, "learning_rate": 5.798640871801151e-07, "loss": 1.6675, "step": 30700 }, { "epoch": 1896.2962962962963, "learning_rate": 5.79588588220889e-07, "loss": 1.698, "step": 30720 }, { "epoch": 1897.530864197531, "learning_rate": 5.793130892616628e-07, "loss": 1.6722, "step": 30740 }, { "epoch": 1898.7654320987654, "learning_rate": 5.790375903024366e-07, "loss": 1.6899, "step": 30760 }, { "epoch": 1900.0, "learning_rate": 5.787620913432104e-07, "loss": 1.6784, "step": 30780 }, { "epoch": 1901.2345679012346, "learning_rate": 5.784865923839843e-07, "loss": 1.6472, "step": 30800 }, { "epoch": 1902.469135802469, "learning_rate": 5.782110934247582e-07, "loss": 1.6882, "step": 30820 }, { "epoch": 1903.7037037037037, "learning_rate": 5.779355944655319e-07, "loss": 1.6812, "step": 30840 }, { "epoch": 1904.9382716049383, "learning_rate": 5.776600955063058e-07, "loss": 1.6791, "step": 30860 }, { "epoch": 1906.1728395061727, "learning_rate": 5.773845965470796e-07, "loss": 1.6917, "step": 30880 }, { "epoch": 1907.4074074074074, "learning_rate": 5.771090975878535e-07, "loss": 1.6563, "step": 30900 }, { "epoch": 1908.641975308642, "learning_rate": 5.768335986286274e-07, "loss": 1.6402, "step": 30920 }, { "epoch": 1909.8765432098764, "learning_rate": 5.765580996694013e-07, "loss": 1.6835, "step": 30940 }, { "epoch": 1911.111111111111, "learning_rate": 5.762826007101752e-07, "loss": 1.6657, "step": 30960 }, { "epoch": 1912.3456790123457, "learning_rate": 5.76007101750949e-07, "loss": 1.7137, "step": 30980 }, { "epoch": 1913.5802469135801, "learning_rate": 5.757316027917229e-07, "loss": 1.6696, "step": 31000 }, { "epoch": 1914.8148148148148, "learning_rate": 5.754561038324967e-07, "loss": 1.6753, "step": 31020 }, { "epoch": 1916.0493827160494, "learning_rate": 5.751806048732704e-07, "loss": 1.6665, "step": 31040 }, { "epoch": 1917.283950617284, "learning_rate": 5.749051059140442e-07, "loss": 1.6984, "step": 31060 }, { "epoch": 1918.5185185185185, "learning_rate": 5.746296069548181e-07, "loss": 1.6797, "step": 31080 }, { "epoch": 1919.7530864197531, "learning_rate": 5.74354107995592e-07, "loss": 1.6895, "step": 31100 }, { "epoch": 1920.9876543209878, "learning_rate": 5.740786090363658e-07, "loss": 1.6677, "step": 31120 }, { "epoch": 1922.2222222222222, "learning_rate": 5.738031100771397e-07, "loss": 1.6921, "step": 31140 }, { "epoch": 1923.4567901234568, "learning_rate": 5.735276111179135e-07, "loss": 1.6702, "step": 31160 }, { "epoch": 1924.6913580246915, "learning_rate": 5.732521121586874e-07, "loss": 1.6713, "step": 31180 }, { "epoch": 1925.9259259259259, "learning_rate": 5.729766131994613e-07, "loss": 1.6473, "step": 31200 }, { "epoch": 1927.1604938271605, "learning_rate": 5.727011142402351e-07, "loss": 1.6703, "step": 31220 }, { "epoch": 1928.3950617283951, "learning_rate": 5.72425615281009e-07, "loss": 1.6477, "step": 31240 }, { "epoch": 1929.6296296296296, "learning_rate": 5.721501163217828e-07, "loss": 1.6524, "step": 31260 }, { "epoch": 1930.8641975308642, "learning_rate": 5.718746173625567e-07, "loss": 1.6835, "step": 31280 }, { "epoch": 1932.0987654320988, "learning_rate": 5.715991184033304e-07, "loss": 1.7149, "step": 31300 }, { "epoch": 1933.3333333333333, "learning_rate": 5.713236194441043e-07, "loss": 1.6778, "step": 31320 }, { "epoch": 1934.567901234568, "learning_rate": 5.710481204848782e-07, "loss": 1.676, "step": 31340 }, { "epoch": 1935.8024691358025, "learning_rate": 5.70772621525652e-07, "loss": 1.6905, "step": 31360 }, { "epoch": 1937.037037037037, "learning_rate": 5.704971225664258e-07, "loss": 1.687, "step": 31380 }, { "epoch": 1938.2716049382716, "learning_rate": 5.702216236071996e-07, "loss": 1.6852, "step": 31400 }, { "epoch": 1939.5061728395062, "learning_rate": 5.699461246479735e-07, "loss": 1.6914, "step": 31420 }, { "epoch": 1940.7407407407406, "learning_rate": 5.696706256887474e-07, "loss": 1.7071, "step": 31440 }, { "epoch": 1941.9753086419753, "learning_rate": 5.693951267295212e-07, "loss": 1.6584, "step": 31460 }, { "epoch": 1943.20987654321, "learning_rate": 5.691196277702951e-07, "loss": 1.6533, "step": 31480 }, { "epoch": 1944.4444444444443, "learning_rate": 5.688441288110689e-07, "loss": 1.646, "step": 31500 }, { "epoch": 1945.679012345679, "learning_rate": 5.685686298518428e-07, "loss": 1.6853, "step": 31520 }, { "epoch": 1946.9135802469136, "learning_rate": 5.682931308926166e-07, "loss": 1.6666, "step": 31540 }, { "epoch": 1948.148148148148, "learning_rate": 5.680176319333905e-07, "loss": 1.6918, "step": 31560 }, { "epoch": 1949.3827160493827, "learning_rate": 5.677421329741643e-07, "loss": 1.6719, "step": 31580 }, { "epoch": 1950.6172839506173, "learning_rate": 5.674666340149381e-07, "loss": 1.6708, "step": 31600 }, { "epoch": 1951.851851851852, "learning_rate": 5.671911350557119e-07, "loss": 1.6474, "step": 31620 }, { "epoch": 1953.0864197530864, "learning_rate": 5.669156360964857e-07, "loss": 1.6686, "step": 31640 }, { "epoch": 1954.320987654321, "learning_rate": 5.666401371372596e-07, "loss": 1.6534, "step": 31660 }, { "epoch": 1955.5555555555557, "learning_rate": 5.663646381780334e-07, "loss": 1.6554, "step": 31680 }, { "epoch": 1956.79012345679, "learning_rate": 5.660891392188073e-07, "loss": 1.6135, "step": 31700 }, { "epoch": 1958.0246913580247, "learning_rate": 5.658136402595812e-07, "loss": 1.6707, "step": 31720 }, { "epoch": 1959.2592592592594, "learning_rate": 5.65538141300355e-07, "loss": 1.6569, "step": 31740 }, { "epoch": 1960.4938271604938, "learning_rate": 5.652626423411289e-07, "loss": 1.7037, "step": 31760 }, { "epoch": 1961.7283950617284, "learning_rate": 5.649871433819027e-07, "loss": 1.6324, "step": 31780 }, { "epoch": 1962.962962962963, "learning_rate": 5.647116444226766e-07, "loss": 1.666, "step": 31800 }, { "epoch": 1964.1975308641975, "learning_rate": 5.644361454634505e-07, "loss": 1.651, "step": 31820 }, { "epoch": 1965.432098765432, "learning_rate": 5.641606465042243e-07, "loss": 1.6548, "step": 31840 }, { "epoch": 1966.6666666666667, "learning_rate": 5.638851475449982e-07, "loss": 1.6696, "step": 31860 }, { "epoch": 1967.9012345679012, "learning_rate": 5.636096485857721e-07, "loss": 1.7076, "step": 31880 }, { "epoch": 1969.1358024691358, "learning_rate": 5.633341496265459e-07, "loss": 1.702, "step": 31900 }, { "epoch": 1970.3703703703704, "learning_rate": 5.630586506673195e-07, "loss": 1.6458, "step": 31920 }, { "epoch": 1971.6049382716049, "learning_rate": 5.627831517080934e-07, "loss": 1.6834, "step": 31940 }, { "epoch": 1972.8395061728395, "learning_rate": 5.625076527488673e-07, "loss": 1.6328, "step": 31960 }, { "epoch": 1974.0740740740741, "learning_rate": 5.622321537896412e-07, "loss": 1.6697, "step": 31980 }, { "epoch": 1975.3086419753085, "learning_rate": 5.61956654830415e-07, "loss": 1.6517, "step": 32000 }, { "epoch": 1976.5432098765432, "learning_rate": 5.616811558711889e-07, "loss": 1.6757, "step": 32020 }, { "epoch": 1977.7777777777778, "learning_rate": 5.614056569119628e-07, "loss": 1.6457, "step": 32040 }, { "epoch": 1979.0123456790122, "learning_rate": 5.611301579527367e-07, "loss": 1.6244, "step": 32060 }, { "epoch": 1980.2469135802469, "learning_rate": 5.608546589935105e-07, "loss": 1.6646, "step": 32080 }, { "epoch": 1981.4814814814815, "learning_rate": 5.605791600342844e-07, "loss": 1.6297, "step": 32100 }, { "epoch": 1982.716049382716, "learning_rate": 5.603036610750581e-07, "loss": 1.6971, "step": 32120 }, { "epoch": 1983.9506172839506, "learning_rate": 5.60028162115832e-07, "loss": 1.6461, "step": 32140 }, { "epoch": 1985.1851851851852, "learning_rate": 5.597526631566058e-07, "loss": 1.6862, "step": 32160 }, { "epoch": 1986.4197530864199, "learning_rate": 5.594771641973796e-07, "loss": 1.6764, "step": 32180 }, { "epoch": 1987.6543209876543, "learning_rate": 5.592016652381535e-07, "loss": 1.6503, "step": 32200 }, { "epoch": 1988.888888888889, "learning_rate": 5.589261662789273e-07, "loss": 1.6331, "step": 32220 }, { "epoch": 1990.1234567901236, "learning_rate": 5.586506673197012e-07, "loss": 1.6758, "step": 32240 }, { "epoch": 1991.358024691358, "learning_rate": 5.58375168360475e-07, "loss": 1.6388, "step": 32260 }, { "epoch": 1992.5925925925926, "learning_rate": 5.580996694012489e-07, "loss": 1.6492, "step": 32280 }, { "epoch": 1993.8271604938273, "learning_rate": 5.578241704420228e-07, "loss": 1.635, "step": 32300 }, { "epoch": 1995.0617283950617, "learning_rate": 5.575486714827966e-07, "loss": 1.65, "step": 32320 }, { "epoch": 1996.2962962962963, "learning_rate": 5.572731725235705e-07, "loss": 1.6663, "step": 32340 }, { "epoch": 1997.530864197531, "learning_rate": 5.569976735643443e-07, "loss": 1.6435, "step": 32360 }, { "epoch": 1998.7654320987654, "learning_rate": 5.567221746051181e-07, "loss": 1.6865, "step": 32380 }, { "epoch": 2000.0, "learning_rate": 5.564466756458919e-07, "loss": 1.6806, "step": 32400 }, { "epoch": 2001.2345679012346, "learning_rate": 5.561711766866658e-07, "loss": 1.664, "step": 32420 }, { "epoch": 2002.469135802469, "learning_rate": 5.558956777274397e-07, "loss": 1.583, "step": 32440 }, { "epoch": 2003.7037037037037, "learning_rate": 5.556201787682135e-07, "loss": 1.7005, "step": 32460 }, { "epoch": 2004.9382716049383, "learning_rate": 5.553446798089874e-07, "loss": 1.6248, "step": 32480 }, { "epoch": 2006.1728395061727, "learning_rate": 5.550691808497612e-07, "loss": 1.6806, "step": 32500 }, { "epoch": 2007.4074074074074, "learning_rate": 5.547936818905351e-07, "loss": 1.6522, "step": 32520 }, { "epoch": 2008.641975308642, "learning_rate": 5.54518182931309e-07, "loss": 1.6834, "step": 32540 }, { "epoch": 2009.8765432098764, "learning_rate": 5.542426839720828e-07, "loss": 1.6222, "step": 32560 }, { "epoch": 2011.111111111111, "learning_rate": 5.539671850128567e-07, "loss": 1.6515, "step": 32580 }, { "epoch": 2012.3456790123457, "learning_rate": 5.536916860536305e-07, "loss": 1.6526, "step": 32600 }, { "epoch": 2013.5802469135801, "learning_rate": 5.534161870944044e-07, "loss": 1.6346, "step": 32620 }, { "epoch": 2014.8148148148148, "learning_rate": 5.531406881351782e-07, "loss": 1.6423, "step": 32640 }, { "epoch": 2016.0493827160494, "learning_rate": 5.528651891759519e-07, "loss": 1.6667, "step": 32660 }, { "epoch": 2017.283950617284, "learning_rate": 5.525896902167258e-07, "loss": 1.6526, "step": 32680 }, { "epoch": 2018.5185185185185, "learning_rate": 5.523141912574996e-07, "loss": 1.6486, "step": 32700 }, { "epoch": 2019.7530864197531, "learning_rate": 5.520386922982735e-07, "loss": 1.6536, "step": 32720 }, { "epoch": 2020.9876543209878, "learning_rate": 5.517631933390473e-07, "loss": 1.6297, "step": 32740 }, { "epoch": 2022.2222222222222, "learning_rate": 5.514876943798212e-07, "loss": 1.648, "step": 32760 }, { "epoch": 2023.4567901234568, "learning_rate": 5.51212195420595e-07, "loss": 1.632, "step": 32780 }, { "epoch": 2024.6913580246915, "learning_rate": 5.509366964613689e-07, "loss": 1.6787, "step": 32800 }, { "epoch": 2025.9259259259259, "learning_rate": 5.506611975021428e-07, "loss": 1.6219, "step": 32820 }, { "epoch": 2027.1604938271605, "learning_rate": 5.503856985429166e-07, "loss": 1.6477, "step": 32840 }, { "epoch": 2028.3950617283951, "learning_rate": 5.501101995836905e-07, "loss": 1.6809, "step": 32860 }, { "epoch": 2029.6296296296296, "learning_rate": 5.498347006244643e-07, "loss": 1.6266, "step": 32880 }, { "epoch": 2030.8641975308642, "learning_rate": 5.495592016652381e-07, "loss": 1.6525, "step": 32900 }, { "epoch": 2032.0987654320988, "learning_rate": 5.49283702706012e-07, "loss": 1.6831, "step": 32920 }, { "epoch": 2033.3333333333333, "learning_rate": 5.490082037467858e-07, "loss": 1.6116, "step": 32940 }, { "epoch": 2034.567901234568, "learning_rate": 5.487327047875596e-07, "loss": 1.6331, "step": 32960 }, { "epoch": 2035.8024691358025, "learning_rate": 5.484572058283335e-07, "loss": 1.6585, "step": 32980 }, { "epoch": 2037.037037037037, "learning_rate": 5.481817068691074e-07, "loss": 1.6951, "step": 33000 }, { "epoch": 2038.2716049382716, "learning_rate": 5.479062079098811e-07, "loss": 1.7036, "step": 33020 }, { "epoch": 2039.5061728395062, "learning_rate": 5.47630708950655e-07, "loss": 1.6319, "step": 33040 }, { "epoch": 2040.7407407407406, "learning_rate": 5.473552099914289e-07, "loss": 1.6505, "step": 33060 }, { "epoch": 2041.9753086419753, "learning_rate": 5.470797110322027e-07, "loss": 1.6459, "step": 33080 }, { "epoch": 2043.20987654321, "learning_rate": 5.468042120729766e-07, "loss": 1.6577, "step": 33100 }, { "epoch": 2044.4444444444443, "learning_rate": 5.465287131137504e-07, "loss": 1.6493, "step": 33120 }, { "epoch": 2045.679012345679, "learning_rate": 5.462532141545243e-07, "loss": 1.6472, "step": 33140 }, { "epoch": 2046.9135802469136, "learning_rate": 5.459777151952982e-07, "loss": 1.6356, "step": 33160 }, { "epoch": 2048.1481481481483, "learning_rate": 5.45702216236072e-07, "loss": 1.647, "step": 33180 }, { "epoch": 2049.382716049383, "learning_rate": 5.454267172768459e-07, "loss": 1.6356, "step": 33200 }, { "epoch": 2050.617283950617, "learning_rate": 5.451512183176196e-07, "loss": 1.5994, "step": 33220 }, { "epoch": 2051.8518518518517, "learning_rate": 5.448757193583936e-07, "loss": 1.6527, "step": 33240 }, { "epoch": 2053.0864197530864, "learning_rate": 5.446002203991674e-07, "loss": 1.6733, "step": 33260 }, { "epoch": 2054.320987654321, "learning_rate": 5.443247214399413e-07, "loss": 1.679, "step": 33280 }, { "epoch": 2055.5555555555557, "learning_rate": 5.44049222480715e-07, "loss": 1.6314, "step": 33300 }, { "epoch": 2056.7901234567903, "learning_rate": 5.437737235214889e-07, "loss": 1.6589, "step": 33320 }, { "epoch": 2058.0246913580245, "learning_rate": 5.434982245622627e-07, "loss": 1.6226, "step": 33340 }, { "epoch": 2059.259259259259, "learning_rate": 5.432227256030365e-07, "loss": 1.6508, "step": 33360 }, { "epoch": 2060.4938271604938, "learning_rate": 5.429472266438104e-07, "loss": 1.6574, "step": 33380 }, { "epoch": 2061.7283950617284, "learning_rate": 5.426717276845842e-07, "loss": 1.6206, "step": 33400 }, { "epoch": 2062.962962962963, "learning_rate": 5.423962287253581e-07, "loss": 1.6472, "step": 33420 }, { "epoch": 2064.1975308641977, "learning_rate": 5.42120729766132e-07, "loss": 1.675, "step": 33440 }, { "epoch": 2065.432098765432, "learning_rate": 5.418452308069058e-07, "loss": 1.6319, "step": 33460 }, { "epoch": 2066.6666666666665, "learning_rate": 5.415697318476797e-07, "loss": 1.6193, "step": 33480 }, { "epoch": 2067.901234567901, "learning_rate": 5.412942328884535e-07, "loss": 1.6383, "step": 33500 }, { "epoch": 2069.135802469136, "learning_rate": 5.410187339292274e-07, "loss": 1.6552, "step": 33520 }, { "epoch": 2070.3703703703704, "learning_rate": 5.407432349700013e-07, "loss": 1.6484, "step": 33540 }, { "epoch": 2071.604938271605, "learning_rate": 5.40467736010775e-07, "loss": 1.6406, "step": 33560 }, { "epoch": 2072.8395061728397, "learning_rate": 5.40192237051549e-07, "loss": 1.6387, "step": 33580 }, { "epoch": 2074.074074074074, "learning_rate": 5.399167380923227e-07, "loss": 1.6443, "step": 33600 }, { "epoch": 2075.3086419753085, "learning_rate": 5.396412391330966e-07, "loss": 1.5999, "step": 33620 }, { "epoch": 2076.543209876543, "learning_rate": 5.393657401738705e-07, "loss": 1.6797, "step": 33640 }, { "epoch": 2077.777777777778, "learning_rate": 5.390902412146443e-07, "loss": 1.6671, "step": 33660 }, { "epoch": 2079.0123456790125, "learning_rate": 5.388147422554182e-07, "loss": 1.6938, "step": 33680 }, { "epoch": 2080.246913580247, "learning_rate": 5.385392432961919e-07, "loss": 1.6288, "step": 33700 }, { "epoch": 2081.4814814814813, "learning_rate": 5.382637443369658e-07, "loss": 1.6256, "step": 33720 }, { "epoch": 2082.716049382716, "learning_rate": 5.379882453777396e-07, "loss": 1.6537, "step": 33740 }, { "epoch": 2083.9506172839506, "learning_rate": 5.377127464185135e-07, "loss": 1.618, "step": 33760 }, { "epoch": 2085.185185185185, "learning_rate": 5.374372474592874e-07, "loss": 1.6029, "step": 33780 }, { "epoch": 2086.41975308642, "learning_rate": 5.371617485000612e-07, "loss": 1.6189, "step": 33800 }, { "epoch": 2087.6543209876545, "learning_rate": 5.368862495408351e-07, "loss": 1.6409, "step": 33820 }, { "epoch": 2088.8888888888887, "learning_rate": 5.366107505816088e-07, "loss": 1.6319, "step": 33840 }, { "epoch": 2090.1234567901233, "learning_rate": 5.363352516223827e-07, "loss": 1.6839, "step": 33860 }, { "epoch": 2091.358024691358, "learning_rate": 5.360597526631567e-07, "loss": 1.6088, "step": 33880 }, { "epoch": 2092.5925925925926, "learning_rate": 5.357842537039305e-07, "loss": 1.6999, "step": 33900 }, { "epoch": 2093.8271604938273, "learning_rate": 5.355087547447044e-07, "loss": 1.6296, "step": 33920 }, { "epoch": 2095.061728395062, "learning_rate": 5.352332557854782e-07, "loss": 1.6524, "step": 33940 }, { "epoch": 2096.296296296296, "learning_rate": 5.349577568262519e-07, "loss": 1.6768, "step": 33960 }, { "epoch": 2097.5308641975307, "learning_rate": 5.346822578670257e-07, "loss": 1.6304, "step": 33980 }, { "epoch": 2098.7654320987654, "learning_rate": 5.344067589077996e-07, "loss": 1.6687, "step": 34000 }, { "epoch": 2100.0, "learning_rate": 5.341312599485735e-07, "loss": 1.639, "step": 34020 }, { "epoch": 2101.2345679012346, "learning_rate": 5.338557609893473e-07, "loss": 1.6512, "step": 34040 }, { "epoch": 2102.4691358024693, "learning_rate": 5.335802620301212e-07, "loss": 1.6303, "step": 34060 }, { "epoch": 2103.703703703704, "learning_rate": 5.33304763070895e-07, "loss": 1.6472, "step": 34080 }, { "epoch": 2104.938271604938, "learning_rate": 5.330292641116689e-07, "loss": 1.6132, "step": 34100 }, { "epoch": 2106.1728395061727, "learning_rate": 5.327537651524427e-07, "loss": 1.6328, "step": 34120 }, { "epoch": 2107.4074074074074, "learning_rate": 5.324782661932166e-07, "loss": 1.6175, "step": 34140 }, { "epoch": 2108.641975308642, "learning_rate": 5.322027672339905e-07, "loss": 1.6078, "step": 34160 }, { "epoch": 2109.8765432098767, "learning_rate": 5.319272682747643e-07, "loss": 1.6795, "step": 34180 }, { "epoch": 2111.1111111111113, "learning_rate": 5.316517693155383e-07, "loss": 1.6347, "step": 34200 }, { "epoch": 2112.3456790123455, "learning_rate": 5.31376270356312e-07, "loss": 1.6583, "step": 34220 }, { "epoch": 2113.58024691358, "learning_rate": 5.311007713970858e-07, "loss": 1.649, "step": 34240 }, { "epoch": 2114.814814814815, "learning_rate": 5.308252724378597e-07, "loss": 1.6458, "step": 34260 }, { "epoch": 2116.0493827160494, "learning_rate": 5.305497734786335e-07, "loss": 1.6741, "step": 34280 }, { "epoch": 2117.283950617284, "learning_rate": 5.302742745194074e-07, "loss": 1.6458, "step": 34300 }, { "epoch": 2118.5185185185187, "learning_rate": 5.299987755601812e-07, "loss": 1.6725, "step": 34320 }, { "epoch": 2119.753086419753, "learning_rate": 5.297232766009551e-07, "loss": 1.6372, "step": 34340 }, { "epoch": 2120.9876543209875, "learning_rate": 5.294477776417289e-07, "loss": 1.5928, "step": 34360 }, { "epoch": 2122.222222222222, "learning_rate": 5.291722786825027e-07, "loss": 1.6509, "step": 34380 }, { "epoch": 2123.456790123457, "learning_rate": 5.288967797232766e-07, "loss": 1.688, "step": 34400 }, { "epoch": 2124.6913580246915, "learning_rate": 5.286212807640504e-07, "loss": 1.6257, "step": 34420 }, { "epoch": 2125.925925925926, "learning_rate": 5.283457818048243e-07, "loss": 1.6655, "step": 34440 }, { "epoch": 2127.1604938271603, "learning_rate": 5.280702828455981e-07, "loss": 1.6803, "step": 34460 }, { "epoch": 2128.395061728395, "learning_rate": 5.27794783886372e-07, "loss": 1.5928, "step": 34480 }, { "epoch": 2129.6296296296296, "learning_rate": 5.275192849271457e-07, "loss": 1.6843, "step": 34500 }, { "epoch": 2130.864197530864, "learning_rate": 5.272437859679196e-07, "loss": 1.6173, "step": 34520 }, { "epoch": 2132.098765432099, "learning_rate": 5.269682870086935e-07, "loss": 1.6635, "step": 34540 }, { "epoch": 2133.3333333333335, "learning_rate": 5.266927880494673e-07, "loss": 1.651, "step": 34560 }, { "epoch": 2134.567901234568, "learning_rate": 5.264172890902412e-07, "loss": 1.5994, "step": 34580 }, { "epoch": 2135.8024691358023, "learning_rate": 5.26141790131015e-07, "loss": 1.6398, "step": 34600 }, { "epoch": 2137.037037037037, "learning_rate": 5.258662911717889e-07, "loss": 1.638, "step": 34620 }, { "epoch": 2138.2716049382716, "learning_rate": 5.255907922125627e-07, "loss": 1.6332, "step": 34640 }, { "epoch": 2139.5061728395062, "learning_rate": 5.253152932533364e-07, "loss": 1.6314, "step": 34660 }, { "epoch": 2140.740740740741, "learning_rate": 5.250397942941104e-07, "loss": 1.6453, "step": 34680 }, { "epoch": 2141.9753086419755, "learning_rate": 5.247642953348842e-07, "loss": 1.6315, "step": 34700 }, { "epoch": 2143.2098765432097, "learning_rate": 5.244887963756581e-07, "loss": 1.6325, "step": 34720 }, { "epoch": 2144.4444444444443, "learning_rate": 5.242132974164319e-07, "loss": 1.6387, "step": 34740 }, { "epoch": 2145.679012345679, "learning_rate": 5.239377984572058e-07, "loss": 1.6771, "step": 34760 }, { "epoch": 2146.9135802469136, "learning_rate": 5.236622994979797e-07, "loss": 1.6287, "step": 34780 }, { "epoch": 2148.1481481481483, "learning_rate": 5.233868005387535e-07, "loss": 1.6298, "step": 34800 }, { "epoch": 2149.382716049383, "learning_rate": 5.231113015795274e-07, "loss": 1.6201, "step": 34820 }, { "epoch": 2150.617283950617, "learning_rate": 5.228358026203012e-07, "loss": 1.64, "step": 34840 }, { "epoch": 2151.8518518518517, "learning_rate": 5.225603036610751e-07, "loss": 1.6608, "step": 34860 }, { "epoch": 2153.0864197530864, "learning_rate": 5.22284804701849e-07, "loss": 1.6253, "step": 34880 }, { "epoch": 2154.320987654321, "learning_rate": 5.220093057426228e-07, "loss": 1.6644, "step": 34900 }, { "epoch": 2155.5555555555557, "learning_rate": 5.217338067833965e-07, "loss": 1.6186, "step": 34920 }, { "epoch": 2156.7901234567903, "learning_rate": 5.214583078241703e-07, "loss": 1.7043, "step": 34940 }, { "epoch": 2158.0246913580245, "learning_rate": 5.211828088649442e-07, "loss": 1.6333, "step": 34960 }, { "epoch": 2159.259259259259, "learning_rate": 5.20907309905718e-07, "loss": 1.6256, "step": 34980 }, { "epoch": 2160.4938271604938, "learning_rate": 5.20631810946492e-07, "loss": 1.5845, "step": 35000 }, { "epoch": 2161.7283950617284, "learning_rate": 5.203563119872659e-07, "loss": 1.681, "step": 35020 }, { "epoch": 2162.962962962963, "learning_rate": 5.200808130280396e-07, "loss": 1.6559, "step": 35040 }, { "epoch": 2164.1975308641977, "learning_rate": 5.198053140688135e-07, "loss": 1.6413, "step": 35060 }, { "epoch": 2165.432098765432, "learning_rate": 5.195298151095873e-07, "loss": 1.6549, "step": 35080 }, { "epoch": 2166.6666666666665, "learning_rate": 5.192543161503612e-07, "loss": 1.6315, "step": 35100 }, { "epoch": 2167.901234567901, "learning_rate": 5.18978817191135e-07, "loss": 1.5978, "step": 35120 }, { "epoch": 2169.135802469136, "learning_rate": 5.187033182319089e-07, "loss": 1.6748, "step": 35140 }, { "epoch": 2170.3703703703704, "learning_rate": 5.184278192726828e-07, "loss": 1.6301, "step": 35160 }, { "epoch": 2171.604938271605, "learning_rate": 5.181523203134565e-07, "loss": 1.6186, "step": 35180 }, { "epoch": 2172.8395061728397, "learning_rate": 5.178768213542304e-07, "loss": 1.6626, "step": 35200 }, { "epoch": 2174.074074074074, "learning_rate": 5.176013223950042e-07, "loss": 1.6176, "step": 35220 }, { "epoch": 2175.3086419753085, "learning_rate": 5.173258234357781e-07, "loss": 1.6184, "step": 35240 }, { "epoch": 2176.543209876543, "learning_rate": 5.17050324476552e-07, "loss": 1.669, "step": 35260 }, { "epoch": 2177.777777777778, "learning_rate": 5.167748255173257e-07, "loss": 1.6438, "step": 35280 }, { "epoch": 2179.0123456790125, "learning_rate": 5.164993265580997e-07, "loss": 1.6993, "step": 35300 }, { "epoch": 2180.246913580247, "learning_rate": 5.162238275988734e-07, "loss": 1.6705, "step": 35320 }, { "epoch": 2181.4814814814813, "learning_rate": 5.159483286396473e-07, "loss": 1.6452, "step": 35340 }, { "epoch": 2182.716049382716, "learning_rate": 5.156728296804211e-07, "loss": 1.6083, "step": 35360 }, { "epoch": 2183.9506172839506, "learning_rate": 5.15397330721195e-07, "loss": 1.6284, "step": 35380 }, { "epoch": 2185.185185185185, "learning_rate": 5.151218317619689e-07, "loss": 1.6395, "step": 35400 }, { "epoch": 2186.41975308642, "learning_rate": 5.148463328027427e-07, "loss": 1.6433, "step": 35420 }, { "epoch": 2187.6543209876545, "learning_rate": 5.145708338435166e-07, "loss": 1.6108, "step": 35440 }, { "epoch": 2188.8888888888887, "learning_rate": 5.142953348842904e-07, "loss": 1.6232, "step": 35460 }, { "epoch": 2190.1234567901233, "learning_rate": 5.140198359250643e-07, "loss": 1.6295, "step": 35480 }, { "epoch": 2191.358024691358, "learning_rate": 5.137443369658382e-07, "loss": 1.6395, "step": 35500 }, { "epoch": 2192.5925925925926, "learning_rate": 5.13468838006612e-07, "loss": 1.637, "step": 35520 }, { "epoch": 2193.8271604938273, "learning_rate": 5.131933390473858e-07, "loss": 1.6075, "step": 35540 }, { "epoch": 2195.061728395062, "learning_rate": 5.129178400881597e-07, "loss": 1.6256, "step": 35560 }, { "epoch": 2196.296296296296, "learning_rate": 5.126423411289334e-07, "loss": 1.6463, "step": 35580 }, { "epoch": 2197.5308641975307, "learning_rate": 5.123668421697072e-07, "loss": 1.6383, "step": 35600 }, { "epoch": 2198.7654320987654, "learning_rate": 5.120913432104811e-07, "loss": 1.6451, "step": 35620 }, { "epoch": 2200.0, "learning_rate": 5.118158442512551e-07, "loss": 1.6669, "step": 35640 }, { "epoch": 2201.2345679012346, "learning_rate": 5.115403452920288e-07, "loss": 1.6535, "step": 35660 }, { "epoch": 2202.4691358024693, "learning_rate": 5.112648463328027e-07, "loss": 1.623, "step": 35680 }, { "epoch": 2203.703703703704, "learning_rate": 5.109893473735765e-07, "loss": 1.6265, "step": 35700 }, { "epoch": 2204.938271604938, "learning_rate": 5.107138484143504e-07, "loss": 1.6418, "step": 35720 }, { "epoch": 2206.1728395061727, "learning_rate": 5.104383494551243e-07, "loss": 1.6721, "step": 35740 }, { "epoch": 2207.4074074074074, "learning_rate": 5.101628504958981e-07, "loss": 1.6456, "step": 35760 }, { "epoch": 2208.641975308642, "learning_rate": 5.09887351536672e-07, "loss": 1.6195, "step": 35780 }, { "epoch": 2209.8765432098767, "learning_rate": 5.096118525774458e-07, "loss": 1.6122, "step": 35800 }, { "epoch": 2211.1111111111113, "learning_rate": 5.093363536182197e-07, "loss": 1.6416, "step": 35820 }, { "epoch": 2212.3456790123455, "learning_rate": 5.090608546589935e-07, "loss": 1.6261, "step": 35840 }, { "epoch": 2213.58024691358, "learning_rate": 5.087853556997673e-07, "loss": 1.6247, "step": 35860 }, { "epoch": 2214.814814814815, "learning_rate": 5.085098567405412e-07, "loss": 1.6314, "step": 35880 }, { "epoch": 2216.0493827160494, "learning_rate": 5.082343577813151e-07, "loss": 1.6695, "step": 35900 }, { "epoch": 2217.283950617284, "learning_rate": 5.079588588220889e-07, "loss": 1.7002, "step": 35920 }, { "epoch": 2218.5185185185187, "learning_rate": 5.076833598628627e-07, "loss": 1.6342, "step": 35940 }, { "epoch": 2219.753086419753, "learning_rate": 5.074078609036366e-07, "loss": 1.6208, "step": 35960 }, { "epoch": 2220.9876543209875, "learning_rate": 5.071323619444105e-07, "loss": 1.648, "step": 35980 }, { "epoch": 2222.222222222222, "learning_rate": 5.068568629851841e-07, "loss": 1.6647, "step": 36000 }, { "epoch": 2223.456790123457, "learning_rate": 5.065813640259581e-07, "loss": 1.6099, "step": 36020 }, { "epoch": 2224.6913580246915, "learning_rate": 5.063058650667319e-07, "loss": 1.6258, "step": 36040 }, { "epoch": 2225.925925925926, "learning_rate": 5.060303661075058e-07, "loss": 1.6306, "step": 36060 }, { "epoch": 2227.1604938271603, "learning_rate": 5.057548671482796e-07, "loss": 1.6182, "step": 36080 }, { "epoch": 2228.395061728395, "learning_rate": 5.054793681890535e-07, "loss": 1.5924, "step": 36100 }, { "epoch": 2229.6296296296296, "learning_rate": 5.052038692298274e-07, "loss": 1.6585, "step": 36120 }, { "epoch": 2230.864197530864, "learning_rate": 5.049283702706012e-07, "loss": 1.6297, "step": 36140 }, { "epoch": 2232.098765432099, "learning_rate": 5.046528713113751e-07, "loss": 1.6579, "step": 36160 }, { "epoch": 2233.3333333333335, "learning_rate": 5.043773723521489e-07, "loss": 1.6233, "step": 36180 }, { "epoch": 2234.567901234568, "learning_rate": 5.041018733929228e-07, "loss": 1.6651, "step": 36200 }, { "epoch": 2235.8024691358023, "learning_rate": 5.038263744336967e-07, "loss": 1.6159, "step": 36220 }, { "epoch": 2237.037037037037, "learning_rate": 5.035508754744705e-07, "loss": 1.6339, "step": 36240 }, { "epoch": 2238.2716049382716, "learning_rate": 5.032753765152442e-07, "loss": 1.6332, "step": 36260 }, { "epoch": 2239.5061728395062, "learning_rate": 5.02999877556018e-07, "loss": 1.6244, "step": 36280 }, { "epoch": 2240.740740740741, "learning_rate": 5.027243785967919e-07, "loss": 1.6199, "step": 36300 }, { "epoch": 2241.9753086419755, "learning_rate": 5.024488796375657e-07, "loss": 1.5973, "step": 36320 }, { "epoch": 2243.2098765432097, "learning_rate": 5.021733806783396e-07, "loss": 1.641, "step": 36340 }, { "epoch": 2244.4444444444443, "learning_rate": 5.018978817191135e-07, "loss": 1.6018, "step": 36360 }, { "epoch": 2245.679012345679, "learning_rate": 5.016223827598873e-07, "loss": 1.6101, "step": 36380 }, { "epoch": 2246.9135802469136, "learning_rate": 5.013468838006612e-07, "loss": 1.6013, "step": 36400 }, { "epoch": 2248.1481481481483, "learning_rate": 5.01071384841435e-07, "loss": 1.6538, "step": 36420 }, { "epoch": 2249.382716049383, "learning_rate": 5.007958858822089e-07, "loss": 1.6441, "step": 36440 }, { "epoch": 2250.617283950617, "learning_rate": 5.005203869229827e-07, "loss": 1.5998, "step": 36460 }, { "epoch": 2251.8518518518517, "learning_rate": 5.002448879637566e-07, "loss": 1.6576, "step": 36480 }, { "epoch": 2253.0864197530864, "learning_rate": 4.999693890045305e-07, "loss": 1.6271, "step": 36500 }, { "epoch": 2254.320987654321, "learning_rate": 4.996938900453043e-07, "loss": 1.6348, "step": 36520 }, { "epoch": 2255.5555555555557, "learning_rate": 4.994183910860781e-07, "loss": 1.6658, "step": 36540 }, { "epoch": 2256.7901234567903, "learning_rate": 4.991428921268519e-07, "loss": 1.5881, "step": 36560 }, { "epoch": 2258.0246913580245, "learning_rate": 4.988673931676258e-07, "loss": 1.6299, "step": 36580 }, { "epoch": 2259.259259259259, "learning_rate": 4.985918942083997e-07, "loss": 1.6025, "step": 36600 }, { "epoch": 2260.4938271604938, "learning_rate": 4.983163952491735e-07, "loss": 1.6725, "step": 36620 }, { "epoch": 2261.7283950617284, "learning_rate": 4.980408962899474e-07, "loss": 1.6149, "step": 36640 }, { "epoch": 2262.962962962963, "learning_rate": 4.977653973307211e-07, "loss": 1.6358, "step": 36660 }, { "epoch": 2264.1975308641977, "learning_rate": 4.97489898371495e-07, "loss": 1.6036, "step": 36680 }, { "epoch": 2265.432098765432, "learning_rate": 4.972143994122688e-07, "loss": 1.6184, "step": 36700 }, { "epoch": 2266.6666666666665, "learning_rate": 4.969389004530427e-07, "loss": 1.6165, "step": 36720 }, { "epoch": 2267.901234567901, "learning_rate": 4.966634014938166e-07, "loss": 1.6563, "step": 36740 }, { "epoch": 2269.135802469136, "learning_rate": 4.963879025345904e-07, "loss": 1.6184, "step": 36760 }, { "epoch": 2270.3703703703704, "learning_rate": 4.961124035753643e-07, "loss": 1.6589, "step": 36780 }, { "epoch": 2271.604938271605, "learning_rate": 4.95836904616138e-07, "loss": 1.6132, "step": 36800 }, { "epoch": 2272.8395061728397, "learning_rate": 4.955614056569119e-07, "loss": 1.6007, "step": 36820 }, { "epoch": 2274.074074074074, "learning_rate": 4.952859066976857e-07, "loss": 1.6238, "step": 36840 }, { "epoch": 2275.3086419753085, "learning_rate": 4.950104077384597e-07, "loss": 1.6, "step": 36860 }, { "epoch": 2276.543209876543, "learning_rate": 4.947349087792336e-07, "loss": 1.6028, "step": 36880 }, { "epoch": 2277.777777777778, "learning_rate": 4.944594098200074e-07, "loss": 1.6577, "step": 36900 }, { "epoch": 2279.0123456790125, "learning_rate": 4.941839108607813e-07, "loss": 1.6336, "step": 36920 }, { "epoch": 2280.246913580247, "learning_rate": 4.939084119015549e-07, "loss": 1.6477, "step": 36940 }, { "epoch": 2281.4814814814813, "learning_rate": 4.936329129423288e-07, "loss": 1.6654, "step": 36960 }, { "epoch": 2282.716049382716, "learning_rate": 4.933574139831027e-07, "loss": 1.5985, "step": 36980 }, { "epoch": 2283.9506172839506, "learning_rate": 4.930819150238765e-07, "loss": 1.6147, "step": 37000 }, { "epoch": 2285.185185185185, "learning_rate": 4.928064160646504e-07, "loss": 1.6644, "step": 37020 }, { "epoch": 2286.41975308642, "learning_rate": 4.925309171054242e-07, "loss": 1.6265, "step": 37040 }, { "epoch": 2287.6543209876545, "learning_rate": 4.922554181461981e-07, "loss": 1.6096, "step": 37060 }, { "epoch": 2288.8888888888887, "learning_rate": 4.91979919186972e-07, "loss": 1.6549, "step": 37080 }, { "epoch": 2290.1234567901233, "learning_rate": 4.917044202277458e-07, "loss": 1.6385, "step": 37100 }, { "epoch": 2291.358024691358, "learning_rate": 4.914289212685197e-07, "loss": 1.6274, "step": 37120 }, { "epoch": 2292.5925925925926, "learning_rate": 4.911534223092935e-07, "loss": 1.6508, "step": 37140 }, { "epoch": 2293.8271604938273, "learning_rate": 4.908779233500674e-07, "loss": 1.6107, "step": 37160 }, { "epoch": 2295.061728395062, "learning_rate": 4.906024243908412e-07, "loss": 1.6367, "step": 37180 }, { "epoch": 2296.296296296296, "learning_rate": 4.903269254316149e-07, "loss": 1.6522, "step": 37200 }, { "epoch": 2297.5308641975307, "learning_rate": 4.900514264723889e-07, "loss": 1.5979, "step": 37220 }, { "epoch": 2298.7654320987654, "learning_rate": 4.897759275131627e-07, "loss": 1.684, "step": 37240 }, { "epoch": 2300.0, "learning_rate": 4.895004285539366e-07, "loss": 1.615, "step": 37260 }, { "epoch": 2301.2345679012346, "learning_rate": 4.892249295947104e-07, "loss": 1.6249, "step": 37280 }, { "epoch": 2302.4691358024693, "learning_rate": 4.889494306354843e-07, "loss": 1.6209, "step": 37300 }, { "epoch": 2303.703703703704, "learning_rate": 4.886739316762581e-07, "loss": 1.6521, "step": 37320 }, { "epoch": 2304.938271604938, "learning_rate": 4.883984327170319e-07, "loss": 1.6456, "step": 37340 }, { "epoch": 2306.1728395061727, "learning_rate": 4.881229337578058e-07, "loss": 1.64, "step": 37360 }, { "epoch": 2307.4074074074074, "learning_rate": 4.878474347985796e-07, "loss": 1.6339, "step": 37380 }, { "epoch": 2308.641975308642, "learning_rate": 4.875719358393535e-07, "loss": 1.6251, "step": 37400 }, { "epoch": 2309.8765432098767, "learning_rate": 4.872964368801273e-07, "loss": 1.6307, "step": 37420 }, { "epoch": 2311.1111111111113, "learning_rate": 4.870209379209012e-07, "loss": 1.6564, "step": 37440 }, { "epoch": 2312.3456790123455, "learning_rate": 4.867454389616751e-07, "loss": 1.6458, "step": 37460 }, { "epoch": 2313.58024691358, "learning_rate": 4.864699400024488e-07, "loss": 1.5916, "step": 37480 }, { "epoch": 2314.814814814815, "learning_rate": 4.861944410432227e-07, "loss": 1.5901, "step": 37500 }, { "epoch": 2316.0493827160494, "learning_rate": 4.859189420839965e-07, "loss": 1.6709, "step": 37520 }, { "epoch": 2317.283950617284, "learning_rate": 4.856434431247704e-07, "loss": 1.6112, "step": 37540 }, { "epoch": 2318.5185185185187, "learning_rate": 4.853679441655442e-07, "loss": 1.6341, "step": 37560 }, { "epoch": 2319.753086419753, "learning_rate": 4.850924452063181e-07, "loss": 1.6095, "step": 37580 }, { "epoch": 2320.9876543209875, "learning_rate": 4.848169462470919e-07, "loss": 1.6618, "step": 37600 }, { "epoch": 2322.222222222222, "learning_rate": 4.845414472878657e-07, "loss": 1.6223, "step": 37620 }, { "epoch": 2323.456790123457, "learning_rate": 4.842659483286396e-07, "loss": 1.6658, "step": 37640 }, { "epoch": 2324.6913580246915, "learning_rate": 4.839904493694134e-07, "loss": 1.5882, "step": 37660 }, { "epoch": 2325.925925925926, "learning_rate": 4.837149504101873e-07, "loss": 1.586, "step": 37680 }, { "epoch": 2327.1604938271603, "learning_rate": 4.834394514509611e-07, "loss": 1.6513, "step": 37700 }, { "epoch": 2328.395061728395, "learning_rate": 4.83163952491735e-07, "loss": 1.602, "step": 37720 }, { "epoch": 2329.6296296296296, "learning_rate": 4.828884535325089e-07, "loss": 1.6642, "step": 37740 }, { "epoch": 2330.864197530864, "learning_rate": 4.826129545732827e-07, "loss": 1.6289, "step": 37760 }, { "epoch": 2332.098765432099, "learning_rate": 4.823374556140566e-07, "loss": 1.6072, "step": 37780 }, { "epoch": 2333.3333333333335, "learning_rate": 4.820619566548304e-07, "loss": 1.6095, "step": 37800 }, { "epoch": 2334.567901234568, "learning_rate": 4.817864576956043e-07, "loss": 1.643, "step": 37820 }, { "epoch": 2335.8024691358023, "learning_rate": 4.815109587363782e-07, "loss": 1.617, "step": 37840 }, { "epoch": 2337.037037037037, "learning_rate": 4.81235459777152e-07, "loss": 1.6112, "step": 37860 }, { "epoch": 2338.2716049382716, "learning_rate": 4.809599608179257e-07, "loss": 1.6165, "step": 37880 }, { "epoch": 2339.5061728395062, "learning_rate": 4.806844618586995e-07, "loss": 1.633, "step": 37900 }, { "epoch": 2340.740740740741, "learning_rate": 4.804089628994734e-07, "loss": 1.6311, "step": 37920 }, { "epoch": 2341.9753086419755, "learning_rate": 4.801334639402472e-07, "loss": 1.5876, "step": 37940 }, { "epoch": 2343.2098765432097, "learning_rate": 4.798579649810211e-07, "loss": 1.6237, "step": 37960 }, { "epoch": 2344.4444444444443, "learning_rate": 4.795824660217951e-07, "loss": 1.6244, "step": 37980 }, { "epoch": 2345.679012345679, "learning_rate": 4.793069670625689e-07, "loss": 1.586, "step": 38000 }, { "epoch": 2346.9135802469136, "learning_rate": 4.790314681033427e-07, "loss": 1.6345, "step": 38020 }, { "epoch": 2348.1481481481483, "learning_rate": 4.787559691441165e-07, "loss": 1.6393, "step": 38040 }, { "epoch": 2349.382716049383, "learning_rate": 4.784804701848904e-07, "loss": 1.6173, "step": 38060 }, { "epoch": 2350.617283950617, "learning_rate": 4.782049712256643e-07, "loss": 1.6243, "step": 38080 }, { "epoch": 2351.8518518518517, "learning_rate": 4.779294722664381e-07, "loss": 1.6464, "step": 38100 }, { "epoch": 2353.0864197530864, "learning_rate": 4.77653973307212e-07, "loss": 1.6287, "step": 38120 }, { "epoch": 2354.320987654321, "learning_rate": 4.773784743479857e-07, "loss": 1.6247, "step": 38140 }, { "epoch": 2355.5555555555557, "learning_rate": 4.771029753887596e-07, "loss": 1.6204, "step": 38160 }, { "epoch": 2356.7901234567903, "learning_rate": 4.768274764295335e-07, "loss": 1.6291, "step": 38180 }, { "epoch": 2358.0246913580245, "learning_rate": 4.765519774703073e-07, "loss": 1.6355, "step": 38200 }, { "epoch": 2359.259259259259, "learning_rate": 4.7627647851108116e-07, "loss": 1.6379, "step": 38220 }, { "epoch": 2360.4938271604938, "learning_rate": 4.76000979551855e-07, "loss": 1.6773, "step": 38240 }, { "epoch": 2361.7283950617284, "learning_rate": 4.757254805926289e-07, "loss": 1.5918, "step": 38260 }, { "epoch": 2362.962962962963, "learning_rate": 4.7544998163340263e-07, "loss": 1.6183, "step": 38280 }, { "epoch": 2364.1975308641977, "learning_rate": 4.751744826741765e-07, "loss": 1.5934, "step": 38300 }, { "epoch": 2365.432098765432, "learning_rate": 4.7489898371495035e-07, "loss": 1.5774, "step": 38320 }, { "epoch": 2366.6666666666665, "learning_rate": 4.7462348475572426e-07, "loss": 1.6208, "step": 38340 }, { "epoch": 2367.901234567901, "learning_rate": 4.743479857964981e-07, "loss": 1.6088, "step": 38360 }, { "epoch": 2369.135802469136, "learning_rate": 4.74072486837272e-07, "loss": 1.6248, "step": 38380 }, { "epoch": 2370.3703703703704, "learning_rate": 4.7379698787804584e-07, "loss": 1.5924, "step": 38400 }, { "epoch": 2371.604938271605, "learning_rate": 4.735214889188196e-07, "loss": 1.6087, "step": 38420 }, { "epoch": 2372.8395061728397, "learning_rate": 4.7324598995959345e-07, "loss": 1.5936, "step": 38440 }, { "epoch": 2374.074074074074, "learning_rate": 4.729704910003673e-07, "loss": 1.6532, "step": 38460 }, { "epoch": 2375.3086419753085, "learning_rate": 4.7269499204114117e-07, "loss": 1.6262, "step": 38480 }, { "epoch": 2376.543209876543, "learning_rate": 4.72419493081915e-07, "loss": 1.6101, "step": 38500 }, { "epoch": 2377.777777777778, "learning_rate": 4.721439941226889e-07, "loss": 1.634, "step": 38520 }, { "epoch": 2379.0123456790125, "learning_rate": 4.7186849516346274e-07, "loss": 1.6157, "step": 38540 }, { "epoch": 2380.246913580247, "learning_rate": 4.715929962042365e-07, "loss": 1.6171, "step": 38560 }, { "epoch": 2381.4814814814813, "learning_rate": 4.7131749724501036e-07, "loss": 1.6804, "step": 38580 }, { "epoch": 2382.716049382716, "learning_rate": 4.710419982857842e-07, "loss": 1.596, "step": 38600 }, { "epoch": 2383.9506172839506, "learning_rate": 4.7076649932655807e-07, "loss": 1.6372, "step": 38620 }, { "epoch": 2385.185185185185, "learning_rate": 4.7049100036733193e-07, "loss": 1.6268, "step": 38640 }, { "epoch": 2386.41975308642, "learning_rate": 4.702155014081058e-07, "loss": 1.6041, "step": 38660 }, { "epoch": 2387.6543209876545, "learning_rate": 4.6994000244887954e-07, "loss": 1.6394, "step": 38680 }, { "epoch": 2388.8888888888887, "learning_rate": 4.6966450348965346e-07, "loss": 1.6243, "step": 38700 }, { "epoch": 2390.1234567901233, "learning_rate": 4.693890045304273e-07, "loss": 1.6462, "step": 38720 }, { "epoch": 2391.358024691358, "learning_rate": 4.6911350557120117e-07, "loss": 1.608, "step": 38740 }, { "epoch": 2392.5925925925926, "learning_rate": 4.6883800661197503e-07, "loss": 1.6236, "step": 38760 }, { "epoch": 2393.8271604938273, "learning_rate": 4.685625076527489e-07, "loss": 1.639, "step": 38780 }, { "epoch": 2395.061728395062, "learning_rate": 4.682870086935227e-07, "loss": 1.6154, "step": 38800 }, { "epoch": 2396.296296296296, "learning_rate": 4.680115097342965e-07, "loss": 1.6044, "step": 38820 }, { "epoch": 2397.5308641975307, "learning_rate": 4.6773601077507036e-07, "loss": 1.6302, "step": 38840 }, { "epoch": 2398.7654320987654, "learning_rate": 4.674605118158442e-07, "loss": 1.6219, "step": 38860 }, { "epoch": 2400.0, "learning_rate": 4.671850128566181e-07, "loss": 1.6178, "step": 38880 }, { "epoch": 2401.2345679012346, "learning_rate": 4.6690951389739194e-07, "loss": 1.6197, "step": 38900 }, { "epoch": 2402.4691358024693, "learning_rate": 4.666340149381658e-07, "loss": 1.6042, "step": 38920 }, { "epoch": 2403.703703703704, "learning_rate": 4.6635851597893966e-07, "loss": 1.591, "step": 38940 }, { "epoch": 2404.938271604938, "learning_rate": 4.660830170197134e-07, "loss": 1.6311, "step": 38960 }, { "epoch": 2406.1728395061727, "learning_rate": 4.6580751806048727e-07, "loss": 1.644, "step": 38980 }, { "epoch": 2407.4074074074074, "learning_rate": 4.6553201910126113e-07, "loss": 1.6399, "step": 39000 }, { "epoch": 2408.641975308642, "learning_rate": 4.65256520142035e-07, "loss": 1.6386, "step": 39020 }, { "epoch": 2409.8765432098767, "learning_rate": 4.6498102118280884e-07, "loss": 1.6388, "step": 39040 }, { "epoch": 2411.1111111111113, "learning_rate": 4.647055222235827e-07, "loss": 1.6258, "step": 39060 }, { "epoch": 2412.3456790123455, "learning_rate": 4.6443002326435656e-07, "loss": 1.6054, "step": 39080 }, { "epoch": 2413.58024691358, "learning_rate": 4.6415452430513037e-07, "loss": 1.666, "step": 39100 }, { "epoch": 2414.814814814815, "learning_rate": 4.6387902534590423e-07, "loss": 1.6082, "step": 39120 }, { "epoch": 2416.0493827160494, "learning_rate": 4.636035263866781e-07, "loss": 1.6282, "step": 39140 }, { "epoch": 2417.283950617284, "learning_rate": 4.633280274274519e-07, "loss": 1.6102, "step": 39160 }, { "epoch": 2418.5185185185187, "learning_rate": 4.630525284682258e-07, "loss": 1.6249, "step": 39180 }, { "epoch": 2419.753086419753, "learning_rate": 4.6277702950899966e-07, "loss": 1.6388, "step": 39200 }, { "epoch": 2420.9876543209875, "learning_rate": 4.625015305497734e-07, "loss": 1.6135, "step": 39220 }, { "epoch": 2422.222222222222, "learning_rate": 4.622260315905473e-07, "loss": 1.6742, "step": 39240 }, { "epoch": 2423.456790123457, "learning_rate": 4.6195053263132113e-07, "loss": 1.6415, "step": 39260 }, { "epoch": 2424.6913580246915, "learning_rate": 4.61675033672095e-07, "loss": 1.6385, "step": 39280 }, { "epoch": 2425.925925925926, "learning_rate": 4.6139953471286885e-07, "loss": 1.6201, "step": 39300 }, { "epoch": 2427.1604938271603, "learning_rate": 4.611240357536427e-07, "loss": 1.6216, "step": 39320 }, { "epoch": 2428.395061728395, "learning_rate": 4.6084853679441657e-07, "loss": 1.5967, "step": 39340 }, { "epoch": 2429.6296296296296, "learning_rate": 4.605730378351903e-07, "loss": 1.6069, "step": 39360 }, { "epoch": 2430.864197530864, "learning_rate": 4.602975388759642e-07, "loss": 1.6185, "step": 39380 }, { "epoch": 2432.098765432099, "learning_rate": 4.6002203991673804e-07, "loss": 1.6183, "step": 39400 }, { "epoch": 2433.3333333333335, "learning_rate": 4.597465409575119e-07, "loss": 1.613, "step": 39420 }, { "epoch": 2434.567901234568, "learning_rate": 4.5947104199828576e-07, "loss": 1.6076, "step": 39440 }, { "epoch": 2435.8024691358023, "learning_rate": 4.591955430390596e-07, "loss": 1.6154, "step": 39460 }, { "epoch": 2437.037037037037, "learning_rate": 4.5892004407983353e-07, "loss": 1.6337, "step": 39480 }, { "epoch": 2438.2716049382716, "learning_rate": 4.586445451206073e-07, "loss": 1.6416, "step": 39500 }, { "epoch": 2439.5061728395062, "learning_rate": 4.583690461613811e-07, "loss": 1.615, "step": 39520 }, { "epoch": 2440.740740740741, "learning_rate": 4.58093547202155e-07, "loss": 1.6198, "step": 39540 }, { "epoch": 2441.9753086419755, "learning_rate": 4.5781804824292886e-07, "loss": 1.6119, "step": 39560 }, { "epoch": 2443.2098765432097, "learning_rate": 4.575425492837027e-07, "loss": 1.6009, "step": 39580 }, { "epoch": 2444.4444444444443, "learning_rate": 4.572670503244766e-07, "loss": 1.5824, "step": 39600 }, { "epoch": 2445.679012345679, "learning_rate": 4.5699155136525033e-07, "loss": 1.6549, "step": 39620 }, { "epoch": 2446.9135802469136, "learning_rate": 4.567160524060242e-07, "loss": 1.636, "step": 39640 }, { "epoch": 2448.1481481481483, "learning_rate": 4.5644055344679804e-07, "loss": 1.6012, "step": 39660 }, { "epoch": 2449.382716049383, "learning_rate": 4.561650544875719e-07, "loss": 1.6254, "step": 39680 }, { "epoch": 2450.617283950617, "learning_rate": 4.5588955552834576e-07, "loss": 1.5667, "step": 39700 }, { "epoch": 2451.8518518518517, "learning_rate": 4.556140565691196e-07, "loss": 1.6614, "step": 39720 }, { "epoch": 2453.0864197530864, "learning_rate": 4.553385576098935e-07, "loss": 1.6136, "step": 39740 }, { "epoch": 2454.320987654321, "learning_rate": 4.5506305865066723e-07, "loss": 1.6299, "step": 39760 }, { "epoch": 2455.5555555555557, "learning_rate": 4.547875596914411e-07, "loss": 1.5855, "step": 39780 }, { "epoch": 2456.7901234567903, "learning_rate": 4.5451206073221495e-07, "loss": 1.6241, "step": 39800 }, { "epoch": 2458.0246913580245, "learning_rate": 4.542365617729888e-07, "loss": 1.6204, "step": 39820 }, { "epoch": 2459.259259259259, "learning_rate": 4.539610628137627e-07, "loss": 1.6197, "step": 39840 }, { "epoch": 2460.4938271604938, "learning_rate": 4.536855638545366e-07, "loss": 1.5769, "step": 39860 }, { "epoch": 2461.7283950617284, "learning_rate": 4.5341006489531044e-07, "loss": 1.6498, "step": 39880 }, { "epoch": 2462.962962962963, "learning_rate": 4.531345659360842e-07, "loss": 1.59, "step": 39900 }, { "epoch": 2464.1975308641977, "learning_rate": 4.5285906697685805e-07, "loss": 1.6261, "step": 39920 }, { "epoch": 2465.432098765432, "learning_rate": 4.525835680176319e-07, "loss": 1.6421, "step": 39940 }, { "epoch": 2466.6666666666665, "learning_rate": 4.5230806905840577e-07, "loss": 1.5835, "step": 39960 }, { "epoch": 2467.901234567901, "learning_rate": 4.5203257009917963e-07, "loss": 1.5877, "step": 39980 }, { "epoch": 2469.135802469136, "learning_rate": 4.517570711399535e-07, "loss": 1.6351, "step": 40000 }, { "epoch": 2470.3703703703704, "learning_rate": 4.5148157218072735e-07, "loss": 1.5755, "step": 40020 }, { "epoch": 2471.604938271605, "learning_rate": 4.512060732215011e-07, "loss": 1.6251, "step": 40040 }, { "epoch": 2472.8395061728397, "learning_rate": 4.5093057426227496e-07, "loss": 1.6108, "step": 40060 }, { "epoch": 2474.074074074074, "learning_rate": 4.506550753030488e-07, "loss": 1.6189, "step": 40080 }, { "epoch": 2475.3086419753085, "learning_rate": 4.503795763438227e-07, "loss": 1.596, "step": 40100 }, { "epoch": 2476.543209876543, "learning_rate": 4.5010407738459653e-07, "loss": 1.6244, "step": 40120 }, { "epoch": 2477.777777777778, "learning_rate": 4.4982857842537045e-07, "loss": 1.5974, "step": 40140 }, { "epoch": 2479.0123456790125, "learning_rate": 4.4955307946614415e-07, "loss": 1.6546, "step": 40160 }, { "epoch": 2480.246913580247, "learning_rate": 4.49277580506918e-07, "loss": 1.6515, "step": 40180 }, { "epoch": 2481.4814814814813, "learning_rate": 4.490020815476919e-07, "loss": 1.5839, "step": 40200 }, { "epoch": 2482.716049382716, "learning_rate": 4.487265825884658e-07, "loss": 1.599, "step": 40220 }, { "epoch": 2483.9506172839506, "learning_rate": 4.4845108362923963e-07, "loss": 1.6131, "step": 40240 }, { "epoch": 2485.185185185185, "learning_rate": 4.481755846700135e-07, "loss": 1.6605, "step": 40260 }, { "epoch": 2486.41975308642, "learning_rate": 4.4790008571078735e-07, "loss": 1.6226, "step": 40280 }, { "epoch": 2487.6543209876545, "learning_rate": 4.476245867515611e-07, "loss": 1.6297, "step": 40300 }, { "epoch": 2488.8888888888887, "learning_rate": 4.4734908779233496e-07, "loss": 1.5975, "step": 40320 }, { "epoch": 2490.1234567901233, "learning_rate": 4.470735888331088e-07, "loss": 1.6291, "step": 40340 }, { "epoch": 2491.358024691358, "learning_rate": 4.467980898738827e-07, "loss": 1.614, "step": 40360 }, { "epoch": 2492.5925925925926, "learning_rate": 4.4652259091465654e-07, "loss": 1.6366, "step": 40380 }, { "epoch": 2493.8271604938273, "learning_rate": 4.462470919554304e-07, "loss": 1.6082, "step": 40400 }, { "epoch": 2495.061728395062, "learning_rate": 4.4597159299620426e-07, "loss": 1.5866, "step": 40420 }, { "epoch": 2496.296296296296, "learning_rate": 4.45696094036978e-07, "loss": 1.5895, "step": 40440 }, { "epoch": 2497.5308641975307, "learning_rate": 4.4542059507775187e-07, "loss": 1.628, "step": 40460 }, { "epoch": 2498.7654320987654, "learning_rate": 4.4514509611852573e-07, "loss": 1.6217, "step": 40480 }, { "epoch": 2500.0, "learning_rate": 4.4486959715929964e-07, "loss": 1.6411, "step": 40500 }, { "epoch": 2501.2345679012346, "learning_rate": 4.4459409820007345e-07, "loss": 1.6156, "step": 40520 }, { "epoch": 2502.4691358024693, "learning_rate": 4.443185992408473e-07, "loss": 1.5974, "step": 40540 }, { "epoch": 2503.703703703704, "learning_rate": 4.4404310028162116e-07, "loss": 1.603, "step": 40560 }, { "epoch": 2504.938271604938, "learning_rate": 4.4376760132239497e-07, "loss": 1.6142, "step": 40580 }, { "epoch": 2506.1728395061727, "learning_rate": 4.4349210236316883e-07, "loss": 1.6063, "step": 40600 }, { "epoch": 2507.4074074074074, "learning_rate": 4.432166034039427e-07, "loss": 1.6022, "step": 40620 }, { "epoch": 2508.641975308642, "learning_rate": 4.4294110444471655e-07, "loss": 1.5694, "step": 40640 }, { "epoch": 2509.8765432098767, "learning_rate": 4.426656054854904e-07, "loss": 1.6473, "step": 40660 }, { "epoch": 2511.1111111111113, "learning_rate": 4.4239010652626426e-07, "loss": 1.6251, "step": 40680 }, { "epoch": 2512.3456790123455, "learning_rate": 4.42114607567038e-07, "loss": 1.5859, "step": 40700 }, { "epoch": 2513.58024691358, "learning_rate": 4.418391086078119e-07, "loss": 1.5816, "step": 40720 }, { "epoch": 2514.814814814815, "learning_rate": 4.4156360964858573e-07, "loss": 1.6203, "step": 40740 }, { "epoch": 2516.0493827160494, "learning_rate": 4.412881106893596e-07, "loss": 1.6278, "step": 40760 }, { "epoch": 2517.283950617284, "learning_rate": 4.4101261173013345e-07, "loss": 1.5997, "step": 40780 }, { "epoch": 2518.5185185185187, "learning_rate": 4.407371127709073e-07, "loss": 1.5874, "step": 40800 }, { "epoch": 2519.753086419753, "learning_rate": 4.4046161381168117e-07, "loss": 1.5911, "step": 40820 }, { "epoch": 2520.9876543209875, "learning_rate": 4.401861148524549e-07, "loss": 1.6143, "step": 40840 }, { "epoch": 2522.222222222222, "learning_rate": 4.3991061589322883e-07, "loss": 1.6012, "step": 40860 }, { "epoch": 2523.456790123457, "learning_rate": 4.3963511693400264e-07, "loss": 1.6533, "step": 40880 }, { "epoch": 2524.6913580246915, "learning_rate": 4.393596179747765e-07, "loss": 1.5982, "step": 40900 }, { "epoch": 2525.925925925926, "learning_rate": 4.3908411901555036e-07, "loss": 1.6277, "step": 40920 }, { "epoch": 2527.1604938271603, "learning_rate": 4.388086200563242e-07, "loss": 1.6174, "step": 40940 }, { "epoch": 2528.395061728395, "learning_rate": 4.385331210970981e-07, "loss": 1.5914, "step": 40960 }, { "epoch": 2529.6296296296296, "learning_rate": 4.382576221378719e-07, "loss": 1.5819, "step": 40980 }, { "epoch": 2530.864197530864, "learning_rate": 4.3798212317864574e-07, "loss": 1.613, "step": 41000 }, { "epoch": 2532.098765432099, "learning_rate": 4.377066242194196e-07, "loss": 1.6322, "step": 41020 }, { "epoch": 2533.3333333333335, "learning_rate": 4.3743112526019346e-07, "loss": 1.5941, "step": 41040 }, { "epoch": 2534.567901234568, "learning_rate": 4.371556263009673e-07, "loss": 1.6098, "step": 41060 }, { "epoch": 2535.8024691358023, "learning_rate": 4.368801273417412e-07, "loss": 1.5908, "step": 41080 }, { "epoch": 2537.037037037037, "learning_rate": 4.3660462838251503e-07, "loss": 1.6191, "step": 41100 }, { "epoch": 2538.2716049382716, "learning_rate": 4.363291294232888e-07, "loss": 1.6222, "step": 41120 }, { "epoch": 2539.5061728395062, "learning_rate": 4.3605363046406265e-07, "loss": 1.6025, "step": 41140 }, { "epoch": 2540.740740740741, "learning_rate": 4.357781315048365e-07, "loss": 1.5945, "step": 41160 }, { "epoch": 2541.9753086419755, "learning_rate": 4.3550263254561036e-07, "loss": 1.5879, "step": 41180 }, { "epoch": 2543.2098765432097, "learning_rate": 4.352271335863842e-07, "loss": 1.6491, "step": 41200 }, { "epoch": 2544.4444444444443, "learning_rate": 4.349516346271581e-07, "loss": 1.5941, "step": 41220 }, { "epoch": 2545.679012345679, "learning_rate": 4.3467613566793183e-07, "loss": 1.6061, "step": 41240 }, { "epoch": 2546.9135802469136, "learning_rate": 4.344006367087057e-07, "loss": 1.5921, "step": 41260 }, { "epoch": 2548.1481481481483, "learning_rate": 4.3412513774947955e-07, "loss": 1.5977, "step": 41280 }, { "epoch": 2549.382716049383, "learning_rate": 4.338496387902534e-07, "loss": 1.6343, "step": 41300 }, { "epoch": 2550.617283950617, "learning_rate": 4.335741398310273e-07, "loss": 1.613, "step": 41320 }, { "epoch": 2551.8518518518517, "learning_rate": 4.332986408718012e-07, "loss": 1.5943, "step": 41340 }, { "epoch": 2553.0864197530864, "learning_rate": 4.3302314191257504e-07, "loss": 1.6437, "step": 41360 }, { "epoch": 2554.320987654321, "learning_rate": 4.327476429533488e-07, "loss": 1.6181, "step": 41380 }, { "epoch": 2555.5555555555557, "learning_rate": 4.3247214399412265e-07, "loss": 1.6019, "step": 41400 }, { "epoch": 2556.7901234567903, "learning_rate": 4.321966450348965e-07, "loss": 1.5988, "step": 41420 }, { "epoch": 2558.0246913580245, "learning_rate": 4.3192114607567037e-07, "loss": 1.5741, "step": 41440 }, { "epoch": 2559.259259259259, "learning_rate": 4.3164564711644423e-07, "loss": 1.6289, "step": 41460 }, { "epoch": 2560.4938271604938, "learning_rate": 4.3137014815721804e-07, "loss": 1.6128, "step": 41480 }, { "epoch": 2561.7283950617284, "learning_rate": 4.3109464919799195e-07, "loss": 1.6329, "step": 41500 }, { "epoch": 2562.962962962963, "learning_rate": 4.308191502387657e-07, "loss": 1.6324, "step": 41520 }, { "epoch": 2564.1975308641977, "learning_rate": 4.3054365127953956e-07, "loss": 1.6622, "step": 41540 }, { "epoch": 2565.432098765432, "learning_rate": 4.302681523203134e-07, "loss": 1.5906, "step": 41560 }, { "epoch": 2566.6666666666665, "learning_rate": 4.299926533610873e-07, "loss": 1.603, "step": 41580 }, { "epoch": 2567.901234567901, "learning_rate": 4.2971715440186114e-07, "loss": 1.615, "step": 41600 }, { "epoch": 2569.135802469136, "learning_rate": 4.29441655442635e-07, "loss": 1.6302, "step": 41620 }, { "epoch": 2570.3703703703704, "learning_rate": 4.2916615648340885e-07, "loss": 1.561, "step": 41640 }, { "epoch": 2571.604938271605, "learning_rate": 4.288906575241826e-07, "loss": 1.6053, "step": 41660 }, { "epoch": 2572.8395061728397, "learning_rate": 4.286151585649565e-07, "loss": 1.5834, "step": 41680 }, { "epoch": 2574.074074074074, "learning_rate": 4.283396596057304e-07, "loss": 1.622, "step": 41700 }, { "epoch": 2575.3086419753085, "learning_rate": 4.2806416064650424e-07, "loss": 1.6139, "step": 41720 }, { "epoch": 2576.543209876543, "learning_rate": 4.277886616872781e-07, "loss": 1.625, "step": 41740 }, { "epoch": 2577.777777777778, "learning_rate": 4.2751316272805195e-07, "loss": 1.6109, "step": 41760 }, { "epoch": 2579.0123456790125, "learning_rate": 4.272376637688257e-07, "loss": 1.6557, "step": 41780 }, { "epoch": 2580.246913580247, "learning_rate": 4.2696216480959956e-07, "loss": 1.6254, "step": 41800 }, { "epoch": 2581.4814814814813, "learning_rate": 4.266866658503734e-07, "loss": 1.6331, "step": 41820 }, { "epoch": 2582.716049382716, "learning_rate": 4.2641116689114723e-07, "loss": 1.6092, "step": 41840 }, { "epoch": 2583.9506172839506, "learning_rate": 4.2613566793192114e-07, "loss": 1.6032, "step": 41860 }, { "epoch": 2585.185185185185, "learning_rate": 4.25860168972695e-07, "loss": 1.6255, "step": 41880 }, { "epoch": 2586.41975308642, "learning_rate": 4.2558467001346886e-07, "loss": 1.5977, "step": 41900 }, { "epoch": 2587.6543209876545, "learning_rate": 4.253091710542426e-07, "loss": 1.6134, "step": 41920 }, { "epoch": 2588.8888888888887, "learning_rate": 4.2503367209501647e-07, "loss": 1.5769, "step": 41940 }, { "epoch": 2590.1234567901233, "learning_rate": 4.2475817313579033e-07, "loss": 1.6155, "step": 41960 }, { "epoch": 2591.358024691358, "learning_rate": 4.244826741765642e-07, "loss": 1.6145, "step": 41980 }, { "epoch": 2592.5925925925926, "learning_rate": 4.2420717521733805e-07, "loss": 1.6022, "step": 42000 }, { "epoch": 2593.8271604938273, "learning_rate": 4.239316762581119e-07, "loss": 1.6055, "step": 42020 }, { "epoch": 2595.061728395062, "learning_rate": 4.2365617729888577e-07, "loss": 1.6091, "step": 42040 }, { "epoch": 2596.296296296296, "learning_rate": 4.2338067833965957e-07, "loss": 1.5848, "step": 42060 }, { "epoch": 2597.5308641975307, "learning_rate": 4.2310517938043343e-07, "loss": 1.6191, "step": 42080 }, { "epoch": 2598.7654320987654, "learning_rate": 4.228296804212073e-07, "loss": 1.5982, "step": 42100 }, { "epoch": 2600.0, "learning_rate": 4.2255418146198115e-07, "loss": 1.6453, "step": 42120 }, { "epoch": 2601.2345679012346, "learning_rate": 4.22278682502755e-07, "loss": 1.6101, "step": 42140 }, { "epoch": 2602.4691358024693, "learning_rate": 4.2200318354352887e-07, "loss": 1.591, "step": 42160 }, { "epoch": 2603.703703703704, "learning_rate": 4.217276845843027e-07, "loss": 1.5913, "step": 42180 }, { "epoch": 2604.938271604938, "learning_rate": 4.214521856250765e-07, "loss": 1.5909, "step": 42200 }, { "epoch": 2606.1728395061727, "learning_rate": 4.2117668666585034e-07, "loss": 1.5949, "step": 42220 }, { "epoch": 2607.4074074074074, "learning_rate": 4.209011877066242e-07, "loss": 1.583, "step": 42240 }, { "epoch": 2608.641975308642, "learning_rate": 4.2062568874739805e-07, "loss": 1.5797, "step": 42260 }, { "epoch": 2609.8765432098767, "learning_rate": 4.203501897881719e-07, "loss": 1.638, "step": 42280 }, { "epoch": 2611.1111111111113, "learning_rate": 4.2007469082894577e-07, "loss": 1.597, "step": 42300 }, { "epoch": 2612.3456790123455, "learning_rate": 4.197991918697195e-07, "loss": 1.5964, "step": 42320 }, { "epoch": 2613.58024691358, "learning_rate": 4.195236929104934e-07, "loss": 1.5913, "step": 42340 }, { "epoch": 2614.814814814815, "learning_rate": 4.1924819395126724e-07, "loss": 1.5834, "step": 42360 }, { "epoch": 2616.0493827160494, "learning_rate": 4.189726949920411e-07, "loss": 1.6052, "step": 42380 }, { "epoch": 2617.283950617284, "learning_rate": 4.1869719603281496e-07, "loss": 1.5629, "step": 42400 }, { "epoch": 2618.5185185185187, "learning_rate": 4.184216970735888e-07, "loss": 1.6066, "step": 42420 }, { "epoch": 2619.753086419753, "learning_rate": 4.181461981143627e-07, "loss": 1.5675, "step": 42440 }, { "epoch": 2620.9876543209875, "learning_rate": 4.178706991551365e-07, "loss": 1.6437, "step": 42460 }, { "epoch": 2622.222222222222, "learning_rate": 4.1759520019591034e-07, "loss": 1.6229, "step": 42480 }, { "epoch": 2623.456790123457, "learning_rate": 4.173197012366842e-07, "loss": 1.6076, "step": 42500 }, { "epoch": 2624.6913580246915, "learning_rate": 4.1704420227745806e-07, "loss": 1.5865, "step": 42520 }, { "epoch": 2625.925925925926, "learning_rate": 4.167687033182319e-07, "loss": 1.5943, "step": 42540 }, { "epoch": 2627.1604938271603, "learning_rate": 4.164932043590058e-07, "loss": 1.5538, "step": 42560 }, { "epoch": 2628.395061728395, "learning_rate": 4.1621770539977964e-07, "loss": 1.6155, "step": 42580 }, { "epoch": 2629.6296296296296, "learning_rate": 4.159422064405534e-07, "loss": 1.6149, "step": 42600 }, { "epoch": 2630.864197530864, "learning_rate": 4.1566670748132725e-07, "loss": 1.6066, "step": 42620 }, { "epoch": 2632.098765432099, "learning_rate": 4.153912085221011e-07, "loss": 1.6325, "step": 42640 }, { "epoch": 2633.3333333333335, "learning_rate": 4.1511570956287497e-07, "loss": 1.6076, "step": 42660 }, { "epoch": 2634.567901234568, "learning_rate": 4.148402106036488e-07, "loss": 1.617, "step": 42680 }, { "epoch": 2635.8024691358023, "learning_rate": 4.145647116444227e-07, "loss": 1.6, "step": 42700 }, { "epoch": 2637.037037037037, "learning_rate": 4.1428921268519644e-07, "loss": 1.5751, "step": 42720 }, { "epoch": 2638.2716049382716, "learning_rate": 4.140137137259703e-07, "loss": 1.6129, "step": 42740 }, { "epoch": 2639.5061728395062, "learning_rate": 4.1373821476674415e-07, "loss": 1.5832, "step": 42760 }, { "epoch": 2640.740740740741, "learning_rate": 4.13462715807518e-07, "loss": 1.5988, "step": 42780 }, { "epoch": 2641.9753086419755, "learning_rate": 4.1318721684829187e-07, "loss": 1.6112, "step": 42800 }, { "epoch": 2643.2098765432097, "learning_rate": 4.129117178890658e-07, "loss": 1.6164, "step": 42820 }, { "epoch": 2644.4444444444443, "learning_rate": 4.1263621892983964e-07, "loss": 1.6019, "step": 42840 }, { "epoch": 2645.679012345679, "learning_rate": 4.123607199706134e-07, "loss": 1.5619, "step": 42860 }, { "epoch": 2646.9135802469136, "learning_rate": 4.1208522101138725e-07, "loss": 1.6228, "step": 42880 }, { "epoch": 2648.1481481481483, "learning_rate": 4.118097220521611e-07, "loss": 1.5923, "step": 42900 }, { "epoch": 2649.382716049383, "learning_rate": 4.1153422309293497e-07, "loss": 1.5557, "step": 42920 }, { "epoch": 2650.617283950617, "learning_rate": 4.1125872413370883e-07, "loss": 1.5794, "step": 42940 }, { "epoch": 2651.8518518518517, "learning_rate": 4.109832251744827e-07, "loss": 1.6231, "step": 42960 }, { "epoch": 2653.0864197530864, "learning_rate": 4.1070772621525655e-07, "loss": 1.6408, "step": 42980 }, { "epoch": 2654.320987654321, "learning_rate": 4.104322272560303e-07, "loss": 1.6312, "step": 43000 }, { "epoch": 2655.5555555555557, "learning_rate": 4.1015672829680416e-07, "loss": 1.6093, "step": 43020 }, { "epoch": 2656.7901234567903, "learning_rate": 4.09881229337578e-07, "loss": 1.5819, "step": 43040 }, { "epoch": 2658.0246913580245, "learning_rate": 4.096057303783519e-07, "loss": 1.6182, "step": 43060 }, { "epoch": 2659.259259259259, "learning_rate": 4.0933023141912574e-07, "loss": 1.5797, "step": 43080 }, { "epoch": 2660.4938271604938, "learning_rate": 4.090547324598996e-07, "loss": 1.6057, "step": 43100 }, { "epoch": 2661.7283950617284, "learning_rate": 4.0877923350067345e-07, "loss": 1.5719, "step": 43120 }, { "epoch": 2662.962962962963, "learning_rate": 4.085037345414472e-07, "loss": 1.5967, "step": 43140 }, { "epoch": 2664.1975308641977, "learning_rate": 4.0822823558222107e-07, "loss": 1.6359, "step": 43160 }, { "epoch": 2665.432098765432, "learning_rate": 4.07952736622995e-07, "loss": 1.621, "step": 43180 }, { "epoch": 2666.6666666666665, "learning_rate": 4.0767723766376884e-07, "loss": 1.5844, "step": 43200 }, { "epoch": 2667.901234567901, "learning_rate": 4.074017387045427e-07, "loss": 1.6164, "step": 43220 }, { "epoch": 2669.135802469136, "learning_rate": 4.0712623974531655e-07, "loss": 1.644, "step": 43240 }, { "epoch": 2670.3703703703704, "learning_rate": 4.068507407860903e-07, "loss": 1.6266, "step": 43260 }, { "epoch": 2671.604938271605, "learning_rate": 4.0657524182686417e-07, "loss": 1.6236, "step": 43280 }, { "epoch": 2672.8395061728397, "learning_rate": 4.06299742867638e-07, "loss": 1.5677, "step": 43300 }, { "epoch": 2674.074074074074, "learning_rate": 4.060242439084119e-07, "loss": 1.5827, "step": 43320 }, { "epoch": 2675.3086419753085, "learning_rate": 4.0574874494918574e-07, "loss": 1.6031, "step": 43340 }, { "epoch": 2676.543209876543, "learning_rate": 4.054732459899596e-07, "loss": 1.5703, "step": 43360 }, { "epoch": 2677.777777777778, "learning_rate": 4.0519774703073346e-07, "loss": 1.6003, "step": 43380 }, { "epoch": 2679.0123456790125, "learning_rate": 4.049222480715072e-07, "loss": 1.6612, "step": 43400 }, { "epoch": 2680.246913580247, "learning_rate": 4.0464674911228107e-07, "loss": 1.5979, "step": 43420 }, { "epoch": 2681.4814814814813, "learning_rate": 4.0437125015305493e-07, "loss": 1.5804, "step": 43440 }, { "epoch": 2682.716049382716, "learning_rate": 4.040957511938288e-07, "loss": 1.6054, "step": 43460 }, { "epoch": 2683.9506172839506, "learning_rate": 4.0382025223460265e-07, "loss": 1.5628, "step": 43480 }, { "epoch": 2685.185185185185, "learning_rate": 4.035447532753765e-07, "loss": 1.5943, "step": 43500 }, { "epoch": 2686.41975308642, "learning_rate": 4.0326925431615037e-07, "loss": 1.5749, "step": 43520 }, { "epoch": 2687.6543209876545, "learning_rate": 4.0299375535692417e-07, "loss": 1.5873, "step": 43540 }, { "epoch": 2688.8888888888887, "learning_rate": 4.0271825639769803e-07, "loss": 1.5841, "step": 43560 }, { "epoch": 2690.1234567901233, "learning_rate": 4.024427574384719e-07, "loss": 1.635, "step": 43580 }, { "epoch": 2691.358024691358, "learning_rate": 4.0216725847924575e-07, "loss": 1.6146, "step": 43600 }, { "epoch": 2692.5925925925926, "learning_rate": 4.0189175952001966e-07, "loss": 1.5575, "step": 43620 }, { "epoch": 2693.8271604938273, "learning_rate": 4.0161626056079347e-07, "loss": 1.6247, "step": 43640 }, { "epoch": 2695.061728395062, "learning_rate": 4.013407616015673e-07, "loss": 1.5918, "step": 43660 }, { "epoch": 2696.296296296296, "learning_rate": 4.010652626423411e-07, "loss": 1.6103, "step": 43680 }, { "epoch": 2697.5308641975307, "learning_rate": 4.0078976368311494e-07, "loss": 1.5966, "step": 43700 }, { "epoch": 2698.7654320987654, "learning_rate": 4.005142647238888e-07, "loss": 1.6105, "step": 43720 }, { "epoch": 2700.0, "learning_rate": 4.0023876576466266e-07, "loss": 1.637, "step": 43740 }, { "epoch": 2701.2345679012346, "learning_rate": 3.999632668054365e-07, "loss": 1.5751, "step": 43760 }, { "epoch": 2702.4691358024693, "learning_rate": 3.9968776784621037e-07, "loss": 1.6408, "step": 43780 }, { "epoch": 2703.703703703704, "learning_rate": 3.994122688869841e-07, "loss": 1.6009, "step": 43800 }, { "epoch": 2704.938271604938, "learning_rate": 3.99136769927758e-07, "loss": 1.5845, "step": 43820 }, { "epoch": 2706.1728395061727, "learning_rate": 3.9886127096853184e-07, "loss": 1.6328, "step": 43840 }, { "epoch": 2707.4074074074074, "learning_rate": 3.985857720093057e-07, "loss": 1.6059, "step": 43860 }, { "epoch": 2708.641975308642, "learning_rate": 3.9831027305007956e-07, "loss": 1.5844, "step": 43880 }, { "epoch": 2709.8765432098767, "learning_rate": 3.980347740908534e-07, "loss": 1.5928, "step": 43900 }, { "epoch": 2711.1111111111113, "learning_rate": 3.977592751316273e-07, "loss": 1.5914, "step": 43920 }, { "epoch": 2712.3456790123455, "learning_rate": 3.974837761724011e-07, "loss": 1.6088, "step": 43940 }, { "epoch": 2713.58024691358, "learning_rate": 3.9720827721317494e-07, "loss": 1.6005, "step": 43960 }, { "epoch": 2714.814814814815, "learning_rate": 3.9693277825394886e-07, "loss": 1.5973, "step": 43980 }, { "epoch": 2716.0493827160494, "learning_rate": 3.9665727929472266e-07, "loss": 1.6223, "step": 44000 }, { "epoch": 2717.283950617284, "learning_rate": 3.963817803354965e-07, "loss": 1.6079, "step": 44020 }, { "epoch": 2718.5185185185187, "learning_rate": 3.961062813762704e-07, "loss": 1.5864, "step": 44040 }, { "epoch": 2719.753086419753, "learning_rate": 3.9583078241704424e-07, "loss": 1.6023, "step": 44060 }, { "epoch": 2720.9876543209875, "learning_rate": 3.95555283457818e-07, "loss": 1.5783, "step": 44080 }, { "epoch": 2722.222222222222, "learning_rate": 3.9527978449859185e-07, "loss": 1.6224, "step": 44100 }, { "epoch": 2723.456790123457, "learning_rate": 3.950042855393657e-07, "loss": 1.5548, "step": 44120 }, { "epoch": 2724.6913580246915, "learning_rate": 3.9472878658013957e-07, "loss": 1.5773, "step": 44140 }, { "epoch": 2725.925925925926, "learning_rate": 3.9445328762091337e-07, "loss": 1.588, "step": 44160 }, { "epoch": 2727.1604938271603, "learning_rate": 3.941777886616873e-07, "loss": 1.6126, "step": 44180 }, { "epoch": 2728.395061728395, "learning_rate": 3.9390228970246114e-07, "loss": 1.6192, "step": 44200 }, { "epoch": 2729.6296296296296, "learning_rate": 3.936267907432349e-07, "loss": 1.5709, "step": 44220 }, { "epoch": 2730.864197530864, "learning_rate": 3.9335129178400876e-07, "loss": 1.5899, "step": 44240 }, { "epoch": 2732.098765432099, "learning_rate": 3.930757928247826e-07, "loss": 1.6171, "step": 44260 }, { "epoch": 2733.3333333333335, "learning_rate": 3.9280029386555647e-07, "loss": 1.5672, "step": 44280 }, { "epoch": 2734.567901234568, "learning_rate": 3.9252479490633033e-07, "loss": 1.5949, "step": 44300 }, { "epoch": 2735.8024691358023, "learning_rate": 3.9224929594710424e-07, "loss": 1.6092, "step": 44320 }, { "epoch": 2737.037037037037, "learning_rate": 3.9197379698787794e-07, "loss": 1.5984, "step": 44340 }, { "epoch": 2738.2716049382716, "learning_rate": 3.9169829802865186e-07, "loss": 1.5701, "step": 44360 }, { "epoch": 2739.5061728395062, "learning_rate": 3.914227990694257e-07, "loss": 1.6332, "step": 44380 }, { "epoch": 2740.740740740741, "learning_rate": 3.911473001101996e-07, "loss": 1.5343, "step": 44400 }, { "epoch": 2741.9753086419755, "learning_rate": 3.9087180115097343e-07, "loss": 1.5874, "step": 44420 }, { "epoch": 2743.2098765432097, "learning_rate": 3.905963021917473e-07, "loss": 1.5957, "step": 44440 }, { "epoch": 2744.4444444444443, "learning_rate": 3.9032080323252115e-07, "loss": 1.5792, "step": 44460 }, { "epoch": 2745.679012345679, "learning_rate": 3.900453042732949e-07, "loss": 1.6199, "step": 44480 }, { "epoch": 2746.9135802469136, "learning_rate": 3.8976980531406876e-07, "loss": 1.594, "step": 44500 }, { "epoch": 2748.1481481481483, "learning_rate": 3.894943063548426e-07, "loss": 1.6074, "step": 44520 }, { "epoch": 2749.382716049383, "learning_rate": 3.892188073956165e-07, "loss": 1.6032, "step": 44540 }, { "epoch": 2750.617283950617, "learning_rate": 3.8894330843639034e-07, "loss": 1.6152, "step": 44560 }, { "epoch": 2751.8518518518517, "learning_rate": 3.886678094771642e-07, "loss": 1.5874, "step": 44580 }, { "epoch": 2753.0864197530864, "learning_rate": 3.8839231051793806e-07, "loss": 1.6073, "step": 44600 }, { "epoch": 2754.320987654321, "learning_rate": 3.881168115587118e-07, "loss": 1.5544, "step": 44620 }, { "epoch": 2755.5555555555557, "learning_rate": 3.8784131259948567e-07, "loss": 1.6478, "step": 44640 }, { "epoch": 2756.7901234567903, "learning_rate": 3.875658136402596e-07, "loss": 1.5477, "step": 44660 }, { "epoch": 2758.0246913580245, "learning_rate": 3.8729031468103344e-07, "loss": 1.6029, "step": 44680 }, { "epoch": 2759.259259259259, "learning_rate": 3.870148157218073e-07, "loss": 1.6175, "step": 44700 }, { "epoch": 2760.4938271604938, "learning_rate": 3.8673931676258116e-07, "loss": 1.5881, "step": 44720 }, { "epoch": 2761.7283950617284, "learning_rate": 3.86463817803355e-07, "loss": 1.6043, "step": 44740 }, { "epoch": 2762.962962962963, "learning_rate": 3.8618831884412877e-07, "loss": 1.6047, "step": 44760 }, { "epoch": 2764.1975308641977, "learning_rate": 3.8591281988490263e-07, "loss": 1.5779, "step": 44780 }, { "epoch": 2765.432098765432, "learning_rate": 3.856373209256765e-07, "loss": 1.6374, "step": 44800 }, { "epoch": 2766.6666666666665, "learning_rate": 3.8536182196645034e-07, "loss": 1.5917, "step": 44820 }, { "epoch": 2767.901234567901, "learning_rate": 3.850863230072242e-07, "loss": 1.6095, "step": 44840 }, { "epoch": 2769.135802469136, "learning_rate": 3.8481082404799806e-07, "loss": 1.6041, "step": 44860 }, { "epoch": 2770.3703703703704, "learning_rate": 3.845353250887718e-07, "loss": 1.6671, "step": 44880 }, { "epoch": 2771.604938271605, "learning_rate": 3.842598261295457e-07, "loss": 1.586, "step": 44900 }, { "epoch": 2772.8395061728397, "learning_rate": 3.8398432717031953e-07, "loss": 1.6016, "step": 44920 }, { "epoch": 2774.074074074074, "learning_rate": 3.837088282110934e-07, "loss": 1.6045, "step": 44940 }, { "epoch": 2775.3086419753085, "learning_rate": 3.8343332925186725e-07, "loss": 1.6106, "step": 44960 }, { "epoch": 2776.543209876543, "learning_rate": 3.831578302926411e-07, "loss": 1.5523, "step": 44980 }, { "epoch": 2777.777777777778, "learning_rate": 3.8288233133341497e-07, "loss": 1.5853, "step": 45000 }, { "epoch": 2779.0123456790125, "learning_rate": 3.826068323741888e-07, "loss": 1.6267, "step": 45020 }, { "epoch": 2780.246913580247, "learning_rate": 3.8233133341496263e-07, "loss": 1.5569, "step": 45040 }, { "epoch": 2781.4814814814813, "learning_rate": 3.820558344557365e-07, "loss": 1.5999, "step": 45060 }, { "epoch": 2782.716049382716, "learning_rate": 3.8178033549651035e-07, "loss": 1.5939, "step": 45080 }, { "epoch": 2783.9506172839506, "learning_rate": 3.815048365372842e-07, "loss": 1.5417, "step": 45100 }, { "epoch": 2785.185185185185, "learning_rate": 3.8122933757805807e-07, "loss": 1.5989, "step": 45120 }, { "epoch": 2786.41975308642, "learning_rate": 3.8095383861883193e-07, "loss": 1.5541, "step": 45140 }, { "epoch": 2787.6543209876545, "learning_rate": 3.806783396596057e-07, "loss": 1.5832, "step": 45160 }, { "epoch": 2788.8888888888887, "learning_rate": 3.8040284070037954e-07, "loss": 1.5753, "step": 45180 }, { "epoch": 2790.1234567901233, "learning_rate": 3.801273417411534e-07, "loss": 1.6206, "step": 45200 }, { "epoch": 2791.358024691358, "learning_rate": 3.7985184278192726e-07, "loss": 1.5887, "step": 45220 }, { "epoch": 2792.5925925925926, "learning_rate": 3.795763438227011e-07, "loss": 1.5607, "step": 45240 }, { "epoch": 2793.8271604938273, "learning_rate": 3.79300844863475e-07, "loss": 1.5828, "step": 45260 }, { "epoch": 2795.061728395062, "learning_rate": 3.7902534590424883e-07, "loss": 1.6293, "step": 45280 }, { "epoch": 2796.296296296296, "learning_rate": 3.787498469450226e-07, "loss": 1.5939, "step": 45300 }, { "epoch": 2797.5308641975307, "learning_rate": 3.7847434798579645e-07, "loss": 1.566, "step": 45320 }, { "epoch": 2798.7654320987654, "learning_rate": 3.781988490265703e-07, "loss": 1.5431, "step": 45340 }, { "epoch": 2800.0, "learning_rate": 3.7792335006734416e-07, "loss": 1.6051, "step": 45360 }, { "epoch": 2801.2345679012346, "learning_rate": 3.77647851108118e-07, "loss": 1.5837, "step": 45380 }, { "epoch": 2802.4691358024693, "learning_rate": 3.773723521488919e-07, "loss": 1.5983, "step": 45400 }, { "epoch": 2803.703703703704, "learning_rate": 3.770968531896657e-07, "loss": 1.5997, "step": 45420 }, { "epoch": 2804.938271604938, "learning_rate": 3.7682135423043955e-07, "loss": 1.5838, "step": 45440 }, { "epoch": 2806.1728395061727, "learning_rate": 3.765458552712134e-07, "loss": 1.6048, "step": 45460 }, { "epoch": 2807.4074074074074, "learning_rate": 3.7627035631198726e-07, "loss": 1.5986, "step": 45480 }, { "epoch": 2808.641975308642, "learning_rate": 3.759948573527611e-07, "loss": 1.5858, "step": 45500 }, { "epoch": 2809.8765432098767, "learning_rate": 3.75719358393535e-07, "loss": 1.6225, "step": 45520 }, { "epoch": 2811.1111111111113, "learning_rate": 3.7544385943430884e-07, "loss": 1.5922, "step": 45540 }, { "epoch": 2812.3456790123455, "learning_rate": 3.751683604750826e-07, "loss": 1.5775, "step": 45560 }, { "epoch": 2813.58024691358, "learning_rate": 3.7489286151585645e-07, "loss": 1.5433, "step": 45580 }, { "epoch": 2814.814814814815, "learning_rate": 3.746173625566303e-07, "loss": 1.5915, "step": 45600 }, { "epoch": 2816.0493827160494, "learning_rate": 3.7434186359740417e-07, "loss": 1.6038, "step": 45620 }, { "epoch": 2817.283950617284, "learning_rate": 3.7406636463817803e-07, "loss": 1.6173, "step": 45640 }, { "epoch": 2818.5185185185187, "learning_rate": 3.737908656789519e-07, "loss": 1.574, "step": 45660 }, { "epoch": 2819.753086419753, "learning_rate": 3.7351536671972575e-07, "loss": 1.6042, "step": 45680 }, { "epoch": 2820.9876543209875, "learning_rate": 3.732398677604995e-07, "loss": 1.583, "step": 45700 }, { "epoch": 2822.222222222222, "learning_rate": 3.7296436880127336e-07, "loss": 1.5863, "step": 45720 }, { "epoch": 2823.456790123457, "learning_rate": 3.726888698420472e-07, "loss": 1.582, "step": 45740 }, { "epoch": 2824.6913580246915, "learning_rate": 3.724133708828211e-07, "loss": 1.6018, "step": 45760 }, { "epoch": 2825.925925925926, "learning_rate": 3.7213787192359493e-07, "loss": 1.5802, "step": 45780 }, { "epoch": 2827.1604938271603, "learning_rate": 3.7186237296436885e-07, "loss": 1.644, "step": 45800 }, { "epoch": 2828.395061728395, "learning_rate": 3.715868740051426e-07, "loss": 1.6081, "step": 45820 }, { "epoch": 2829.6296296296296, "learning_rate": 3.7131137504591646e-07, "loss": 1.5881, "step": 45840 }, { "epoch": 2830.864197530864, "learning_rate": 3.710358760866903e-07, "loss": 1.6446, "step": 45860 }, { "epoch": 2832.098765432099, "learning_rate": 3.707603771274642e-07, "loss": 1.5731, "step": 45880 }, { "epoch": 2833.3333333333335, "learning_rate": 3.7048487816823803e-07, "loss": 1.5767, "step": 45900 }, { "epoch": 2834.567901234568, "learning_rate": 3.702093792090119e-07, "loss": 1.6086, "step": 45920 }, { "epoch": 2835.8024691358023, "learning_rate": 3.699338802497858e-07, "loss": 1.6066, "step": 45940 }, { "epoch": 2837.037037037037, "learning_rate": 3.696583812905595e-07, "loss": 1.6132, "step": 45960 }, { "epoch": 2838.2716049382716, "learning_rate": 3.6938288233133336e-07, "loss": 1.5674, "step": 45980 }, { "epoch": 2839.5061728395062, "learning_rate": 3.691073833721072e-07, "loss": 1.5864, "step": 46000 }, { "epoch": 2840.740740740741, "learning_rate": 3.688318844128811e-07, "loss": 1.5639, "step": 46020 }, { "epoch": 2841.9753086419755, "learning_rate": 3.6855638545365494e-07, "loss": 1.5629, "step": 46040 }, { "epoch": 2843.2098765432097, "learning_rate": 3.682808864944288e-07, "loss": 1.5756, "step": 46060 }, { "epoch": 2844.4444444444443, "learning_rate": 3.6800538753520266e-07, "loss": 1.5834, "step": 46080 }, { "epoch": 2845.679012345679, "learning_rate": 3.677298885759764e-07, "loss": 1.5606, "step": 46100 }, { "epoch": 2846.9135802469136, "learning_rate": 3.6745438961675027e-07, "loss": 1.5492, "step": 46120 }, { "epoch": 2848.1481481481483, "learning_rate": 3.6717889065752413e-07, "loss": 1.5963, "step": 46140 }, { "epoch": 2849.382716049383, "learning_rate": 3.6690339169829804e-07, "loss": 1.534, "step": 46160 }, { "epoch": 2850.617283950617, "learning_rate": 3.666278927390719e-07, "loss": 1.6084, "step": 46180 }, { "epoch": 2851.8518518518517, "learning_rate": 3.6635239377984576e-07, "loss": 1.6032, "step": 46200 }, { "epoch": 2853.0864197530864, "learning_rate": 3.660768948206196e-07, "loss": 1.6116, "step": 46220 }, { "epoch": 2854.320987654321, "learning_rate": 3.6580139586139337e-07, "loss": 1.6103, "step": 46240 }, { "epoch": 2855.5555555555557, "learning_rate": 3.6552589690216723e-07, "loss": 1.5849, "step": 46260 }, { "epoch": 2856.7901234567903, "learning_rate": 3.652503979429411e-07, "loss": 1.5833, "step": 46280 }, { "epoch": 2858.0246913580245, "learning_rate": 3.64974898983715e-07, "loss": 1.6316, "step": 46300 }, { "epoch": 2859.259259259259, "learning_rate": 3.646994000244888e-07, "loss": 1.579, "step": 46320 }, { "epoch": 2860.4938271604938, "learning_rate": 3.6442390106526266e-07, "loss": 1.5636, "step": 46340 }, { "epoch": 2861.7283950617284, "learning_rate": 3.641484021060364e-07, "loss": 1.5765, "step": 46360 }, { "epoch": 2862.962962962963, "learning_rate": 3.638729031468103e-07, "loss": 1.5814, "step": 46380 }, { "epoch": 2864.1975308641977, "learning_rate": 3.6359740418758413e-07, "loss": 1.5863, "step": 46400 }, { "epoch": 2865.432098765432, "learning_rate": 3.63321905228358e-07, "loss": 1.609, "step": 46420 }, { "epoch": 2866.6666666666665, "learning_rate": 3.6304640626913185e-07, "loss": 1.5913, "step": 46440 }, { "epoch": 2867.901234567901, "learning_rate": 3.627709073099057e-07, "loss": 1.6438, "step": 46460 }, { "epoch": 2869.135802469136, "learning_rate": 3.6249540835067957e-07, "loss": 1.5741, "step": 46480 }, { "epoch": 2870.3703703703704, "learning_rate": 3.622199093914533e-07, "loss": 1.5949, "step": 46500 }, { "epoch": 2871.604938271605, "learning_rate": 3.6194441043222723e-07, "loss": 1.5939, "step": 46520 }, { "epoch": 2872.8395061728397, "learning_rate": 3.616689114730011e-07, "loss": 1.5647, "step": 46540 }, { "epoch": 2874.074074074074, "learning_rate": 3.6139341251377495e-07, "loss": 1.596, "step": 46560 }, { "epoch": 2875.3086419753085, "learning_rate": 3.611179135545488e-07, "loss": 1.6167, "step": 46580 }, { "epoch": 2876.543209876543, "learning_rate": 3.6084241459532267e-07, "loss": 1.573, "step": 46600 }, { "epoch": 2877.777777777778, "learning_rate": 3.6056691563609653e-07, "loss": 1.5993, "step": 46620 }, { "epoch": 2879.0123456790125, "learning_rate": 3.602914166768703e-07, "loss": 1.6066, "step": 46640 }, { "epoch": 2880.246913580247, "learning_rate": 3.600159177176441e-07, "loss": 1.5883, "step": 46660 }, { "epoch": 2881.4814814814813, "learning_rate": 3.59740418758418e-07, "loss": 1.5893, "step": 46680 }, { "epoch": 2882.716049382716, "learning_rate": 3.5946491979919186e-07, "loss": 1.5614, "step": 46700 }, { "epoch": 2883.9506172839506, "learning_rate": 3.591894208399657e-07, "loss": 1.5749, "step": 46720 }, { "epoch": 2885.185185185185, "learning_rate": 3.589139218807396e-07, "loss": 1.5943, "step": 46740 }, { "epoch": 2886.41975308642, "learning_rate": 3.5863842292151343e-07, "loss": 1.6147, "step": 46760 }, { "epoch": 2887.6543209876545, "learning_rate": 3.583629239622872e-07, "loss": 1.5916, "step": 46780 }, { "epoch": 2888.8888888888887, "learning_rate": 3.5808742500306105e-07, "loss": 1.5744, "step": 46800 }, { "epoch": 2890.1234567901233, "learning_rate": 3.578119260438349e-07, "loss": 1.621, "step": 46820 }, { "epoch": 2891.358024691358, "learning_rate": 3.5753642708460876e-07, "loss": 1.573, "step": 46840 }, { "epoch": 2892.5925925925926, "learning_rate": 3.572609281253826e-07, "loss": 1.5681, "step": 46860 }, { "epoch": 2893.8271604938273, "learning_rate": 3.569854291661565e-07, "loss": 1.5696, "step": 46880 }, { "epoch": 2895.061728395062, "learning_rate": 3.567099302069303e-07, "loss": 1.5788, "step": 46900 }, { "epoch": 2896.296296296296, "learning_rate": 3.5643443124770415e-07, "loss": 1.5923, "step": 46920 }, { "epoch": 2897.5308641975307, "learning_rate": 3.56158932288478e-07, "loss": 1.5965, "step": 46940 }, { "epoch": 2898.7654320987654, "learning_rate": 3.5588343332925186e-07, "loss": 1.5763, "step": 46960 }, { "epoch": 2900.0, "learning_rate": 3.556079343700257e-07, "loss": 1.6171, "step": 46980 }, { "epoch": 2901.2345679012346, "learning_rate": 3.553324354107996e-07, "loss": 1.5474, "step": 47000 }, { "epoch": 2902.4691358024693, "learning_rate": 3.5505693645157344e-07, "loss": 1.5582, "step": 47020 }, { "epoch": 2903.703703703704, "learning_rate": 3.547814374923472e-07, "loss": 1.5759, "step": 47040 }, { "epoch": 2904.938271604938, "learning_rate": 3.5450593853312105e-07, "loss": 1.5832, "step": 47060 }, { "epoch": 2906.1728395061727, "learning_rate": 3.542304395738949e-07, "loss": 1.5906, "step": 47080 }, { "epoch": 2907.4074074074074, "learning_rate": 3.5395494061466877e-07, "loss": 1.6047, "step": 47100 }, { "epoch": 2908.641975308642, "learning_rate": 3.5367944165544263e-07, "loss": 1.5955, "step": 47120 }, { "epoch": 2909.8765432098767, "learning_rate": 3.534039426962165e-07, "loss": 1.5496, "step": 47140 }, { "epoch": 2911.1111111111113, "learning_rate": 3.5312844373699035e-07, "loss": 1.6055, "step": 47160 }, { "epoch": 2912.3456790123455, "learning_rate": 3.528529447777641e-07, "loss": 1.5761, "step": 47180 }, { "epoch": 2913.58024691358, "learning_rate": 3.5257744581853796e-07, "loss": 1.5572, "step": 47200 }, { "epoch": 2914.814814814815, "learning_rate": 3.523019468593118e-07, "loss": 1.5623, "step": 47220 }, { "epoch": 2916.0493827160494, "learning_rate": 3.520264479000857e-07, "loss": 1.5843, "step": 47240 }, { "epoch": 2917.283950617284, "learning_rate": 3.5175094894085954e-07, "loss": 1.5663, "step": 47260 }, { "epoch": 2918.5185185185187, "learning_rate": 3.514754499816334e-07, "loss": 1.5972, "step": 47280 }, { "epoch": 2919.753086419753, "learning_rate": 3.511999510224073e-07, "loss": 1.5881, "step": 47300 }, { "epoch": 2920.9876543209875, "learning_rate": 3.5092445206318106e-07, "loss": 1.6143, "step": 47320 }, { "epoch": 2922.222222222222, "learning_rate": 3.506489531039549e-07, "loss": 1.559, "step": 47340 }, { "epoch": 2923.456790123457, "learning_rate": 3.503734541447288e-07, "loss": 1.6007, "step": 47360 }, { "epoch": 2924.6913580246915, "learning_rate": 3.5009795518550264e-07, "loss": 1.5927, "step": 47380 }, { "epoch": 2925.925925925926, "learning_rate": 3.498224562262765e-07, "loss": 1.5879, "step": 47400 }, { "epoch": 2927.1604938271603, "learning_rate": 3.4954695726705035e-07, "loss": 1.5982, "step": 47420 }, { "epoch": 2928.395061728395, "learning_rate": 3.492714583078241e-07, "loss": 1.5562, "step": 47440 }, { "epoch": 2929.6296296296296, "learning_rate": 3.4899595934859797e-07, "loss": 1.5878, "step": 47460 }, { "epoch": 2930.864197530864, "learning_rate": 3.487204603893718e-07, "loss": 1.61, "step": 47480 }, { "epoch": 2932.098765432099, "learning_rate": 3.484449614301457e-07, "loss": 1.5637, "step": 47500 }, { "epoch": 2933.3333333333335, "learning_rate": 3.4816946247091954e-07, "loss": 1.5854, "step": 47520 }, { "epoch": 2934.567901234568, "learning_rate": 3.478939635116934e-07, "loss": 1.5797, "step": 47540 }, { "epoch": 2935.8024691358023, "learning_rate": 3.4761846455246726e-07, "loss": 1.5696, "step": 47560 }, { "epoch": 2937.037037037037, "learning_rate": 3.47342965593241e-07, "loss": 1.574, "step": 47580 }, { "epoch": 2938.2716049382716, "learning_rate": 3.4706746663401487e-07, "loss": 1.575, "step": 47600 }, { "epoch": 2939.5061728395062, "learning_rate": 3.4679196767478873e-07, "loss": 1.5672, "step": 47620 }, { "epoch": 2940.740740740741, "learning_rate": 3.4651646871556264e-07, "loss": 1.5999, "step": 47640 }, { "epoch": 2941.9753086419755, "learning_rate": 3.462409697563365e-07, "loss": 1.5953, "step": 47660 }, { "epoch": 2943.2098765432097, "learning_rate": 3.4596547079711036e-07, "loss": 1.5806, "step": 47680 }, { "epoch": 2944.4444444444443, "learning_rate": 3.456899718378842e-07, "loss": 1.5711, "step": 47700 }, { "epoch": 2945.679012345679, "learning_rate": 3.4541447287865797e-07, "loss": 1.5894, "step": 47720 }, { "epoch": 2946.9135802469136, "learning_rate": 3.4513897391943183e-07, "loss": 1.6199, "step": 47740 }, { "epoch": 2948.1481481481483, "learning_rate": 3.448634749602057e-07, "loss": 1.5851, "step": 47760 }, { "epoch": 2949.382716049383, "learning_rate": 3.4458797600097955e-07, "loss": 1.6041, "step": 47780 }, { "epoch": 2950.617283950617, "learning_rate": 3.443124770417534e-07, "loss": 1.5351, "step": 47800 }, { "epoch": 2951.8518518518517, "learning_rate": 3.4403697808252727e-07, "loss": 1.5859, "step": 47820 }, { "epoch": 2953.0864197530864, "learning_rate": 3.437614791233011e-07, "loss": 1.6013, "step": 47840 }, { "epoch": 2954.320987654321, "learning_rate": 3.434859801640749e-07, "loss": 1.6008, "step": 47860 }, { "epoch": 2955.5555555555557, "learning_rate": 3.4321048120484874e-07, "loss": 1.5644, "step": 47880 }, { "epoch": 2956.7901234567903, "learning_rate": 3.429349822456226e-07, "loss": 1.5981, "step": 47900 }, { "epoch": 2958.0246913580245, "learning_rate": 3.4265948328639645e-07, "loss": 1.5938, "step": 47920 }, { "epoch": 2959.259259259259, "learning_rate": 3.423839843271703e-07, "loss": 1.5417, "step": 47940 }, { "epoch": 2960.4938271604938, "learning_rate": 3.4210848536794417e-07, "loss": 1.5891, "step": 47960 }, { "epoch": 2961.7283950617284, "learning_rate": 3.418329864087179e-07, "loss": 1.5653, "step": 47980 }, { "epoch": 2962.962962962963, "learning_rate": 3.4155748744949184e-07, "loss": 1.5807, "step": 48000 }, { "epoch": 2964.1975308641977, "learning_rate": 3.412819884902657e-07, "loss": 1.5767, "step": 48020 }, { "epoch": 2965.432098765432, "learning_rate": 3.4100648953103955e-07, "loss": 1.6012, "step": 48040 }, { "epoch": 2966.6666666666665, "learning_rate": 3.407309905718134e-07, "loss": 1.5877, "step": 48060 }, { "epoch": 2967.901234567901, "learning_rate": 3.4045549161258727e-07, "loss": 1.6071, "step": 48080 }, { "epoch": 2969.135802469136, "learning_rate": 3.4017999265336113e-07, "loss": 1.6194, "step": 48100 }, { "epoch": 2970.3703703703704, "learning_rate": 3.399044936941349e-07, "loss": 1.547, "step": 48120 }, { "epoch": 2971.604938271605, "learning_rate": 3.3962899473490874e-07, "loss": 1.597, "step": 48140 }, { "epoch": 2972.8395061728397, "learning_rate": 3.393534957756826e-07, "loss": 1.58, "step": 48160 }, { "epoch": 2974.074074074074, "learning_rate": 3.3907799681645646e-07, "loss": 1.5835, "step": 48180 }, { "epoch": 2975.3086419753085, "learning_rate": 3.388024978572303e-07, "loss": 1.5799, "step": 48200 }, { "epoch": 2976.543209876543, "learning_rate": 3.385269988980042e-07, "loss": 1.5581, "step": 48220 }, { "epoch": 2977.777777777778, "learning_rate": 3.3825149993877804e-07, "loss": 1.5892, "step": 48240 }, { "epoch": 2979.0123456790125, "learning_rate": 3.379760009795518e-07, "loss": 1.5856, "step": 48260 }, { "epoch": 2980.246913580247, "learning_rate": 3.3770050202032565e-07, "loss": 1.5864, "step": 48280 }, { "epoch": 2981.4814814814813, "learning_rate": 3.3742500306109956e-07, "loss": 1.5831, "step": 48300 }, { "epoch": 2982.716049382716, "learning_rate": 3.371495041018734e-07, "loss": 1.5405, "step": 48320 }, { "epoch": 2983.9506172839506, "learning_rate": 3.368740051426473e-07, "loss": 1.5822, "step": 48340 }, { "epoch": 2985.185185185185, "learning_rate": 3.3659850618342114e-07, "loss": 1.5596, "step": 48360 }, { "epoch": 2986.41975308642, "learning_rate": 3.363230072241949e-07, "loss": 1.5565, "step": 48380 }, { "epoch": 2987.6543209876545, "learning_rate": 3.3604750826496875e-07, "loss": 1.5896, "step": 48400 }, { "epoch": 2988.8888888888887, "learning_rate": 3.357720093057426e-07, "loss": 1.5642, "step": 48420 }, { "epoch": 2990.1234567901233, "learning_rate": 3.3549651034651647e-07, "loss": 1.5801, "step": 48440 }, { "epoch": 2991.358024691358, "learning_rate": 3.352210113872903e-07, "loss": 1.5995, "step": 48460 }, { "epoch": 2992.5925925925926, "learning_rate": 3.349455124280642e-07, "loss": 1.6019, "step": 48480 }, { "epoch": 2993.8271604938273, "learning_rate": 3.3467001346883804e-07, "loss": 1.5656, "step": 48500 }, { "epoch": 2995.061728395062, "learning_rate": 3.343945145096118e-07, "loss": 1.5631, "step": 48520 }, { "epoch": 2996.296296296296, "learning_rate": 3.3411901555038565e-07, "loss": 1.5424, "step": 48540 }, { "epoch": 2997.5308641975307, "learning_rate": 3.338435165911595e-07, "loss": 1.5902, "step": 48560 }, { "epoch": 2998.7654320987654, "learning_rate": 3.3356801763193337e-07, "loss": 1.5536, "step": 48580 }, { "epoch": 3000.0, "learning_rate": 3.3329251867270723e-07, "loss": 1.5825, "step": 48600 }, { "epoch": 3001.2345679012346, "learning_rate": 3.3301701971348114e-07, "loss": 1.5719, "step": 48620 }, { "epoch": 3002.4691358024693, "learning_rate": 3.3274152075425495e-07, "loss": 1.5747, "step": 48640 }, { "epoch": 3003.703703703704, "learning_rate": 3.324660217950287e-07, "loss": 1.5772, "step": 48660 }, { "epoch": 3004.938271604938, "learning_rate": 3.3219052283580256e-07, "loss": 1.5862, "step": 48680 }, { "epoch": 3006.1728395061727, "learning_rate": 3.319150238765764e-07, "loss": 1.5809, "step": 48700 }, { "epoch": 3007.4074074074074, "learning_rate": 3.316395249173503e-07, "loss": 1.588, "step": 48720 }, { "epoch": 3008.641975308642, "learning_rate": 3.3136402595812414e-07, "loss": 1.5702, "step": 48740 }, { "epoch": 3009.8765432098767, "learning_rate": 3.31088526998898e-07, "loss": 1.5724, "step": 48760 }, { "epoch": 3011.1111111111113, "learning_rate": 3.3081302803967185e-07, "loss": 1.5396, "step": 48780 }, { "epoch": 3012.3456790123455, "learning_rate": 3.305375290804456e-07, "loss": 1.5899, "step": 48800 }, { "epoch": 3013.58024691358, "learning_rate": 3.3026203012121947e-07, "loss": 1.552, "step": 48820 }, { "epoch": 3014.814814814815, "learning_rate": 3.299865311619933e-07, "loss": 1.5478, "step": 48840 }, { "epoch": 3016.0493827160494, "learning_rate": 3.2971103220276724e-07, "loss": 1.61, "step": 48860 }, { "epoch": 3017.283950617284, "learning_rate": 3.294355332435411e-07, "loss": 1.6093, "step": 48880 }, { "epoch": 3018.5185185185187, "learning_rate": 3.2916003428431495e-07, "loss": 1.5893, "step": 48900 }, { "epoch": 3019.753086419753, "learning_rate": 3.288845353250887e-07, "loss": 1.5737, "step": 48920 }, { "epoch": 3020.9876543209875, "learning_rate": 3.2860903636586257e-07, "loss": 1.5756, "step": 48940 }, { "epoch": 3022.222222222222, "learning_rate": 3.283335374066364e-07, "loss": 1.5661, "step": 48960 }, { "epoch": 3023.456790123457, "learning_rate": 3.2805803844741023e-07, "loss": 1.617, "step": 48980 }, { "epoch": 3024.6913580246915, "learning_rate": 3.2778253948818414e-07, "loss": 1.5672, "step": 49000 }, { "epoch": 3025.925925925926, "learning_rate": 3.27507040528958e-07, "loss": 1.6085, "step": 49020 }, { "epoch": 3027.1604938271603, "learning_rate": 3.2723154156973186e-07, "loss": 1.5881, "step": 49040 }, { "epoch": 3028.395061728395, "learning_rate": 3.2695604261050567e-07, "loss": 1.5573, "step": 49060 }, { "epoch": 3029.6296296296296, "learning_rate": 3.266805436512795e-07, "loss": 1.5592, "step": 49080 }, { "epoch": 3030.864197530864, "learning_rate": 3.264050446920534e-07, "loss": 1.5626, "step": 49100 }, { "epoch": 3032.098765432099, "learning_rate": 3.2612954573282724e-07, "loss": 1.5829, "step": 49120 }, { "epoch": 3033.3333333333335, "learning_rate": 3.258540467736011e-07, "loss": 1.5528, "step": 49140 }, { "epoch": 3034.567901234568, "learning_rate": 3.2557854781437496e-07, "loss": 1.5703, "step": 49160 }, { "epoch": 3035.8024691358023, "learning_rate": 3.253030488551488e-07, "loss": 1.5407, "step": 49180 }, { "epoch": 3037.037037037037, "learning_rate": 3.2502754989592257e-07, "loss": 1.574, "step": 49200 }, { "epoch": 3038.2716049382716, "learning_rate": 3.2475205093669643e-07, "loss": 1.5447, "step": 49220 }, { "epoch": 3039.5061728395062, "learning_rate": 3.244765519774703e-07, "loss": 1.5597, "step": 49240 }, { "epoch": 3040.740740740741, "learning_rate": 3.2420105301824415e-07, "loss": 1.5568, "step": 49260 }, { "epoch": 3041.9753086419755, "learning_rate": 3.23925554059018e-07, "loss": 1.5471, "step": 49280 }, { "epoch": 3043.2098765432097, "learning_rate": 3.2365005509979187e-07, "loss": 1.5906, "step": 49300 }, { "epoch": 3044.4444444444443, "learning_rate": 3.233745561405657e-07, "loss": 1.5571, "step": 49320 }, { "epoch": 3045.679012345679, "learning_rate": 3.230990571813394e-07, "loss": 1.5201, "step": 49340 }, { "epoch": 3046.9135802469136, "learning_rate": 3.2282355822211334e-07, "loss": 1.5368, "step": 49360 }, { "epoch": 3048.1481481481483, "learning_rate": 3.225480592628872e-07, "loss": 1.5692, "step": 49380 }, { "epoch": 3049.382716049383, "learning_rate": 3.2227256030366106e-07, "loss": 1.5696, "step": 49400 }, { "epoch": 3050.617283950617, "learning_rate": 3.219970613444349e-07, "loss": 1.5707, "step": 49420 }, { "epoch": 3051.8518518518517, "learning_rate": 3.2172156238520877e-07, "loss": 1.555, "step": 49440 }, { "epoch": 3053.0864197530864, "learning_rate": 3.214460634259825e-07, "loss": 1.6049, "step": 49460 }, { "epoch": 3054.320987654321, "learning_rate": 3.211705644667564e-07, "loss": 1.5847, "step": 49480 }, { "epoch": 3055.5555555555557, "learning_rate": 3.2089506550753024e-07, "loss": 1.5591, "step": 49500 }, { "epoch": 3056.7901234567903, "learning_rate": 3.206195665483041e-07, "loss": 1.6281, "step": 49520 }, { "epoch": 3058.0246913580245, "learning_rate": 3.2034406758907796e-07, "loss": 1.6113, "step": 49540 }, { "epoch": 3059.259259259259, "learning_rate": 3.200685686298518e-07, "loss": 1.5953, "step": 49560 }, { "epoch": 3060.4938271604938, "learning_rate": 3.197930696706257e-07, "loss": 1.5792, "step": 49580 }, { "epoch": 3061.7283950617284, "learning_rate": 3.195175707113995e-07, "loss": 1.5769, "step": 49600 }, { "epoch": 3062.962962962963, "learning_rate": 3.1924207175217334e-07, "loss": 1.5584, "step": 49620 }, { "epoch": 3064.1975308641977, "learning_rate": 3.189665727929472e-07, "loss": 1.6094, "step": 49640 }, { "epoch": 3065.432098765432, "learning_rate": 3.1869107383372106e-07, "loss": 1.5982, "step": 49660 }, { "epoch": 3066.6666666666665, "learning_rate": 3.184155748744949e-07, "loss": 1.5246, "step": 49680 }, { "epoch": 3067.901234567901, "learning_rate": 3.181400759152688e-07, "loss": 1.5822, "step": 49700 }, { "epoch": 3069.135802469136, "learning_rate": 3.1786457695604264e-07, "loss": 1.5882, "step": 49720 }, { "epoch": 3070.3703703703704, "learning_rate": 3.175890779968164e-07, "loss": 1.577, "step": 49740 }, { "epoch": 3071.604938271605, "learning_rate": 3.1731357903759025e-07, "loss": 1.5628, "step": 49760 }, { "epoch": 3072.8395061728397, "learning_rate": 3.170380800783642e-07, "loss": 1.5657, "step": 49780 }, { "epoch": 3074.074074074074, "learning_rate": 3.16762581119138e-07, "loss": 1.6059, "step": 49800 }, { "epoch": 3075.3086419753085, "learning_rate": 3.164870821599119e-07, "loss": 1.5552, "step": 49820 }, { "epoch": 3076.543209876543, "learning_rate": 3.1621158320068574e-07, "loss": 1.5917, "step": 49840 }, { "epoch": 3077.777777777778, "learning_rate": 3.159360842414596e-07, "loss": 1.5452, "step": 49860 }, { "epoch": 3079.0123456790125, "learning_rate": 3.1566058528223335e-07, "loss": 1.5704, "step": 49880 }, { "epoch": 3080.246913580247, "learning_rate": 3.153850863230072e-07, "loss": 1.553, "step": 49900 }, { "epoch": 3081.4814814814813, "learning_rate": 3.1510958736378107e-07, "loss": 1.5802, "step": 49920 }, { "epoch": 3082.716049382716, "learning_rate": 3.1483408840455493e-07, "loss": 1.5736, "step": 49940 }, { "epoch": 3083.9506172839506, "learning_rate": 3.145585894453288e-07, "loss": 1.5542, "step": 49960 }, { "epoch": 3085.185185185185, "learning_rate": 3.1428309048610264e-07, "loss": 1.5967, "step": 49980 }, { "epoch": 3086.41975308642, "learning_rate": 3.140075915268764e-07, "loss": 1.5828, "step": 50000 }, { "epoch": 3087.6543209876545, "learning_rate": 3.1373209256765026e-07, "loss": 1.5363, "step": 50020 }, { "epoch": 3088.8888888888887, "learning_rate": 3.134565936084241e-07, "loss": 1.5509, "step": 50040 }, { "epoch": 3090.1234567901233, "learning_rate": 3.13181094649198e-07, "loss": 1.5741, "step": 50060 }, { "epoch": 3091.358024691358, "learning_rate": 3.1290559568997183e-07, "loss": 1.58, "step": 50080 }, { "epoch": 3092.5925925925926, "learning_rate": 3.126300967307457e-07, "loss": 1.5934, "step": 50100 }, { "epoch": 3093.8271604938273, "learning_rate": 3.1235459777151955e-07, "loss": 1.5982, "step": 50120 }, { "epoch": 3095.061728395062, "learning_rate": 3.120790988122933e-07, "loss": 1.5727, "step": 50140 }, { "epoch": 3096.296296296296, "learning_rate": 3.1180359985306716e-07, "loss": 1.5596, "step": 50160 }, { "epoch": 3097.5308641975307, "learning_rate": 3.11528100893841e-07, "loss": 1.5842, "step": 50180 }, { "epoch": 3098.7654320987654, "learning_rate": 3.112526019346149e-07, "loss": 1.5643, "step": 50200 }, { "epoch": 3100.0, "learning_rate": 3.1097710297538874e-07, "loss": 1.5784, "step": 50220 }, { "epoch": 3101.2345679012346, "learning_rate": 3.107016040161626e-07, "loss": 1.5627, "step": 50240 }, { "epoch": 3102.4691358024693, "learning_rate": 3.1042610505693646e-07, "loss": 1.54, "step": 50260 }, { "epoch": 3103.703703703704, "learning_rate": 3.101506060977102e-07, "loss": 1.5772, "step": 50280 }, { "epoch": 3104.938271604938, "learning_rate": 3.0987510713848407e-07, "loss": 1.5741, "step": 50300 }, { "epoch": 3106.1728395061727, "learning_rate": 3.09599608179258e-07, "loss": 1.5732, "step": 50320 }, { "epoch": 3107.4074074074074, "learning_rate": 3.0932410922003184e-07, "loss": 1.581, "step": 50340 }, { "epoch": 3108.641975308642, "learning_rate": 3.090486102608057e-07, "loss": 1.5868, "step": 50360 }, { "epoch": 3109.8765432098767, "learning_rate": 3.0877311130157956e-07, "loss": 1.5879, "step": 50380 }, { "epoch": 3111.1111111111113, "learning_rate": 3.084976123423534e-07, "loss": 1.5726, "step": 50400 }, { "epoch": 3112.3456790123455, "learning_rate": 3.0822211338312717e-07, "loss": 1.5422, "step": 50420 }, { "epoch": 3113.58024691358, "learning_rate": 3.0794661442390103e-07, "loss": 1.5575, "step": 50440 }, { "epoch": 3114.814814814815, "learning_rate": 3.076711154646749e-07, "loss": 1.5783, "step": 50460 }, { "epoch": 3116.0493827160494, "learning_rate": 3.0739561650544874e-07, "loss": 1.5554, "step": 50480 }, { "epoch": 3117.283950617284, "learning_rate": 3.071201175462226e-07, "loss": 1.5733, "step": 50500 }, { "epoch": 3118.5185185185187, "learning_rate": 3.0684461858699646e-07, "loss": 1.5429, "step": 50520 }, { "epoch": 3119.753086419753, "learning_rate": 3.0656911962777027e-07, "loss": 1.5658, "step": 50540 }, { "epoch": 3120.9876543209875, "learning_rate": 3.0629362066854413e-07, "loss": 1.5662, "step": 50560 }, { "epoch": 3122.222222222222, "learning_rate": 3.06018121709318e-07, "loss": 1.5615, "step": 50580 }, { "epoch": 3123.456790123457, "learning_rate": 3.0574262275009185e-07, "loss": 1.5678, "step": 50600 }, { "epoch": 3124.6913580246915, "learning_rate": 3.054671237908657e-07, "loss": 1.5542, "step": 50620 }, { "epoch": 3125.925925925926, "learning_rate": 3.0519162483163956e-07, "loss": 1.5804, "step": 50640 }, { "epoch": 3127.1604938271603, "learning_rate": 3.049161258724134e-07, "loss": 1.5672, "step": 50660 }, { "epoch": 3128.395061728395, "learning_rate": 3.046406269131872e-07, "loss": 1.5756, "step": 50680 }, { "epoch": 3129.6296296296296, "learning_rate": 3.0436512795396103e-07, "loss": 1.5722, "step": 50700 }, { "epoch": 3130.864197530864, "learning_rate": 3.040896289947349e-07, "loss": 1.557, "step": 50720 }, { "epoch": 3132.098765432099, "learning_rate": 3.0381413003550875e-07, "loss": 1.5584, "step": 50740 }, { "epoch": 3133.3333333333335, "learning_rate": 3.035386310762826e-07, "loss": 1.5748, "step": 50760 }, { "epoch": 3134.567901234568, "learning_rate": 3.0326313211705647e-07, "loss": 1.5931, "step": 50780 }, { "epoch": 3135.8024691358023, "learning_rate": 3.0298763315783033e-07, "loss": 1.5797, "step": 50800 }, { "epoch": 3137.037037037037, "learning_rate": 3.027121341986041e-07, "loss": 1.5638, "step": 50820 }, { "epoch": 3138.2716049382716, "learning_rate": 3.0243663523937794e-07, "loss": 1.5431, "step": 50840 }, { "epoch": 3139.5061728395062, "learning_rate": 3.021611362801518e-07, "loss": 1.5679, "step": 50860 }, { "epoch": 3140.740740740741, "learning_rate": 3.0188563732092566e-07, "loss": 1.5609, "step": 50880 }, { "epoch": 3141.9753086419755, "learning_rate": 3.016101383616995e-07, "loss": 1.5893, "step": 50900 }, { "epoch": 3143.2098765432097, "learning_rate": 3.013346394024734e-07, "loss": 1.5805, "step": 50920 }, { "epoch": 3144.4444444444443, "learning_rate": 3.010591404432472e-07, "loss": 1.5756, "step": 50940 }, { "epoch": 3145.679012345679, "learning_rate": 3.00783641484021e-07, "loss": 1.5364, "step": 50960 }, { "epoch": 3146.9135802469136, "learning_rate": 3.0050814252479485e-07, "loss": 1.5451, "step": 50980 }, { "epoch": 3148.1481481481483, "learning_rate": 3.002326435655687e-07, "loss": 1.5657, "step": 51000 }, { "epoch": 3149.382716049383, "learning_rate": 2.9995714460634256e-07, "loss": 1.5771, "step": 51020 }, { "epoch": 3150.617283950617, "learning_rate": 2.996816456471164e-07, "loss": 1.5572, "step": 51040 }, { "epoch": 3151.8518518518517, "learning_rate": 2.994061466878903e-07, "loss": 1.5297, "step": 51060 }, { "epoch": 3153.0864197530864, "learning_rate": 2.991306477286641e-07, "loss": 1.6089, "step": 51080 }, { "epoch": 3154.320987654321, "learning_rate": 2.9885514876943795e-07, "loss": 1.5366, "step": 51100 }, { "epoch": 3155.5555555555557, "learning_rate": 2.9857964981021186e-07, "loss": 1.5776, "step": 51120 }, { "epoch": 3156.7901234567903, "learning_rate": 2.9830415085098566e-07, "loss": 1.6102, "step": 51140 }, { "epoch": 3158.0246913580245, "learning_rate": 2.980286518917595e-07, "loss": 1.5385, "step": 51160 }, { "epoch": 3159.259259259259, "learning_rate": 2.977531529325334e-07, "loss": 1.5312, "step": 51180 }, { "epoch": 3160.4938271604938, "learning_rate": 2.9747765397330724e-07, "loss": 1.5608, "step": 51200 }, { "epoch": 3161.7283950617284, "learning_rate": 2.97202155014081e-07, "loss": 1.5325, "step": 51220 }, { "epoch": 3162.962962962963, "learning_rate": 2.9692665605485485e-07, "loss": 1.5552, "step": 51240 }, { "epoch": 3164.1975308641977, "learning_rate": 2.966511570956287e-07, "loss": 1.5954, "step": 51260 }, { "epoch": 3165.432098765432, "learning_rate": 2.963756581364026e-07, "loss": 1.5833, "step": 51280 }, { "epoch": 3166.6666666666665, "learning_rate": 2.961001591771765e-07, "loss": 1.5642, "step": 51300 }, { "epoch": 3167.901234567901, "learning_rate": 2.9582466021795034e-07, "loss": 1.5585, "step": 51320 }, { "epoch": 3169.135802469136, "learning_rate": 2.955491612587242e-07, "loss": 1.5686, "step": 51340 }, { "epoch": 3170.3703703703704, "learning_rate": 2.9527366229949795e-07, "loss": 1.5734, "step": 51360 }, { "epoch": 3171.604938271605, "learning_rate": 2.949981633402718e-07, "loss": 1.5736, "step": 51380 }, { "epoch": 3172.8395061728397, "learning_rate": 2.9472266438104567e-07, "loss": 1.542, "step": 51400 }, { "epoch": 3174.074074074074, "learning_rate": 2.9444716542181953e-07, "loss": 1.5903, "step": 51420 }, { "epoch": 3175.3086419753085, "learning_rate": 2.941716664625934e-07, "loss": 1.5849, "step": 51440 }, { "epoch": 3176.543209876543, "learning_rate": 2.9389616750336725e-07, "loss": 1.5678, "step": 51460 }, { "epoch": 3177.777777777778, "learning_rate": 2.9362066854414105e-07, "loss": 1.5985, "step": 51480 }, { "epoch": 3179.0123456790125, "learning_rate": 2.9334516958491486e-07, "loss": 1.5859, "step": 51500 }, { "epoch": 3180.246913580247, "learning_rate": 2.930696706256887e-07, "loss": 1.5835, "step": 51520 }, { "epoch": 3181.4814814814813, "learning_rate": 2.927941716664626e-07, "loss": 1.5738, "step": 51540 }, { "epoch": 3182.716049382716, "learning_rate": 2.9251867270723643e-07, "loss": 1.5584, "step": 51560 }, { "epoch": 3183.9506172839506, "learning_rate": 2.922431737480103e-07, "loss": 1.6035, "step": 51580 }, { "epoch": 3185.185185185185, "learning_rate": 2.9196767478878415e-07, "loss": 1.5679, "step": 51600 }, { "epoch": 3186.41975308642, "learning_rate": 2.916921758295579e-07, "loss": 1.5901, "step": 51620 }, { "epoch": 3187.6543209876545, "learning_rate": 2.9141667687033176e-07, "loss": 1.558, "step": 51640 }, { "epoch": 3188.8888888888887, "learning_rate": 2.9114117791110557e-07, "loss": 1.5901, "step": 51660 }, { "epoch": 3190.1234567901233, "learning_rate": 2.908656789518795e-07, "loss": 1.5902, "step": 51680 }, { "epoch": 3191.358024691358, "learning_rate": 2.9059017999265334e-07, "loss": 1.5672, "step": 51700 }, { "epoch": 3192.5925925925926, "learning_rate": 2.903146810334272e-07, "loss": 1.5454, "step": 51720 }, { "epoch": 3193.8271604938273, "learning_rate": 2.9003918207420106e-07, "loss": 1.568, "step": 51740 }, { "epoch": 3195.061728395062, "learning_rate": 2.897636831149748e-07, "loss": 1.5765, "step": 51760 }, { "epoch": 3196.296296296296, "learning_rate": 2.8948818415574867e-07, "loss": 1.5516, "step": 51780 }, { "epoch": 3197.5308641975307, "learning_rate": 2.8921268519652253e-07, "loss": 1.5751, "step": 51800 }, { "epoch": 3198.7654320987654, "learning_rate": 2.889371862372964e-07, "loss": 1.6064, "step": 51820 }, { "epoch": 3200.0, "learning_rate": 2.886616872780703e-07, "loss": 1.5567, "step": 51840 }, { "epoch": 3201.2345679012346, "learning_rate": 2.8838618831884416e-07, "loss": 1.5783, "step": 51860 }, { "epoch": 3202.4691358024693, "learning_rate": 2.88110689359618e-07, "loss": 1.5631, "step": 51880 }, { "epoch": 3203.703703703704, "learning_rate": 2.8783519040039177e-07, "loss": 1.531, "step": 51900 }, { "epoch": 3204.938271604938, "learning_rate": 2.8755969144116563e-07, "loss": 1.5749, "step": 51920 }, { "epoch": 3206.1728395061727, "learning_rate": 2.872841924819395e-07, "loss": 1.5879, "step": 51940 }, { "epoch": 3207.4074074074074, "learning_rate": 2.8700869352271335e-07, "loss": 1.5604, "step": 51960 }, { "epoch": 3208.641975308642, "learning_rate": 2.867331945634872e-07, "loss": 1.5599, "step": 51980 }, { "epoch": 3209.8765432098767, "learning_rate": 2.8645769560426106e-07, "loss": 1.5697, "step": 52000 }, { "epoch": 3211.1111111111113, "learning_rate": 2.8618219664503487e-07, "loss": 1.5664, "step": 52020 }, { "epoch": 3212.3456790123455, "learning_rate": 2.8590669768580873e-07, "loss": 1.5831, "step": 52040 }, { "epoch": 3213.58024691358, "learning_rate": 2.856311987265826e-07, "loss": 1.5901, "step": 52060 }, { "epoch": 3214.814814814815, "learning_rate": 2.8535569976735645e-07, "loss": 1.5426, "step": 52080 }, { "epoch": 3216.0493827160494, "learning_rate": 2.8508020080813025e-07, "loss": 1.5985, "step": 52100 }, { "epoch": 3217.283950617284, "learning_rate": 2.8480470184890416e-07, "loss": 1.5181, "step": 52120 }, { "epoch": 3218.5185185185187, "learning_rate": 2.84529202889678e-07, "loss": 1.5434, "step": 52140 }, { "epoch": 3219.753086419753, "learning_rate": 2.842537039304518e-07, "loss": 1.5892, "step": 52160 }, { "epoch": 3220.9876543209875, "learning_rate": 2.8397820497122564e-07, "loss": 1.5584, "step": 52180 }, { "epoch": 3222.222222222222, "learning_rate": 2.837027060119995e-07, "loss": 1.5746, "step": 52200 }, { "epoch": 3223.456790123457, "learning_rate": 2.8342720705277335e-07, "loss": 1.5991, "step": 52220 }, { "epoch": 3224.6913580246915, "learning_rate": 2.831517080935472e-07, "loss": 1.5501, "step": 52240 }, { "epoch": 3225.925925925926, "learning_rate": 2.8287620913432107e-07, "loss": 1.5744, "step": 52260 }, { "epoch": 3227.1604938271603, "learning_rate": 2.8260071017509493e-07, "loss": 1.5674, "step": 52280 }, { "epoch": 3228.395061728395, "learning_rate": 2.823252112158687e-07, "loss": 1.5572, "step": 52300 }, { "epoch": 3229.6296296296296, "learning_rate": 2.8204971225664254e-07, "loss": 1.5803, "step": 52320 }, { "epoch": 3230.864197530864, "learning_rate": 2.817742132974164e-07, "loss": 1.5868, "step": 52340 }, { "epoch": 3232.098765432099, "learning_rate": 2.8149871433819026e-07, "loss": 1.5713, "step": 52360 }, { "epoch": 3233.3333333333335, "learning_rate": 2.812232153789641e-07, "loss": 1.5832, "step": 52380 }, { "epoch": 3234.567901234568, "learning_rate": 2.80947716419738e-07, "loss": 1.574, "step": 52400 }, { "epoch": 3235.8024691358023, "learning_rate": 2.8067221746051184e-07, "loss": 1.5573, "step": 52420 }, { "epoch": 3237.037037037037, "learning_rate": 2.803967185012856e-07, "loss": 1.522, "step": 52440 }, { "epoch": 3238.2716049382716, "learning_rate": 2.8012121954205945e-07, "loss": 1.5786, "step": 52460 }, { "epoch": 3239.5061728395062, "learning_rate": 2.798457205828333e-07, "loss": 1.5853, "step": 52480 }, { "epoch": 3240.740740740741, "learning_rate": 2.7957022162360716e-07, "loss": 1.532, "step": 52500 }, { "epoch": 3241.9753086419755, "learning_rate": 2.79294722664381e-07, "loss": 1.5801, "step": 52520 }, { "epoch": 3243.2098765432097, "learning_rate": 2.790192237051549e-07, "loss": 1.5543, "step": 52540 }, { "epoch": 3244.4444444444443, "learning_rate": 2.787437247459287e-07, "loss": 1.5939, "step": 52560 }, { "epoch": 3245.679012345679, "learning_rate": 2.7846822578670255e-07, "loss": 1.5567, "step": 52580 }, { "epoch": 3246.9135802469136, "learning_rate": 2.781927268274764e-07, "loss": 1.5631, "step": 52600 }, { "epoch": 3248.1481481481483, "learning_rate": 2.7791722786825026e-07, "loss": 1.5841, "step": 52620 }, { "epoch": 3249.382716049383, "learning_rate": 2.776417289090241e-07, "loss": 1.5499, "step": 52640 }, { "epoch": 3250.617283950617, "learning_rate": 2.77366229949798e-07, "loss": 1.5575, "step": 52660 }, { "epoch": 3251.8518518518517, "learning_rate": 2.7709073099057184e-07, "loss": 1.5516, "step": 52680 }, { "epoch": 3253.0864197530864, "learning_rate": 2.768152320313456e-07, "loss": 1.5494, "step": 52700 }, { "epoch": 3254.320987654321, "learning_rate": 2.7653973307211945e-07, "loss": 1.5336, "step": 52720 }, { "epoch": 3255.5555555555557, "learning_rate": 2.762642341128933e-07, "loss": 1.5654, "step": 52740 }, { "epoch": 3256.7901234567903, "learning_rate": 2.759887351536672e-07, "loss": 1.5785, "step": 52760 }, { "epoch": 3258.0246913580245, "learning_rate": 2.757132361944411e-07, "loss": 1.5694, "step": 52780 }, { "epoch": 3259.259259259259, "learning_rate": 2.7543773723521494e-07, "loss": 1.5759, "step": 52800 }, { "epoch": 3260.4938271604938, "learning_rate": 2.751622382759888e-07, "loss": 1.5569, "step": 52820 }, { "epoch": 3261.7283950617284, "learning_rate": 2.7488673931676255e-07, "loss": 1.5951, "step": 52840 }, { "epoch": 3262.962962962963, "learning_rate": 2.746112403575364e-07, "loss": 1.5815, "step": 52860 }, { "epoch": 3264.1975308641977, "learning_rate": 2.7433574139831027e-07, "loss": 1.5931, "step": 52880 }, { "epoch": 3265.432098765432, "learning_rate": 2.7406024243908413e-07, "loss": 1.5551, "step": 52900 }, { "epoch": 3266.6666666666665, "learning_rate": 2.73784743479858e-07, "loss": 1.5237, "step": 52920 }, { "epoch": 3267.901234567901, "learning_rate": 2.7350924452063185e-07, "loss": 1.5692, "step": 52940 }, { "epoch": 3269.135802469136, "learning_rate": 2.732337455614057e-07, "loss": 1.5748, "step": 52960 }, { "epoch": 3270.3703703703704, "learning_rate": 2.7295824660217946e-07, "loss": 1.5244, "step": 52980 }, { "epoch": 3271.604938271605, "learning_rate": 2.726827476429533e-07, "loss": 1.5968, "step": 53000 }, { "epoch": 3272.8395061728397, "learning_rate": 2.724072486837272e-07, "loss": 1.5532, "step": 53020 }, { "epoch": 3274.074074074074, "learning_rate": 2.7213174972450104e-07, "loss": 1.6095, "step": 53040 }, { "epoch": 3275.3086419753085, "learning_rate": 2.718562507652749e-07, "loss": 1.5268, "step": 53060 }, { "epoch": 3276.543209876543, "learning_rate": 2.7158075180604875e-07, "loss": 1.5713, "step": 53080 }, { "epoch": 3277.777777777778, "learning_rate": 2.713052528468225e-07, "loss": 1.5784, "step": 53100 }, { "epoch": 3279.0123456790125, "learning_rate": 2.7102975388759637e-07, "loss": 1.5586, "step": 53120 }, { "epoch": 3280.246913580247, "learning_rate": 2.707542549283702e-07, "loss": 1.5526, "step": 53140 }, { "epoch": 3281.4814814814813, "learning_rate": 2.704787559691441e-07, "loss": 1.5074, "step": 53160 }, { "epoch": 3282.716049382716, "learning_rate": 2.7020325700991794e-07, "loss": 1.6016, "step": 53180 }, { "epoch": 3283.9506172839506, "learning_rate": 2.699277580506918e-07, "loss": 1.5833, "step": 53200 }, { "epoch": 3285.185185185185, "learning_rate": 2.6965225909146566e-07, "loss": 1.5729, "step": 53220 }, { "epoch": 3286.41975308642, "learning_rate": 2.693767601322394e-07, "loss": 1.5773, "step": 53240 }, { "epoch": 3287.6543209876545, "learning_rate": 2.6910126117301327e-07, "loss": 1.5507, "step": 53260 }, { "epoch": 3288.8888888888887, "learning_rate": 2.6882576221378713e-07, "loss": 1.5789, "step": 53280 }, { "epoch": 3290.1234567901233, "learning_rate": 2.68550263254561e-07, "loss": 1.569, "step": 53300 }, { "epoch": 3291.358024691358, "learning_rate": 2.682747642953349e-07, "loss": 1.5787, "step": 53320 }, { "epoch": 3292.5925925925926, "learning_rate": 2.6799926533610876e-07, "loss": 1.5788, "step": 53340 }, { "epoch": 3293.8271604938273, "learning_rate": 2.677237663768826e-07, "loss": 1.5194, "step": 53360 }, { "epoch": 3295.061728395062, "learning_rate": 2.6744826741765637e-07, "loss": 1.5846, "step": 53380 }, { "epoch": 3296.296296296296, "learning_rate": 2.6717276845843023e-07, "loss": 1.5296, "step": 53400 }, { "epoch": 3297.5308641975307, "learning_rate": 2.668972694992041e-07, "loss": 1.5572, "step": 53420 }, { "epoch": 3298.7654320987654, "learning_rate": 2.66621770539978e-07, "loss": 1.5682, "step": 53440 }, { "epoch": 3300.0, "learning_rate": 2.663462715807518e-07, "loss": 1.5689, "step": 53460 }, { "epoch": 3301.2345679012346, "learning_rate": 2.6607077262152567e-07, "loss": 1.5466, "step": 53480 }, { "epoch": 3302.4691358024693, "learning_rate": 2.657952736622995e-07, "loss": 1.5556, "step": 53500 }, { "epoch": 3303.703703703704, "learning_rate": 2.6551977470307333e-07, "loss": 1.5757, "step": 53520 }, { "epoch": 3304.938271604938, "learning_rate": 2.652442757438472e-07, "loss": 1.538, "step": 53540 }, { "epoch": 3306.1728395061727, "learning_rate": 2.6496877678462105e-07, "loss": 1.521, "step": 53560 }, { "epoch": 3307.4074074074074, "learning_rate": 2.646932778253949e-07, "loss": 1.5797, "step": 53580 }, { "epoch": 3308.641975308642, "learning_rate": 2.6441777886616877e-07, "loss": 1.5624, "step": 53600 }, { "epoch": 3309.8765432098767, "learning_rate": 2.641422799069426e-07, "loss": 1.5675, "step": 53620 }, { "epoch": 3311.1111111111113, "learning_rate": 2.638667809477164e-07, "loss": 1.5882, "step": 53640 }, { "epoch": 3312.3456790123455, "learning_rate": 2.6359128198849024e-07, "loss": 1.5721, "step": 53660 }, { "epoch": 3313.58024691358, "learning_rate": 2.633157830292641e-07, "loss": 1.5667, "step": 53680 }, { "epoch": 3314.814814814815, "learning_rate": 2.6304028407003795e-07, "loss": 1.5926, "step": 53700 }, { "epoch": 3316.0493827160494, "learning_rate": 2.627647851108118e-07, "loss": 1.5634, "step": 53720 }, { "epoch": 3317.283950617284, "learning_rate": 2.6248928615158567e-07, "loss": 1.5571, "step": 53740 }, { "epoch": 3318.5185185185187, "learning_rate": 2.6221378719235953e-07, "loss": 1.5422, "step": 53760 }, { "epoch": 3319.753086419753, "learning_rate": 2.619382882331333e-07, "loss": 1.5624, "step": 53780 }, { "epoch": 3320.9876543209875, "learning_rate": 2.616627892739072e-07, "loss": 1.545, "step": 53800 }, { "epoch": 3322.222222222222, "learning_rate": 2.61387290314681e-07, "loss": 1.5573, "step": 53820 }, { "epoch": 3323.456790123457, "learning_rate": 2.6111179135545486e-07, "loss": 1.5567, "step": 53840 }, { "epoch": 3324.6913580246915, "learning_rate": 2.608362923962287e-07, "loss": 1.5713, "step": 53860 }, { "epoch": 3325.925925925926, "learning_rate": 2.605607934370026e-07, "loss": 1.5671, "step": 53880 }, { "epoch": 3327.1604938271603, "learning_rate": 2.6028529447777644e-07, "loss": 1.5582, "step": 53900 }, { "epoch": 3328.395061728395, "learning_rate": 2.600097955185502e-07, "loss": 1.5544, "step": 53920 }, { "epoch": 3329.6296296296296, "learning_rate": 2.5973429655932405e-07, "loss": 1.5697, "step": 53940 }, { "epoch": 3330.864197530864, "learning_rate": 2.594587976000979e-07, "loss": 1.5314, "step": 53960 }, { "epoch": 3332.098765432099, "learning_rate": 2.591832986408717e-07, "loss": 1.5656, "step": 53980 }, { "epoch": 3333.3333333333335, "learning_rate": 2.589077996816456e-07, "loss": 1.576, "step": 54000 }, { "epoch": 3334.567901234568, "learning_rate": 2.586323007224195e-07, "loss": 1.5541, "step": 54020 }, { "epoch": 3335.8024691358023, "learning_rate": 2.5835680176319334e-07, "loss": 1.6052, "step": 54040 }, { "epoch": 3337.037037037037, "learning_rate": 2.5808130280396715e-07, "loss": 1.5452, "step": 54060 }, { "epoch": 3338.2716049382716, "learning_rate": 2.57805803844741e-07, "loss": 1.5654, "step": 54080 }, { "epoch": 3339.5061728395062, "learning_rate": 2.5753030488551487e-07, "loss": 1.52, "step": 54100 }, { "epoch": 3340.740740740741, "learning_rate": 2.572548059262887e-07, "loss": 1.5224, "step": 54120 }, { "epoch": 3341.9753086419755, "learning_rate": 2.569793069670626e-07, "loss": 1.5441, "step": 54140 }, { "epoch": 3343.2098765432097, "learning_rate": 2.5670380800783644e-07, "loss": 1.5758, "step": 54160 }, { "epoch": 3344.4444444444443, "learning_rate": 2.564283090486102e-07, "loss": 1.5885, "step": 54180 }, { "epoch": 3345.679012345679, "learning_rate": 2.5615281008938405e-07, "loss": 1.5522, "step": 54200 }, { "epoch": 3346.9135802469136, "learning_rate": 2.558773111301579e-07, "loss": 1.5588, "step": 54220 }, { "epoch": 3348.1481481481483, "learning_rate": 2.5560181217093177e-07, "loss": 1.6006, "step": 54240 }, { "epoch": 3349.382716049383, "learning_rate": 2.553263132117057e-07, "loss": 1.5793, "step": 54260 }, { "epoch": 3350.617283950617, "learning_rate": 2.5505081425247954e-07, "loss": 1.5483, "step": 54280 }, { "epoch": 3351.8518518518517, "learning_rate": 2.547753152932534e-07, "loss": 1.5487, "step": 54300 }, { "epoch": 3353.0864197530864, "learning_rate": 2.5449981633402715e-07, "loss": 1.576, "step": 54320 }, { "epoch": 3354.320987654321, "learning_rate": 2.54224317374801e-07, "loss": 1.5713, "step": 54340 }, { "epoch": 3355.5555555555557, "learning_rate": 2.5394881841557487e-07, "loss": 1.5772, "step": 54360 }, { "epoch": 3356.7901234567903, "learning_rate": 2.5367331945634873e-07, "loss": 1.5755, "step": 54380 }, { "epoch": 3358.0246913580245, "learning_rate": 2.533978204971226e-07, "loss": 1.59, "step": 54400 }, { "epoch": 3359.259259259259, "learning_rate": 2.531223215378964e-07, "loss": 1.5586, "step": 54420 }, { "epoch": 3360.4938271604938, "learning_rate": 2.528468225786703e-07, "loss": 1.5461, "step": 54440 }, { "epoch": 3361.7283950617284, "learning_rate": 2.5257132361944406e-07, "loss": 1.5718, "step": 54460 }, { "epoch": 3362.962962962963, "learning_rate": 2.522958246602179e-07, "loss": 1.5497, "step": 54480 }, { "epoch": 3364.1975308641977, "learning_rate": 2.520203257009918e-07, "loss": 1.6113, "step": 54500 }, { "epoch": 3365.432098765432, "learning_rate": 2.5174482674176564e-07, "loss": 1.5709, "step": 54520 }, { "epoch": 3366.6666666666665, "learning_rate": 2.514693277825395e-07, "loss": 1.5189, "step": 54540 }, { "epoch": 3367.901234567901, "learning_rate": 2.5119382882331336e-07, "loss": 1.5557, "step": 54560 }, { "epoch": 3369.135802469136, "learning_rate": 2.509183298640871e-07, "loss": 1.5679, "step": 54580 }, { "epoch": 3370.3703703703704, "learning_rate": 2.5064283090486097e-07, "loss": 1.5589, "step": 54600 }, { "epoch": 3371.604938271605, "learning_rate": 2.503673319456348e-07, "loss": 1.5288, "step": 54620 }, { "epoch": 3372.8395061728397, "learning_rate": 2.500918329864087e-07, "loss": 1.5986, "step": 54640 }, { "epoch": 3374.074074074074, "learning_rate": 2.4981633402718254e-07, "loss": 1.5805, "step": 54660 }, { "epoch": 3375.3086419753085, "learning_rate": 2.495408350679564e-07, "loss": 1.5344, "step": 54680 }, { "epoch": 3376.543209876543, "learning_rate": 2.4926533610873026e-07, "loss": 1.5753, "step": 54700 }, { "epoch": 3377.777777777778, "learning_rate": 2.48989837149504e-07, "loss": 1.538, "step": 54720 }, { "epoch": 3379.0123456790125, "learning_rate": 2.4871433819027787e-07, "loss": 1.615, "step": 54740 }, { "epoch": 3380.246913580247, "learning_rate": 2.4843883923105173e-07, "loss": 1.5935, "step": 54760 }, { "epoch": 3381.4814814814813, "learning_rate": 2.481633402718256e-07, "loss": 1.5214, "step": 54780 }, { "epoch": 3382.716049382716, "learning_rate": 2.4788784131259945e-07, "loss": 1.5463, "step": 54800 }, { "epoch": 3383.9506172839506, "learning_rate": 2.4761234235337336e-07, "loss": 1.5678, "step": 54820 }, { "epoch": 3385.185185185185, "learning_rate": 2.473368433941472e-07, "loss": 1.5581, "step": 54840 }, { "epoch": 3386.41975308642, "learning_rate": 2.4706134443492097e-07, "loss": 1.5805, "step": 54860 }, { "epoch": 3387.6543209876545, "learning_rate": 2.4678584547569483e-07, "loss": 1.506, "step": 54880 }, { "epoch": 3388.8888888888887, "learning_rate": 2.465103465164687e-07, "loss": 1.5686, "step": 54900 }, { "epoch": 3390.1234567901233, "learning_rate": 2.4623484755724255e-07, "loss": 1.5814, "step": 54920 }, { "epoch": 3391.358024691358, "learning_rate": 2.459593485980164e-07, "loss": 1.5282, "step": 54940 }, { "epoch": 3392.5925925925926, "learning_rate": 2.4568384963879027e-07, "loss": 1.5412, "step": 54960 }, { "epoch": 3393.8271604938273, "learning_rate": 2.454083506795641e-07, "loss": 1.5542, "step": 54980 }, { "epoch": 3395.061728395062, "learning_rate": 2.4513285172033793e-07, "loss": 1.5828, "step": 55000 }, { "epoch": 3396.296296296296, "learning_rate": 2.448573527611118e-07, "loss": 1.5865, "step": 55020 }, { "epoch": 3397.5308641975307, "learning_rate": 2.4458185380188565e-07, "loss": 1.5806, "step": 55040 }, { "epoch": 3398.7654320987654, "learning_rate": 2.443063548426595e-07, "loss": 1.5568, "step": 55060 }, { "epoch": 3400.0, "learning_rate": 2.4403085588343337e-07, "loss": 1.618, "step": 55080 }, { "epoch": 3401.2345679012346, "learning_rate": 2.437553569242072e-07, "loss": 1.5494, "step": 55100 }, { "epoch": 3402.4691358024693, "learning_rate": 2.43479857964981e-07, "loss": 1.5472, "step": 55120 }, { "epoch": 3403.703703703704, "learning_rate": 2.4320435900575484e-07, "loss": 1.5314, "step": 55140 }, { "epoch": 3404.938271604938, "learning_rate": 2.429288600465287e-07, "loss": 1.5775, "step": 55160 }, { "epoch": 3406.1728395061727, "learning_rate": 2.4265336108730256e-07, "loss": 1.5817, "step": 55180 }, { "epoch": 3407.4074074074074, "learning_rate": 2.423778621280764e-07, "loss": 1.5537, "step": 55200 }, { "epoch": 3408.641975308642, "learning_rate": 2.421023631688503e-07, "loss": 1.5873, "step": 55220 }, { "epoch": 3409.8765432098767, "learning_rate": 2.4182686420962413e-07, "loss": 1.5728, "step": 55240 }, { "epoch": 3411.1111111111113, "learning_rate": 2.415513652503979e-07, "loss": 1.5896, "step": 55260 }, { "epoch": 3412.3456790123455, "learning_rate": 2.4127586629117174e-07, "loss": 1.5592, "step": 55280 }, { "epoch": 3413.58024691358, "learning_rate": 2.410003673319456e-07, "loss": 1.5229, "step": 55300 }, { "epoch": 3414.814814814815, "learning_rate": 2.4072486837271946e-07, "loss": 1.5571, "step": 55320 }, { "epoch": 3416.0493827160494, "learning_rate": 2.404493694134933e-07, "loss": 1.5525, "step": 55340 }, { "epoch": 3417.283950617284, "learning_rate": 2.401738704542672e-07, "loss": 1.5809, "step": 55360 }, { "epoch": 3418.5185185185187, "learning_rate": 2.3989837149504104e-07, "loss": 1.5348, "step": 55380 }, { "epoch": 3419.753086419753, "learning_rate": 2.396228725358148e-07, "loss": 1.5412, "step": 55400 }, { "epoch": 3420.9876543209875, "learning_rate": 2.3934737357658865e-07, "loss": 1.5556, "step": 55420 }, { "epoch": 3422.222222222222, "learning_rate": 2.390718746173625e-07, "loss": 1.5618, "step": 55440 }, { "epoch": 3423.456790123457, "learning_rate": 2.3879637565813637e-07, "loss": 1.5525, "step": 55460 }, { "epoch": 3424.6913580246915, "learning_rate": 2.3852087669891023e-07, "loss": 1.5701, "step": 55480 }, { "epoch": 3425.925925925926, "learning_rate": 2.382453777396841e-07, "loss": 1.5665, "step": 55500 }, { "epoch": 3427.1604938271603, "learning_rate": 2.3796987878045797e-07, "loss": 1.5478, "step": 55520 }, { "epoch": 3428.395061728395, "learning_rate": 2.3769437982123175e-07, "loss": 1.5384, "step": 55540 }, { "epoch": 3429.6296296296296, "learning_rate": 2.374188808620056e-07, "loss": 1.5703, "step": 55560 }, { "epoch": 3430.864197530864, "learning_rate": 2.3714338190277947e-07, "loss": 1.569, "step": 55580 }, { "epoch": 3432.098765432099, "learning_rate": 2.3686788294355333e-07, "loss": 1.5612, "step": 55600 }, { "epoch": 3433.3333333333335, "learning_rate": 2.3659238398432719e-07, "loss": 1.5785, "step": 55620 }, { "epoch": 3434.567901234568, "learning_rate": 2.3631688502510104e-07, "loss": 1.5189, "step": 55640 }, { "epoch": 3435.8024691358023, "learning_rate": 2.360413860658748e-07, "loss": 1.5885, "step": 55660 }, { "epoch": 3437.037037037037, "learning_rate": 2.3576588710664866e-07, "loss": 1.585, "step": 55680 }, { "epoch": 3438.2716049382716, "learning_rate": 2.3549038814742252e-07, "loss": 1.5307, "step": 55700 }, { "epoch": 3439.5061728395062, "learning_rate": 2.3521488918819637e-07, "loss": 1.5353, "step": 55720 }, { "epoch": 3440.740740740741, "learning_rate": 2.3493939022897026e-07, "loss": 1.551, "step": 55740 }, { "epoch": 3441.9753086419755, "learning_rate": 2.3466389126974412e-07, "loss": 1.5697, "step": 55760 }, { "epoch": 3443.2098765432097, "learning_rate": 2.34388392310518e-07, "loss": 1.5602, "step": 55780 }, { "epoch": 3444.4444444444443, "learning_rate": 2.3411289335129173e-07, "loss": 1.5776, "step": 55800 }, { "epoch": 3445.679012345679, "learning_rate": 2.338373943920656e-07, "loss": 1.5562, "step": 55820 }, { "epoch": 3446.9135802469136, "learning_rate": 2.3356189543283945e-07, "loss": 1.5396, "step": 55840 }, { "epoch": 3448.1481481481483, "learning_rate": 2.332863964736133e-07, "loss": 1.5721, "step": 55860 }, { "epoch": 3449.382716049383, "learning_rate": 2.3301089751438717e-07, "loss": 1.5857, "step": 55880 }, { "epoch": 3450.617283950617, "learning_rate": 2.3273539855516102e-07, "loss": 1.5669, "step": 55900 }, { "epoch": 3451.8518518518517, "learning_rate": 2.3245989959593488e-07, "loss": 1.5511, "step": 55920 }, { "epoch": 3453.0864197530864, "learning_rate": 2.3218440063670866e-07, "loss": 1.5583, "step": 55940 }, { "epoch": 3454.320987654321, "learning_rate": 2.3190890167748252e-07, "loss": 1.5677, "step": 55960 }, { "epoch": 3455.5555555555557, "learning_rate": 2.3163340271825638e-07, "loss": 1.5985, "step": 55980 }, { "epoch": 3456.7901234567903, "learning_rate": 2.3135790375903024e-07, "loss": 1.5462, "step": 56000 }, { "epoch": 3458.0246913580245, "learning_rate": 2.310824047998041e-07, "loss": 1.5744, "step": 56020 }, { "epoch": 3459.259259259259, "learning_rate": 2.3080690584057796e-07, "loss": 1.5822, "step": 56040 }, { "epoch": 3460.4938271604938, "learning_rate": 2.3053140688135182e-07, "loss": 1.5583, "step": 56060 }, { "epoch": 3461.7283950617284, "learning_rate": 2.3025590792212557e-07, "loss": 1.5357, "step": 56080 }, { "epoch": 3462.962962962963, "learning_rate": 2.2998040896289945e-07, "loss": 1.5823, "step": 56100 }, { "epoch": 3464.1975308641977, "learning_rate": 2.297049100036733e-07, "loss": 1.5461, "step": 56120 }, { "epoch": 3465.432098765432, "learning_rate": 2.294294110444472e-07, "loss": 1.5769, "step": 56140 }, { "epoch": 3466.6666666666665, "learning_rate": 2.2915391208522103e-07, "loss": 1.5764, "step": 56160 }, { "epoch": 3467.901234567901, "learning_rate": 2.288784131259949e-07, "loss": 1.5525, "step": 56180 }, { "epoch": 3469.135802469136, "learning_rate": 2.2860291416676864e-07, "loss": 1.5715, "step": 56200 }, { "epoch": 3470.3703703703704, "learning_rate": 2.283274152075425e-07, "loss": 1.6001, "step": 56220 }, { "epoch": 3471.604938271605, "learning_rate": 2.2805191624831636e-07, "loss": 1.5408, "step": 56240 }, { "epoch": 3472.8395061728397, "learning_rate": 2.2777641728909022e-07, "loss": 1.5314, "step": 56260 }, { "epoch": 3474.074074074074, "learning_rate": 2.2750091832986408e-07, "loss": 1.5999, "step": 56280 }, { "epoch": 3475.3086419753085, "learning_rate": 2.2722541937063794e-07, "loss": 1.5545, "step": 56300 }, { "epoch": 3476.543209876543, "learning_rate": 2.2694992041141182e-07, "loss": 1.5732, "step": 56320 }, { "epoch": 3477.777777777778, "learning_rate": 2.2667442145218557e-07, "loss": 1.5746, "step": 56340 }, { "epoch": 3479.0123456790125, "learning_rate": 2.2639892249295943e-07, "loss": 1.563, "step": 56360 }, { "epoch": 3480.246913580247, "learning_rate": 2.261234235337333e-07, "loss": 1.5645, "step": 56380 }, { "epoch": 3481.4814814814813, "learning_rate": 2.2584792457450715e-07, "loss": 1.5557, "step": 56400 }, { "epoch": 3482.716049382716, "learning_rate": 2.25572425615281e-07, "loss": 1.6164, "step": 56420 }, { "epoch": 3483.9506172839506, "learning_rate": 2.2529692665605487e-07, "loss": 1.558, "step": 56440 }, { "epoch": 3485.185185185185, "learning_rate": 2.2502142769682873e-07, "loss": 1.5719, "step": 56460 }, { "epoch": 3486.41975308642, "learning_rate": 2.247459287376025e-07, "loss": 1.5495, "step": 56480 }, { "epoch": 3487.6543209876545, "learning_rate": 2.244704297783764e-07, "loss": 1.5482, "step": 56500 }, { "epoch": 3488.8888888888887, "learning_rate": 2.2419493081915023e-07, "loss": 1.5571, "step": 56520 }, { "epoch": 3490.1234567901233, "learning_rate": 2.2391943185992408e-07, "loss": 1.5504, "step": 56540 }, { "epoch": 3491.358024691358, "learning_rate": 2.2364393290069794e-07, "loss": 1.5562, "step": 56560 }, { "epoch": 3492.5925925925926, "learning_rate": 2.233684339414718e-07, "loss": 1.5411, "step": 56580 }, { "epoch": 3493.8271604938273, "learning_rate": 2.2309293498224566e-07, "loss": 1.5854, "step": 56600 }, { "epoch": 3495.061728395062, "learning_rate": 2.2281743602301941e-07, "loss": 1.5206, "step": 56620 }, { "epoch": 3496.296296296296, "learning_rate": 2.2254193706379327e-07, "loss": 1.5742, "step": 56640 }, { "epoch": 3497.5308641975307, "learning_rate": 2.2226643810456716e-07, "loss": 1.5247, "step": 56660 }, { "epoch": 3498.7654320987654, "learning_rate": 2.2199093914534102e-07, "loss": 1.578, "step": 56680 }, { "epoch": 3500.0, "learning_rate": 2.2171544018611488e-07, "loss": 1.5607, "step": 56700 }, { "epoch": 3501.2345679012346, "learning_rate": 2.2143994122688873e-07, "loss": 1.5706, "step": 56720 }, { "epoch": 3502.4691358024693, "learning_rate": 2.211644422676625e-07, "loss": 1.5374, "step": 56740 }, { "epoch": 3503.703703703704, "learning_rate": 2.2088894330843635e-07, "loss": 1.5452, "step": 56760 }, { "epoch": 3504.938271604938, "learning_rate": 2.206134443492102e-07, "loss": 1.5477, "step": 56780 }, { "epoch": 3506.1728395061727, "learning_rate": 2.2033794538998406e-07, "loss": 1.5657, "step": 56800 }, { "epoch": 3507.4074074074074, "learning_rate": 2.2006244643075792e-07, "loss": 1.5085, "step": 56820 }, { "epoch": 3508.641975308642, "learning_rate": 2.1978694747153178e-07, "loss": 1.5564, "step": 56840 }, { "epoch": 3509.8765432098767, "learning_rate": 2.1951144851230564e-07, "loss": 1.564, "step": 56860 }, { "epoch": 3511.1111111111113, "learning_rate": 2.1923594955307942e-07, "loss": 1.5655, "step": 56880 }, { "epoch": 3512.3456790123455, "learning_rate": 2.1896045059385328e-07, "loss": 1.5342, "step": 56900 }, { "epoch": 3513.58024691358, "learning_rate": 2.1868495163462714e-07, "loss": 1.5541, "step": 56920 }, { "epoch": 3514.814814814815, "learning_rate": 2.18409452675401e-07, "loss": 1.5518, "step": 56940 }, { "epoch": 3516.0493827160494, "learning_rate": 2.1813395371617485e-07, "loss": 1.5251, "step": 56960 }, { "epoch": 3517.283950617284, "learning_rate": 2.1785845475694871e-07, "loss": 1.5355, "step": 56980 }, { "epoch": 3518.5185185185187, "learning_rate": 2.1758295579772257e-07, "loss": 1.5846, "step": 57000 }, { "epoch": 3519.753086419753, "learning_rate": 2.1730745683849635e-07, "loss": 1.5342, "step": 57020 }, { "epoch": 3520.9876543209875, "learning_rate": 2.170319578792702e-07, "loss": 1.5399, "step": 57040 }, { "epoch": 3522.222222222222, "learning_rate": 2.1675645892004407e-07, "loss": 1.575, "step": 57060 }, { "epoch": 3523.456790123457, "learning_rate": 2.1648095996081793e-07, "loss": 1.522, "step": 57080 }, { "epoch": 3524.6913580246915, "learning_rate": 2.162054610015918e-07, "loss": 1.5412, "step": 57100 }, { "epoch": 3525.925925925926, "learning_rate": 2.1592996204236567e-07, "loss": 1.5715, "step": 57120 }, { "epoch": 3527.1604938271603, "learning_rate": 2.156544630831394e-07, "loss": 1.5668, "step": 57140 }, { "epoch": 3528.395061728395, "learning_rate": 2.1537896412391326e-07, "loss": 1.569, "step": 57160 }, { "epoch": 3529.6296296296296, "learning_rate": 2.1510346516468712e-07, "loss": 1.5271, "step": 57180 }, { "epoch": 3530.864197530864, "learning_rate": 2.1482796620546098e-07, "loss": 1.5394, "step": 57200 }, { "epoch": 3532.098765432099, "learning_rate": 2.1455246724623483e-07, "loss": 1.5728, "step": 57220 }, { "epoch": 3533.3333333333335, "learning_rate": 2.142769682870087e-07, "loss": 1.6056, "step": 57240 }, { "epoch": 3534.567901234568, "learning_rate": 2.1400146932778255e-07, "loss": 1.5627, "step": 57260 }, { "epoch": 3535.8024691358023, "learning_rate": 2.1372597036855633e-07, "loss": 1.5417, "step": 57280 }, { "epoch": 3537.037037037037, "learning_rate": 2.134504714093302e-07, "loss": 1.5812, "step": 57300 }, { "epoch": 3538.2716049382716, "learning_rate": 2.1317497245010405e-07, "loss": 1.5156, "step": 57320 }, { "epoch": 3539.5061728395062, "learning_rate": 2.128994734908779e-07, "loss": 1.521, "step": 57340 }, { "epoch": 3540.740740740741, "learning_rate": 2.1262397453165177e-07, "loss": 1.5698, "step": 57360 }, { "epoch": 3541.9753086419755, "learning_rate": 2.1234847557242565e-07, "loss": 1.5729, "step": 57380 }, { "epoch": 3543.2098765432097, "learning_rate": 2.120729766131995e-07, "loss": 1.5858, "step": 57400 }, { "epoch": 3544.4444444444443, "learning_rate": 2.1179747765397326e-07, "loss": 1.5581, "step": 57420 }, { "epoch": 3545.679012345679, "learning_rate": 2.1152197869474712e-07, "loss": 1.5241, "step": 57440 }, { "epoch": 3546.9135802469136, "learning_rate": 2.1124647973552098e-07, "loss": 1.593, "step": 57460 }, { "epoch": 3548.1481481481483, "learning_rate": 2.1097098077629487e-07, "loss": 1.5178, "step": 57480 }, { "epoch": 3549.382716049383, "learning_rate": 2.106954818170687e-07, "loss": 1.528, "step": 57500 }, { "epoch": 3550.617283950617, "learning_rate": 2.1041998285784256e-07, "loss": 1.5427, "step": 57520 }, { "epoch": 3551.8518518518517, "learning_rate": 2.1014448389861642e-07, "loss": 1.5824, "step": 57540 }, { "epoch": 3553.0864197530864, "learning_rate": 2.0986898493939017e-07, "loss": 1.5518, "step": 57560 }, { "epoch": 3554.320987654321, "learning_rate": 2.0959348598016403e-07, "loss": 1.5727, "step": 57580 }, { "epoch": 3555.5555555555557, "learning_rate": 2.093179870209379e-07, "loss": 1.5564, "step": 57600 }, { "epoch": 3556.7901234567903, "learning_rate": 2.0904248806171175e-07, "loss": 1.5272, "step": 57620 }, { "epoch": 3558.0246913580245, "learning_rate": 2.087669891024856e-07, "loss": 1.5444, "step": 57640 }, { "epoch": 3559.259259259259, "learning_rate": 2.084914901432595e-07, "loss": 1.5539, "step": 57660 }, { "epoch": 3560.4938271604938, "learning_rate": 2.0821599118403324e-07, "loss": 1.5628, "step": 57680 }, { "epoch": 3561.7283950617284, "learning_rate": 2.079404922248071e-07, "loss": 1.543, "step": 57700 }, { "epoch": 3562.962962962963, "learning_rate": 2.0766499326558096e-07, "loss": 1.5733, "step": 57720 }, { "epoch": 3564.1975308641977, "learning_rate": 2.0738949430635485e-07, "loss": 1.5765, "step": 57740 }, { "epoch": 3565.432098765432, "learning_rate": 2.071139953471287e-07, "loss": 1.5361, "step": 57760 }, { "epoch": 3566.6666666666665, "learning_rate": 2.0683849638790256e-07, "loss": 1.5325, "step": 57780 }, { "epoch": 3567.901234567901, "learning_rate": 2.0656299742867642e-07, "loss": 1.567, "step": 57800 }, { "epoch": 3569.135802469136, "learning_rate": 2.0628749846945018e-07, "loss": 1.5774, "step": 57820 }, { "epoch": 3570.3703703703704, "learning_rate": 2.06011999510224e-07, "loss": 1.5808, "step": 57840 }, { "epoch": 3571.604938271605, "learning_rate": 2.057365005509979e-07, "loss": 1.543, "step": 57860 }, { "epoch": 3572.8395061728397, "learning_rate": 2.0546100159177175e-07, "loss": 1.5267, "step": 57880 }, { "epoch": 3574.074074074074, "learning_rate": 2.051855026325456e-07, "loss": 1.5612, "step": 57900 }, { "epoch": 3575.3086419753085, "learning_rate": 2.0491000367331947e-07, "loss": 1.5589, "step": 57920 }, { "epoch": 3576.543209876543, "learning_rate": 2.0463450471409333e-07, "loss": 1.542, "step": 57940 }, { "epoch": 3577.777777777778, "learning_rate": 2.0435900575486708e-07, "loss": 1.5493, "step": 57960 }, { "epoch": 3579.0123456790125, "learning_rate": 2.0408350679564094e-07, "loss": 1.505, "step": 57980 }, { "epoch": 3580.246913580247, "learning_rate": 2.038080078364148e-07, "loss": 1.5339, "step": 58000 }, { "epoch": 3581.4814814814813, "learning_rate": 2.0353250887718869e-07, "loss": 1.5404, "step": 58020 }, { "epoch": 3582.716049382716, "learning_rate": 2.0325700991796254e-07, "loss": 1.5498, "step": 58040 }, { "epoch": 3583.9506172839506, "learning_rate": 2.029815109587364e-07, "loss": 1.5171, "step": 58060 }, { "epoch": 3585.185185185185, "learning_rate": 2.0270601199951026e-07, "loss": 1.5456, "step": 58080 }, { "epoch": 3586.41975308642, "learning_rate": 2.0243051304028404e-07, "loss": 1.5425, "step": 58100 }, { "epoch": 3587.6543209876545, "learning_rate": 2.021550140810579e-07, "loss": 1.4761, "step": 58120 }, { "epoch": 3588.8888888888887, "learning_rate": 2.0187951512183176e-07, "loss": 1.5021, "step": 58140 }, { "epoch": 3590.1234567901233, "learning_rate": 2.0160401616260562e-07, "loss": 1.5373, "step": 58160 }, { "epoch": 3591.358024691358, "learning_rate": 2.0132851720337948e-07, "loss": 1.5045, "step": 58180 }, { "epoch": 3592.5925925925926, "learning_rate": 2.0105301824415334e-07, "loss": 1.5985, "step": 58200 }, { "epoch": 3593.8271604938273, "learning_rate": 2.007775192849271e-07, "loss": 1.5325, "step": 58220 }, { "epoch": 3595.061728395062, "learning_rate": 2.0050202032570095e-07, "loss": 1.5484, "step": 58240 }, { "epoch": 3596.296296296296, "learning_rate": 2.002265213664748e-07, "loss": 1.5396, "step": 58260 }, { "epoch": 3597.5308641975307, "learning_rate": 1.9995102240724867e-07, "loss": 1.5119, "step": 58280 }, { "epoch": 3598.7654320987654, "learning_rate": 1.9967552344802252e-07, "loss": 1.5147, "step": 58300 }, { "epoch": 3600.0, "learning_rate": 1.9940002448879638e-07, "loss": 1.5411, "step": 58320 }, { "epoch": 3601.2345679012346, "learning_rate": 1.9912452552957024e-07, "loss": 1.5351, "step": 58340 }, { "epoch": 3602.4691358024693, "learning_rate": 1.98849026570344e-07, "loss": 1.5331, "step": 58360 }, { "epoch": 3603.703703703704, "learning_rate": 1.9857352761111788e-07, "loss": 1.5585, "step": 58380 }, { "epoch": 3604.938271604938, "learning_rate": 1.9829802865189174e-07, "loss": 1.5598, "step": 58400 }, { "epoch": 3606.1728395061727, "learning_rate": 1.980225296926656e-07, "loss": 1.5326, "step": 58420 }, { "epoch": 3607.4074074074074, "learning_rate": 1.9774703073343946e-07, "loss": 1.5338, "step": 58440 }, { "epoch": 3608.641975308642, "learning_rate": 1.9747153177421334e-07, "loss": 1.5881, "step": 58460 }, { "epoch": 3609.8765432098767, "learning_rate": 1.9719603281498717e-07, "loss": 1.5337, "step": 58480 }, { "epoch": 3611.1111111111113, "learning_rate": 1.9692053385576095e-07, "loss": 1.5572, "step": 58500 }, { "epoch": 3612.3456790123455, "learning_rate": 1.966450348965348e-07, "loss": 1.5736, "step": 58520 }, { "epoch": 3613.58024691358, "learning_rate": 1.9636953593730867e-07, "loss": 1.5843, "step": 58540 }, { "epoch": 3614.814814814815, "learning_rate": 1.9609403697808253e-07, "loss": 1.5126, "step": 58560 }, { "epoch": 3616.0493827160494, "learning_rate": 1.958185380188564e-07, "loss": 1.566, "step": 58580 }, { "epoch": 3617.283950617284, "learning_rate": 1.9554303905963025e-07, "loss": 1.5558, "step": 58600 }, { "epoch": 3618.5185185185187, "learning_rate": 1.952675401004041e-07, "loss": 1.529, "step": 58620 }, { "epoch": 3619.753086419753, "learning_rate": 1.9499204114117786e-07, "loss": 1.5571, "step": 58640 }, { "epoch": 3620.9876543209875, "learning_rate": 1.9471654218195172e-07, "loss": 1.582, "step": 58660 }, { "epoch": 3622.222222222222, "learning_rate": 1.9444104322272558e-07, "loss": 1.5444, "step": 58680 }, { "epoch": 3623.456790123457, "learning_rate": 1.9416554426349944e-07, "loss": 1.5387, "step": 58700 }, { "epoch": 3624.6913580246915, "learning_rate": 1.938900453042733e-07, "loss": 1.5513, "step": 58720 }, { "epoch": 3625.925925925926, "learning_rate": 1.9361454634504715e-07, "loss": 1.512, "step": 58740 }, { "epoch": 3627.1604938271603, "learning_rate": 1.9333904738582093e-07, "loss": 1.5541, "step": 58760 }, { "epoch": 3628.395061728395, "learning_rate": 1.930635484265948e-07, "loss": 1.5458, "step": 58780 }, { "epoch": 3629.6296296296296, "learning_rate": 1.9278804946736865e-07, "loss": 1.5554, "step": 58800 }, { "epoch": 3630.864197530864, "learning_rate": 1.9251255050814248e-07, "loss": 1.5525, "step": 58820 }, { "epoch": 3632.098765432099, "learning_rate": 1.9223705154891637e-07, "loss": 1.5987, "step": 58840 }, { "epoch": 3633.3333333333335, "learning_rate": 1.9196155258969028e-07, "loss": 1.5321, "step": 58860 }, { "epoch": 3634.567901234568, "learning_rate": 1.916860536304641e-07, "loss": 1.5373, "step": 58880 }, { "epoch": 3635.8024691358023, "learning_rate": 1.9141055467123787e-07, "loss": 1.567, "step": 58900 }, { "epoch": 3637.037037037037, "learning_rate": 1.9113505571201172e-07, "loss": 1.5553, "step": 58920 }, { "epoch": 3638.2716049382716, "learning_rate": 1.9085955675278558e-07, "loss": 1.5572, "step": 58940 }, { "epoch": 3639.5061728395062, "learning_rate": 1.9058405779355944e-07, "loss": 1.5682, "step": 58960 }, { "epoch": 3640.740740740741, "learning_rate": 1.903085588343333e-07, "loss": 1.5635, "step": 58980 }, { "epoch": 3641.9753086419755, "learning_rate": 1.9003305987510716e-07, "loss": 1.5635, "step": 59000 }, { "epoch": 3643.2098765432097, "learning_rate": 1.8975756091588102e-07, "loss": 1.5658, "step": 59020 }, { "epoch": 3644.4444444444443, "learning_rate": 1.8948206195665477e-07, "loss": 1.5657, "step": 59040 }, { "epoch": 3645.679012345679, "learning_rate": 1.8920656299742863e-07, "loss": 1.589, "step": 59060 }, { "epoch": 3646.9135802469136, "learning_rate": 1.889310640382025e-07, "loss": 1.5261, "step": 59080 }, { "epoch": 3648.1481481481483, "learning_rate": 1.8865556507897635e-07, "loss": 1.5241, "step": 59100 }, { "epoch": 3649.382716049383, "learning_rate": 1.883800661197502e-07, "loss": 1.5414, "step": 59120 }, { "epoch": 3650.617283950617, "learning_rate": 1.8810456716052407e-07, "loss": 1.5475, "step": 59140 }, { "epoch": 3651.8518518518517, "learning_rate": 1.8782906820129795e-07, "loss": 1.5151, "step": 59160 }, { "epoch": 3653.0864197530864, "learning_rate": 1.8755356924207168e-07, "loss": 1.5475, "step": 59180 }, { "epoch": 3654.320987654321, "learning_rate": 1.8727807028284556e-07, "loss": 1.5143, "step": 59200 }, { "epoch": 3655.5555555555557, "learning_rate": 1.8700257132361948e-07, "loss": 1.5238, "step": 59220 }, { "epoch": 3656.7901234567903, "learning_rate": 1.867270723643933e-07, "loss": 1.5603, "step": 59240 }, { "epoch": 3658.0246913580245, "learning_rate": 1.8645157340516717e-07, "loss": 1.5396, "step": 59260 }, { "epoch": 3659.259259259259, "learning_rate": 1.8617607444594103e-07, "loss": 1.5327, "step": 59280 }, { "epoch": 3660.4938271604938, "learning_rate": 1.8590057548671478e-07, "loss": 1.5517, "step": 59300 }, { "epoch": 3661.7283950617284, "learning_rate": 1.8562507652748864e-07, "loss": 1.5484, "step": 59320 }, { "epoch": 3662.962962962963, "learning_rate": 1.853495775682625e-07, "loss": 1.5254, "step": 59340 }, { "epoch": 3664.1975308641977, "learning_rate": 1.8507407860903635e-07, "loss": 1.5516, "step": 59360 }, { "epoch": 3665.432098765432, "learning_rate": 1.8479857964981021e-07, "loss": 1.5704, "step": 59380 }, { "epoch": 3666.6666666666665, "learning_rate": 1.8452308069058407e-07, "loss": 1.5279, "step": 59400 }, { "epoch": 3667.901234567901, "learning_rate": 1.8424758173135793e-07, "loss": 1.4852, "step": 59420 }, { "epoch": 3669.135802469136, "learning_rate": 1.8397208277213168e-07, "loss": 1.5694, "step": 59440 }, { "epoch": 3670.3703703703704, "learning_rate": 1.8369658381290554e-07, "loss": 1.5283, "step": 59460 }, { "epoch": 3671.604938271605, "learning_rate": 1.834210848536794e-07, "loss": 1.5324, "step": 59480 }, { "epoch": 3672.8395061728397, "learning_rate": 1.831455858944533e-07, "loss": 1.5737, "step": 59500 }, { "epoch": 3674.074074074074, "learning_rate": 1.8287008693522715e-07, "loss": 1.5304, "step": 59520 }, { "epoch": 3675.3086419753085, "learning_rate": 1.82594587976001e-07, "loss": 1.5675, "step": 59540 }, { "epoch": 3676.543209876543, "learning_rate": 1.8231908901677486e-07, "loss": 1.574, "step": 59560 }, { "epoch": 3677.777777777778, "learning_rate": 1.8204359005754862e-07, "loss": 1.5253, "step": 59580 }, { "epoch": 3679.0123456790125, "learning_rate": 1.817680910983225e-07, "loss": 1.4923, "step": 59600 }, { "epoch": 3680.246913580247, "learning_rate": 1.8149259213909636e-07, "loss": 1.556, "step": 59620 }, { "epoch": 3681.4814814814813, "learning_rate": 1.8121709317987022e-07, "loss": 1.5306, "step": 59640 }, { "epoch": 3682.716049382716, "learning_rate": 1.8094159422064408e-07, "loss": 1.559, "step": 59660 }, { "epoch": 3683.9506172839506, "learning_rate": 1.8066609526141794e-07, "loss": 1.5591, "step": 59680 }, { "epoch": 3685.185185185185, "learning_rate": 1.803905963021918e-07, "loss": 1.5382, "step": 59700 }, { "epoch": 3686.41975308642, "learning_rate": 1.8011509734296555e-07, "loss": 1.5264, "step": 59720 }, { "epoch": 3687.6543209876545, "learning_rate": 1.798395983837394e-07, "loss": 1.5939, "step": 59740 }, { "epoch": 3688.8888888888887, "learning_rate": 1.7956409942451327e-07, "loss": 1.55, "step": 59760 }, { "epoch": 3690.1234567901233, "learning_rate": 1.7928860046528713e-07, "loss": 1.5009, "step": 59780 }, { "epoch": 3691.358024691358, "learning_rate": 1.79013101506061e-07, "loss": 1.5513, "step": 59800 }, { "epoch": 3692.5925925925926, "learning_rate": 1.7873760254683484e-07, "loss": 1.5661, "step": 59820 }, { "epoch": 3693.8271604938273, "learning_rate": 1.784621035876086e-07, "loss": 1.5401, "step": 59840 }, { "epoch": 3695.061728395062, "learning_rate": 1.7818660462838248e-07, "loss": 1.5403, "step": 59860 }, { "epoch": 3696.296296296296, "learning_rate": 1.7791110566915634e-07, "loss": 1.5321, "step": 59880 }, { "epoch": 3697.5308641975307, "learning_rate": 1.776356067099302e-07, "loss": 1.5369, "step": 59900 }, { "epoch": 3698.7654320987654, "learning_rate": 1.7736010775070406e-07, "loss": 1.5659, "step": 59920 }, { "epoch": 3700.0, "learning_rate": 1.7708460879147792e-07, "loss": 1.547, "step": 59940 }, { "epoch": 3701.2345679012346, "learning_rate": 1.7680910983225178e-07, "loss": 1.5303, "step": 59960 }, { "epoch": 3702.4691358024693, "learning_rate": 1.7653361087302556e-07, "loss": 1.538, "step": 59980 }, { "epoch": 3703.703703703704, "learning_rate": 1.7625811191379941e-07, "loss": 1.4937, "step": 60000 } ], "logging_steps": 20, "max_steps": 65536, "num_input_tokens_seen": 0, "num_train_epochs": 4096, "save_steps": 10000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.754211655397376e+17, "train_batch_size": 5, "trial_name": null, "trial_params": null }