diff --git "a/checkpoint-40000/trainer_state.json" "b/checkpoint-40000/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-40000/trainer_state.json" @@ -0,0 +1,12039 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2469.135802469136, + "eval_steps": 500, + "global_step": 40000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.06172839506172839, + "learning_rate": 5e-09, + "loss": 8.604, + "step": 1 + }, + { + "epoch": 1.2345679012345678, + "learning_rate": 1e-07, + "loss": 8.0873, + "step": 20 + }, + { + "epoch": 2.4691358024691357, + "learning_rate": 2e-07, + "loss": 7.8074, + "step": 40 + }, + { + "epoch": 3.7037037037037037, + "learning_rate": 3e-07, + "loss": 7.0763, + "step": 60 + }, + { + "epoch": 4.938271604938271, + "learning_rate": 4e-07, + "loss": 6.2969, + "step": 80 + }, + { + "epoch": 6.172839506172839, + "learning_rate": 5e-07, + "loss": 5.9334, + "step": 100 + }, + { + "epoch": 7.407407407407407, + "learning_rate": 6e-07, + "loss": 5.4919, + "step": 120 + }, + { + "epoch": 8.641975308641975, + "learning_rate": 7e-07, + "loss": 5.0659, + "step": 140 + }, + { + "epoch": 9.876543209876543, + "learning_rate": 8e-07, + "loss": 4.9089, + "step": 160 + }, + { + "epoch": 11.11111111111111, + "learning_rate": 9e-07, + "loss": 4.7144, + "step": 180 + }, + { + "epoch": 12.345679012345679, + "learning_rate": 1e-06, + "loss": 4.7373, + "step": 200 + }, + { + "epoch": 13.580246913580247, + "learning_rate": 9.997245010407738e-07, + "loss": 4.3848, + "step": 220 + }, + { + "epoch": 14.814814814814815, + "learning_rate": 9.994490020815477e-07, + "loss": 4.3457, + "step": 240 + }, + { + "epoch": 16.049382716049383, + "learning_rate": 9.991735031223215e-07, + "loss": 4.2217, + "step": 260 + }, + { + "epoch": 17.28395061728395, + "learning_rate": 9.988980041630952e-07, + "loss": 4.1273, + "step": 280 + }, + { + "epoch": 18.51851851851852, + "learning_rate": 9.986225052038692e-07, + "loss": 4.0123, + "step": 300 + }, + { + "epoch": 19.753086419753085, + "learning_rate": 9.98347006244643e-07, + "loss": 4.0784, + "step": 320 + }, + { + "epoch": 20.987654320987655, + "learning_rate": 9.98071507285417e-07, + "loss": 3.9415, + "step": 340 + }, + { + "epoch": 22.22222222222222, + "learning_rate": 9.977960083261906e-07, + "loss": 3.843, + "step": 360 + }, + { + "epoch": 23.45679012345679, + "learning_rate": 9.975205093669647e-07, + "loss": 3.8174, + "step": 380 + }, + { + "epoch": 24.691358024691358, + "learning_rate": 9.972450104077383e-07, + "loss": 3.7376, + "step": 400 + }, + { + "epoch": 25.925925925925927, + "learning_rate": 9.969695114485122e-07, + "loss": 3.7876, + "step": 420 + }, + { + "epoch": 27.160493827160494, + "learning_rate": 9.96694012489286e-07, + "loss": 3.7585, + "step": 440 + }, + { + "epoch": 28.395061728395063, + "learning_rate": 9.9641851353006e-07, + "loss": 3.6304, + "step": 460 + }, + { + "epoch": 29.62962962962963, + "learning_rate": 9.961430145708338e-07, + "loss": 3.6751, + "step": 480 + }, + { + "epoch": 30.864197530864196, + "learning_rate": 9.958675156116076e-07, + "loss": 3.677, + "step": 500 + }, + { + "epoch": 32.098765432098766, + "learning_rate": 9.955920166523815e-07, + "loss": 3.6556, + "step": 520 + }, + { + "epoch": 33.333333333333336, + "learning_rate": 9.953165176931551e-07, + "loss": 3.5897, + "step": 540 + }, + { + "epoch": 34.5679012345679, + "learning_rate": 9.950410187339292e-07, + "loss": 3.7304, + "step": 560 + }, + { + "epoch": 35.80246913580247, + "learning_rate": 9.94765519774703e-07, + "loss": 3.568, + "step": 580 + }, + { + "epoch": 37.03703703703704, + "learning_rate": 9.94490020815477e-07, + "loss": 3.6207, + "step": 600 + }, + { + "epoch": 38.27160493827161, + "learning_rate": 9.942145218562508e-07, + "loss": 3.4759, + "step": 620 + }, + { + "epoch": 39.50617283950617, + "learning_rate": 9.939390228970246e-07, + "loss": 3.4513, + "step": 640 + }, + { + "epoch": 40.74074074074074, + "learning_rate": 9.936635239377985e-07, + "loss": 3.372, + "step": 660 + }, + { + "epoch": 41.97530864197531, + "learning_rate": 9.933880249785724e-07, + "loss": 3.2601, + "step": 680 + }, + { + "epoch": 43.20987654320987, + "learning_rate": 9.93112526019346e-07, + "loss": 3.3071, + "step": 700 + }, + { + "epoch": 44.44444444444444, + "learning_rate": 9.9283702706012e-07, + "loss": 3.2768, + "step": 720 + }, + { + "epoch": 45.67901234567901, + "learning_rate": 9.925615281008937e-07, + "loss": 3.3103, + "step": 740 + }, + { + "epoch": 46.91358024691358, + "learning_rate": 9.922860291416678e-07, + "loss": 3.2807, + "step": 760 + }, + { + "epoch": 48.148148148148145, + "learning_rate": 9.920105301824414e-07, + "loss": 3.1839, + "step": 780 + }, + { + "epoch": 49.382716049382715, + "learning_rate": 9.917350312232155e-07, + "loss": 3.1689, + "step": 800 + }, + { + "epoch": 50.617283950617285, + "learning_rate": 9.914595322639892e-07, + "loss": 3.17, + "step": 820 + }, + { + "epoch": 51.851851851851855, + "learning_rate": 9.91184033304763e-07, + "loss": 3.1598, + "step": 840 + }, + { + "epoch": 53.08641975308642, + "learning_rate": 9.909085343455369e-07, + "loss": 3.1215, + "step": 860 + }, + { + "epoch": 54.32098765432099, + "learning_rate": 9.906330353863107e-07, + "loss": 3.102, + "step": 880 + }, + { + "epoch": 55.55555555555556, + "learning_rate": 9.903575364270846e-07, + "loss": 3.0819, + "step": 900 + }, + { + "epoch": 56.79012345679013, + "learning_rate": 9.900820374678584e-07, + "loss": 3.0729, + "step": 920 + }, + { + "epoch": 58.02469135802469, + "learning_rate": 9.898065385086323e-07, + "loss": 3.0639, + "step": 940 + }, + { + "epoch": 59.25925925925926, + "learning_rate": 9.89531039549406e-07, + "loss": 3.0172, + "step": 960 + }, + { + "epoch": 60.49382716049383, + "learning_rate": 9.8925554059018e-07, + "loss": 3.0463, + "step": 980 + }, + { + "epoch": 61.72839506172839, + "learning_rate": 9.889800416309537e-07, + "loss": 2.9424, + "step": 1000 + }, + { + "epoch": 62.96296296296296, + "learning_rate": 9.887045426717277e-07, + "loss": 3.0018, + "step": 1020 + }, + { + "epoch": 64.19753086419753, + "learning_rate": 9.884290437125014e-07, + "loss": 3.0268, + "step": 1040 + }, + { + "epoch": 65.4320987654321, + "learning_rate": 9.881535447532755e-07, + "loss": 3.0157, + "step": 1060 + }, + { + "epoch": 66.66666666666667, + "learning_rate": 9.878780457940491e-07, + "loss": 2.9853, + "step": 1080 + }, + { + "epoch": 67.90123456790124, + "learning_rate": 9.87602546834823e-07, + "loss": 2.8881, + "step": 1100 + }, + { + "epoch": 69.1358024691358, + "learning_rate": 9.873270478755968e-07, + "loss": 2.965, + "step": 1120 + }, + { + "epoch": 70.37037037037037, + "learning_rate": 9.870515489163707e-07, + "loss": 2.9127, + "step": 1140 + }, + { + "epoch": 71.60493827160494, + "learning_rate": 9.867760499571445e-07, + "loss": 2.8881, + "step": 1160 + }, + { + "epoch": 72.8395061728395, + "learning_rate": 9.865005509979184e-07, + "loss": 2.8964, + "step": 1180 + }, + { + "epoch": 74.07407407407408, + "learning_rate": 9.862250520386923e-07, + "loss": 2.932, + "step": 1200 + }, + { + "epoch": 75.30864197530865, + "learning_rate": 9.85949553079466e-07, + "loss": 2.9062, + "step": 1220 + }, + { + "epoch": 76.54320987654322, + "learning_rate": 9.8567405412024e-07, + "loss": 2.9141, + "step": 1240 + }, + { + "epoch": 77.77777777777777, + "learning_rate": 9.853985551610136e-07, + "loss": 2.8072, + "step": 1260 + }, + { + "epoch": 79.01234567901234, + "learning_rate": 9.851230562017877e-07, + "loss": 2.8403, + "step": 1280 + }, + { + "epoch": 80.24691358024691, + "learning_rate": 9.848475572425613e-07, + "loss": 2.8484, + "step": 1300 + }, + { + "epoch": 81.48148148148148, + "learning_rate": 9.845720582833354e-07, + "loss": 2.8565, + "step": 1320 + }, + { + "epoch": 82.71604938271605, + "learning_rate": 9.842965593241093e-07, + "loss": 2.8241, + "step": 1340 + }, + { + "epoch": 83.95061728395062, + "learning_rate": 9.840210603648831e-07, + "loss": 2.8383, + "step": 1360 + }, + { + "epoch": 85.18518518518519, + "learning_rate": 9.837455614056568e-07, + "loss": 2.8188, + "step": 1380 + }, + { + "epoch": 86.41975308641975, + "learning_rate": 9.834700624464309e-07, + "loss": 2.8681, + "step": 1400 + }, + { + "epoch": 87.65432098765432, + "learning_rate": 9.831945634872045e-07, + "loss": 2.7453, + "step": 1420 + }, + { + "epoch": 88.88888888888889, + "learning_rate": 9.829190645279786e-07, + "loss": 2.7488, + "step": 1440 + }, + { + "epoch": 90.12345679012346, + "learning_rate": 9.826435655687522e-07, + "loss": 2.7859, + "step": 1460 + }, + { + "epoch": 91.35802469135803, + "learning_rate": 9.82368066609526e-07, + "loss": 2.8079, + "step": 1480 + }, + { + "epoch": 92.5925925925926, + "learning_rate": 9.820925676503e-07, + "loss": 2.717, + "step": 1500 + }, + { + "epoch": 93.82716049382717, + "learning_rate": 9.818170686910738e-07, + "loss": 2.7502, + "step": 1520 + }, + { + "epoch": 95.06172839506173, + "learning_rate": 9.815415697318477e-07, + "loss": 2.7395, + "step": 1540 + }, + { + "epoch": 96.29629629629629, + "learning_rate": 9.812660707726215e-07, + "loss": 2.7172, + "step": 1560 + }, + { + "epoch": 97.53086419753086, + "learning_rate": 9.809905718133954e-07, + "loss": 2.6894, + "step": 1580 + }, + { + "epoch": 98.76543209876543, + "learning_rate": 9.807150728541692e-07, + "loss": 2.7527, + "step": 1600 + }, + { + "epoch": 100.0, + "learning_rate": 9.80439573894943e-07, + "loss": 2.7461, + "step": 1620 + }, + { + "epoch": 101.23456790123457, + "learning_rate": 9.801640749357167e-07, + "loss": 2.782, + "step": 1640 + }, + { + "epoch": 102.46913580246914, + "learning_rate": 9.798885759764908e-07, + "loss": 2.7753, + "step": 1660 + }, + { + "epoch": 103.70370370370371, + "learning_rate": 9.796130770172645e-07, + "loss": 2.7902, + "step": 1680 + }, + { + "epoch": 104.93827160493827, + "learning_rate": 9.793375780580385e-07, + "loss": 2.7305, + "step": 1700 + }, + { + "epoch": 106.17283950617283, + "learning_rate": 9.790620790988122e-07, + "loss": 2.7476, + "step": 1720 + }, + { + "epoch": 107.4074074074074, + "learning_rate": 9.787865801395862e-07, + "loss": 2.715, + "step": 1740 + }, + { + "epoch": 108.64197530864197, + "learning_rate": 9.785110811803599e-07, + "loss": 2.6883, + "step": 1760 + }, + { + "epoch": 109.87654320987654, + "learning_rate": 9.782355822211337e-07, + "loss": 2.6921, + "step": 1780 + }, + { + "epoch": 111.11111111111111, + "learning_rate": 9.779600832619076e-07, + "loss": 2.7132, + "step": 1800 + }, + { + "epoch": 112.34567901234568, + "learning_rate": 9.776845843026815e-07, + "loss": 2.6779, + "step": 1820 + }, + { + "epoch": 113.58024691358025, + "learning_rate": 9.774090853434553e-07, + "loss": 2.6119, + "step": 1840 + }, + { + "epoch": 114.81481481481481, + "learning_rate": 9.771335863842292e-07, + "loss": 2.6716, + "step": 1860 + }, + { + "epoch": 116.04938271604938, + "learning_rate": 9.76858087425003e-07, + "loss": 2.6666, + "step": 1880 + }, + { + "epoch": 117.28395061728395, + "learning_rate": 9.765825884657767e-07, + "loss": 2.6476, + "step": 1900 + }, + { + "epoch": 118.51851851851852, + "learning_rate": 9.763070895065508e-07, + "loss": 2.6393, + "step": 1920 + }, + { + "epoch": 119.75308641975309, + "learning_rate": 9.760315905473244e-07, + "loss": 2.627, + "step": 1940 + }, + { + "epoch": 120.98765432098766, + "learning_rate": 9.757560915880985e-07, + "loss": 2.6067, + "step": 1960 + }, + { + "epoch": 122.22222222222223, + "learning_rate": 9.754805926288721e-07, + "loss": 2.6015, + "step": 1980 + }, + { + "epoch": 123.45679012345678, + "learning_rate": 9.752050936696462e-07, + "loss": 2.6428, + "step": 2000 + }, + { + "epoch": 124.69135802469135, + "learning_rate": 9.749295947104198e-07, + "loss": 2.6251, + "step": 2020 + }, + { + "epoch": 125.92592592592592, + "learning_rate": 9.746540957511937e-07, + "loss": 2.655, + "step": 2040 + }, + { + "epoch": 127.1604938271605, + "learning_rate": 9.743785967919676e-07, + "loss": 2.5351, + "step": 2060 + }, + { + "epoch": 128.39506172839506, + "learning_rate": 9.741030978327416e-07, + "loss": 2.582, + "step": 2080 + }, + { + "epoch": 129.62962962962962, + "learning_rate": 9.738275988735153e-07, + "loss": 2.5874, + "step": 2100 + }, + { + "epoch": 130.8641975308642, + "learning_rate": 9.735520999142893e-07, + "loss": 2.5603, + "step": 2120 + }, + { + "epoch": 132.09876543209876, + "learning_rate": 9.73276600955063e-07, + "loss": 2.5398, + "step": 2140 + }, + { + "epoch": 133.33333333333334, + "learning_rate": 9.730011019958369e-07, + "loss": 2.5265, + "step": 2160 + }, + { + "epoch": 134.5679012345679, + "learning_rate": 9.727256030366107e-07, + "loss": 2.6271, + "step": 2180 + }, + { + "epoch": 135.80246913580248, + "learning_rate": 9.724501040773846e-07, + "loss": 2.5626, + "step": 2200 + }, + { + "epoch": 137.03703703703704, + "learning_rate": 9.721746051181584e-07, + "loss": 2.5425, + "step": 2220 + }, + { + "epoch": 138.2716049382716, + "learning_rate": 9.718991061589323e-07, + "loss": 2.5251, + "step": 2240 + }, + { + "epoch": 139.50617283950618, + "learning_rate": 9.716236071997061e-07, + "loss": 2.5817, + "step": 2260 + }, + { + "epoch": 140.74074074074073, + "learning_rate": 9.7134810824048e-07, + "loss": 2.4888, + "step": 2280 + }, + { + "epoch": 141.97530864197532, + "learning_rate": 9.710726092812539e-07, + "loss": 2.5426, + "step": 2300 + }, + { + "epoch": 143.20987654320987, + "learning_rate": 9.707971103220275e-07, + "loss": 2.5293, + "step": 2320 + }, + { + "epoch": 144.44444444444446, + "learning_rate": 9.705216113628016e-07, + "loss": 2.4874, + "step": 2340 + }, + { + "epoch": 145.679012345679, + "learning_rate": 9.702461124035752e-07, + "loss": 2.5487, + "step": 2360 + }, + { + "epoch": 146.91358024691357, + "learning_rate": 9.699706134443493e-07, + "loss": 2.5153, + "step": 2380 + }, + { + "epoch": 148.14814814814815, + "learning_rate": 9.69695114485123e-07, + "loss": 2.4468, + "step": 2400 + }, + { + "epoch": 149.3827160493827, + "learning_rate": 9.69419615525897e-07, + "loss": 2.4731, + "step": 2420 + }, + { + "epoch": 150.6172839506173, + "learning_rate": 9.691441165666707e-07, + "loss": 2.5007, + "step": 2440 + }, + { + "epoch": 151.85185185185185, + "learning_rate": 9.688686176074445e-07, + "loss": 2.4948, + "step": 2460 + }, + { + "epoch": 153.08641975308643, + "learning_rate": 9.685931186482184e-07, + "loss": 2.4867, + "step": 2480 + }, + { + "epoch": 154.320987654321, + "learning_rate": 9.683176196889922e-07, + "loss": 2.4654, + "step": 2500 + }, + { + "epoch": 155.55555555555554, + "learning_rate": 9.68042120729766e-07, + "loss": 2.4621, + "step": 2520 + }, + { + "epoch": 156.79012345679013, + "learning_rate": 9.6776662177054e-07, + "loss": 2.4738, + "step": 2540 + }, + { + "epoch": 158.02469135802468, + "learning_rate": 9.674911228113138e-07, + "loss": 2.4562, + "step": 2560 + }, + { + "epoch": 159.25925925925927, + "learning_rate": 9.672156238520875e-07, + "loss": 2.4937, + "step": 2580 + }, + { + "epoch": 160.49382716049382, + "learning_rate": 9.669401248928615e-07, + "loss": 2.4574, + "step": 2600 + }, + { + "epoch": 161.7283950617284, + "learning_rate": 9.666646259336352e-07, + "loss": 2.4642, + "step": 2620 + }, + { + "epoch": 162.96296296296296, + "learning_rate": 9.663891269744093e-07, + "loss": 2.4736, + "step": 2640 + }, + { + "epoch": 164.19753086419752, + "learning_rate": 9.66113628015183e-07, + "loss": 2.4926, + "step": 2660 + }, + { + "epoch": 165.4320987654321, + "learning_rate": 9.65838129055957e-07, + "loss": 2.4323, + "step": 2680 + }, + { + "epoch": 166.66666666666666, + "learning_rate": 9.655626300967306e-07, + "loss": 2.4508, + "step": 2700 + }, + { + "epoch": 167.90123456790124, + "learning_rate": 9.652871311375045e-07, + "loss": 2.4295, + "step": 2720 + }, + { + "epoch": 169.1358024691358, + "learning_rate": 9.650116321782783e-07, + "loss": 2.4753, + "step": 2740 + }, + { + "epoch": 170.37037037037038, + "learning_rate": 9.647361332190522e-07, + "loss": 2.4323, + "step": 2760 + }, + { + "epoch": 171.60493827160494, + "learning_rate": 9.64460634259826e-07, + "loss": 2.392, + "step": 2780 + }, + { + "epoch": 172.8395061728395, + "learning_rate": 9.641851353006e-07, + "loss": 2.4579, + "step": 2800 + }, + { + "epoch": 174.07407407407408, + "learning_rate": 9.639096363413738e-07, + "loss": 2.4091, + "step": 2820 + }, + { + "epoch": 175.30864197530863, + "learning_rate": 9.636341373821476e-07, + "loss": 2.3802, + "step": 2840 + }, + { + "epoch": 176.54320987654322, + "learning_rate": 9.633586384229215e-07, + "loss": 2.3483, + "step": 2860 + }, + { + "epoch": 177.77777777777777, + "learning_rate": 9.630831394636953e-07, + "loss": 2.374, + "step": 2880 + }, + { + "epoch": 179.01234567901236, + "learning_rate": 9.628076405044692e-07, + "loss": 2.414, + "step": 2900 + }, + { + "epoch": 180.2469135802469, + "learning_rate": 9.62532141545243e-07, + "loss": 2.3971, + "step": 2920 + }, + { + "epoch": 181.4814814814815, + "learning_rate": 9.62256642586017e-07, + "loss": 2.3508, + "step": 2940 + }, + { + "epoch": 182.71604938271605, + "learning_rate": 9.619811436267908e-07, + "loss": 2.4072, + "step": 2960 + }, + { + "epoch": 183.9506172839506, + "learning_rate": 9.617056446675646e-07, + "loss": 2.3853, + "step": 2980 + }, + { + "epoch": 185.1851851851852, + "learning_rate": 9.614301457083383e-07, + "loss": 2.4256, + "step": 3000 + }, + { + "epoch": 186.41975308641975, + "learning_rate": 9.611546467491124e-07, + "loss": 2.3795, + "step": 3020 + }, + { + "epoch": 187.65432098765433, + "learning_rate": 9.60879147789886e-07, + "loss": 2.3763, + "step": 3040 + }, + { + "epoch": 188.88888888888889, + "learning_rate": 9.6060364883066e-07, + "loss": 2.3474, + "step": 3060 + }, + { + "epoch": 190.12345679012347, + "learning_rate": 9.603281498714337e-07, + "loss": 2.4074, + "step": 3080 + }, + { + "epoch": 191.35802469135803, + "learning_rate": 9.600526509122076e-07, + "loss": 2.3438, + "step": 3100 + }, + { + "epoch": 192.59259259259258, + "learning_rate": 9.597771519529814e-07, + "loss": 2.3209, + "step": 3120 + }, + { + "epoch": 193.82716049382717, + "learning_rate": 9.595016529937553e-07, + "loss": 2.3364, + "step": 3140 + }, + { + "epoch": 195.06172839506172, + "learning_rate": 9.592261540345292e-07, + "loss": 2.3624, + "step": 3160 + }, + { + "epoch": 196.2962962962963, + "learning_rate": 9.58950655075303e-07, + "loss": 2.3004, + "step": 3180 + }, + { + "epoch": 197.53086419753086, + "learning_rate": 9.586751561160769e-07, + "loss": 2.2904, + "step": 3200 + }, + { + "epoch": 198.76543209876544, + "learning_rate": 9.583996571568507e-07, + "loss": 2.2971, + "step": 3220 + }, + { + "epoch": 200.0, + "learning_rate": 9.581241581976246e-07, + "loss": 2.3364, + "step": 3240 + }, + { + "epoch": 201.23456790123456, + "learning_rate": 9.578486592383982e-07, + "loss": 2.3191, + "step": 3260 + }, + { + "epoch": 202.46913580246914, + "learning_rate": 9.575731602791723e-07, + "loss": 2.3663, + "step": 3280 + }, + { + "epoch": 203.7037037037037, + "learning_rate": 9.57297661319946e-07, + "loss": 2.3649, + "step": 3300 + }, + { + "epoch": 204.93827160493828, + "learning_rate": 9.5702216236072e-07, + "loss": 2.2762, + "step": 3320 + }, + { + "epoch": 206.17283950617283, + "learning_rate": 9.567466634014937e-07, + "loss": 2.315, + "step": 3340 + }, + { + "epoch": 207.40740740740742, + "learning_rate": 9.564711644422677e-07, + "loss": 2.2924, + "step": 3360 + }, + { + "epoch": 208.64197530864197, + "learning_rate": 9.561956654830414e-07, + "loss": 2.2383, + "step": 3380 + }, + { + "epoch": 209.87654320987653, + "learning_rate": 9.559201665238153e-07, + "loss": 2.2903, + "step": 3400 + }, + { + "epoch": 211.11111111111111, + "learning_rate": 9.556446675645891e-07, + "loss": 2.3423, + "step": 3420 + }, + { + "epoch": 212.34567901234567, + "learning_rate": 9.55369168605363e-07, + "loss": 2.2747, + "step": 3440 + }, + { + "epoch": 213.58024691358025, + "learning_rate": 9.550936696461368e-07, + "loss": 2.2997, + "step": 3460 + }, + { + "epoch": 214.8148148148148, + "learning_rate": 9.548181706869107e-07, + "loss": 2.2166, + "step": 3480 + }, + { + "epoch": 216.0493827160494, + "learning_rate": 9.545426717276845e-07, + "loss": 2.349, + "step": 3500 + }, + { + "epoch": 217.28395061728395, + "learning_rate": 9.542671727684582e-07, + "loss": 2.2522, + "step": 3520 + }, + { + "epoch": 218.5185185185185, + "learning_rate": 9.539916738092323e-07, + "loss": 2.2854, + "step": 3540 + }, + { + "epoch": 219.7530864197531, + "learning_rate": 9.537161748500061e-07, + "loss": 2.2806, + "step": 3560 + }, + { + "epoch": 220.98765432098764, + "learning_rate": 9.5344067589078e-07, + "loss": 2.233, + "step": 3580 + }, + { + "epoch": 222.22222222222223, + "learning_rate": 9.531651769315538e-07, + "loss": 2.2588, + "step": 3600 + }, + { + "epoch": 223.45679012345678, + "learning_rate": 9.528896779723277e-07, + "loss": 2.2729, + "step": 3620 + }, + { + "epoch": 224.69135802469137, + "learning_rate": 9.526141790131015e-07, + "loss": 2.2818, + "step": 3640 + }, + { + "epoch": 225.92592592592592, + "learning_rate": 9.523386800538753e-07, + "loss": 2.2291, + "step": 3660 + }, + { + "epoch": 227.1604938271605, + "learning_rate": 9.520631810946491e-07, + "loss": 2.2552, + "step": 3680 + }, + { + "epoch": 228.39506172839506, + "learning_rate": 9.51787682135423e-07, + "loss": 2.2028, + "step": 3700 + }, + { + "epoch": 229.62962962962962, + "learning_rate": 9.515121831761969e-07, + "loss": 2.1948, + "step": 3720 + }, + { + "epoch": 230.8641975308642, + "learning_rate": 9.512366842169707e-07, + "loss": 2.2981, + "step": 3740 + }, + { + "epoch": 232.09876543209876, + "learning_rate": 9.509611852577446e-07, + "loss": 2.2519, + "step": 3760 + }, + { + "epoch": 233.33333333333334, + "learning_rate": 9.506856862985184e-07, + "loss": 2.2159, + "step": 3780 + }, + { + "epoch": 234.5679012345679, + "learning_rate": 9.504101873392922e-07, + "loss": 2.2122, + "step": 3800 + }, + { + "epoch": 235.80246913580248, + "learning_rate": 9.501346883800661e-07, + "loss": 2.2165, + "step": 3820 + }, + { + "epoch": 237.03703703703704, + "learning_rate": 9.498591894208399e-07, + "loss": 2.2362, + "step": 3840 + }, + { + "epoch": 238.2716049382716, + "learning_rate": 9.495836904616138e-07, + "loss": 2.1995, + "step": 3860 + }, + { + "epoch": 239.50617283950618, + "learning_rate": 9.493081915023877e-07, + "loss": 2.248, + "step": 3880 + }, + { + "epoch": 240.74074074074073, + "learning_rate": 9.490326925431615e-07, + "loss": 2.1703, + "step": 3900 + }, + { + "epoch": 241.97530864197532, + "learning_rate": 9.487571935839353e-07, + "loss": 2.1987, + "step": 3920 + }, + { + "epoch": 243.20987654320987, + "learning_rate": 9.484816946247091e-07, + "loss": 2.2023, + "step": 3940 + }, + { + "epoch": 244.44444444444446, + "learning_rate": 9.48206195665483e-07, + "loss": 2.2292, + "step": 3960 + }, + { + "epoch": 245.679012345679, + "learning_rate": 9.479306967062568e-07, + "loss": 2.1746, + "step": 3980 + }, + { + "epoch": 246.91358024691357, + "learning_rate": 9.476551977470307e-07, + "loss": 2.1809, + "step": 4000 + }, + { + "epoch": 248.14814814814815, + "learning_rate": 9.473796987878046e-07, + "loss": 2.1631, + "step": 4020 + }, + { + "epoch": 249.3827160493827, + "learning_rate": 9.471041998285784e-07, + "loss": 2.1437, + "step": 4040 + }, + { + "epoch": 250.6172839506173, + "learning_rate": 9.468287008693522e-07, + "loss": 2.1719, + "step": 4060 + }, + { + "epoch": 251.85185185185185, + "learning_rate": 9.46553201910126e-07, + "loss": 2.1754, + "step": 4080 + }, + { + "epoch": 253.08641975308643, + "learning_rate": 9.462777029508999e-07, + "loss": 2.172, + "step": 4100 + }, + { + "epoch": 254.320987654321, + "learning_rate": 9.460022039916737e-07, + "loss": 2.2135, + "step": 4120 + }, + { + "epoch": 255.55555555555554, + "learning_rate": 9.457267050324476e-07, + "loss": 2.1143, + "step": 4140 + }, + { + "epoch": 256.7901234567901, + "learning_rate": 9.454512060732215e-07, + "loss": 2.1804, + "step": 4160 + }, + { + "epoch": 258.0246913580247, + "learning_rate": 9.451757071139952e-07, + "loss": 2.147, + "step": 4180 + }, + { + "epoch": 259.25925925925924, + "learning_rate": 9.449002081547691e-07, + "loss": 2.1618, + "step": 4200 + }, + { + "epoch": 260.4938271604938, + "learning_rate": 9.446247091955429e-07, + "loss": 2.1434, + "step": 4220 + }, + { + "epoch": 261.7283950617284, + "learning_rate": 9.443492102363168e-07, + "loss": 2.1535, + "step": 4240 + }, + { + "epoch": 262.962962962963, + "learning_rate": 9.440737112770907e-07, + "loss": 2.2116, + "step": 4260 + }, + { + "epoch": 264.1975308641975, + "learning_rate": 9.437982123178645e-07, + "loss": 2.1857, + "step": 4280 + }, + { + "epoch": 265.4320987654321, + "learning_rate": 9.435227133586384e-07, + "loss": 2.1337, + "step": 4300 + }, + { + "epoch": 266.6666666666667, + "learning_rate": 9.432472143994122e-07, + "loss": 2.1554, + "step": 4320 + }, + { + "epoch": 267.9012345679012, + "learning_rate": 9.429717154401861e-07, + "loss": 2.1565, + "step": 4340 + }, + { + "epoch": 269.1358024691358, + "learning_rate": 9.4269621648096e-07, + "loss": 2.168, + "step": 4360 + }, + { + "epoch": 270.3703703703704, + "learning_rate": 9.424207175217338e-07, + "loss": 2.1392, + "step": 4380 + }, + { + "epoch": 271.60493827160496, + "learning_rate": 9.421452185625077e-07, + "loss": 2.1726, + "step": 4400 + }, + { + "epoch": 272.8395061728395, + "learning_rate": 9.418697196032815e-07, + "loss": 2.135, + "step": 4420 + }, + { + "epoch": 274.0740740740741, + "learning_rate": 9.415942206440553e-07, + "loss": 2.1329, + "step": 4440 + }, + { + "epoch": 275.30864197530866, + "learning_rate": 9.413187216848291e-07, + "loss": 2.1334, + "step": 4460 + }, + { + "epoch": 276.5432098765432, + "learning_rate": 9.41043222725603e-07, + "loss": 2.1309, + "step": 4480 + }, + { + "epoch": 277.77777777777777, + "learning_rate": 9.407677237663769e-07, + "loss": 2.0872, + "step": 4500 + }, + { + "epoch": 279.01234567901236, + "learning_rate": 9.404922248071507e-07, + "loss": 2.1426, + "step": 4520 + }, + { + "epoch": 280.24691358024694, + "learning_rate": 9.402167258479246e-07, + "loss": 2.1331, + "step": 4540 + }, + { + "epoch": 281.48148148148147, + "learning_rate": 9.399412268886984e-07, + "loss": 2.0859, + "step": 4560 + }, + { + "epoch": 282.71604938271605, + "learning_rate": 9.396657279294723e-07, + "loss": 2.0755, + "step": 4580 + }, + { + "epoch": 283.95061728395063, + "learning_rate": 9.39390228970246e-07, + "loss": 2.1203, + "step": 4600 + }, + { + "epoch": 285.18518518518516, + "learning_rate": 9.391147300110199e-07, + "loss": 2.0846, + "step": 4620 + }, + { + "epoch": 286.41975308641975, + "learning_rate": 9.388392310517938e-07, + "loss": 2.1284, + "step": 4640 + }, + { + "epoch": 287.65432098765433, + "learning_rate": 9.385637320925677e-07, + "loss": 2.107, + "step": 4660 + }, + { + "epoch": 288.8888888888889, + "learning_rate": 9.382882331333415e-07, + "loss": 2.2206, + "step": 4680 + }, + { + "epoch": 290.12345679012344, + "learning_rate": 9.380127341741153e-07, + "loss": 2.2475, + "step": 4700 + }, + { + "epoch": 291.358024691358, + "learning_rate": 9.377372352148891e-07, + "loss": 2.1887, + "step": 4720 + }, + { + "epoch": 292.5925925925926, + "learning_rate": 9.37461736255663e-07, + "loss": 2.1352, + "step": 4740 + }, + { + "epoch": 293.82716049382714, + "learning_rate": 9.371862372964368e-07, + "loss": 2.1565, + "step": 4760 + }, + { + "epoch": 295.0617283950617, + "learning_rate": 9.369107383372107e-07, + "loss": 2.1574, + "step": 4780 + }, + { + "epoch": 296.2962962962963, + "learning_rate": 9.366352393779845e-07, + "loss": 2.1384, + "step": 4800 + }, + { + "epoch": 297.5308641975309, + "learning_rate": 9.363597404187584e-07, + "loss": 2.1534, + "step": 4820 + }, + { + "epoch": 298.7654320987654, + "learning_rate": 9.360842414595322e-07, + "loss": 2.0941, + "step": 4840 + }, + { + "epoch": 300.0, + "learning_rate": 9.35808742500306e-07, + "loss": 2.101, + "step": 4860 + }, + { + "epoch": 301.2345679012346, + "learning_rate": 9.355332435410799e-07, + "loss": 2.1181, + "step": 4880 + }, + { + "epoch": 302.4691358024691, + "learning_rate": 9.352577445818537e-07, + "loss": 2.0771, + "step": 4900 + }, + { + "epoch": 303.7037037037037, + "learning_rate": 9.349822456226276e-07, + "loss": 2.0886, + "step": 4920 + }, + { + "epoch": 304.9382716049383, + "learning_rate": 9.347067466634014e-07, + "loss": 2.0715, + "step": 4940 + }, + { + "epoch": 306.17283950617286, + "learning_rate": 9.344312477041753e-07, + "loss": 2.0883, + "step": 4960 + }, + { + "epoch": 307.4074074074074, + "learning_rate": 9.34155748744949e-07, + "loss": 2.0767, + "step": 4980 + }, + { + "epoch": 308.641975308642, + "learning_rate": 9.338802497857229e-07, + "loss": 2.0757, + "step": 5000 + }, + { + "epoch": 309.87654320987656, + "learning_rate": 9.336047508264968e-07, + "loss": 2.0775, + "step": 5020 + }, + { + "epoch": 311.1111111111111, + "learning_rate": 9.333292518672707e-07, + "loss": 2.0763, + "step": 5040 + }, + { + "epoch": 312.34567901234567, + "learning_rate": 9.330537529080446e-07, + "loss": 2.065, + "step": 5060 + }, + { + "epoch": 313.58024691358025, + "learning_rate": 9.327782539488184e-07, + "loss": 2.0996, + "step": 5080 + }, + { + "epoch": 314.81481481481484, + "learning_rate": 9.325027549895923e-07, + "loss": 2.0837, + "step": 5100 + }, + { + "epoch": 316.04938271604937, + "learning_rate": 9.322272560303662e-07, + "loss": 2.0985, + "step": 5120 + }, + { + "epoch": 317.28395061728395, + "learning_rate": 9.319517570711399e-07, + "loss": 2.0662, + "step": 5140 + }, + { + "epoch": 318.51851851851853, + "learning_rate": 9.316762581119138e-07, + "loss": 2.1054, + "step": 5160 + }, + { + "epoch": 319.75308641975306, + "learning_rate": 9.314007591526876e-07, + "loss": 2.0688, + "step": 5180 + }, + { + "epoch": 320.98765432098764, + "learning_rate": 9.311252601934615e-07, + "loss": 2.06, + "step": 5200 + }, + { + "epoch": 322.22222222222223, + "learning_rate": 9.308497612342353e-07, + "loss": 2.0608, + "step": 5220 + }, + { + "epoch": 323.4567901234568, + "learning_rate": 9.305742622750092e-07, + "loss": 2.0238, + "step": 5240 + }, + { + "epoch": 324.69135802469134, + "learning_rate": 9.30298763315783e-07, + "loss": 2.0672, + "step": 5260 + }, + { + "epoch": 325.9259259259259, + "learning_rate": 9.300232643565568e-07, + "loss": 2.0045, + "step": 5280 + }, + { + "epoch": 327.1604938271605, + "learning_rate": 9.297477653973307e-07, + "loss": 2.0297, + "step": 5300 + }, + { + "epoch": 328.39506172839504, + "learning_rate": 9.294722664381045e-07, + "loss": 2.0939, + "step": 5320 + }, + { + "epoch": 329.6296296296296, + "learning_rate": 9.291967674788784e-07, + "loss": 2.0309, + "step": 5340 + }, + { + "epoch": 330.8641975308642, + "learning_rate": 9.289212685196523e-07, + "loss": 2.0221, + "step": 5360 + }, + { + "epoch": 332.0987654320988, + "learning_rate": 9.286457695604261e-07, + "loss": 2.0629, + "step": 5380 + }, + { + "epoch": 333.3333333333333, + "learning_rate": 9.283702706011999e-07, + "loss": 2.0113, + "step": 5400 + }, + { + "epoch": 334.5679012345679, + "learning_rate": 9.280947716419737e-07, + "loss": 2.0351, + "step": 5420 + }, + { + "epoch": 335.8024691358025, + "learning_rate": 9.278192726827476e-07, + "loss": 2.0535, + "step": 5440 + }, + { + "epoch": 337.037037037037, + "learning_rate": 9.275437737235214e-07, + "loss": 2.0653, + "step": 5460 + }, + { + "epoch": 338.2716049382716, + "learning_rate": 9.272682747642953e-07, + "loss": 2.0103, + "step": 5480 + }, + { + "epoch": 339.5061728395062, + "learning_rate": 9.269927758050692e-07, + "loss": 2.0367, + "step": 5500 + }, + { + "epoch": 340.74074074074076, + "learning_rate": 9.267172768458429e-07, + "loss": 1.9869, + "step": 5520 + }, + { + "epoch": 341.9753086419753, + "learning_rate": 9.264417778866168e-07, + "loss": 2.0831, + "step": 5540 + }, + { + "epoch": 343.2098765432099, + "learning_rate": 9.261662789273906e-07, + "loss": 2.0198, + "step": 5560 + }, + { + "epoch": 344.44444444444446, + "learning_rate": 9.258907799681645e-07, + "loss": 2.0057, + "step": 5580 + }, + { + "epoch": 345.679012345679, + "learning_rate": 9.256152810089383e-07, + "loss": 1.9973, + "step": 5600 + }, + { + "epoch": 346.91358024691357, + "learning_rate": 9.253397820497122e-07, + "loss": 2.0431, + "step": 5620 + }, + { + "epoch": 348.14814814814815, + "learning_rate": 9.250642830904861e-07, + "loss": 2.0132, + "step": 5640 + }, + { + "epoch": 349.38271604938274, + "learning_rate": 9.247887841312599e-07, + "loss": 2.0589, + "step": 5660 + }, + { + "epoch": 350.61728395061726, + "learning_rate": 9.245132851720337e-07, + "loss": 1.9975, + "step": 5680 + }, + { + "epoch": 351.85185185185185, + "learning_rate": 9.242377862128075e-07, + "loss": 2.0196, + "step": 5700 + }, + { + "epoch": 353.08641975308643, + "learning_rate": 9.239622872535814e-07, + "loss": 1.9605, + "step": 5720 + }, + { + "epoch": 354.320987654321, + "learning_rate": 9.236867882943551e-07, + "loss": 2.0043, + "step": 5740 + }, + { + "epoch": 355.55555555555554, + "learning_rate": 9.234112893351291e-07, + "loss": 1.9835, + "step": 5760 + }, + { + "epoch": 356.7901234567901, + "learning_rate": 9.23135790375903e-07, + "loss": 2.0274, + "step": 5780 + }, + { + "epoch": 358.0246913580247, + "learning_rate": 9.228602914166768e-07, + "loss": 2.0303, + "step": 5800 + }, + { + "epoch": 359.25925925925924, + "learning_rate": 9.225847924574507e-07, + "loss": 1.9691, + "step": 5820 + }, + { + "epoch": 360.4938271604938, + "learning_rate": 9.223092934982245e-07, + "loss": 2.0049, + "step": 5840 + }, + { + "epoch": 361.7283950617284, + "learning_rate": 9.220337945389984e-07, + "loss": 1.9652, + "step": 5860 + }, + { + "epoch": 362.962962962963, + "learning_rate": 9.217582955797723e-07, + "loss": 2.019, + "step": 5880 + }, + { + "epoch": 364.1975308641975, + "learning_rate": 9.214827966205461e-07, + "loss": 1.9819, + "step": 5900 + }, + { + "epoch": 365.4320987654321, + "learning_rate": 9.2120729766132e-07, + "loss": 2.0201, + "step": 5920 + }, + { + "epoch": 366.6666666666667, + "learning_rate": 9.209317987020937e-07, + "loss": 1.9934, + "step": 5940 + }, + { + "epoch": 367.9012345679012, + "learning_rate": 9.206562997428676e-07, + "loss": 1.9566, + "step": 5960 + }, + { + "epoch": 369.1358024691358, + "learning_rate": 9.203808007836415e-07, + "loss": 1.9902, + "step": 5980 + }, + { + "epoch": 370.3703703703704, + "learning_rate": 9.201053018244152e-07, + "loss": 2.0059, + "step": 6000 + }, + { + "epoch": 371.60493827160496, + "learning_rate": 9.198298028651892e-07, + "loss": 1.988, + "step": 6020 + }, + { + "epoch": 372.8395061728395, + "learning_rate": 9.19554303905963e-07, + "loss": 1.9659, + "step": 6040 + }, + { + "epoch": 374.0740740740741, + "learning_rate": 9.192788049467368e-07, + "loss": 2.0319, + "step": 6060 + }, + { + "epoch": 375.30864197530866, + "learning_rate": 9.190033059875106e-07, + "loss": 1.9308, + "step": 6080 + }, + { + "epoch": 376.5432098765432, + "learning_rate": 9.187278070282845e-07, + "loss": 1.967, + "step": 6100 + }, + { + "epoch": 377.77777777777777, + "learning_rate": 9.184523080690584e-07, + "loss": 2.0184, + "step": 6120 + }, + { + "epoch": 379.01234567901236, + "learning_rate": 9.181768091098322e-07, + "loss": 1.9865, + "step": 6140 + }, + { + "epoch": 380.24691358024694, + "learning_rate": 9.179013101506061e-07, + "loss": 1.9415, + "step": 6160 + }, + { + "epoch": 381.48148148148147, + "learning_rate": 9.176258111913799e-07, + "loss": 1.9619, + "step": 6180 + }, + { + "epoch": 382.71604938271605, + "learning_rate": 9.173503122321538e-07, + "loss": 1.949, + "step": 6200 + }, + { + "epoch": 383.95061728395063, + "learning_rate": 9.170748132729275e-07, + "loss": 1.9467, + "step": 6220 + }, + { + "epoch": 385.18518518518516, + "learning_rate": 9.167993143137014e-07, + "loss": 1.9691, + "step": 6240 + }, + { + "epoch": 386.41975308641975, + "learning_rate": 9.165238153544753e-07, + "loss": 1.9643, + "step": 6260 + }, + { + "epoch": 387.65432098765433, + "learning_rate": 9.162483163952491e-07, + "loss": 1.942, + "step": 6280 + }, + { + "epoch": 388.8888888888889, + "learning_rate": 9.15972817436023e-07, + "loss": 1.9002, + "step": 6300 + }, + { + "epoch": 390.12345679012344, + "learning_rate": 9.156973184767968e-07, + "loss": 1.9965, + "step": 6320 + }, + { + "epoch": 391.358024691358, + "learning_rate": 9.154218195175706e-07, + "loss": 1.9343, + "step": 6340 + }, + { + "epoch": 392.5925925925926, + "learning_rate": 9.151463205583446e-07, + "loss": 1.931, + "step": 6360 + }, + { + "epoch": 393.82716049382714, + "learning_rate": 9.148708215991183e-07, + "loss": 1.9628, + "step": 6380 + }, + { + "epoch": 395.0617283950617, + "learning_rate": 9.145953226398922e-07, + "loss": 1.9752, + "step": 6400 + }, + { + "epoch": 396.2962962962963, + "learning_rate": 9.14319823680666e-07, + "loss": 1.9499, + "step": 6420 + }, + { + "epoch": 397.5308641975309, + "learning_rate": 9.140443247214399e-07, + "loss": 1.9546, + "step": 6440 + }, + { + "epoch": 398.7654320987654, + "learning_rate": 9.137688257622137e-07, + "loss": 1.8583, + "step": 6460 + }, + { + "epoch": 400.0, + "learning_rate": 9.134933268029875e-07, + "loss": 1.9405, + "step": 6480 + }, + { + "epoch": 401.2345679012346, + "learning_rate": 9.132178278437614e-07, + "loss": 1.941, + "step": 6500 + }, + { + "epoch": 402.4691358024691, + "learning_rate": 9.129423288845352e-07, + "loss": 1.9432, + "step": 6520 + }, + { + "epoch": 403.7037037037037, + "learning_rate": 9.126668299253092e-07, + "loss": 1.9603, + "step": 6540 + }, + { + "epoch": 404.9382716049383, + "learning_rate": 9.12391330966083e-07, + "loss": 1.9456, + "step": 6560 + }, + { + "epoch": 406.17283950617286, + "learning_rate": 9.121158320068569e-07, + "loss": 1.9488, + "step": 6580 + }, + { + "epoch": 407.4074074074074, + "learning_rate": 9.118403330476307e-07, + "loss": 1.9248, + "step": 6600 + }, + { + "epoch": 408.641975308642, + "learning_rate": 9.115648340884045e-07, + "loss": 1.8897, + "step": 6620 + }, + { + "epoch": 409.87654320987656, + "learning_rate": 9.112893351291784e-07, + "loss": 1.9315, + "step": 6640 + }, + { + "epoch": 411.1111111111111, + "learning_rate": 9.110138361699522e-07, + "loss": 1.9599, + "step": 6660 + }, + { + "epoch": 412.34567901234567, + "learning_rate": 9.107383372107261e-07, + "loss": 1.9441, + "step": 6680 + }, + { + "epoch": 413.58024691358025, + "learning_rate": 9.104628382515e-07, + "loss": 1.8974, + "step": 6700 + }, + { + "epoch": 414.81481481481484, + "learning_rate": 9.101873392922738e-07, + "loss": 1.9078, + "step": 6720 + }, + { + "epoch": 416.04938271604937, + "learning_rate": 9.099118403330477e-07, + "loss": 1.9587, + "step": 6740 + }, + { + "epoch": 417.28395061728395, + "learning_rate": 9.096363413738214e-07, + "loss": 2.0016, + "step": 6760 + }, + { + "epoch": 418.51851851851853, + "learning_rate": 9.093608424145953e-07, + "loss": 1.9557, + "step": 6780 + }, + { + "epoch": 419.75308641975306, + "learning_rate": 9.090853434553691e-07, + "loss": 1.9719, + "step": 6800 + }, + { + "epoch": 420.98765432098764, + "learning_rate": 9.08809844496143e-07, + "loss": 1.9779, + "step": 6820 + }, + { + "epoch": 422.22222222222223, + "learning_rate": 9.085343455369169e-07, + "loss": 1.9754, + "step": 6840 + }, + { + "epoch": 423.4567901234568, + "learning_rate": 9.082588465776907e-07, + "loss": 2.0078, + "step": 6860 + }, + { + "epoch": 424.69135802469134, + "learning_rate": 9.079833476184645e-07, + "loss": 1.9856, + "step": 6880 + }, + { + "epoch": 425.9259259259259, + "learning_rate": 9.077078486592383e-07, + "loss": 1.9698, + "step": 6900 + }, + { + "epoch": 427.1604938271605, + "learning_rate": 9.074323497000122e-07, + "loss": 1.9826, + "step": 6920 + }, + { + "epoch": 428.39506172839504, + "learning_rate": 9.07156850740786e-07, + "loss": 1.9513, + "step": 6940 + }, + { + "epoch": 429.6296296296296, + "learning_rate": 9.068813517815599e-07, + "loss": 1.93, + "step": 6960 + }, + { + "epoch": 430.8641975308642, + "learning_rate": 9.066058528223339e-07, + "loss": 1.9779, + "step": 6980 + }, + { + "epoch": 432.0987654320988, + "learning_rate": 9.063303538631076e-07, + "loss": 1.9937, + "step": 7000 + }, + { + "epoch": 433.3333333333333, + "learning_rate": 9.060548549038814e-07, + "loss": 1.955, + "step": 7020 + }, + { + "epoch": 434.5679012345679, + "learning_rate": 9.057793559446552e-07, + "loss": 1.9652, + "step": 7040 + }, + { + "epoch": 435.8024691358025, + "learning_rate": 9.055038569854291e-07, + "loss": 1.9568, + "step": 7060 + }, + { + "epoch": 437.037037037037, + "learning_rate": 9.05228358026203e-07, + "loss": 1.9787, + "step": 7080 + }, + { + "epoch": 438.2716049382716, + "learning_rate": 9.049528590669768e-07, + "loss": 1.9137, + "step": 7100 + }, + { + "epoch": 439.5061728395062, + "learning_rate": 9.046773601077507e-07, + "loss": 1.9314, + "step": 7120 + }, + { + "epoch": 440.74074074074076, + "learning_rate": 9.044018611485244e-07, + "loss": 1.9961, + "step": 7140 + }, + { + "epoch": 441.9753086419753, + "learning_rate": 9.041263621892983e-07, + "loss": 1.9303, + "step": 7160 + }, + { + "epoch": 443.2098765432099, + "learning_rate": 9.038508632300721e-07, + "loss": 1.982, + "step": 7180 + }, + { + "epoch": 444.44444444444446, + "learning_rate": 9.03575364270846e-07, + "loss": 1.9133, + "step": 7200 + }, + { + "epoch": 445.679012345679, + "learning_rate": 9.032998653116199e-07, + "loss": 1.9376, + "step": 7220 + }, + { + "epoch": 446.91358024691357, + "learning_rate": 9.030243663523937e-07, + "loss": 1.9254, + "step": 7240 + }, + { + "epoch": 448.14814814814815, + "learning_rate": 9.027488673931676e-07, + "loss": 1.9491, + "step": 7260 + }, + { + "epoch": 449.38271604938274, + "learning_rate": 9.024733684339414e-07, + "loss": 1.9455, + "step": 7280 + }, + { + "epoch": 450.61728395061726, + "learning_rate": 9.021978694747153e-07, + "loss": 1.9128, + "step": 7300 + }, + { + "epoch": 451.85185185185185, + "learning_rate": 9.019223705154892e-07, + "loss": 1.9006, + "step": 7320 + }, + { + "epoch": 453.08641975308643, + "learning_rate": 9.016468715562629e-07, + "loss": 1.9172, + "step": 7340 + }, + { + "epoch": 454.320987654321, + "learning_rate": 9.013713725970369e-07, + "loss": 1.9754, + "step": 7360 + }, + { + "epoch": 455.55555555555554, + "learning_rate": 9.010958736378107e-07, + "loss": 1.9759, + "step": 7380 + }, + { + "epoch": 456.7901234567901, + "learning_rate": 9.008203746785846e-07, + "loss": 1.9452, + "step": 7400 + }, + { + "epoch": 458.0246913580247, + "learning_rate": 9.005448757193583e-07, + "loss": 1.9618, + "step": 7420 + }, + { + "epoch": 459.25925925925924, + "learning_rate": 9.002693767601322e-07, + "loss": 1.9364, + "step": 7440 + }, + { + "epoch": 460.4938271604938, + "learning_rate": 8.999938778009061e-07, + "loss": 1.9513, + "step": 7460 + }, + { + "epoch": 461.7283950617284, + "learning_rate": 8.997183788416799e-07, + "loss": 1.9473, + "step": 7480 + }, + { + "epoch": 462.962962962963, + "learning_rate": 8.994428798824538e-07, + "loss": 1.9159, + "step": 7500 + }, + { + "epoch": 464.1975308641975, + "learning_rate": 8.991673809232276e-07, + "loss": 1.9195, + "step": 7520 + }, + { + "epoch": 465.4320987654321, + "learning_rate": 8.988918819640015e-07, + "loss": 1.9647, + "step": 7540 + }, + { + "epoch": 466.6666666666667, + "learning_rate": 8.986163830047752e-07, + "loss": 1.895, + "step": 7560 + }, + { + "epoch": 467.9012345679012, + "learning_rate": 8.983408840455491e-07, + "loss": 1.9408, + "step": 7580 + }, + { + "epoch": 469.1358024691358, + "learning_rate": 8.98065385086323e-07, + "loss": 1.978, + "step": 7600 + }, + { + "epoch": 470.3703703703704, + "learning_rate": 8.977898861270968e-07, + "loss": 1.9085, + "step": 7620 + }, + { + "epoch": 471.60493827160496, + "learning_rate": 8.975143871678707e-07, + "loss": 1.9833, + "step": 7640 + }, + { + "epoch": 472.8395061728395, + "learning_rate": 8.972388882086445e-07, + "loss": 1.9559, + "step": 7660 + }, + { + "epoch": 474.0740740740741, + "learning_rate": 8.969633892494183e-07, + "loss": 1.9333, + "step": 7680 + }, + { + "epoch": 475.30864197530866, + "learning_rate": 8.966878902901923e-07, + "loss": 1.9075, + "step": 7700 + }, + { + "epoch": 476.5432098765432, + "learning_rate": 8.96412391330966e-07, + "loss": 1.9343, + "step": 7720 + }, + { + "epoch": 477.77777777777777, + "learning_rate": 8.961368923717399e-07, + "loss": 1.9107, + "step": 7740 + }, + { + "epoch": 479.01234567901236, + "learning_rate": 8.958613934125137e-07, + "loss": 1.9344, + "step": 7760 + }, + { + "epoch": 480.24691358024694, + "learning_rate": 8.955858944532876e-07, + "loss": 1.9044, + "step": 7780 + }, + { + "epoch": 481.48148148148147, + "learning_rate": 8.953103954940614e-07, + "loss": 1.974, + "step": 7800 + }, + { + "epoch": 482.71604938271605, + "learning_rate": 8.950348965348353e-07, + "loss": 1.9241, + "step": 7820 + }, + { + "epoch": 483.95061728395063, + "learning_rate": 8.947593975756091e-07, + "loss": 1.9377, + "step": 7840 + }, + { + "epoch": 485.18518518518516, + "learning_rate": 8.944838986163829e-07, + "loss": 1.9326, + "step": 7860 + }, + { + "epoch": 486.41975308641975, + "learning_rate": 8.942083996571568e-07, + "loss": 1.9028, + "step": 7880 + }, + { + "epoch": 487.65432098765433, + "learning_rate": 8.939329006979306e-07, + "loss": 1.8872, + "step": 7900 + }, + { + "epoch": 488.8888888888889, + "learning_rate": 8.936574017387045e-07, + "loss": 1.904, + "step": 7920 + }, + { + "epoch": 490.12345679012344, + "learning_rate": 8.933819027794784e-07, + "loss": 1.8897, + "step": 7940 + }, + { + "epoch": 491.358024691358, + "learning_rate": 8.931064038202521e-07, + "loss": 1.9026, + "step": 7960 + }, + { + "epoch": 492.5925925925926, + "learning_rate": 8.92830904861026e-07, + "loss": 1.9287, + "step": 7980 + }, + { + "epoch": 493.82716049382714, + "learning_rate": 8.925554059017998e-07, + "loss": 1.8277, + "step": 8000 + }, + { + "epoch": 495.0617283950617, + "learning_rate": 8.922799069425738e-07, + "loss": 1.8716, + "step": 8020 + }, + { + "epoch": 496.2962962962963, + "learning_rate": 8.920044079833476e-07, + "loss": 1.8907, + "step": 8040 + }, + { + "epoch": 497.5308641975309, + "learning_rate": 8.917289090241215e-07, + "loss": 1.8754, + "step": 8060 + }, + { + "epoch": 498.7654320987654, + "learning_rate": 8.914534100648954e-07, + "loss": 1.8713, + "step": 8080 + }, + { + "epoch": 500.0, + "learning_rate": 8.911779111056691e-07, + "loss": 1.8645, + "step": 8100 + }, + { + "epoch": 501.2345679012346, + "learning_rate": 8.90902412146443e-07, + "loss": 1.896, + "step": 8120 + }, + { + "epoch": 502.4691358024691, + "learning_rate": 8.906269131872168e-07, + "loss": 1.8824, + "step": 8140 + }, + { + "epoch": 503.7037037037037, + "learning_rate": 8.903514142279907e-07, + "loss": 1.8612, + "step": 8160 + }, + { + "epoch": 504.9382716049383, + "learning_rate": 8.900759152687646e-07, + "loss": 1.8747, + "step": 8180 + }, + { + "epoch": 506.17283950617286, + "learning_rate": 8.898004163095384e-07, + "loss": 1.8882, + "step": 8200 + }, + { + "epoch": 507.4074074074074, + "learning_rate": 8.895249173503122e-07, + "loss": 1.8858, + "step": 8220 + }, + { + "epoch": 508.641975308642, + "learning_rate": 8.89249418391086e-07, + "loss": 1.8599, + "step": 8240 + }, + { + "epoch": 509.87654320987656, + "learning_rate": 8.889739194318599e-07, + "loss": 1.9073, + "step": 8260 + }, + { + "epoch": 511.1111111111111, + "learning_rate": 8.886984204726337e-07, + "loss": 1.9222, + "step": 8280 + }, + { + "epoch": 512.3456790123457, + "learning_rate": 8.884229215134076e-07, + "loss": 1.904, + "step": 8300 + }, + { + "epoch": 513.5802469135803, + "learning_rate": 8.881474225541814e-07, + "loss": 1.8662, + "step": 8320 + }, + { + "epoch": 514.8148148148148, + "learning_rate": 8.878719235949553e-07, + "loss": 1.8384, + "step": 8340 + }, + { + "epoch": 516.0493827160494, + "learning_rate": 8.875964246357292e-07, + "loss": 1.9238, + "step": 8360 + }, + { + "epoch": 517.283950617284, + "learning_rate": 8.873209256765029e-07, + "loss": 1.8626, + "step": 8380 + }, + { + "epoch": 518.5185185185185, + "learning_rate": 8.870454267172768e-07, + "loss": 1.9077, + "step": 8400 + }, + { + "epoch": 519.7530864197531, + "learning_rate": 8.867699277580506e-07, + "loss": 1.8783, + "step": 8420 + }, + { + "epoch": 520.9876543209876, + "learning_rate": 8.864944287988245e-07, + "loss": 1.8721, + "step": 8440 + }, + { + "epoch": 522.2222222222222, + "learning_rate": 8.862189298395984e-07, + "loss": 1.8961, + "step": 8460 + }, + { + "epoch": 523.4567901234568, + "learning_rate": 8.859434308803721e-07, + "loss": 1.8552, + "step": 8480 + }, + { + "epoch": 524.6913580246913, + "learning_rate": 8.85667931921146e-07, + "loss": 1.9263, + "step": 8500 + }, + { + "epoch": 525.925925925926, + "learning_rate": 8.853924329619198e-07, + "loss": 1.8674, + "step": 8520 + }, + { + "epoch": 527.1604938271605, + "learning_rate": 8.851169340026937e-07, + "loss": 1.8876, + "step": 8540 + }, + { + "epoch": 528.395061728395, + "learning_rate": 8.848414350434676e-07, + "loss": 1.8834, + "step": 8560 + }, + { + "epoch": 529.6296296296297, + "learning_rate": 8.845659360842414e-07, + "loss": 1.8467, + "step": 8580 + }, + { + "epoch": 530.8641975308642, + "learning_rate": 8.842904371250153e-07, + "loss": 1.8957, + "step": 8600 + }, + { + "epoch": 532.0987654320987, + "learning_rate": 8.840149381657891e-07, + "loss": 1.8911, + "step": 8620 + }, + { + "epoch": 533.3333333333334, + "learning_rate": 8.837394392065629e-07, + "loss": 1.8994, + "step": 8640 + }, + { + "epoch": 534.5679012345679, + "learning_rate": 8.834639402473367e-07, + "loss": 1.9276, + "step": 8660 + }, + { + "epoch": 535.8024691358024, + "learning_rate": 8.831884412881107e-07, + "loss": 1.8605, + "step": 8680 + }, + { + "epoch": 537.0370370370371, + "learning_rate": 8.829129423288845e-07, + "loss": 1.8733, + "step": 8700 + }, + { + "epoch": 538.2716049382716, + "learning_rate": 8.826374433696583e-07, + "loss": 1.9128, + "step": 8720 + }, + { + "epoch": 539.5061728395061, + "learning_rate": 8.823619444104322e-07, + "loss": 1.8563, + "step": 8740 + }, + { + "epoch": 540.7407407407408, + "learning_rate": 8.82086445451206e-07, + "loss": 1.8647, + "step": 8760 + }, + { + "epoch": 541.9753086419753, + "learning_rate": 8.818109464919799e-07, + "loss": 1.8478, + "step": 8780 + }, + { + "epoch": 543.2098765432099, + "learning_rate": 8.815354475327538e-07, + "loss": 1.877, + "step": 8800 + }, + { + "epoch": 544.4444444444445, + "learning_rate": 8.812599485735276e-07, + "loss": 1.8432, + "step": 8820 + }, + { + "epoch": 545.679012345679, + "learning_rate": 8.809844496143015e-07, + "loss": 1.8245, + "step": 8840 + }, + { + "epoch": 546.9135802469136, + "learning_rate": 8.807089506550753e-07, + "loss": 1.841, + "step": 8860 + }, + { + "epoch": 548.1481481481482, + "learning_rate": 8.804334516958492e-07, + "loss": 1.8609, + "step": 8880 + }, + { + "epoch": 549.3827160493827, + "learning_rate": 8.80157952736623e-07, + "loss": 1.8815, + "step": 8900 + }, + { + "epoch": 550.6172839506173, + "learning_rate": 8.798824537773968e-07, + "loss": 1.8866, + "step": 8920 + }, + { + "epoch": 551.8518518518518, + "learning_rate": 8.796069548181707e-07, + "loss": 1.8573, + "step": 8940 + }, + { + "epoch": 553.0864197530864, + "learning_rate": 8.793314558589445e-07, + "loss": 1.8837, + "step": 8960 + }, + { + "epoch": 554.320987654321, + "learning_rate": 8.790559568997184e-07, + "loss": 1.8875, + "step": 8980 + }, + { + "epoch": 555.5555555555555, + "learning_rate": 8.787804579404922e-07, + "loss": 1.934, + "step": 9000 + }, + { + "epoch": 556.7901234567901, + "learning_rate": 8.78504958981266e-07, + "loss": 1.9294, + "step": 9020 + }, + { + "epoch": 558.0246913580247, + "learning_rate": 8.782294600220397e-07, + "loss": 2.0228, + "step": 9040 + }, + { + "epoch": 559.2592592592592, + "learning_rate": 8.779539610628137e-07, + "loss": 2.0074, + "step": 9060 + }, + { + "epoch": 560.4938271604939, + "learning_rate": 8.776784621035876e-07, + "loss": 1.8819, + "step": 9080 + }, + { + "epoch": 561.7283950617284, + "learning_rate": 8.774029631443614e-07, + "loss": 1.8354, + "step": 9100 + }, + { + "epoch": 562.9629629629629, + "learning_rate": 8.771274641851353e-07, + "loss": 1.8683, + "step": 9120 + }, + { + "epoch": 564.1975308641976, + "learning_rate": 8.768519652259091e-07, + "loss": 1.8644, + "step": 9140 + }, + { + "epoch": 565.4320987654321, + "learning_rate": 8.76576466266683e-07, + "loss": 1.8822, + "step": 9160 + }, + { + "epoch": 566.6666666666666, + "learning_rate": 8.763009673074568e-07, + "loss": 1.8544, + "step": 9180 + }, + { + "epoch": 567.9012345679013, + "learning_rate": 8.760254683482306e-07, + "loss": 1.8507, + "step": 9200 + }, + { + "epoch": 569.1358024691358, + "learning_rate": 8.757499693890045e-07, + "loss": 1.8816, + "step": 9220 + }, + { + "epoch": 570.3703703703703, + "learning_rate": 8.754744704297783e-07, + "loss": 1.7993, + "step": 9240 + }, + { + "epoch": 571.604938271605, + "learning_rate": 8.751989714705522e-07, + "loss": 1.8231, + "step": 9260 + }, + { + "epoch": 572.8395061728395, + "learning_rate": 8.74923472511326e-07, + "loss": 1.8354, + "step": 9280 + }, + { + "epoch": 574.074074074074, + "learning_rate": 8.746479735520998e-07, + "loss": 1.8731, + "step": 9300 + }, + { + "epoch": 575.3086419753087, + "learning_rate": 8.743724745928737e-07, + "loss": 1.8377, + "step": 9320 + }, + { + "epoch": 576.5432098765432, + "learning_rate": 8.740969756336475e-07, + "loss": 1.8211, + "step": 9340 + }, + { + "epoch": 577.7777777777778, + "learning_rate": 8.738214766744214e-07, + "loss": 1.8321, + "step": 9360 + }, + { + "epoch": 579.0123456790124, + "learning_rate": 8.735459777151952e-07, + "loss": 1.8091, + "step": 9380 + }, + { + "epoch": 580.2469135802469, + "learning_rate": 8.732704787559691e-07, + "loss": 1.7772, + "step": 9400 + }, + { + "epoch": 581.4814814814815, + "learning_rate": 8.72994979796743e-07, + "loss": 1.7844, + "step": 9420 + }, + { + "epoch": 582.716049382716, + "learning_rate": 8.727194808375168e-07, + "loss": 1.7728, + "step": 9440 + }, + { + "epoch": 583.9506172839506, + "learning_rate": 8.724439818782906e-07, + "loss": 1.7777, + "step": 9460 + }, + { + "epoch": 585.1851851851852, + "learning_rate": 8.721684829190644e-07, + "loss": 1.7895, + "step": 9480 + }, + { + "epoch": 586.4197530864197, + "learning_rate": 8.718929839598383e-07, + "loss": 1.7949, + "step": 9500 + }, + { + "epoch": 587.6543209876543, + "learning_rate": 8.716174850006122e-07, + "loss": 1.7314, + "step": 9520 + }, + { + "epoch": 588.8888888888889, + "learning_rate": 8.713419860413861e-07, + "loss": 1.7573, + "step": 9540 + }, + { + "epoch": 590.1234567901234, + "learning_rate": 8.710664870821599e-07, + "loss": 1.7663, + "step": 9560 + }, + { + "epoch": 591.358024691358, + "learning_rate": 8.707909881229337e-07, + "loss": 1.8203, + "step": 9580 + }, + { + "epoch": 592.5925925925926, + "learning_rate": 8.705154891637076e-07, + "loss": 1.8092, + "step": 9600 + }, + { + "epoch": 593.8271604938271, + "learning_rate": 8.702399902044814e-07, + "loss": 1.7717, + "step": 9620 + }, + { + "epoch": 595.0617283950618, + "learning_rate": 8.699644912452553e-07, + "loss": 1.7992, + "step": 9640 + }, + { + "epoch": 596.2962962962963, + "learning_rate": 8.69688992286029e-07, + "loss": 1.7772, + "step": 9660 + }, + { + "epoch": 597.5308641975308, + "learning_rate": 8.69413493326803e-07, + "loss": 1.7884, + "step": 9680 + }, + { + "epoch": 598.7654320987655, + "learning_rate": 8.691379943675769e-07, + "loss": 1.7618, + "step": 9700 + }, + { + "epoch": 600.0, + "learning_rate": 8.688624954083506e-07, + "loss": 1.7789, + "step": 9720 + }, + { + "epoch": 601.2345679012345, + "learning_rate": 8.685869964491245e-07, + "loss": 1.7624, + "step": 9740 + }, + { + "epoch": 602.4691358024692, + "learning_rate": 8.683114974898983e-07, + "loss": 1.7588, + "step": 9760 + }, + { + "epoch": 603.7037037037037, + "learning_rate": 8.680359985306722e-07, + "loss": 1.793, + "step": 9780 + }, + { + "epoch": 604.9382716049382, + "learning_rate": 8.677604995714461e-07, + "loss": 1.7527, + "step": 9800 + }, + { + "epoch": 606.1728395061729, + "learning_rate": 8.674850006122198e-07, + "loss": 1.7472, + "step": 9820 + }, + { + "epoch": 607.4074074074074, + "learning_rate": 8.672095016529937e-07, + "loss": 1.8087, + "step": 9840 + }, + { + "epoch": 608.641975308642, + "learning_rate": 8.669340026937675e-07, + "loss": 1.7727, + "step": 9860 + }, + { + "epoch": 609.8765432098766, + "learning_rate": 8.666585037345414e-07, + "loss": 1.7672, + "step": 9880 + }, + { + "epoch": 611.1111111111111, + "learning_rate": 8.663830047753152e-07, + "loss": 1.7614, + "step": 9900 + }, + { + "epoch": 612.3456790123457, + "learning_rate": 8.661075058160891e-07, + "loss": 1.7775, + "step": 9920 + }, + { + "epoch": 613.5802469135803, + "learning_rate": 8.65832006856863e-07, + "loss": 1.7512, + "step": 9940 + }, + { + "epoch": 614.8148148148148, + "learning_rate": 8.655565078976368e-07, + "loss": 1.7858, + "step": 9960 + }, + { + "epoch": 616.0493827160494, + "learning_rate": 8.652810089384107e-07, + "loss": 1.8064, + "step": 9980 + }, + { + "epoch": 617.283950617284, + "learning_rate": 8.650055099791844e-07, + "loss": 1.7649, + "step": 10000 + }, + { + "epoch": 618.5185185185185, + "learning_rate": 8.647300110199584e-07, + "loss": 1.8184, + "step": 10020 + }, + { + "epoch": 619.7530864197531, + "learning_rate": 8.644545120607322e-07, + "loss": 1.7532, + "step": 10040 + }, + { + "epoch": 620.9876543209876, + "learning_rate": 8.64179013101506e-07, + "loss": 1.7751, + "step": 10060 + }, + { + "epoch": 622.2222222222222, + "learning_rate": 8.639035141422799e-07, + "loss": 1.7995, + "step": 10080 + }, + { + "epoch": 623.4567901234568, + "learning_rate": 8.636280151830536e-07, + "loss": 1.7737, + "step": 10100 + }, + { + "epoch": 624.6913580246913, + "learning_rate": 8.633525162238275e-07, + "loss": 1.7962, + "step": 10120 + }, + { + "epoch": 625.925925925926, + "learning_rate": 8.630770172646013e-07, + "loss": 1.8228, + "step": 10140 + }, + { + "epoch": 627.1604938271605, + "learning_rate": 8.628015183053752e-07, + "loss": 1.8136, + "step": 10160 + }, + { + "epoch": 628.395061728395, + "learning_rate": 8.625260193461491e-07, + "loss": 1.7708, + "step": 10180 + }, + { + "epoch": 629.6296296296297, + "learning_rate": 8.622505203869229e-07, + "loss": 1.7872, + "step": 10200 + }, + { + "epoch": 630.8641975308642, + "learning_rate": 8.619750214276968e-07, + "loss": 1.7869, + "step": 10220 + }, + { + "epoch": 632.0987654320987, + "learning_rate": 8.616995224684706e-07, + "loss": 1.8117, + "step": 10240 + }, + { + "epoch": 633.3333333333334, + "learning_rate": 8.614240235092445e-07, + "loss": 1.7404, + "step": 10260 + }, + { + "epoch": 634.5679012345679, + "learning_rate": 8.611485245500185e-07, + "loss": 1.7706, + "step": 10280 + }, + { + "epoch": 635.8024691358024, + "learning_rate": 8.608730255907922e-07, + "loss": 1.7781, + "step": 10300 + }, + { + "epoch": 637.0370370370371, + "learning_rate": 8.605975266315661e-07, + "loss": 1.798, + "step": 10320 + }, + { + "epoch": 638.2716049382716, + "learning_rate": 8.603220276723399e-07, + "loss": 1.7768, + "step": 10340 + }, + { + "epoch": 639.5061728395061, + "learning_rate": 8.600465287131137e-07, + "loss": 1.7392, + "step": 10360 + }, + { + "epoch": 640.7407407407408, + "learning_rate": 8.597710297538874e-07, + "loss": 1.801, + "step": 10380 + }, + { + "epoch": 641.9753086419753, + "learning_rate": 8.594955307946614e-07, + "loss": 1.8121, + "step": 10400 + }, + { + "epoch": 643.2098765432099, + "learning_rate": 8.592200318354353e-07, + "loss": 1.7606, + "step": 10420 + }, + { + "epoch": 644.4444444444445, + "learning_rate": 8.589445328762091e-07, + "loss": 1.8174, + "step": 10440 + }, + { + "epoch": 645.679012345679, + "learning_rate": 8.58669033916983e-07, + "loss": 1.7442, + "step": 10460 + }, + { + "epoch": 646.9135802469136, + "learning_rate": 8.583935349577568e-07, + "loss": 1.7462, + "step": 10480 + }, + { + "epoch": 648.1481481481482, + "learning_rate": 8.581180359985307e-07, + "loss": 1.7649, + "step": 10500 + }, + { + "epoch": 649.3827160493827, + "learning_rate": 8.578425370393046e-07, + "loss": 1.7519, + "step": 10520 + }, + { + "epoch": 650.6172839506173, + "learning_rate": 8.575670380800783e-07, + "loss": 1.7684, + "step": 10540 + }, + { + "epoch": 651.8518518518518, + "learning_rate": 8.572915391208522e-07, + "loss": 1.7459, + "step": 10560 + }, + { + "epoch": 653.0864197530864, + "learning_rate": 8.57016040161626e-07, + "loss": 1.7555, + "step": 10580 + }, + { + "epoch": 654.320987654321, + "learning_rate": 8.567405412023999e-07, + "loss": 1.7332, + "step": 10600 + }, + { + "epoch": 655.5555555555555, + "learning_rate": 8.564650422431737e-07, + "loss": 1.7313, + "step": 10620 + }, + { + "epoch": 656.7901234567901, + "learning_rate": 8.561895432839475e-07, + "loss": 1.7064, + "step": 10640 + }, + { + "epoch": 658.0246913580247, + "learning_rate": 8.559140443247214e-07, + "loss": 1.7381, + "step": 10660 + }, + { + "epoch": 659.2592592592592, + "learning_rate": 8.556385453654952e-07, + "loss": 1.7343, + "step": 10680 + }, + { + "epoch": 660.4938271604939, + "learning_rate": 8.553630464062691e-07, + "loss": 1.7368, + "step": 10700 + }, + { + "epoch": 661.7283950617284, + "learning_rate": 8.550875474470429e-07, + "loss": 1.727, + "step": 10720 + }, + { + "epoch": 662.9629629629629, + "learning_rate": 8.548120484878168e-07, + "loss": 1.7363, + "step": 10740 + }, + { + "epoch": 664.1975308641976, + "learning_rate": 8.545365495285906e-07, + "loss": 1.7531, + "step": 10760 + }, + { + "epoch": 665.4320987654321, + "learning_rate": 8.542610505693645e-07, + "loss": 1.7282, + "step": 10780 + }, + { + "epoch": 666.6666666666666, + "learning_rate": 8.539855516101383e-07, + "loss": 1.7276, + "step": 10800 + }, + { + "epoch": 667.9012345679013, + "learning_rate": 8.537100526509121e-07, + "loss": 1.7316, + "step": 10820 + }, + { + "epoch": 669.1358024691358, + "learning_rate": 8.53434553691686e-07, + "loss": 1.7049, + "step": 10840 + }, + { + "epoch": 670.3703703703703, + "learning_rate": 8.531590547324598e-07, + "loss": 1.7183, + "step": 10860 + }, + { + "epoch": 671.604938271605, + "learning_rate": 8.528835557732337e-07, + "loss": 1.7383, + "step": 10880 + }, + { + "epoch": 672.8395061728395, + "learning_rate": 8.526080568140074e-07, + "loss": 1.7376, + "step": 10900 + }, + { + "epoch": 674.074074074074, + "learning_rate": 8.523325578547813e-07, + "loss": 1.7909, + "step": 10920 + }, + { + "epoch": 675.3086419753087, + "learning_rate": 8.520570588955552e-07, + "loss": 1.7334, + "step": 10940 + }, + { + "epoch": 676.5432098765432, + "learning_rate": 8.51781559936329e-07, + "loss": 1.7561, + "step": 10960 + }, + { + "epoch": 677.7777777777778, + "learning_rate": 8.515060609771029e-07, + "loss": 1.6731, + "step": 10980 + }, + { + "epoch": 679.0123456790124, + "learning_rate": 8.512305620178768e-07, + "loss": 1.737, + "step": 11000 + }, + { + "epoch": 680.2469135802469, + "learning_rate": 8.509550630586507e-07, + "loss": 1.7584, + "step": 11020 + }, + { + "epoch": 681.4814814814815, + "learning_rate": 8.506795640994246e-07, + "loss": 1.7177, + "step": 11040 + }, + { + "epoch": 682.716049382716, + "learning_rate": 8.504040651401984e-07, + "loss": 1.7428, + "step": 11060 + }, + { + "epoch": 683.9506172839506, + "learning_rate": 8.501285661809722e-07, + "loss": 1.7631, + "step": 11080 + }, + { + "epoch": 685.1851851851852, + "learning_rate": 8.49853067221746e-07, + "loss": 1.7386, + "step": 11100 + }, + { + "epoch": 686.4197530864197, + "learning_rate": 8.495775682625199e-07, + "loss": 1.7397, + "step": 11120 + }, + { + "epoch": 687.6543209876543, + "learning_rate": 8.493020693032938e-07, + "loss": 1.748, + "step": 11140 + }, + { + "epoch": 688.8888888888889, + "learning_rate": 8.490265703440676e-07, + "loss": 1.7534, + "step": 11160 + }, + { + "epoch": 690.1234567901234, + "learning_rate": 8.487510713848414e-07, + "loss": 1.7246, + "step": 11180 + }, + { + "epoch": 691.358024691358, + "learning_rate": 8.484755724256152e-07, + "loss": 1.7368, + "step": 11200 + }, + { + "epoch": 692.5925925925926, + "learning_rate": 8.482000734663891e-07, + "loss": 1.7433, + "step": 11220 + }, + { + "epoch": 693.8271604938271, + "learning_rate": 8.479245745071629e-07, + "loss": 1.6926, + "step": 11240 + }, + { + "epoch": 695.0617283950618, + "learning_rate": 8.476490755479368e-07, + "loss": 1.7433, + "step": 11260 + }, + { + "epoch": 696.2962962962963, + "learning_rate": 8.473735765887107e-07, + "loss": 1.7399, + "step": 11280 + }, + { + "epoch": 697.5308641975308, + "learning_rate": 8.470980776294845e-07, + "loss": 1.7287, + "step": 11300 + }, + { + "epoch": 698.7654320987655, + "learning_rate": 8.468225786702584e-07, + "loss": 1.724, + "step": 11320 + }, + { + "epoch": 700.0, + "learning_rate": 8.465470797110321e-07, + "loss": 1.7264, + "step": 11340 + }, + { + "epoch": 701.2345679012345, + "learning_rate": 8.462715807518059e-07, + "loss": 1.6885, + "step": 11360 + }, + { + "epoch": 702.4691358024692, + "learning_rate": 8.459960817925798e-07, + "loss": 1.7409, + "step": 11380 + }, + { + "epoch": 703.7037037037037, + "learning_rate": 8.457205828333537e-07, + "loss": 1.7205, + "step": 11400 + }, + { + "epoch": 704.9382716049382, + "learning_rate": 8.454450838741276e-07, + "loss": 1.7151, + "step": 11420 + }, + { + "epoch": 706.1728395061729, + "learning_rate": 8.451695849149013e-07, + "loss": 1.7348, + "step": 11440 + }, + { + "epoch": 707.4074074074074, + "learning_rate": 8.448940859556752e-07, + "loss": 1.7653, + "step": 11460 + }, + { + "epoch": 708.641975308642, + "learning_rate": 8.44618586996449e-07, + "loss": 1.6979, + "step": 11480 + }, + { + "epoch": 709.8765432098766, + "learning_rate": 8.443430880372229e-07, + "loss": 1.7564, + "step": 11500 + }, + { + "epoch": 711.1111111111111, + "learning_rate": 8.440675890779968e-07, + "loss": 1.7528, + "step": 11520 + }, + { + "epoch": 712.3456790123457, + "learning_rate": 8.437920901187706e-07, + "loss": 1.7172, + "step": 11540 + }, + { + "epoch": 713.5802469135803, + "learning_rate": 8.435165911595445e-07, + "loss": 1.7624, + "step": 11560 + }, + { + "epoch": 714.8148148148148, + "learning_rate": 8.432410922003183e-07, + "loss": 1.7049, + "step": 11580 + }, + { + "epoch": 716.0493827160494, + "learning_rate": 8.429655932410922e-07, + "loss": 1.6822, + "step": 11600 + }, + { + "epoch": 717.283950617284, + "learning_rate": 8.426900942818659e-07, + "loss": 1.7359, + "step": 11620 + }, + { + "epoch": 718.5185185185185, + "learning_rate": 8.424145953226398e-07, + "loss": 1.6829, + "step": 11640 + }, + { + "epoch": 719.7530864197531, + "learning_rate": 8.421390963634137e-07, + "loss": 1.7477, + "step": 11660 + }, + { + "epoch": 720.9876543209876, + "learning_rate": 8.418635974041875e-07, + "loss": 1.7161, + "step": 11680 + }, + { + "epoch": 722.2222222222222, + "learning_rate": 8.415880984449614e-07, + "loss": 1.7509, + "step": 11700 + }, + { + "epoch": 723.4567901234568, + "learning_rate": 8.413125994857352e-07, + "loss": 1.7336, + "step": 11720 + }, + { + "epoch": 724.6913580246913, + "learning_rate": 8.410371005265091e-07, + "loss": 1.7179, + "step": 11740 + }, + { + "epoch": 725.925925925926, + "learning_rate": 8.40761601567283e-07, + "loss": 1.7154, + "step": 11760 + }, + { + "epoch": 727.1604938271605, + "learning_rate": 8.404861026080568e-07, + "loss": 1.7359, + "step": 11780 + }, + { + "epoch": 728.395061728395, + "learning_rate": 8.402106036488307e-07, + "loss": 1.7177, + "step": 11800 + }, + { + "epoch": 729.6296296296297, + "learning_rate": 8.399351046896045e-07, + "loss": 1.6956, + "step": 11820 + }, + { + "epoch": 730.8641975308642, + "learning_rate": 8.396596057303784e-07, + "loss": 1.7127, + "step": 11840 + }, + { + "epoch": 732.0987654320987, + "learning_rate": 8.393841067711522e-07, + "loss": 1.7155, + "step": 11860 + }, + { + "epoch": 733.3333333333334, + "learning_rate": 8.39108607811926e-07, + "loss": 1.7437, + "step": 11880 + }, + { + "epoch": 734.5679012345679, + "learning_rate": 8.388331088526999e-07, + "loss": 1.7857, + "step": 11900 + }, + { + "epoch": 735.8024691358024, + "learning_rate": 8.385576098934737e-07, + "loss": 1.7504, + "step": 11920 + }, + { + "epoch": 737.0370370370371, + "learning_rate": 8.382821109342476e-07, + "loss": 1.7573, + "step": 11940 + }, + { + "epoch": 738.2716049382716, + "learning_rate": 8.380066119750214e-07, + "loss": 1.7569, + "step": 11960 + }, + { + "epoch": 739.5061728395061, + "learning_rate": 8.377311130157952e-07, + "loss": 1.7422, + "step": 11980 + }, + { + "epoch": 740.7407407407408, + "learning_rate": 8.37455614056569e-07, + "loss": 1.7292, + "step": 12000 + }, + { + "epoch": 741.9753086419753, + "learning_rate": 8.371801150973429e-07, + "loss": 1.7548, + "step": 12020 + }, + { + "epoch": 743.2098765432099, + "learning_rate": 8.369046161381168e-07, + "loss": 1.7435, + "step": 12040 + }, + { + "epoch": 744.4444444444445, + "learning_rate": 8.366291171788906e-07, + "loss": 1.73, + "step": 12060 + }, + { + "epoch": 745.679012345679, + "learning_rate": 8.363536182196645e-07, + "loss": 1.7321, + "step": 12080 + }, + { + "epoch": 746.9135802469136, + "learning_rate": 8.360781192604383e-07, + "loss": 1.7358, + "step": 12100 + }, + { + "epoch": 748.1481481481482, + "learning_rate": 8.358026203012122e-07, + "loss": 1.7249, + "step": 12120 + }, + { + "epoch": 749.3827160493827, + "learning_rate": 8.355271213419861e-07, + "loss": 1.744, + "step": 12140 + }, + { + "epoch": 750.6172839506173, + "learning_rate": 8.352516223827598e-07, + "loss": 1.7205, + "step": 12160 + }, + { + "epoch": 751.8518518518518, + "learning_rate": 8.349761234235337e-07, + "loss": 1.7333, + "step": 12180 + }, + { + "epoch": 753.0864197530864, + "learning_rate": 8.347006244643075e-07, + "loss": 1.7223, + "step": 12200 + }, + { + "epoch": 754.320987654321, + "learning_rate": 8.344251255050814e-07, + "loss": 1.7959, + "step": 12220 + }, + { + "epoch": 755.5555555555555, + "learning_rate": 8.341496265458553e-07, + "loss": 1.7236, + "step": 12240 + }, + { + "epoch": 756.7901234567901, + "learning_rate": 8.33874127586629e-07, + "loss": 1.6977, + "step": 12260 + }, + { + "epoch": 758.0246913580247, + "learning_rate": 8.335986286274029e-07, + "loss": 1.7903, + "step": 12280 + }, + { + "epoch": 759.2592592592592, + "learning_rate": 8.333231296681767e-07, + "loss": 1.8314, + "step": 12300 + }, + { + "epoch": 760.4938271604939, + "learning_rate": 8.330476307089506e-07, + "loss": 1.7828, + "step": 12320 + }, + { + "epoch": 761.7283950617284, + "learning_rate": 8.327721317497245e-07, + "loss": 1.7989, + "step": 12340 + }, + { + "epoch": 762.9629629629629, + "learning_rate": 8.324966327904983e-07, + "loss": 1.8263, + "step": 12360 + }, + { + "epoch": 764.1975308641976, + "learning_rate": 8.322211338312722e-07, + "loss": 1.7722, + "step": 12380 + }, + { + "epoch": 765.4320987654321, + "learning_rate": 8.31945634872046e-07, + "loss": 1.7722, + "step": 12400 + }, + { + "epoch": 766.6666666666666, + "learning_rate": 8.316701359128198e-07, + "loss": 1.8189, + "step": 12420 + }, + { + "epoch": 767.9012345679013, + "learning_rate": 8.313946369535936e-07, + "loss": 1.8084, + "step": 12440 + }, + { + "epoch": 769.1358024691358, + "learning_rate": 8.311191379943675e-07, + "loss": 1.8459, + "step": 12460 + }, + { + "epoch": 770.3703703703703, + "learning_rate": 8.308436390351413e-07, + "loss": 1.8127, + "step": 12480 + }, + { + "epoch": 771.604938271605, + "learning_rate": 8.305681400759153e-07, + "loss": 1.8462, + "step": 12500 + }, + { + "epoch": 772.8395061728395, + "learning_rate": 8.302926411166891e-07, + "loss": 1.8975, + "step": 12520 + }, + { + "epoch": 774.074074074074, + "learning_rate": 8.300171421574629e-07, + "loss": 1.8836, + "step": 12540 + }, + { + "epoch": 775.3086419753087, + "learning_rate": 8.297416431982368e-07, + "loss": 1.8912, + "step": 12560 + }, + { + "epoch": 776.5432098765432, + "learning_rate": 8.294661442390106e-07, + "loss": 1.962, + "step": 12580 + }, + { + "epoch": 777.7777777777778, + "learning_rate": 8.291906452797846e-07, + "loss": 1.9474, + "step": 12600 + }, + { + "epoch": 779.0123456790124, + "learning_rate": 8.289151463205584e-07, + "loss": 1.9509, + "step": 12620 + }, + { + "epoch": 780.2469135802469, + "learning_rate": 8.286396473613322e-07, + "loss": 1.9723, + "step": 12640 + }, + { + "epoch": 781.4814814814815, + "learning_rate": 8.283641484021061e-07, + "loss": 2.0032, + "step": 12660 + }, + { + "epoch": 782.716049382716, + "learning_rate": 8.280886494428799e-07, + "loss": 2.0202, + "step": 12680 + }, + { + "epoch": 783.9506172839506, + "learning_rate": 8.278131504836536e-07, + "loss": 2.0187, + "step": 12700 + }, + { + "epoch": 785.1851851851852, + "learning_rate": 8.275376515244275e-07, + "loss": 2.0322, + "step": 12720 + }, + { + "epoch": 786.4197530864197, + "learning_rate": 8.272621525652014e-07, + "loss": 1.9868, + "step": 12740 + }, + { + "epoch": 787.6543209876543, + "learning_rate": 8.269866536059753e-07, + "loss": 2.0095, + "step": 12760 + }, + { + "epoch": 788.8888888888889, + "learning_rate": 8.267111546467491e-07, + "loss": 2.0626, + "step": 12780 + }, + { + "epoch": 790.1234567901234, + "learning_rate": 8.264356556875229e-07, + "loss": 1.9852, + "step": 12800 + }, + { + "epoch": 791.358024691358, + "learning_rate": 8.261601567282967e-07, + "loss": 2.1492, + "step": 12820 + }, + { + "epoch": 792.5925925925926, + "learning_rate": 8.258846577690706e-07, + "loss": 2.1298, + "step": 12840 + }, + { + "epoch": 793.8271604938271, + "learning_rate": 8.256091588098445e-07, + "loss": 2.0899, + "step": 12860 + }, + { + "epoch": 795.0617283950618, + "learning_rate": 8.253336598506183e-07, + "loss": 2.1285, + "step": 12880 + }, + { + "epoch": 796.2962962962963, + "learning_rate": 8.250581608913922e-07, + "loss": 2.1273, + "step": 12900 + }, + { + "epoch": 797.5308641975308, + "learning_rate": 8.24782661932166e-07, + "loss": 2.0783, + "step": 12920 + }, + { + "epoch": 798.7654320987655, + "learning_rate": 8.245071629729399e-07, + "loss": 2.0597, + "step": 12940 + }, + { + "epoch": 800.0, + "learning_rate": 8.242316640137136e-07, + "loss": 2.0814, + "step": 12960 + }, + { + "epoch": 801.2345679012345, + "learning_rate": 8.239561650544875e-07, + "loss": 2.0731, + "step": 12980 + }, + { + "epoch": 802.4691358024692, + "learning_rate": 8.236806660952614e-07, + "loss": 2.0179, + "step": 13000 + }, + { + "epoch": 803.7037037037037, + "learning_rate": 8.234051671360352e-07, + "loss": 2.056, + "step": 13020 + }, + { + "epoch": 804.9382716049382, + "learning_rate": 8.231296681768091e-07, + "loss": 1.9941, + "step": 13040 + }, + { + "epoch": 806.1728395061729, + "learning_rate": 8.228541692175829e-07, + "loss": 1.9576, + "step": 13060 + }, + { + "epoch": 807.4074074074074, + "learning_rate": 8.225786702583567e-07, + "loss": 1.9323, + "step": 13080 + }, + { + "epoch": 808.641975308642, + "learning_rate": 8.223031712991305e-07, + "loss": 1.9424, + "step": 13100 + }, + { + "epoch": 809.8765432098766, + "learning_rate": 8.220276723399044e-07, + "loss": 1.9416, + "step": 13120 + }, + { + "epoch": 811.1111111111111, + "learning_rate": 8.217521733806783e-07, + "loss": 1.9588, + "step": 13140 + }, + { + "epoch": 812.3456790123457, + "learning_rate": 8.214766744214521e-07, + "loss": 1.91, + "step": 13160 + }, + { + "epoch": 813.5802469135803, + "learning_rate": 8.21201175462226e-07, + "loss": 1.9721, + "step": 13180 + }, + { + "epoch": 814.8148148148148, + "learning_rate": 8.209256765029998e-07, + "loss": 1.912, + "step": 13200 + }, + { + "epoch": 816.0493827160494, + "learning_rate": 8.206501775437737e-07, + "loss": 1.9495, + "step": 13220 + }, + { + "epoch": 817.283950617284, + "learning_rate": 8.203746785845476e-07, + "loss": 1.9077, + "step": 13240 + }, + { + "epoch": 818.5185185185185, + "learning_rate": 8.200991796253214e-07, + "loss": 1.9338, + "step": 13260 + }, + { + "epoch": 819.7530864197531, + "learning_rate": 8.198236806660953e-07, + "loss": 1.9473, + "step": 13280 + }, + { + "epoch": 820.9876543209876, + "learning_rate": 8.195481817068691e-07, + "loss": 1.9889, + "step": 13300 + }, + { + "epoch": 822.2222222222222, + "learning_rate": 8.19272682747643e-07, + "loss": 1.9169, + "step": 13320 + }, + { + "epoch": 823.4567901234568, + "learning_rate": 8.189971837884167e-07, + "loss": 1.9493, + "step": 13340 + }, + { + "epoch": 824.6913580246913, + "learning_rate": 8.187216848291906e-07, + "loss": 1.9277, + "step": 13360 + }, + { + "epoch": 825.925925925926, + "learning_rate": 8.184461858699645e-07, + "loss": 1.8767, + "step": 13380 + }, + { + "epoch": 827.1604938271605, + "learning_rate": 8.181706869107383e-07, + "loss": 1.9362, + "step": 13400 + }, + { + "epoch": 828.395061728395, + "learning_rate": 8.178951879515122e-07, + "loss": 1.8912, + "step": 13420 + }, + { + "epoch": 829.6296296296297, + "learning_rate": 8.17619688992286e-07, + "loss": 1.9401, + "step": 13440 + }, + { + "epoch": 830.8641975308642, + "learning_rate": 8.173441900330599e-07, + "loss": 1.9745, + "step": 13460 + }, + { + "epoch": 832.0987654320987, + "learning_rate": 8.170686910738338e-07, + "loss": 1.9682, + "step": 13480 + }, + { + "epoch": 833.3333333333334, + "learning_rate": 8.167931921146075e-07, + "loss": 1.8886, + "step": 13500 + }, + { + "epoch": 834.5679012345679, + "learning_rate": 8.165176931553814e-07, + "loss": 1.858, + "step": 13520 + }, + { + "epoch": 835.8024691358024, + "learning_rate": 8.162421941961552e-07, + "loss": 1.8989, + "step": 13540 + }, + { + "epoch": 837.0370370370371, + "learning_rate": 8.159666952369291e-07, + "loss": 1.9077, + "step": 13560 + }, + { + "epoch": 838.2716049382716, + "learning_rate": 8.156911962777029e-07, + "loss": 1.8887, + "step": 13580 + }, + { + "epoch": 839.5061728395061, + "learning_rate": 8.154156973184767e-07, + "loss": 1.8853, + "step": 13600 + }, + { + "epoch": 840.7407407407408, + "learning_rate": 8.151401983592506e-07, + "loss": 1.8671, + "step": 13620 + }, + { + "epoch": 841.9753086419753, + "learning_rate": 8.148646994000244e-07, + "loss": 1.9438, + "step": 13640 + }, + { + "epoch": 843.2098765432099, + "learning_rate": 8.145892004407983e-07, + "loss": 1.9565, + "step": 13660 + }, + { + "epoch": 844.4444444444445, + "learning_rate": 8.14313701481572e-07, + "loss": 1.927, + "step": 13680 + }, + { + "epoch": 845.679012345679, + "learning_rate": 8.14038202522346e-07, + "loss": 1.9143, + "step": 13700 + }, + { + "epoch": 846.9135802469136, + "learning_rate": 8.137627035631199e-07, + "loss": 1.9466, + "step": 13720 + }, + { + "epoch": 848.1481481481482, + "learning_rate": 8.134872046038937e-07, + "loss": 1.9136, + "step": 13740 + }, + { + "epoch": 849.3827160493827, + "learning_rate": 8.132117056446676e-07, + "loss": 1.9526, + "step": 13760 + }, + { + "epoch": 850.6172839506173, + "learning_rate": 8.129362066854413e-07, + "loss": 1.9294, + "step": 13780 + }, + { + "epoch": 851.8518518518518, + "learning_rate": 8.126607077262152e-07, + "loss": 1.9162, + "step": 13800 + }, + { + "epoch": 853.0864197530864, + "learning_rate": 8.12385208766989e-07, + "loss": 1.9758, + "step": 13820 + }, + { + "epoch": 854.320987654321, + "learning_rate": 8.121097098077629e-07, + "loss": 1.9344, + "step": 13840 + }, + { + "epoch": 855.5555555555555, + "learning_rate": 8.118342108485368e-07, + "loss": 1.9263, + "step": 13860 + }, + { + "epoch": 856.7901234567901, + "learning_rate": 8.115587118893105e-07, + "loss": 1.9164, + "step": 13880 + }, + { + "epoch": 858.0246913580247, + "learning_rate": 8.112832129300844e-07, + "loss": 1.9291, + "step": 13900 + }, + { + "epoch": 859.2592592592592, + "learning_rate": 8.110077139708582e-07, + "loss": 1.9555, + "step": 13920 + }, + { + "epoch": 860.4938271604939, + "learning_rate": 8.107322150116321e-07, + "loss": 1.9301, + "step": 13940 + }, + { + "epoch": 861.7283950617284, + "learning_rate": 8.104567160524059e-07, + "loss": 1.9491, + "step": 13960 + }, + { + "epoch": 862.9629629629629, + "learning_rate": 8.101812170931799e-07, + "loss": 1.9694, + "step": 13980 + }, + { + "epoch": 864.1975308641976, + "learning_rate": 8.099057181339538e-07, + "loss": 1.9086, + "step": 14000 + }, + { + "epoch": 865.4320987654321, + "learning_rate": 8.096302191747276e-07, + "loss": 1.9431, + "step": 14020 + }, + { + "epoch": 866.6666666666666, + "learning_rate": 8.093547202155014e-07, + "loss": 1.9224, + "step": 14040 + }, + { + "epoch": 867.9012345679013, + "learning_rate": 8.090792212562752e-07, + "loss": 1.9335, + "step": 14060 + }, + { + "epoch": 869.1358024691358, + "learning_rate": 8.088037222970491e-07, + "loss": 1.9382, + "step": 14080 + }, + { + "epoch": 870.3703703703703, + "learning_rate": 8.08528223337823e-07, + "loss": 1.9268, + "step": 14100 + }, + { + "epoch": 871.604938271605, + "learning_rate": 8.082527243785968e-07, + "loss": 1.9787, + "step": 14120 + }, + { + "epoch": 872.8395061728395, + "learning_rate": 8.079772254193706e-07, + "loss": 1.9271, + "step": 14140 + }, + { + "epoch": 874.074074074074, + "learning_rate": 8.077017264601444e-07, + "loss": 1.9718, + "step": 14160 + }, + { + "epoch": 875.3086419753087, + "learning_rate": 8.074262275009183e-07, + "loss": 1.9799, + "step": 14180 + }, + { + "epoch": 876.5432098765432, + "learning_rate": 8.071507285416921e-07, + "loss": 1.9316, + "step": 14200 + }, + { + "epoch": 877.7777777777778, + "learning_rate": 8.06875229582466e-07, + "loss": 1.8673, + "step": 14220 + }, + { + "epoch": 879.0123456790124, + "learning_rate": 8.065997306232399e-07, + "loss": 1.9195, + "step": 14240 + }, + { + "epoch": 880.2469135802469, + "learning_rate": 8.063242316640137e-07, + "loss": 1.8892, + "step": 14260 + }, + { + "epoch": 881.4814814814815, + "learning_rate": 8.060487327047876e-07, + "loss": 1.9126, + "step": 14280 + }, + { + "epoch": 882.716049382716, + "learning_rate": 8.057732337455613e-07, + "loss": 1.8663, + "step": 14300 + }, + { + "epoch": 883.9506172839506, + "learning_rate": 8.054977347863352e-07, + "loss": 1.9217, + "step": 14320 + }, + { + "epoch": 885.1851851851852, + "learning_rate": 8.05222235827109e-07, + "loss": 1.9216, + "step": 14340 + }, + { + "epoch": 886.4197530864197, + "learning_rate": 8.049467368678829e-07, + "loss": 1.904, + "step": 14360 + }, + { + "epoch": 887.6543209876543, + "learning_rate": 8.046712379086568e-07, + "loss": 1.8645, + "step": 14380 + }, + { + "epoch": 888.8888888888889, + "learning_rate": 8.043957389494306e-07, + "loss": 1.8726, + "step": 14400 + }, + { + "epoch": 890.1234567901234, + "learning_rate": 8.041202399902044e-07, + "loss": 1.9218, + "step": 14420 + }, + { + "epoch": 891.358024691358, + "learning_rate": 8.038447410309782e-07, + "loss": 1.875, + "step": 14440 + }, + { + "epoch": 892.5925925925926, + "learning_rate": 8.035692420717521e-07, + "loss": 1.8218, + "step": 14460 + }, + { + "epoch": 893.8271604938271, + "learning_rate": 8.03293743112526e-07, + "loss": 1.8583, + "step": 14480 + }, + { + "epoch": 895.0617283950618, + "learning_rate": 8.030182441532998e-07, + "loss": 1.8696, + "step": 14500 + }, + { + "epoch": 896.2962962962963, + "learning_rate": 8.027427451940737e-07, + "loss": 1.883, + "step": 14520 + }, + { + "epoch": 897.5308641975308, + "learning_rate": 8.024672462348475e-07, + "loss": 1.9137, + "step": 14540 + }, + { + "epoch": 898.7654320987655, + "learning_rate": 8.021917472756214e-07, + "loss": 1.871, + "step": 14560 + }, + { + "epoch": 900.0, + "learning_rate": 8.019162483163951e-07, + "loss": 1.8984, + "step": 14580 + }, + { + "epoch": 901.2345679012345, + "learning_rate": 8.01640749357169e-07, + "loss": 1.8821, + "step": 14600 + }, + { + "epoch": 902.4691358024692, + "learning_rate": 8.013652503979429e-07, + "loss": 1.9334, + "step": 14620 + }, + { + "epoch": 903.7037037037037, + "learning_rate": 8.010897514387167e-07, + "loss": 1.9117, + "step": 14640 + }, + { + "epoch": 904.9382716049382, + "learning_rate": 8.008142524794907e-07, + "loss": 1.9062, + "step": 14660 + }, + { + "epoch": 906.1728395061729, + "learning_rate": 8.005387535202643e-07, + "loss": 1.8953, + "step": 14680 + }, + { + "epoch": 907.4074074074074, + "learning_rate": 8.002632545610383e-07, + "loss": 1.8603, + "step": 14700 + }, + { + "epoch": 908.641975308642, + "learning_rate": 7.999877556018122e-07, + "loss": 1.8817, + "step": 14720 + }, + { + "epoch": 909.8765432098766, + "learning_rate": 7.99712256642586e-07, + "loss": 1.8897, + "step": 14740 + }, + { + "epoch": 911.1111111111111, + "learning_rate": 7.994367576833599e-07, + "loss": 1.8911, + "step": 14760 + }, + { + "epoch": 912.3456790123457, + "learning_rate": 7.991612587241337e-07, + "loss": 1.9044, + "step": 14780 + }, + { + "epoch": 913.5802469135803, + "learning_rate": 7.988857597649076e-07, + "loss": 1.8823, + "step": 14800 + }, + { + "epoch": 914.8148148148148, + "learning_rate": 7.986102608056815e-07, + "loss": 1.8894, + "step": 14820 + }, + { + "epoch": 916.0493827160494, + "learning_rate": 7.983347618464552e-07, + "loss": 1.8829, + "step": 14840 + }, + { + "epoch": 917.283950617284, + "learning_rate": 7.980592628872291e-07, + "loss": 1.8715, + "step": 14860 + }, + { + "epoch": 918.5185185185185, + "learning_rate": 7.977837639280029e-07, + "loss": 1.8413, + "step": 14880 + }, + { + "epoch": 919.7530864197531, + "learning_rate": 7.975082649687768e-07, + "loss": 1.8672, + "step": 14900 + }, + { + "epoch": 920.9876543209876, + "learning_rate": 7.972327660095507e-07, + "loss": 1.9188, + "step": 14920 + }, + { + "epoch": 922.2222222222222, + "learning_rate": 7.969572670503245e-07, + "loss": 1.8789, + "step": 14940 + }, + { + "epoch": 923.4567901234568, + "learning_rate": 7.966817680910983e-07, + "loss": 1.8792, + "step": 14960 + }, + { + "epoch": 924.6913580246913, + "learning_rate": 7.964062691318721e-07, + "loss": 1.9256, + "step": 14980 + }, + { + "epoch": 925.925925925926, + "learning_rate": 7.96130770172646e-07, + "loss": 1.8604, + "step": 15000 + }, + { + "epoch": 927.1604938271605, + "learning_rate": 7.958552712134197e-07, + "loss": 1.8824, + "step": 15020 + }, + { + "epoch": 928.395061728395, + "learning_rate": 7.955797722541937e-07, + "loss": 1.845, + "step": 15040 + }, + { + "epoch": 929.6296296296297, + "learning_rate": 7.953042732949675e-07, + "loss": 1.8786, + "step": 15060 + }, + { + "epoch": 930.8641975308642, + "learning_rate": 7.950287743357414e-07, + "loss": 1.8718, + "step": 15080 + }, + { + "epoch": 932.0987654320987, + "learning_rate": 7.947532753765153e-07, + "loss": 1.8616, + "step": 15100 + }, + { + "epoch": 933.3333333333334, + "learning_rate": 7.94477776417289e-07, + "loss": 1.8385, + "step": 15120 + }, + { + "epoch": 934.5679012345679, + "learning_rate": 7.942022774580629e-07, + "loss": 1.8861, + "step": 15140 + }, + { + "epoch": 935.8024691358024, + "learning_rate": 7.939267784988367e-07, + "loss": 1.8108, + "step": 15160 + }, + { + "epoch": 937.0370370370371, + "learning_rate": 7.936512795396106e-07, + "loss": 1.8679, + "step": 15180 + }, + { + "epoch": 938.2716049382716, + "learning_rate": 7.933757805803845e-07, + "loss": 1.8664, + "step": 15200 + }, + { + "epoch": 939.5061728395061, + "learning_rate": 7.931002816211582e-07, + "loss": 1.8981, + "step": 15220 + }, + { + "epoch": 940.7407407407408, + "learning_rate": 7.928247826619321e-07, + "loss": 1.8679, + "step": 15240 + }, + { + "epoch": 941.9753086419753, + "learning_rate": 7.925492837027059e-07, + "loss": 1.8222, + "step": 15260 + }, + { + "epoch": 943.2098765432099, + "learning_rate": 7.922737847434798e-07, + "loss": 1.8878, + "step": 15280 + }, + { + "epoch": 944.4444444444445, + "learning_rate": 7.919982857842536e-07, + "loss": 1.9096, + "step": 15300 + }, + { + "epoch": 945.679012345679, + "learning_rate": 7.917227868250275e-07, + "loss": 1.9034, + "step": 15320 + }, + { + "epoch": 946.9135802469136, + "learning_rate": 7.914472878658014e-07, + "loss": 1.8802, + "step": 15340 + }, + { + "epoch": 948.1481481481482, + "learning_rate": 7.911717889065752e-07, + "loss": 1.935, + "step": 15360 + }, + { + "epoch": 949.3827160493827, + "learning_rate": 7.908962899473489e-07, + "loss": 1.8363, + "step": 15380 + }, + { + "epoch": 950.6172839506173, + "learning_rate": 7.906207909881228e-07, + "loss": 1.945, + "step": 15400 + }, + { + "epoch": 951.8518518518518, + "learning_rate": 7.903452920288967e-07, + "loss": 1.8431, + "step": 15420 + }, + { + "epoch": 953.0864197530864, + "learning_rate": 7.900697930696705e-07, + "loss": 1.9474, + "step": 15440 + }, + { + "epoch": 954.320987654321, + "learning_rate": 7.897942941104444e-07, + "loss": 1.8394, + "step": 15460 + }, + { + "epoch": 955.5555555555555, + "learning_rate": 7.895187951512183e-07, + "loss": 1.8496, + "step": 15480 + }, + { + "epoch": 956.7901234567901, + "learning_rate": 7.892432961919921e-07, + "loss": 1.8566, + "step": 15500 + }, + { + "epoch": 958.0246913580247, + "learning_rate": 7.88967797232766e-07, + "loss": 1.8255, + "step": 15520 + }, + { + "epoch": 959.2592592592592, + "learning_rate": 7.886922982735398e-07, + "loss": 1.8211, + "step": 15540 + }, + { + "epoch": 960.4938271604939, + "learning_rate": 7.884167993143137e-07, + "loss": 1.9098, + "step": 15560 + }, + { + "epoch": 961.7283950617284, + "learning_rate": 7.881413003550876e-07, + "loss": 1.8611, + "step": 15580 + }, + { + "epoch": 962.9629629629629, + "learning_rate": 7.878658013958614e-07, + "loss": 1.8489, + "step": 15600 + }, + { + "epoch": 964.1975308641976, + "learning_rate": 7.875903024366353e-07, + "loss": 1.8596, + "step": 15620 + }, + { + "epoch": 965.4320987654321, + "learning_rate": 7.873148034774091e-07, + "loss": 1.8228, + "step": 15640 + }, + { + "epoch": 966.6666666666666, + "learning_rate": 7.870393045181829e-07, + "loss": 1.8544, + "step": 15660 + }, + { + "epoch": 967.9012345679013, + "learning_rate": 7.867638055589567e-07, + "loss": 1.8485, + "step": 15680 + }, + { + "epoch": 969.1358024691358, + "learning_rate": 7.864883065997306e-07, + "loss": 1.8627, + "step": 15700 + }, + { + "epoch": 970.3703703703703, + "learning_rate": 7.862128076405045e-07, + "loss": 1.8513, + "step": 15720 + }, + { + "epoch": 971.604938271605, + "learning_rate": 7.859373086812783e-07, + "loss": 1.8237, + "step": 15740 + }, + { + "epoch": 972.8395061728395, + "learning_rate": 7.856618097220521e-07, + "loss": 1.8937, + "step": 15760 + }, + { + "epoch": 974.074074074074, + "learning_rate": 7.853863107628259e-07, + "loss": 1.8339, + "step": 15780 + }, + { + "epoch": 975.3086419753087, + "learning_rate": 7.851108118035998e-07, + "loss": 1.8441, + "step": 15800 + }, + { + "epoch": 976.5432098765432, + "learning_rate": 7.848353128443737e-07, + "loss": 1.8302, + "step": 15820 + }, + { + "epoch": 977.7777777777778, + "learning_rate": 7.845598138851475e-07, + "loss": 1.8206, + "step": 15840 + }, + { + "epoch": 979.0123456790124, + "learning_rate": 7.842843149259214e-07, + "loss": 1.8555, + "step": 15860 + }, + { + "epoch": 980.2469135802469, + "learning_rate": 7.840088159666952e-07, + "loss": 1.8069, + "step": 15880 + }, + { + "epoch": 981.4814814814815, + "learning_rate": 7.837333170074691e-07, + "loss": 1.8273, + "step": 15900 + }, + { + "epoch": 982.716049382716, + "learning_rate": 7.834578180482428e-07, + "loss": 1.7991, + "step": 15920 + }, + { + "epoch": 983.9506172839506, + "learning_rate": 7.831823190890167e-07, + "loss": 1.8335, + "step": 15940 + }, + { + "epoch": 985.1851851851852, + "learning_rate": 7.829068201297906e-07, + "loss": 1.8476, + "step": 15960 + }, + { + "epoch": 986.4197530864197, + "learning_rate": 7.826313211705644e-07, + "loss": 1.8351, + "step": 15980 + }, + { + "epoch": 987.6543209876543, + "learning_rate": 7.823558222113382e-07, + "loss": 1.8428, + "step": 16000 + }, + { + "epoch": 988.8888888888889, + "learning_rate": 7.82080323252112e-07, + "loss": 1.8176, + "step": 16020 + }, + { + "epoch": 990.1234567901234, + "learning_rate": 7.818048242928859e-07, + "loss": 1.83, + "step": 16040 + }, + { + "epoch": 991.358024691358, + "learning_rate": 7.815293253336597e-07, + "loss": 1.8648, + "step": 16060 + }, + { + "epoch": 992.5925925925926, + "learning_rate": 7.812538263744336e-07, + "loss": 1.8545, + "step": 16080 + }, + { + "epoch": 993.8271604938271, + "learning_rate": 7.809783274152075e-07, + "loss": 1.8217, + "step": 16100 + }, + { + "epoch": 995.0617283950618, + "learning_rate": 7.807028284559813e-07, + "loss": 1.7575, + "step": 16120 + }, + { + "epoch": 996.2962962962963, + "learning_rate": 7.804273294967552e-07, + "loss": 1.7868, + "step": 16140 + }, + { + "epoch": 997.5308641975308, + "learning_rate": 7.80151830537529e-07, + "loss": 1.8232, + "step": 16160 + }, + { + "epoch": 998.7654320987655, + "learning_rate": 7.798763315783029e-07, + "loss": 1.7882, + "step": 16180 + }, + { + "epoch": 1000.0, + "learning_rate": 7.796008326190768e-07, + "loss": 1.8071, + "step": 16200 + }, + { + "epoch": 1001.2345679012345, + "learning_rate": 7.793253336598506e-07, + "loss": 1.8153, + "step": 16220 + }, + { + "epoch": 1002.4691358024692, + "learning_rate": 7.790498347006245e-07, + "loss": 1.7843, + "step": 16240 + }, + { + "epoch": 1003.7037037037037, + "learning_rate": 7.787743357413984e-07, + "loss": 1.8265, + "step": 16260 + }, + { + "epoch": 1004.9382716049382, + "learning_rate": 7.784988367821722e-07, + "loss": 1.8141, + "step": 16280 + }, + { + "epoch": 1006.1728395061729, + "learning_rate": 7.78223337822946e-07, + "loss": 1.8397, + "step": 16300 + }, + { + "epoch": 1007.4074074074074, + "learning_rate": 7.779478388637198e-07, + "loss": 1.844, + "step": 16320 + }, + { + "epoch": 1008.641975308642, + "learning_rate": 7.776723399044937e-07, + "loss": 1.799, + "step": 16340 + }, + { + "epoch": 1009.8765432098766, + "learning_rate": 7.773968409452675e-07, + "loss": 1.8023, + "step": 16360 + }, + { + "epoch": 1011.1111111111111, + "learning_rate": 7.771213419860414e-07, + "loss": 1.8189, + "step": 16380 + }, + { + "epoch": 1012.3456790123457, + "learning_rate": 7.768458430268152e-07, + "loss": 1.7896, + "step": 16400 + }, + { + "epoch": 1013.5802469135803, + "learning_rate": 7.765703440675891e-07, + "loss": 1.7747, + "step": 16420 + }, + { + "epoch": 1014.8148148148148, + "learning_rate": 7.76294845108363e-07, + "loss": 1.8221, + "step": 16440 + }, + { + "epoch": 1016.0493827160494, + "learning_rate": 7.760193461491367e-07, + "loss": 1.8302, + "step": 16460 + }, + { + "epoch": 1017.283950617284, + "learning_rate": 7.757438471899106e-07, + "loss": 1.8115, + "step": 16480 + }, + { + "epoch": 1018.5185185185185, + "learning_rate": 7.754683482306844e-07, + "loss": 1.7948, + "step": 16500 + }, + { + "epoch": 1019.7530864197531, + "learning_rate": 7.751928492714583e-07, + "loss": 1.791, + "step": 16520 + }, + { + "epoch": 1020.9876543209876, + "learning_rate": 7.749173503122321e-07, + "loss": 1.8166, + "step": 16540 + }, + { + "epoch": 1022.2222222222222, + "learning_rate": 7.746418513530059e-07, + "loss": 1.8231, + "step": 16560 + }, + { + "epoch": 1023.4567901234568, + "learning_rate": 7.743663523937798e-07, + "loss": 1.7667, + "step": 16580 + }, + { + "epoch": 1024.6913580246915, + "learning_rate": 7.740908534345536e-07, + "loss": 1.8331, + "step": 16600 + }, + { + "epoch": 1025.9259259259259, + "learning_rate": 7.738153544753275e-07, + "loss": 1.8244, + "step": 16620 + }, + { + "epoch": 1027.1604938271605, + "learning_rate": 7.735398555161013e-07, + "loss": 1.8083, + "step": 16640 + }, + { + "epoch": 1028.3950617283951, + "learning_rate": 7.732643565568752e-07, + "loss": 1.9108, + "step": 16660 + }, + { + "epoch": 1029.6296296296296, + "learning_rate": 7.729888575976491e-07, + "loss": 1.8943, + "step": 16680 + }, + { + "epoch": 1030.8641975308642, + "learning_rate": 7.727133586384229e-07, + "loss": 1.8622, + "step": 16700 + }, + { + "epoch": 1032.0987654320988, + "learning_rate": 7.724378596791968e-07, + "loss": 1.8061, + "step": 16720 + }, + { + "epoch": 1033.3333333333333, + "learning_rate": 7.721623607199706e-07, + "loss": 1.8007, + "step": 16740 + }, + { + "epoch": 1034.567901234568, + "learning_rate": 7.718868617607444e-07, + "loss": 1.8463, + "step": 16760 + }, + { + "epoch": 1035.8024691358025, + "learning_rate": 7.716113628015182e-07, + "loss": 1.8533, + "step": 16780 + }, + { + "epoch": 1037.037037037037, + "learning_rate": 7.713358638422921e-07, + "loss": 1.8287, + "step": 16800 + }, + { + "epoch": 1038.2716049382716, + "learning_rate": 7.710603648830659e-07, + "loss": 1.8153, + "step": 16820 + }, + { + "epoch": 1039.5061728395062, + "learning_rate": 7.707848659238397e-07, + "loss": 1.7903, + "step": 16840 + }, + { + "epoch": 1040.7407407407406, + "learning_rate": 7.705093669646136e-07, + "loss": 1.8491, + "step": 16860 + }, + { + "epoch": 1041.9753086419753, + "learning_rate": 7.702338680053874e-07, + "loss": 1.8319, + "step": 16880 + }, + { + "epoch": 1043.20987654321, + "learning_rate": 7.699583690461613e-07, + "loss": 1.856, + "step": 16900 + }, + { + "epoch": 1044.4444444444443, + "learning_rate": 7.696828700869351e-07, + "loss": 1.8419, + "step": 16920 + }, + { + "epoch": 1045.679012345679, + "learning_rate": 7.69407371127709e-07, + "loss": 1.7753, + "step": 16940 + }, + { + "epoch": 1046.9135802469136, + "learning_rate": 7.69131872168483e-07, + "loss": 1.8129, + "step": 16960 + }, + { + "epoch": 1048.148148148148, + "learning_rate": 7.688563732092568e-07, + "loss": 1.7887, + "step": 16980 + }, + { + "epoch": 1049.3827160493827, + "learning_rate": 7.685808742500307e-07, + "loss": 1.8068, + "step": 17000 + }, + { + "epoch": 1050.6172839506173, + "learning_rate": 7.683053752908044e-07, + "loss": 1.8225, + "step": 17020 + }, + { + "epoch": 1051.851851851852, + "learning_rate": 7.680298763315783e-07, + "loss": 1.7596, + "step": 17040 + }, + { + "epoch": 1053.0864197530864, + "learning_rate": 7.677543773723522e-07, + "loss": 1.8343, + "step": 17060 + }, + { + "epoch": 1054.320987654321, + "learning_rate": 7.674788784131259e-07, + "loss": 1.7608, + "step": 17080 + }, + { + "epoch": 1055.5555555555557, + "learning_rate": 7.672033794538998e-07, + "loss": 1.8318, + "step": 17100 + }, + { + "epoch": 1056.79012345679, + "learning_rate": 7.669278804946736e-07, + "loss": 1.8261, + "step": 17120 + }, + { + "epoch": 1058.0246913580247, + "learning_rate": 7.666523815354475e-07, + "loss": 1.8249, + "step": 17140 + }, + { + "epoch": 1059.2592592592594, + "learning_rate": 7.663768825762213e-07, + "loss": 1.8174, + "step": 17160 + }, + { + "epoch": 1060.4938271604938, + "learning_rate": 7.661013836169952e-07, + "loss": 1.7851, + "step": 17180 + }, + { + "epoch": 1061.7283950617284, + "learning_rate": 7.658258846577691e-07, + "loss": 1.8058, + "step": 17200 + }, + { + "epoch": 1062.962962962963, + "learning_rate": 7.655503856985429e-07, + "loss": 1.8048, + "step": 17220 + }, + { + "epoch": 1064.1975308641975, + "learning_rate": 7.652748867393169e-07, + "loss": 1.7883, + "step": 17240 + }, + { + "epoch": 1065.432098765432, + "learning_rate": 7.649993877800906e-07, + "loss": 1.8055, + "step": 17260 + }, + { + "epoch": 1066.6666666666667, + "learning_rate": 7.647238888208645e-07, + "loss": 1.7948, + "step": 17280 + }, + { + "epoch": 1067.9012345679012, + "learning_rate": 7.644483898616383e-07, + "loss": 1.7925, + "step": 17300 + }, + { + "epoch": 1069.1358024691358, + "learning_rate": 7.641728909024121e-07, + "loss": 1.8294, + "step": 17320 + }, + { + "epoch": 1070.3703703703704, + "learning_rate": 7.638973919431859e-07, + "loss": 1.7673, + "step": 17340 + }, + { + "epoch": 1071.6049382716049, + "learning_rate": 7.636218929839597e-07, + "loss": 1.7872, + "step": 17360 + }, + { + "epoch": 1072.8395061728395, + "learning_rate": 7.633463940247336e-07, + "loss": 1.765, + "step": 17380 + }, + { + "epoch": 1074.0740740740741, + "learning_rate": 7.630708950655074e-07, + "loss": 1.7924, + "step": 17400 + }, + { + "epoch": 1075.3086419753085, + "learning_rate": 7.627953961062813e-07, + "loss": 1.7804, + "step": 17420 + }, + { + "epoch": 1076.5432098765432, + "learning_rate": 7.625198971470552e-07, + "loss": 1.7999, + "step": 17440 + }, + { + "epoch": 1077.7777777777778, + "learning_rate": 7.62244398187829e-07, + "loss": 1.8317, + "step": 17460 + }, + { + "epoch": 1079.0123456790122, + "learning_rate": 7.619688992286029e-07, + "loss": 1.7989, + "step": 17480 + }, + { + "epoch": 1080.2469135802469, + "learning_rate": 7.616934002693767e-07, + "loss": 1.8067, + "step": 17500 + }, + { + "epoch": 1081.4814814814815, + "learning_rate": 7.614179013101506e-07, + "loss": 1.798, + "step": 17520 + }, + { + "epoch": 1082.716049382716, + "learning_rate": 7.611424023509245e-07, + "loss": 1.7895, + "step": 17540 + }, + { + "epoch": 1083.9506172839506, + "learning_rate": 7.608669033916982e-07, + "loss": 1.7702, + "step": 17560 + }, + { + "epoch": 1085.1851851851852, + "learning_rate": 7.605914044324721e-07, + "loss": 1.7835, + "step": 17580 + }, + { + "epoch": 1086.4197530864199, + "learning_rate": 7.603159054732459e-07, + "loss": 1.7743, + "step": 17600 + }, + { + "epoch": 1087.6543209876543, + "learning_rate": 7.600404065140197e-07, + "loss": 1.835, + "step": 17620 + }, + { + "epoch": 1088.888888888889, + "learning_rate": 7.597649075547935e-07, + "loss": 1.7897, + "step": 17640 + }, + { + "epoch": 1090.1234567901236, + "learning_rate": 7.594894085955674e-07, + "loss": 1.7955, + "step": 17660 + }, + { + "epoch": 1091.358024691358, + "learning_rate": 7.592139096363414e-07, + "loss": 1.8113, + "step": 17680 + }, + { + "epoch": 1092.5925925925926, + "learning_rate": 7.589384106771152e-07, + "loss": 1.7714, + "step": 17700 + }, + { + "epoch": 1093.8271604938273, + "learning_rate": 7.586629117178891e-07, + "loss": 1.8433, + "step": 17720 + }, + { + "epoch": 1095.0617283950617, + "learning_rate": 7.583874127586629e-07, + "loss": 1.8022, + "step": 17740 + }, + { + "epoch": 1096.2962962962963, + "learning_rate": 7.581119137994368e-07, + "loss": 1.7671, + "step": 17760 + }, + { + "epoch": 1097.530864197531, + "learning_rate": 7.578364148402107e-07, + "loss": 1.7749, + "step": 17780 + }, + { + "epoch": 1098.7654320987654, + "learning_rate": 7.575609158809845e-07, + "loss": 1.7352, + "step": 17800 + }, + { + "epoch": 1100.0, + "learning_rate": 7.572854169217584e-07, + "loss": 1.7511, + "step": 17820 + }, + { + "epoch": 1101.2345679012346, + "learning_rate": 7.570099179625321e-07, + "loss": 1.7498, + "step": 17840 + }, + { + "epoch": 1102.469135802469, + "learning_rate": 7.56734419003306e-07, + "loss": 1.771, + "step": 17860 + }, + { + "epoch": 1103.7037037037037, + "learning_rate": 7.564589200440798e-07, + "loss": 1.7921, + "step": 17880 + }, + { + "epoch": 1104.9382716049383, + "learning_rate": 7.561834210848536e-07, + "loss": 1.7689, + "step": 17900 + }, + { + "epoch": 1106.1728395061727, + "learning_rate": 7.559079221256275e-07, + "loss": 1.7358, + "step": 17920 + }, + { + "epoch": 1107.4074074074074, + "learning_rate": 7.556324231664013e-07, + "loss": 1.7594, + "step": 17940 + }, + { + "epoch": 1108.641975308642, + "learning_rate": 7.553569242071753e-07, + "loss": 1.7568, + "step": 17960 + }, + { + "epoch": 1109.8765432098764, + "learning_rate": 7.55081425247949e-07, + "loss": 1.7872, + "step": 17980 + }, + { + "epoch": 1111.111111111111, + "learning_rate": 7.548059262887229e-07, + "loss": 1.8052, + "step": 18000 + }, + { + "epoch": 1112.3456790123457, + "learning_rate": 7.545304273294968e-07, + "loss": 1.757, + "step": 18020 + }, + { + "epoch": 1113.5802469135801, + "learning_rate": 7.542549283702706e-07, + "loss": 1.7756, + "step": 18040 + }, + { + "epoch": 1114.8148148148148, + "learning_rate": 7.539794294110445e-07, + "loss": 1.7773, + "step": 18060 + }, + { + "epoch": 1116.0493827160494, + "learning_rate": 7.537039304518183e-07, + "loss": 1.8416, + "step": 18080 + }, + { + "epoch": 1117.283950617284, + "learning_rate": 7.534284314925921e-07, + "loss": 1.7244, + "step": 18100 + }, + { + "epoch": 1118.5185185185185, + "learning_rate": 7.531529325333659e-07, + "loss": 1.7225, + "step": 18120 + }, + { + "epoch": 1119.7530864197531, + "learning_rate": 7.528774335741398e-07, + "loss": 1.7795, + "step": 18140 + }, + { + "epoch": 1120.9876543209878, + "learning_rate": 7.526019346149136e-07, + "loss": 1.768, + "step": 18160 + }, + { + "epoch": 1122.2222222222222, + "learning_rate": 7.523264356556874e-07, + "loss": 1.7673, + "step": 18180 + }, + { + "epoch": 1123.4567901234568, + "learning_rate": 7.520509366964613e-07, + "loss": 1.7372, + "step": 18200 + }, + { + "epoch": 1124.6913580246915, + "learning_rate": 7.517754377372351e-07, + "loss": 1.7272, + "step": 18220 + }, + { + "epoch": 1125.9259259259259, + "learning_rate": 7.51499938778009e-07, + "loss": 1.7696, + "step": 18240 + }, + { + "epoch": 1127.1604938271605, + "learning_rate": 7.512244398187828e-07, + "loss": 1.819, + "step": 18260 + }, + { + "epoch": 1128.3950617283951, + "learning_rate": 7.509489408595567e-07, + "loss": 1.7688, + "step": 18280 + }, + { + "epoch": 1129.6296296296296, + "learning_rate": 7.506734419003306e-07, + "loss": 1.7766, + "step": 18300 + }, + { + "epoch": 1130.8641975308642, + "learning_rate": 7.503979429411043e-07, + "loss": 1.7581, + "step": 18320 + }, + { + "epoch": 1132.0987654320988, + "learning_rate": 7.501224439818783e-07, + "loss": 1.7735, + "step": 18340 + }, + { + "epoch": 1133.3333333333333, + "learning_rate": 7.498469450226521e-07, + "loss": 1.7485, + "step": 18360 + }, + { + "epoch": 1134.567901234568, + "learning_rate": 7.495714460634259e-07, + "loss": 1.7472, + "step": 18380 + }, + { + "epoch": 1135.8024691358025, + "learning_rate": 7.492959471041998e-07, + "loss": 1.749, + "step": 18400 + }, + { + "epoch": 1137.037037037037, + "learning_rate": 7.490204481449736e-07, + "loss": 1.7946, + "step": 18420 + }, + { + "epoch": 1138.2716049382716, + "learning_rate": 7.487449491857475e-07, + "loss": 1.7932, + "step": 18440 + }, + { + "epoch": 1139.5061728395062, + "learning_rate": 7.484694502265213e-07, + "loss": 1.7318, + "step": 18460 + }, + { + "epoch": 1140.7407407407406, + "learning_rate": 7.481939512672952e-07, + "loss": 1.7591, + "step": 18480 + }, + { + "epoch": 1141.9753086419753, + "learning_rate": 7.47918452308069e-07, + "loss": 1.7951, + "step": 18500 + }, + { + "epoch": 1143.20987654321, + "learning_rate": 7.476429533488429e-07, + "loss": 1.7578, + "step": 18520 + }, + { + "epoch": 1144.4444444444443, + "learning_rate": 7.473674543896168e-07, + "loss": 1.7653, + "step": 18540 + }, + { + "epoch": 1145.679012345679, + "learning_rate": 7.470919554303906e-07, + "loss": 1.7731, + "step": 18560 + }, + { + "epoch": 1146.9135802469136, + "learning_rate": 7.468164564711646e-07, + "loss": 1.7214, + "step": 18580 + }, + { + "epoch": 1148.148148148148, + "learning_rate": 7.465409575119383e-07, + "loss": 1.775, + "step": 18600 + }, + { + "epoch": 1149.3827160493827, + "learning_rate": 7.462654585527122e-07, + "loss": 1.7218, + "step": 18620 + }, + { + "epoch": 1150.6172839506173, + "learning_rate": 7.45989959593486e-07, + "loss": 1.7347, + "step": 18640 + }, + { + "epoch": 1151.851851851852, + "learning_rate": 7.457144606342598e-07, + "loss": 1.7236, + "step": 18660 + }, + { + "epoch": 1153.0864197530864, + "learning_rate": 7.454389616750337e-07, + "loss": 1.7785, + "step": 18680 + }, + { + "epoch": 1154.320987654321, + "learning_rate": 7.451634627158074e-07, + "loss": 1.7863, + "step": 18700 + }, + { + "epoch": 1155.5555555555557, + "learning_rate": 7.448879637565813e-07, + "loss": 1.7431, + "step": 18720 + }, + { + "epoch": 1156.79012345679, + "learning_rate": 7.446124647973551e-07, + "loss": 1.8058, + "step": 18740 + }, + { + "epoch": 1158.0246913580247, + "learning_rate": 7.44336965838129e-07, + "loss": 1.7335, + "step": 18760 + }, + { + "epoch": 1159.2592592592594, + "learning_rate": 7.440614668789029e-07, + "loss": 1.7839, + "step": 18780 + }, + { + "epoch": 1160.4938271604938, + "learning_rate": 7.437859679196767e-07, + "loss": 1.7525, + "step": 18800 + }, + { + "epoch": 1161.7283950617284, + "learning_rate": 7.435104689604506e-07, + "loss": 1.7545, + "step": 18820 + }, + { + "epoch": 1162.962962962963, + "learning_rate": 7.432349700012244e-07, + "loss": 1.7494, + "step": 18840 + }, + { + "epoch": 1164.1975308641975, + "learning_rate": 7.429594710419983e-07, + "loss": 1.7774, + "step": 18860 + }, + { + "epoch": 1165.432098765432, + "learning_rate": 7.426839720827722e-07, + "loss": 1.7327, + "step": 18880 + }, + { + "epoch": 1166.6666666666667, + "learning_rate": 7.424084731235459e-07, + "loss": 1.7474, + "step": 18900 + }, + { + "epoch": 1167.9012345679012, + "learning_rate": 7.421329741643198e-07, + "loss": 1.7702, + "step": 18920 + }, + { + "epoch": 1169.1358024691358, + "learning_rate": 7.418574752050936e-07, + "loss": 1.7782, + "step": 18940 + }, + { + "epoch": 1170.3703703703704, + "learning_rate": 7.415819762458675e-07, + "loss": 1.7498, + "step": 18960 + }, + { + "epoch": 1171.6049382716049, + "learning_rate": 7.413064772866412e-07, + "loss": 1.7764, + "step": 18980 + }, + { + "epoch": 1172.8395061728395, + "learning_rate": 7.410309783274151e-07, + "loss": 1.6855, + "step": 19000 + }, + { + "epoch": 1174.0740740740741, + "learning_rate": 7.40755479368189e-07, + "loss": 1.764, + "step": 19020 + }, + { + "epoch": 1175.3086419753085, + "learning_rate": 7.404799804089627e-07, + "loss": 1.7617, + "step": 19040 + }, + { + "epoch": 1176.5432098765432, + "learning_rate": 7.402044814497367e-07, + "loss": 1.7556, + "step": 19060 + }, + { + "epoch": 1177.7777777777778, + "learning_rate": 7.399289824905105e-07, + "loss": 1.7351, + "step": 19080 + }, + { + "epoch": 1179.0123456790122, + "learning_rate": 7.396534835312844e-07, + "loss": 1.7619, + "step": 19100 + }, + { + "epoch": 1180.2469135802469, + "learning_rate": 7.393779845720582e-07, + "loss": 1.7406, + "step": 19120 + }, + { + "epoch": 1181.4814814814815, + "learning_rate": 7.391024856128321e-07, + "loss": 1.7334, + "step": 19140 + }, + { + "epoch": 1182.716049382716, + "learning_rate": 7.38826986653606e-07, + "loss": 1.7695, + "step": 19160 + }, + { + "epoch": 1183.9506172839506, + "learning_rate": 7.385514876943798e-07, + "loss": 1.7869, + "step": 19180 + }, + { + "epoch": 1185.1851851851852, + "learning_rate": 7.382759887351537e-07, + "loss": 1.7892, + "step": 19200 + }, + { + "epoch": 1186.4197530864199, + "learning_rate": 7.380004897759275e-07, + "loss": 1.7485, + "step": 19220 + }, + { + "epoch": 1187.6543209876543, + "learning_rate": 7.377249908167013e-07, + "loss": 1.7163, + "step": 19240 + }, + { + "epoch": 1188.888888888889, + "learning_rate": 7.374494918574752e-07, + "loss": 1.7288, + "step": 19260 + }, + { + "epoch": 1190.1234567901236, + "learning_rate": 7.37173992898249e-07, + "loss": 1.8203, + "step": 19280 + }, + { + "epoch": 1191.358024691358, + "learning_rate": 7.36898493939023e-07, + "loss": 1.7354, + "step": 19300 + }, + { + "epoch": 1192.5925925925926, + "learning_rate": 7.366229949797967e-07, + "loss": 1.7581, + "step": 19320 + }, + { + "epoch": 1193.8271604938273, + "learning_rate": 7.363474960205706e-07, + "loss": 1.7342, + "step": 19340 + }, + { + "epoch": 1195.0617283950617, + "learning_rate": 7.360719970613444e-07, + "loss": 1.7549, + "step": 19360 + }, + { + "epoch": 1196.2962962962963, + "learning_rate": 7.357964981021183e-07, + "loss": 1.7604, + "step": 19380 + }, + { + "epoch": 1197.530864197531, + "learning_rate": 7.355209991428922e-07, + "loss": 1.7603, + "step": 19400 + }, + { + "epoch": 1198.7654320987654, + "learning_rate": 7.35245500183666e-07, + "loss": 1.7071, + "step": 19420 + }, + { + "epoch": 1200.0, + "learning_rate": 7.349700012244398e-07, + "loss": 1.7339, + "step": 19440 + }, + { + "epoch": 1201.2345679012346, + "learning_rate": 7.346945022652136e-07, + "loss": 1.7343, + "step": 19460 + }, + { + "epoch": 1202.469135802469, + "learning_rate": 7.344190033059875e-07, + "loss": 1.7962, + "step": 19480 + }, + { + "epoch": 1203.7037037037037, + "learning_rate": 7.341435043467614e-07, + "loss": 1.7555, + "step": 19500 + }, + { + "epoch": 1204.9382716049383, + "learning_rate": 7.338680053875351e-07, + "loss": 1.7909, + "step": 19520 + }, + { + "epoch": 1206.1728395061727, + "learning_rate": 7.33592506428309e-07, + "loss": 1.7417, + "step": 19540 + }, + { + "epoch": 1207.4074074074074, + "learning_rate": 7.333170074690828e-07, + "loss": 1.7785, + "step": 19560 + }, + { + "epoch": 1208.641975308642, + "learning_rate": 7.330415085098567e-07, + "loss": 1.7561, + "step": 19580 + }, + { + "epoch": 1209.8765432098764, + "learning_rate": 7.327660095506305e-07, + "loss": 1.7791, + "step": 19600 + }, + { + "epoch": 1211.111111111111, + "learning_rate": 7.324905105914044e-07, + "loss": 1.7881, + "step": 19620 + }, + { + "epoch": 1212.3456790123457, + "learning_rate": 7.322150116321783e-07, + "loss": 1.8316, + "step": 19640 + }, + { + "epoch": 1213.5802469135801, + "learning_rate": 7.31939512672952e-07, + "loss": 2.3297, + "step": 19660 + }, + { + "epoch": 1214.8148148148148, + "learning_rate": 7.31664013713726e-07, + "loss": 3.0734, + "step": 19680 + }, + { + "epoch": 1216.0493827160494, + "learning_rate": 7.313885147544998e-07, + "loss": 3.3818, + "step": 19700 + }, + { + "epoch": 1217.283950617284, + "learning_rate": 7.311130157952736e-07, + "loss": 3.4044, + "step": 19720 + }, + { + "epoch": 1218.5185185185185, + "learning_rate": 7.308375168360474e-07, + "loss": 3.3076, + "step": 19740 + }, + { + "epoch": 1219.7530864197531, + "learning_rate": 7.305620178768213e-07, + "loss": 3.3052, + "step": 19760 + }, + { + "epoch": 1220.9876543209878, + "learning_rate": 7.302865189175951e-07, + "loss": 3.2461, + "step": 19780 + }, + { + "epoch": 1222.2222222222222, + "learning_rate": 7.300110199583689e-07, + "loss": 3.2139, + "step": 19800 + }, + { + "epoch": 1223.4567901234568, + "learning_rate": 7.297355209991428e-07, + "loss": 3.2006, + "step": 19820 + }, + { + "epoch": 1224.6913580246915, + "learning_rate": 7.294600220399166e-07, + "loss": 3.2578, + "step": 19840 + }, + { + "epoch": 1225.9259259259259, + "learning_rate": 7.291845230806905e-07, + "loss": 3.1139, + "step": 19860 + }, + { + "epoch": 1227.1604938271605, + "learning_rate": 7.289090241214644e-07, + "loss": 3.1, + "step": 19880 + }, + { + "epoch": 1228.3950617283951, + "learning_rate": 7.286335251622382e-07, + "loss": 3.158, + "step": 19900 + }, + { + "epoch": 1229.6296296296296, + "learning_rate": 7.283580262030121e-07, + "loss": 3.1806, + "step": 19920 + }, + { + "epoch": 1230.8641975308642, + "learning_rate": 7.28082527243786e-07, + "loss": 3.0692, + "step": 19940 + }, + { + "epoch": 1232.0987654320988, + "learning_rate": 7.278070282845599e-07, + "loss": 3.1239, + "step": 19960 + }, + { + "epoch": 1233.3333333333333, + "learning_rate": 7.275315293253336e-07, + "loss": 3.1122, + "step": 19980 + }, + { + "epoch": 1234.567901234568, + "learning_rate": 7.272560303661075e-07, + "loss": 3.0677, + "step": 20000 + }, + { + "epoch": 1235.8024691358025, + "learning_rate": 7.269805314068814e-07, + "loss": 3.1123, + "step": 20020 + }, + { + "epoch": 1237.037037037037, + "learning_rate": 7.267050324476552e-07, + "loss": 3.113, + "step": 20040 + }, + { + "epoch": 1238.2716049382716, + "learning_rate": 7.26429533488429e-07, + "loss": 3.0655, + "step": 20060 + }, + { + "epoch": 1239.5061728395062, + "learning_rate": 7.261540345292028e-07, + "loss": 3.1083, + "step": 20080 + }, + { + "epoch": 1240.7407407407406, + "learning_rate": 7.258785355699767e-07, + "loss": 3.0929, + "step": 20100 + }, + { + "epoch": 1241.9753086419753, + "learning_rate": 7.256030366107506e-07, + "loss": 3.0803, + "step": 20120 + }, + { + "epoch": 1243.20987654321, + "learning_rate": 7.253275376515244e-07, + "loss": 3.1016, + "step": 20140 + }, + { + "epoch": 1244.4444444444443, + "learning_rate": 7.250520386922983e-07, + "loss": 3.0868, + "step": 20160 + }, + { + "epoch": 1245.679012345679, + "learning_rate": 7.247765397330721e-07, + "loss": 3.053, + "step": 20180 + }, + { + "epoch": 1246.9135802469136, + "learning_rate": 7.24501040773846e-07, + "loss": 3.0749, + "step": 20200 + }, + { + "epoch": 1248.148148148148, + "learning_rate": 7.242255418146198e-07, + "loss": 3.125, + "step": 20220 + }, + { + "epoch": 1249.3827160493827, + "learning_rate": 7.239500428553937e-07, + "loss": 3.1073, + "step": 20240 + }, + { + "epoch": 1250.6172839506173, + "learning_rate": 7.236745438961675e-07, + "loss": 3.0613, + "step": 20260 + }, + { + "epoch": 1251.851851851852, + "learning_rate": 7.233990449369414e-07, + "loss": 3.0474, + "step": 20280 + }, + { + "epoch": 1253.0864197530864, + "learning_rate": 7.231235459777152e-07, + "loss": 3.0647, + "step": 20300 + }, + { + "epoch": 1254.320987654321, + "learning_rate": 7.228480470184889e-07, + "loss": 3.0641, + "step": 20320 + }, + { + "epoch": 1255.5555555555557, + "learning_rate": 7.225725480592628e-07, + "loss": 3.0684, + "step": 20340 + }, + { + "epoch": 1256.79012345679, + "learning_rate": 7.222970491000366e-07, + "loss": 3.0527, + "step": 20360 + }, + { + "epoch": 1258.0246913580247, + "learning_rate": 7.220215501408105e-07, + "loss": 3.1258, + "step": 20380 + }, + { + "epoch": 1259.2592592592594, + "learning_rate": 7.217460511815844e-07, + "loss": 3.0887, + "step": 20400 + }, + { + "epoch": 1260.4938271604938, + "learning_rate": 7.214705522223582e-07, + "loss": 3.1021, + "step": 20420 + }, + { + "epoch": 1261.7283950617284, + "learning_rate": 7.211950532631321e-07, + "loss": 3.034, + "step": 20440 + }, + { + "epoch": 1262.962962962963, + "learning_rate": 7.209195543039059e-07, + "loss": 3.0488, + "step": 20460 + }, + { + "epoch": 1264.1975308641975, + "learning_rate": 7.206440553446798e-07, + "loss": 3.1048, + "step": 20480 + }, + { + "epoch": 1265.432098765432, + "learning_rate": 7.203685563854537e-07, + "loss": 3.0026, + "step": 20500 + }, + { + "epoch": 1266.6666666666667, + "learning_rate": 7.200930574262274e-07, + "loss": 2.9731, + "step": 20520 + }, + { + "epoch": 1267.9012345679012, + "learning_rate": 7.198175584670013e-07, + "loss": 3.0784, + "step": 20540 + }, + { + "epoch": 1269.1358024691358, + "learning_rate": 7.195420595077751e-07, + "loss": 3.0579, + "step": 20560 + }, + { + "epoch": 1270.3703703703704, + "learning_rate": 7.19266560548549e-07, + "loss": 3.1232, + "step": 20580 + }, + { + "epoch": 1271.6049382716049, + "learning_rate": 7.189910615893227e-07, + "loss": 3.0752, + "step": 20600 + }, + { + "epoch": 1272.8395061728395, + "learning_rate": 7.187155626300966e-07, + "loss": 2.9859, + "step": 20620 + }, + { + "epoch": 1274.0740740740741, + "learning_rate": 7.184400636708705e-07, + "loss": 3.0488, + "step": 20640 + }, + { + "epoch": 1275.3086419753085, + "learning_rate": 7.181645647116444e-07, + "loss": 3.1021, + "step": 20660 + }, + { + "epoch": 1276.5432098765432, + "learning_rate": 7.178890657524183e-07, + "loss": 3.0604, + "step": 20680 + }, + { + "epoch": 1277.7777777777778, + "learning_rate": 7.176135667931921e-07, + "loss": 3.0458, + "step": 20700 + }, + { + "epoch": 1279.0123456790122, + "learning_rate": 7.17338067833966e-07, + "loss": 3.0895, + "step": 20720 + }, + { + "epoch": 1280.2469135802469, + "learning_rate": 7.170625688747399e-07, + "loss": 2.9886, + "step": 20740 + }, + { + "epoch": 1281.4814814814815, + "learning_rate": 7.167870699155137e-07, + "loss": 3.086, + "step": 20760 + }, + { + "epoch": 1282.716049382716, + "learning_rate": 7.165115709562876e-07, + "loss": 3.006, + "step": 20780 + }, + { + "epoch": 1283.9506172839506, + "learning_rate": 7.162360719970613e-07, + "loss": 3.0638, + "step": 20800 + }, + { + "epoch": 1285.1851851851852, + "learning_rate": 7.159605730378352e-07, + "loss": 3.0063, + "step": 20820 + }, + { + "epoch": 1286.4197530864199, + "learning_rate": 7.15685074078609e-07, + "loss": 3.0392, + "step": 20840 + }, + { + "epoch": 1287.6543209876543, + "learning_rate": 7.154095751193828e-07, + "loss": 3.0259, + "step": 20860 + }, + { + "epoch": 1288.888888888889, + "learning_rate": 7.151340761601567e-07, + "loss": 3.0477, + "step": 20880 + }, + { + "epoch": 1290.1234567901236, + "learning_rate": 7.148585772009305e-07, + "loss": 3.0454, + "step": 20900 + }, + { + "epoch": 1291.358024691358, + "learning_rate": 7.145830782417044e-07, + "loss": 3.009, + "step": 20920 + }, + { + "epoch": 1292.5925925925926, + "learning_rate": 7.143075792824782e-07, + "loss": 3.0454, + "step": 20940 + }, + { + "epoch": 1293.8271604938273, + "learning_rate": 7.140320803232521e-07, + "loss": 3.0392, + "step": 20960 + }, + { + "epoch": 1295.0617283950617, + "learning_rate": 7.13756581364026e-07, + "loss": 3.046, + "step": 20980 + }, + { + "epoch": 1296.2962962962963, + "learning_rate": 7.134810824047998e-07, + "loss": 3.0165, + "step": 21000 + }, + { + "epoch": 1297.530864197531, + "learning_rate": 7.132055834455737e-07, + "loss": 3.031, + "step": 21020 + }, + { + "epoch": 1298.7654320987654, + "learning_rate": 7.129300844863475e-07, + "loss": 3.0662, + "step": 21040 + }, + { + "epoch": 1300.0, + "learning_rate": 7.126545855271213e-07, + "loss": 3.0765, + "step": 21060 + }, + { + "epoch": 1301.2345679012346, + "learning_rate": 7.123790865678951e-07, + "loss": 3.014, + "step": 21080 + }, + { + "epoch": 1302.469135802469, + "learning_rate": 7.12103587608669e-07, + "loss": 2.9775, + "step": 21100 + }, + { + "epoch": 1303.7037037037037, + "learning_rate": 7.118280886494429e-07, + "loss": 3.0938, + "step": 21120 + }, + { + "epoch": 1304.9382716049383, + "learning_rate": 7.115525896902166e-07, + "loss": 3.0424, + "step": 21140 + }, + { + "epoch": 1306.1728395061727, + "learning_rate": 7.112770907309905e-07, + "loss": 3.053, + "step": 21160 + }, + { + "epoch": 1307.4074074074074, + "learning_rate": 7.110015917717643e-07, + "loss": 3.0423, + "step": 21180 + }, + { + "epoch": 1308.641975308642, + "learning_rate": 7.107260928125382e-07, + "loss": 3.003, + "step": 21200 + }, + { + "epoch": 1309.8765432098764, + "learning_rate": 7.10450593853312e-07, + "loss": 3.0905, + "step": 21220 + }, + { + "epoch": 1311.111111111111, + "learning_rate": 7.101750948940859e-07, + "loss": 3.0676, + "step": 21240 + }, + { + "epoch": 1312.3456790123457, + "learning_rate": 7.098995959348598e-07, + "loss": 2.9869, + "step": 21260 + }, + { + "epoch": 1313.5802469135801, + "learning_rate": 7.096240969756336e-07, + "loss": 3.082, + "step": 21280 + }, + { + "epoch": 1314.8148148148148, + "learning_rate": 7.093485980164075e-07, + "loss": 2.9242, + "step": 21300 + }, + { + "epoch": 1316.0493827160494, + "learning_rate": 7.090730990571813e-07, + "loss": 3.0285, + "step": 21320 + }, + { + "epoch": 1317.283950617284, + "learning_rate": 7.087976000979551e-07, + "loss": 3.0237, + "step": 21340 + }, + { + "epoch": 1318.5185185185185, + "learning_rate": 7.085221011387288e-07, + "loss": 2.9958, + "step": 21360 + }, + { + "epoch": 1319.7530864197531, + "learning_rate": 7.082466021795028e-07, + "loss": 2.9874, + "step": 21380 + }, + { + "epoch": 1320.9876543209878, + "learning_rate": 7.079711032202767e-07, + "loss": 3.0377, + "step": 21400 + }, + { + "epoch": 1322.2222222222222, + "learning_rate": 7.076956042610505e-07, + "loss": 3.0517, + "step": 21420 + }, + { + "epoch": 1323.4567901234568, + "learning_rate": 7.074201053018244e-07, + "loss": 2.9989, + "step": 21440 + }, + { + "epoch": 1324.6913580246915, + "learning_rate": 7.071446063425982e-07, + "loss": 3.0555, + "step": 21460 + }, + { + "epoch": 1325.9259259259259, + "learning_rate": 7.068691073833721e-07, + "loss": 3.0958, + "step": 21480 + }, + { + "epoch": 1327.1604938271605, + "learning_rate": 7.06593608424146e-07, + "loss": 3.0202, + "step": 21500 + }, + { + "epoch": 1328.3950617283951, + "learning_rate": 7.063181094649198e-07, + "loss": 3.0313, + "step": 21520 + }, + { + "epoch": 1329.6296296296296, + "learning_rate": 7.060426105056937e-07, + "loss": 3.0543, + "step": 21540 + }, + { + "epoch": 1330.8641975308642, + "learning_rate": 7.057671115464675e-07, + "loss": 3.0223, + "step": 21560 + }, + { + "epoch": 1332.0987654320988, + "learning_rate": 7.054916125872414e-07, + "loss": 2.9771, + "step": 21580 + }, + { + "epoch": 1333.3333333333333, + "learning_rate": 7.052161136280152e-07, + "loss": 3.0458, + "step": 21600 + }, + { + "epoch": 1334.567901234568, + "learning_rate": 7.049406146687891e-07, + "loss": 3.0683, + "step": 21620 + }, + { + "epoch": 1335.8024691358025, + "learning_rate": 7.046651157095629e-07, + "loss": 3.0293, + "step": 21640 + }, + { + "epoch": 1337.037037037037, + "learning_rate": 7.043896167503367e-07, + "loss": 3.0793, + "step": 21660 + }, + { + "epoch": 1338.2716049382716, + "learning_rate": 7.041141177911105e-07, + "loss": 3.0518, + "step": 21680 + }, + { + "epoch": 1339.5061728395062, + "learning_rate": 7.038386188318843e-07, + "loss": 2.9899, + "step": 21700 + }, + { + "epoch": 1340.7407407407406, + "learning_rate": 7.035631198726582e-07, + "loss": 3.0019, + "step": 21720 + }, + { + "epoch": 1341.9753086419753, + "learning_rate": 7.032876209134321e-07, + "loss": 2.99, + "step": 21740 + }, + { + "epoch": 1343.20987654321, + "learning_rate": 7.030121219542059e-07, + "loss": 3.0385, + "step": 21760 + }, + { + "epoch": 1344.4444444444443, + "learning_rate": 7.027366229949798e-07, + "loss": 3.0483, + "step": 21780 + }, + { + "epoch": 1345.679012345679, + "learning_rate": 7.024611240357536e-07, + "loss": 3.0463, + "step": 21800 + }, + { + "epoch": 1346.9135802469136, + "learning_rate": 7.021856250765275e-07, + "loss": 3.0503, + "step": 21820 + }, + { + "epoch": 1348.148148148148, + "learning_rate": 7.019101261173014e-07, + "loss": 2.9832, + "step": 21840 + }, + { + "epoch": 1349.3827160493827, + "learning_rate": 7.016346271580752e-07, + "loss": 2.9928, + "step": 21860 + }, + { + "epoch": 1350.6172839506173, + "learning_rate": 7.01359128198849e-07, + "loss": 3.0193, + "step": 21880 + }, + { + "epoch": 1351.851851851852, + "learning_rate": 7.010836292396228e-07, + "loss": 3.0242, + "step": 21900 + }, + { + "epoch": 1353.0864197530864, + "learning_rate": 7.008081302803967e-07, + "loss": 3.0383, + "step": 21920 + }, + { + "epoch": 1354.320987654321, + "learning_rate": 7.005326313211704e-07, + "loss": 3.0641, + "step": 21940 + }, + { + "epoch": 1355.5555555555557, + "learning_rate": 7.002571323619443e-07, + "loss": 2.9852, + "step": 21960 + }, + { + "epoch": 1356.79012345679, + "learning_rate": 6.999816334027183e-07, + "loss": 3.0579, + "step": 21980 + }, + { + "epoch": 1358.0246913580247, + "learning_rate": 6.99706134443492e-07, + "loss": 3.1143, + "step": 22000 + }, + { + "epoch": 1359.2592592592594, + "learning_rate": 6.994306354842659e-07, + "loss": 2.9811, + "step": 22020 + }, + { + "epoch": 1360.4938271604938, + "learning_rate": 6.991551365250397e-07, + "loss": 2.9968, + "step": 22040 + }, + { + "epoch": 1361.7283950617284, + "learning_rate": 6.988796375658136e-07, + "loss": 2.9376, + "step": 22060 + }, + { + "epoch": 1362.962962962963, + "learning_rate": 6.986041386065874e-07, + "loss": 2.7708, + "step": 22080 + }, + { + "epoch": 1364.1975308641975, + "learning_rate": 6.983286396473613e-07, + "loss": 2.7872, + "step": 22100 + }, + { + "epoch": 1365.432098765432, + "learning_rate": 6.980531406881352e-07, + "loss": 2.9491, + "step": 22120 + }, + { + "epoch": 1366.6666666666667, + "learning_rate": 6.977776417289089e-07, + "loss": 2.9494, + "step": 22140 + }, + { + "epoch": 1367.9012345679012, + "learning_rate": 6.975021427696829e-07, + "loss": 2.952, + "step": 22160 + }, + { + "epoch": 1369.1358024691358, + "learning_rate": 6.972266438104567e-07, + "loss": 2.8546, + "step": 22180 + }, + { + "epoch": 1370.3703703703704, + "learning_rate": 6.969511448512306e-07, + "loss": 2.9859, + "step": 22200 + }, + { + "epoch": 1371.6049382716049, + "learning_rate": 6.966756458920044e-07, + "loss": 3.0157, + "step": 22220 + }, + { + "epoch": 1372.8395061728395, + "learning_rate": 6.964001469327782e-07, + "loss": 3.0706, + "step": 22240 + }, + { + "epoch": 1374.0740740740741, + "learning_rate": 6.961246479735521e-07, + "loss": 3.0858, + "step": 22260 + }, + { + "epoch": 1375.3086419753085, + "learning_rate": 6.958491490143259e-07, + "loss": 2.9814, + "step": 22280 + }, + { + "epoch": 1376.5432098765432, + "learning_rate": 6.955736500550998e-07, + "loss": 3.0067, + "step": 22300 + }, + { + "epoch": 1377.7777777777778, + "learning_rate": 6.952981510958736e-07, + "loss": 2.993, + "step": 22320 + }, + { + "epoch": 1379.0123456790122, + "learning_rate": 6.950226521366475e-07, + "loss": 3.0182, + "step": 22340 + }, + { + "epoch": 1380.2469135802469, + "learning_rate": 6.947471531774214e-07, + "loss": 2.9773, + "step": 22360 + }, + { + "epoch": 1381.4814814814815, + "learning_rate": 6.944716542181952e-07, + "loss": 2.9697, + "step": 22380 + }, + { + "epoch": 1382.716049382716, + "learning_rate": 6.941961552589691e-07, + "loss": 3.0496, + "step": 22400 + }, + { + "epoch": 1383.9506172839506, + "learning_rate": 6.939206562997428e-07, + "loss": 3.0031, + "step": 22420 + }, + { + "epoch": 1385.1851851851852, + "learning_rate": 6.936451573405167e-07, + "loss": 3.0418, + "step": 22440 + }, + { + "epoch": 1386.4197530864199, + "learning_rate": 6.933696583812906e-07, + "loss": 3.0117, + "step": 22460 + }, + { + "epoch": 1387.6543209876543, + "learning_rate": 6.930941594220643e-07, + "loss": 2.9879, + "step": 22480 + }, + { + "epoch": 1388.888888888889, + "learning_rate": 6.928186604628382e-07, + "loss": 3.0156, + "step": 22500 + }, + { + "epoch": 1390.1234567901236, + "learning_rate": 6.92543161503612e-07, + "loss": 2.9637, + "step": 22520 + }, + { + "epoch": 1391.358024691358, + "learning_rate": 6.922676625443859e-07, + "loss": 3.0093, + "step": 22540 + }, + { + "epoch": 1392.5925925925926, + "learning_rate": 6.919921635851597e-07, + "loss": 3.0227, + "step": 22560 + }, + { + "epoch": 1393.8271604938273, + "learning_rate": 6.917166646259336e-07, + "loss": 3.0063, + "step": 22580 + }, + { + "epoch": 1395.0617283950617, + "learning_rate": 6.914411656667076e-07, + "loss": 3.0236, + "step": 22600 + }, + { + "epoch": 1396.2962962962963, + "learning_rate": 6.911656667074813e-07, + "loss": 2.9828, + "step": 22620 + }, + { + "epoch": 1397.530864197531, + "learning_rate": 6.908901677482552e-07, + "loss": 3.0113, + "step": 22640 + }, + { + "epoch": 1398.7654320987654, + "learning_rate": 6.90614668789029e-07, + "loss": 3.0021, + "step": 22660 + }, + { + "epoch": 1400.0, + "learning_rate": 6.903391698298028e-07, + "loss": 2.9998, + "step": 22680 + }, + { + "epoch": 1401.2345679012346, + "learning_rate": 6.900636708705766e-07, + "loss": 3.0051, + "step": 22700 + }, + { + "epoch": 1402.469135802469, + "learning_rate": 6.897881719113505e-07, + "loss": 3.0361, + "step": 22720 + }, + { + "epoch": 1403.7037037037037, + "learning_rate": 6.895126729521244e-07, + "loss": 3.0125, + "step": 22740 + }, + { + "epoch": 1404.9382716049383, + "learning_rate": 6.892371739928981e-07, + "loss": 2.9742, + "step": 22760 + }, + { + "epoch": 1406.1728395061727, + "learning_rate": 6.88961675033672e-07, + "loss": 2.9415, + "step": 22780 + }, + { + "epoch": 1407.4074074074074, + "learning_rate": 6.886861760744458e-07, + "loss": 2.9931, + "step": 22800 + }, + { + "epoch": 1408.641975308642, + "learning_rate": 6.884106771152197e-07, + "loss": 2.9967, + "step": 22820 + }, + { + "epoch": 1409.8765432098764, + "learning_rate": 6.881351781559936e-07, + "loss": 2.971, + "step": 22840 + }, + { + "epoch": 1411.111111111111, + "learning_rate": 6.878596791967674e-07, + "loss": 2.9922, + "step": 22860 + }, + { + "epoch": 1412.3456790123457, + "learning_rate": 6.875841802375413e-07, + "loss": 3.0975, + "step": 22880 + }, + { + "epoch": 1413.5802469135801, + "learning_rate": 6.873086812783151e-07, + "loss": 2.9825, + "step": 22900 + }, + { + "epoch": 1414.8148148148148, + "learning_rate": 6.87033182319089e-07, + "loss": 3.023, + "step": 22920 + }, + { + "epoch": 1416.0493827160494, + "learning_rate": 6.86757683359863e-07, + "loss": 3.0236, + "step": 22940 + }, + { + "epoch": 1417.283950617284, + "learning_rate": 6.864821844006366e-07, + "loss": 3.0156, + "step": 22960 + }, + { + "epoch": 1418.5185185185185, + "learning_rate": 6.862066854414106e-07, + "loss": 2.9708, + "step": 22980 + }, + { + "epoch": 1419.7530864197531, + "learning_rate": 6.859311864821844e-07, + "loss": 3.0535, + "step": 23000 + }, + { + "epoch": 1420.9876543209878, + "learning_rate": 6.856556875229582e-07, + "loss": 2.9992, + "step": 23020 + }, + { + "epoch": 1422.2222222222222, + "learning_rate": 6.85380188563732e-07, + "loss": 2.9647, + "step": 23040 + }, + { + "epoch": 1423.4567901234568, + "learning_rate": 6.851046896045059e-07, + "loss": 2.9628, + "step": 23060 + }, + { + "epoch": 1424.6913580246915, + "learning_rate": 6.848291906452798e-07, + "loss": 3.0062, + "step": 23080 + }, + { + "epoch": 1425.9259259259259, + "learning_rate": 6.845536916860536e-07, + "loss": 2.943, + "step": 23100 + }, + { + "epoch": 1427.1604938271605, + "learning_rate": 6.842781927268275e-07, + "loss": 3.0331, + "step": 23120 + }, + { + "epoch": 1428.3950617283951, + "learning_rate": 6.840026937676013e-07, + "loss": 3.0069, + "step": 23140 + }, + { + "epoch": 1429.6296296296296, + "learning_rate": 6.837271948083752e-07, + "loss": 3.1004, + "step": 23160 + }, + { + "epoch": 1430.8641975308642, + "learning_rate": 6.83451695849149e-07, + "loss": 2.9466, + "step": 23180 + }, + { + "epoch": 1432.0987654320988, + "learning_rate": 6.831761968899229e-07, + "loss": 3.0308, + "step": 23200 + }, + { + "epoch": 1433.3333333333333, + "learning_rate": 6.829006979306967e-07, + "loss": 3.0082, + "step": 23220 + }, + { + "epoch": 1434.567901234568, + "learning_rate": 6.826251989714705e-07, + "loss": 2.9983, + "step": 23240 + }, + { + "epoch": 1435.8024691358025, + "learning_rate": 6.823497000122444e-07, + "loss": 2.9632, + "step": 23260 + }, + { + "epoch": 1437.037037037037, + "learning_rate": 6.820742010530181e-07, + "loss": 2.986, + "step": 23280 + }, + { + "epoch": 1438.2716049382716, + "learning_rate": 6.81798702093792e-07, + "loss": 3.0113, + "step": 23300 + }, + { + "epoch": 1439.5061728395062, + "learning_rate": 6.81523203134566e-07, + "loss": 2.9418, + "step": 23320 + }, + { + "epoch": 1440.7407407407406, + "learning_rate": 6.812477041753397e-07, + "loss": 2.9326, + "step": 23340 + }, + { + "epoch": 1441.9753086419753, + "learning_rate": 6.809722052161136e-07, + "loss": 2.9788, + "step": 23360 + }, + { + "epoch": 1443.20987654321, + "learning_rate": 6.806967062568874e-07, + "loss": 3.0468, + "step": 23380 + }, + { + "epoch": 1444.4444444444443, + "learning_rate": 6.804212072976613e-07, + "loss": 3.0129, + "step": 23400 + }, + { + "epoch": 1445.679012345679, + "learning_rate": 6.801457083384351e-07, + "loss": 2.9964, + "step": 23420 + }, + { + "epoch": 1446.9135802469136, + "learning_rate": 6.79870209379209e-07, + "loss": 3.0094, + "step": 23440 + }, + { + "epoch": 1448.148148148148, + "learning_rate": 6.795947104199829e-07, + "loss": 2.9579, + "step": 23460 + }, + { + "epoch": 1449.3827160493827, + "learning_rate": 6.793192114607567e-07, + "loss": 3.0166, + "step": 23480 + }, + { + "epoch": 1450.6172839506173, + "learning_rate": 6.790437125015305e-07, + "loss": 3.0441, + "step": 23500 + }, + { + "epoch": 1451.851851851852, + "learning_rate": 6.787682135423043e-07, + "loss": 2.9766, + "step": 23520 + }, + { + "epoch": 1453.0864197530864, + "learning_rate": 6.784927145830782e-07, + "loss": 3.0122, + "step": 23540 + }, + { + "epoch": 1454.320987654321, + "learning_rate": 6.782172156238519e-07, + "loss": 3.0115, + "step": 23560 + }, + { + "epoch": 1455.5555555555557, + "learning_rate": 6.779417166646258e-07, + "loss": 3.0546, + "step": 23580 + }, + { + "epoch": 1456.79012345679, + "learning_rate": 6.776662177053997e-07, + "loss": 2.9454, + "step": 23600 + }, + { + "epoch": 1458.0246913580247, + "learning_rate": 6.773907187461735e-07, + "loss": 3.0118, + "step": 23620 + }, + { + "epoch": 1459.2592592592594, + "learning_rate": 6.771152197869475e-07, + "loss": 3.0519, + "step": 23640 + }, + { + "epoch": 1460.4938271604938, + "learning_rate": 6.768397208277213e-07, + "loss": 2.9304, + "step": 23660 + }, + { + "epoch": 1461.7283950617284, + "learning_rate": 6.765642218684952e-07, + "loss": 2.9727, + "step": 23680 + }, + { + "epoch": 1462.962962962963, + "learning_rate": 6.762887229092691e-07, + "loss": 2.9836, + "step": 23700 + }, + { + "epoch": 1464.1975308641975, + "learning_rate": 6.760132239500429e-07, + "loss": 3.0426, + "step": 23720 + }, + { + "epoch": 1465.432098765432, + "learning_rate": 6.757377249908168e-07, + "loss": 3.0034, + "step": 23740 + }, + { + "epoch": 1466.6666666666667, + "learning_rate": 6.754622260315905e-07, + "loss": 2.9911, + "step": 23760 + }, + { + "epoch": 1467.9012345679012, + "learning_rate": 6.751867270723644e-07, + "loss": 2.9945, + "step": 23780 + }, + { + "epoch": 1469.1358024691358, + "learning_rate": 6.749112281131383e-07, + "loss": 2.9749, + "step": 23800 + }, + { + "epoch": 1470.3703703703704, + "learning_rate": 6.74635729153912e-07, + "loss": 2.9992, + "step": 23820 + }, + { + "epoch": 1471.6049382716049, + "learning_rate": 6.743602301946859e-07, + "loss": 2.9742, + "step": 23840 + }, + { + "epoch": 1472.8395061728395, + "learning_rate": 6.740847312354597e-07, + "loss": 3.0011, + "step": 23860 + }, + { + "epoch": 1474.0740740740741, + "learning_rate": 6.738092322762336e-07, + "loss": 2.9287, + "step": 23880 + }, + { + "epoch": 1475.3086419753085, + "learning_rate": 6.735337333170074e-07, + "loss": 2.9665, + "step": 23900 + }, + { + "epoch": 1476.5432098765432, + "learning_rate": 6.732582343577813e-07, + "loss": 2.973, + "step": 23920 + }, + { + "epoch": 1477.7777777777778, + "learning_rate": 6.729827353985553e-07, + "loss": 2.9416, + "step": 23940 + }, + { + "epoch": 1479.0123456790122, + "learning_rate": 6.72707236439329e-07, + "loss": 3.0906, + "step": 23960 + }, + { + "epoch": 1480.2469135802469, + "learning_rate": 6.724317374801029e-07, + "loss": 2.9303, + "step": 23980 + }, + { + "epoch": 1481.4814814814815, + "learning_rate": 6.721562385208767e-07, + "loss": 2.9875, + "step": 24000 + }, + { + "epoch": 1482.716049382716, + "learning_rate": 6.718807395616506e-07, + "loss": 2.9577, + "step": 24020 + }, + { + "epoch": 1483.9506172839506, + "learning_rate": 6.716052406024243e-07, + "loss": 3.0099, + "step": 24040 + }, + { + "epoch": 1485.1851851851852, + "learning_rate": 6.713297416431982e-07, + "loss": 2.9635, + "step": 24060 + }, + { + "epoch": 1486.4197530864199, + "learning_rate": 6.710542426839721e-07, + "loss": 3.0368, + "step": 24080 + }, + { + "epoch": 1487.6543209876543, + "learning_rate": 6.707787437247458e-07, + "loss": 2.8998, + "step": 24100 + }, + { + "epoch": 1488.888888888889, + "learning_rate": 6.705032447655197e-07, + "loss": 2.9467, + "step": 24120 + }, + { + "epoch": 1490.1234567901236, + "learning_rate": 6.702277458062935e-07, + "loss": 3.0443, + "step": 24140 + }, + { + "epoch": 1491.358024691358, + "learning_rate": 6.699522468470674e-07, + "loss": 2.9395, + "step": 24160 + }, + { + "epoch": 1492.5925925925926, + "learning_rate": 6.696767478878413e-07, + "loss": 3.0041, + "step": 24180 + }, + { + "epoch": 1493.8271604938273, + "learning_rate": 6.694012489286151e-07, + "loss": 2.9951, + "step": 24200 + }, + { + "epoch": 1495.0617283950617, + "learning_rate": 6.69125749969389e-07, + "loss": 3.0352, + "step": 24220 + }, + { + "epoch": 1496.2962962962963, + "learning_rate": 6.688502510101628e-07, + "loss": 2.9081, + "step": 24240 + }, + { + "epoch": 1497.530864197531, + "learning_rate": 6.685747520509367e-07, + "loss": 3.0379, + "step": 24260 + }, + { + "epoch": 1498.7654320987654, + "learning_rate": 6.682992530917105e-07, + "loss": 2.9549, + "step": 24280 + }, + { + "epoch": 1500.0, + "learning_rate": 6.680237541324844e-07, + "loss": 3.0366, + "step": 24300 + }, + { + "epoch": 1501.2345679012346, + "learning_rate": 6.677482551732582e-07, + "loss": 2.9446, + "step": 24320 + }, + { + "epoch": 1502.469135802469, + "learning_rate": 6.67472756214032e-07, + "loss": 2.9547, + "step": 24340 + }, + { + "epoch": 1503.7037037037037, + "learning_rate": 6.671972572548059e-07, + "loss": 2.9982, + "step": 24360 + }, + { + "epoch": 1504.9382716049383, + "learning_rate": 6.669217582955797e-07, + "loss": 3.0066, + "step": 24380 + }, + { + "epoch": 1506.1728395061727, + "learning_rate": 6.666462593363536e-07, + "loss": 3.0048, + "step": 24400 + }, + { + "epoch": 1507.4074074074074, + "learning_rate": 6.663707603771275e-07, + "loss": 3.0346, + "step": 24420 + }, + { + "epoch": 1508.641975308642, + "learning_rate": 6.660952614179013e-07, + "loss": 2.9453, + "step": 24440 + }, + { + "epoch": 1509.8765432098764, + "learning_rate": 6.658197624586752e-07, + "loss": 2.9708, + "step": 24460 + }, + { + "epoch": 1511.111111111111, + "learning_rate": 6.65544263499449e-07, + "loss": 2.9684, + "step": 24480 + }, + { + "epoch": 1512.3456790123457, + "learning_rate": 6.652687645402229e-07, + "loss": 2.9751, + "step": 24500 + }, + { + "epoch": 1513.5802469135801, + "learning_rate": 6.649932655809967e-07, + "loss": 2.9727, + "step": 24520 + }, + { + "epoch": 1514.8148148148148, + "learning_rate": 6.647177666217706e-07, + "loss": 2.9699, + "step": 24540 + }, + { + "epoch": 1516.0493827160494, + "learning_rate": 6.644422676625444e-07, + "loss": 2.9626, + "step": 24560 + }, + { + "epoch": 1517.283950617284, + "learning_rate": 6.641667687033182e-07, + "loss": 3.0035, + "step": 24580 + }, + { + "epoch": 1518.5185185185185, + "learning_rate": 6.638912697440921e-07, + "loss": 2.9803, + "step": 24600 + }, + { + "epoch": 1519.7530864197531, + "learning_rate": 6.636157707848659e-07, + "loss": 2.9758, + "step": 24620 + }, + { + "epoch": 1520.9876543209878, + "learning_rate": 6.633402718256397e-07, + "loss": 2.9671, + "step": 24640 + }, + { + "epoch": 1522.2222222222222, + "learning_rate": 6.630647728664134e-07, + "loss": 3.0017, + "step": 24660 + }, + { + "epoch": 1523.4567901234568, + "learning_rate": 6.627892739071874e-07, + "loss": 2.9997, + "step": 24680 + }, + { + "epoch": 1524.6913580246915, + "learning_rate": 6.625137749479613e-07, + "loss": 2.9376, + "step": 24700 + }, + { + "epoch": 1525.9259259259259, + "learning_rate": 6.622382759887351e-07, + "loss": 2.9338, + "step": 24720 + }, + { + "epoch": 1527.1604938271605, + "learning_rate": 6.61962777029509e-07, + "loss": 3.0424, + "step": 24740 + }, + { + "epoch": 1528.3950617283951, + "learning_rate": 6.616872780702828e-07, + "loss": 3.0168, + "step": 24760 + }, + { + "epoch": 1529.6296296296296, + "learning_rate": 6.614117791110567e-07, + "loss": 2.9788, + "step": 24780 + }, + { + "epoch": 1530.8641975308642, + "learning_rate": 6.611362801518306e-07, + "loss": 2.968, + "step": 24800 + }, + { + "epoch": 1532.0987654320988, + "learning_rate": 6.608607811926044e-07, + "loss": 3.0261, + "step": 24820 + }, + { + "epoch": 1533.3333333333333, + "learning_rate": 6.605852822333782e-07, + "loss": 3.0065, + "step": 24840 + }, + { + "epoch": 1534.567901234568, + "learning_rate": 6.60309783274152e-07, + "loss": 2.9672, + "step": 24860 + }, + { + "epoch": 1535.8024691358025, + "learning_rate": 6.600342843149259e-07, + "loss": 2.9691, + "step": 24880 + }, + { + "epoch": 1537.037037037037, + "learning_rate": 6.597587853556996e-07, + "loss": 2.9562, + "step": 24900 + }, + { + "epoch": 1538.2716049382716, + "learning_rate": 6.594832863964735e-07, + "loss": 2.9616, + "step": 24920 + }, + { + "epoch": 1539.5061728395062, + "learning_rate": 6.592077874372474e-07, + "loss": 3.0074, + "step": 24940 + }, + { + "epoch": 1540.7407407407406, + "learning_rate": 6.589322884780212e-07, + "loss": 2.9765, + "step": 24960 + }, + { + "epoch": 1541.9753086419753, + "learning_rate": 6.586567895187951e-07, + "loss": 2.9252, + "step": 24980 + }, + { + "epoch": 1543.20987654321, + "learning_rate": 6.583812905595689e-07, + "loss": 2.9334, + "step": 25000 + }, + { + "epoch": 1544.4444444444443, + "learning_rate": 6.581057916003428e-07, + "loss": 2.9412, + "step": 25020 + }, + { + "epoch": 1545.679012345679, + "learning_rate": 6.578302926411167e-07, + "loss": 2.9712, + "step": 25040 + }, + { + "epoch": 1546.9135802469136, + "learning_rate": 6.575547936818905e-07, + "loss": 3.0192, + "step": 25060 + }, + { + "epoch": 1548.148148148148, + "learning_rate": 6.572792947226644e-07, + "loss": 3.0106, + "step": 25080 + }, + { + "epoch": 1549.3827160493827, + "learning_rate": 6.570037957634381e-07, + "loss": 2.9342, + "step": 25100 + }, + { + "epoch": 1550.6172839506173, + "learning_rate": 6.56728296804212e-07, + "loss": 2.9943, + "step": 25120 + }, + { + "epoch": 1551.851851851852, + "learning_rate": 6.564527978449859e-07, + "loss": 2.8885, + "step": 25140 + }, + { + "epoch": 1553.0864197530864, + "learning_rate": 6.561772988857598e-07, + "loss": 2.9782, + "step": 25160 + }, + { + "epoch": 1554.320987654321, + "learning_rate": 6.559017999265336e-07, + "loss": 2.9691, + "step": 25180 + }, + { + "epoch": 1555.5555555555557, + "learning_rate": 6.556263009673074e-07, + "loss": 2.9399, + "step": 25200 + }, + { + "epoch": 1556.79012345679, + "learning_rate": 6.553508020080813e-07, + "loss": 2.9602, + "step": 25220 + }, + { + "epoch": 1558.0246913580247, + "learning_rate": 6.550753030488551e-07, + "loss": 2.9456, + "step": 25240 + }, + { + "epoch": 1559.2592592592594, + "learning_rate": 6.54799804089629e-07, + "loss": 3.0105, + "step": 25260 + }, + { + "epoch": 1560.4938271604938, + "learning_rate": 6.545243051304027e-07, + "loss": 2.9561, + "step": 25280 + }, + { + "epoch": 1561.7283950617284, + "learning_rate": 6.542488061711767e-07, + "loss": 2.9641, + "step": 25300 + }, + { + "epoch": 1562.962962962963, + "learning_rate": 6.539733072119506e-07, + "loss": 3.0078, + "step": 25320 + }, + { + "epoch": 1564.1975308641975, + "learning_rate": 6.536978082527244e-07, + "loss": 2.9818, + "step": 25340 + }, + { + "epoch": 1565.432098765432, + "learning_rate": 6.534223092934983e-07, + "loss": 3.0106, + "step": 25360 + }, + { + "epoch": 1566.6666666666667, + "learning_rate": 6.53146810334272e-07, + "loss": 2.9646, + "step": 25380 + }, + { + "epoch": 1567.9012345679012, + "learning_rate": 6.528713113750459e-07, + "loss": 2.9095, + "step": 25400 + }, + { + "epoch": 1569.1358024691358, + "learning_rate": 6.525958124158198e-07, + "loss": 2.9554, + "step": 25420 + }, + { + "epoch": 1570.3703703703704, + "learning_rate": 6.523203134565935e-07, + "loss": 2.8864, + "step": 25440 + }, + { + "epoch": 1571.6049382716049, + "learning_rate": 6.520448144973674e-07, + "loss": 2.9818, + "step": 25460 + }, + { + "epoch": 1572.8395061728395, + "learning_rate": 6.517693155381412e-07, + "loss": 2.9394, + "step": 25480 + }, + { + "epoch": 1574.0740740740741, + "learning_rate": 6.514938165789151e-07, + "loss": 2.9811, + "step": 25500 + }, + { + "epoch": 1575.3086419753085, + "learning_rate": 6.51218317619689e-07, + "loss": 2.963, + "step": 25520 + }, + { + "epoch": 1576.5432098765432, + "learning_rate": 6.509428186604628e-07, + "loss": 2.9593, + "step": 25540 + }, + { + "epoch": 1577.7777777777778, + "learning_rate": 6.506673197012367e-07, + "loss": 2.9009, + "step": 25560 + }, + { + "epoch": 1579.0123456790122, + "learning_rate": 6.503918207420105e-07, + "loss": 2.9137, + "step": 25580 + }, + { + "epoch": 1580.2469135802469, + "learning_rate": 6.501163217827844e-07, + "loss": 2.8327, + "step": 25600 + }, + { + "epoch": 1581.4814814814815, + "learning_rate": 6.498408228235582e-07, + "loss": 2.9212, + "step": 25620 + }, + { + "epoch": 1582.716049382716, + "learning_rate": 6.495653238643321e-07, + "loss": 2.8549, + "step": 25640 + }, + { + "epoch": 1583.9506172839506, + "learning_rate": 6.492898249051059e-07, + "loss": 2.889, + "step": 25660 + }, + { + "epoch": 1585.1851851851852, + "learning_rate": 6.490143259458797e-07, + "loss": 2.8057, + "step": 25680 + }, + { + "epoch": 1586.4197530864199, + "learning_rate": 6.487388269866536e-07, + "loss": 2.7912, + "step": 25700 + }, + { + "epoch": 1587.6543209876543, + "learning_rate": 6.484633280274273e-07, + "loss": 2.8488, + "step": 25720 + }, + { + "epoch": 1588.888888888889, + "learning_rate": 6.481878290682012e-07, + "loss": 2.8975, + "step": 25740 + }, + { + "epoch": 1590.1234567901236, + "learning_rate": 6.47912330108975e-07, + "loss": 2.7681, + "step": 25760 + }, + { + "epoch": 1591.358024691358, + "learning_rate": 6.476368311497489e-07, + "loss": 2.3341, + "step": 25780 + }, + { + "epoch": 1592.5925925925926, + "learning_rate": 6.473613321905228e-07, + "loss": 2.1036, + "step": 25800 + }, + { + "epoch": 1593.8271604938273, + "learning_rate": 6.470858332312966e-07, + "loss": 2.0129, + "step": 25820 + }, + { + "epoch": 1595.0617283950617, + "learning_rate": 6.468103342720705e-07, + "loss": 2.0491, + "step": 25840 + }, + { + "epoch": 1596.2962962962963, + "learning_rate": 6.465348353128443e-07, + "loss": 2.0864, + "step": 25860 + }, + { + "epoch": 1597.530864197531, + "learning_rate": 6.462593363536182e-07, + "loss": 2.1008, + "step": 25880 + }, + { + "epoch": 1598.7654320987654, + "learning_rate": 6.459838373943921e-07, + "loss": 1.9265, + "step": 25900 + }, + { + "epoch": 1600.0, + "learning_rate": 6.457083384351659e-07, + "loss": 1.8548, + "step": 25920 + }, + { + "epoch": 1601.2345679012346, + "learning_rate": 6.454328394759398e-07, + "loss": 1.8596, + "step": 25940 + }, + { + "epoch": 1602.469135802469, + "learning_rate": 6.451573405167136e-07, + "loss": 1.7976, + "step": 25960 + }, + { + "epoch": 1603.7037037037037, + "learning_rate": 6.448818415574874e-07, + "loss": 1.7973, + "step": 25980 + }, + { + "epoch": 1604.9382716049383, + "learning_rate": 6.446063425982611e-07, + "loss": 1.8429, + "step": 26000 + }, + { + "epoch": 1606.1728395061727, + "learning_rate": 6.443308436390351e-07, + "loss": 1.8667, + "step": 26020 + }, + { + "epoch": 1607.4074074074074, + "learning_rate": 6.44055344679809e-07, + "loss": 1.8682, + "step": 26040 + }, + { + "epoch": 1608.641975308642, + "learning_rate": 6.437798457205828e-07, + "loss": 1.8548, + "step": 26060 + }, + { + "epoch": 1609.8765432098764, + "learning_rate": 6.435043467613567e-07, + "loss": 1.787, + "step": 26080 + }, + { + "epoch": 1611.111111111111, + "learning_rate": 6.432288478021305e-07, + "loss": 1.8336, + "step": 26100 + }, + { + "epoch": 1612.3456790123457, + "learning_rate": 6.429533488429044e-07, + "loss": 1.7849, + "step": 26120 + }, + { + "epoch": 1613.5802469135801, + "learning_rate": 6.426778498836783e-07, + "loss": 1.7917, + "step": 26140 + }, + { + "epoch": 1614.8148148148148, + "learning_rate": 6.424023509244521e-07, + "loss": 1.7625, + "step": 26160 + }, + { + "epoch": 1616.0493827160494, + "learning_rate": 6.421268519652259e-07, + "loss": 1.8147, + "step": 26180 + }, + { + "epoch": 1617.283950617284, + "learning_rate": 6.418513530059997e-07, + "loss": 1.749, + "step": 26200 + }, + { + "epoch": 1618.5185185185185, + "learning_rate": 6.415758540467736e-07, + "loss": 1.7624, + "step": 26220 + }, + { + "epoch": 1619.7530864197531, + "learning_rate": 6.413003550875474e-07, + "loss": 1.7598, + "step": 26240 + }, + { + "epoch": 1620.9876543209878, + "learning_rate": 6.410248561283212e-07, + "loss": 1.7928, + "step": 26260 + }, + { + "epoch": 1622.2222222222222, + "learning_rate": 6.407493571690951e-07, + "loss": 1.7973, + "step": 26280 + }, + { + "epoch": 1623.4567901234568, + "learning_rate": 6.404738582098689e-07, + "loss": 1.7961, + "step": 26300 + }, + { + "epoch": 1624.6913580246915, + "learning_rate": 6.401983592506428e-07, + "loss": 1.811, + "step": 26320 + }, + { + "epoch": 1625.9259259259259, + "learning_rate": 6.399228602914166e-07, + "loss": 1.7704, + "step": 26340 + }, + { + "epoch": 1627.1604938271605, + "learning_rate": 6.396473613321905e-07, + "loss": 1.7649, + "step": 26360 + }, + { + "epoch": 1628.3950617283951, + "learning_rate": 6.393718623729643e-07, + "loss": 1.7882, + "step": 26380 + }, + { + "epoch": 1629.6296296296296, + "learning_rate": 6.390963634137382e-07, + "loss": 1.7661, + "step": 26400 + }, + { + "epoch": 1630.8641975308642, + "learning_rate": 6.388208644545121e-07, + "loss": 1.7519, + "step": 26420 + }, + { + "epoch": 1632.0987654320988, + "learning_rate": 6.385453654952859e-07, + "loss": 1.7378, + "step": 26440 + }, + { + "epoch": 1633.3333333333333, + "learning_rate": 6.382698665360597e-07, + "loss": 1.7445, + "step": 26460 + }, + { + "epoch": 1634.567901234568, + "learning_rate": 6.379943675768335e-07, + "loss": 1.7006, + "step": 26480 + }, + { + "epoch": 1635.8024691358025, + "learning_rate": 6.377188686176074e-07, + "loss": 1.8112, + "step": 26500 + }, + { + "epoch": 1637.037037037037, + "learning_rate": 6.374433696583811e-07, + "loss": 1.7643, + "step": 26520 + }, + { + "epoch": 1638.2716049382716, + "learning_rate": 6.37167870699155e-07, + "loss": 1.7564, + "step": 26540 + }, + { + "epoch": 1639.5061728395062, + "learning_rate": 6.368923717399289e-07, + "loss": 1.7366, + "step": 26560 + }, + { + "epoch": 1640.7407407407406, + "learning_rate": 6.366168727807027e-07, + "loss": 1.7218, + "step": 26580 + }, + { + "epoch": 1641.9753086419753, + "learning_rate": 6.363413738214766e-07, + "loss": 1.7528, + "step": 26600 + }, + { + "epoch": 1643.20987654321, + "learning_rate": 6.360658748622505e-07, + "loss": 1.7528, + "step": 26620 + }, + { + "epoch": 1644.4444444444443, + "learning_rate": 6.357903759030244e-07, + "loss": 1.7733, + "step": 26640 + }, + { + "epoch": 1645.679012345679, + "learning_rate": 6.355148769437983e-07, + "loss": 1.7352, + "step": 26660 + }, + { + "epoch": 1646.9135802469136, + "learning_rate": 6.352393779845721e-07, + "loss": 1.7101, + "step": 26680 + }, + { + "epoch": 1648.148148148148, + "learning_rate": 6.34963879025346e-07, + "loss": 1.7967, + "step": 26700 + }, + { + "epoch": 1649.3827160493827, + "learning_rate": 6.346883800661197e-07, + "loss": 1.7317, + "step": 26720 + }, + { + "epoch": 1650.6172839506173, + "learning_rate": 6.344128811068936e-07, + "loss": 1.766, + "step": 26740 + }, + { + "epoch": 1651.851851851852, + "learning_rate": 6.341373821476675e-07, + "loss": 1.7895, + "step": 26760 + }, + { + "epoch": 1653.0864197530864, + "learning_rate": 6.338618831884413e-07, + "loss": 1.7515, + "step": 26780 + }, + { + "epoch": 1654.320987654321, + "learning_rate": 6.335863842292151e-07, + "loss": 1.7262, + "step": 26800 + }, + { + "epoch": 1655.5555555555557, + "learning_rate": 6.333108852699889e-07, + "loss": 1.7339, + "step": 26820 + }, + { + "epoch": 1656.79012345679, + "learning_rate": 6.330353863107628e-07, + "loss": 1.721, + "step": 26840 + }, + { + "epoch": 1658.0246913580247, + "learning_rate": 6.327598873515366e-07, + "loss": 1.757, + "step": 26860 + }, + { + "epoch": 1659.2592592592594, + "learning_rate": 6.324843883923105e-07, + "loss": 1.7432, + "step": 26880 + }, + { + "epoch": 1660.4938271604938, + "learning_rate": 6.322088894330844e-07, + "loss": 1.7564, + "step": 26900 + }, + { + "epoch": 1661.7283950617284, + "learning_rate": 6.319333904738582e-07, + "loss": 1.7716, + "step": 26920 + }, + { + "epoch": 1662.962962962963, + "learning_rate": 6.316578915146321e-07, + "loss": 1.7049, + "step": 26940 + }, + { + "epoch": 1664.1975308641975, + "learning_rate": 6.313823925554059e-07, + "loss": 1.7469, + "step": 26960 + }, + { + "epoch": 1665.432098765432, + "learning_rate": 6.311068935961798e-07, + "loss": 1.7347, + "step": 26980 + }, + { + "epoch": 1666.6666666666667, + "learning_rate": 6.308313946369535e-07, + "loss": 1.7357, + "step": 27000 + }, + { + "epoch": 1667.9012345679012, + "learning_rate": 6.305558956777274e-07, + "loss": 1.7542, + "step": 27020 + }, + { + "epoch": 1669.1358024691358, + "learning_rate": 6.302803967185013e-07, + "loss": 1.6961, + "step": 27040 + }, + { + "epoch": 1670.3703703703704, + "learning_rate": 6.30004897759275e-07, + "loss": 1.7537, + "step": 27060 + }, + { + "epoch": 1671.6049382716049, + "learning_rate": 6.297293988000489e-07, + "loss": 1.7612, + "step": 27080 + }, + { + "epoch": 1672.8395061728395, + "learning_rate": 6.294538998408227e-07, + "loss": 1.7779, + "step": 27100 + }, + { + "epoch": 1674.0740740740741, + "learning_rate": 6.291784008815966e-07, + "loss": 1.7238, + "step": 27120 + }, + { + "epoch": 1675.3086419753085, + "learning_rate": 6.289029019223705e-07, + "loss": 1.7107, + "step": 27140 + }, + { + "epoch": 1676.5432098765432, + "learning_rate": 6.286274029631443e-07, + "loss": 1.7065, + "step": 27160 + }, + { + "epoch": 1677.7777777777778, + "learning_rate": 6.283519040039182e-07, + "loss": 1.7517, + "step": 27180 + }, + { + "epoch": 1679.0123456790122, + "learning_rate": 6.28076405044692e-07, + "loss": 1.7725, + "step": 27200 + }, + { + "epoch": 1680.2469135802469, + "learning_rate": 6.278009060854659e-07, + "loss": 1.7428, + "step": 27220 + }, + { + "epoch": 1681.4814814814815, + "learning_rate": 6.275254071262399e-07, + "loss": 1.7686, + "step": 27240 + }, + { + "epoch": 1682.716049382716, + "learning_rate": 6.272499081670135e-07, + "loss": 1.7229, + "step": 27260 + }, + { + "epoch": 1683.9506172839506, + "learning_rate": 6.269744092077874e-07, + "loss": 1.7524, + "step": 27280 + }, + { + "epoch": 1685.1851851851852, + "learning_rate": 6.266989102485612e-07, + "loss": 1.7512, + "step": 27300 + }, + { + "epoch": 1686.4197530864199, + "learning_rate": 6.264234112893351e-07, + "loss": 1.7376, + "step": 27320 + }, + { + "epoch": 1687.6543209876543, + "learning_rate": 6.261479123301089e-07, + "loss": 1.7337, + "step": 27340 + }, + { + "epoch": 1688.888888888889, + "learning_rate": 6.258724133708828e-07, + "loss": 1.7469, + "step": 27360 + }, + { + "epoch": 1690.1234567901236, + "learning_rate": 6.255969144116567e-07, + "loss": 1.7249, + "step": 27380 + }, + { + "epoch": 1691.358024691358, + "learning_rate": 6.253214154524305e-07, + "loss": 1.728, + "step": 27400 + }, + { + "epoch": 1692.5925925925926, + "learning_rate": 6.250459164932044e-07, + "loss": 1.7316, + "step": 27420 + }, + { + "epoch": 1693.8271604938273, + "learning_rate": 6.247704175339782e-07, + "loss": 1.7038, + "step": 27440 + }, + { + "epoch": 1695.0617283950617, + "learning_rate": 6.244949185747521e-07, + "loss": 1.7378, + "step": 27460 + }, + { + "epoch": 1696.2962962962963, + "learning_rate": 6.24219419615526e-07, + "loss": 1.7121, + "step": 27480 + }, + { + "epoch": 1697.530864197531, + "learning_rate": 6.239439206562998e-07, + "loss": 1.7308, + "step": 27500 + }, + { + "epoch": 1698.7654320987654, + "learning_rate": 6.236684216970737e-07, + "loss": 1.7108, + "step": 27520 + }, + { + "epoch": 1700.0, + "learning_rate": 6.233929227378474e-07, + "loss": 1.7702, + "step": 27540 + }, + { + "epoch": 1701.2345679012346, + "learning_rate": 6.231174237786213e-07, + "loss": 1.7137, + "step": 27560 + }, + { + "epoch": 1702.469135802469, + "learning_rate": 6.228419248193951e-07, + "loss": 1.7127, + "step": 27580 + }, + { + "epoch": 1703.7037037037037, + "learning_rate": 6.225664258601689e-07, + "loss": 1.7142, + "step": 27600 + }, + { + "epoch": 1704.9382716049383, + "learning_rate": 6.222909269009427e-07, + "loss": 1.771, + "step": 27620 + }, + { + "epoch": 1706.1728395061727, + "learning_rate": 6.220154279417166e-07, + "loss": 1.7219, + "step": 27640 + }, + { + "epoch": 1707.4074074074074, + "learning_rate": 6.217399289824905e-07, + "loss": 1.7772, + "step": 27660 + }, + { + "epoch": 1708.641975308642, + "learning_rate": 6.214644300232643e-07, + "loss": 1.6827, + "step": 27680 + }, + { + "epoch": 1709.8765432098764, + "learning_rate": 6.211889310640382e-07, + "loss": 1.7013, + "step": 27700 + }, + { + "epoch": 1711.111111111111, + "learning_rate": 6.20913432104812e-07, + "loss": 1.7148, + "step": 27720 + }, + { + "epoch": 1712.3456790123457, + "learning_rate": 6.206379331455859e-07, + "loss": 1.7623, + "step": 27740 + }, + { + "epoch": 1713.5802469135801, + "learning_rate": 6.203624341863598e-07, + "loss": 1.7249, + "step": 27760 + }, + { + "epoch": 1714.8148148148148, + "learning_rate": 6.200869352271336e-07, + "loss": 1.7041, + "step": 27780 + }, + { + "epoch": 1716.0493827160494, + "learning_rate": 6.198114362679074e-07, + "loss": 1.6833, + "step": 27800 + }, + { + "epoch": 1717.283950617284, + "learning_rate": 6.195359373086812e-07, + "loss": 1.7555, + "step": 27820 + }, + { + "epoch": 1718.5185185185185, + "learning_rate": 6.192604383494551e-07, + "loss": 1.6939, + "step": 27840 + }, + { + "epoch": 1719.7530864197531, + "learning_rate": 6.18984939390229e-07, + "loss": 1.6779, + "step": 27860 + }, + { + "epoch": 1720.9876543209878, + "learning_rate": 6.187094404310027e-07, + "loss": 1.7628, + "step": 27880 + }, + { + "epoch": 1722.2222222222222, + "learning_rate": 6.184339414717766e-07, + "loss": 1.7266, + "step": 27900 + }, + { + "epoch": 1723.4567901234568, + "learning_rate": 6.181584425125504e-07, + "loss": 1.7034, + "step": 27920 + }, + { + "epoch": 1724.6913580246915, + "learning_rate": 6.178829435533243e-07, + "loss": 1.758, + "step": 27940 + }, + { + "epoch": 1725.9259259259259, + "learning_rate": 6.176074445940982e-07, + "loss": 1.7253, + "step": 27960 + }, + { + "epoch": 1727.1604938271605, + "learning_rate": 6.17331945634872e-07, + "loss": 1.7962, + "step": 27980 + }, + { + "epoch": 1728.3950617283951, + "learning_rate": 6.170564466756459e-07, + "loss": 1.7053, + "step": 28000 + }, + { + "epoch": 1729.6296296296296, + "learning_rate": 6.167809477164197e-07, + "loss": 1.7582, + "step": 28020 + }, + { + "epoch": 1730.8641975308642, + "learning_rate": 6.165054487571936e-07, + "loss": 1.7062, + "step": 28040 + }, + { + "epoch": 1732.0987654320988, + "learning_rate": 6.162299497979674e-07, + "loss": 1.7443, + "step": 28060 + }, + { + "epoch": 1733.3333333333333, + "learning_rate": 6.159544508387412e-07, + "loss": 1.7365, + "step": 28080 + }, + { + "epoch": 1734.567901234568, + "learning_rate": 6.15678951879515e-07, + "loss": 1.6809, + "step": 28100 + }, + { + "epoch": 1735.8024691358025, + "learning_rate": 6.15403452920289e-07, + "loss": 1.7113, + "step": 28120 + }, + { + "epoch": 1737.037037037037, + "learning_rate": 6.151279539610628e-07, + "loss": 1.7568, + "step": 28140 + }, + { + "epoch": 1738.2716049382716, + "learning_rate": 6.148524550018366e-07, + "loss": 1.7214, + "step": 28160 + }, + { + "epoch": 1739.5061728395062, + "learning_rate": 6.145769560426105e-07, + "loss": 1.6456, + "step": 28180 + }, + { + "epoch": 1740.7407407407406, + "learning_rate": 6.143014570833843e-07, + "loss": 1.707, + "step": 28200 + }, + { + "epoch": 1741.9753086419753, + "learning_rate": 6.140259581241582e-07, + "loss": 1.6937, + "step": 28220 + }, + { + "epoch": 1743.20987654321, + "learning_rate": 6.137504591649321e-07, + "loss": 1.7067, + "step": 28240 + }, + { + "epoch": 1744.4444444444443, + "learning_rate": 6.134749602057059e-07, + "loss": 1.726, + "step": 28260 + }, + { + "epoch": 1745.679012345679, + "learning_rate": 6.131994612464798e-07, + "loss": 1.7014, + "step": 28280 + }, + { + "epoch": 1746.9135802469136, + "learning_rate": 6.129239622872536e-07, + "loss": 1.7578, + "step": 28300 + }, + { + "epoch": 1748.148148148148, + "learning_rate": 6.126484633280275e-07, + "loss": 1.6701, + "step": 28320 + }, + { + "epoch": 1749.3827160493827, + "learning_rate": 6.123729643688012e-07, + "loss": 1.6948, + "step": 28340 + }, + { + "epoch": 1750.6172839506173, + "learning_rate": 6.120974654095751e-07, + "loss": 1.699, + "step": 28360 + }, + { + "epoch": 1751.851851851852, + "learning_rate": 6.11821966450349e-07, + "loss": 1.6875, + "step": 28380 + }, + { + "epoch": 1753.0864197530864, + "learning_rate": 6.115464674911228e-07, + "loss": 1.7007, + "step": 28400 + }, + { + "epoch": 1754.320987654321, + "learning_rate": 6.112709685318966e-07, + "loss": 1.7035, + "step": 28420 + }, + { + "epoch": 1755.5555555555557, + "learning_rate": 6.109954695726704e-07, + "loss": 1.6506, + "step": 28440 + }, + { + "epoch": 1756.79012345679, + "learning_rate": 6.107199706134443e-07, + "loss": 1.7308, + "step": 28460 + }, + { + "epoch": 1758.0246913580247, + "learning_rate": 6.104444716542182e-07, + "loss": 1.7238, + "step": 28480 + }, + { + "epoch": 1759.2592592592594, + "learning_rate": 6.10168972694992e-07, + "loss": 1.6749, + "step": 28500 + }, + { + "epoch": 1760.4938271604938, + "learning_rate": 6.098934737357659e-07, + "loss": 1.6481, + "step": 28520 + }, + { + "epoch": 1761.7283950617284, + "learning_rate": 6.096179747765397e-07, + "loss": 1.7056, + "step": 28540 + }, + { + "epoch": 1762.962962962963, + "learning_rate": 6.093424758173136e-07, + "loss": 1.7139, + "step": 28560 + }, + { + "epoch": 1764.1975308641975, + "learning_rate": 6.090669768580873e-07, + "loss": 1.6868, + "step": 28580 + }, + { + "epoch": 1765.432098765432, + "learning_rate": 6.087914778988613e-07, + "loss": 1.7287, + "step": 28600 + }, + { + "epoch": 1766.6666666666667, + "learning_rate": 6.085159789396351e-07, + "loss": 1.676, + "step": 28620 + }, + { + "epoch": 1767.9012345679012, + "learning_rate": 6.082404799804089e-07, + "loss": 1.7063, + "step": 28640 + }, + { + "epoch": 1769.1358024691358, + "learning_rate": 6.079649810211828e-07, + "loss": 1.7094, + "step": 28660 + }, + { + "epoch": 1770.3703703703704, + "learning_rate": 6.076894820619566e-07, + "loss": 1.7092, + "step": 28680 + }, + { + "epoch": 1771.6049382716049, + "learning_rate": 6.074139831027304e-07, + "loss": 1.7185, + "step": 28700 + }, + { + "epoch": 1772.8395061728395, + "learning_rate": 6.071384841435042e-07, + "loss": 1.666, + "step": 28720 + }, + { + "epoch": 1774.0740740740741, + "learning_rate": 6.068629851842781e-07, + "loss": 1.7145, + "step": 28740 + }, + { + "epoch": 1775.3086419753085, + "learning_rate": 6.06587486225052e-07, + "loss": 1.7243, + "step": 28760 + }, + { + "epoch": 1776.5432098765432, + "learning_rate": 6.063119872658258e-07, + "loss": 1.7053, + "step": 28780 + }, + { + "epoch": 1777.7777777777778, + "learning_rate": 6.060364883065997e-07, + "loss": 1.7349, + "step": 28800 + }, + { + "epoch": 1779.0123456790122, + "learning_rate": 6.057609893473735e-07, + "loss": 1.7217, + "step": 28820 + }, + { + "epoch": 1780.2469135802469, + "learning_rate": 6.054854903881474e-07, + "loss": 1.6761, + "step": 28840 + }, + { + "epoch": 1781.4814814814815, + "learning_rate": 6.052099914289213e-07, + "loss": 1.6951, + "step": 28860 + }, + { + "epoch": 1782.716049382716, + "learning_rate": 6.049344924696951e-07, + "loss": 1.7104, + "step": 28880 + }, + { + "epoch": 1783.9506172839506, + "learning_rate": 6.04658993510469e-07, + "loss": 1.6784, + "step": 28900 + }, + { + "epoch": 1785.1851851851852, + "learning_rate": 6.043834945512428e-07, + "loss": 1.6877, + "step": 28920 + }, + { + "epoch": 1786.4197530864199, + "learning_rate": 6.041079955920167e-07, + "loss": 1.6964, + "step": 28940 + }, + { + "epoch": 1787.6543209876543, + "learning_rate": 6.038324966327904e-07, + "loss": 1.6788, + "step": 28960 + }, + { + "epoch": 1788.888888888889, + "learning_rate": 6.035569976735643e-07, + "loss": 1.6828, + "step": 28980 + }, + { + "epoch": 1790.1234567901236, + "learning_rate": 6.032814987143382e-07, + "loss": 1.6915, + "step": 29000 + }, + { + "epoch": 1791.358024691358, + "learning_rate": 6.03005999755112e-07, + "loss": 1.6709, + "step": 29020 + }, + { + "epoch": 1792.5925925925926, + "learning_rate": 6.027305007958859e-07, + "loss": 1.7406, + "step": 29040 + }, + { + "epoch": 1793.8271604938273, + "learning_rate": 6.024550018366597e-07, + "loss": 1.6912, + "step": 29060 + }, + { + "epoch": 1795.0617283950617, + "learning_rate": 6.021795028774336e-07, + "loss": 1.6927, + "step": 29080 + }, + { + "epoch": 1796.2962962962963, + "learning_rate": 6.019040039182075e-07, + "loss": 1.725, + "step": 29100 + }, + { + "epoch": 1797.530864197531, + "learning_rate": 6.016285049589813e-07, + "loss": 1.6578, + "step": 29120 + }, + { + "epoch": 1798.7654320987654, + "learning_rate": 6.013530059997552e-07, + "loss": 1.7392, + "step": 29140 + }, + { + "epoch": 1800.0, + "learning_rate": 6.010775070405289e-07, + "loss": 1.6798, + "step": 29160 + }, + { + "epoch": 1801.2345679012346, + "learning_rate": 6.008020080813028e-07, + "loss": 1.6791, + "step": 29180 + }, + { + "epoch": 1802.469135802469, + "learning_rate": 6.005265091220766e-07, + "loss": 1.6692, + "step": 29200 + }, + { + "epoch": 1803.7037037037037, + "learning_rate": 6.002510101628504e-07, + "loss": 1.6823, + "step": 29220 + }, + { + "epoch": 1804.9382716049383, + "learning_rate": 5.999755112036243e-07, + "loss": 1.6822, + "step": 29240 + }, + { + "epoch": 1806.1728395061727, + "learning_rate": 5.997000122443981e-07, + "loss": 1.6831, + "step": 29260 + }, + { + "epoch": 1807.4074074074074, + "learning_rate": 5.99424513285172e-07, + "loss": 1.7249, + "step": 29280 + }, + { + "epoch": 1808.641975308642, + "learning_rate": 5.991490143259457e-07, + "loss": 1.6903, + "step": 29300 + }, + { + "epoch": 1809.8765432098764, + "learning_rate": 5.988735153667197e-07, + "loss": 1.6981, + "step": 29320 + }, + { + "epoch": 1811.111111111111, + "learning_rate": 5.985980164074936e-07, + "loss": 1.699, + "step": 29340 + }, + { + "epoch": 1812.3456790123457, + "learning_rate": 5.983225174482674e-07, + "loss": 1.6921, + "step": 29360 + }, + { + "epoch": 1813.5802469135801, + "learning_rate": 5.980470184890413e-07, + "loss": 1.69, + "step": 29380 + }, + { + "epoch": 1814.8148148148148, + "learning_rate": 5.977715195298151e-07, + "loss": 1.6803, + "step": 29400 + }, + { + "epoch": 1816.0493827160494, + "learning_rate": 5.974960205705889e-07, + "loss": 1.7028, + "step": 29420 + }, + { + "epoch": 1817.283950617284, + "learning_rate": 5.972205216113627e-07, + "loss": 1.6634, + "step": 29440 + }, + { + "epoch": 1818.5185185185185, + "learning_rate": 5.969450226521366e-07, + "loss": 1.6827, + "step": 29460 + }, + { + "epoch": 1819.7530864197531, + "learning_rate": 5.966695236929104e-07, + "loss": 1.6808, + "step": 29480 + }, + { + "epoch": 1820.9876543209878, + "learning_rate": 5.963940247336842e-07, + "loss": 1.6874, + "step": 29500 + }, + { + "epoch": 1822.2222222222222, + "learning_rate": 5.961185257744581e-07, + "loss": 1.7005, + "step": 29520 + }, + { + "epoch": 1823.4567901234568, + "learning_rate": 5.958430268152319e-07, + "loss": 1.7055, + "step": 29540 + }, + { + "epoch": 1824.6913580246915, + "learning_rate": 5.955675278560058e-07, + "loss": 1.6762, + "step": 29560 + }, + { + "epoch": 1825.9259259259259, + "learning_rate": 5.952920288967796e-07, + "loss": 1.6896, + "step": 29580 + }, + { + "epoch": 1827.1604938271605, + "learning_rate": 5.950165299375535e-07, + "loss": 1.6752, + "step": 29600 + }, + { + "epoch": 1828.3950617283951, + "learning_rate": 5.947410309783275e-07, + "loss": 1.712, + "step": 29620 + }, + { + "epoch": 1829.6296296296296, + "learning_rate": 5.944655320191013e-07, + "loss": 1.6502, + "step": 29640 + }, + { + "epoch": 1830.8641975308642, + "learning_rate": 5.941900330598752e-07, + "loss": 1.7099, + "step": 29660 + }, + { + "epoch": 1832.0987654320988, + "learning_rate": 5.93914534100649e-07, + "loss": 1.6656, + "step": 29680 + }, + { + "epoch": 1833.3333333333333, + "learning_rate": 5.936390351414228e-07, + "loss": 1.6611, + "step": 29700 + }, + { + "epoch": 1834.567901234568, + "learning_rate": 5.933635361821967e-07, + "loss": 1.7461, + "step": 29720 + }, + { + "epoch": 1835.8024691358025, + "learning_rate": 5.930880372229705e-07, + "loss": 1.7029, + "step": 29740 + }, + { + "epoch": 1837.037037037037, + "learning_rate": 5.928125382637443e-07, + "loss": 1.6585, + "step": 29760 + }, + { + "epoch": 1838.2716049382716, + "learning_rate": 5.925370393045181e-07, + "loss": 1.6802, + "step": 29780 + }, + { + "epoch": 1839.5061728395062, + "learning_rate": 5.92261540345292e-07, + "loss": 1.6939, + "step": 29800 + }, + { + "epoch": 1840.7407407407406, + "learning_rate": 5.919860413860658e-07, + "loss": 1.6721, + "step": 29820 + }, + { + "epoch": 1841.9753086419753, + "learning_rate": 5.917105424268397e-07, + "loss": 1.6698, + "step": 29840 + }, + { + "epoch": 1843.20987654321, + "learning_rate": 5.914350434676136e-07, + "loss": 1.7243, + "step": 29860 + }, + { + "epoch": 1844.4444444444443, + "learning_rate": 5.911595445083874e-07, + "loss": 1.6764, + "step": 29880 + }, + { + "epoch": 1845.679012345679, + "learning_rate": 5.908840455491613e-07, + "loss": 1.6299, + "step": 29900 + }, + { + "epoch": 1846.9135802469136, + "learning_rate": 5.90608546589935e-07, + "loss": 1.6761, + "step": 29920 + }, + { + "epoch": 1848.148148148148, + "learning_rate": 5.90333047630709e-07, + "loss": 1.689, + "step": 29940 + }, + { + "epoch": 1849.3827160493827, + "learning_rate": 5.900575486714828e-07, + "loss": 1.6907, + "step": 29960 + }, + { + "epoch": 1850.6172839506173, + "learning_rate": 5.897820497122566e-07, + "loss": 1.6604, + "step": 29980 + }, + { + "epoch": 1851.851851851852, + "learning_rate": 5.895065507530305e-07, + "loss": 1.6979, + "step": 30000 + }, + { + "epoch": 1853.0864197530864, + "learning_rate": 5.892310517938041e-07, + "loss": 1.6912, + "step": 30020 + }, + { + "epoch": 1854.320987654321, + "learning_rate": 5.889555528345781e-07, + "loss": 1.6986, + "step": 30040 + }, + { + "epoch": 1855.5555555555557, + "learning_rate": 5.886800538753519e-07, + "loss": 1.6842, + "step": 30060 + }, + { + "epoch": 1856.79012345679, + "learning_rate": 5.884045549161258e-07, + "loss": 1.653, + "step": 30080 + }, + { + "epoch": 1858.0246913580247, + "learning_rate": 5.881290559568997e-07, + "loss": 1.7042, + "step": 30100 + }, + { + "epoch": 1859.2592592592594, + "learning_rate": 5.878535569976735e-07, + "loss": 1.6651, + "step": 30120 + }, + { + "epoch": 1860.4938271604938, + "learning_rate": 5.875780580384474e-07, + "loss": 1.6653, + "step": 30140 + }, + { + "epoch": 1861.7283950617284, + "learning_rate": 5.873025590792212e-07, + "loss": 1.6434, + "step": 30160 + }, + { + "epoch": 1862.962962962963, + "learning_rate": 5.870270601199951e-07, + "loss": 1.6873, + "step": 30180 + }, + { + "epoch": 1864.1975308641975, + "learning_rate": 5.86751561160769e-07, + "loss": 1.725, + "step": 30200 + }, + { + "epoch": 1865.432098765432, + "learning_rate": 5.864760622015427e-07, + "loss": 1.6511, + "step": 30220 + }, + { + "epoch": 1866.6666666666667, + "learning_rate": 5.862005632423166e-07, + "loss": 1.6844, + "step": 30240 + }, + { + "epoch": 1867.9012345679012, + "learning_rate": 5.859250642830904e-07, + "loss": 1.6549, + "step": 30260 + }, + { + "epoch": 1869.1358024691358, + "learning_rate": 5.856495653238644e-07, + "loss": 1.6959, + "step": 30280 + }, + { + "epoch": 1870.3703703703704, + "learning_rate": 5.85374066364638e-07, + "loss": 1.6674, + "step": 30300 + }, + { + "epoch": 1871.6049382716049, + "learning_rate": 5.85098567405412e-07, + "loss": 1.6911, + "step": 30320 + }, + { + "epoch": 1872.8395061728395, + "learning_rate": 5.848230684461859e-07, + "loss": 1.6571, + "step": 30340 + }, + { + "epoch": 1874.0740740740741, + "learning_rate": 5.845475694869597e-07, + "loss": 1.7023, + "step": 30360 + }, + { + "epoch": 1875.3086419753085, + "learning_rate": 5.842720705277336e-07, + "loss": 1.6806, + "step": 30380 + }, + { + "epoch": 1876.5432098765432, + "learning_rate": 5.839965715685074e-07, + "loss": 1.6539, + "step": 30400 + }, + { + "epoch": 1877.7777777777778, + "learning_rate": 5.837210726092813e-07, + "loss": 1.7039, + "step": 30420 + }, + { + "epoch": 1879.0123456790122, + "learning_rate": 5.834455736500552e-07, + "loss": 1.6855, + "step": 30440 + }, + { + "epoch": 1880.2469135802469, + "learning_rate": 5.83170074690829e-07, + "loss": 1.6723, + "step": 30460 + }, + { + "epoch": 1881.4814814814815, + "learning_rate": 5.828945757316029e-07, + "loss": 1.6378, + "step": 30480 + }, + { + "epoch": 1882.716049382716, + "learning_rate": 5.826190767723766e-07, + "loss": 1.6862, + "step": 30500 + }, + { + "epoch": 1883.9506172839506, + "learning_rate": 5.823435778131505e-07, + "loss": 1.7127, + "step": 30520 + }, + { + "epoch": 1885.1851851851852, + "learning_rate": 5.820680788539243e-07, + "loss": 1.6837, + "step": 30540 + }, + { + "epoch": 1886.4197530864199, + "learning_rate": 5.817925798946981e-07, + "loss": 1.6474, + "step": 30560 + }, + { + "epoch": 1887.6543209876543, + "learning_rate": 5.81517080935472e-07, + "loss": 1.6561, + "step": 30580 + }, + { + "epoch": 1888.888888888889, + "learning_rate": 5.812415819762458e-07, + "loss": 1.6545, + "step": 30600 + }, + { + "epoch": 1890.1234567901236, + "learning_rate": 5.809660830170197e-07, + "loss": 1.6694, + "step": 30620 + }, + { + "epoch": 1891.358024691358, + "learning_rate": 5.806905840577934e-07, + "loss": 1.6651, + "step": 30640 + }, + { + "epoch": 1892.5925925925926, + "learning_rate": 5.804150850985674e-07, + "loss": 1.6752, + "step": 30660 + }, + { + "epoch": 1893.8271604938273, + "learning_rate": 5.801395861393412e-07, + "loss": 1.674, + "step": 30680 + }, + { + "epoch": 1895.0617283950617, + "learning_rate": 5.798640871801151e-07, + "loss": 1.6675, + "step": 30700 + }, + { + "epoch": 1896.2962962962963, + "learning_rate": 5.79588588220889e-07, + "loss": 1.698, + "step": 30720 + }, + { + "epoch": 1897.530864197531, + "learning_rate": 5.793130892616628e-07, + "loss": 1.6722, + "step": 30740 + }, + { + "epoch": 1898.7654320987654, + "learning_rate": 5.790375903024366e-07, + "loss": 1.6899, + "step": 30760 + }, + { + "epoch": 1900.0, + "learning_rate": 5.787620913432104e-07, + "loss": 1.6784, + "step": 30780 + }, + { + "epoch": 1901.2345679012346, + "learning_rate": 5.784865923839843e-07, + "loss": 1.6472, + "step": 30800 + }, + { + "epoch": 1902.469135802469, + "learning_rate": 5.782110934247582e-07, + "loss": 1.6882, + "step": 30820 + }, + { + "epoch": 1903.7037037037037, + "learning_rate": 5.779355944655319e-07, + "loss": 1.6812, + "step": 30840 + }, + { + "epoch": 1904.9382716049383, + "learning_rate": 5.776600955063058e-07, + "loss": 1.6791, + "step": 30860 + }, + { + "epoch": 1906.1728395061727, + "learning_rate": 5.773845965470796e-07, + "loss": 1.6917, + "step": 30880 + }, + { + "epoch": 1907.4074074074074, + "learning_rate": 5.771090975878535e-07, + "loss": 1.6563, + "step": 30900 + }, + { + "epoch": 1908.641975308642, + "learning_rate": 5.768335986286274e-07, + "loss": 1.6402, + "step": 30920 + }, + { + "epoch": 1909.8765432098764, + "learning_rate": 5.765580996694013e-07, + "loss": 1.6835, + "step": 30940 + }, + { + "epoch": 1911.111111111111, + "learning_rate": 5.762826007101752e-07, + "loss": 1.6657, + "step": 30960 + }, + { + "epoch": 1912.3456790123457, + "learning_rate": 5.76007101750949e-07, + "loss": 1.7137, + "step": 30980 + }, + { + "epoch": 1913.5802469135801, + "learning_rate": 5.757316027917229e-07, + "loss": 1.6696, + "step": 31000 + }, + { + "epoch": 1914.8148148148148, + "learning_rate": 5.754561038324967e-07, + "loss": 1.6753, + "step": 31020 + }, + { + "epoch": 1916.0493827160494, + "learning_rate": 5.751806048732704e-07, + "loss": 1.6665, + "step": 31040 + }, + { + "epoch": 1917.283950617284, + "learning_rate": 5.749051059140442e-07, + "loss": 1.6984, + "step": 31060 + }, + { + "epoch": 1918.5185185185185, + "learning_rate": 5.746296069548181e-07, + "loss": 1.6797, + "step": 31080 + }, + { + "epoch": 1919.7530864197531, + "learning_rate": 5.74354107995592e-07, + "loss": 1.6895, + "step": 31100 + }, + { + "epoch": 1920.9876543209878, + "learning_rate": 5.740786090363658e-07, + "loss": 1.6677, + "step": 31120 + }, + { + "epoch": 1922.2222222222222, + "learning_rate": 5.738031100771397e-07, + "loss": 1.6921, + "step": 31140 + }, + { + "epoch": 1923.4567901234568, + "learning_rate": 5.735276111179135e-07, + "loss": 1.6702, + "step": 31160 + }, + { + "epoch": 1924.6913580246915, + "learning_rate": 5.732521121586874e-07, + "loss": 1.6713, + "step": 31180 + }, + { + "epoch": 1925.9259259259259, + "learning_rate": 5.729766131994613e-07, + "loss": 1.6473, + "step": 31200 + }, + { + "epoch": 1927.1604938271605, + "learning_rate": 5.727011142402351e-07, + "loss": 1.6703, + "step": 31220 + }, + { + "epoch": 1928.3950617283951, + "learning_rate": 5.72425615281009e-07, + "loss": 1.6477, + "step": 31240 + }, + { + "epoch": 1929.6296296296296, + "learning_rate": 5.721501163217828e-07, + "loss": 1.6524, + "step": 31260 + }, + { + "epoch": 1930.8641975308642, + "learning_rate": 5.718746173625567e-07, + "loss": 1.6835, + "step": 31280 + }, + { + "epoch": 1932.0987654320988, + "learning_rate": 5.715991184033304e-07, + "loss": 1.7149, + "step": 31300 + }, + { + "epoch": 1933.3333333333333, + "learning_rate": 5.713236194441043e-07, + "loss": 1.6778, + "step": 31320 + }, + { + "epoch": 1934.567901234568, + "learning_rate": 5.710481204848782e-07, + "loss": 1.676, + "step": 31340 + }, + { + "epoch": 1935.8024691358025, + "learning_rate": 5.70772621525652e-07, + "loss": 1.6905, + "step": 31360 + }, + { + "epoch": 1937.037037037037, + "learning_rate": 5.704971225664258e-07, + "loss": 1.687, + "step": 31380 + }, + { + "epoch": 1938.2716049382716, + "learning_rate": 5.702216236071996e-07, + "loss": 1.6852, + "step": 31400 + }, + { + "epoch": 1939.5061728395062, + "learning_rate": 5.699461246479735e-07, + "loss": 1.6914, + "step": 31420 + }, + { + "epoch": 1940.7407407407406, + "learning_rate": 5.696706256887474e-07, + "loss": 1.7071, + "step": 31440 + }, + { + "epoch": 1941.9753086419753, + "learning_rate": 5.693951267295212e-07, + "loss": 1.6584, + "step": 31460 + }, + { + "epoch": 1943.20987654321, + "learning_rate": 5.691196277702951e-07, + "loss": 1.6533, + "step": 31480 + }, + { + "epoch": 1944.4444444444443, + "learning_rate": 5.688441288110689e-07, + "loss": 1.646, + "step": 31500 + }, + { + "epoch": 1945.679012345679, + "learning_rate": 5.685686298518428e-07, + "loss": 1.6853, + "step": 31520 + }, + { + "epoch": 1946.9135802469136, + "learning_rate": 5.682931308926166e-07, + "loss": 1.6666, + "step": 31540 + }, + { + "epoch": 1948.148148148148, + "learning_rate": 5.680176319333905e-07, + "loss": 1.6918, + "step": 31560 + }, + { + "epoch": 1949.3827160493827, + "learning_rate": 5.677421329741643e-07, + "loss": 1.6719, + "step": 31580 + }, + { + "epoch": 1950.6172839506173, + "learning_rate": 5.674666340149381e-07, + "loss": 1.6708, + "step": 31600 + }, + { + "epoch": 1951.851851851852, + "learning_rate": 5.671911350557119e-07, + "loss": 1.6474, + "step": 31620 + }, + { + "epoch": 1953.0864197530864, + "learning_rate": 5.669156360964857e-07, + "loss": 1.6686, + "step": 31640 + }, + { + "epoch": 1954.320987654321, + "learning_rate": 5.666401371372596e-07, + "loss": 1.6534, + "step": 31660 + }, + { + "epoch": 1955.5555555555557, + "learning_rate": 5.663646381780334e-07, + "loss": 1.6554, + "step": 31680 + }, + { + "epoch": 1956.79012345679, + "learning_rate": 5.660891392188073e-07, + "loss": 1.6135, + "step": 31700 + }, + { + "epoch": 1958.0246913580247, + "learning_rate": 5.658136402595812e-07, + "loss": 1.6707, + "step": 31720 + }, + { + "epoch": 1959.2592592592594, + "learning_rate": 5.65538141300355e-07, + "loss": 1.6569, + "step": 31740 + }, + { + "epoch": 1960.4938271604938, + "learning_rate": 5.652626423411289e-07, + "loss": 1.7037, + "step": 31760 + }, + { + "epoch": 1961.7283950617284, + "learning_rate": 5.649871433819027e-07, + "loss": 1.6324, + "step": 31780 + }, + { + "epoch": 1962.962962962963, + "learning_rate": 5.647116444226766e-07, + "loss": 1.666, + "step": 31800 + }, + { + "epoch": 1964.1975308641975, + "learning_rate": 5.644361454634505e-07, + "loss": 1.651, + "step": 31820 + }, + { + "epoch": 1965.432098765432, + "learning_rate": 5.641606465042243e-07, + "loss": 1.6548, + "step": 31840 + }, + { + "epoch": 1966.6666666666667, + "learning_rate": 5.638851475449982e-07, + "loss": 1.6696, + "step": 31860 + }, + { + "epoch": 1967.9012345679012, + "learning_rate": 5.636096485857721e-07, + "loss": 1.7076, + "step": 31880 + }, + { + "epoch": 1969.1358024691358, + "learning_rate": 5.633341496265459e-07, + "loss": 1.702, + "step": 31900 + }, + { + "epoch": 1970.3703703703704, + "learning_rate": 5.630586506673195e-07, + "loss": 1.6458, + "step": 31920 + }, + { + "epoch": 1971.6049382716049, + "learning_rate": 5.627831517080934e-07, + "loss": 1.6834, + "step": 31940 + }, + { + "epoch": 1972.8395061728395, + "learning_rate": 5.625076527488673e-07, + "loss": 1.6328, + "step": 31960 + }, + { + "epoch": 1974.0740740740741, + "learning_rate": 5.622321537896412e-07, + "loss": 1.6697, + "step": 31980 + }, + { + "epoch": 1975.3086419753085, + "learning_rate": 5.61956654830415e-07, + "loss": 1.6517, + "step": 32000 + }, + { + "epoch": 1976.5432098765432, + "learning_rate": 5.616811558711889e-07, + "loss": 1.6757, + "step": 32020 + }, + { + "epoch": 1977.7777777777778, + "learning_rate": 5.614056569119628e-07, + "loss": 1.6457, + "step": 32040 + }, + { + "epoch": 1979.0123456790122, + "learning_rate": 5.611301579527367e-07, + "loss": 1.6244, + "step": 32060 + }, + { + "epoch": 1980.2469135802469, + "learning_rate": 5.608546589935105e-07, + "loss": 1.6646, + "step": 32080 + }, + { + "epoch": 1981.4814814814815, + "learning_rate": 5.605791600342844e-07, + "loss": 1.6297, + "step": 32100 + }, + { + "epoch": 1982.716049382716, + "learning_rate": 5.603036610750581e-07, + "loss": 1.6971, + "step": 32120 + }, + { + "epoch": 1983.9506172839506, + "learning_rate": 5.60028162115832e-07, + "loss": 1.6461, + "step": 32140 + }, + { + "epoch": 1985.1851851851852, + "learning_rate": 5.597526631566058e-07, + "loss": 1.6862, + "step": 32160 + }, + { + "epoch": 1986.4197530864199, + "learning_rate": 5.594771641973796e-07, + "loss": 1.6764, + "step": 32180 + }, + { + "epoch": 1987.6543209876543, + "learning_rate": 5.592016652381535e-07, + "loss": 1.6503, + "step": 32200 + }, + { + "epoch": 1988.888888888889, + "learning_rate": 5.589261662789273e-07, + "loss": 1.6331, + "step": 32220 + }, + { + "epoch": 1990.1234567901236, + "learning_rate": 5.586506673197012e-07, + "loss": 1.6758, + "step": 32240 + }, + { + "epoch": 1991.358024691358, + "learning_rate": 5.58375168360475e-07, + "loss": 1.6388, + "step": 32260 + }, + { + "epoch": 1992.5925925925926, + "learning_rate": 5.580996694012489e-07, + "loss": 1.6492, + "step": 32280 + }, + { + "epoch": 1993.8271604938273, + "learning_rate": 5.578241704420228e-07, + "loss": 1.635, + "step": 32300 + }, + { + "epoch": 1995.0617283950617, + "learning_rate": 5.575486714827966e-07, + "loss": 1.65, + "step": 32320 + }, + { + "epoch": 1996.2962962962963, + "learning_rate": 5.572731725235705e-07, + "loss": 1.6663, + "step": 32340 + }, + { + "epoch": 1997.530864197531, + "learning_rate": 5.569976735643443e-07, + "loss": 1.6435, + "step": 32360 + }, + { + "epoch": 1998.7654320987654, + "learning_rate": 5.567221746051181e-07, + "loss": 1.6865, + "step": 32380 + }, + { + "epoch": 2000.0, + "learning_rate": 5.564466756458919e-07, + "loss": 1.6806, + "step": 32400 + }, + { + "epoch": 2001.2345679012346, + "learning_rate": 5.561711766866658e-07, + "loss": 1.664, + "step": 32420 + }, + { + "epoch": 2002.469135802469, + "learning_rate": 5.558956777274397e-07, + "loss": 1.583, + "step": 32440 + }, + { + "epoch": 2003.7037037037037, + "learning_rate": 5.556201787682135e-07, + "loss": 1.7005, + "step": 32460 + }, + { + "epoch": 2004.9382716049383, + "learning_rate": 5.553446798089874e-07, + "loss": 1.6248, + "step": 32480 + }, + { + "epoch": 2006.1728395061727, + "learning_rate": 5.550691808497612e-07, + "loss": 1.6806, + "step": 32500 + }, + { + "epoch": 2007.4074074074074, + "learning_rate": 5.547936818905351e-07, + "loss": 1.6522, + "step": 32520 + }, + { + "epoch": 2008.641975308642, + "learning_rate": 5.54518182931309e-07, + "loss": 1.6834, + "step": 32540 + }, + { + "epoch": 2009.8765432098764, + "learning_rate": 5.542426839720828e-07, + "loss": 1.6222, + "step": 32560 + }, + { + "epoch": 2011.111111111111, + "learning_rate": 5.539671850128567e-07, + "loss": 1.6515, + "step": 32580 + }, + { + "epoch": 2012.3456790123457, + "learning_rate": 5.536916860536305e-07, + "loss": 1.6526, + "step": 32600 + }, + { + "epoch": 2013.5802469135801, + "learning_rate": 5.534161870944044e-07, + "loss": 1.6346, + "step": 32620 + }, + { + "epoch": 2014.8148148148148, + "learning_rate": 5.531406881351782e-07, + "loss": 1.6423, + "step": 32640 + }, + { + "epoch": 2016.0493827160494, + "learning_rate": 5.528651891759519e-07, + "loss": 1.6667, + "step": 32660 + }, + { + "epoch": 2017.283950617284, + "learning_rate": 5.525896902167258e-07, + "loss": 1.6526, + "step": 32680 + }, + { + "epoch": 2018.5185185185185, + "learning_rate": 5.523141912574996e-07, + "loss": 1.6486, + "step": 32700 + }, + { + "epoch": 2019.7530864197531, + "learning_rate": 5.520386922982735e-07, + "loss": 1.6536, + "step": 32720 + }, + { + "epoch": 2020.9876543209878, + "learning_rate": 5.517631933390473e-07, + "loss": 1.6297, + "step": 32740 + }, + { + "epoch": 2022.2222222222222, + "learning_rate": 5.514876943798212e-07, + "loss": 1.648, + "step": 32760 + }, + { + "epoch": 2023.4567901234568, + "learning_rate": 5.51212195420595e-07, + "loss": 1.632, + "step": 32780 + }, + { + "epoch": 2024.6913580246915, + "learning_rate": 5.509366964613689e-07, + "loss": 1.6787, + "step": 32800 + }, + { + "epoch": 2025.9259259259259, + "learning_rate": 5.506611975021428e-07, + "loss": 1.6219, + "step": 32820 + }, + { + "epoch": 2027.1604938271605, + "learning_rate": 5.503856985429166e-07, + "loss": 1.6477, + "step": 32840 + }, + { + "epoch": 2028.3950617283951, + "learning_rate": 5.501101995836905e-07, + "loss": 1.6809, + "step": 32860 + }, + { + "epoch": 2029.6296296296296, + "learning_rate": 5.498347006244643e-07, + "loss": 1.6266, + "step": 32880 + }, + { + "epoch": 2030.8641975308642, + "learning_rate": 5.495592016652381e-07, + "loss": 1.6525, + "step": 32900 + }, + { + "epoch": 2032.0987654320988, + "learning_rate": 5.49283702706012e-07, + "loss": 1.6831, + "step": 32920 + }, + { + "epoch": 2033.3333333333333, + "learning_rate": 5.490082037467858e-07, + "loss": 1.6116, + "step": 32940 + }, + { + "epoch": 2034.567901234568, + "learning_rate": 5.487327047875596e-07, + "loss": 1.6331, + "step": 32960 + }, + { + "epoch": 2035.8024691358025, + "learning_rate": 5.484572058283335e-07, + "loss": 1.6585, + "step": 32980 + }, + { + "epoch": 2037.037037037037, + "learning_rate": 5.481817068691074e-07, + "loss": 1.6951, + "step": 33000 + }, + { + "epoch": 2038.2716049382716, + "learning_rate": 5.479062079098811e-07, + "loss": 1.7036, + "step": 33020 + }, + { + "epoch": 2039.5061728395062, + "learning_rate": 5.47630708950655e-07, + "loss": 1.6319, + "step": 33040 + }, + { + "epoch": 2040.7407407407406, + "learning_rate": 5.473552099914289e-07, + "loss": 1.6505, + "step": 33060 + }, + { + "epoch": 2041.9753086419753, + "learning_rate": 5.470797110322027e-07, + "loss": 1.6459, + "step": 33080 + }, + { + "epoch": 2043.20987654321, + "learning_rate": 5.468042120729766e-07, + "loss": 1.6577, + "step": 33100 + }, + { + "epoch": 2044.4444444444443, + "learning_rate": 5.465287131137504e-07, + "loss": 1.6493, + "step": 33120 + }, + { + "epoch": 2045.679012345679, + "learning_rate": 5.462532141545243e-07, + "loss": 1.6472, + "step": 33140 + }, + { + "epoch": 2046.9135802469136, + "learning_rate": 5.459777151952982e-07, + "loss": 1.6356, + "step": 33160 + }, + { + "epoch": 2048.1481481481483, + "learning_rate": 5.45702216236072e-07, + "loss": 1.647, + "step": 33180 + }, + { + "epoch": 2049.382716049383, + "learning_rate": 5.454267172768459e-07, + "loss": 1.6356, + "step": 33200 + }, + { + "epoch": 2050.617283950617, + "learning_rate": 5.451512183176196e-07, + "loss": 1.5994, + "step": 33220 + }, + { + "epoch": 2051.8518518518517, + "learning_rate": 5.448757193583936e-07, + "loss": 1.6527, + "step": 33240 + }, + { + "epoch": 2053.0864197530864, + "learning_rate": 5.446002203991674e-07, + "loss": 1.6733, + "step": 33260 + }, + { + "epoch": 2054.320987654321, + "learning_rate": 5.443247214399413e-07, + "loss": 1.679, + "step": 33280 + }, + { + "epoch": 2055.5555555555557, + "learning_rate": 5.44049222480715e-07, + "loss": 1.6314, + "step": 33300 + }, + { + "epoch": 2056.7901234567903, + "learning_rate": 5.437737235214889e-07, + "loss": 1.6589, + "step": 33320 + }, + { + "epoch": 2058.0246913580245, + "learning_rate": 5.434982245622627e-07, + "loss": 1.6226, + "step": 33340 + }, + { + "epoch": 2059.259259259259, + "learning_rate": 5.432227256030365e-07, + "loss": 1.6508, + "step": 33360 + }, + { + "epoch": 2060.4938271604938, + "learning_rate": 5.429472266438104e-07, + "loss": 1.6574, + "step": 33380 + }, + { + "epoch": 2061.7283950617284, + "learning_rate": 5.426717276845842e-07, + "loss": 1.6206, + "step": 33400 + }, + { + "epoch": 2062.962962962963, + "learning_rate": 5.423962287253581e-07, + "loss": 1.6472, + "step": 33420 + }, + { + "epoch": 2064.1975308641977, + "learning_rate": 5.42120729766132e-07, + "loss": 1.675, + "step": 33440 + }, + { + "epoch": 2065.432098765432, + "learning_rate": 5.418452308069058e-07, + "loss": 1.6319, + "step": 33460 + }, + { + "epoch": 2066.6666666666665, + "learning_rate": 5.415697318476797e-07, + "loss": 1.6193, + "step": 33480 + }, + { + "epoch": 2067.901234567901, + "learning_rate": 5.412942328884535e-07, + "loss": 1.6383, + "step": 33500 + }, + { + "epoch": 2069.135802469136, + "learning_rate": 5.410187339292274e-07, + "loss": 1.6552, + "step": 33520 + }, + { + "epoch": 2070.3703703703704, + "learning_rate": 5.407432349700013e-07, + "loss": 1.6484, + "step": 33540 + }, + { + "epoch": 2071.604938271605, + "learning_rate": 5.40467736010775e-07, + "loss": 1.6406, + "step": 33560 + }, + { + "epoch": 2072.8395061728397, + "learning_rate": 5.40192237051549e-07, + "loss": 1.6387, + "step": 33580 + }, + { + "epoch": 2074.074074074074, + "learning_rate": 5.399167380923227e-07, + "loss": 1.6443, + "step": 33600 + }, + { + "epoch": 2075.3086419753085, + "learning_rate": 5.396412391330966e-07, + "loss": 1.5999, + "step": 33620 + }, + { + "epoch": 2076.543209876543, + "learning_rate": 5.393657401738705e-07, + "loss": 1.6797, + "step": 33640 + }, + { + "epoch": 2077.777777777778, + "learning_rate": 5.390902412146443e-07, + "loss": 1.6671, + "step": 33660 + }, + { + "epoch": 2079.0123456790125, + "learning_rate": 5.388147422554182e-07, + "loss": 1.6938, + "step": 33680 + }, + { + "epoch": 2080.246913580247, + "learning_rate": 5.385392432961919e-07, + "loss": 1.6288, + "step": 33700 + }, + { + "epoch": 2081.4814814814813, + "learning_rate": 5.382637443369658e-07, + "loss": 1.6256, + "step": 33720 + }, + { + "epoch": 2082.716049382716, + "learning_rate": 5.379882453777396e-07, + "loss": 1.6537, + "step": 33740 + }, + { + "epoch": 2083.9506172839506, + "learning_rate": 5.377127464185135e-07, + "loss": 1.618, + "step": 33760 + }, + { + "epoch": 2085.185185185185, + "learning_rate": 5.374372474592874e-07, + "loss": 1.6029, + "step": 33780 + }, + { + "epoch": 2086.41975308642, + "learning_rate": 5.371617485000612e-07, + "loss": 1.6189, + "step": 33800 + }, + { + "epoch": 2087.6543209876545, + "learning_rate": 5.368862495408351e-07, + "loss": 1.6409, + "step": 33820 + }, + { + "epoch": 2088.8888888888887, + "learning_rate": 5.366107505816088e-07, + "loss": 1.6319, + "step": 33840 + }, + { + "epoch": 2090.1234567901233, + "learning_rate": 5.363352516223827e-07, + "loss": 1.6839, + "step": 33860 + }, + { + "epoch": 2091.358024691358, + "learning_rate": 5.360597526631567e-07, + "loss": 1.6088, + "step": 33880 + }, + { + "epoch": 2092.5925925925926, + "learning_rate": 5.357842537039305e-07, + "loss": 1.6999, + "step": 33900 + }, + { + "epoch": 2093.8271604938273, + "learning_rate": 5.355087547447044e-07, + "loss": 1.6296, + "step": 33920 + }, + { + "epoch": 2095.061728395062, + "learning_rate": 5.352332557854782e-07, + "loss": 1.6524, + "step": 33940 + }, + { + "epoch": 2096.296296296296, + "learning_rate": 5.349577568262519e-07, + "loss": 1.6768, + "step": 33960 + }, + { + "epoch": 2097.5308641975307, + "learning_rate": 5.346822578670257e-07, + "loss": 1.6304, + "step": 33980 + }, + { + "epoch": 2098.7654320987654, + "learning_rate": 5.344067589077996e-07, + "loss": 1.6687, + "step": 34000 + }, + { + "epoch": 2100.0, + "learning_rate": 5.341312599485735e-07, + "loss": 1.639, + "step": 34020 + }, + { + "epoch": 2101.2345679012346, + "learning_rate": 5.338557609893473e-07, + "loss": 1.6512, + "step": 34040 + }, + { + "epoch": 2102.4691358024693, + "learning_rate": 5.335802620301212e-07, + "loss": 1.6303, + "step": 34060 + }, + { + "epoch": 2103.703703703704, + "learning_rate": 5.33304763070895e-07, + "loss": 1.6472, + "step": 34080 + }, + { + "epoch": 2104.938271604938, + "learning_rate": 5.330292641116689e-07, + "loss": 1.6132, + "step": 34100 + }, + { + "epoch": 2106.1728395061727, + "learning_rate": 5.327537651524427e-07, + "loss": 1.6328, + "step": 34120 + }, + { + "epoch": 2107.4074074074074, + "learning_rate": 5.324782661932166e-07, + "loss": 1.6175, + "step": 34140 + }, + { + "epoch": 2108.641975308642, + "learning_rate": 5.322027672339905e-07, + "loss": 1.6078, + "step": 34160 + }, + { + "epoch": 2109.8765432098767, + "learning_rate": 5.319272682747643e-07, + "loss": 1.6795, + "step": 34180 + }, + { + "epoch": 2111.1111111111113, + "learning_rate": 5.316517693155383e-07, + "loss": 1.6347, + "step": 34200 + }, + { + "epoch": 2112.3456790123455, + "learning_rate": 5.31376270356312e-07, + "loss": 1.6583, + "step": 34220 + }, + { + "epoch": 2113.58024691358, + "learning_rate": 5.311007713970858e-07, + "loss": 1.649, + "step": 34240 + }, + { + "epoch": 2114.814814814815, + "learning_rate": 5.308252724378597e-07, + "loss": 1.6458, + "step": 34260 + }, + { + "epoch": 2116.0493827160494, + "learning_rate": 5.305497734786335e-07, + "loss": 1.6741, + "step": 34280 + }, + { + "epoch": 2117.283950617284, + "learning_rate": 5.302742745194074e-07, + "loss": 1.6458, + "step": 34300 + }, + { + "epoch": 2118.5185185185187, + "learning_rate": 5.299987755601812e-07, + "loss": 1.6725, + "step": 34320 + }, + { + "epoch": 2119.753086419753, + "learning_rate": 5.297232766009551e-07, + "loss": 1.6372, + "step": 34340 + }, + { + "epoch": 2120.9876543209875, + "learning_rate": 5.294477776417289e-07, + "loss": 1.5928, + "step": 34360 + }, + { + "epoch": 2122.222222222222, + "learning_rate": 5.291722786825027e-07, + "loss": 1.6509, + "step": 34380 + }, + { + "epoch": 2123.456790123457, + "learning_rate": 5.288967797232766e-07, + "loss": 1.688, + "step": 34400 + }, + { + "epoch": 2124.6913580246915, + "learning_rate": 5.286212807640504e-07, + "loss": 1.6257, + "step": 34420 + }, + { + "epoch": 2125.925925925926, + "learning_rate": 5.283457818048243e-07, + "loss": 1.6655, + "step": 34440 + }, + { + "epoch": 2127.1604938271603, + "learning_rate": 5.280702828455981e-07, + "loss": 1.6803, + "step": 34460 + }, + { + "epoch": 2128.395061728395, + "learning_rate": 5.27794783886372e-07, + "loss": 1.5928, + "step": 34480 + }, + { + "epoch": 2129.6296296296296, + "learning_rate": 5.275192849271457e-07, + "loss": 1.6843, + "step": 34500 + }, + { + "epoch": 2130.864197530864, + "learning_rate": 5.272437859679196e-07, + "loss": 1.6173, + "step": 34520 + }, + { + "epoch": 2132.098765432099, + "learning_rate": 5.269682870086935e-07, + "loss": 1.6635, + "step": 34540 + }, + { + "epoch": 2133.3333333333335, + "learning_rate": 5.266927880494673e-07, + "loss": 1.651, + "step": 34560 + }, + { + "epoch": 2134.567901234568, + "learning_rate": 5.264172890902412e-07, + "loss": 1.5994, + "step": 34580 + }, + { + "epoch": 2135.8024691358023, + "learning_rate": 5.26141790131015e-07, + "loss": 1.6398, + "step": 34600 + }, + { + "epoch": 2137.037037037037, + "learning_rate": 5.258662911717889e-07, + "loss": 1.638, + "step": 34620 + }, + { + "epoch": 2138.2716049382716, + "learning_rate": 5.255907922125627e-07, + "loss": 1.6332, + "step": 34640 + }, + { + "epoch": 2139.5061728395062, + "learning_rate": 5.253152932533364e-07, + "loss": 1.6314, + "step": 34660 + }, + { + "epoch": 2140.740740740741, + "learning_rate": 5.250397942941104e-07, + "loss": 1.6453, + "step": 34680 + }, + { + "epoch": 2141.9753086419755, + "learning_rate": 5.247642953348842e-07, + "loss": 1.6315, + "step": 34700 + }, + { + "epoch": 2143.2098765432097, + "learning_rate": 5.244887963756581e-07, + "loss": 1.6325, + "step": 34720 + }, + { + "epoch": 2144.4444444444443, + "learning_rate": 5.242132974164319e-07, + "loss": 1.6387, + "step": 34740 + }, + { + "epoch": 2145.679012345679, + "learning_rate": 5.239377984572058e-07, + "loss": 1.6771, + "step": 34760 + }, + { + "epoch": 2146.9135802469136, + "learning_rate": 5.236622994979797e-07, + "loss": 1.6287, + "step": 34780 + }, + { + "epoch": 2148.1481481481483, + "learning_rate": 5.233868005387535e-07, + "loss": 1.6298, + "step": 34800 + }, + { + "epoch": 2149.382716049383, + "learning_rate": 5.231113015795274e-07, + "loss": 1.6201, + "step": 34820 + }, + { + "epoch": 2150.617283950617, + "learning_rate": 5.228358026203012e-07, + "loss": 1.64, + "step": 34840 + }, + { + "epoch": 2151.8518518518517, + "learning_rate": 5.225603036610751e-07, + "loss": 1.6608, + "step": 34860 + }, + { + "epoch": 2153.0864197530864, + "learning_rate": 5.22284804701849e-07, + "loss": 1.6253, + "step": 34880 + }, + { + "epoch": 2154.320987654321, + "learning_rate": 5.220093057426228e-07, + "loss": 1.6644, + "step": 34900 + }, + { + "epoch": 2155.5555555555557, + "learning_rate": 5.217338067833965e-07, + "loss": 1.6186, + "step": 34920 + }, + { + "epoch": 2156.7901234567903, + "learning_rate": 5.214583078241703e-07, + "loss": 1.7043, + "step": 34940 + }, + { + "epoch": 2158.0246913580245, + "learning_rate": 5.211828088649442e-07, + "loss": 1.6333, + "step": 34960 + }, + { + "epoch": 2159.259259259259, + "learning_rate": 5.20907309905718e-07, + "loss": 1.6256, + "step": 34980 + }, + { + "epoch": 2160.4938271604938, + "learning_rate": 5.20631810946492e-07, + "loss": 1.5845, + "step": 35000 + }, + { + "epoch": 2161.7283950617284, + "learning_rate": 5.203563119872659e-07, + "loss": 1.681, + "step": 35020 + }, + { + "epoch": 2162.962962962963, + "learning_rate": 5.200808130280396e-07, + "loss": 1.6559, + "step": 35040 + }, + { + "epoch": 2164.1975308641977, + "learning_rate": 5.198053140688135e-07, + "loss": 1.6413, + "step": 35060 + }, + { + "epoch": 2165.432098765432, + "learning_rate": 5.195298151095873e-07, + "loss": 1.6549, + "step": 35080 + }, + { + "epoch": 2166.6666666666665, + "learning_rate": 5.192543161503612e-07, + "loss": 1.6315, + "step": 35100 + }, + { + "epoch": 2167.901234567901, + "learning_rate": 5.18978817191135e-07, + "loss": 1.5978, + "step": 35120 + }, + { + "epoch": 2169.135802469136, + "learning_rate": 5.187033182319089e-07, + "loss": 1.6748, + "step": 35140 + }, + { + "epoch": 2170.3703703703704, + "learning_rate": 5.184278192726828e-07, + "loss": 1.6301, + "step": 35160 + }, + { + "epoch": 2171.604938271605, + "learning_rate": 5.181523203134565e-07, + "loss": 1.6186, + "step": 35180 + }, + { + "epoch": 2172.8395061728397, + "learning_rate": 5.178768213542304e-07, + "loss": 1.6626, + "step": 35200 + }, + { + "epoch": 2174.074074074074, + "learning_rate": 5.176013223950042e-07, + "loss": 1.6176, + "step": 35220 + }, + { + "epoch": 2175.3086419753085, + "learning_rate": 5.173258234357781e-07, + "loss": 1.6184, + "step": 35240 + }, + { + "epoch": 2176.543209876543, + "learning_rate": 5.17050324476552e-07, + "loss": 1.669, + "step": 35260 + }, + { + "epoch": 2177.777777777778, + "learning_rate": 5.167748255173257e-07, + "loss": 1.6438, + "step": 35280 + }, + { + "epoch": 2179.0123456790125, + "learning_rate": 5.164993265580997e-07, + "loss": 1.6993, + "step": 35300 + }, + { + "epoch": 2180.246913580247, + "learning_rate": 5.162238275988734e-07, + "loss": 1.6705, + "step": 35320 + }, + { + "epoch": 2181.4814814814813, + "learning_rate": 5.159483286396473e-07, + "loss": 1.6452, + "step": 35340 + }, + { + "epoch": 2182.716049382716, + "learning_rate": 5.156728296804211e-07, + "loss": 1.6083, + "step": 35360 + }, + { + "epoch": 2183.9506172839506, + "learning_rate": 5.15397330721195e-07, + "loss": 1.6284, + "step": 35380 + }, + { + "epoch": 2185.185185185185, + "learning_rate": 5.151218317619689e-07, + "loss": 1.6395, + "step": 35400 + }, + { + "epoch": 2186.41975308642, + "learning_rate": 5.148463328027427e-07, + "loss": 1.6433, + "step": 35420 + }, + { + "epoch": 2187.6543209876545, + "learning_rate": 5.145708338435166e-07, + "loss": 1.6108, + "step": 35440 + }, + { + "epoch": 2188.8888888888887, + "learning_rate": 5.142953348842904e-07, + "loss": 1.6232, + "step": 35460 + }, + { + "epoch": 2190.1234567901233, + "learning_rate": 5.140198359250643e-07, + "loss": 1.6295, + "step": 35480 + }, + { + "epoch": 2191.358024691358, + "learning_rate": 5.137443369658382e-07, + "loss": 1.6395, + "step": 35500 + }, + { + "epoch": 2192.5925925925926, + "learning_rate": 5.13468838006612e-07, + "loss": 1.637, + "step": 35520 + }, + { + "epoch": 2193.8271604938273, + "learning_rate": 5.131933390473858e-07, + "loss": 1.6075, + "step": 35540 + }, + { + "epoch": 2195.061728395062, + "learning_rate": 5.129178400881597e-07, + "loss": 1.6256, + "step": 35560 + }, + { + "epoch": 2196.296296296296, + "learning_rate": 5.126423411289334e-07, + "loss": 1.6463, + "step": 35580 + }, + { + "epoch": 2197.5308641975307, + "learning_rate": 5.123668421697072e-07, + "loss": 1.6383, + "step": 35600 + }, + { + "epoch": 2198.7654320987654, + "learning_rate": 5.120913432104811e-07, + "loss": 1.6451, + "step": 35620 + }, + { + "epoch": 2200.0, + "learning_rate": 5.118158442512551e-07, + "loss": 1.6669, + "step": 35640 + }, + { + "epoch": 2201.2345679012346, + "learning_rate": 5.115403452920288e-07, + "loss": 1.6535, + "step": 35660 + }, + { + "epoch": 2202.4691358024693, + "learning_rate": 5.112648463328027e-07, + "loss": 1.623, + "step": 35680 + }, + { + "epoch": 2203.703703703704, + "learning_rate": 5.109893473735765e-07, + "loss": 1.6265, + "step": 35700 + }, + { + "epoch": 2204.938271604938, + "learning_rate": 5.107138484143504e-07, + "loss": 1.6418, + "step": 35720 + }, + { + "epoch": 2206.1728395061727, + "learning_rate": 5.104383494551243e-07, + "loss": 1.6721, + "step": 35740 + }, + { + "epoch": 2207.4074074074074, + "learning_rate": 5.101628504958981e-07, + "loss": 1.6456, + "step": 35760 + }, + { + "epoch": 2208.641975308642, + "learning_rate": 5.09887351536672e-07, + "loss": 1.6195, + "step": 35780 + }, + { + "epoch": 2209.8765432098767, + "learning_rate": 5.096118525774458e-07, + "loss": 1.6122, + "step": 35800 + }, + { + "epoch": 2211.1111111111113, + "learning_rate": 5.093363536182197e-07, + "loss": 1.6416, + "step": 35820 + }, + { + "epoch": 2212.3456790123455, + "learning_rate": 5.090608546589935e-07, + "loss": 1.6261, + "step": 35840 + }, + { + "epoch": 2213.58024691358, + "learning_rate": 5.087853556997673e-07, + "loss": 1.6247, + "step": 35860 + }, + { + "epoch": 2214.814814814815, + "learning_rate": 5.085098567405412e-07, + "loss": 1.6314, + "step": 35880 + }, + { + "epoch": 2216.0493827160494, + "learning_rate": 5.082343577813151e-07, + "loss": 1.6695, + "step": 35900 + }, + { + "epoch": 2217.283950617284, + "learning_rate": 5.079588588220889e-07, + "loss": 1.7002, + "step": 35920 + }, + { + "epoch": 2218.5185185185187, + "learning_rate": 5.076833598628627e-07, + "loss": 1.6342, + "step": 35940 + }, + { + "epoch": 2219.753086419753, + "learning_rate": 5.074078609036366e-07, + "loss": 1.6208, + "step": 35960 + }, + { + "epoch": 2220.9876543209875, + "learning_rate": 5.071323619444105e-07, + "loss": 1.648, + "step": 35980 + }, + { + "epoch": 2222.222222222222, + "learning_rate": 5.068568629851841e-07, + "loss": 1.6647, + "step": 36000 + }, + { + "epoch": 2223.456790123457, + "learning_rate": 5.065813640259581e-07, + "loss": 1.6099, + "step": 36020 + }, + { + "epoch": 2224.6913580246915, + "learning_rate": 5.063058650667319e-07, + "loss": 1.6258, + "step": 36040 + }, + { + "epoch": 2225.925925925926, + "learning_rate": 5.060303661075058e-07, + "loss": 1.6306, + "step": 36060 + }, + { + "epoch": 2227.1604938271603, + "learning_rate": 5.057548671482796e-07, + "loss": 1.6182, + "step": 36080 + }, + { + "epoch": 2228.395061728395, + "learning_rate": 5.054793681890535e-07, + "loss": 1.5924, + "step": 36100 + }, + { + "epoch": 2229.6296296296296, + "learning_rate": 5.052038692298274e-07, + "loss": 1.6585, + "step": 36120 + }, + { + "epoch": 2230.864197530864, + "learning_rate": 5.049283702706012e-07, + "loss": 1.6297, + "step": 36140 + }, + { + "epoch": 2232.098765432099, + "learning_rate": 5.046528713113751e-07, + "loss": 1.6579, + "step": 36160 + }, + { + "epoch": 2233.3333333333335, + "learning_rate": 5.043773723521489e-07, + "loss": 1.6233, + "step": 36180 + }, + { + "epoch": 2234.567901234568, + "learning_rate": 5.041018733929228e-07, + "loss": 1.6651, + "step": 36200 + }, + { + "epoch": 2235.8024691358023, + "learning_rate": 5.038263744336967e-07, + "loss": 1.6159, + "step": 36220 + }, + { + "epoch": 2237.037037037037, + "learning_rate": 5.035508754744705e-07, + "loss": 1.6339, + "step": 36240 + }, + { + "epoch": 2238.2716049382716, + "learning_rate": 5.032753765152442e-07, + "loss": 1.6332, + "step": 36260 + }, + { + "epoch": 2239.5061728395062, + "learning_rate": 5.02999877556018e-07, + "loss": 1.6244, + "step": 36280 + }, + { + "epoch": 2240.740740740741, + "learning_rate": 5.027243785967919e-07, + "loss": 1.6199, + "step": 36300 + }, + { + "epoch": 2241.9753086419755, + "learning_rate": 5.024488796375657e-07, + "loss": 1.5973, + "step": 36320 + }, + { + "epoch": 2243.2098765432097, + "learning_rate": 5.021733806783396e-07, + "loss": 1.641, + "step": 36340 + }, + { + "epoch": 2244.4444444444443, + "learning_rate": 5.018978817191135e-07, + "loss": 1.6018, + "step": 36360 + }, + { + "epoch": 2245.679012345679, + "learning_rate": 5.016223827598873e-07, + "loss": 1.6101, + "step": 36380 + }, + { + "epoch": 2246.9135802469136, + "learning_rate": 5.013468838006612e-07, + "loss": 1.6013, + "step": 36400 + }, + { + "epoch": 2248.1481481481483, + "learning_rate": 5.01071384841435e-07, + "loss": 1.6538, + "step": 36420 + }, + { + "epoch": 2249.382716049383, + "learning_rate": 5.007958858822089e-07, + "loss": 1.6441, + "step": 36440 + }, + { + "epoch": 2250.617283950617, + "learning_rate": 5.005203869229827e-07, + "loss": 1.5998, + "step": 36460 + }, + { + "epoch": 2251.8518518518517, + "learning_rate": 5.002448879637566e-07, + "loss": 1.6576, + "step": 36480 + }, + { + "epoch": 2253.0864197530864, + "learning_rate": 4.999693890045305e-07, + "loss": 1.6271, + "step": 36500 + }, + { + "epoch": 2254.320987654321, + "learning_rate": 4.996938900453043e-07, + "loss": 1.6348, + "step": 36520 + }, + { + "epoch": 2255.5555555555557, + "learning_rate": 4.994183910860781e-07, + "loss": 1.6658, + "step": 36540 + }, + { + "epoch": 2256.7901234567903, + "learning_rate": 4.991428921268519e-07, + "loss": 1.5881, + "step": 36560 + }, + { + "epoch": 2258.0246913580245, + "learning_rate": 4.988673931676258e-07, + "loss": 1.6299, + "step": 36580 + }, + { + "epoch": 2259.259259259259, + "learning_rate": 4.985918942083997e-07, + "loss": 1.6025, + "step": 36600 + }, + { + "epoch": 2260.4938271604938, + "learning_rate": 4.983163952491735e-07, + "loss": 1.6725, + "step": 36620 + }, + { + "epoch": 2261.7283950617284, + "learning_rate": 4.980408962899474e-07, + "loss": 1.6149, + "step": 36640 + }, + { + "epoch": 2262.962962962963, + "learning_rate": 4.977653973307211e-07, + "loss": 1.6358, + "step": 36660 + }, + { + "epoch": 2264.1975308641977, + "learning_rate": 4.97489898371495e-07, + "loss": 1.6036, + "step": 36680 + }, + { + "epoch": 2265.432098765432, + "learning_rate": 4.972143994122688e-07, + "loss": 1.6184, + "step": 36700 + }, + { + "epoch": 2266.6666666666665, + "learning_rate": 4.969389004530427e-07, + "loss": 1.6165, + "step": 36720 + }, + { + "epoch": 2267.901234567901, + "learning_rate": 4.966634014938166e-07, + "loss": 1.6563, + "step": 36740 + }, + { + "epoch": 2269.135802469136, + "learning_rate": 4.963879025345904e-07, + "loss": 1.6184, + "step": 36760 + }, + { + "epoch": 2270.3703703703704, + "learning_rate": 4.961124035753643e-07, + "loss": 1.6589, + "step": 36780 + }, + { + "epoch": 2271.604938271605, + "learning_rate": 4.95836904616138e-07, + "loss": 1.6132, + "step": 36800 + }, + { + "epoch": 2272.8395061728397, + "learning_rate": 4.955614056569119e-07, + "loss": 1.6007, + "step": 36820 + }, + { + "epoch": 2274.074074074074, + "learning_rate": 4.952859066976857e-07, + "loss": 1.6238, + "step": 36840 + }, + { + "epoch": 2275.3086419753085, + "learning_rate": 4.950104077384597e-07, + "loss": 1.6, + "step": 36860 + }, + { + "epoch": 2276.543209876543, + "learning_rate": 4.947349087792336e-07, + "loss": 1.6028, + "step": 36880 + }, + { + "epoch": 2277.777777777778, + "learning_rate": 4.944594098200074e-07, + "loss": 1.6577, + "step": 36900 + }, + { + "epoch": 2279.0123456790125, + "learning_rate": 4.941839108607813e-07, + "loss": 1.6336, + "step": 36920 + }, + { + "epoch": 2280.246913580247, + "learning_rate": 4.939084119015549e-07, + "loss": 1.6477, + "step": 36940 + }, + { + "epoch": 2281.4814814814813, + "learning_rate": 4.936329129423288e-07, + "loss": 1.6654, + "step": 36960 + }, + { + "epoch": 2282.716049382716, + "learning_rate": 4.933574139831027e-07, + "loss": 1.5985, + "step": 36980 + }, + { + "epoch": 2283.9506172839506, + "learning_rate": 4.930819150238765e-07, + "loss": 1.6147, + "step": 37000 + }, + { + "epoch": 2285.185185185185, + "learning_rate": 4.928064160646504e-07, + "loss": 1.6644, + "step": 37020 + }, + { + "epoch": 2286.41975308642, + "learning_rate": 4.925309171054242e-07, + "loss": 1.6265, + "step": 37040 + }, + { + "epoch": 2287.6543209876545, + "learning_rate": 4.922554181461981e-07, + "loss": 1.6096, + "step": 37060 + }, + { + "epoch": 2288.8888888888887, + "learning_rate": 4.91979919186972e-07, + "loss": 1.6549, + "step": 37080 + }, + { + "epoch": 2290.1234567901233, + "learning_rate": 4.917044202277458e-07, + "loss": 1.6385, + "step": 37100 + }, + { + "epoch": 2291.358024691358, + "learning_rate": 4.914289212685197e-07, + "loss": 1.6274, + "step": 37120 + }, + { + "epoch": 2292.5925925925926, + "learning_rate": 4.911534223092935e-07, + "loss": 1.6508, + "step": 37140 + }, + { + "epoch": 2293.8271604938273, + "learning_rate": 4.908779233500674e-07, + "loss": 1.6107, + "step": 37160 + }, + { + "epoch": 2295.061728395062, + "learning_rate": 4.906024243908412e-07, + "loss": 1.6367, + "step": 37180 + }, + { + "epoch": 2296.296296296296, + "learning_rate": 4.903269254316149e-07, + "loss": 1.6522, + "step": 37200 + }, + { + "epoch": 2297.5308641975307, + "learning_rate": 4.900514264723889e-07, + "loss": 1.5979, + "step": 37220 + }, + { + "epoch": 2298.7654320987654, + "learning_rate": 4.897759275131627e-07, + "loss": 1.684, + "step": 37240 + }, + { + "epoch": 2300.0, + "learning_rate": 4.895004285539366e-07, + "loss": 1.615, + "step": 37260 + }, + { + "epoch": 2301.2345679012346, + "learning_rate": 4.892249295947104e-07, + "loss": 1.6249, + "step": 37280 + }, + { + "epoch": 2302.4691358024693, + "learning_rate": 4.889494306354843e-07, + "loss": 1.6209, + "step": 37300 + }, + { + "epoch": 2303.703703703704, + "learning_rate": 4.886739316762581e-07, + "loss": 1.6521, + "step": 37320 + }, + { + "epoch": 2304.938271604938, + "learning_rate": 4.883984327170319e-07, + "loss": 1.6456, + "step": 37340 + }, + { + "epoch": 2306.1728395061727, + "learning_rate": 4.881229337578058e-07, + "loss": 1.64, + "step": 37360 + }, + { + "epoch": 2307.4074074074074, + "learning_rate": 4.878474347985796e-07, + "loss": 1.6339, + "step": 37380 + }, + { + "epoch": 2308.641975308642, + "learning_rate": 4.875719358393535e-07, + "loss": 1.6251, + "step": 37400 + }, + { + "epoch": 2309.8765432098767, + "learning_rate": 4.872964368801273e-07, + "loss": 1.6307, + "step": 37420 + }, + { + "epoch": 2311.1111111111113, + "learning_rate": 4.870209379209012e-07, + "loss": 1.6564, + "step": 37440 + }, + { + "epoch": 2312.3456790123455, + "learning_rate": 4.867454389616751e-07, + "loss": 1.6458, + "step": 37460 + }, + { + "epoch": 2313.58024691358, + "learning_rate": 4.864699400024488e-07, + "loss": 1.5916, + "step": 37480 + }, + { + "epoch": 2314.814814814815, + "learning_rate": 4.861944410432227e-07, + "loss": 1.5901, + "step": 37500 + }, + { + "epoch": 2316.0493827160494, + "learning_rate": 4.859189420839965e-07, + "loss": 1.6709, + "step": 37520 + }, + { + "epoch": 2317.283950617284, + "learning_rate": 4.856434431247704e-07, + "loss": 1.6112, + "step": 37540 + }, + { + "epoch": 2318.5185185185187, + "learning_rate": 4.853679441655442e-07, + "loss": 1.6341, + "step": 37560 + }, + { + "epoch": 2319.753086419753, + "learning_rate": 4.850924452063181e-07, + "loss": 1.6095, + "step": 37580 + }, + { + "epoch": 2320.9876543209875, + "learning_rate": 4.848169462470919e-07, + "loss": 1.6618, + "step": 37600 + }, + { + "epoch": 2322.222222222222, + "learning_rate": 4.845414472878657e-07, + "loss": 1.6223, + "step": 37620 + }, + { + "epoch": 2323.456790123457, + "learning_rate": 4.842659483286396e-07, + "loss": 1.6658, + "step": 37640 + }, + { + "epoch": 2324.6913580246915, + "learning_rate": 4.839904493694134e-07, + "loss": 1.5882, + "step": 37660 + }, + { + "epoch": 2325.925925925926, + "learning_rate": 4.837149504101873e-07, + "loss": 1.586, + "step": 37680 + }, + { + "epoch": 2327.1604938271603, + "learning_rate": 4.834394514509611e-07, + "loss": 1.6513, + "step": 37700 + }, + { + "epoch": 2328.395061728395, + "learning_rate": 4.83163952491735e-07, + "loss": 1.602, + "step": 37720 + }, + { + "epoch": 2329.6296296296296, + "learning_rate": 4.828884535325089e-07, + "loss": 1.6642, + "step": 37740 + }, + { + "epoch": 2330.864197530864, + "learning_rate": 4.826129545732827e-07, + "loss": 1.6289, + "step": 37760 + }, + { + "epoch": 2332.098765432099, + "learning_rate": 4.823374556140566e-07, + "loss": 1.6072, + "step": 37780 + }, + { + "epoch": 2333.3333333333335, + "learning_rate": 4.820619566548304e-07, + "loss": 1.6095, + "step": 37800 + }, + { + "epoch": 2334.567901234568, + "learning_rate": 4.817864576956043e-07, + "loss": 1.643, + "step": 37820 + }, + { + "epoch": 2335.8024691358023, + "learning_rate": 4.815109587363782e-07, + "loss": 1.617, + "step": 37840 + }, + { + "epoch": 2337.037037037037, + "learning_rate": 4.81235459777152e-07, + "loss": 1.6112, + "step": 37860 + }, + { + "epoch": 2338.2716049382716, + "learning_rate": 4.809599608179257e-07, + "loss": 1.6165, + "step": 37880 + }, + { + "epoch": 2339.5061728395062, + "learning_rate": 4.806844618586995e-07, + "loss": 1.633, + "step": 37900 + }, + { + "epoch": 2340.740740740741, + "learning_rate": 4.804089628994734e-07, + "loss": 1.6311, + "step": 37920 + }, + { + "epoch": 2341.9753086419755, + "learning_rate": 4.801334639402472e-07, + "loss": 1.5876, + "step": 37940 + }, + { + "epoch": 2343.2098765432097, + "learning_rate": 4.798579649810211e-07, + "loss": 1.6237, + "step": 37960 + }, + { + "epoch": 2344.4444444444443, + "learning_rate": 4.795824660217951e-07, + "loss": 1.6244, + "step": 37980 + }, + { + "epoch": 2345.679012345679, + "learning_rate": 4.793069670625689e-07, + "loss": 1.586, + "step": 38000 + }, + { + "epoch": 2346.9135802469136, + "learning_rate": 4.790314681033427e-07, + "loss": 1.6345, + "step": 38020 + }, + { + "epoch": 2348.1481481481483, + "learning_rate": 4.787559691441165e-07, + "loss": 1.6393, + "step": 38040 + }, + { + "epoch": 2349.382716049383, + "learning_rate": 4.784804701848904e-07, + "loss": 1.6173, + "step": 38060 + }, + { + "epoch": 2350.617283950617, + "learning_rate": 4.782049712256643e-07, + "loss": 1.6243, + "step": 38080 + }, + { + "epoch": 2351.8518518518517, + "learning_rate": 4.779294722664381e-07, + "loss": 1.6464, + "step": 38100 + }, + { + "epoch": 2353.0864197530864, + "learning_rate": 4.77653973307212e-07, + "loss": 1.6287, + "step": 38120 + }, + { + "epoch": 2354.320987654321, + "learning_rate": 4.773784743479857e-07, + "loss": 1.6247, + "step": 38140 + }, + { + "epoch": 2355.5555555555557, + "learning_rate": 4.771029753887596e-07, + "loss": 1.6204, + "step": 38160 + }, + { + "epoch": 2356.7901234567903, + "learning_rate": 4.768274764295335e-07, + "loss": 1.6291, + "step": 38180 + }, + { + "epoch": 2358.0246913580245, + "learning_rate": 4.765519774703073e-07, + "loss": 1.6355, + "step": 38200 + }, + { + "epoch": 2359.259259259259, + "learning_rate": 4.7627647851108116e-07, + "loss": 1.6379, + "step": 38220 + }, + { + "epoch": 2360.4938271604938, + "learning_rate": 4.76000979551855e-07, + "loss": 1.6773, + "step": 38240 + }, + { + "epoch": 2361.7283950617284, + "learning_rate": 4.757254805926289e-07, + "loss": 1.5918, + "step": 38260 + }, + { + "epoch": 2362.962962962963, + "learning_rate": 4.7544998163340263e-07, + "loss": 1.6183, + "step": 38280 + }, + { + "epoch": 2364.1975308641977, + "learning_rate": 4.751744826741765e-07, + "loss": 1.5934, + "step": 38300 + }, + { + "epoch": 2365.432098765432, + "learning_rate": 4.7489898371495035e-07, + "loss": 1.5774, + "step": 38320 + }, + { + "epoch": 2366.6666666666665, + "learning_rate": 4.7462348475572426e-07, + "loss": 1.6208, + "step": 38340 + }, + { + "epoch": 2367.901234567901, + "learning_rate": 4.743479857964981e-07, + "loss": 1.6088, + "step": 38360 + }, + { + "epoch": 2369.135802469136, + "learning_rate": 4.74072486837272e-07, + "loss": 1.6248, + "step": 38380 + }, + { + "epoch": 2370.3703703703704, + "learning_rate": 4.7379698787804584e-07, + "loss": 1.5924, + "step": 38400 + }, + { + "epoch": 2371.604938271605, + "learning_rate": 4.735214889188196e-07, + "loss": 1.6087, + "step": 38420 + }, + { + "epoch": 2372.8395061728397, + "learning_rate": 4.7324598995959345e-07, + "loss": 1.5936, + "step": 38440 + }, + { + "epoch": 2374.074074074074, + "learning_rate": 4.729704910003673e-07, + "loss": 1.6532, + "step": 38460 + }, + { + "epoch": 2375.3086419753085, + "learning_rate": 4.7269499204114117e-07, + "loss": 1.6262, + "step": 38480 + }, + { + "epoch": 2376.543209876543, + "learning_rate": 4.72419493081915e-07, + "loss": 1.6101, + "step": 38500 + }, + { + "epoch": 2377.777777777778, + "learning_rate": 4.721439941226889e-07, + "loss": 1.634, + "step": 38520 + }, + { + "epoch": 2379.0123456790125, + "learning_rate": 4.7186849516346274e-07, + "loss": 1.6157, + "step": 38540 + }, + { + "epoch": 2380.246913580247, + "learning_rate": 4.715929962042365e-07, + "loss": 1.6171, + "step": 38560 + }, + { + "epoch": 2381.4814814814813, + "learning_rate": 4.7131749724501036e-07, + "loss": 1.6804, + "step": 38580 + }, + { + "epoch": 2382.716049382716, + "learning_rate": 4.710419982857842e-07, + "loss": 1.596, + "step": 38600 + }, + { + "epoch": 2383.9506172839506, + "learning_rate": 4.7076649932655807e-07, + "loss": 1.6372, + "step": 38620 + }, + { + "epoch": 2385.185185185185, + "learning_rate": 4.7049100036733193e-07, + "loss": 1.6268, + "step": 38640 + }, + { + "epoch": 2386.41975308642, + "learning_rate": 4.702155014081058e-07, + "loss": 1.6041, + "step": 38660 + }, + { + "epoch": 2387.6543209876545, + "learning_rate": 4.6994000244887954e-07, + "loss": 1.6394, + "step": 38680 + }, + { + "epoch": 2388.8888888888887, + "learning_rate": 4.6966450348965346e-07, + "loss": 1.6243, + "step": 38700 + }, + { + "epoch": 2390.1234567901233, + "learning_rate": 4.693890045304273e-07, + "loss": 1.6462, + "step": 38720 + }, + { + "epoch": 2391.358024691358, + "learning_rate": 4.6911350557120117e-07, + "loss": 1.608, + "step": 38740 + }, + { + "epoch": 2392.5925925925926, + "learning_rate": 4.6883800661197503e-07, + "loss": 1.6236, + "step": 38760 + }, + { + "epoch": 2393.8271604938273, + "learning_rate": 4.685625076527489e-07, + "loss": 1.639, + "step": 38780 + }, + { + "epoch": 2395.061728395062, + "learning_rate": 4.682870086935227e-07, + "loss": 1.6154, + "step": 38800 + }, + { + "epoch": 2396.296296296296, + "learning_rate": 4.680115097342965e-07, + "loss": 1.6044, + "step": 38820 + }, + { + "epoch": 2397.5308641975307, + "learning_rate": 4.6773601077507036e-07, + "loss": 1.6302, + "step": 38840 + }, + { + "epoch": 2398.7654320987654, + "learning_rate": 4.674605118158442e-07, + "loss": 1.6219, + "step": 38860 + }, + { + "epoch": 2400.0, + "learning_rate": 4.671850128566181e-07, + "loss": 1.6178, + "step": 38880 + }, + { + "epoch": 2401.2345679012346, + "learning_rate": 4.6690951389739194e-07, + "loss": 1.6197, + "step": 38900 + }, + { + "epoch": 2402.4691358024693, + "learning_rate": 4.666340149381658e-07, + "loss": 1.6042, + "step": 38920 + }, + { + "epoch": 2403.703703703704, + "learning_rate": 4.6635851597893966e-07, + "loss": 1.591, + "step": 38940 + }, + { + "epoch": 2404.938271604938, + "learning_rate": 4.660830170197134e-07, + "loss": 1.6311, + "step": 38960 + }, + { + "epoch": 2406.1728395061727, + "learning_rate": 4.6580751806048727e-07, + "loss": 1.644, + "step": 38980 + }, + { + "epoch": 2407.4074074074074, + "learning_rate": 4.6553201910126113e-07, + "loss": 1.6399, + "step": 39000 + }, + { + "epoch": 2408.641975308642, + "learning_rate": 4.65256520142035e-07, + "loss": 1.6386, + "step": 39020 + }, + { + "epoch": 2409.8765432098767, + "learning_rate": 4.6498102118280884e-07, + "loss": 1.6388, + "step": 39040 + }, + { + "epoch": 2411.1111111111113, + "learning_rate": 4.647055222235827e-07, + "loss": 1.6258, + "step": 39060 + }, + { + "epoch": 2412.3456790123455, + "learning_rate": 4.6443002326435656e-07, + "loss": 1.6054, + "step": 39080 + }, + { + "epoch": 2413.58024691358, + "learning_rate": 4.6415452430513037e-07, + "loss": 1.666, + "step": 39100 + }, + { + "epoch": 2414.814814814815, + "learning_rate": 4.6387902534590423e-07, + "loss": 1.6082, + "step": 39120 + }, + { + "epoch": 2416.0493827160494, + "learning_rate": 4.636035263866781e-07, + "loss": 1.6282, + "step": 39140 + }, + { + "epoch": 2417.283950617284, + "learning_rate": 4.633280274274519e-07, + "loss": 1.6102, + "step": 39160 + }, + { + "epoch": 2418.5185185185187, + "learning_rate": 4.630525284682258e-07, + "loss": 1.6249, + "step": 39180 + }, + { + "epoch": 2419.753086419753, + "learning_rate": 4.6277702950899966e-07, + "loss": 1.6388, + "step": 39200 + }, + { + "epoch": 2420.9876543209875, + "learning_rate": 4.625015305497734e-07, + "loss": 1.6135, + "step": 39220 + }, + { + "epoch": 2422.222222222222, + "learning_rate": 4.622260315905473e-07, + "loss": 1.6742, + "step": 39240 + }, + { + "epoch": 2423.456790123457, + "learning_rate": 4.6195053263132113e-07, + "loss": 1.6415, + "step": 39260 + }, + { + "epoch": 2424.6913580246915, + "learning_rate": 4.61675033672095e-07, + "loss": 1.6385, + "step": 39280 + }, + { + "epoch": 2425.925925925926, + "learning_rate": 4.6139953471286885e-07, + "loss": 1.6201, + "step": 39300 + }, + { + "epoch": 2427.1604938271603, + "learning_rate": 4.611240357536427e-07, + "loss": 1.6216, + "step": 39320 + }, + { + "epoch": 2428.395061728395, + "learning_rate": 4.6084853679441657e-07, + "loss": 1.5967, + "step": 39340 + }, + { + "epoch": 2429.6296296296296, + "learning_rate": 4.605730378351903e-07, + "loss": 1.6069, + "step": 39360 + }, + { + "epoch": 2430.864197530864, + "learning_rate": 4.602975388759642e-07, + "loss": 1.6185, + "step": 39380 + }, + { + "epoch": 2432.098765432099, + "learning_rate": 4.6002203991673804e-07, + "loss": 1.6183, + "step": 39400 + }, + { + "epoch": 2433.3333333333335, + "learning_rate": 4.597465409575119e-07, + "loss": 1.613, + "step": 39420 + }, + { + "epoch": 2434.567901234568, + "learning_rate": 4.5947104199828576e-07, + "loss": 1.6076, + "step": 39440 + }, + { + "epoch": 2435.8024691358023, + "learning_rate": 4.591955430390596e-07, + "loss": 1.6154, + "step": 39460 + }, + { + "epoch": 2437.037037037037, + "learning_rate": 4.5892004407983353e-07, + "loss": 1.6337, + "step": 39480 + }, + { + "epoch": 2438.2716049382716, + "learning_rate": 4.586445451206073e-07, + "loss": 1.6416, + "step": 39500 + }, + { + "epoch": 2439.5061728395062, + "learning_rate": 4.583690461613811e-07, + "loss": 1.615, + "step": 39520 + }, + { + "epoch": 2440.740740740741, + "learning_rate": 4.58093547202155e-07, + "loss": 1.6198, + "step": 39540 + }, + { + "epoch": 2441.9753086419755, + "learning_rate": 4.5781804824292886e-07, + "loss": 1.6119, + "step": 39560 + }, + { + "epoch": 2443.2098765432097, + "learning_rate": 4.575425492837027e-07, + "loss": 1.6009, + "step": 39580 + }, + { + "epoch": 2444.4444444444443, + "learning_rate": 4.572670503244766e-07, + "loss": 1.5824, + "step": 39600 + }, + { + "epoch": 2445.679012345679, + "learning_rate": 4.5699155136525033e-07, + "loss": 1.6549, + "step": 39620 + }, + { + "epoch": 2446.9135802469136, + "learning_rate": 4.567160524060242e-07, + "loss": 1.636, + "step": 39640 + }, + { + "epoch": 2448.1481481481483, + "learning_rate": 4.5644055344679804e-07, + "loss": 1.6012, + "step": 39660 + }, + { + "epoch": 2449.382716049383, + "learning_rate": 4.561650544875719e-07, + "loss": 1.6254, + "step": 39680 + }, + { + "epoch": 2450.617283950617, + "learning_rate": 4.5588955552834576e-07, + "loss": 1.5667, + "step": 39700 + }, + { + "epoch": 2451.8518518518517, + "learning_rate": 4.556140565691196e-07, + "loss": 1.6614, + "step": 39720 + }, + { + "epoch": 2453.0864197530864, + "learning_rate": 4.553385576098935e-07, + "loss": 1.6136, + "step": 39740 + }, + { + "epoch": 2454.320987654321, + "learning_rate": 4.5506305865066723e-07, + "loss": 1.6299, + "step": 39760 + }, + { + "epoch": 2455.5555555555557, + "learning_rate": 4.547875596914411e-07, + "loss": 1.5855, + "step": 39780 + }, + { + "epoch": 2456.7901234567903, + "learning_rate": 4.5451206073221495e-07, + "loss": 1.6241, + "step": 39800 + }, + { + "epoch": 2458.0246913580245, + "learning_rate": 4.542365617729888e-07, + "loss": 1.6204, + "step": 39820 + }, + { + "epoch": 2459.259259259259, + "learning_rate": 4.539610628137627e-07, + "loss": 1.6197, + "step": 39840 + }, + { + "epoch": 2460.4938271604938, + "learning_rate": 4.536855638545366e-07, + "loss": 1.5769, + "step": 39860 + }, + { + "epoch": 2461.7283950617284, + "learning_rate": 4.5341006489531044e-07, + "loss": 1.6498, + "step": 39880 + }, + { + "epoch": 2462.962962962963, + "learning_rate": 4.531345659360842e-07, + "loss": 1.59, + "step": 39900 + }, + { + "epoch": 2464.1975308641977, + "learning_rate": 4.5285906697685805e-07, + "loss": 1.6261, + "step": 39920 + }, + { + "epoch": 2465.432098765432, + "learning_rate": 4.525835680176319e-07, + "loss": 1.6421, + "step": 39940 + }, + { + "epoch": 2466.6666666666665, + "learning_rate": 4.5230806905840577e-07, + "loss": 1.5835, + "step": 39960 + }, + { + "epoch": 2467.901234567901, + "learning_rate": 4.5203257009917963e-07, + "loss": 1.5877, + "step": 39980 + }, + { + "epoch": 2469.135802469136, + "learning_rate": 4.517570711399535e-07, + "loss": 1.6351, + "step": 40000 + } + ], + "logging_steps": 20, + "max_steps": 65536, + "num_input_tokens_seen": 0, + "num_train_epochs": 4096, + "save_steps": 10000, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 1.1693476259787571e+17, + "train_batch_size": 5, + "trial_name": null, + "trial_params": null +}