{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 617.283950617284,
"eval_steps": 500,
"global_step": 10000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.06172839506172839,
"learning_rate": 5e-09,
"loss": 8.604,
"step": 1
},
{
"epoch": 1.2345679012345678,
"learning_rate": 1e-07,
"loss": 8.0873,
"step": 20
},
{
"epoch": 2.4691358024691357,
"learning_rate": 2e-07,
"loss": 7.8074,
"step": 40
},
{
"epoch": 3.7037037037037037,
"learning_rate": 3e-07,
"loss": 7.0763,
"step": 60
},
{
"epoch": 4.938271604938271,
"learning_rate": 4e-07,
"loss": 6.2969,
"step": 80
},
{
"epoch": 6.172839506172839,
"learning_rate": 5e-07,
"loss": 5.9334,
"step": 100
},
{
"epoch": 7.407407407407407,
"learning_rate": 6e-07,
"loss": 5.4919,
"step": 120
},
{
"epoch": 8.641975308641975,
"learning_rate": 7e-07,
"loss": 5.0659,
"step": 140
},
{
"epoch": 9.876543209876543,
"learning_rate": 8e-07,
"loss": 4.9089,
"step": 160
},
{
"epoch": 11.11111111111111,
"learning_rate": 9e-07,
"loss": 4.7144,
"step": 180
},
{
"epoch": 12.345679012345679,
"learning_rate": 1e-06,
"loss": 4.7373,
"step": 200
},
{
"epoch": 13.580246913580247,
"learning_rate": 9.997245010407738e-07,
"loss": 4.3848,
"step": 220
},
{
"epoch": 14.814814814814815,
"learning_rate": 9.994490020815477e-07,
"loss": 4.3457,
"step": 240
},
{
"epoch": 16.049382716049383,
"learning_rate": 9.991735031223215e-07,
"loss": 4.2217,
"step": 260
},
{
"epoch": 17.28395061728395,
"learning_rate": 9.988980041630952e-07,
"loss": 4.1273,
"step": 280
},
{
"epoch": 18.51851851851852,
"learning_rate": 9.986225052038692e-07,
"loss": 4.0123,
"step": 300
},
{
"epoch": 19.753086419753085,
"learning_rate": 9.98347006244643e-07,
"loss": 4.0784,
"step": 320
},
{
"epoch": 20.987654320987655,
"learning_rate": 9.98071507285417e-07,
"loss": 3.9415,
"step": 340
},
{
"epoch": 22.22222222222222,
"learning_rate": 9.977960083261906e-07,
"loss": 3.843,
"step": 360
},
{
"epoch": 23.45679012345679,
"learning_rate": 9.975205093669647e-07,
"loss": 3.8174,
"step": 380
},
{
"epoch": 24.691358024691358,
"learning_rate": 9.972450104077383e-07,
"loss": 3.7376,
"step": 400
},
{
"epoch": 25.925925925925927,
"learning_rate": 9.969695114485122e-07,
"loss": 3.7876,
"step": 420
},
{
"epoch": 27.160493827160494,
"learning_rate": 9.96694012489286e-07,
"loss": 3.7585,
"step": 440
},
{
"epoch": 28.395061728395063,
"learning_rate": 9.9641851353006e-07,
"loss": 3.6304,
"step": 460
},
{
"epoch": 29.62962962962963,
"learning_rate": 9.961430145708338e-07,
"loss": 3.6751,
"step": 480
},
{
"epoch": 30.864197530864196,
"learning_rate": 9.958675156116076e-07,
"loss": 3.677,
"step": 500
},
{
"epoch": 32.098765432098766,
"learning_rate": 9.955920166523815e-07,
"loss": 3.6556,
"step": 520
},
{
"epoch": 33.333333333333336,
"learning_rate": 9.953165176931551e-07,
"loss": 3.5897,
"step": 540
},
{
"epoch": 34.5679012345679,
"learning_rate": 9.950410187339292e-07,
"loss": 3.7304,
"step": 560
},
{
"epoch": 35.80246913580247,
"learning_rate": 9.94765519774703e-07,
"loss": 3.568,
"step": 580
},
{
"epoch": 37.03703703703704,
"learning_rate": 9.94490020815477e-07,
"loss": 3.6207,
"step": 600
},
{
"epoch": 38.27160493827161,
"learning_rate": 9.942145218562508e-07,
"loss": 3.4759,
"step": 620
},
{
"epoch": 39.50617283950617,
"learning_rate": 9.939390228970246e-07,
"loss": 3.4513,
"step": 640
},
{
"epoch": 40.74074074074074,
"learning_rate": 9.936635239377985e-07,
"loss": 3.372,
"step": 660
},
{
"epoch": 41.97530864197531,
"learning_rate": 9.933880249785724e-07,
"loss": 3.2601,
"step": 680
},
{
"epoch": 43.20987654320987,
"learning_rate": 9.93112526019346e-07,
"loss": 3.3071,
"step": 700
},
{
"epoch": 44.44444444444444,
"learning_rate": 9.9283702706012e-07,
"loss": 3.2768,
"step": 720
},
{
"epoch": 45.67901234567901,
"learning_rate": 9.925615281008937e-07,
"loss": 3.3103,
"step": 740
},
{
"epoch": 46.91358024691358,
"learning_rate": 9.922860291416678e-07,
"loss": 3.2807,
"step": 760
},
{
"epoch": 48.148148148148145,
"learning_rate": 9.920105301824414e-07,
"loss": 3.1839,
"step": 780
},
{
"epoch": 49.382716049382715,
"learning_rate": 9.917350312232155e-07,
"loss": 3.1689,
"step": 800
},
{
"epoch": 50.617283950617285,
"learning_rate": 9.914595322639892e-07,
"loss": 3.17,
"step": 820
},
{
"epoch": 51.851851851851855,
"learning_rate": 9.91184033304763e-07,
"loss": 3.1598,
"step": 840
},
{
"epoch": 53.08641975308642,
"learning_rate": 9.909085343455369e-07,
"loss": 3.1215,
"step": 860
},
{
"epoch": 54.32098765432099,
"learning_rate": 9.906330353863107e-07,
"loss": 3.102,
"step": 880
},
{
"epoch": 55.55555555555556,
"learning_rate": 9.903575364270846e-07,
"loss": 3.0819,
"step": 900
},
{
"epoch": 56.79012345679013,
"learning_rate": 9.900820374678584e-07,
"loss": 3.0729,
"step": 920
},
{
"epoch": 58.02469135802469,
"learning_rate": 9.898065385086323e-07,
"loss": 3.0639,
"step": 940
},
{
"epoch": 59.25925925925926,
"learning_rate": 9.89531039549406e-07,
"loss": 3.0172,
"step": 960
},
{
"epoch": 60.49382716049383,
"learning_rate": 9.8925554059018e-07,
"loss": 3.0463,
"step": 980
},
{
"epoch": 61.72839506172839,
"learning_rate": 9.889800416309537e-07,
"loss": 2.9424,
"step": 1000
},
{
"epoch": 62.96296296296296,
"learning_rate": 9.887045426717277e-07,
"loss": 3.0018,
"step": 1020
},
{
"epoch": 64.19753086419753,
"learning_rate": 9.884290437125014e-07,
"loss": 3.0268,
"step": 1040
},
{
"epoch": 65.4320987654321,
"learning_rate": 9.881535447532755e-07,
"loss": 3.0157,
"step": 1060
},
{
"epoch": 66.66666666666667,
"learning_rate": 9.878780457940491e-07,
"loss": 2.9853,
"step": 1080
},
{
"epoch": 67.90123456790124,
"learning_rate": 9.87602546834823e-07,
"loss": 2.8881,
"step": 1100
},
{
"epoch": 69.1358024691358,
"learning_rate": 9.873270478755968e-07,
"loss": 2.965,
"step": 1120
},
{
"epoch": 70.37037037037037,
"learning_rate": 9.870515489163707e-07,
"loss": 2.9127,
"step": 1140
},
{
"epoch": 71.60493827160494,
"learning_rate": 9.867760499571445e-07,
"loss": 2.8881,
"step": 1160
},
{
"epoch": 72.8395061728395,
"learning_rate": 9.865005509979184e-07,
"loss": 2.8964,
"step": 1180
},
{
"epoch": 74.07407407407408,
"learning_rate": 9.862250520386923e-07,
"loss": 2.932,
"step": 1200
},
{
"epoch": 75.30864197530865,
"learning_rate": 9.85949553079466e-07,
"loss": 2.9062,
"step": 1220
},
{
"epoch": 76.54320987654322,
"learning_rate": 9.8567405412024e-07,
"loss": 2.9141,
"step": 1240
},
{
"epoch": 77.77777777777777,
"learning_rate": 9.853985551610136e-07,
"loss": 2.8072,
"step": 1260
},
{
"epoch": 79.01234567901234,
"learning_rate": 9.851230562017877e-07,
"loss": 2.8403,
"step": 1280
},
{
"epoch": 80.24691358024691,
"learning_rate": 9.848475572425613e-07,
"loss": 2.8484,
"step": 1300
},
{
"epoch": 81.48148148148148,
"learning_rate": 9.845720582833354e-07,
"loss": 2.8565,
"step": 1320
},
{
"epoch": 82.71604938271605,
"learning_rate": 9.842965593241093e-07,
"loss": 2.8241,
"step": 1340
},
{
"epoch": 83.95061728395062,
"learning_rate": 9.840210603648831e-07,
"loss": 2.8383,
"step": 1360
},
{
"epoch": 85.18518518518519,
"learning_rate": 9.837455614056568e-07,
"loss": 2.8188,
"step": 1380
},
{
"epoch": 86.41975308641975,
"learning_rate": 9.834700624464309e-07,
"loss": 2.8681,
"step": 1400
},
{
"epoch": 87.65432098765432,
"learning_rate": 9.831945634872045e-07,
"loss": 2.7453,
"step": 1420
},
{
"epoch": 88.88888888888889,
"learning_rate": 9.829190645279786e-07,
"loss": 2.7488,
"step": 1440
},
{
"epoch": 90.12345679012346,
"learning_rate": 9.826435655687522e-07,
"loss": 2.7859,
"step": 1460
},
{
"epoch": 91.35802469135803,
"learning_rate": 9.82368066609526e-07,
"loss": 2.8079,
"step": 1480
},
{
"epoch": 92.5925925925926,
"learning_rate": 9.820925676503e-07,
"loss": 2.717,
"step": 1500
},
{
"epoch": 93.82716049382717,
"learning_rate": 9.818170686910738e-07,
"loss": 2.7502,
"step": 1520
},
{
"epoch": 95.06172839506173,
"learning_rate": 9.815415697318477e-07,
"loss": 2.7395,
"step": 1540
},
{
"epoch": 96.29629629629629,
"learning_rate": 9.812660707726215e-07,
"loss": 2.7172,
"step": 1560
},
{
"epoch": 97.53086419753086,
"learning_rate": 9.809905718133954e-07,
"loss": 2.6894,
"step": 1580
},
{
"epoch": 98.76543209876543,
"learning_rate": 9.807150728541692e-07,
"loss": 2.7527,
"step": 1600
},
{
"epoch": 100.0,
"learning_rate": 9.80439573894943e-07,
"loss": 2.7461,
"step": 1620
},
{
"epoch": 101.23456790123457,
"learning_rate": 9.801640749357167e-07,
"loss": 2.782,
"step": 1640
},
{
"epoch": 102.46913580246914,
"learning_rate": 9.798885759764908e-07,
"loss": 2.7753,
"step": 1660
},
{
"epoch": 103.70370370370371,
"learning_rate": 9.796130770172645e-07,
"loss": 2.7902,
"step": 1680
},
{
"epoch": 104.93827160493827,
"learning_rate": 9.793375780580385e-07,
"loss": 2.7305,
"step": 1700
},
{
"epoch": 106.17283950617283,
"learning_rate": 9.790620790988122e-07,
"loss": 2.7476,
"step": 1720
},
{
"epoch": 107.4074074074074,
"learning_rate": 9.787865801395862e-07,
"loss": 2.715,
"step": 1740
},
{
"epoch": 108.64197530864197,
"learning_rate": 9.785110811803599e-07,
"loss": 2.6883,
"step": 1760
},
{
"epoch": 109.87654320987654,
"learning_rate": 9.782355822211337e-07,
"loss": 2.6921,
"step": 1780
},
{
"epoch": 111.11111111111111,
"learning_rate": 9.779600832619076e-07,
"loss": 2.7132,
"step": 1800
},
{
"epoch": 112.34567901234568,
"learning_rate": 9.776845843026815e-07,
"loss": 2.6779,
"step": 1820
},
{
"epoch": 113.58024691358025,
"learning_rate": 9.774090853434553e-07,
"loss": 2.6119,
"step": 1840
},
{
"epoch": 114.81481481481481,
"learning_rate": 9.771335863842292e-07,
"loss": 2.6716,
"step": 1860
},
{
"epoch": 116.04938271604938,
"learning_rate": 9.76858087425003e-07,
"loss": 2.6666,
"step": 1880
},
{
"epoch": 117.28395061728395,
"learning_rate": 9.765825884657767e-07,
"loss": 2.6476,
"step": 1900
},
{
"epoch": 118.51851851851852,
"learning_rate": 9.763070895065508e-07,
"loss": 2.6393,
"step": 1920
},
{
"epoch": 119.75308641975309,
"learning_rate": 9.760315905473244e-07,
"loss": 2.627,
"step": 1940
},
{
"epoch": 120.98765432098766,
"learning_rate": 9.757560915880985e-07,
"loss": 2.6067,
"step": 1960
},
{
"epoch": 122.22222222222223,
"learning_rate": 9.754805926288721e-07,
"loss": 2.6015,
"step": 1980
},
{
"epoch": 123.45679012345678,
"learning_rate": 9.752050936696462e-07,
"loss": 2.6428,
"step": 2000
},
{
"epoch": 124.69135802469135,
"learning_rate": 9.749295947104198e-07,
"loss": 2.6251,
"step": 2020
},
{
"epoch": 125.92592592592592,
"learning_rate": 9.746540957511937e-07,
"loss": 2.655,
"step": 2040
},
{
"epoch": 127.1604938271605,
"learning_rate": 9.743785967919676e-07,
"loss": 2.5351,
"step": 2060
},
{
"epoch": 128.39506172839506,
"learning_rate": 9.741030978327416e-07,
"loss": 2.582,
"step": 2080
},
{
"epoch": 129.62962962962962,
"learning_rate": 9.738275988735153e-07,
"loss": 2.5874,
"step": 2100
},
{
"epoch": 130.8641975308642,
"learning_rate": 9.735520999142893e-07,
"loss": 2.5603,
"step": 2120
},
{
"epoch": 132.09876543209876,
"learning_rate": 9.73276600955063e-07,
"loss": 2.5398,
"step": 2140
},
{
"epoch": 133.33333333333334,
"learning_rate": 9.730011019958369e-07,
"loss": 2.5265,
"step": 2160
},
{
"epoch": 134.5679012345679,
"learning_rate": 9.727256030366107e-07,
"loss": 2.6271,
"step": 2180
},
{
"epoch": 135.80246913580248,
"learning_rate": 9.724501040773846e-07,
"loss": 2.5626,
"step": 2200
},
{
"epoch": 137.03703703703704,
"learning_rate": 9.721746051181584e-07,
"loss": 2.5425,
"step": 2220
},
{
"epoch": 138.2716049382716,
"learning_rate": 9.718991061589323e-07,
"loss": 2.5251,
"step": 2240
},
{
"epoch": 139.50617283950618,
"learning_rate": 9.716236071997061e-07,
"loss": 2.5817,
"step": 2260
},
{
"epoch": 140.74074074074073,
"learning_rate": 9.7134810824048e-07,
"loss": 2.4888,
"step": 2280
},
{
"epoch": 141.97530864197532,
"learning_rate": 9.710726092812539e-07,
"loss": 2.5426,
"step": 2300
},
{
"epoch": 143.20987654320987,
"learning_rate": 9.707971103220275e-07,
"loss": 2.5293,
"step": 2320
},
{
"epoch": 144.44444444444446,
"learning_rate": 9.705216113628016e-07,
"loss": 2.4874,
"step": 2340
},
{
"epoch": 145.679012345679,
"learning_rate": 9.702461124035752e-07,
"loss": 2.5487,
"step": 2360
},
{
"epoch": 146.91358024691357,
"learning_rate": 9.699706134443493e-07,
"loss": 2.5153,
"step": 2380
},
{
"epoch": 148.14814814814815,
"learning_rate": 9.69695114485123e-07,
"loss": 2.4468,
"step": 2400
},
{
"epoch": 149.3827160493827,
"learning_rate": 9.69419615525897e-07,
"loss": 2.4731,
"step": 2420
},
{
"epoch": 150.6172839506173,
"learning_rate": 9.691441165666707e-07,
"loss": 2.5007,
"step": 2440
},
{
"epoch": 151.85185185185185,
"learning_rate": 9.688686176074445e-07,
"loss": 2.4948,
"step": 2460
},
{
"epoch": 153.08641975308643,
"learning_rate": 9.685931186482184e-07,
"loss": 2.4867,
"step": 2480
},
{
"epoch": 154.320987654321,
"learning_rate": 9.683176196889922e-07,
"loss": 2.4654,
"step": 2500
},
{
"epoch": 155.55555555555554,
"learning_rate": 9.68042120729766e-07,
"loss": 2.4621,
"step": 2520
},
{
"epoch": 156.79012345679013,
"learning_rate": 9.6776662177054e-07,
"loss": 2.4738,
"step": 2540
},
{
"epoch": 158.02469135802468,
"learning_rate": 9.674911228113138e-07,
"loss": 2.4562,
"step": 2560
},
{
"epoch": 159.25925925925927,
"learning_rate": 9.672156238520875e-07,
"loss": 2.4937,
"step": 2580
},
{
"epoch": 160.49382716049382,
"learning_rate": 9.669401248928615e-07,
"loss": 2.4574,
"step": 2600
},
{
"epoch": 161.7283950617284,
"learning_rate": 9.666646259336352e-07,
"loss": 2.4642,
"step": 2620
},
{
"epoch": 162.96296296296296,
"learning_rate": 9.663891269744093e-07,
"loss": 2.4736,
"step": 2640
},
{
"epoch": 164.19753086419752,
"learning_rate": 9.66113628015183e-07,
"loss": 2.4926,
"step": 2660
},
{
"epoch": 165.4320987654321,
"learning_rate": 9.65838129055957e-07,
"loss": 2.4323,
"step": 2680
},
{
"epoch": 166.66666666666666,
"learning_rate": 9.655626300967306e-07,
"loss": 2.4508,
"step": 2700
},
{
"epoch": 167.90123456790124,
"learning_rate": 9.652871311375045e-07,
"loss": 2.4295,
"step": 2720
},
{
"epoch": 169.1358024691358,
"learning_rate": 9.650116321782783e-07,
"loss": 2.4753,
"step": 2740
},
{
"epoch": 170.37037037037038,
"learning_rate": 9.647361332190522e-07,
"loss": 2.4323,
"step": 2760
},
{
"epoch": 171.60493827160494,
"learning_rate": 9.64460634259826e-07,
"loss": 2.392,
"step": 2780
},
{
"epoch": 172.8395061728395,
"learning_rate": 9.641851353006e-07,
"loss": 2.4579,
"step": 2800
},
{
"epoch": 174.07407407407408,
"learning_rate": 9.639096363413738e-07,
"loss": 2.4091,
"step": 2820
},
{
"epoch": 175.30864197530863,
"learning_rate": 9.636341373821476e-07,
"loss": 2.3802,
"step": 2840
},
{
"epoch": 176.54320987654322,
"learning_rate": 9.633586384229215e-07,
"loss": 2.3483,
"step": 2860
},
{
"epoch": 177.77777777777777,
"learning_rate": 9.630831394636953e-07,
"loss": 2.374,
"step": 2880
},
{
"epoch": 179.01234567901236,
"learning_rate": 9.628076405044692e-07,
"loss": 2.414,
"step": 2900
},
{
"epoch": 180.2469135802469,
"learning_rate": 9.62532141545243e-07,
"loss": 2.3971,
"step": 2920
},
{
"epoch": 181.4814814814815,
"learning_rate": 9.62256642586017e-07,
"loss": 2.3508,
"step": 2940
},
{
"epoch": 182.71604938271605,
"learning_rate": 9.619811436267908e-07,
"loss": 2.4072,
"step": 2960
},
{
"epoch": 183.9506172839506,
"learning_rate": 9.617056446675646e-07,
"loss": 2.3853,
"step": 2980
},
{
"epoch": 185.1851851851852,
"learning_rate": 9.614301457083383e-07,
"loss": 2.4256,
"step": 3000
},
{
"epoch": 186.41975308641975,
"learning_rate": 9.611546467491124e-07,
"loss": 2.3795,
"step": 3020
},
{
"epoch": 187.65432098765433,
"learning_rate": 9.60879147789886e-07,
"loss": 2.3763,
"step": 3040
},
{
"epoch": 188.88888888888889,
"learning_rate": 9.6060364883066e-07,
"loss": 2.3474,
"step": 3060
},
{
"epoch": 190.12345679012347,
"learning_rate": 9.603281498714337e-07,
"loss": 2.4074,
"step": 3080
},
{
"epoch": 191.35802469135803,
"learning_rate": 9.600526509122076e-07,
"loss": 2.3438,
"step": 3100
},
{
"epoch": 192.59259259259258,
"learning_rate": 9.597771519529814e-07,
"loss": 2.3209,
"step": 3120
},
{
"epoch": 193.82716049382717,
"learning_rate": 9.595016529937553e-07,
"loss": 2.3364,
"step": 3140
},
{
"epoch": 195.06172839506172,
"learning_rate": 9.592261540345292e-07,
"loss": 2.3624,
"step": 3160
},
{
"epoch": 196.2962962962963,
"learning_rate": 9.58950655075303e-07,
"loss": 2.3004,
"step": 3180
},
{
"epoch": 197.53086419753086,
"learning_rate": 9.586751561160769e-07,
"loss": 2.2904,
"step": 3200
},
{
"epoch": 198.76543209876544,
"learning_rate": 9.583996571568507e-07,
"loss": 2.2971,
"step": 3220
},
{
"epoch": 200.0,
"learning_rate": 9.581241581976246e-07,
"loss": 2.3364,
"step": 3240
},
{
"epoch": 201.23456790123456,
"learning_rate": 9.578486592383982e-07,
"loss": 2.3191,
"step": 3260
},
{
"epoch": 202.46913580246914,
"learning_rate": 9.575731602791723e-07,
"loss": 2.3663,
"step": 3280
},
{
"epoch": 203.7037037037037,
"learning_rate": 9.57297661319946e-07,
"loss": 2.3649,
"step": 3300
},
{
"epoch": 204.93827160493828,
"learning_rate": 9.5702216236072e-07,
"loss": 2.2762,
"step": 3320
},
{
"epoch": 206.17283950617283,
"learning_rate": 9.567466634014937e-07,
"loss": 2.315,
"step": 3340
},
{
"epoch": 207.40740740740742,
"learning_rate": 9.564711644422677e-07,
"loss": 2.2924,
"step": 3360
},
{
"epoch": 208.64197530864197,
"learning_rate": 9.561956654830414e-07,
"loss": 2.2383,
"step": 3380
},
{
"epoch": 209.87654320987653,
"learning_rate": 9.559201665238153e-07,
"loss": 2.2903,
"step": 3400
},
{
"epoch": 211.11111111111111,
"learning_rate": 9.556446675645891e-07,
"loss": 2.3423,
"step": 3420
},
{
"epoch": 212.34567901234567,
"learning_rate": 9.55369168605363e-07,
"loss": 2.2747,
"step": 3440
},
{
"epoch": 213.58024691358025,
"learning_rate": 9.550936696461368e-07,
"loss": 2.2997,
"step": 3460
},
{
"epoch": 214.8148148148148,
"learning_rate": 9.548181706869107e-07,
"loss": 2.2166,
"step": 3480
},
{
"epoch": 216.0493827160494,
"learning_rate": 9.545426717276845e-07,
"loss": 2.349,
"step": 3500
},
{
"epoch": 217.28395061728395,
"learning_rate": 9.542671727684582e-07,
"loss": 2.2522,
"step": 3520
},
{
"epoch": 218.5185185185185,
"learning_rate": 9.539916738092323e-07,
"loss": 2.2854,
"step": 3540
},
{
"epoch": 219.7530864197531,
"learning_rate": 9.537161748500061e-07,
"loss": 2.2806,
"step": 3560
},
{
"epoch": 220.98765432098764,
"learning_rate": 9.5344067589078e-07,
"loss": 2.233,
"step": 3580
},
{
"epoch": 222.22222222222223,
"learning_rate": 9.531651769315538e-07,
"loss": 2.2588,
"step": 3600
},
{
"epoch": 223.45679012345678,
"learning_rate": 9.528896779723277e-07,
"loss": 2.2729,
"step": 3620
},
{
"epoch": 224.69135802469137,
"learning_rate": 9.526141790131015e-07,
"loss": 2.2818,
"step": 3640
},
{
"epoch": 225.92592592592592,
"learning_rate": 9.523386800538753e-07,
"loss": 2.2291,
"step": 3660
},
{
"epoch": 227.1604938271605,
"learning_rate": 9.520631810946491e-07,
"loss": 2.2552,
"step": 3680
},
{
"epoch": 228.39506172839506,
"learning_rate": 9.51787682135423e-07,
"loss": 2.2028,
"step": 3700
},
{
"epoch": 229.62962962962962,
"learning_rate": 9.515121831761969e-07,
"loss": 2.1948,
"step": 3720
},
{
"epoch": 230.8641975308642,
"learning_rate": 9.512366842169707e-07,
"loss": 2.2981,
"step": 3740
},
{
"epoch": 232.09876543209876,
"learning_rate": 9.509611852577446e-07,
"loss": 2.2519,
"step": 3760
},
{
"epoch": 233.33333333333334,
"learning_rate": 9.506856862985184e-07,
"loss": 2.2159,
"step": 3780
},
{
"epoch": 234.5679012345679,
"learning_rate": 9.504101873392922e-07,
"loss": 2.2122,
"step": 3800
},
{
"epoch": 235.80246913580248,
"learning_rate": 9.501346883800661e-07,
"loss": 2.2165,
"step": 3820
},
{
"epoch": 237.03703703703704,
"learning_rate": 9.498591894208399e-07,
"loss": 2.2362,
"step": 3840
},
{
"epoch": 238.2716049382716,
"learning_rate": 9.495836904616138e-07,
"loss": 2.1995,
"step": 3860
},
{
"epoch": 239.50617283950618,
"learning_rate": 9.493081915023877e-07,
"loss": 2.248,
"step": 3880
},
{
"epoch": 240.74074074074073,
"learning_rate": 9.490326925431615e-07,
"loss": 2.1703,
"step": 3900
},
{
"epoch": 241.97530864197532,
"learning_rate": 9.487571935839353e-07,
"loss": 2.1987,
"step": 3920
},
{
"epoch": 243.20987654320987,
"learning_rate": 9.484816946247091e-07,
"loss": 2.2023,
"step": 3940
},
{
"epoch": 244.44444444444446,
"learning_rate": 9.48206195665483e-07,
"loss": 2.2292,
"step": 3960
},
{
"epoch": 245.679012345679,
"learning_rate": 9.479306967062568e-07,
"loss": 2.1746,
"step": 3980
},
{
"epoch": 246.91358024691357,
"learning_rate": 9.476551977470307e-07,
"loss": 2.1809,
"step": 4000
},
{
"epoch": 248.14814814814815,
"learning_rate": 9.473796987878046e-07,
"loss": 2.1631,
"step": 4020
},
{
"epoch": 249.3827160493827,
"learning_rate": 9.471041998285784e-07,
"loss": 2.1437,
"step": 4040
},
{
"epoch": 250.6172839506173,
"learning_rate": 9.468287008693522e-07,
"loss": 2.1719,
"step": 4060
},
{
"epoch": 251.85185185185185,
"learning_rate": 9.46553201910126e-07,
"loss": 2.1754,
"step": 4080
},
{
"epoch": 253.08641975308643,
"learning_rate": 9.462777029508999e-07,
"loss": 2.172,
"step": 4100
},
{
"epoch": 254.320987654321,
"learning_rate": 9.460022039916737e-07,
"loss": 2.2135,
"step": 4120
},
{
"epoch": 255.55555555555554,
"learning_rate": 9.457267050324476e-07,
"loss": 2.1143,
"step": 4140
},
{
"epoch": 256.7901234567901,
"learning_rate": 9.454512060732215e-07,
"loss": 2.1804,
"step": 4160
},
{
"epoch": 258.0246913580247,
"learning_rate": 9.451757071139952e-07,
"loss": 2.147,
"step": 4180
},
{
"epoch": 259.25925925925924,
"learning_rate": 9.449002081547691e-07,
"loss": 2.1618,
"step": 4200
},
{
"epoch": 260.4938271604938,
"learning_rate": 9.446247091955429e-07,
"loss": 2.1434,
"step": 4220
},
{
"epoch": 261.7283950617284,
"learning_rate": 9.443492102363168e-07,
"loss": 2.1535,
"step": 4240
},
{
"epoch": 262.962962962963,
"learning_rate": 9.440737112770907e-07,
"loss": 2.2116,
"step": 4260
},
{
"epoch": 264.1975308641975,
"learning_rate": 9.437982123178645e-07,
"loss": 2.1857,
"step": 4280
},
{
"epoch": 265.4320987654321,
"learning_rate": 9.435227133586384e-07,
"loss": 2.1337,
"step": 4300
},
{
"epoch": 266.6666666666667,
"learning_rate": 9.432472143994122e-07,
"loss": 2.1554,
"step": 4320
},
{
"epoch": 267.9012345679012,
"learning_rate": 9.429717154401861e-07,
"loss": 2.1565,
"step": 4340
},
{
"epoch": 269.1358024691358,
"learning_rate": 9.4269621648096e-07,
"loss": 2.168,
"step": 4360
},
{
"epoch": 270.3703703703704,
"learning_rate": 9.424207175217338e-07,
"loss": 2.1392,
"step": 4380
},
{
"epoch": 271.60493827160496,
"learning_rate": 9.421452185625077e-07,
"loss": 2.1726,
"step": 4400
},
{
"epoch": 272.8395061728395,
"learning_rate": 9.418697196032815e-07,
"loss": 2.135,
"step": 4420
},
{
"epoch": 274.0740740740741,
"learning_rate": 9.415942206440553e-07,
"loss": 2.1329,
"step": 4440
},
{
"epoch": 275.30864197530866,
"learning_rate": 9.413187216848291e-07,
"loss": 2.1334,
"step": 4460
},
{
"epoch": 276.5432098765432,
"learning_rate": 9.41043222725603e-07,
"loss": 2.1309,
"step": 4480
},
{
"epoch": 277.77777777777777,
"learning_rate": 9.407677237663769e-07,
"loss": 2.0872,
"step": 4500
},
{
"epoch": 279.01234567901236,
"learning_rate": 9.404922248071507e-07,
"loss": 2.1426,
"step": 4520
},
{
"epoch": 280.24691358024694,
"learning_rate": 9.402167258479246e-07,
"loss": 2.1331,
"step": 4540
},
{
"epoch": 281.48148148148147,
"learning_rate": 9.399412268886984e-07,
"loss": 2.0859,
"step": 4560
},
{
"epoch": 282.71604938271605,
"learning_rate": 9.396657279294723e-07,
"loss": 2.0755,
"step": 4580
},
{
"epoch": 283.95061728395063,
"learning_rate": 9.39390228970246e-07,
"loss": 2.1203,
"step": 4600
},
{
"epoch": 285.18518518518516,
"learning_rate": 9.391147300110199e-07,
"loss": 2.0846,
"step": 4620
},
{
"epoch": 286.41975308641975,
"learning_rate": 9.388392310517938e-07,
"loss": 2.1284,
"step": 4640
},
{
"epoch": 287.65432098765433,
"learning_rate": 9.385637320925677e-07,
"loss": 2.107,
"step": 4660
},
{
"epoch": 288.8888888888889,
"learning_rate": 9.382882331333415e-07,
"loss": 2.2206,
"step": 4680
},
{
"epoch": 290.12345679012344,
"learning_rate": 9.380127341741153e-07,
"loss": 2.2475,
"step": 4700
},
{
"epoch": 291.358024691358,
"learning_rate": 9.377372352148891e-07,
"loss": 2.1887,
"step": 4720
},
{
"epoch": 292.5925925925926,
"learning_rate": 9.37461736255663e-07,
"loss": 2.1352,
"step": 4740
},
{
"epoch": 293.82716049382714,
"learning_rate": 9.371862372964368e-07,
"loss": 2.1565,
"step": 4760
},
{
"epoch": 295.0617283950617,
"learning_rate": 9.369107383372107e-07,
"loss": 2.1574,
"step": 4780
},
{
"epoch": 296.2962962962963,
"learning_rate": 9.366352393779845e-07,
"loss": 2.1384,
"step": 4800
},
{
"epoch": 297.5308641975309,
"learning_rate": 9.363597404187584e-07,
"loss": 2.1534,
"step": 4820
},
{
"epoch": 298.7654320987654,
"learning_rate": 9.360842414595322e-07,
"loss": 2.0941,
"step": 4840
},
{
"epoch": 300.0,
"learning_rate": 9.35808742500306e-07,
"loss": 2.101,
"step": 4860
},
{
"epoch": 301.2345679012346,
"learning_rate": 9.355332435410799e-07,
"loss": 2.1181,
"step": 4880
},
{
"epoch": 302.4691358024691,
"learning_rate": 9.352577445818537e-07,
"loss": 2.0771,
"step": 4900
},
{
"epoch": 303.7037037037037,
"learning_rate": 9.349822456226276e-07,
"loss": 2.0886,
"step": 4920
},
{
"epoch": 304.9382716049383,
"learning_rate": 9.347067466634014e-07,
"loss": 2.0715,
"step": 4940
},
{
"epoch": 306.17283950617286,
"learning_rate": 9.344312477041753e-07,
"loss": 2.0883,
"step": 4960
},
{
"epoch": 307.4074074074074,
"learning_rate": 9.34155748744949e-07,
"loss": 2.0767,
"step": 4980
},
{
"epoch": 308.641975308642,
"learning_rate": 9.338802497857229e-07,
"loss": 2.0757,
"step": 5000
},
{
"epoch": 309.87654320987656,
"learning_rate": 9.336047508264968e-07,
"loss": 2.0775,
"step": 5020
},
{
"epoch": 311.1111111111111,
"learning_rate": 9.333292518672707e-07,
"loss": 2.0763,
"step": 5040
},
{
"epoch": 312.34567901234567,
"learning_rate": 9.330537529080446e-07,
"loss": 2.065,
"step": 5060
},
{
"epoch": 313.58024691358025,
"learning_rate": 9.327782539488184e-07,
"loss": 2.0996,
"step": 5080
},
{
"epoch": 314.81481481481484,
"learning_rate": 9.325027549895923e-07,
"loss": 2.0837,
"step": 5100
},
{
"epoch": 316.04938271604937,
"learning_rate": 9.322272560303662e-07,
"loss": 2.0985,
"step": 5120
},
{
"epoch": 317.28395061728395,
"learning_rate": 9.319517570711399e-07,
"loss": 2.0662,
"step": 5140
},
{
"epoch": 318.51851851851853,
"learning_rate": 9.316762581119138e-07,
"loss": 2.1054,
"step": 5160
},
{
"epoch": 319.75308641975306,
"learning_rate": 9.314007591526876e-07,
"loss": 2.0688,
"step": 5180
},
{
"epoch": 320.98765432098764,
"learning_rate": 9.311252601934615e-07,
"loss": 2.06,
"step": 5200
},
{
"epoch": 322.22222222222223,
"learning_rate": 9.308497612342353e-07,
"loss": 2.0608,
"step": 5220
},
{
"epoch": 323.4567901234568,
"learning_rate": 9.305742622750092e-07,
"loss": 2.0238,
"step": 5240
},
{
"epoch": 324.69135802469134,
"learning_rate": 9.30298763315783e-07,
"loss": 2.0672,
"step": 5260
},
{
"epoch": 325.9259259259259,
"learning_rate": 9.300232643565568e-07,
"loss": 2.0045,
"step": 5280
},
{
"epoch": 327.1604938271605,
"learning_rate": 9.297477653973307e-07,
"loss": 2.0297,
"step": 5300
},
{
"epoch": 328.39506172839504,
"learning_rate": 9.294722664381045e-07,
"loss": 2.0939,
"step": 5320
},
{
"epoch": 329.6296296296296,
"learning_rate": 9.291967674788784e-07,
"loss": 2.0309,
"step": 5340
},
{
"epoch": 330.8641975308642,
"learning_rate": 9.289212685196523e-07,
"loss": 2.0221,
"step": 5360
},
{
"epoch": 332.0987654320988,
"learning_rate": 9.286457695604261e-07,
"loss": 2.0629,
"step": 5380
},
{
"epoch": 333.3333333333333,
"learning_rate": 9.283702706011999e-07,
"loss": 2.0113,
"step": 5400
},
{
"epoch": 334.5679012345679,
"learning_rate": 9.280947716419737e-07,
"loss": 2.0351,
"step": 5420
},
{
"epoch": 335.8024691358025,
"learning_rate": 9.278192726827476e-07,
"loss": 2.0535,
"step": 5440
},
{
"epoch": 337.037037037037,
"learning_rate": 9.275437737235214e-07,
"loss": 2.0653,
"step": 5460
},
{
"epoch": 338.2716049382716,
"learning_rate": 9.272682747642953e-07,
"loss": 2.0103,
"step": 5480
},
{
"epoch": 339.5061728395062,
"learning_rate": 9.269927758050692e-07,
"loss": 2.0367,
"step": 5500
},
{
"epoch": 340.74074074074076,
"learning_rate": 9.267172768458429e-07,
"loss": 1.9869,
"step": 5520
},
{
"epoch": 341.9753086419753,
"learning_rate": 9.264417778866168e-07,
"loss": 2.0831,
"step": 5540
},
{
"epoch": 343.2098765432099,
"learning_rate": 9.261662789273906e-07,
"loss": 2.0198,
"step": 5560
},
{
"epoch": 344.44444444444446,
"learning_rate": 9.258907799681645e-07,
"loss": 2.0057,
"step": 5580
},
{
"epoch": 345.679012345679,
"learning_rate": 9.256152810089383e-07,
"loss": 1.9973,
"step": 5600
},
{
"epoch": 346.91358024691357,
"learning_rate": 9.253397820497122e-07,
"loss": 2.0431,
"step": 5620
},
{
"epoch": 348.14814814814815,
"learning_rate": 9.250642830904861e-07,
"loss": 2.0132,
"step": 5640
},
{
"epoch": 349.38271604938274,
"learning_rate": 9.247887841312599e-07,
"loss": 2.0589,
"step": 5660
},
{
"epoch": 350.61728395061726,
"learning_rate": 9.245132851720337e-07,
"loss": 1.9975,
"step": 5680
},
{
"epoch": 351.85185185185185,
"learning_rate": 9.242377862128075e-07,
"loss": 2.0196,
"step": 5700
},
{
"epoch": 353.08641975308643,
"learning_rate": 9.239622872535814e-07,
"loss": 1.9605,
"step": 5720
},
{
"epoch": 354.320987654321,
"learning_rate": 9.236867882943551e-07,
"loss": 2.0043,
"step": 5740
},
{
"epoch": 355.55555555555554,
"learning_rate": 9.234112893351291e-07,
"loss": 1.9835,
"step": 5760
},
{
"epoch": 356.7901234567901,
"learning_rate": 9.23135790375903e-07,
"loss": 2.0274,
"step": 5780
},
{
"epoch": 358.0246913580247,
"learning_rate": 9.228602914166768e-07,
"loss": 2.0303,
"step": 5800
},
{
"epoch": 359.25925925925924,
"learning_rate": 9.225847924574507e-07,
"loss": 1.9691,
"step": 5820
},
{
"epoch": 360.4938271604938,
"learning_rate": 9.223092934982245e-07,
"loss": 2.0049,
"step": 5840
},
{
"epoch": 361.7283950617284,
"learning_rate": 9.220337945389984e-07,
"loss": 1.9652,
"step": 5860
},
{
"epoch": 362.962962962963,
"learning_rate": 9.217582955797723e-07,
"loss": 2.019,
"step": 5880
},
{
"epoch": 364.1975308641975,
"learning_rate": 9.214827966205461e-07,
"loss": 1.9819,
"step": 5900
},
{
"epoch": 365.4320987654321,
"learning_rate": 9.2120729766132e-07,
"loss": 2.0201,
"step": 5920
},
{
"epoch": 366.6666666666667,
"learning_rate": 9.209317987020937e-07,
"loss": 1.9934,
"step": 5940
},
{
"epoch": 367.9012345679012,
"learning_rate": 9.206562997428676e-07,
"loss": 1.9566,
"step": 5960
},
{
"epoch": 369.1358024691358,
"learning_rate": 9.203808007836415e-07,
"loss": 1.9902,
"step": 5980
},
{
"epoch": 370.3703703703704,
"learning_rate": 9.201053018244152e-07,
"loss": 2.0059,
"step": 6000
},
{
"epoch": 371.60493827160496,
"learning_rate": 9.198298028651892e-07,
"loss": 1.988,
"step": 6020
},
{
"epoch": 372.8395061728395,
"learning_rate": 9.19554303905963e-07,
"loss": 1.9659,
"step": 6040
},
{
"epoch": 374.0740740740741,
"learning_rate": 9.192788049467368e-07,
"loss": 2.0319,
"step": 6060
},
{
"epoch": 375.30864197530866,
"learning_rate": 9.190033059875106e-07,
"loss": 1.9308,
"step": 6080
},
{
"epoch": 376.5432098765432,
"learning_rate": 9.187278070282845e-07,
"loss": 1.967,
"step": 6100
},
{
"epoch": 377.77777777777777,
"learning_rate": 9.184523080690584e-07,
"loss": 2.0184,
"step": 6120
},
{
"epoch": 379.01234567901236,
"learning_rate": 9.181768091098322e-07,
"loss": 1.9865,
"step": 6140
},
{
"epoch": 380.24691358024694,
"learning_rate": 9.179013101506061e-07,
"loss": 1.9415,
"step": 6160
},
{
"epoch": 381.48148148148147,
"learning_rate": 9.176258111913799e-07,
"loss": 1.9619,
"step": 6180
},
{
"epoch": 382.71604938271605,
"learning_rate": 9.173503122321538e-07,
"loss": 1.949,
"step": 6200
},
{
"epoch": 383.95061728395063,
"learning_rate": 9.170748132729275e-07,
"loss": 1.9467,
"step": 6220
},
{
"epoch": 385.18518518518516,
"learning_rate": 9.167993143137014e-07,
"loss": 1.9691,
"step": 6240
},
{
"epoch": 386.41975308641975,
"learning_rate": 9.165238153544753e-07,
"loss": 1.9643,
"step": 6260
},
{
"epoch": 387.65432098765433,
"learning_rate": 9.162483163952491e-07,
"loss": 1.942,
"step": 6280
},
{
"epoch": 388.8888888888889,
"learning_rate": 9.15972817436023e-07,
"loss": 1.9002,
"step": 6300
},
{
"epoch": 390.12345679012344,
"learning_rate": 9.156973184767968e-07,
"loss": 1.9965,
"step": 6320
},
{
"epoch": 391.358024691358,
"learning_rate": 9.154218195175706e-07,
"loss": 1.9343,
"step": 6340
},
{
"epoch": 392.5925925925926,
"learning_rate": 9.151463205583446e-07,
"loss": 1.931,
"step": 6360
},
{
"epoch": 393.82716049382714,
"learning_rate": 9.148708215991183e-07,
"loss": 1.9628,
"step": 6380
},
{
"epoch": 395.0617283950617,
"learning_rate": 9.145953226398922e-07,
"loss": 1.9752,
"step": 6400
},
{
"epoch": 396.2962962962963,
"learning_rate": 9.14319823680666e-07,
"loss": 1.9499,
"step": 6420
},
{
"epoch": 397.5308641975309,
"learning_rate": 9.140443247214399e-07,
"loss": 1.9546,
"step": 6440
},
{
"epoch": 398.7654320987654,
"learning_rate": 9.137688257622137e-07,
"loss": 1.8583,
"step": 6460
},
{
"epoch": 400.0,
"learning_rate": 9.134933268029875e-07,
"loss": 1.9405,
"step": 6480
},
{
"epoch": 401.2345679012346,
"learning_rate": 9.132178278437614e-07,
"loss": 1.941,
"step": 6500
},
{
"epoch": 402.4691358024691,
"learning_rate": 9.129423288845352e-07,
"loss": 1.9432,
"step": 6520
},
{
"epoch": 403.7037037037037,
"learning_rate": 9.126668299253092e-07,
"loss": 1.9603,
"step": 6540
},
{
"epoch": 404.9382716049383,
"learning_rate": 9.12391330966083e-07,
"loss": 1.9456,
"step": 6560
},
{
"epoch": 406.17283950617286,
"learning_rate": 9.121158320068569e-07,
"loss": 1.9488,
"step": 6580
},
{
"epoch": 407.4074074074074,
"learning_rate": 9.118403330476307e-07,
"loss": 1.9248,
"step": 6600
},
{
"epoch": 408.641975308642,
"learning_rate": 9.115648340884045e-07,
"loss": 1.8897,
"step": 6620
},
{
"epoch": 409.87654320987656,
"learning_rate": 9.112893351291784e-07,
"loss": 1.9315,
"step": 6640
},
{
"epoch": 411.1111111111111,
"learning_rate": 9.110138361699522e-07,
"loss": 1.9599,
"step": 6660
},
{
"epoch": 412.34567901234567,
"learning_rate": 9.107383372107261e-07,
"loss": 1.9441,
"step": 6680
},
{
"epoch": 413.58024691358025,
"learning_rate": 9.104628382515e-07,
"loss": 1.8974,
"step": 6700
},
{
"epoch": 414.81481481481484,
"learning_rate": 9.101873392922738e-07,
"loss": 1.9078,
"step": 6720
},
{
"epoch": 416.04938271604937,
"learning_rate": 9.099118403330477e-07,
"loss": 1.9587,
"step": 6740
},
{
"epoch": 417.28395061728395,
"learning_rate": 9.096363413738214e-07,
"loss": 2.0016,
"step": 6760
},
{
"epoch": 418.51851851851853,
"learning_rate": 9.093608424145953e-07,
"loss": 1.9557,
"step": 6780
},
{
"epoch": 419.75308641975306,
"learning_rate": 9.090853434553691e-07,
"loss": 1.9719,
"step": 6800
},
{
"epoch": 420.98765432098764,
"learning_rate": 9.08809844496143e-07,
"loss": 1.9779,
"step": 6820
},
{
"epoch": 422.22222222222223,
"learning_rate": 9.085343455369169e-07,
"loss": 1.9754,
"step": 6840
},
{
"epoch": 423.4567901234568,
"learning_rate": 9.082588465776907e-07,
"loss": 2.0078,
"step": 6860
},
{
"epoch": 424.69135802469134,
"learning_rate": 9.079833476184645e-07,
"loss": 1.9856,
"step": 6880
},
{
"epoch": 425.9259259259259,
"learning_rate": 9.077078486592383e-07,
"loss": 1.9698,
"step": 6900
},
{
"epoch": 427.1604938271605,
"learning_rate": 9.074323497000122e-07,
"loss": 1.9826,
"step": 6920
},
{
"epoch": 428.39506172839504,
"learning_rate": 9.07156850740786e-07,
"loss": 1.9513,
"step": 6940
},
{
"epoch": 429.6296296296296,
"learning_rate": 9.068813517815599e-07,
"loss": 1.93,
"step": 6960
},
{
"epoch": 430.8641975308642,
"learning_rate": 9.066058528223339e-07,
"loss": 1.9779,
"step": 6980
},
{
"epoch": 432.0987654320988,
"learning_rate": 9.063303538631076e-07,
"loss": 1.9937,
"step": 7000
},
{
"epoch": 433.3333333333333,
"learning_rate": 9.060548549038814e-07,
"loss": 1.955,
"step": 7020
},
{
"epoch": 434.5679012345679,
"learning_rate": 9.057793559446552e-07,
"loss": 1.9652,
"step": 7040
},
{
"epoch": 435.8024691358025,
"learning_rate": 9.055038569854291e-07,
"loss": 1.9568,
"step": 7060
},
{
"epoch": 437.037037037037,
"learning_rate": 9.05228358026203e-07,
"loss": 1.9787,
"step": 7080
},
{
"epoch": 438.2716049382716,
"learning_rate": 9.049528590669768e-07,
"loss": 1.9137,
"step": 7100
},
{
"epoch": 439.5061728395062,
"learning_rate": 9.046773601077507e-07,
"loss": 1.9314,
"step": 7120
},
{
"epoch": 440.74074074074076,
"learning_rate": 9.044018611485244e-07,
"loss": 1.9961,
"step": 7140
},
{
"epoch": 441.9753086419753,
"learning_rate": 9.041263621892983e-07,
"loss": 1.9303,
"step": 7160
},
{
"epoch": 443.2098765432099,
"learning_rate": 9.038508632300721e-07,
"loss": 1.982,
"step": 7180
},
{
"epoch": 444.44444444444446,
"learning_rate": 9.03575364270846e-07,
"loss": 1.9133,
"step": 7200
},
{
"epoch": 445.679012345679,
"learning_rate": 9.032998653116199e-07,
"loss": 1.9376,
"step": 7220
},
{
"epoch": 446.91358024691357,
"learning_rate": 9.030243663523937e-07,
"loss": 1.9254,
"step": 7240
},
{
"epoch": 448.14814814814815,
"learning_rate": 9.027488673931676e-07,
"loss": 1.9491,
"step": 7260
},
{
"epoch": 449.38271604938274,
"learning_rate": 9.024733684339414e-07,
"loss": 1.9455,
"step": 7280
},
{
"epoch": 450.61728395061726,
"learning_rate": 9.021978694747153e-07,
"loss": 1.9128,
"step": 7300
},
{
"epoch": 451.85185185185185,
"learning_rate": 9.019223705154892e-07,
"loss": 1.9006,
"step": 7320
},
{
"epoch": 453.08641975308643,
"learning_rate": 9.016468715562629e-07,
"loss": 1.9172,
"step": 7340
},
{
"epoch": 454.320987654321,
"learning_rate": 9.013713725970369e-07,
"loss": 1.9754,
"step": 7360
},
{
"epoch": 455.55555555555554,
"learning_rate": 9.010958736378107e-07,
"loss": 1.9759,
"step": 7380
},
{
"epoch": 456.7901234567901,
"learning_rate": 9.008203746785846e-07,
"loss": 1.9452,
"step": 7400
},
{
"epoch": 458.0246913580247,
"learning_rate": 9.005448757193583e-07,
"loss": 1.9618,
"step": 7420
},
{
"epoch": 459.25925925925924,
"learning_rate": 9.002693767601322e-07,
"loss": 1.9364,
"step": 7440
},
{
"epoch": 460.4938271604938,
"learning_rate": 8.999938778009061e-07,
"loss": 1.9513,
"step": 7460
},
{
"epoch": 461.7283950617284,
"learning_rate": 8.997183788416799e-07,
"loss": 1.9473,
"step": 7480
},
{
"epoch": 462.962962962963,
"learning_rate": 8.994428798824538e-07,
"loss": 1.9159,
"step": 7500
},
{
"epoch": 464.1975308641975,
"learning_rate": 8.991673809232276e-07,
"loss": 1.9195,
"step": 7520
},
{
"epoch": 465.4320987654321,
"learning_rate": 8.988918819640015e-07,
"loss": 1.9647,
"step": 7540
},
{
"epoch": 466.6666666666667,
"learning_rate": 8.986163830047752e-07,
"loss": 1.895,
"step": 7560
},
{
"epoch": 467.9012345679012,
"learning_rate": 8.983408840455491e-07,
"loss": 1.9408,
"step": 7580
},
{
"epoch": 469.1358024691358,
"learning_rate": 8.98065385086323e-07,
"loss": 1.978,
"step": 7600
},
{
"epoch": 470.3703703703704,
"learning_rate": 8.977898861270968e-07,
"loss": 1.9085,
"step": 7620
},
{
"epoch": 471.60493827160496,
"learning_rate": 8.975143871678707e-07,
"loss": 1.9833,
"step": 7640
},
{
"epoch": 472.8395061728395,
"learning_rate": 8.972388882086445e-07,
"loss": 1.9559,
"step": 7660
},
{
"epoch": 474.0740740740741,
"learning_rate": 8.969633892494183e-07,
"loss": 1.9333,
"step": 7680
},
{
"epoch": 475.30864197530866,
"learning_rate": 8.966878902901923e-07,
"loss": 1.9075,
"step": 7700
},
{
"epoch": 476.5432098765432,
"learning_rate": 8.96412391330966e-07,
"loss": 1.9343,
"step": 7720
},
{
"epoch": 477.77777777777777,
"learning_rate": 8.961368923717399e-07,
"loss": 1.9107,
"step": 7740
},
{
"epoch": 479.01234567901236,
"learning_rate": 8.958613934125137e-07,
"loss": 1.9344,
"step": 7760
},
{
"epoch": 480.24691358024694,
"learning_rate": 8.955858944532876e-07,
"loss": 1.9044,
"step": 7780
},
{
"epoch": 481.48148148148147,
"learning_rate": 8.953103954940614e-07,
"loss": 1.974,
"step": 7800
},
{
"epoch": 482.71604938271605,
"learning_rate": 8.950348965348353e-07,
"loss": 1.9241,
"step": 7820
},
{
"epoch": 483.95061728395063,
"learning_rate": 8.947593975756091e-07,
"loss": 1.9377,
"step": 7840
},
{
"epoch": 485.18518518518516,
"learning_rate": 8.944838986163829e-07,
"loss": 1.9326,
"step": 7860
},
{
"epoch": 486.41975308641975,
"learning_rate": 8.942083996571568e-07,
"loss": 1.9028,
"step": 7880
},
{
"epoch": 487.65432098765433,
"learning_rate": 8.939329006979306e-07,
"loss": 1.8872,
"step": 7900
},
{
"epoch": 488.8888888888889,
"learning_rate": 8.936574017387045e-07,
"loss": 1.904,
"step": 7920
},
{
"epoch": 490.12345679012344,
"learning_rate": 8.933819027794784e-07,
"loss": 1.8897,
"step": 7940
},
{
"epoch": 491.358024691358,
"learning_rate": 8.931064038202521e-07,
"loss": 1.9026,
"step": 7960
},
{
"epoch": 492.5925925925926,
"learning_rate": 8.92830904861026e-07,
"loss": 1.9287,
"step": 7980
},
{
"epoch": 493.82716049382714,
"learning_rate": 8.925554059017998e-07,
"loss": 1.8277,
"step": 8000
},
{
"epoch": 495.0617283950617,
"learning_rate": 8.922799069425738e-07,
"loss": 1.8716,
"step": 8020
},
{
"epoch": 496.2962962962963,
"learning_rate": 8.920044079833476e-07,
"loss": 1.8907,
"step": 8040
},
{
"epoch": 497.5308641975309,
"learning_rate": 8.917289090241215e-07,
"loss": 1.8754,
"step": 8060
},
{
"epoch": 498.7654320987654,
"learning_rate": 8.914534100648954e-07,
"loss": 1.8713,
"step": 8080
},
{
"epoch": 500.0,
"learning_rate": 8.911779111056691e-07,
"loss": 1.8645,
"step": 8100
},
{
"epoch": 501.2345679012346,
"learning_rate": 8.90902412146443e-07,
"loss": 1.896,
"step": 8120
},
{
"epoch": 502.4691358024691,
"learning_rate": 8.906269131872168e-07,
"loss": 1.8824,
"step": 8140
},
{
"epoch": 503.7037037037037,
"learning_rate": 8.903514142279907e-07,
"loss": 1.8612,
"step": 8160
},
{
"epoch": 504.9382716049383,
"learning_rate": 8.900759152687646e-07,
"loss": 1.8747,
"step": 8180
},
{
"epoch": 506.17283950617286,
"learning_rate": 8.898004163095384e-07,
"loss": 1.8882,
"step": 8200
},
{
"epoch": 507.4074074074074,
"learning_rate": 8.895249173503122e-07,
"loss": 1.8858,
"step": 8220
},
{
"epoch": 508.641975308642,
"learning_rate": 8.89249418391086e-07,
"loss": 1.8599,
"step": 8240
},
{
"epoch": 509.87654320987656,
"learning_rate": 8.889739194318599e-07,
"loss": 1.9073,
"step": 8260
},
{
"epoch": 511.1111111111111,
"learning_rate": 8.886984204726337e-07,
"loss": 1.9222,
"step": 8280
},
{
"epoch": 512.3456790123457,
"learning_rate": 8.884229215134076e-07,
"loss": 1.904,
"step": 8300
},
{
"epoch": 513.5802469135803,
"learning_rate": 8.881474225541814e-07,
"loss": 1.8662,
"step": 8320
},
{
"epoch": 514.8148148148148,
"learning_rate": 8.878719235949553e-07,
"loss": 1.8384,
"step": 8340
},
{
"epoch": 516.0493827160494,
"learning_rate": 8.875964246357292e-07,
"loss": 1.9238,
"step": 8360
},
{
"epoch": 517.283950617284,
"learning_rate": 8.873209256765029e-07,
"loss": 1.8626,
"step": 8380
},
{
"epoch": 518.5185185185185,
"learning_rate": 8.870454267172768e-07,
"loss": 1.9077,
"step": 8400
},
{
"epoch": 519.7530864197531,
"learning_rate": 8.867699277580506e-07,
"loss": 1.8783,
"step": 8420
},
{
"epoch": 520.9876543209876,
"learning_rate": 8.864944287988245e-07,
"loss": 1.8721,
"step": 8440
},
{
"epoch": 522.2222222222222,
"learning_rate": 8.862189298395984e-07,
"loss": 1.8961,
"step": 8460
},
{
"epoch": 523.4567901234568,
"learning_rate": 8.859434308803721e-07,
"loss": 1.8552,
"step": 8480
},
{
"epoch": 524.6913580246913,
"learning_rate": 8.85667931921146e-07,
"loss": 1.9263,
"step": 8500
},
{
"epoch": 525.925925925926,
"learning_rate": 8.853924329619198e-07,
"loss": 1.8674,
"step": 8520
},
{
"epoch": 527.1604938271605,
"learning_rate": 8.851169340026937e-07,
"loss": 1.8876,
"step": 8540
},
{
"epoch": 528.395061728395,
"learning_rate": 8.848414350434676e-07,
"loss": 1.8834,
"step": 8560
},
{
"epoch": 529.6296296296297,
"learning_rate": 8.845659360842414e-07,
"loss": 1.8467,
"step": 8580
},
{
"epoch": 530.8641975308642,
"learning_rate": 8.842904371250153e-07,
"loss": 1.8957,
"step": 8600
},
{
"epoch": 532.0987654320987,
"learning_rate": 8.840149381657891e-07,
"loss": 1.8911,
"step": 8620
},
{
"epoch": 533.3333333333334,
"learning_rate": 8.837394392065629e-07,
"loss": 1.8994,
"step": 8640
},
{
"epoch": 534.5679012345679,
"learning_rate": 8.834639402473367e-07,
"loss": 1.9276,
"step": 8660
},
{
"epoch": 535.8024691358024,
"learning_rate": 8.831884412881107e-07,
"loss": 1.8605,
"step": 8680
},
{
"epoch": 537.0370370370371,
"learning_rate": 8.829129423288845e-07,
"loss": 1.8733,
"step": 8700
},
{
"epoch": 538.2716049382716,
"learning_rate": 8.826374433696583e-07,
"loss": 1.9128,
"step": 8720
},
{
"epoch": 539.5061728395061,
"learning_rate": 8.823619444104322e-07,
"loss": 1.8563,
"step": 8740
},
{
"epoch": 540.7407407407408,
"learning_rate": 8.82086445451206e-07,
"loss": 1.8647,
"step": 8760
},
{
"epoch": 541.9753086419753,
"learning_rate": 8.818109464919799e-07,
"loss": 1.8478,
"step": 8780
},
{
"epoch": 543.2098765432099,
"learning_rate": 8.815354475327538e-07,
"loss": 1.877,
"step": 8800
},
{
"epoch": 544.4444444444445,
"learning_rate": 8.812599485735276e-07,
"loss": 1.8432,
"step": 8820
},
{
"epoch": 545.679012345679,
"learning_rate": 8.809844496143015e-07,
"loss": 1.8245,
"step": 8840
},
{
"epoch": 546.9135802469136,
"learning_rate": 8.807089506550753e-07,
"loss": 1.841,
"step": 8860
},
{
"epoch": 548.1481481481482,
"learning_rate": 8.804334516958492e-07,
"loss": 1.8609,
"step": 8880
},
{
"epoch": 549.3827160493827,
"learning_rate": 8.80157952736623e-07,
"loss": 1.8815,
"step": 8900
},
{
"epoch": 550.6172839506173,
"learning_rate": 8.798824537773968e-07,
"loss": 1.8866,
"step": 8920
},
{
"epoch": 551.8518518518518,
"learning_rate": 8.796069548181707e-07,
"loss": 1.8573,
"step": 8940
},
{
"epoch": 553.0864197530864,
"learning_rate": 8.793314558589445e-07,
"loss": 1.8837,
"step": 8960
},
{
"epoch": 554.320987654321,
"learning_rate": 8.790559568997184e-07,
"loss": 1.8875,
"step": 8980
},
{
"epoch": 555.5555555555555,
"learning_rate": 8.787804579404922e-07,
"loss": 1.934,
"step": 9000
},
{
"epoch": 556.7901234567901,
"learning_rate": 8.78504958981266e-07,
"loss": 1.9294,
"step": 9020
},
{
"epoch": 558.0246913580247,
"learning_rate": 8.782294600220397e-07,
"loss": 2.0228,
"step": 9040
},
{
"epoch": 559.2592592592592,
"learning_rate": 8.779539610628137e-07,
"loss": 2.0074,
"step": 9060
},
{
"epoch": 560.4938271604939,
"learning_rate": 8.776784621035876e-07,
"loss": 1.8819,
"step": 9080
},
{
"epoch": 561.7283950617284,
"learning_rate": 8.774029631443614e-07,
"loss": 1.8354,
"step": 9100
},
{
"epoch": 562.9629629629629,
"learning_rate": 8.771274641851353e-07,
"loss": 1.8683,
"step": 9120
},
{
"epoch": 564.1975308641976,
"learning_rate": 8.768519652259091e-07,
"loss": 1.8644,
"step": 9140
},
{
"epoch": 565.4320987654321,
"learning_rate": 8.76576466266683e-07,
"loss": 1.8822,
"step": 9160
},
{
"epoch": 566.6666666666666,
"learning_rate": 8.763009673074568e-07,
"loss": 1.8544,
"step": 9180
},
{
"epoch": 567.9012345679013,
"learning_rate": 8.760254683482306e-07,
"loss": 1.8507,
"step": 9200
},
{
"epoch": 569.1358024691358,
"learning_rate": 8.757499693890045e-07,
"loss": 1.8816,
"step": 9220
},
{
"epoch": 570.3703703703703,
"learning_rate": 8.754744704297783e-07,
"loss": 1.7993,
"step": 9240
},
{
"epoch": 571.604938271605,
"learning_rate": 8.751989714705522e-07,
"loss": 1.8231,
"step": 9260
},
{
"epoch": 572.8395061728395,
"learning_rate": 8.74923472511326e-07,
"loss": 1.8354,
"step": 9280
},
{
"epoch": 574.074074074074,
"learning_rate": 8.746479735520998e-07,
"loss": 1.8731,
"step": 9300
},
{
"epoch": 575.3086419753087,
"learning_rate": 8.743724745928737e-07,
"loss": 1.8377,
"step": 9320
},
{
"epoch": 576.5432098765432,
"learning_rate": 8.740969756336475e-07,
"loss": 1.8211,
"step": 9340
},
{
"epoch": 577.7777777777778,
"learning_rate": 8.738214766744214e-07,
"loss": 1.8321,
"step": 9360
},
{
"epoch": 579.0123456790124,
"learning_rate": 8.735459777151952e-07,
"loss": 1.8091,
"step": 9380
},
{
"epoch": 580.2469135802469,
"learning_rate": 8.732704787559691e-07,
"loss": 1.7772,
"step": 9400
},
{
"epoch": 581.4814814814815,
"learning_rate": 8.72994979796743e-07,
"loss": 1.7844,
"step": 9420
},
{
"epoch": 582.716049382716,
"learning_rate": 8.727194808375168e-07,
"loss": 1.7728,
"step": 9440
},
{
"epoch": 583.9506172839506,
"learning_rate": 8.724439818782906e-07,
"loss": 1.7777,
"step": 9460
},
{
"epoch": 585.1851851851852,
"learning_rate": 8.721684829190644e-07,
"loss": 1.7895,
"step": 9480
},
{
"epoch": 586.4197530864197,
"learning_rate": 8.718929839598383e-07,
"loss": 1.7949,
"step": 9500
},
{
"epoch": 587.6543209876543,
"learning_rate": 8.716174850006122e-07,
"loss": 1.7314,
"step": 9520
},
{
"epoch": 588.8888888888889,
"learning_rate": 8.713419860413861e-07,
"loss": 1.7573,
"step": 9540
},
{
"epoch": 590.1234567901234,
"learning_rate": 8.710664870821599e-07,
"loss": 1.7663,
"step": 9560
},
{
"epoch": 591.358024691358,
"learning_rate": 8.707909881229337e-07,
"loss": 1.8203,
"step": 9580
},
{
"epoch": 592.5925925925926,
"learning_rate": 8.705154891637076e-07,
"loss": 1.8092,
"step": 9600
},
{
"epoch": 593.8271604938271,
"learning_rate": 8.702399902044814e-07,
"loss": 1.7717,
"step": 9620
},
{
"epoch": 595.0617283950618,
"learning_rate": 8.699644912452553e-07,
"loss": 1.7992,
"step": 9640
},
{
"epoch": 596.2962962962963,
"learning_rate": 8.69688992286029e-07,
"loss": 1.7772,
"step": 9660
},
{
"epoch": 597.5308641975308,
"learning_rate": 8.69413493326803e-07,
"loss": 1.7884,
"step": 9680
},
{
"epoch": 598.7654320987655,
"learning_rate": 8.691379943675769e-07,
"loss": 1.7618,
"step": 9700
},
{
"epoch": 600.0,
"learning_rate": 8.688624954083506e-07,
"loss": 1.7789,
"step": 9720
},
{
"epoch": 601.2345679012345,
"learning_rate": 8.685869964491245e-07,
"loss": 1.7624,
"step": 9740
},
{
"epoch": 602.4691358024692,
"learning_rate": 8.683114974898983e-07,
"loss": 1.7588,
"step": 9760
},
{
"epoch": 603.7037037037037,
"learning_rate": 8.680359985306722e-07,
"loss": 1.793,
"step": 9780
},
{
"epoch": 604.9382716049382,
"learning_rate": 8.677604995714461e-07,
"loss": 1.7527,
"step": 9800
},
{
"epoch": 606.1728395061729,
"learning_rate": 8.674850006122198e-07,
"loss": 1.7472,
"step": 9820
},
{
"epoch": 607.4074074074074,
"learning_rate": 8.672095016529937e-07,
"loss": 1.8087,
"step": 9840
},
{
"epoch": 608.641975308642,
"learning_rate": 8.669340026937675e-07,
"loss": 1.7727,
"step": 9860
},
{
"epoch": 609.8765432098766,
"learning_rate": 8.666585037345414e-07,
"loss": 1.7672,
"step": 9880
},
{
"epoch": 611.1111111111111,
"learning_rate": 8.663830047753152e-07,
"loss": 1.7614,
"step": 9900
},
{
"epoch": 612.3456790123457,
"learning_rate": 8.661075058160891e-07,
"loss": 1.7775,
"step": 9920
},
{
"epoch": 613.5802469135803,
"learning_rate": 8.65832006856863e-07,
"loss": 1.7512,
"step": 9940
},
{
"epoch": 614.8148148148148,
"learning_rate": 8.655565078976368e-07,
"loss": 1.7858,
"step": 9960
},
{
"epoch": 616.0493827160494,
"learning_rate": 8.652810089384107e-07,
"loss": 1.8064,
"step": 9980
},
{
"epoch": 617.283950617284,
"learning_rate": 8.650055099791844e-07,
"loss": 1.7649,
"step": 10000
}
],
"logging_steps": 20,
"max_steps": 65536,
"num_input_tokens_seen": 0,
"num_train_epochs": 4096,
"save_steps": 10000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.923583979036672e+16,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}