{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 500, "global_step": 23217, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.307188697936857e-05, "learning_rate": 5e-09, "loss": 15.2085, "step": 1 }, { "epoch": 0.0008614377395873713, "learning_rate": 1e-07, "loss": 13.4227, "step": 20 }, { "epoch": 0.0017228754791747427, "learning_rate": 2e-07, "loss": 12.2855, "step": 40 }, { "epoch": 0.002584313218762114, "learning_rate": 3e-07, "loss": 12.1044, "step": 60 }, { "epoch": 0.0034457509583494853, "learning_rate": 4e-07, "loss": 10.705, "step": 80 }, { "epoch": 0.004307188697936857, "learning_rate": 5e-07, "loss": 10.2652, "step": 100 }, { "epoch": 0.005168626437524228, "learning_rate": 6e-07, "loss": 8.8297, "step": 120 }, { "epoch": 0.0060300641771116, "learning_rate": 7e-07, "loss": 8.3154, "step": 140 }, { "epoch": 0.006891501916698971, "learning_rate": 8e-07, "loss": 7.722, "step": 160 }, { "epoch": 0.007752939656286342, "learning_rate": 9e-07, "loss": 7.6096, "step": 180 }, { "epoch": 0.008614377395873713, "learning_rate": 1e-06, "loss": 7.394, "step": 200 }, { "epoch": 0.009475815135461085, "learning_rate": 9.99951518024521e-07, "loss": 7.0227, "step": 220 }, { "epoch": 0.010337252875048455, "learning_rate": 9.999030360490423e-07, "loss": 7.7254, "step": 240 }, { "epoch": 0.011198690614635827, "learning_rate": 9.998545540735632e-07, "loss": 7.6063, "step": 260 }, { "epoch": 0.0120601283542232, "learning_rate": 9.998060720980845e-07, "loss": 6.9029, "step": 280 }, { "epoch": 0.01292156609381057, "learning_rate": 9.997575901226053e-07, "loss": 7.0533, "step": 300 }, { "epoch": 0.013783003833397941, "learning_rate": 9.997091081471266e-07, "loss": 6.9369, "step": 320 }, { "epoch": 0.014644441572985313, "learning_rate": 9.996606261716477e-07, "loss": 7.2298, "step": 340 }, { "epoch": 0.015505879312572683, "learning_rate": 9.996121441961687e-07, "loss": 6.9774, "step": 360 }, { "epoch": 0.016367317052160057, "learning_rate": 9.9956366222069e-07, "loss": 6.6153, "step": 380 }, { "epoch": 0.017228754791747427, "learning_rate": 9.995151802452109e-07, "loss": 6.7399, "step": 400 }, { "epoch": 0.018090192531334797, "learning_rate": 9.994666982697322e-07, "loss": 6.5775, "step": 420 }, { "epoch": 0.01895163027092217, "learning_rate": 9.994182162942532e-07, "loss": 6.9794, "step": 440 }, { "epoch": 0.01981306801050954, "learning_rate": 9.993697343187743e-07, "loss": 6.662, "step": 460 }, { "epoch": 0.02067450575009691, "learning_rate": 9.993212523432956e-07, "loss": 6.7392, "step": 480 }, { "epoch": 0.021535943489684285, "learning_rate": 9.992727703678165e-07, "loss": 6.7243, "step": 500 }, { "epoch": 0.022397381229271655, "learning_rate": 9.992242883923377e-07, "loss": 6.9017, "step": 520 }, { "epoch": 0.023258818968859025, "learning_rate": 9.991758064168586e-07, "loss": 6.3385, "step": 540 }, { "epoch": 0.0241202567084464, "learning_rate": 9.991273244413799e-07, "loss": 6.436, "step": 560 }, { "epoch": 0.02498169444803377, "learning_rate": 9.99078842465901e-07, "loss": 6.7753, "step": 580 }, { "epoch": 0.02584313218762114, "learning_rate": 9.99030360490422e-07, "loss": 6.2456, "step": 600 }, { "epoch": 0.026704569927208512, "learning_rate": 9.989818785149433e-07, "loss": 6.6402, "step": 620 }, { "epoch": 0.027566007666795882, "learning_rate": 9.989333965394644e-07, "loss": 6.3705, "step": 640 }, { "epoch": 0.028427445406383253, "learning_rate": 9.988849145639854e-07, "loss": 6.1228, "step": 660 }, { "epoch": 0.029288883145970626, "learning_rate": 9.988364325885065e-07, "loss": 6.377, "step": 680 }, { "epoch": 0.030150320885557996, "learning_rate": 9.987879506130276e-07, "loss": 6.5323, "step": 700 }, { "epoch": 0.031011758625145366, "learning_rate": 9.987394686375489e-07, "loss": 6.5611, "step": 720 }, { "epoch": 0.03187319636473274, "learning_rate": 9.986909866620697e-07, "loss": 6.5353, "step": 740 }, { "epoch": 0.032734634104320114, "learning_rate": 9.98642504686591e-07, "loss": 6.5194, "step": 760 }, { "epoch": 0.033596071843907484, "learning_rate": 9.985940227111119e-07, "loss": 6.4292, "step": 780 }, { "epoch": 0.034457509583494854, "learning_rate": 9.985455407356332e-07, "loss": 6.311, "step": 800 }, { "epoch": 0.035318947323082224, "learning_rate": 9.984970587601542e-07, "loss": 6.4556, "step": 820 }, { "epoch": 0.036180385062669594, "learning_rate": 9.984485767846753e-07, "loss": 6.3878, "step": 840 }, { "epoch": 0.037041822802256964, "learning_rate": 9.984000948091966e-07, "loss": 6.2106, "step": 860 }, { "epoch": 0.03790326054184434, "learning_rate": 9.983516128337174e-07, "loss": 6.4038, "step": 880 }, { "epoch": 0.03876469828143171, "learning_rate": 9.983031308582387e-07, "loss": 6.554, "step": 900 }, { "epoch": 0.03962613602101908, "learning_rate": 9.982546488827596e-07, "loss": 6.5065, "step": 920 }, { "epoch": 0.04048757376060645, "learning_rate": 9.982061669072809e-07, "loss": 6.2372, "step": 940 }, { "epoch": 0.04134901150019382, "learning_rate": 9.98157684931802e-07, "loss": 6.6843, "step": 960 }, { "epoch": 0.04221044923978119, "learning_rate": 9.98109202956323e-07, "loss": 6.3941, "step": 980 }, { "epoch": 0.04307188697936857, "learning_rate": 9.980607209808443e-07, "loss": 6.0508, "step": 1000 }, { "epoch": 0.04393332471895594, "learning_rate": 9.980122390053654e-07, "loss": 6.1723, "step": 1020 }, { "epoch": 0.04479476245854331, "learning_rate": 9.979637570298864e-07, "loss": 6.5495, "step": 1040 }, { "epoch": 0.04565620019813068, "learning_rate": 9.979152750544075e-07, "loss": 6.4215, "step": 1060 }, { "epoch": 0.04651763793771805, "learning_rate": 9.978667930789286e-07, "loss": 6.2316, "step": 1080 }, { "epoch": 0.04737907567730542, "learning_rate": 9.978183111034499e-07, "loss": 6.291, "step": 1100 }, { "epoch": 0.0482405134168928, "learning_rate": 9.977698291279707e-07, "loss": 6.2892, "step": 1120 }, { "epoch": 0.04910195115648017, "learning_rate": 9.97721347152492e-07, "loss": 6.1592, "step": 1140 }, { "epoch": 0.04996338889606754, "learning_rate": 9.976728651770129e-07, "loss": 6.149, "step": 1160 }, { "epoch": 0.05082482663565491, "learning_rate": 9.976243832015342e-07, "loss": 6.3048, "step": 1180 }, { "epoch": 0.05168626437524228, "learning_rate": 9.975759012260552e-07, "loss": 6.4359, "step": 1200 }, { "epoch": 0.05254770211482965, "learning_rate": 9.975274192505763e-07, "loss": 6.1895, "step": 1220 }, { "epoch": 0.053409139854417025, "learning_rate": 9.974789372750976e-07, "loss": 6.0717, "step": 1240 }, { "epoch": 0.054270577594004395, "learning_rate": 9.974304552996186e-07, "loss": 6.4554, "step": 1260 }, { "epoch": 0.055132015333591765, "learning_rate": 9.973819733241397e-07, "loss": 6.3709, "step": 1280 }, { "epoch": 0.055993453073179135, "learning_rate": 9.973334913486606e-07, "loss": 6.3882, "step": 1300 }, { "epoch": 0.056854890812766505, "learning_rate": 9.972850093731819e-07, "loss": 6.272, "step": 1320 }, { "epoch": 0.057716328552353875, "learning_rate": 9.97236527397703e-07, "loss": 6.0779, "step": 1340 }, { "epoch": 0.05857776629194125, "learning_rate": 9.97188045422224e-07, "loss": 6.3757, "step": 1360 }, { "epoch": 0.05943920403152862, "learning_rate": 9.971395634467453e-07, "loss": 5.9969, "step": 1380 }, { "epoch": 0.06030064177111599, "learning_rate": 9.970910814712664e-07, "loss": 6.288, "step": 1400 }, { "epoch": 0.06116207951070336, "learning_rate": 9.970425994957874e-07, "loss": 6.3014, "step": 1420 }, { "epoch": 0.06202351725029073, "learning_rate": 9.969941175203085e-07, "loss": 6.0141, "step": 1440 }, { "epoch": 0.0628849549898781, "learning_rate": 9.969456355448296e-07, "loss": 6.2296, "step": 1460 }, { "epoch": 0.06374639272946547, "learning_rate": 9.968971535693509e-07, "loss": 6.2257, "step": 1480 }, { "epoch": 0.06460783046905284, "learning_rate": 9.968486715938717e-07, "loss": 6.2917, "step": 1500 }, { "epoch": 0.06546926820864023, "learning_rate": 9.96800189618393e-07, "loss": 6.0469, "step": 1520 }, { "epoch": 0.0663307059482276, "learning_rate": 9.967517076429139e-07, "loss": 6.1077, "step": 1540 }, { "epoch": 0.06719214368781497, "learning_rate": 9.967032256674351e-07, "loss": 6.2491, "step": 1560 }, { "epoch": 0.06805358142740234, "learning_rate": 9.966547436919562e-07, "loss": 5.8996, "step": 1580 }, { "epoch": 0.06891501916698971, "learning_rate": 9.966062617164773e-07, "loss": 6.3099, "step": 1600 }, { "epoch": 0.06977645690657708, "learning_rate": 9.965577797409986e-07, "loss": 6.2458, "step": 1620 }, { "epoch": 0.07063789464616445, "learning_rate": 9.965092977655196e-07, "loss": 6.2759, "step": 1640 }, { "epoch": 0.07149933238575182, "learning_rate": 9.964608157900407e-07, "loss": 6.2417, "step": 1660 }, { "epoch": 0.07236077012533919, "learning_rate": 9.964123338145618e-07, "loss": 6.0339, "step": 1680 }, { "epoch": 0.07322220786492656, "learning_rate": 9.963638518390829e-07, "loss": 6.1054, "step": 1700 }, { "epoch": 0.07408364560451393, "learning_rate": 9.963153698636041e-07, "loss": 6.2843, "step": 1720 }, { "epoch": 0.0749450833441013, "learning_rate": 9.96266887888125e-07, "loss": 6.0034, "step": 1740 }, { "epoch": 0.07580652108368868, "learning_rate": 9.962184059126463e-07, "loss": 6.2372, "step": 1760 }, { "epoch": 0.07666795882327605, "learning_rate": 9.961699239371674e-07, "loss": 6.2253, "step": 1780 }, { "epoch": 0.07752939656286342, "learning_rate": 9.961214419616884e-07, "loss": 6.1184, "step": 1800 }, { "epoch": 0.0783908343024508, "learning_rate": 9.960729599862095e-07, "loss": 5.952, "step": 1820 }, { "epoch": 0.07925227204203816, "learning_rate": 9.960244780107306e-07, "loss": 6.1059, "step": 1840 }, { "epoch": 0.08011370978162553, "learning_rate": 9.959759960352519e-07, "loss": 6.0038, "step": 1860 }, { "epoch": 0.0809751475212129, "learning_rate": 9.959275140597727e-07, "loss": 6.197, "step": 1880 }, { "epoch": 0.08183658526080027, "learning_rate": 9.95879032084294e-07, "loss": 6.0109, "step": 1900 }, { "epoch": 0.08269802300038764, "learning_rate": 9.95830550108815e-07, "loss": 6.081, "step": 1920 }, { "epoch": 0.08355946073997501, "learning_rate": 9.957820681333361e-07, "loss": 5.8241, "step": 1940 }, { "epoch": 0.08442089847956238, "learning_rate": 9.957335861578572e-07, "loss": 6.2336, "step": 1960 }, { "epoch": 0.08528233621914975, "learning_rate": 9.956851041823783e-07, "loss": 5.9938, "step": 1980 }, { "epoch": 0.08614377395873714, "learning_rate": 9.956366222068996e-07, "loss": 6.1137, "step": 2000 }, { "epoch": 0.08700521169832451, "learning_rate": 9.955881402314206e-07, "loss": 5.8231, "step": 2020 }, { "epoch": 0.08786664943791188, "learning_rate": 9.955396582559417e-07, "loss": 5.919, "step": 2040 }, { "epoch": 0.08872808717749925, "learning_rate": 9.954911762804628e-07, "loss": 5.9266, "step": 2060 }, { "epoch": 0.08958952491708662, "learning_rate": 9.954426943049839e-07, "loss": 6.0525, "step": 2080 }, { "epoch": 0.09045096265667399, "learning_rate": 9.953942123295051e-07, "loss": 5.8037, "step": 2100 }, { "epoch": 0.09131240039626136, "learning_rate": 9.95345730354026e-07, "loss": 5.8224, "step": 2120 }, { "epoch": 0.09217383813584873, "learning_rate": 9.952972483785473e-07, "loss": 5.9992, "step": 2140 }, { "epoch": 0.0930352758754361, "learning_rate": 9.952487664030683e-07, "loss": 6.222, "step": 2160 }, { "epoch": 0.09389671361502347, "learning_rate": 9.952002844275894e-07, "loss": 6.221, "step": 2180 }, { "epoch": 0.09475815135461084, "learning_rate": 9.951518024521105e-07, "loss": 5.9652, "step": 2200 }, { "epoch": 0.09561958909419822, "learning_rate": 9.951033204766316e-07, "loss": 6.1783, "step": 2220 }, { "epoch": 0.0964810268337856, "learning_rate": 9.950548385011528e-07, "loss": 5.9059, "step": 2240 }, { "epoch": 0.09734246457337296, "learning_rate": 9.95006356525674e-07, "loss": 6.1, "step": 2260 }, { "epoch": 0.09820390231296033, "learning_rate": 9.94957874550195e-07, "loss": 5.9231, "step": 2280 }, { "epoch": 0.0990653400525477, "learning_rate": 9.94909392574716e-07, "loss": 6.0686, "step": 2300 }, { "epoch": 0.09992677779213507, "learning_rate": 9.948609105992371e-07, "loss": 5.8488, "step": 2320 }, { "epoch": 0.10078821553172244, "learning_rate": 9.948124286237584e-07, "loss": 5.7428, "step": 2340 }, { "epoch": 0.10164965327130981, "learning_rate": 9.947639466482793e-07, "loss": 5.8092, "step": 2360 }, { "epoch": 0.10251109101089718, "learning_rate": 9.947154646728006e-07, "loss": 6.0985, "step": 2380 }, { "epoch": 0.10337252875048455, "learning_rate": 9.946669826973216e-07, "loss": 6.14, "step": 2400 }, { "epoch": 0.10423396649007192, "learning_rate": 9.946185007218427e-07, "loss": 5.6721, "step": 2420 }, { "epoch": 0.1050954042296593, "learning_rate": 9.945700187463638e-07, "loss": 5.6273, "step": 2440 }, { "epoch": 0.10595684196924668, "learning_rate": 9.945215367708848e-07, "loss": 6.1214, "step": 2460 }, { "epoch": 0.10681827970883405, "learning_rate": 9.944730547954061e-07, "loss": 5.9658, "step": 2480 }, { "epoch": 0.10767971744842142, "learning_rate": 9.94424572819927e-07, "loss": 5.7439, "step": 2500 }, { "epoch": 0.10854115518800879, "learning_rate": 9.943760908444483e-07, "loss": 5.7759, "step": 2520 }, { "epoch": 0.10940259292759616, "learning_rate": 9.943276088689693e-07, "loss": 5.9705, "step": 2540 }, { "epoch": 0.11026403066718353, "learning_rate": 9.942791268934904e-07, "loss": 5.5893, "step": 2560 }, { "epoch": 0.1111254684067709, "learning_rate": 9.942306449180115e-07, "loss": 5.7964, "step": 2580 }, { "epoch": 0.11198690614635827, "learning_rate": 9.941821629425326e-07, "loss": 5.8011, "step": 2600 }, { "epoch": 0.11284834388594564, "learning_rate": 9.941336809670538e-07, "loss": 5.7291, "step": 2620 }, { "epoch": 0.11370978162553301, "learning_rate": 9.94085198991575e-07, "loss": 5.7857, "step": 2640 }, { "epoch": 0.11457121936512038, "learning_rate": 9.94036717016096e-07, "loss": 5.8249, "step": 2660 }, { "epoch": 0.11543265710470775, "learning_rate": 9.93988235040617e-07, "loss": 5.6865, "step": 2680 }, { "epoch": 0.11629409484429513, "learning_rate": 9.939397530651381e-07, "loss": 5.8647, "step": 2700 }, { "epoch": 0.1171555325838825, "learning_rate": 9.938912710896594e-07, "loss": 5.558, "step": 2720 }, { "epoch": 0.11801697032346987, "learning_rate": 9.938427891141803e-07, "loss": 6.0017, "step": 2740 }, { "epoch": 0.11887840806305724, "learning_rate": 9.937943071387016e-07, "loss": 5.7348, "step": 2760 }, { "epoch": 0.11973984580264461, "learning_rate": 9.937458251632226e-07, "loss": 5.5196, "step": 2780 }, { "epoch": 0.12060128354223199, "learning_rate": 9.936973431877437e-07, "loss": 5.6164, "step": 2800 }, { "epoch": 0.12146272128181936, "learning_rate": 9.936488612122648e-07, "loss": 5.8701, "step": 2820 }, { "epoch": 0.12232415902140673, "learning_rate": 9.936003792367858e-07, "loss": 5.9252, "step": 2840 }, { "epoch": 0.1231855967609941, "learning_rate": 9.935518972613071e-07, "loss": 5.502, "step": 2860 }, { "epoch": 0.12404703450058147, "learning_rate": 9.935034152858282e-07, "loss": 5.8715, "step": 2880 }, { "epoch": 0.12490847224016884, "learning_rate": 9.934549333103493e-07, "loss": 5.5986, "step": 2900 }, { "epoch": 0.1257699099797562, "learning_rate": 9.934064513348703e-07, "loss": 5.7461, "step": 2920 }, { "epoch": 0.1266313477193436, "learning_rate": 9.933579693593914e-07, "loss": 5.921, "step": 2940 }, { "epoch": 0.12749278545893095, "learning_rate": 9.933094873839125e-07, "loss": 5.8295, "step": 2960 }, { "epoch": 0.12835422319851833, "learning_rate": 9.932610054084336e-07, "loss": 5.6581, "step": 2980 }, { "epoch": 0.1292156609381057, "learning_rate": 9.932125234329548e-07, "loss": 5.7304, "step": 3000 }, { "epoch": 0.13007709867769307, "learning_rate": 9.93164041457476e-07, "loss": 5.9998, "step": 3020 }, { "epoch": 0.13093853641728045, "learning_rate": 9.93115559481997e-07, "loss": 6.0369, "step": 3040 }, { "epoch": 0.1317999741568678, "learning_rate": 9.930670775065183e-07, "loss": 5.9676, "step": 3060 }, { "epoch": 0.1326614118964552, "learning_rate": 9.930185955310391e-07, "loss": 5.2663, "step": 3080 }, { "epoch": 0.13352284963604255, "learning_rate": 9.929701135555604e-07, "loss": 5.8159, "step": 3100 }, { "epoch": 0.13438428737562993, "learning_rate": 9.929216315800813e-07, "loss": 5.5133, "step": 3120 }, { "epoch": 0.1352457251152173, "learning_rate": 9.928731496046025e-07, "loss": 5.7083, "step": 3140 }, { "epoch": 0.13610716285480468, "learning_rate": 9.928246676291236e-07, "loss": 5.7072, "step": 3160 }, { "epoch": 0.13696860059439203, "learning_rate": 9.927761856536447e-07, "loss": 5.8228, "step": 3180 }, { "epoch": 0.13783003833397942, "learning_rate": 9.927277036781658e-07, "loss": 5.9269, "step": 3200 }, { "epoch": 0.13869147607356677, "learning_rate": 9.926792217026868e-07, "loss": 5.7547, "step": 3220 }, { "epoch": 0.13955291381315416, "learning_rate": 9.926307397272081e-07, "loss": 5.7518, "step": 3240 }, { "epoch": 0.1404143515527415, "learning_rate": 9.925822577517292e-07, "loss": 5.7172, "step": 3260 }, { "epoch": 0.1412757892923289, "learning_rate": 9.925337757762503e-07, "loss": 5.3164, "step": 3280 }, { "epoch": 0.14213722703191628, "learning_rate": 9.924852938007715e-07, "loss": 5.5814, "step": 3300 }, { "epoch": 0.14299866477150364, "learning_rate": 9.924368118252924e-07, "loss": 5.7014, "step": 3320 }, { "epoch": 0.14386010251109102, "learning_rate": 9.923883298498137e-07, "loss": 5.6091, "step": 3340 }, { "epoch": 0.14472154025067838, "learning_rate": 9.923398478743345e-07, "loss": 5.9346, "step": 3360 }, { "epoch": 0.14558297799026576, "learning_rate": 9.922913658988558e-07, "loss": 5.411, "step": 3380 }, { "epoch": 0.14644441572985312, "learning_rate": 9.92242883923377e-07, "loss": 5.7669, "step": 3400 }, { "epoch": 0.1473058534694405, "learning_rate": 9.92194401947898e-07, "loss": 5.3575, "step": 3420 }, { "epoch": 0.14816729120902786, "learning_rate": 9.921459199724193e-07, "loss": 5.8575, "step": 3440 }, { "epoch": 0.14902872894861524, "learning_rate": 9.920974379969401e-07, "loss": 5.5618, "step": 3460 }, { "epoch": 0.1498901666882026, "learning_rate": 9.920489560214614e-07, "loss": 5.7241, "step": 3480 }, { "epoch": 0.15075160442778998, "learning_rate": 9.920004740459823e-07, "loss": 5.6646, "step": 3500 }, { "epoch": 0.15161304216737737, "learning_rate": 9.919519920705035e-07, "loss": 5.7116, "step": 3520 }, { "epoch": 0.15247447990696472, "learning_rate": 9.919035100950246e-07, "loss": 5.4578, "step": 3540 }, { "epoch": 0.1533359176465521, "learning_rate": 9.918550281195457e-07, "loss": 5.4505, "step": 3560 }, { "epoch": 0.15419735538613946, "learning_rate": 9.918065461440668e-07, "loss": 5.6805, "step": 3580 }, { "epoch": 0.15505879312572685, "learning_rate": 9.917580641685878e-07, "loss": 5.3575, "step": 3600 }, { "epoch": 0.1559202308653142, "learning_rate": 9.917095821931091e-07, "loss": 5.7853, "step": 3620 }, { "epoch": 0.1567816686049016, "learning_rate": 9.916611002176302e-07, "loss": 5.8258, "step": 3640 }, { "epoch": 0.15764310634448894, "learning_rate": 9.916126182421513e-07, "loss": 5.6299, "step": 3660 }, { "epoch": 0.15850454408407633, "learning_rate": 9.915641362666725e-07, "loss": 5.7533, "step": 3680 }, { "epoch": 0.15936598182366368, "learning_rate": 9.915156542911934e-07, "loss": 5.6216, "step": 3700 }, { "epoch": 0.16022741956325107, "learning_rate": 9.914671723157147e-07, "loss": 5.9745, "step": 3720 }, { "epoch": 0.16108885730283845, "learning_rate": 9.914186903402355e-07, "loss": 5.6028, "step": 3740 }, { "epoch": 0.1619502950424258, "learning_rate": 9.913702083647568e-07, "loss": 5.658, "step": 3760 }, { "epoch": 0.1628117327820132, "learning_rate": 9.913217263892779e-07, "loss": 5.4876, "step": 3780 }, { "epoch": 0.16367317052160055, "learning_rate": 9.91273244413799e-07, "loss": 5.5794, "step": 3800 }, { "epoch": 0.16453460826118793, "learning_rate": 9.912247624383202e-07, "loss": 5.6442, "step": 3820 }, { "epoch": 0.1653960460007753, "learning_rate": 9.911762804628411e-07, "loss": 5.5536, "step": 3840 }, { "epoch": 0.16625748374036267, "learning_rate": 9.911277984873624e-07, "loss": 5.5444, "step": 3860 }, { "epoch": 0.16711892147995003, "learning_rate": 9.910793165118835e-07, "loss": 5.7136, "step": 3880 }, { "epoch": 0.1679803592195374, "learning_rate": 9.910308345364045e-07, "loss": 5.2753, "step": 3900 }, { "epoch": 0.16884179695912477, "learning_rate": 9.909823525609258e-07, "loss": 5.4269, "step": 3920 }, { "epoch": 0.16970323469871215, "learning_rate": 9.909338705854467e-07, "loss": 5.4923, "step": 3940 }, { "epoch": 0.1705646724382995, "learning_rate": 9.90885388609968e-07, "loss": 5.5202, "step": 3960 }, { "epoch": 0.1714261101778869, "learning_rate": 9.908369066344888e-07, "loss": 5.5258, "step": 3980 }, { "epoch": 0.17228754791747428, "learning_rate": 9.9078842465901e-07, "loss": 5.5544, "step": 4000 }, { "epoch": 0.17314898565706163, "learning_rate": 9.907399426835312e-07, "loss": 5.5478, "step": 4020 }, { "epoch": 0.17401042339664902, "learning_rate": 9.906914607080522e-07, "loss": 5.5041, "step": 4040 }, { "epoch": 0.17487186113623637, "learning_rate": 9.906429787325735e-07, "loss": 5.4906, "step": 4060 }, { "epoch": 0.17573329887582376, "learning_rate": 9.905944967570944e-07, "loss": 5.4225, "step": 4080 }, { "epoch": 0.1765947366154111, "learning_rate": 9.905460147816157e-07, "loss": 5.6207, "step": 4100 }, { "epoch": 0.1774561743549985, "learning_rate": 9.904975328061365e-07, "loss": 5.5054, "step": 4120 }, { "epoch": 0.17831761209458585, "learning_rate": 9.904490508306578e-07, "loss": 5.4273, "step": 4140 }, { "epoch": 0.17917904983417324, "learning_rate": 9.904005688551789e-07, "loss": 5.6312, "step": 4160 }, { "epoch": 0.1800404875737606, "learning_rate": 9.903520868797e-07, "loss": 5.5304, "step": 4180 }, { "epoch": 0.18090192531334798, "learning_rate": 9.903036049042212e-07, "loss": 5.8041, "step": 4200 }, { "epoch": 0.18176336305293536, "learning_rate": 9.90255122928742e-07, "loss": 5.6389, "step": 4220 }, { "epoch": 0.18262480079252272, "learning_rate": 9.902066409532634e-07, "loss": 5.4039, "step": 4240 }, { "epoch": 0.1834862385321101, "learning_rate": 9.901581589777845e-07, "loss": 5.3949, "step": 4260 }, { "epoch": 0.18434767627169746, "learning_rate": 9.901096770023055e-07, "loss": 5.462, "step": 4280 }, { "epoch": 0.18520911401128484, "learning_rate": 9.900611950268268e-07, "loss": 5.6406, "step": 4300 }, { "epoch": 0.1860705517508722, "learning_rate": 9.900127130513477e-07, "loss": 5.4098, "step": 4320 }, { "epoch": 0.18693198949045958, "learning_rate": 9.89964231075869e-07, "loss": 5.5045, "step": 4340 }, { "epoch": 0.18779342723004694, "learning_rate": 9.899157491003898e-07, "loss": 5.4192, "step": 4360 }, { "epoch": 0.18865486496963432, "learning_rate": 9.89867267124911e-07, "loss": 5.3939, "step": 4380 }, { "epoch": 0.18951630270922168, "learning_rate": 9.898187851494322e-07, "loss": 5.6309, "step": 4400 }, { "epoch": 0.19037774044880906, "learning_rate": 9.897703031739532e-07, "loss": 5.3819, "step": 4420 }, { "epoch": 0.19123917818839645, "learning_rate": 9.897218211984745e-07, "loss": 5.3316, "step": 4440 }, { "epoch": 0.1921006159279838, "learning_rate": 9.896733392229954e-07, "loss": 5.3271, "step": 4460 }, { "epoch": 0.1929620536675712, "learning_rate": 9.896248572475167e-07, "loss": 5.5944, "step": 4480 }, { "epoch": 0.19382349140715854, "learning_rate": 9.895763752720377e-07, "loss": 5.4956, "step": 4500 }, { "epoch": 0.19468492914674593, "learning_rate": 9.895278932965588e-07, "loss": 5.3326, "step": 4520 }, { "epoch": 0.19554636688633328, "learning_rate": 9.894794113210799e-07, "loss": 5.4429, "step": 4540 }, { "epoch": 0.19640780462592067, "learning_rate": 9.89430929345601e-07, "loss": 5.4955, "step": 4560 }, { "epoch": 0.19726924236550802, "learning_rate": 9.893824473701222e-07, "loss": 5.2479, "step": 4580 }, { "epoch": 0.1981306801050954, "learning_rate": 9.89333965394643e-07, "loss": 5.3128, "step": 4600 }, { "epoch": 0.19899211784468276, "learning_rate": 9.892854834191644e-07, "loss": 5.3392, "step": 4620 }, { "epoch": 0.19985355558427015, "learning_rate": 9.892370014436854e-07, "loss": 5.5973, "step": 4640 }, { "epoch": 0.2007149933238575, "learning_rate": 9.891885194682065e-07, "loss": 5.4401, "step": 4660 }, { "epoch": 0.2015764310634449, "learning_rate": 9.891400374927278e-07, "loss": 5.3984, "step": 4680 }, { "epoch": 0.20243786880303227, "learning_rate": 9.890915555172487e-07, "loss": 5.2152, "step": 4700 }, { "epoch": 0.20329930654261963, "learning_rate": 9.8904307354177e-07, "loss": 5.5735, "step": 4720 }, { "epoch": 0.204160744282207, "learning_rate": 9.889945915662908e-07, "loss": 5.4082, "step": 4740 }, { "epoch": 0.20502218202179437, "learning_rate": 9.88946109590812e-07, "loss": 5.2788, "step": 4760 }, { "epoch": 0.20588361976138175, "learning_rate": 9.888976276153332e-07, "loss": 5.384, "step": 4780 }, { "epoch": 0.2067450575009691, "learning_rate": 9.888491456398542e-07, "loss": 5.4814, "step": 4800 }, { "epoch": 0.2076064952405565, "learning_rate": 9.888006636643755e-07, "loss": 5.1962, "step": 4820 }, { "epoch": 0.20846793298014385, "learning_rate": 9.887521816888966e-07, "loss": 5.0913, "step": 4840 }, { "epoch": 0.20932937071973123, "learning_rate": 9.887036997134177e-07, "loss": 5.3056, "step": 4860 }, { "epoch": 0.2101908084593186, "learning_rate": 9.886552177379387e-07, "loss": 5.4644, "step": 4880 }, { "epoch": 0.21105224619890597, "learning_rate": 9.886067357624598e-07, "loss": 5.6686, "step": 4900 }, { "epoch": 0.21191368393849336, "learning_rate": 9.88558253786981e-07, "loss": 5.3201, "step": 4920 }, { "epoch": 0.21277512167808071, "learning_rate": 9.88509771811502e-07, "loss": 5.2799, "step": 4940 }, { "epoch": 0.2136365594176681, "learning_rate": 9.884612898360232e-07, "loss": 5.1967, "step": 4960 }, { "epoch": 0.21449799715725545, "learning_rate": 9.88412807860544e-07, "loss": 5.4358, "step": 4980 }, { "epoch": 0.21535943489684284, "learning_rate": 9.883643258850654e-07, "loss": 5.124, "step": 5000 }, { "epoch": 0.2162208726364302, "learning_rate": 9.883158439095864e-07, "loss": 5.4017, "step": 5020 }, { "epoch": 0.21708231037601758, "learning_rate": 9.882673619341075e-07, "loss": 5.28, "step": 5040 }, { "epoch": 0.21794374811560493, "learning_rate": 9.882188799586288e-07, "loss": 5.427, "step": 5060 }, { "epoch": 0.21880518585519232, "learning_rate": 9.881703979831497e-07, "loss": 5.272, "step": 5080 }, { "epoch": 0.21966662359477968, "learning_rate": 9.88121916007671e-07, "loss": 5.3149, "step": 5100 }, { "epoch": 0.22052806133436706, "learning_rate": 9.880734340321918e-07, "loss": 5.5404, "step": 5120 }, { "epoch": 0.22138949907395444, "learning_rate": 9.88024952056713e-07, "loss": 5.3568, "step": 5140 }, { "epoch": 0.2222509368135418, "learning_rate": 9.879764700812342e-07, "loss": 5.2099, "step": 5160 }, { "epoch": 0.22311237455312918, "learning_rate": 9.879279881057552e-07, "loss": 5.2753, "step": 5180 }, { "epoch": 0.22397381229271654, "learning_rate": 9.878795061302765e-07, "loss": 5.3216, "step": 5200 }, { "epoch": 0.22483525003230392, "learning_rate": 9.878310241547976e-07, "loss": 5.332, "step": 5220 }, { "epoch": 0.22569668777189128, "learning_rate": 9.877825421793187e-07, "loss": 5.1957, "step": 5240 }, { "epoch": 0.22655812551147866, "learning_rate": 9.877340602038397e-07, "loss": 5.493, "step": 5260 }, { "epoch": 0.22741956325106602, "learning_rate": 9.876855782283608e-07, "loss": 5.4048, "step": 5280 }, { "epoch": 0.2282810009906534, "learning_rate": 9.87637096252882e-07, "loss": 5.4582, "step": 5300 }, { "epoch": 0.22914243873024076, "learning_rate": 9.87588614277403e-07, "loss": 5.1358, "step": 5320 }, { "epoch": 0.23000387646982814, "learning_rate": 9.875401323019242e-07, "loss": 5.507, "step": 5340 }, { "epoch": 0.2308653142094155, "learning_rate": 9.87491650326445e-07, "loss": 5.2223, "step": 5360 }, { "epoch": 0.23172675194900288, "learning_rate": 9.874431683509664e-07, "loss": 5.2098, "step": 5380 }, { "epoch": 0.23258818968859027, "learning_rate": 9.873946863754874e-07, "loss": 5.2615, "step": 5400 }, { "epoch": 0.23344962742817763, "learning_rate": 9.873462044000085e-07, "loss": 5.2096, "step": 5420 }, { "epoch": 0.234311065167765, "learning_rate": 9.872977224245298e-07, "loss": 5.1175, "step": 5440 }, { "epoch": 0.23517250290735237, "learning_rate": 9.872492404490509e-07, "loss": 5.2392, "step": 5460 }, { "epoch": 0.23603394064693975, "learning_rate": 9.87200758473572e-07, "loss": 5.1828, "step": 5480 }, { "epoch": 0.2368953783865271, "learning_rate": 9.87152276498093e-07, "loss": 5.1802, "step": 5500 }, { "epoch": 0.2377568161261145, "learning_rate": 9.87103794522614e-07, "loss": 5.2447, "step": 5520 }, { "epoch": 0.23861825386570185, "learning_rate": 9.870553125471354e-07, "loss": 5.0941, "step": 5540 }, { "epoch": 0.23947969160528923, "learning_rate": 9.870068305716562e-07, "loss": 5.0211, "step": 5560 }, { "epoch": 0.2403411293448766, "learning_rate": 9.869583485961775e-07, "loss": 5.4385, "step": 5580 }, { "epoch": 0.24120256708446397, "learning_rate": 9.869098666206986e-07, "loss": 5.5081, "step": 5600 }, { "epoch": 0.24206400482405135, "learning_rate": 9.868613846452196e-07, "loss": 5.5758, "step": 5620 }, { "epoch": 0.2429254425636387, "learning_rate": 9.868129026697407e-07, "loss": 5.379, "step": 5640 }, { "epoch": 0.2437868803032261, "learning_rate": 9.867644206942618e-07, "loss": 5.5049, "step": 5660 }, { "epoch": 0.24464831804281345, "learning_rate": 9.86715938718783e-07, "loss": 5.2212, "step": 5680 }, { "epoch": 0.24550975578240083, "learning_rate": 9.86667456743304e-07, "loss": 5.1315, "step": 5700 }, { "epoch": 0.2463711935219882, "learning_rate": 9.866189747678252e-07, "loss": 4.9211, "step": 5720 }, { "epoch": 0.24723263126157558, "learning_rate": 9.86570492792346e-07, "loss": 5.2577, "step": 5740 }, { "epoch": 0.24809406900116293, "learning_rate": 9.865220108168674e-07, "loss": 5.1484, "step": 5760 }, { "epoch": 0.24895550674075032, "learning_rate": 9.864735288413884e-07, "loss": 5.2857, "step": 5780 }, { "epoch": 0.24981694448033767, "learning_rate": 9.864250468659095e-07, "loss": 5.1734, "step": 5800 }, { "epoch": 0.25067838221992506, "learning_rate": 9.863765648904308e-07, "loss": 5.3354, "step": 5820 }, { "epoch": 0.2515398199595124, "learning_rate": 9.863280829149519e-07, "loss": 5.2673, "step": 5840 }, { "epoch": 0.2524012576990998, "learning_rate": 9.86279600939473e-07, "loss": 5.1835, "step": 5860 }, { "epoch": 0.2532626954386872, "learning_rate": 9.86231118963994e-07, "loss": 5.1701, "step": 5880 }, { "epoch": 0.25412413317827454, "learning_rate": 9.86182636988515e-07, "loss": 5.2417, "step": 5900 }, { "epoch": 0.2549855709178619, "learning_rate": 9.861341550130364e-07, "loss": 4.9244, "step": 5920 }, { "epoch": 0.2558470086574493, "learning_rate": 9.860856730375572e-07, "loss": 5.0248, "step": 5940 }, { "epoch": 0.25670844639703666, "learning_rate": 9.860371910620785e-07, "loss": 5.362, "step": 5960 }, { "epoch": 0.257569884136624, "learning_rate": 9.859887090865996e-07, "loss": 4.9936, "step": 5980 }, { "epoch": 0.2584313218762114, "learning_rate": 9.859402271111206e-07, "loss": 5.5917, "step": 6000 }, { "epoch": 0.2592927596157988, "learning_rate": 9.858917451356417e-07, "loss": 5.1554, "step": 6020 }, { "epoch": 0.26015419735538614, "learning_rate": 9.858432631601628e-07, "loss": 5.3179, "step": 6040 }, { "epoch": 0.2610156350949735, "learning_rate": 9.85794781184684e-07, "loss": 5.2851, "step": 6060 }, { "epoch": 0.2618770728345609, "learning_rate": 9.857462992092051e-07, "loss": 5.2355, "step": 6080 }, { "epoch": 0.26273851057414827, "learning_rate": 9.856978172337262e-07, "loss": 5.2866, "step": 6100 }, { "epoch": 0.2635999483137356, "learning_rate": 9.85649335258247e-07, "loss": 5.2053, "step": 6120 }, { "epoch": 0.264461386053323, "learning_rate": 9.856008532827684e-07, "loss": 5.0226, "step": 6140 }, { "epoch": 0.2653228237929104, "learning_rate": 9.855523713072894e-07, "loss": 5.2522, "step": 6160 }, { "epoch": 0.26618426153249775, "learning_rate": 9.855038893318105e-07, "loss": 5.101, "step": 6180 }, { "epoch": 0.2670456992720851, "learning_rate": 9.854554073563318e-07, "loss": 5.272, "step": 6200 }, { "epoch": 0.26790713701167246, "learning_rate": 9.854069253808529e-07, "loss": 5.2802, "step": 6220 }, { "epoch": 0.26876857475125987, "learning_rate": 9.85358443405374e-07, "loss": 5.3429, "step": 6240 }, { "epoch": 0.2696300124908472, "learning_rate": 9.85309961429895e-07, "loss": 5.3596, "step": 6260 }, { "epoch": 0.2704914502304346, "learning_rate": 9.85261479454416e-07, "loss": 4.9346, "step": 6280 }, { "epoch": 0.271352887970022, "learning_rate": 9.852129974789373e-07, "loss": 5.178, "step": 6300 }, { "epoch": 0.27221432570960935, "learning_rate": 9.851645155034582e-07, "loss": 5.266, "step": 6320 }, { "epoch": 0.2730757634491967, "learning_rate": 9.851160335279795e-07, "loss": 5.205, "step": 6340 }, { "epoch": 0.27393720118878406, "learning_rate": 9.850675515525006e-07, "loss": 5.1011, "step": 6360 }, { "epoch": 0.2747986389283715, "learning_rate": 9.850190695770216e-07, "loss": 5.3448, "step": 6380 }, { "epoch": 0.27566007666795883, "learning_rate": 9.849705876015427e-07, "loss": 5.1696, "step": 6400 }, { "epoch": 0.2765215144075462, "learning_rate": 9.849221056260638e-07, "loss": 4.74, "step": 6420 }, { "epoch": 0.27738295214713354, "learning_rate": 9.84873623650585e-07, "loss": 5.101, "step": 6440 }, { "epoch": 0.27824438988672096, "learning_rate": 9.848251416751061e-07, "loss": 5.1098, "step": 6460 }, { "epoch": 0.2791058276263083, "learning_rate": 9.847766596996272e-07, "loss": 5.2632, "step": 6480 }, { "epoch": 0.27996726536589567, "learning_rate": 9.847281777241483e-07, "loss": 5.2572, "step": 6500 }, { "epoch": 0.280828703105483, "learning_rate": 9.846796957486693e-07, "loss": 5.2582, "step": 6520 }, { "epoch": 0.28169014084507044, "learning_rate": 9.846312137731906e-07, "loss": 5.3315, "step": 6540 }, { "epoch": 0.2825515785846578, "learning_rate": 9.845827317977115e-07, "loss": 4.955, "step": 6560 }, { "epoch": 0.28341301632424515, "learning_rate": 9.845342498222328e-07, "loss": 5.2012, "step": 6580 }, { "epoch": 0.28427445406383256, "learning_rate": 9.844857678467538e-07, "loss": 5.3038, "step": 6600 }, { "epoch": 0.2851358918034199, "learning_rate": 9.84437285871275e-07, "loss": 5.2074, "step": 6620 }, { "epoch": 0.2859973295430073, "learning_rate": 9.84388803895796e-07, "loss": 5.0363, "step": 6640 }, { "epoch": 0.28685876728259463, "learning_rate": 9.84340321920317e-07, "loss": 5.1664, "step": 6660 }, { "epoch": 0.28772020502218204, "learning_rate": 9.842918399448383e-07, "loss": 5.1214, "step": 6680 }, { "epoch": 0.2885816427617694, "learning_rate": 9.842433579693592e-07, "loss": 4.9995, "step": 6700 }, { "epoch": 0.28944308050135675, "learning_rate": 9.841948759938805e-07, "loss": 5.0777, "step": 6720 }, { "epoch": 0.2903045182409441, "learning_rate": 9.841463940184016e-07, "loss": 5.1652, "step": 6740 }, { "epoch": 0.2911659559805315, "learning_rate": 9.840979120429226e-07, "loss": 5.1024, "step": 6760 }, { "epoch": 0.2920273937201189, "learning_rate": 9.840494300674437e-07, "loss": 5.054, "step": 6780 }, { "epoch": 0.29288883145970623, "learning_rate": 9.840009480919648e-07, "loss": 5.2045, "step": 6800 }, { "epoch": 0.29375026919929365, "learning_rate": 9.83952466116486e-07, "loss": 5.3555, "step": 6820 }, { "epoch": 0.294611706938881, "learning_rate": 9.839039841410071e-07, "loss": 5.1176, "step": 6840 }, { "epoch": 0.29547314467846836, "learning_rate": 9.838555021655282e-07, "loss": 5.4848, "step": 6860 }, { "epoch": 0.2963345824180557, "learning_rate": 9.838070201900493e-07, "loss": 5.1506, "step": 6880 }, { "epoch": 0.2971960201576431, "learning_rate": 9.837585382145703e-07, "loss": 5.1673, "step": 6900 }, { "epoch": 0.2980574578972305, "learning_rate": 9.837100562390916e-07, "loss": 5.0694, "step": 6920 }, { "epoch": 0.29891889563681784, "learning_rate": 9.836615742636125e-07, "loss": 5.4286, "step": 6940 }, { "epoch": 0.2997803333764052, "learning_rate": 9.836130922881338e-07, "loss": 4.9144, "step": 6960 }, { "epoch": 0.3006417711159926, "learning_rate": 9.835646103126548e-07, "loss": 4.9661, "step": 6980 }, { "epoch": 0.30150320885557996, "learning_rate": 9.83516128337176e-07, "loss": 5.0778, "step": 7000 }, { "epoch": 0.3023646465951673, "learning_rate": 9.83467646361697e-07, "loss": 5.14, "step": 7020 }, { "epoch": 0.30322608433475473, "learning_rate": 9.83419164386218e-07, "loss": 5.1204, "step": 7040 }, { "epoch": 0.3040875220743421, "learning_rate": 9.833706824107393e-07, "loss": 4.942, "step": 7060 }, { "epoch": 0.30494895981392944, "learning_rate": 9.833222004352604e-07, "loss": 5.08, "step": 7080 }, { "epoch": 0.3058103975535168, "learning_rate": 9.832737184597815e-07, "loss": 5.1513, "step": 7100 }, { "epoch": 0.3066718352931042, "learning_rate": 9.832252364843028e-07, "loss": 5.0832, "step": 7120 }, { "epoch": 0.30753327303269157, "learning_rate": 9.831767545088236e-07, "loss": 5.1246, "step": 7140 }, { "epoch": 0.3083947107722789, "learning_rate": 9.83128272533345e-07, "loss": 5.1067, "step": 7160 }, { "epoch": 0.3092561485118663, "learning_rate": 9.830797905578658e-07, "loss": 5.0547, "step": 7180 }, { "epoch": 0.3101175862514537, "learning_rate": 9.83031308582387e-07, "loss": 5.3209, "step": 7200 }, { "epoch": 0.31097902399104105, "learning_rate": 9.829828266069081e-07, "loss": 5.1226, "step": 7220 }, { "epoch": 0.3118404617306284, "learning_rate": 9.829343446314292e-07, "loss": 5.2013, "step": 7240 }, { "epoch": 0.3127018994702158, "learning_rate": 9.828858626559505e-07, "loss": 5.1918, "step": 7260 }, { "epoch": 0.3135633372098032, "learning_rate": 9.828373806804713e-07, "loss": 5.1001, "step": 7280 }, { "epoch": 0.31442477494939053, "learning_rate": 9.827888987049926e-07, "loss": 5.1812, "step": 7300 }, { "epoch": 0.3152862126889779, "learning_rate": 9.827404167295135e-07, "loss": 4.9552, "step": 7320 }, { "epoch": 0.3161476504285653, "learning_rate": 9.826919347540348e-07, "loss": 5.221, "step": 7340 }, { "epoch": 0.31700908816815265, "learning_rate": 9.826434527785558e-07, "loss": 4.8989, "step": 7360 }, { "epoch": 0.31787052590774, "learning_rate": 9.82594970803077e-07, "loss": 5.2631, "step": 7380 }, { "epoch": 0.31873196364732737, "learning_rate": 9.82546488827598e-07, "loss": 5.0938, "step": 7400 }, { "epoch": 0.3195934013869148, "learning_rate": 9.82498006852119e-07, "loss": 5.0062, "step": 7420 }, { "epoch": 0.32045483912650213, "learning_rate": 9.824495248766403e-07, "loss": 4.9372, "step": 7440 }, { "epoch": 0.3213162768660895, "learning_rate": 9.824010429011614e-07, "loss": 4.8481, "step": 7460 }, { "epoch": 0.3221777146056769, "learning_rate": 9.823525609256825e-07, "loss": 4.8534, "step": 7480 }, { "epoch": 0.32303915234526426, "learning_rate": 9.823040789502038e-07, "loss": 4.9954, "step": 7500 }, { "epoch": 0.3239005900848516, "learning_rate": 9.822555969747246e-07, "loss": 4.8983, "step": 7520 }, { "epoch": 0.32476202782443897, "learning_rate": 9.82207114999246e-07, "loss": 5.0276, "step": 7540 }, { "epoch": 0.3256234655640264, "learning_rate": 9.821586330237668e-07, "loss": 4.787, "step": 7560 }, { "epoch": 0.32648490330361374, "learning_rate": 9.82110151048288e-07, "loss": 5.4519, "step": 7580 }, { "epoch": 0.3273463410432011, "learning_rate": 9.820616690728091e-07, "loss": 5.1657, "step": 7600 }, { "epoch": 0.32820777878278845, "learning_rate": 9.820131870973302e-07, "loss": 5.0934, "step": 7620 }, { "epoch": 0.32906921652237586, "learning_rate": 9.819647051218515e-07, "loss": 5.1082, "step": 7640 }, { "epoch": 0.3299306542619632, "learning_rate": 9.819162231463723e-07, "loss": 4.9206, "step": 7660 }, { "epoch": 0.3307920920015506, "learning_rate": 9.818677411708936e-07, "loss": 5.1762, "step": 7680 }, { "epoch": 0.331653529741138, "learning_rate": 9.818192591954147e-07, "loss": 4.9105, "step": 7700 }, { "epoch": 0.33251496748072534, "learning_rate": 9.817707772199358e-07, "loss": 5.1632, "step": 7720 }, { "epoch": 0.3333764052203127, "learning_rate": 9.817222952444568e-07, "loss": 5.2379, "step": 7740 }, { "epoch": 0.33423784295990006, "learning_rate": 9.81673813268978e-07, "loss": 5.1562, "step": 7760 }, { "epoch": 0.33509928069948747, "learning_rate": 9.81625331293499e-07, "loss": 5.0009, "step": 7780 }, { "epoch": 0.3359607184390748, "learning_rate": 9.8157684931802e-07, "loss": 4.8898, "step": 7800 }, { "epoch": 0.3368221561786622, "learning_rate": 9.815283673425413e-07, "loss": 4.9866, "step": 7820 }, { "epoch": 0.33768359391824954, "learning_rate": 9.814798853670624e-07, "loss": 4.8798, "step": 7840 }, { "epoch": 0.33854503165783695, "learning_rate": 9.814314033915835e-07, "loss": 4.9858, "step": 7860 }, { "epoch": 0.3394064693974243, "learning_rate": 9.813829214161047e-07, "loss": 5.1652, "step": 7880 }, { "epoch": 0.34026790713701166, "learning_rate": 9.813344394406256e-07, "loss": 5.0168, "step": 7900 }, { "epoch": 0.341129344876599, "learning_rate": 9.812859574651469e-07, "loss": 4.9382, "step": 7920 }, { "epoch": 0.34199078261618643, "learning_rate": 9.812374754896678e-07, "loss": 5.1187, "step": 7940 }, { "epoch": 0.3428522203557738, "learning_rate": 9.81188993514189e-07, "loss": 4.8829, "step": 7960 }, { "epoch": 0.34371365809536114, "learning_rate": 9.811405115387101e-07, "loss": 4.8803, "step": 7980 }, { "epoch": 0.34457509583494855, "learning_rate": 9.810920295632312e-07, "loss": 4.9934, "step": 8000 }, { "epoch": 0.3454365335745359, "learning_rate": 9.810435475877525e-07, "loss": 5.0201, "step": 8020 }, { "epoch": 0.34629797131412327, "learning_rate": 9.809950656122733e-07, "loss": 5.2234, "step": 8040 }, { "epoch": 0.3471594090537106, "learning_rate": 9.809465836367946e-07, "loss": 5.102, "step": 8060 }, { "epoch": 0.34802084679329803, "learning_rate": 9.808981016613157e-07, "loss": 5.0268, "step": 8080 }, { "epoch": 0.3488822845328854, "learning_rate": 9.808496196858367e-07, "loss": 4.6825, "step": 8100 }, { "epoch": 0.34974372227247275, "learning_rate": 9.80801137710358e-07, "loss": 5.0273, "step": 8120 }, { "epoch": 0.3506051600120601, "learning_rate": 9.807526557348789e-07, "loss": 4.76, "step": 8140 }, { "epoch": 0.3514665977516475, "learning_rate": 9.807041737594002e-07, "loss": 4.8518, "step": 8160 }, { "epoch": 0.35232803549123487, "learning_rate": 9.80655691783921e-07, "loss": 5.0516, "step": 8180 }, { "epoch": 0.3531894732308222, "learning_rate": 9.806072098084423e-07, "loss": 5.2637, "step": 8200 }, { "epoch": 0.35405091097040964, "learning_rate": 9.805587278329634e-07, "loss": 5.178, "step": 8220 }, { "epoch": 0.354912348709997, "learning_rate": 9.805102458574845e-07, "loss": 5.163, "step": 8240 }, { "epoch": 0.35577378644958435, "learning_rate": 9.804617638820057e-07, "loss": 4.9961, "step": 8260 }, { "epoch": 0.3566352241891717, "learning_rate": 9.804132819065266e-07, "loss": 4.8015, "step": 8280 }, { "epoch": 0.3574966619287591, "learning_rate": 9.803647999310479e-07, "loss": 4.8818, "step": 8300 }, { "epoch": 0.3583580996683465, "learning_rate": 9.803163179555687e-07, "loss": 4.9811, "step": 8320 }, { "epoch": 0.35921953740793383, "learning_rate": 9.8026783598009e-07, "loss": 4.9989, "step": 8340 }, { "epoch": 0.3600809751475212, "learning_rate": 9.80219354004611e-07, "loss": 4.7071, "step": 8360 }, { "epoch": 0.3609424128871086, "learning_rate": 9.801708720291322e-07, "loss": 5.0305, "step": 8380 }, { "epoch": 0.36180385062669596, "learning_rate": 9.801223900536535e-07, "loss": 4.7833, "step": 8400 }, { "epoch": 0.3626652883662833, "learning_rate": 9.800739080781743e-07, "loss": 4.714, "step": 8420 }, { "epoch": 0.3635267261058707, "learning_rate": 9.800254261026956e-07, "loss": 4.7957, "step": 8440 }, { "epoch": 0.3643881638454581, "learning_rate": 9.799769441272167e-07, "loss": 4.9877, "step": 8460 }, { "epoch": 0.36524960158504544, "learning_rate": 9.799284621517377e-07, "loss": 5.1407, "step": 8480 }, { "epoch": 0.3661110393246328, "learning_rate": 9.79879980176259e-07, "loss": 5.0109, "step": 8500 }, { "epoch": 0.3669724770642202, "learning_rate": 9.798314982007799e-07, "loss": 5.0929, "step": 8520 }, { "epoch": 0.36783391480380756, "learning_rate": 9.797830162253012e-07, "loss": 4.8868, "step": 8540 }, { "epoch": 0.3686953525433949, "learning_rate": 9.79734534249822e-07, "loss": 5.0758, "step": 8560 }, { "epoch": 0.3695567902829823, "learning_rate": 9.796860522743433e-07, "loss": 4.7914, "step": 8580 }, { "epoch": 0.3704182280225697, "learning_rate": 9.796375702988644e-07, "loss": 4.9024, "step": 8600 }, { "epoch": 0.37127966576215704, "learning_rate": 9.795890883233855e-07, "loss": 5.1092, "step": 8620 }, { "epoch": 0.3721411035017444, "learning_rate": 9.795406063479067e-07, "loss": 5.0201, "step": 8640 }, { "epoch": 0.3730025412413318, "learning_rate": 9.794921243724276e-07, "loss": 4.9426, "step": 8660 }, { "epoch": 0.37386397898091916, "learning_rate": 9.794436423969489e-07, "loss": 5.0351, "step": 8680 }, { "epoch": 0.3747254167205065, "learning_rate": 9.7939516042147e-07, "loss": 4.855, "step": 8700 }, { "epoch": 0.3755868544600939, "learning_rate": 9.79346678445991e-07, "loss": 4.9019, "step": 8720 }, { "epoch": 0.3764482921996813, "learning_rate": 9.792981964705123e-07, "loss": 4.9067, "step": 8740 }, { "epoch": 0.37730972993926865, "learning_rate": 9.792497144950332e-07, "loss": 4.9602, "step": 8760 }, { "epoch": 0.378171167678856, "learning_rate": 9.792012325195544e-07, "loss": 5.1773, "step": 8780 }, { "epoch": 0.37903260541844336, "learning_rate": 9.791527505440753e-07, "loss": 4.8999, "step": 8800 }, { "epoch": 0.37989404315803077, "learning_rate": 9.791042685685966e-07, "loss": 5.1569, "step": 8820 }, { "epoch": 0.3807554808976181, "learning_rate": 9.790557865931177e-07, "loss": 5.0223, "step": 8840 }, { "epoch": 0.3816169186372055, "learning_rate": 9.790073046176387e-07, "loss": 4.9746, "step": 8860 }, { "epoch": 0.3824783563767929, "learning_rate": 9.7895882264216e-07, "loss": 4.7316, "step": 8880 }, { "epoch": 0.38333979411638025, "learning_rate": 9.789103406666809e-07, "loss": 4.8671, "step": 8900 }, { "epoch": 0.3842012318559676, "learning_rate": 9.788618586912022e-07, "loss": 4.8685, "step": 8920 }, { "epoch": 0.38506266959555496, "learning_rate": 9.78813376715723e-07, "loss": 5.0705, "step": 8940 }, { "epoch": 0.3859241073351424, "learning_rate": 9.787648947402443e-07, "loss": 5.0033, "step": 8960 }, { "epoch": 0.38678554507472973, "learning_rate": 9.787164127647654e-07, "loss": 5.13, "step": 8980 }, { "epoch": 0.3876469828143171, "learning_rate": 9.786679307892864e-07, "loss": 4.9016, "step": 9000 }, { "epoch": 0.38850842055390444, "learning_rate": 9.786194488138077e-07, "loss": 4.5963, "step": 9020 }, { "epoch": 0.38936985829349186, "learning_rate": 9.785709668383288e-07, "loss": 5.1174, "step": 9040 }, { "epoch": 0.3902312960330792, "learning_rate": 9.785224848628499e-07, "loss": 4.6056, "step": 9060 }, { "epoch": 0.39109273377266657, "learning_rate": 9.78474002887371e-07, "loss": 5.1854, "step": 9080 }, { "epoch": 0.391954171512254, "learning_rate": 9.78425520911892e-07, "loss": 5.0354, "step": 9100 }, { "epoch": 0.39281560925184134, "learning_rate": 9.783770389364133e-07, "loss": 4.973, "step": 9120 }, { "epoch": 0.3936770469914287, "learning_rate": 9.783285569609342e-07, "loss": 4.9599, "step": 9140 }, { "epoch": 0.39453848473101605, "learning_rate": 9.782800749854554e-07, "loss": 4.8212, "step": 9160 }, { "epoch": 0.39539992247060346, "learning_rate": 9.782315930099763e-07, "loss": 4.7895, "step": 9180 }, { "epoch": 0.3962613602101908, "learning_rate": 9.781831110344976e-07, "loss": 5.0378, "step": 9200 }, { "epoch": 0.3971227979497782, "learning_rate": 9.781346290590187e-07, "loss": 5.1935, "step": 9220 }, { "epoch": 0.39798423568936553, "learning_rate": 9.780861470835397e-07, "loss": 5.0377, "step": 9240 }, { "epoch": 0.39884567342895294, "learning_rate": 9.78037665108061e-07, "loss": 5.2153, "step": 9260 }, { "epoch": 0.3997071111685403, "learning_rate": 9.77989183132582e-07, "loss": 5.0034, "step": 9280 }, { "epoch": 0.40056854890812765, "learning_rate": 9.779407011571032e-07, "loss": 5.0281, "step": 9300 }, { "epoch": 0.401429986647715, "learning_rate": 9.778922191816242e-07, "loss": 4.5347, "step": 9320 }, { "epoch": 0.4022914243873024, "learning_rate": 9.778437372061453e-07, "loss": 4.7715, "step": 9340 }, { "epoch": 0.4031528621268898, "learning_rate": 9.777952552306664e-07, "loss": 5.0449, "step": 9360 }, { "epoch": 0.40401429986647713, "learning_rate": 9.777467732551874e-07, "loss": 5.1752, "step": 9380 }, { "epoch": 0.40487573760606455, "learning_rate": 9.776982912797087e-07, "loss": 5.063, "step": 9400 }, { "epoch": 0.4057371753456519, "learning_rate": 9.776498093042298e-07, "loss": 4.8293, "step": 9420 }, { "epoch": 0.40659861308523926, "learning_rate": 9.776013273287509e-07, "loss": 5.0506, "step": 9440 }, { "epoch": 0.4074600508248266, "learning_rate": 9.77552845353272e-07, "loss": 5.0681, "step": 9460 }, { "epoch": 0.408321488564414, "learning_rate": 9.77504363377793e-07, "loss": 4.9795, "step": 9480 }, { "epoch": 0.4091829263040014, "learning_rate": 9.774558814023143e-07, "loss": 4.7326, "step": 9500 }, { "epoch": 0.41004436404358874, "learning_rate": 9.774073994268352e-07, "loss": 4.5815, "step": 9520 }, { "epoch": 0.4109058017831761, "learning_rate": 9.773589174513564e-07, "loss": 5.0059, "step": 9540 }, { "epoch": 0.4117672395227635, "learning_rate": 9.773104354758773e-07, "loss": 4.8963, "step": 9560 }, { "epoch": 0.41262867726235086, "learning_rate": 9.772619535003986e-07, "loss": 4.9941, "step": 9580 }, { "epoch": 0.4134901150019382, "learning_rate": 9.772134715249196e-07, "loss": 5.0495, "step": 9600 }, { "epoch": 0.41435155274152563, "learning_rate": 9.771649895494407e-07, "loss": 4.5602, "step": 9620 }, { "epoch": 0.415212990481113, "learning_rate": 9.77116507573962e-07, "loss": 5.1274, "step": 9640 }, { "epoch": 0.41607442822070034, "learning_rate": 9.77068025598483e-07, "loss": 5.2212, "step": 9660 }, { "epoch": 0.4169358659602877, "learning_rate": 9.770195436230041e-07, "loss": 4.8636, "step": 9680 }, { "epoch": 0.4177973036998751, "learning_rate": 9.769710616475252e-07, "loss": 5.0909, "step": 9700 }, { "epoch": 0.41865874143946247, "learning_rate": 9.769225796720463e-07, "loss": 5.0281, "step": 9720 }, { "epoch": 0.4195201791790498, "learning_rate": 9.768740976965676e-07, "loss": 4.9283, "step": 9740 }, { "epoch": 0.4203816169186372, "learning_rate": 9.768256157210884e-07, "loss": 4.6899, "step": 9760 }, { "epoch": 0.4212430546582246, "learning_rate": 9.767771337456097e-07, "loss": 5.1294, "step": 9780 }, { "epoch": 0.42210449239781195, "learning_rate": 9.767286517701308e-07, "loss": 5.0745, "step": 9800 }, { "epoch": 0.4229659301373993, "learning_rate": 9.766801697946519e-07, "loss": 5.1475, "step": 9820 }, { "epoch": 0.4238273678769867, "learning_rate": 9.76631687819173e-07, "loss": 4.9102, "step": 9840 }, { "epoch": 0.42468880561657407, "learning_rate": 9.76583205843694e-07, "loss": 4.8146, "step": 9860 }, { "epoch": 0.42555024335616143, "learning_rate": 9.765347238682153e-07, "loss": 4.8416, "step": 9880 }, { "epoch": 0.4264116810957488, "learning_rate": 9.764862418927361e-07, "loss": 4.9126, "step": 9900 }, { "epoch": 0.4272731188353362, "learning_rate": 9.764377599172574e-07, "loss": 4.7237, "step": 9920 }, { "epoch": 0.42813455657492355, "learning_rate": 9.763892779417783e-07, "loss": 4.93, "step": 9940 }, { "epoch": 0.4289959943145109, "learning_rate": 9.763407959662996e-07, "loss": 4.8873, "step": 9960 }, { "epoch": 0.42985743205409827, "learning_rate": 9.762923139908206e-07, "loss": 4.7226, "step": 9980 }, { "epoch": 0.4307188697936857, "learning_rate": 9.762438320153417e-07, "loss": 5.0115, "step": 10000 }, { "epoch": 0.43158030753327303, "learning_rate": 9.76195350039863e-07, "loss": 4.9169, "step": 10020 }, { "epoch": 0.4324417452728604, "learning_rate": 9.76146868064384e-07, "loss": 4.9183, "step": 10040 }, { "epoch": 0.4333031830124478, "learning_rate": 9.760983860889051e-07, "loss": 4.8978, "step": 10060 }, { "epoch": 0.43416462075203516, "learning_rate": 9.760499041134262e-07, "loss": 4.6671, "step": 10080 }, { "epoch": 0.4350260584916225, "learning_rate": 9.760014221379473e-07, "loss": 4.9651, "step": 10100 }, { "epoch": 0.43588749623120987, "learning_rate": 9.759529401624686e-07, "loss": 4.8849, "step": 10120 }, { "epoch": 0.4367489339707973, "learning_rate": 9.759044581869894e-07, "loss": 4.7284, "step": 10140 }, { "epoch": 0.43761037171038464, "learning_rate": 9.758559762115107e-07, "loss": 4.8962, "step": 10160 }, { "epoch": 0.438471809449972, "learning_rate": 9.758074942360318e-07, "loss": 5.2482, "step": 10180 }, { "epoch": 0.43933324718955935, "learning_rate": 9.757590122605529e-07, "loss": 4.751, "step": 10200 }, { "epoch": 0.44019468492914676, "learning_rate": 9.75710530285074e-07, "loss": 4.873, "step": 10220 }, { "epoch": 0.4410561226687341, "learning_rate": 9.75662048309595e-07, "loss": 4.8341, "step": 10240 }, { "epoch": 0.4419175604083215, "learning_rate": 9.756135663341163e-07, "loss": 4.8262, "step": 10260 }, { "epoch": 0.4427789981479089, "learning_rate": 9.755650843586374e-07, "loss": 5.131, "step": 10280 }, { "epoch": 0.44364043588749624, "learning_rate": 9.755166023831584e-07, "loss": 5.0421, "step": 10300 }, { "epoch": 0.4445018736270836, "learning_rate": 9.754681204076795e-07, "loss": 4.9971, "step": 10320 }, { "epoch": 0.44536331136667096, "learning_rate": 9.754196384322006e-07, "loss": 5.0195, "step": 10340 }, { "epoch": 0.44622474910625837, "learning_rate": 9.753711564567218e-07, "loss": 4.7907, "step": 10360 }, { "epoch": 0.4470861868458457, "learning_rate": 9.753226744812427e-07, "loss": 4.9427, "step": 10380 }, { "epoch": 0.4479476245854331, "learning_rate": 9.75274192505764e-07, "loss": 4.9174, "step": 10400 }, { "epoch": 0.44880906232502044, "learning_rate": 9.75225710530285e-07, "loss": 5.0763, "step": 10420 }, { "epoch": 0.44967050006460785, "learning_rate": 9.751772285548061e-07, "loss": 4.8748, "step": 10440 }, { "epoch": 0.4505319378041952, "learning_rate": 9.751287465793272e-07, "loss": 4.9506, "step": 10460 }, { "epoch": 0.45139337554378256, "learning_rate": 9.750802646038483e-07, "loss": 4.6341, "step": 10480 }, { "epoch": 0.45225481328336997, "learning_rate": 9.750317826283696e-07, "loss": 4.7974, "step": 10500 }, { "epoch": 0.45311625102295733, "learning_rate": 9.749833006528904e-07, "loss": 4.8822, "step": 10520 }, { "epoch": 0.4539776887625447, "learning_rate": 9.749348186774117e-07, "loss": 4.7732, "step": 10540 }, { "epoch": 0.45483912650213204, "learning_rate": 9.748863367019328e-07, "loss": 5.0418, "step": 10560 }, { "epoch": 0.45570056424171945, "learning_rate": 9.748378547264538e-07, "loss": 5.0333, "step": 10580 }, { "epoch": 0.4565620019813068, "learning_rate": 9.74789372750975e-07, "loss": 4.7601, "step": 10600 }, { "epoch": 0.45742343972089416, "learning_rate": 9.74740890775496e-07, "loss": 4.8413, "step": 10620 }, { "epoch": 0.4582848774604815, "learning_rate": 9.746924088000173e-07, "loss": 4.8576, "step": 10640 }, { "epoch": 0.45914631520006893, "learning_rate": 9.746439268245383e-07, "loss": 4.9137, "step": 10660 }, { "epoch": 0.4600077529396563, "learning_rate": 9.745954448490594e-07, "loss": 4.7917, "step": 10680 }, { "epoch": 0.46086919067924365, "learning_rate": 9.745469628735805e-07, "loss": 5.0347, "step": 10700 }, { "epoch": 0.461730628418831, "learning_rate": 9.744984808981016e-07, "loss": 5.0721, "step": 10720 }, { "epoch": 0.4625920661584184, "learning_rate": 9.744499989226228e-07, "loss": 5.223, "step": 10740 }, { "epoch": 0.46345350389800577, "learning_rate": 9.744015169471437e-07, "loss": 4.7617, "step": 10760 }, { "epoch": 0.4643149416375931, "learning_rate": 9.74353034971665e-07, "loss": 4.7315, "step": 10780 }, { "epoch": 0.46517637937718054, "learning_rate": 9.74304552996186e-07, "loss": 5.0133, "step": 10800 }, { "epoch": 0.4660378171167679, "learning_rate": 9.742560710207071e-07, "loss": 4.5951, "step": 10820 }, { "epoch": 0.46689925485635525, "learning_rate": 9.742075890452282e-07, "loss": 5.0109, "step": 10840 }, { "epoch": 0.4677606925959426, "learning_rate": 9.741591070697493e-07, "loss": 4.7951, "step": 10860 }, { "epoch": 0.46862213033553, "learning_rate": 9.741106250942706e-07, "loss": 4.9083, "step": 10880 }, { "epoch": 0.4694835680751174, "learning_rate": 9.740621431187916e-07, "loss": 5.0881, "step": 10900 }, { "epoch": 0.47034500581470473, "learning_rate": 9.740136611433127e-07, "loss": 4.9049, "step": 10920 }, { "epoch": 0.4712064435542921, "learning_rate": 9.73965179167834e-07, "loss": 5.0503, "step": 10940 }, { "epoch": 0.4720678812938795, "learning_rate": 9.739166971923548e-07, "loss": 5.1389, "step": 10960 }, { "epoch": 0.47292931903346686, "learning_rate": 9.73868215216876e-07, "loss": 4.6824, "step": 10980 }, { "epoch": 0.4737907567730542, "learning_rate": 9.73819733241397e-07, "loss": 5.0677, "step": 11000 }, { "epoch": 0.4746521945126416, "learning_rate": 9.737712512659183e-07, "loss": 4.8373, "step": 11020 }, { "epoch": 0.475513632252229, "learning_rate": 9.737227692904393e-07, "loss": 4.7048, "step": 11040 }, { "epoch": 0.47637506999181634, "learning_rate": 9.736742873149604e-07, "loss": 4.7661, "step": 11060 }, { "epoch": 0.4772365077314037, "learning_rate": 9.736258053394815e-07, "loss": 4.5163, "step": 11080 }, { "epoch": 0.4780979454709911, "learning_rate": 9.735773233640026e-07, "loss": 4.8271, "step": 11100 }, { "epoch": 0.47895938321057846, "learning_rate": 9.735288413885238e-07, "loss": 4.9904, "step": 11120 }, { "epoch": 0.4798208209501658, "learning_rate": 9.734803594130447e-07, "loss": 5.1391, "step": 11140 }, { "epoch": 0.4806822586897532, "learning_rate": 9.73431877437566e-07, "loss": 4.9359, "step": 11160 }, { "epoch": 0.4815436964293406, "learning_rate": 9.73383395462087e-07, "loss": 4.7196, "step": 11180 }, { "epoch": 0.48240513416892794, "learning_rate": 9.733349134866081e-07, "loss": 4.8989, "step": 11200 }, { "epoch": 0.4832665719085153, "learning_rate": 9.732864315111292e-07, "loss": 4.5728, "step": 11220 }, { "epoch": 0.4841280096481027, "learning_rate": 9.732379495356503e-07, "loss": 4.8242, "step": 11240 }, { "epoch": 0.48498944738769006, "learning_rate": 9.731894675601715e-07, "loss": 4.9628, "step": 11260 }, { "epoch": 0.4858508851272774, "learning_rate": 9.731409855846926e-07, "loss": 4.9849, "step": 11280 }, { "epoch": 0.4867123228668648, "learning_rate": 9.730925036092137e-07, "loss": 4.7428, "step": 11300 }, { "epoch": 0.4875737606064522, "learning_rate": 9.73044021633735e-07, "loss": 4.84, "step": 11320 }, { "epoch": 0.48843519834603955, "learning_rate": 9.729955396582558e-07, "loss": 4.8301, "step": 11340 }, { "epoch": 0.4892966360856269, "learning_rate": 9.729470576827771e-07, "loss": 4.8208, "step": 11360 }, { "epoch": 0.49015807382521426, "learning_rate": 9.72898575707298e-07, "loss": 4.799, "step": 11380 }, { "epoch": 0.49101951156480167, "learning_rate": 9.728500937318193e-07, "loss": 4.8384, "step": 11400 }, { "epoch": 0.491880949304389, "learning_rate": 9.728016117563403e-07, "loss": 4.8149, "step": 11420 }, { "epoch": 0.4927423870439764, "learning_rate": 9.727531297808614e-07, "loss": 5.0723, "step": 11440 }, { "epoch": 0.4936038247835638, "learning_rate": 9.727046478053825e-07, "loss": 4.8349, "step": 11460 }, { "epoch": 0.49446526252315115, "learning_rate": 9.726561658299035e-07, "loss": 4.8027, "step": 11480 }, { "epoch": 0.4953267002627385, "learning_rate": 9.726076838544248e-07, "loss": 4.4655, "step": 11500 }, { "epoch": 0.49618813800232586, "learning_rate": 9.725592018789457e-07, "loss": 4.6683, "step": 11520 }, { "epoch": 0.4970495757419133, "learning_rate": 9.72510719903467e-07, "loss": 4.3801, "step": 11540 }, { "epoch": 0.49791101348150063, "learning_rate": 9.72462237927988e-07, "loss": 4.8508, "step": 11560 }, { "epoch": 0.498772451221088, "learning_rate": 9.724137559525091e-07, "loss": 4.8787, "step": 11580 }, { "epoch": 0.49963388896067534, "learning_rate": 9.723652739770302e-07, "loss": 4.9498, "step": 11600 }, { "epoch": 0.5004953267002628, "learning_rate": 9.723167920015513e-07, "loss": 5.0344, "step": 11620 }, { "epoch": 0.5013567644398501, "learning_rate": 9.722683100260725e-07, "loss": 5.0325, "step": 11640 }, { "epoch": 0.5022182021794375, "learning_rate": 9.722198280505936e-07, "loss": 4.8234, "step": 11660 }, { "epoch": 0.5030796399190248, "learning_rate": 9.721713460751147e-07, "loss": 4.5882, "step": 11680 }, { "epoch": 0.5039410776586122, "learning_rate": 9.72122864099636e-07, "loss": 4.864, "step": 11700 }, { "epoch": 0.5048025153981996, "learning_rate": 9.720743821241568e-07, "loss": 4.8641, "step": 11720 }, { "epoch": 0.505663953137787, "learning_rate": 9.720259001486781e-07, "loss": 4.8875, "step": 11740 }, { "epoch": 0.5065253908773744, "learning_rate": 9.71977418173199e-07, "loss": 5.13, "step": 11760 }, { "epoch": 0.5073868286169617, "learning_rate": 9.719289361977203e-07, "loss": 4.8252, "step": 11780 }, { "epoch": 0.5082482663565491, "learning_rate": 9.718804542222413e-07, "loss": 4.9937, "step": 11800 }, { "epoch": 0.5091097040961364, "learning_rate": 9.718319722467624e-07, "loss": 4.9656, "step": 11820 }, { "epoch": 0.5099711418357238, "learning_rate": 9.717834902712837e-07, "loss": 4.8202, "step": 11840 }, { "epoch": 0.5108325795753111, "learning_rate": 9.717350082958045e-07, "loss": 4.8806, "step": 11860 }, { "epoch": 0.5116940173148986, "learning_rate": 9.716865263203258e-07, "loss": 4.7003, "step": 11880 }, { "epoch": 0.512555455054486, "learning_rate": 9.71638044344847e-07, "loss": 4.9151, "step": 11900 }, { "epoch": 0.5134168927940733, "learning_rate": 9.71589562369368e-07, "loss": 4.7234, "step": 11920 }, { "epoch": 0.5142783305336607, "learning_rate": 9.715410803938892e-07, "loss": 4.8458, "step": 11940 }, { "epoch": 0.515139768273248, "learning_rate": 9.714925984184101e-07, "loss": 4.7352, "step": 11960 }, { "epoch": 0.5160012060128354, "learning_rate": 9.714441164429314e-07, "loss": 5.0708, "step": 11980 }, { "epoch": 0.5168626437524227, "learning_rate": 9.713956344674523e-07, "loss": 4.8615, "step": 12000 }, { "epoch": 0.5177240814920102, "learning_rate": 9.713471524919735e-07, "loss": 4.8537, "step": 12020 }, { "epoch": 0.5185855192315976, "learning_rate": 9.712986705164946e-07, "loss": 4.6005, "step": 12040 }, { "epoch": 0.5194469569711849, "learning_rate": 9.712501885410157e-07, "loss": 4.7112, "step": 12060 }, { "epoch": 0.5203083947107723, "learning_rate": 9.71201706565537e-07, "loss": 4.7967, "step": 12080 }, { "epoch": 0.5211698324503596, "learning_rate": 9.711532245900578e-07, "loss": 4.8298, "step": 12100 }, { "epoch": 0.522031270189947, "learning_rate": 9.71104742614579e-07, "loss": 4.527, "step": 12120 }, { "epoch": 0.5228927079295344, "learning_rate": 9.710562606391e-07, "loss": 4.7663, "step": 12140 }, { "epoch": 0.5237541456691218, "learning_rate": 9.710077786636212e-07, "loss": 4.8853, "step": 12160 }, { "epoch": 0.5246155834087092, "learning_rate": 9.709592966881423e-07, "loss": 4.6954, "step": 12180 }, { "epoch": 0.5254770211482965, "learning_rate": 9.709108147126634e-07, "loss": 4.7645, "step": 12200 }, { "epoch": 0.5263384588878839, "learning_rate": 9.708623327371847e-07, "loss": 4.6727, "step": 12220 }, { "epoch": 0.5271998966274712, "learning_rate": 9.708138507617055e-07, "loss": 4.5477, "step": 12240 }, { "epoch": 0.5280613343670586, "learning_rate": 9.707653687862268e-07, "loss": 4.6893, "step": 12260 }, { "epoch": 0.528922772106646, "learning_rate": 9.707168868107479e-07, "loss": 4.8461, "step": 12280 }, { "epoch": 0.5297842098462333, "learning_rate": 9.70668404835269e-07, "loss": 4.5713, "step": 12300 }, { "epoch": 0.5306456475858208, "learning_rate": 9.706199228597902e-07, "loss": 4.6324, "step": 12320 }, { "epoch": 0.5315070853254081, "learning_rate": 9.70571440884311e-07, "loss": 4.9228, "step": 12340 }, { "epoch": 0.5323685230649955, "learning_rate": 9.705229589088324e-07, "loss": 4.947, "step": 12360 }, { "epoch": 0.5332299608045828, "learning_rate": 9.704744769333532e-07, "loss": 4.7576, "step": 12380 }, { "epoch": 0.5340913985441702, "learning_rate": 9.704259949578745e-07, "loss": 4.6725, "step": 12400 }, { "epoch": 0.5349528362837576, "learning_rate": 9.703775129823956e-07, "loss": 4.677, "step": 12420 }, { "epoch": 0.5358142740233449, "learning_rate": 9.703290310069167e-07, "loss": 4.7095, "step": 12440 }, { "epoch": 0.5366757117629324, "learning_rate": 9.70280549031438e-07, "loss": 4.8679, "step": 12460 }, { "epoch": 0.5375371495025197, "learning_rate": 9.702320670559588e-07, "loss": 4.7676, "step": 12480 }, { "epoch": 0.5383985872421071, "learning_rate": 9.7018358508048e-07, "loss": 4.7246, "step": 12500 }, { "epoch": 0.5392600249816945, "learning_rate": 9.701351031050012e-07, "loss": 4.7469, "step": 12520 }, { "epoch": 0.5401214627212818, "learning_rate": 9.700866211295222e-07, "loss": 4.7057, "step": 12540 }, { "epoch": 0.5409829004608692, "learning_rate": 9.700381391540435e-07, "loss": 4.5612, "step": 12560 }, { "epoch": 0.5418443382004565, "learning_rate": 9.699896571785644e-07, "loss": 4.6163, "step": 12580 }, { "epoch": 0.542705775940044, "learning_rate": 9.699411752030857e-07, "loss": 4.5884, "step": 12600 }, { "epoch": 0.5435672136796313, "learning_rate": 9.698926932276065e-07, "loss": 4.7224, "step": 12620 }, { "epoch": 0.5444286514192187, "learning_rate": 9.698442112521278e-07, "loss": 4.7125, "step": 12640 }, { "epoch": 0.5452900891588061, "learning_rate": 9.697957292766489e-07, "loss": 4.5919, "step": 12660 }, { "epoch": 0.5461515268983934, "learning_rate": 9.6974724730117e-07, "loss": 4.9423, "step": 12680 }, { "epoch": 0.5470129646379808, "learning_rate": 9.696987653256912e-07, "loss": 4.7632, "step": 12700 }, { "epoch": 0.5478744023775681, "learning_rate": 9.69650283350212e-07, "loss": 4.7262, "step": 12720 }, { "epoch": 0.5487358401171555, "learning_rate": 9.696018013747334e-07, "loss": 4.8491, "step": 12740 }, { "epoch": 0.549597277856743, "learning_rate": 9.695533193992542e-07, "loss": 4.9667, "step": 12760 }, { "epoch": 0.5504587155963303, "learning_rate": 9.695048374237755e-07, "loss": 4.669, "step": 12780 }, { "epoch": 0.5513201533359177, "learning_rate": 9.694563554482966e-07, "loss": 4.7343, "step": 12800 }, { "epoch": 0.552181591075505, "learning_rate": 9.694078734728177e-07, "loss": 5.1538, "step": 12820 }, { "epoch": 0.5530430288150924, "learning_rate": 9.69359391497339e-07, "loss": 4.6751, "step": 12840 }, { "epoch": 0.5539044665546797, "learning_rate": 9.693109095218598e-07, "loss": 4.249, "step": 12860 }, { "epoch": 0.5547659042942671, "learning_rate": 9.69262427546381e-07, "loss": 4.5602, "step": 12880 }, { "epoch": 0.5556273420338546, "learning_rate": 9.692139455709022e-07, "loss": 4.7908, "step": 12900 }, { "epoch": 0.5564887797734419, "learning_rate": 9.691654635954232e-07, "loss": 4.8403, "step": 12920 }, { "epoch": 0.5573502175130293, "learning_rate": 9.691169816199445e-07, "loss": 4.654, "step": 12940 }, { "epoch": 0.5582116552526166, "learning_rate": 9.690684996444654e-07, "loss": 4.9628, "step": 12960 }, { "epoch": 0.559073092992204, "learning_rate": 9.690200176689867e-07, "loss": 4.6785, "step": 12980 }, { "epoch": 0.5599345307317913, "learning_rate": 9.689715356935075e-07, "loss": 4.7578, "step": 13000 }, { "epoch": 0.5607959684713787, "learning_rate": 9.689230537180288e-07, "loss": 4.6124, "step": 13020 }, { "epoch": 0.561657406210966, "learning_rate": 9.688745717425499e-07, "loss": 4.7457, "step": 13040 }, { "epoch": 0.5625188439505535, "learning_rate": 9.68826089767071e-07, "loss": 4.656, "step": 13060 }, { "epoch": 0.5633802816901409, "learning_rate": 9.687776077915922e-07, "loss": 4.6385, "step": 13080 }, { "epoch": 0.5642417194297282, "learning_rate": 9.687291258161133e-07, "loss": 4.791, "step": 13100 }, { "epoch": 0.5651031571693156, "learning_rate": 9.686806438406344e-07, "loss": 4.6335, "step": 13120 }, { "epoch": 0.5659645949089029, "learning_rate": 9.686321618651552e-07, "loss": 4.9381, "step": 13140 }, { "epoch": 0.5668260326484903, "learning_rate": 9.685836798896765e-07, "loss": 4.5365, "step": 13160 }, { "epoch": 0.5676874703880777, "learning_rate": 9.685351979141976e-07, "loss": 4.7251, "step": 13180 }, { "epoch": 0.5685489081276651, "learning_rate": 9.684867159387187e-07, "loss": 4.6211, "step": 13200 }, { "epoch": 0.5694103458672525, "learning_rate": 9.6843823396324e-07, "loss": 4.8273, "step": 13220 }, { "epoch": 0.5702717836068398, "learning_rate": 9.683897519877608e-07, "loss": 4.8895, "step": 13240 }, { "epoch": 0.5711332213464272, "learning_rate": 9.68341270012282e-07, "loss": 5.1973, "step": 13260 }, { "epoch": 0.5719946590860145, "learning_rate": 9.682927880368032e-07, "loss": 4.7367, "step": 13280 }, { "epoch": 0.5728560968256019, "learning_rate": 9.682443060613242e-07, "loss": 4.807, "step": 13300 }, { "epoch": 0.5737175345651893, "learning_rate": 9.681958240858455e-07, "loss": 4.7434, "step": 13320 }, { "epoch": 0.5745789723047767, "learning_rate": 9.681473421103664e-07, "loss": 4.8386, "step": 13340 }, { "epoch": 0.5754404100443641, "learning_rate": 9.680988601348877e-07, "loss": 4.9904, "step": 13360 }, { "epoch": 0.5763018477839514, "learning_rate": 9.680503781594085e-07, "loss": 4.6482, "step": 13380 }, { "epoch": 0.5771632855235388, "learning_rate": 9.680018961839298e-07, "loss": 4.8677, "step": 13400 }, { "epoch": 0.5780247232631262, "learning_rate": 9.679534142084509e-07, "loss": 4.8015, "step": 13420 }, { "epoch": 0.5788861610027135, "learning_rate": 9.67904932232972e-07, "loss": 4.9403, "step": 13440 }, { "epoch": 0.5797475987423009, "learning_rate": 9.678564502574932e-07, "loss": 4.8006, "step": 13460 }, { "epoch": 0.5806090364818882, "learning_rate": 9.678079682820143e-07, "loss": 4.9156, "step": 13480 }, { "epoch": 0.5814704742214757, "learning_rate": 9.677594863065354e-07, "loss": 4.575, "step": 13500 }, { "epoch": 0.582331911961063, "learning_rate": 9.677110043310564e-07, "loss": 4.6135, "step": 13520 }, { "epoch": 0.5831933497006504, "learning_rate": 9.676625223555775e-07, "loss": 4.8542, "step": 13540 }, { "epoch": 0.5840547874402378, "learning_rate": 9.676140403800988e-07, "loss": 4.5898, "step": 13560 }, { "epoch": 0.5849162251798251, "learning_rate": 9.675655584046197e-07, "loss": 4.6785, "step": 13580 }, { "epoch": 0.5857776629194125, "learning_rate": 9.67517076429141e-07, "loss": 4.4886, "step": 13600 }, { "epoch": 0.5866391006589998, "learning_rate": 9.67468594453662e-07, "loss": 4.8392, "step": 13620 }, { "epoch": 0.5875005383985873, "learning_rate": 9.67420112478183e-07, "loss": 4.7093, "step": 13640 }, { "epoch": 0.5883619761381746, "learning_rate": 9.673716305027042e-07, "loss": 4.5766, "step": 13660 }, { "epoch": 0.589223413877762, "learning_rate": 9.673231485272252e-07, "loss": 4.6722, "step": 13680 }, { "epoch": 0.5900848516173494, "learning_rate": 9.672746665517465e-07, "loss": 4.7496, "step": 13700 }, { "epoch": 0.5909462893569367, "learning_rate": 9.672261845762674e-07, "loss": 4.5997, "step": 13720 }, { "epoch": 0.5918077270965241, "learning_rate": 9.671777026007886e-07, "loss": 4.811, "step": 13740 }, { "epoch": 0.5926691648361114, "learning_rate": 9.671292206253095e-07, "loss": 4.7544, "step": 13760 }, { "epoch": 0.5935306025756989, "learning_rate": 9.670807386498308e-07, "loss": 4.872, "step": 13780 }, { "epoch": 0.5943920403152863, "learning_rate": 9.670322566743519e-07, "loss": 4.616, "step": 13800 }, { "epoch": 0.5952534780548736, "learning_rate": 9.66983774698873e-07, "loss": 4.6266, "step": 13820 }, { "epoch": 0.596114915794461, "learning_rate": 9.669352927233942e-07, "loss": 4.8612, "step": 13840 }, { "epoch": 0.5969763535340483, "learning_rate": 9.668868107479153e-07, "loss": 4.4442, "step": 13860 }, { "epoch": 0.5978377912736357, "learning_rate": 9.668383287724364e-07, "loss": 4.6665, "step": 13880 }, { "epoch": 0.598699229013223, "learning_rate": 9.667898467969574e-07, "loss": 4.5713, "step": 13900 }, { "epoch": 0.5995606667528104, "learning_rate": 9.667413648214785e-07, "loss": 4.6818, "step": 13920 }, { "epoch": 0.6004221044923979, "learning_rate": 9.666928828459998e-07, "loss": 4.8311, "step": 13940 }, { "epoch": 0.6012835422319852, "learning_rate": 9.666444008705206e-07, "loss": 4.7409, "step": 13960 }, { "epoch": 0.6021449799715726, "learning_rate": 9.66595918895042e-07, "loss": 4.6599, "step": 13980 }, { "epoch": 0.6030064177111599, "learning_rate": 9.66547436919563e-07, "loss": 4.5503, "step": 14000 }, { "epoch": 0.6038678554507473, "learning_rate": 9.66498954944084e-07, "loss": 4.6705, "step": 14020 }, { "epoch": 0.6047292931903346, "learning_rate": 9.664504729686051e-07, "loss": 4.9944, "step": 14040 }, { "epoch": 0.605590730929922, "learning_rate": 9.664019909931262e-07, "loss": 4.8301, "step": 14060 }, { "epoch": 0.6064521686695095, "learning_rate": 9.663535090176475e-07, "loss": 4.8497, "step": 14080 }, { "epoch": 0.6073136064090968, "learning_rate": 9.663050270421686e-07, "loss": 4.7864, "step": 14100 }, { "epoch": 0.6081750441486842, "learning_rate": 9.662565450666896e-07, "loss": 4.9266, "step": 14120 }, { "epoch": 0.6090364818882715, "learning_rate": 9.662080630912107e-07, "loss": 4.8071, "step": 14140 }, { "epoch": 0.6098979196278589, "learning_rate": 9.661595811157318e-07, "loss": 4.6743, "step": 14160 }, { "epoch": 0.6107593573674462, "learning_rate": 9.66111099140253e-07, "loss": 4.532, "step": 14180 }, { "epoch": 0.6116207951070336, "learning_rate": 9.66062617164774e-07, "loss": 4.9564, "step": 14200 }, { "epoch": 0.612482232846621, "learning_rate": 9.660141351892952e-07, "loss": 4.8359, "step": 14220 }, { "epoch": 0.6133436705862084, "learning_rate": 9.659656532138163e-07, "loss": 4.7172, "step": 14240 }, { "epoch": 0.6142051083257958, "learning_rate": 9.659171712383374e-07, "loss": 4.8027, "step": 14260 }, { "epoch": 0.6150665460653831, "learning_rate": 9.658686892628584e-07, "loss": 4.5783, "step": 14280 }, { "epoch": 0.6159279838049705, "learning_rate": 9.658202072873795e-07, "loss": 4.7292, "step": 14300 }, { "epoch": 0.6167894215445578, "learning_rate": 9.657717253119008e-07, "loss": 4.5805, "step": 14320 }, { "epoch": 0.6176508592841452, "learning_rate": 9.657232433364216e-07, "loss": 4.6905, "step": 14340 }, { "epoch": 0.6185122970237326, "learning_rate": 9.65674761360943e-07, "loss": 5.0062, "step": 14360 }, { "epoch": 0.61937373476332, "learning_rate": 9.65626279385464e-07, "loss": 4.5667, "step": 14380 }, { "epoch": 0.6202351725029074, "learning_rate": 9.65577797409985e-07, "loss": 4.7865, "step": 14400 }, { "epoch": 0.6210966102424947, "learning_rate": 9.655293154345061e-07, "loss": 4.8152, "step": 14420 }, { "epoch": 0.6219580479820821, "learning_rate": 9.654808334590272e-07, "loss": 5.001, "step": 14440 }, { "epoch": 0.6228194857216695, "learning_rate": 9.654323514835485e-07, "loss": 4.568, "step": 14460 }, { "epoch": 0.6236809234612568, "learning_rate": 9.653838695080696e-07, "loss": 5.1036, "step": 14480 }, { "epoch": 0.6245423612008442, "learning_rate": 9.653353875325906e-07, "loss": 4.6376, "step": 14500 }, { "epoch": 0.6254037989404316, "learning_rate": 9.652869055571117e-07, "loss": 4.8825, "step": 14520 }, { "epoch": 0.626265236680019, "learning_rate": 9.652384235816328e-07, "loss": 4.8115, "step": 14540 }, { "epoch": 0.6271266744196063, "learning_rate": 9.65189941606154e-07, "loss": 4.907, "step": 14560 }, { "epoch": 0.6279881121591937, "learning_rate": 9.65141459630675e-07, "loss": 4.7589, "step": 14580 }, { "epoch": 0.6288495498987811, "learning_rate": 9.650929776551962e-07, "loss": 4.6099, "step": 14600 }, { "epoch": 0.6297109876383684, "learning_rate": 9.650444956797173e-07, "loss": 4.7292, "step": 14620 }, { "epoch": 0.6305724253779558, "learning_rate": 9.649960137042383e-07, "loss": 4.8569, "step": 14640 }, { "epoch": 0.6314338631175431, "learning_rate": 9.649475317287594e-07, "loss": 4.476, "step": 14660 }, { "epoch": 0.6322953008571306, "learning_rate": 9.648990497532805e-07, "loss": 5.1189, "step": 14680 }, { "epoch": 0.633156738596718, "learning_rate": 9.648505677778018e-07, "loss": 4.4871, "step": 14700 }, { "epoch": 0.6340181763363053, "learning_rate": 9.648020858023228e-07, "loss": 4.7954, "step": 14720 }, { "epoch": 0.6348796140758927, "learning_rate": 9.64753603826844e-07, "loss": 4.8858, "step": 14740 }, { "epoch": 0.63574105181548, "learning_rate": 9.64705121851365e-07, "loss": 4.7299, "step": 14760 }, { "epoch": 0.6366024895550674, "learning_rate": 9.64656639875886e-07, "loss": 4.6986, "step": 14780 }, { "epoch": 0.6374639272946547, "learning_rate": 9.646081579004071e-07, "loss": 4.716, "step": 14800 }, { "epoch": 0.6383253650342422, "learning_rate": 9.645596759249282e-07, "loss": 4.8712, "step": 14820 }, { "epoch": 0.6391868027738296, "learning_rate": 9.645111939494495e-07, "loss": 4.5896, "step": 14840 }, { "epoch": 0.6400482405134169, "learning_rate": 9.644627119739706e-07, "loss": 4.8666, "step": 14860 }, { "epoch": 0.6409096782530043, "learning_rate": 9.644142299984916e-07, "loss": 4.6907, "step": 14880 }, { "epoch": 0.6417711159925916, "learning_rate": 9.643657480230127e-07, "loss": 4.7755, "step": 14900 }, { "epoch": 0.642632553732179, "learning_rate": 9.643172660475338e-07, "loss": 4.8946, "step": 14920 }, { "epoch": 0.6434939914717663, "learning_rate": 9.64268784072055e-07, "loss": 4.7275, "step": 14940 }, { "epoch": 0.6443554292113538, "learning_rate": 9.64220302096576e-07, "loss": 4.7162, "step": 14960 }, { "epoch": 0.6452168669509412, "learning_rate": 9.641718201210972e-07, "loss": 4.7541, "step": 14980 }, { "epoch": 0.6460783046905285, "learning_rate": 9.641233381456183e-07, "loss": 4.7376, "step": 15000 }, { "epoch": 0.6469397424301159, "learning_rate": 9.640748561701393e-07, "loss": 4.7821, "step": 15020 }, { "epoch": 0.6478011801697032, "learning_rate": 9.640263741946604e-07, "loss": 4.5146, "step": 15040 }, { "epoch": 0.6486626179092906, "learning_rate": 9.639778922191815e-07, "loss": 4.7323, "step": 15060 }, { "epoch": 0.6495240556488779, "learning_rate": 9.639294102437028e-07, "loss": 4.5574, "step": 15080 }, { "epoch": 0.6503854933884653, "learning_rate": 9.638809282682238e-07, "loss": 4.8495, "step": 15100 }, { "epoch": 0.6512469311280528, "learning_rate": 9.63832446292745e-07, "loss": 4.5805, "step": 15120 }, { "epoch": 0.6521083688676401, "learning_rate": 9.637839643172662e-07, "loss": 4.3699, "step": 15140 }, { "epoch": 0.6529698066072275, "learning_rate": 9.63735482341787e-07, "loss": 4.8785, "step": 15160 }, { "epoch": 0.6538312443468148, "learning_rate": 9.636870003663083e-07, "loss": 4.7165, "step": 15180 }, { "epoch": 0.6546926820864022, "learning_rate": 9.636385183908292e-07, "loss": 4.8152, "step": 15200 }, { "epoch": 0.6555541198259895, "learning_rate": 9.635900364153505e-07, "loss": 4.4532, "step": 15220 }, { "epoch": 0.6564155575655769, "learning_rate": 9.635415544398716e-07, "loss": 4.6873, "step": 15240 }, { "epoch": 0.6572769953051644, "learning_rate": 9.634930724643926e-07, "loss": 4.6663, "step": 15260 }, { "epoch": 0.6581384330447517, "learning_rate": 9.634445904889137e-07, "loss": 4.776, "step": 15280 }, { "epoch": 0.6589998707843391, "learning_rate": 9.633961085134348e-07, "loss": 4.6215, "step": 15300 }, { "epoch": 0.6598613085239264, "learning_rate": 9.63347626537956e-07, "loss": 4.921, "step": 15320 }, { "epoch": 0.6607227462635138, "learning_rate": 9.63299144562477e-07, "loss": 4.8543, "step": 15340 }, { "epoch": 0.6615841840031012, "learning_rate": 9.632506625869982e-07, "loss": 4.643, "step": 15360 }, { "epoch": 0.6624456217426885, "learning_rate": 9.632021806115193e-07, "loss": 4.901, "step": 15380 }, { "epoch": 0.663307059482276, "learning_rate": 9.631536986360403e-07, "loss": 4.6514, "step": 15400 }, { "epoch": 0.6641684972218633, "learning_rate": 9.631052166605614e-07, "loss": 4.6956, "step": 15420 }, { "epoch": 0.6650299349614507, "learning_rate": 9.630567346850825e-07, "loss": 4.6573, "step": 15440 }, { "epoch": 0.665891372701038, "learning_rate": 9.630082527096038e-07, "loss": 4.6353, "step": 15460 }, { "epoch": 0.6667528104406254, "learning_rate": 9.629597707341248e-07, "loss": 4.6602, "step": 15480 }, { "epoch": 0.6676142481802128, "learning_rate": 9.62911288758646e-07, "loss": 4.6295, "step": 15500 }, { "epoch": 0.6684756859198001, "learning_rate": 9.628628067831672e-07, "loss": 4.7094, "step": 15520 }, { "epoch": 0.6693371236593875, "learning_rate": 9.62814324807688e-07, "loss": 4.7686, "step": 15540 }, { "epoch": 0.6701985613989749, "learning_rate": 9.627658428322093e-07, "loss": 4.3885, "step": 15560 }, { "epoch": 0.6710599991385623, "learning_rate": 9.627173608567302e-07, "loss": 5.0316, "step": 15580 }, { "epoch": 0.6719214368781496, "learning_rate": 9.626688788812515e-07, "loss": 4.5627, "step": 15600 }, { "epoch": 0.672782874617737, "learning_rate": 9.626203969057725e-07, "loss": 4.3815, "step": 15620 }, { "epoch": 0.6736443123573244, "learning_rate": 9.625719149302936e-07, "loss": 4.74, "step": 15640 }, { "epoch": 0.6745057500969117, "learning_rate": 9.625234329548147e-07, "loss": 4.6831, "step": 15660 }, { "epoch": 0.6753671878364991, "learning_rate": 9.624749509793358e-07, "loss": 4.609, "step": 15680 }, { "epoch": 0.6762286255760865, "learning_rate": 9.62426469003857e-07, "loss": 4.823, "step": 15700 }, { "epoch": 0.6770900633156739, "learning_rate": 9.623779870283781e-07, "loss": 4.9465, "step": 15720 }, { "epoch": 0.6779515010552613, "learning_rate": 9.623295050528992e-07, "loss": 4.7628, "step": 15740 }, { "epoch": 0.6788129387948486, "learning_rate": 9.622810230774205e-07, "loss": 4.5836, "step": 15760 }, { "epoch": 0.679674376534436, "learning_rate": 9.622325411019413e-07, "loss": 4.8521, "step": 15780 }, { "epoch": 0.6805358142740233, "learning_rate": 9.621840591264626e-07, "loss": 4.7569, "step": 15800 }, { "epoch": 0.6813972520136107, "learning_rate": 9.621355771509835e-07, "loss": 4.4686, "step": 15820 }, { "epoch": 0.682258689753198, "learning_rate": 9.620870951755048e-07, "loss": 4.7357, "step": 15840 }, { "epoch": 0.6831201274927855, "learning_rate": 9.620386132000258e-07, "loss": 4.6195, "step": 15860 }, { "epoch": 0.6839815652323729, "learning_rate": 9.61990131224547e-07, "loss": 4.7323, "step": 15880 }, { "epoch": 0.6848430029719602, "learning_rate": 9.619416492490682e-07, "loss": 4.6049, "step": 15900 }, { "epoch": 0.6857044407115476, "learning_rate": 9.61893167273589e-07, "loss": 4.5214, "step": 15920 }, { "epoch": 0.6865658784511349, "learning_rate": 9.618446852981103e-07, "loss": 4.4264, "step": 15940 }, { "epoch": 0.6874273161907223, "learning_rate": 9.617962033226312e-07, "loss": 4.9132, "step": 15960 }, { "epoch": 0.6882887539303096, "learning_rate": 9.617477213471525e-07, "loss": 4.5938, "step": 15980 }, { "epoch": 0.6891501916698971, "learning_rate": 9.616992393716735e-07, "loss": 4.8424, "step": 16000 }, { "epoch": 0.6900116294094845, "learning_rate": 9.616507573961946e-07, "loss": 4.7597, "step": 16020 }, { "epoch": 0.6908730671490718, "learning_rate": 9.616022754207159e-07, "loss": 4.7149, "step": 16040 }, { "epoch": 0.6917345048886592, "learning_rate": 9.615537934452368e-07, "loss": 4.6915, "step": 16060 }, { "epoch": 0.6925959426282465, "learning_rate": 9.61505311469758e-07, "loss": 4.714, "step": 16080 }, { "epoch": 0.6934573803678339, "learning_rate": 9.614568294942791e-07, "loss": 4.497, "step": 16100 }, { "epoch": 0.6943188181074212, "learning_rate": 9.614083475188002e-07, "loss": 4.8497, "step": 16120 }, { "epoch": 0.6951802558470087, "learning_rate": 9.613598655433215e-07, "loss": 4.691, "step": 16140 }, { "epoch": 0.6960416935865961, "learning_rate": 9.613113835678423e-07, "loss": 4.8417, "step": 16160 }, { "epoch": 0.6969031313261834, "learning_rate": 9.612629015923636e-07, "loss": 4.7323, "step": 16180 }, { "epoch": 0.6977645690657708, "learning_rate": 9.612144196168845e-07, "loss": 4.632, "step": 16200 }, { "epoch": 0.6986260068053581, "learning_rate": 9.611659376414057e-07, "loss": 4.8886, "step": 16220 }, { "epoch": 0.6994874445449455, "learning_rate": 9.611174556659268e-07, "loss": 4.3369, "step": 16240 }, { "epoch": 0.7003488822845328, "learning_rate": 9.610689736904479e-07, "loss": 4.6227, "step": 16260 }, { "epoch": 0.7012103200241202, "learning_rate": 9.610204917149692e-07, "loss": 4.5117, "step": 16280 }, { "epoch": 0.7020717577637077, "learning_rate": 9.6097200973949e-07, "loss": 4.7759, "step": 16300 }, { "epoch": 0.702933195503295, "learning_rate": 9.609235277640113e-07, "loss": 4.6303, "step": 16320 }, { "epoch": 0.7037946332428824, "learning_rate": 9.608750457885324e-07, "loss": 4.836, "step": 16340 }, { "epoch": 0.7046560709824697, "learning_rate": 9.608265638130535e-07, "loss": 4.7166, "step": 16360 }, { "epoch": 0.7055175087220571, "learning_rate": 9.607780818375745e-07, "loss": 4.489, "step": 16380 }, { "epoch": 0.7063789464616445, "learning_rate": 9.607295998620956e-07, "loss": 4.6527, "step": 16400 }, { "epoch": 0.7072403842012318, "learning_rate": 9.606811178866169e-07, "loss": 4.4992, "step": 16420 }, { "epoch": 0.7081018219408193, "learning_rate": 9.606326359111377e-07, "loss": 4.9018, "step": 16440 }, { "epoch": 0.7089632596804066, "learning_rate": 9.60584153935659e-07, "loss": 4.854, "step": 16460 }, { "epoch": 0.709824697419994, "learning_rate": 9.6053567196018e-07, "loss": 4.8965, "step": 16480 }, { "epoch": 0.7106861351595813, "learning_rate": 9.604871899847012e-07, "loss": 4.5009, "step": 16500 }, { "epoch": 0.7115475728991687, "learning_rate": 9.604387080092225e-07, "loss": 4.8104, "step": 16520 }, { "epoch": 0.7124090106387561, "learning_rate": 9.603902260337433e-07, "loss": 4.698, "step": 16540 }, { "epoch": 0.7132704483783434, "learning_rate": 9.603417440582646e-07, "loss": 4.7735, "step": 16560 }, { "epoch": 0.7141318861179309, "learning_rate": 9.602932620827855e-07, "loss": 4.6545, "step": 16580 }, { "epoch": 0.7149933238575182, "learning_rate": 9.602447801073067e-07, "loss": 4.988, "step": 16600 }, { "epoch": 0.7158547615971056, "learning_rate": 9.601962981318278e-07, "loss": 4.6552, "step": 16620 }, { "epoch": 0.716716199336693, "learning_rate": 9.601478161563489e-07, "loss": 4.4583, "step": 16640 }, { "epoch": 0.7175776370762803, "learning_rate": 9.600993341808702e-07, "loss": 4.6773, "step": 16660 }, { "epoch": 0.7184390748158677, "learning_rate": 9.60050852205391e-07, "loss": 4.5659, "step": 16680 }, { "epoch": 0.719300512555455, "learning_rate": 9.600023702299123e-07, "loss": 4.866, "step": 16700 }, { "epoch": 0.7201619502950424, "learning_rate": 9.599538882544334e-07, "loss": 4.8379, "step": 16720 }, { "epoch": 0.7210233880346298, "learning_rate": 9.599054062789545e-07, "loss": 4.5252, "step": 16740 }, { "epoch": 0.7218848257742172, "learning_rate": 9.598569243034757e-07, "loss": 4.7735, "step": 16760 }, { "epoch": 0.7227462635138046, "learning_rate": 9.598084423279966e-07, "loss": 4.6124, "step": 16780 }, { "epoch": 0.7236077012533919, "learning_rate": 9.597599603525179e-07, "loss": 4.8224, "step": 16800 }, { "epoch": 0.7244691389929793, "learning_rate": 9.597114783770387e-07, "loss": 4.4931, "step": 16820 }, { "epoch": 0.7253305767325666, "learning_rate": 9.5966299640156e-07, "loss": 4.7042, "step": 16840 }, { "epoch": 0.726192014472154, "learning_rate": 9.59614514426081e-07, "loss": 4.4158, "step": 16860 }, { "epoch": 0.7270534522117414, "learning_rate": 9.595660324506022e-07, "loss": 4.6365, "step": 16880 }, { "epoch": 0.7279148899513288, "learning_rate": 9.595175504751234e-07, "loss": 4.6285, "step": 16900 }, { "epoch": 0.7287763276909162, "learning_rate": 9.594690684996443e-07, "loss": 4.6141, "step": 16920 }, { "epoch": 0.7296377654305035, "learning_rate": 9.594205865241656e-07, "loss": 4.6042, "step": 16940 }, { "epoch": 0.7304992031700909, "learning_rate": 9.593721045486865e-07, "loss": 4.4642, "step": 16960 }, { "epoch": 0.7313606409096782, "learning_rate": 9.593236225732077e-07, "loss": 4.7874, "step": 16980 }, { "epoch": 0.7322220786492656, "learning_rate": 9.592751405977288e-07, "loss": 4.7062, "step": 17000 }, { "epoch": 0.7330835163888529, "learning_rate": 9.592266586222499e-07, "loss": 4.7244, "step": 17020 }, { "epoch": 0.7339449541284404, "learning_rate": 9.591781766467712e-07, "loss": 4.6183, "step": 17040 }, { "epoch": 0.7348063918680278, "learning_rate": 9.59129694671292e-07, "loss": 4.592, "step": 17060 }, { "epoch": 0.7356678296076151, "learning_rate": 9.590812126958133e-07, "loss": 4.8319, "step": 17080 }, { "epoch": 0.7365292673472025, "learning_rate": 9.590327307203344e-07, "loss": 4.8552, "step": 17100 }, { "epoch": 0.7373907050867898, "learning_rate": 9.589842487448554e-07, "loss": 4.9682, "step": 17120 }, { "epoch": 0.7382521428263772, "learning_rate": 9.589357667693767e-07, "loss": 4.8142, "step": 17140 }, { "epoch": 0.7391135805659645, "learning_rate": 9.588872847938976e-07, "loss": 4.4919, "step": 17160 }, { "epoch": 0.739975018305552, "learning_rate": 9.588388028184189e-07, "loss": 4.7011, "step": 17180 }, { "epoch": 0.7408364560451394, "learning_rate": 9.587903208429397e-07, "loss": 4.7958, "step": 17200 }, { "epoch": 0.7416978937847267, "learning_rate": 9.58741838867461e-07, "loss": 4.6178, "step": 17220 }, { "epoch": 0.7425593315243141, "learning_rate": 9.58693356891982e-07, "loss": 4.6442, "step": 17240 }, { "epoch": 0.7434207692639014, "learning_rate": 9.586448749165032e-07, "loss": 4.7595, "step": 17260 }, { "epoch": 0.7442822070034888, "learning_rate": 9.585963929410244e-07, "loss": 4.7747, "step": 17280 }, { "epoch": 0.7451436447430762, "learning_rate": 9.585479109655455e-07, "loss": 4.7121, "step": 17300 }, { "epoch": 0.7460050824826636, "learning_rate": 9.584994289900666e-07, "loss": 4.8845, "step": 17320 }, { "epoch": 0.746866520222251, "learning_rate": 9.584509470145877e-07, "loss": 4.7855, "step": 17340 }, { "epoch": 0.7477279579618383, "learning_rate": 9.584024650391087e-07, "loss": 4.6848, "step": 17360 }, { "epoch": 0.7485893957014257, "learning_rate": 9.5835398306363e-07, "loss": 4.7187, "step": 17380 }, { "epoch": 0.749450833441013, "learning_rate": 9.583055010881509e-07, "loss": 4.5737, "step": 17400 }, { "epoch": 0.7503122711806004, "learning_rate": 9.582570191126722e-07, "loss": 4.9443, "step": 17420 }, { "epoch": 0.7511737089201878, "learning_rate": 9.58208537137193e-07, "loss": 4.9395, "step": 17440 }, { "epoch": 0.7520351466597751, "learning_rate": 9.581600551617143e-07, "loss": 4.5068, "step": 17460 }, { "epoch": 0.7528965843993626, "learning_rate": 9.581115731862354e-07, "loss": 4.9245, "step": 17480 }, { "epoch": 0.7537580221389499, "learning_rate": 9.580630912107564e-07, "loss": 4.6495, "step": 17500 }, { "epoch": 0.7546194598785373, "learning_rate": 9.580146092352777e-07, "loss": 4.4371, "step": 17520 }, { "epoch": 0.7554808976181246, "learning_rate": 9.579661272597986e-07, "loss": 4.5391, "step": 17540 }, { "epoch": 0.756342335357712, "learning_rate": 9.579176452843199e-07, "loss": 4.4885, "step": 17560 }, { "epoch": 0.7572037730972994, "learning_rate": 9.578691633088407e-07, "loss": 4.5693, "step": 17580 }, { "epoch": 0.7580652108368867, "learning_rate": 9.57820681333362e-07, "loss": 4.4952, "step": 17600 }, { "epoch": 0.7589266485764742, "learning_rate": 9.57772199357883e-07, "loss": 4.633, "step": 17620 }, { "epoch": 0.7597880863160615, "learning_rate": 9.577237173824042e-07, "loss": 4.6817, "step": 17640 }, { "epoch": 0.7606495240556489, "learning_rate": 9.576752354069254e-07, "loss": 4.647, "step": 17660 }, { "epoch": 0.7615109617952363, "learning_rate": 9.576267534314465e-07, "loss": 4.6863, "step": 17680 }, { "epoch": 0.7623723995348236, "learning_rate": 9.575782714559676e-07, "loss": 4.8397, "step": 17700 }, { "epoch": 0.763233837274411, "learning_rate": 9.575297894804887e-07, "loss": 4.4226, "step": 17720 }, { "epoch": 0.7640952750139983, "learning_rate": 9.574813075050097e-07, "loss": 4.4665, "step": 17740 }, { "epoch": 0.7649567127535858, "learning_rate": 9.57432825529531e-07, "loss": 4.6331, "step": 17760 }, { "epoch": 0.7658181504931731, "learning_rate": 9.573843435540519e-07, "loss": 4.7533, "step": 17780 }, { "epoch": 0.7666795882327605, "learning_rate": 9.573358615785731e-07, "loss": 4.4459, "step": 17800 }, { "epoch": 0.7675410259723479, "learning_rate": 9.572873796030942e-07, "loss": 4.5337, "step": 17820 }, { "epoch": 0.7684024637119352, "learning_rate": 9.572388976276153e-07, "loss": 4.6425, "step": 17840 }, { "epoch": 0.7692639014515226, "learning_rate": 9.571904156521364e-07, "loss": 4.4908, "step": 17860 }, { "epoch": 0.7701253391911099, "learning_rate": 9.571419336766574e-07, "loss": 4.6324, "step": 17880 }, { "epoch": 0.7709867769306973, "learning_rate": 9.570934517011787e-07, "loss": 4.5503, "step": 17900 }, { "epoch": 0.7718482146702847, "learning_rate": 9.570449697256998e-07, "loss": 4.3948, "step": 17920 }, { "epoch": 0.7727096524098721, "learning_rate": 9.569964877502209e-07, "loss": 4.7714, "step": 17940 }, { "epoch": 0.7735710901494595, "learning_rate": 9.56948005774742e-07, "loss": 4.4958, "step": 17960 }, { "epoch": 0.7744325278890468, "learning_rate": 9.56899523799263e-07, "loss": 4.8341, "step": 17980 }, { "epoch": 0.7752939656286342, "learning_rate": 9.56851041823784e-07, "loss": 4.8692, "step": 18000 }, { "epoch": 0.7761554033682215, "learning_rate": 9.568025598483051e-07, "loss": 4.5919, "step": 18020 }, { "epoch": 0.7770168411078089, "learning_rate": 9.567540778728264e-07, "loss": 4.5073, "step": 18040 }, { "epoch": 0.7778782788473964, "learning_rate": 9.567055958973475e-07, "loss": 4.541, "step": 18060 }, { "epoch": 0.7787397165869837, "learning_rate": 9.566571139218686e-07, "loss": 4.5663, "step": 18080 }, { "epoch": 0.7796011543265711, "learning_rate": 9.566086319463896e-07, "loss": 4.4336, "step": 18100 }, { "epoch": 0.7804625920661584, "learning_rate": 9.565601499709107e-07, "loss": 4.4726, "step": 18120 }, { "epoch": 0.7813240298057458, "learning_rate": 9.56511667995432e-07, "loss": 4.5123, "step": 18140 }, { "epoch": 0.7821854675453331, "learning_rate": 9.564631860199529e-07, "loss": 4.6115, "step": 18160 }, { "epoch": 0.7830469052849205, "learning_rate": 9.564147040444741e-07, "loss": 4.7271, "step": 18180 }, { "epoch": 0.783908343024508, "learning_rate": 9.563662220689952e-07, "loss": 4.815, "step": 18200 }, { "epoch": 0.7847697807640953, "learning_rate": 9.563177400935163e-07, "loss": 4.5781, "step": 18220 }, { "epoch": 0.7856312185036827, "learning_rate": 9.562692581180374e-07, "loss": 4.5923, "step": 18240 }, { "epoch": 0.78649265624327, "learning_rate": 9.562207761425584e-07, "loss": 4.6544, "step": 18260 }, { "epoch": 0.7873540939828574, "learning_rate": 9.561722941670797e-07, "loss": 4.6061, "step": 18280 }, { "epoch": 0.7882155317224447, "learning_rate": 9.561238121916008e-07, "loss": 4.6143, "step": 18300 }, { "epoch": 0.7890769694620321, "learning_rate": 9.560753302161219e-07, "loss": 4.6983, "step": 18320 }, { "epoch": 0.7899384072016195, "learning_rate": 9.56026848240643e-07, "loss": 4.7628, "step": 18340 }, { "epoch": 0.7907998449412069, "learning_rate": 9.55978366265164e-07, "loss": 4.629, "step": 18360 }, { "epoch": 0.7916612826807943, "learning_rate": 9.559298842896853e-07, "loss": 4.643, "step": 18380 }, { "epoch": 0.7925227204203816, "learning_rate": 9.558814023142061e-07, "loss": 4.6185, "step": 18400 }, { "epoch": 0.793384158159969, "learning_rate": 9.558329203387274e-07, "loss": 4.5335, "step": 18420 }, { "epoch": 0.7942455958995563, "learning_rate": 9.557844383632485e-07, "loss": 4.6082, "step": 18440 }, { "epoch": 0.7951070336391437, "learning_rate": 9.557359563877696e-07, "loss": 4.5302, "step": 18460 }, { "epoch": 0.7959684713787311, "learning_rate": 9.556874744122906e-07, "loss": 4.9049, "step": 18480 }, { "epoch": 0.7968299091183185, "learning_rate": 9.556389924368117e-07, "loss": 4.5565, "step": 18500 }, { "epoch": 0.7976913468579059, "learning_rate": 9.55590510461333e-07, "loss": 4.5934, "step": 18520 }, { "epoch": 0.7985527845974932, "learning_rate": 9.555420284858539e-07, "loss": 4.498, "step": 18540 }, { "epoch": 0.7994142223370806, "learning_rate": 9.554935465103751e-07, "loss": 4.8571, "step": 18560 }, { "epoch": 0.800275660076668, "learning_rate": 9.554450645348962e-07, "loss": 4.5185, "step": 18580 }, { "epoch": 0.8011370978162553, "learning_rate": 9.553965825594173e-07, "loss": 4.5657, "step": 18600 }, { "epoch": 0.8019985355558427, "learning_rate": 9.553481005839384e-07, "loss": 4.4812, "step": 18620 }, { "epoch": 0.80285997329543, "learning_rate": 9.552996186084594e-07, "loss": 4.6503, "step": 18640 }, { "epoch": 0.8037214110350175, "learning_rate": 9.552511366329807e-07, "loss": 4.8931, "step": 18660 }, { "epoch": 0.8045828487746048, "learning_rate": 9.552026546575018e-07, "loss": 4.8269, "step": 18680 }, { "epoch": 0.8054442865141922, "learning_rate": 9.551541726820228e-07, "loss": 4.7274, "step": 18700 }, { "epoch": 0.8063057242537796, "learning_rate": 9.55105690706544e-07, "loss": 4.4974, "step": 18720 }, { "epoch": 0.8071671619933669, "learning_rate": 9.55057208731065e-07, "loss": 4.5903, "step": 18740 }, { "epoch": 0.8080285997329543, "learning_rate": 9.550087267555863e-07, "loss": 4.4657, "step": 18760 }, { "epoch": 0.8088900374725416, "learning_rate": 9.549602447801071e-07, "loss": 4.5626, "step": 18780 }, { "epoch": 0.8097514752121291, "learning_rate": 9.549117628046284e-07, "loss": 4.6358, "step": 18800 }, { "epoch": 0.8106129129517164, "learning_rate": 9.548632808291495e-07, "loss": 4.5517, "step": 18820 }, { "epoch": 0.8114743506913038, "learning_rate": 9.548147988536706e-07, "loss": 4.7087, "step": 18840 }, { "epoch": 0.8123357884308912, "learning_rate": 9.547663168781916e-07, "loss": 4.3492, "step": 18860 }, { "epoch": 0.8131972261704785, "learning_rate": 9.547178349027127e-07, "loss": 4.4344, "step": 18880 }, { "epoch": 0.8140586639100659, "learning_rate": 9.54669352927234e-07, "loss": 4.3568, "step": 18900 }, { "epoch": 0.8149201016496532, "learning_rate": 9.54620870951755e-07, "loss": 4.685, "step": 18920 }, { "epoch": 0.8157815393892407, "learning_rate": 9.545723889762761e-07, "loss": 4.6504, "step": 18940 }, { "epoch": 0.816642977128828, "learning_rate": 9.545239070007974e-07, "loss": 4.5637, "step": 18960 }, { "epoch": 0.8175044148684154, "learning_rate": 9.544754250253183e-07, "loss": 4.4504, "step": 18980 }, { "epoch": 0.8183658526080028, "learning_rate": 9.544269430498396e-07, "loss": 4.6673, "step": 19000 }, { "epoch": 0.8192272903475901, "learning_rate": 9.543784610743604e-07, "loss": 4.7172, "step": 19020 }, { "epoch": 0.8200887280871775, "learning_rate": 9.543299790988817e-07, "loss": 4.6677, "step": 19040 }, { "epoch": 0.8209501658267648, "learning_rate": 9.542814971234028e-07, "loss": 4.5032, "step": 19060 }, { "epoch": 0.8218116035663522, "learning_rate": 9.542330151479238e-07, "loss": 4.6686, "step": 19080 }, { "epoch": 0.8226730413059397, "learning_rate": 9.54184533172445e-07, "loss": 4.5099, "step": 19100 }, { "epoch": 0.823534479045527, "learning_rate": 9.54136051196966e-07, "loss": 4.5959, "step": 19120 }, { "epoch": 0.8243959167851144, "learning_rate": 9.540875692214873e-07, "loss": 4.757, "step": 19140 }, { "epoch": 0.8252573545247017, "learning_rate": 9.540390872460081e-07, "loss": 4.6209, "step": 19160 }, { "epoch": 0.8261187922642891, "learning_rate": 9.539906052705294e-07, "loss": 4.6562, "step": 19180 }, { "epoch": 0.8269802300038764, "learning_rate": 9.539421232950505e-07, "loss": 4.5569, "step": 19200 }, { "epoch": 0.8278416677434638, "learning_rate": 9.538936413195716e-07, "loss": 4.5734, "step": 19220 }, { "epoch": 0.8287031054830513, "learning_rate": 9.538451593440926e-07, "loss": 4.4665, "step": 19240 }, { "epoch": 0.8295645432226386, "learning_rate": 9.537966773686137e-07, "loss": 4.8744, "step": 19260 }, { "epoch": 0.830425980962226, "learning_rate": 9.53748195393135e-07, "loss": 4.7477, "step": 19280 }, { "epoch": 0.8312874187018133, "learning_rate": 9.53699713417656e-07, "loss": 4.4835, "step": 19300 }, { "epoch": 0.8321488564414007, "learning_rate": 9.536512314421771e-07, "loss": 4.7603, "step": 19320 }, { "epoch": 0.833010294180988, "learning_rate": 9.536027494666983e-07, "loss": 4.6376, "step": 19340 }, { "epoch": 0.8338717319205754, "learning_rate": 9.535542674912193e-07, "loss": 4.688, "step": 19360 }, { "epoch": 0.8347331696601629, "learning_rate": 9.535057855157404e-07, "loss": 4.4329, "step": 19380 }, { "epoch": 0.8355946073997502, "learning_rate": 9.534573035402615e-07, "loss": 4.7792, "step": 19400 }, { "epoch": 0.8364560451393376, "learning_rate": 9.534088215647826e-07, "loss": 4.6019, "step": 19420 }, { "epoch": 0.8373174828789249, "learning_rate": 9.533603395893038e-07, "loss": 4.6412, "step": 19440 }, { "epoch": 0.8381789206185123, "learning_rate": 9.533118576138249e-07, "loss": 4.8209, "step": 19460 }, { "epoch": 0.8390403583580996, "learning_rate": 9.532633756383459e-07, "loss": 4.7257, "step": 19480 }, { "epoch": 0.839901796097687, "learning_rate": 9.532148936628671e-07, "loss": 4.5974, "step": 19500 }, { "epoch": 0.8407632338372744, "learning_rate": 9.531664116873882e-07, "loss": 4.7499, "step": 19520 }, { "epoch": 0.8416246715768618, "learning_rate": 9.531179297119093e-07, "loss": 4.8645, "step": 19540 }, { "epoch": 0.8424861093164492, "learning_rate": 9.530694477364304e-07, "loss": 4.5787, "step": 19560 }, { "epoch": 0.8433475470560365, "learning_rate": 9.530209657609516e-07, "loss": 4.4854, "step": 19580 }, { "epoch": 0.8442089847956239, "learning_rate": 9.529724837854727e-07, "loss": 4.7621, "step": 19600 }, { "epoch": 0.8450704225352113, "learning_rate": 9.529240018099936e-07, "loss": 4.7263, "step": 19620 }, { "epoch": 0.8459318602747986, "learning_rate": 9.528755198345148e-07, "loss": 4.7101, "step": 19640 }, { "epoch": 0.846793298014386, "learning_rate": 9.528270378590359e-07, "loss": 4.3076, "step": 19660 }, { "epoch": 0.8476547357539734, "learning_rate": 9.52778555883557e-07, "loss": 4.5904, "step": 19680 }, { "epoch": 0.8485161734935608, "learning_rate": 9.527300739080781e-07, "loss": 4.4537, "step": 19700 }, { "epoch": 0.8493776112331481, "learning_rate": 9.526815919325993e-07, "loss": 4.5525, "step": 19720 }, { "epoch": 0.8502390489727355, "learning_rate": 9.526331099571202e-07, "loss": 4.4868, "step": 19740 }, { "epoch": 0.8511004867123229, "learning_rate": 9.525846279816414e-07, "loss": 4.7816, "step": 19760 }, { "epoch": 0.8519619244519102, "learning_rate": 9.525361460061625e-07, "loss": 4.5658, "step": 19780 }, { "epoch": 0.8528233621914976, "learning_rate": 9.524876640306837e-07, "loss": 4.574, "step": 19800 }, { "epoch": 0.8536847999310849, "learning_rate": 9.524391820552048e-07, "loss": 4.7047, "step": 19820 }, { "epoch": 0.8545462376706724, "learning_rate": 9.523907000797259e-07, "loss": 4.353, "step": 19840 }, { "epoch": 0.8554076754102597, "learning_rate": 9.523422181042469e-07, "loss": 4.6372, "step": 19860 }, { "epoch": 0.8562691131498471, "learning_rate": 9.522937361287681e-07, "loss": 4.563, "step": 19880 }, { "epoch": 0.8571305508894345, "learning_rate": 9.522452541532892e-07, "loss": 4.7784, "step": 19900 }, { "epoch": 0.8579919886290218, "learning_rate": 9.521967721778103e-07, "loss": 4.8083, "step": 19920 }, { "epoch": 0.8588534263686092, "learning_rate": 9.521482902023314e-07, "loss": 4.7428, "step": 19940 }, { "epoch": 0.8597148641081965, "learning_rate": 9.520998082268526e-07, "loss": 4.6361, "step": 19960 }, { "epoch": 0.860576301847784, "learning_rate": 9.520513262513736e-07, "loss": 4.2954, "step": 19980 }, { "epoch": 0.8614377395873714, "learning_rate": 9.520028442758947e-07, "loss": 4.3637, "step": 20000 }, { "epoch": 0.8622991773269587, "learning_rate": 9.519543623004158e-07, "loss": 4.6597, "step": 20020 }, { "epoch": 0.8631606150665461, "learning_rate": 9.51905880324937e-07, "loss": 4.5807, "step": 20040 }, { "epoch": 0.8640220528061334, "learning_rate": 9.51857398349458e-07, "loss": 4.4035, "step": 20060 }, { "epoch": 0.8648834905457208, "learning_rate": 9.518089163739792e-07, "loss": 4.5505, "step": 20080 }, { "epoch": 0.8657449282853081, "learning_rate": 9.517604343985003e-07, "loss": 4.4001, "step": 20100 }, { "epoch": 0.8666063660248956, "learning_rate": 9.517119524230214e-07, "loss": 4.5568, "step": 20120 }, { "epoch": 0.867467803764483, "learning_rate": 9.516634704475424e-07, "loss": 4.5367, "step": 20140 }, { "epoch": 0.8683292415040703, "learning_rate": 9.516149884720635e-07, "loss": 4.1851, "step": 20160 }, { "epoch": 0.8691906792436577, "learning_rate": 9.515665064965847e-07, "loss": 4.6914, "step": 20180 }, { "epoch": 0.870052116983245, "learning_rate": 9.515180245211058e-07, "loss": 4.4509, "step": 20200 }, { "epoch": 0.8709135547228324, "learning_rate": 9.514695425456269e-07, "loss": 4.6518, "step": 20220 }, { "epoch": 0.8717749924624197, "learning_rate": 9.51421060570148e-07, "loss": 4.5476, "step": 20240 }, { "epoch": 0.8726364302020071, "learning_rate": 9.51372578594669e-07, "loss": 4.4285, "step": 20260 }, { "epoch": 0.8734978679415946, "learning_rate": 9.513240966191901e-07, "loss": 4.4528, "step": 20280 }, { "epoch": 0.8743593056811819, "learning_rate": 9.512756146437113e-07, "loss": 4.4412, "step": 20300 }, { "epoch": 0.8752207434207693, "learning_rate": 9.512271326682324e-07, "loss": 4.6117, "step": 20320 }, { "epoch": 0.8760821811603566, "learning_rate": 9.511786506927536e-07, "loss": 4.5079, "step": 20340 }, { "epoch": 0.876943618899944, "learning_rate": 9.511301687172747e-07, "loss": 4.5531, "step": 20360 }, { "epoch": 0.8778050566395313, "learning_rate": 9.510816867417957e-07, "loss": 4.5672, "step": 20380 }, { "epoch": 0.8786664943791187, "learning_rate": 9.510332047663168e-07, "loss": 4.581, "step": 20400 }, { "epoch": 0.8795279321187062, "learning_rate": 9.50984722790838e-07, "loss": 4.8276, "step": 20420 }, { "epoch": 0.8803893698582935, "learning_rate": 9.50936240815359e-07, "loss": 4.5643, "step": 20440 }, { "epoch": 0.8812508075978809, "learning_rate": 9.508877588398802e-07, "loss": 4.7807, "step": 20460 }, { "epoch": 0.8821122453374682, "learning_rate": 9.508392768644013e-07, "loss": 4.5882, "step": 20480 }, { "epoch": 0.8829736830770556, "learning_rate": 9.507907948889224e-07, "loss": 4.5565, "step": 20500 }, { "epoch": 0.883835120816643, "learning_rate": 9.507423129134434e-07, "loss": 4.3398, "step": 20520 }, { "epoch": 0.8846965585562303, "learning_rate": 9.506938309379646e-07, "loss": 4.6346, "step": 20540 }, { "epoch": 0.8855579962958178, "learning_rate": 9.506453489624857e-07, "loss": 4.7336, "step": 20560 }, { "epoch": 0.8864194340354051, "learning_rate": 9.505968669870069e-07, "loss": 4.6375, "step": 20580 }, { "epoch": 0.8872808717749925, "learning_rate": 9.505483850115279e-07, "loss": 4.1946, "step": 20600 }, { "epoch": 0.8881423095145798, "learning_rate": 9.50499903036049e-07, "loss": 4.5503, "step": 20620 }, { "epoch": 0.8890037472541672, "learning_rate": 9.504514210605701e-07, "loss": 4.6196, "step": 20640 }, { "epoch": 0.8898651849937546, "learning_rate": 9.504029390850912e-07, "loss": 4.657, "step": 20660 }, { "epoch": 0.8907266227333419, "learning_rate": 9.503544571096123e-07, "loss": 4.7372, "step": 20680 }, { "epoch": 0.8915880604729293, "learning_rate": 9.503059751341334e-07, "loss": 4.7459, "step": 20700 }, { "epoch": 0.8924494982125167, "learning_rate": 9.502574931586546e-07, "loss": 4.275, "step": 20720 }, { "epoch": 0.8933109359521041, "learning_rate": 9.502090111831756e-07, "loss": 4.4509, "step": 20740 }, { "epoch": 0.8941723736916914, "learning_rate": 9.501605292076967e-07, "loss": 4.8113, "step": 20760 }, { "epoch": 0.8950338114312788, "learning_rate": 9.501120472322178e-07, "loss": 4.5494, "step": 20780 }, { "epoch": 0.8958952491708662, "learning_rate": 9.50063565256739e-07, "loss": 4.4905, "step": 20800 }, { "epoch": 0.8967566869104535, "learning_rate": 9.5001508328126e-07, "loss": 4.5792, "step": 20820 }, { "epoch": 0.8976181246500409, "learning_rate": 9.499666013057812e-07, "loss": 4.3705, "step": 20840 }, { "epoch": 0.8984795623896283, "learning_rate": 9.499181193303023e-07, "loss": 4.5957, "step": 20860 }, { "epoch": 0.8993410001292157, "learning_rate": 9.498696373548233e-07, "loss": 4.5509, "step": 20880 }, { "epoch": 0.900202437868803, "learning_rate": 9.498211553793444e-07, "loss": 4.6055, "step": 20900 }, { "epoch": 0.9010638756083904, "learning_rate": 9.497726734038656e-07, "loss": 4.5213, "step": 20920 }, { "epoch": 0.9019253133479778, "learning_rate": 9.497241914283867e-07, "loss": 4.5847, "step": 20940 }, { "epoch": 0.9027867510875651, "learning_rate": 9.496757094529078e-07, "loss": 4.2846, "step": 20960 }, { "epoch": 0.9036481888271525, "learning_rate": 9.496272274774289e-07, "loss": 4.6234, "step": 20980 }, { "epoch": 0.9045096265667399, "learning_rate": 9.4957874550195e-07, "loss": 4.6808, "step": 21000 }, { "epoch": 0.9053710643063273, "learning_rate": 9.495302635264711e-07, "loss": 4.3551, "step": 21020 }, { "epoch": 0.9062325020459147, "learning_rate": 9.494817815509922e-07, "loss": 4.738, "step": 21040 }, { "epoch": 0.907093939785502, "learning_rate": 9.494332995755133e-07, "loss": 4.7525, "step": 21060 }, { "epoch": 0.9079553775250894, "learning_rate": 9.493848176000345e-07, "loss": 4.4851, "step": 21080 }, { "epoch": 0.9088168152646767, "learning_rate": 9.493363356245556e-07, "loss": 4.5402, "step": 21100 }, { "epoch": 0.9096782530042641, "learning_rate": 9.492878536490767e-07, "loss": 4.5958, "step": 21120 }, { "epoch": 0.9105396907438514, "learning_rate": 9.492393716735977e-07, "loss": 4.5298, "step": 21140 }, { "epoch": 0.9114011284834389, "learning_rate": 9.491908896981189e-07, "loss": 4.3762, "step": 21160 }, { "epoch": 0.9122625662230263, "learning_rate": 9.4914240772264e-07, "loss": 4.5817, "step": 21180 }, { "epoch": 0.9131240039626136, "learning_rate": 9.49093925747161e-07, "loss": 4.6998, "step": 21200 }, { "epoch": 0.913985441702201, "learning_rate": 9.490454437716822e-07, "loss": 4.6316, "step": 21220 }, { "epoch": 0.9148468794417883, "learning_rate": 9.489969617962033e-07, "loss": 4.5339, "step": 21240 }, { "epoch": 0.9157083171813757, "learning_rate": 9.489484798207243e-07, "loss": 4.5119, "step": 21260 }, { "epoch": 0.916569754920963, "learning_rate": 9.488999978452454e-07, "loss": 4.674, "step": 21280 }, { "epoch": 0.9174311926605505, "learning_rate": 9.488515158697666e-07, "loss": 4.5183, "step": 21300 }, { "epoch": 0.9182926304001379, "learning_rate": 9.488030338942877e-07, "loss": 4.7068, "step": 21320 }, { "epoch": 0.9191540681397252, "learning_rate": 9.487545519188088e-07, "loss": 4.4489, "step": 21340 }, { "epoch": 0.9200155058793126, "learning_rate": 9.487060699433299e-07, "loss": 4.7522, "step": 21360 }, { "epoch": 0.9208769436188999, "learning_rate": 9.486575879678511e-07, "loss": 4.4866, "step": 21380 }, { "epoch": 0.9217383813584873, "learning_rate": 9.486091059923721e-07, "loss": 4.4634, "step": 21400 }, { "epoch": 0.9225998190980746, "learning_rate": 9.485606240168932e-07, "loss": 4.6163, "step": 21420 }, { "epoch": 0.923461256837662, "learning_rate": 9.485121420414143e-07, "loss": 4.6412, "step": 21440 }, { "epoch": 0.9243226945772495, "learning_rate": 9.484636600659355e-07, "loss": 4.5538, "step": 21460 }, { "epoch": 0.9251841323168368, "learning_rate": 9.484151780904566e-07, "loss": 4.5306, "step": 21480 }, { "epoch": 0.9260455700564242, "learning_rate": 9.483666961149777e-07, "loss": 4.94, "step": 21500 }, { "epoch": 0.9269070077960115, "learning_rate": 9.483182141394986e-07, "loss": 4.3409, "step": 21520 }, { "epoch": 0.9277684455355989, "learning_rate": 9.482697321640199e-07, "loss": 4.9327, "step": 21540 }, { "epoch": 0.9286298832751863, "learning_rate": 9.482212501885409e-07, "loss": 4.6211, "step": 21560 }, { "epoch": 0.9294913210147736, "learning_rate": 9.481727682130621e-07, "loss": 4.3866, "step": 21580 }, { "epoch": 0.9303527587543611, "learning_rate": 9.481242862375832e-07, "loss": 4.4892, "step": 21600 }, { "epoch": 0.9312141964939484, "learning_rate": 9.480758042621044e-07, "loss": 4.7242, "step": 21620 }, { "epoch": 0.9320756342335358, "learning_rate": 9.480273222866253e-07, "loss": 4.789, "step": 21640 }, { "epoch": 0.9329370719731231, "learning_rate": 9.479788403111465e-07, "loss": 4.5236, "step": 21660 }, { "epoch": 0.9337985097127105, "learning_rate": 9.479303583356676e-07, "loss": 4.9787, "step": 21680 }, { "epoch": 0.9346599474522979, "learning_rate": 9.478818763601888e-07, "loss": 4.6006, "step": 21700 }, { "epoch": 0.9355213851918852, "learning_rate": 9.478333943847098e-07, "loss": 4.5336, "step": 21720 }, { "epoch": 0.9363828229314727, "learning_rate": 9.47784912409231e-07, "loss": 4.6637, "step": 21740 }, { "epoch": 0.93724426067106, "learning_rate": 9.477364304337521e-07, "loss": 4.5355, "step": 21760 }, { "epoch": 0.9381056984106474, "learning_rate": 9.47687948458273e-07, "loss": 4.6314, "step": 21780 }, { "epoch": 0.9389671361502347, "learning_rate": 9.476394664827942e-07, "loss": 4.6773, "step": 21800 }, { "epoch": 0.9398285738898221, "learning_rate": 9.475909845073153e-07, "loss": 4.7378, "step": 21820 }, { "epoch": 0.9406900116294095, "learning_rate": 9.475425025318365e-07, "loss": 4.3205, "step": 21840 }, { "epoch": 0.9415514493689968, "learning_rate": 9.474940205563575e-07, "loss": 4.6247, "step": 21860 }, { "epoch": 0.9424128871085842, "learning_rate": 9.474455385808787e-07, "loss": 4.4743, "step": 21880 }, { "epoch": 0.9432743248481716, "learning_rate": 9.473970566053997e-07, "loss": 4.5955, "step": 21900 }, { "epoch": 0.944135762587759, "learning_rate": 9.473485746299209e-07, "loss": 4.4757, "step": 21920 }, { "epoch": 0.9449972003273464, "learning_rate": 9.473000926544419e-07, "loss": 4.6824, "step": 21940 }, { "epoch": 0.9458586380669337, "learning_rate": 9.472516106789631e-07, "loss": 4.4245, "step": 21960 }, { "epoch": 0.9467200758065211, "learning_rate": 9.472031287034842e-07, "loss": 4.7057, "step": 21980 }, { "epoch": 0.9475815135461084, "learning_rate": 9.471546467280054e-07, "loss": 4.2502, "step": 22000 }, { "epoch": 0.9484429512856958, "learning_rate": 9.471061647525264e-07, "loss": 4.4566, "step": 22020 }, { "epoch": 0.9493043890252832, "learning_rate": 9.470576827770474e-07, "loss": 4.3403, "step": 22040 }, { "epoch": 0.9501658267648706, "learning_rate": 9.470092008015686e-07, "loss": 4.7907, "step": 22060 }, { "epoch": 0.951027264504458, "learning_rate": 9.469607188260898e-07, "loss": 4.5686, "step": 22080 }, { "epoch": 0.9518887022440453, "learning_rate": 9.469122368506108e-07, "loss": 4.4133, "step": 22100 }, { "epoch": 0.9527501399836327, "learning_rate": 9.46863754875132e-07, "loss": 4.5255, "step": 22120 }, { "epoch": 0.95361157772322, "learning_rate": 9.468152728996532e-07, "loss": 4.5422, "step": 22140 }, { "epoch": 0.9544730154628074, "learning_rate": 9.467667909241741e-07, "loss": 4.4569, "step": 22160 }, { "epoch": 0.9553344532023949, "learning_rate": 9.467183089486952e-07, "loss": 4.7992, "step": 22180 }, { "epoch": 0.9561958909419822, "learning_rate": 9.466698269732164e-07, "loss": 4.4247, "step": 22200 }, { "epoch": 0.9570573286815696, "learning_rate": 9.466213449977375e-07, "loss": 4.6975, "step": 22220 }, { "epoch": 0.9579187664211569, "learning_rate": 9.465728630222586e-07, "loss": 4.6951, "step": 22240 }, { "epoch": 0.9587802041607443, "learning_rate": 9.465243810467797e-07, "loss": 4.6245, "step": 22260 }, { "epoch": 0.9596416419003316, "learning_rate": 9.464758990713008e-07, "loss": 4.4001, "step": 22280 }, { "epoch": 0.960503079639919, "learning_rate": 9.464274170958219e-07, "loss": 4.4475, "step": 22300 }, { "epoch": 0.9613645173795063, "learning_rate": 9.463789351203429e-07, "loss": 4.576, "step": 22320 }, { "epoch": 0.9622259551190938, "learning_rate": 9.463304531448641e-07, "loss": 4.5425, "step": 22340 }, { "epoch": 0.9630873928586812, "learning_rate": 9.462819711693852e-07, "loss": 4.6569, "step": 22360 }, { "epoch": 0.9639488305982685, "learning_rate": 9.462334891939064e-07, "loss": 4.7177, "step": 22380 }, { "epoch": 0.9648102683378559, "learning_rate": 9.461850072184274e-07, "loss": 4.5029, "step": 22400 }, { "epoch": 0.9656717060774432, "learning_rate": 9.461365252429485e-07, "loss": 4.5453, "step": 22420 }, { "epoch": 0.9665331438170306, "learning_rate": 9.460880432674696e-07, "loss": 4.3993, "step": 22440 }, { "epoch": 0.967394581556618, "learning_rate": 9.460395612919907e-07, "loss": 4.5315, "step": 22460 }, { "epoch": 0.9682560192962054, "learning_rate": 9.459910793165118e-07, "loss": 4.3262, "step": 22480 }, { "epoch": 0.9691174570357928, "learning_rate": 9.45942597341033e-07, "loss": 4.2996, "step": 22500 }, { "epoch": 0.9699788947753801, "learning_rate": 9.458941153655541e-07, "loss": 4.5598, "step": 22520 }, { "epoch": 0.9708403325149675, "learning_rate": 9.458456333900751e-07, "loss": 4.5351, "step": 22540 }, { "epoch": 0.9717017702545548, "learning_rate": 9.457971514145962e-07, "loss": 4.605, "step": 22560 }, { "epoch": 0.9725632079941422, "learning_rate": 9.457486694391174e-07, "loss": 4.4104, "step": 22580 }, { "epoch": 0.9734246457337296, "learning_rate": 9.457001874636385e-07, "loss": 4.5709, "step": 22600 }, { "epoch": 0.9742860834733169, "learning_rate": 9.456517054881596e-07, "loss": 4.475, "step": 22620 }, { "epoch": 0.9751475212129044, "learning_rate": 9.456032235126807e-07, "loss": 4.6957, "step": 22640 }, { "epoch": 0.9760089589524917, "learning_rate": 9.455547415372018e-07, "loss": 4.5696, "step": 22660 }, { "epoch": 0.9768703966920791, "learning_rate": 9.455062595617229e-07, "loss": 4.4252, "step": 22680 }, { "epoch": 0.9777318344316664, "learning_rate": 9.45457777586244e-07, "loss": 4.5615, "step": 22700 }, { "epoch": 0.9785932721712538, "learning_rate": 9.454092956107651e-07, "loss": 4.6245, "step": 22720 }, { "epoch": 0.9794547099108412, "learning_rate": 9.453608136352863e-07, "loss": 4.754, "step": 22740 }, { "epoch": 0.9803161476504285, "learning_rate": 9.453123316598073e-07, "loss": 4.5483, "step": 22760 }, { "epoch": 0.981177585390016, "learning_rate": 9.452638496843284e-07, "loss": 4.817, "step": 22780 }, { "epoch": 0.9820390231296033, "learning_rate": 9.452153677088495e-07, "loss": 4.5536, "step": 22800 }, { "epoch": 0.9829004608691907, "learning_rate": 9.451668857333707e-07, "loss": 4.6521, "step": 22820 }, { "epoch": 0.983761898608778, "learning_rate": 9.451184037578917e-07, "loss": 4.6244, "step": 22840 }, { "epoch": 0.9846233363483654, "learning_rate": 9.450699217824128e-07, "loss": 4.7698, "step": 22860 }, { "epoch": 0.9854847740879528, "learning_rate": 9.45021439806934e-07, "loss": 4.5851, "step": 22880 }, { "epoch": 0.9863462118275401, "learning_rate": 9.449729578314551e-07, "loss": 4.3717, "step": 22900 }, { "epoch": 0.9872076495671276, "learning_rate": 9.449244758559761e-07, "loss": 4.7009, "step": 22920 }, { "epoch": 0.9880690873067149, "learning_rate": 9.448759938804972e-07, "loss": 4.5667, "step": 22940 }, { "epoch": 0.9889305250463023, "learning_rate": 9.448275119050184e-07, "loss": 4.7279, "step": 22960 }, { "epoch": 0.9897919627858897, "learning_rate": 9.447790299295395e-07, "loss": 4.6005, "step": 22980 }, { "epoch": 0.990653400525477, "learning_rate": 9.447305479540606e-07, "loss": 4.875, "step": 23000 }, { "epoch": 0.9915148382650644, "learning_rate": 9.446820659785817e-07, "loss": 4.6195, "step": 23020 }, { "epoch": 0.9923762760046517, "learning_rate": 9.446335840031028e-07, "loss": 4.3456, "step": 23040 }, { "epoch": 0.9932377137442391, "learning_rate": 9.445851020276238e-07, "loss": 4.5356, "step": 23060 }, { "epoch": 0.9940991514838265, "learning_rate": 9.44536620052145e-07, "loss": 4.5905, "step": 23080 }, { "epoch": 0.9949605892234139, "learning_rate": 9.444881380766661e-07, "loss": 4.6106, "step": 23100 }, { "epoch": 0.9958220269630013, "learning_rate": 9.444396561011873e-07, "loss": 4.443, "step": 23120 }, { "epoch": 0.9966834647025886, "learning_rate": 9.443911741257083e-07, "loss": 4.7554, "step": 23140 }, { "epoch": 0.997544902442176, "learning_rate": 9.443426921502295e-07, "loss": 4.4825, "step": 23160 }, { "epoch": 0.9984063401817633, "learning_rate": 9.442942101747505e-07, "loss": 4.8178, "step": 23180 }, { "epoch": 0.9992677779213507, "learning_rate": 9.442457281992717e-07, "loss": 4.51, "step": 23200 } ], "logging_steps": 20, "max_steps": 371472, "num_input_tokens_seen": 0, "num_train_epochs": 16, "save_steps": 10000.0, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.541046422749184e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }