diff --git "a/checkpoint-46434/trainer_state.json" "b/checkpoint-46434/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-46434/trainer_state.json" @@ -0,0 +1,13965 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 2.0, + "eval_steps": 500, + "global_step": 46434, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 4.307188697936857e-05, + "learning_rate": 5e-09, + "loss": 15.2085, + "step": 1 + }, + { + "epoch": 0.0008614377395873713, + "learning_rate": 1e-07, + "loss": 13.4227, + "step": 20 + }, + { + "epoch": 0.0017228754791747427, + "learning_rate": 2e-07, + "loss": 12.2855, + "step": 40 + }, + { + "epoch": 0.002584313218762114, + "learning_rate": 3e-07, + "loss": 12.1044, + "step": 60 + }, + { + "epoch": 0.0034457509583494853, + "learning_rate": 4e-07, + "loss": 10.705, + "step": 80 + }, + { + "epoch": 0.004307188697936857, + "learning_rate": 5e-07, + "loss": 10.2652, + "step": 100 + }, + { + "epoch": 0.005168626437524228, + "learning_rate": 6e-07, + "loss": 8.8297, + "step": 120 + }, + { + "epoch": 0.0060300641771116, + "learning_rate": 7e-07, + "loss": 8.3154, + "step": 140 + }, + { + "epoch": 0.006891501916698971, + "learning_rate": 8e-07, + "loss": 7.722, + "step": 160 + }, + { + "epoch": 0.007752939656286342, + "learning_rate": 9e-07, + "loss": 7.6096, + "step": 180 + }, + { + "epoch": 0.008614377395873713, + "learning_rate": 1e-06, + "loss": 7.394, + "step": 200 + }, + { + "epoch": 0.009475815135461085, + "learning_rate": 9.99951518024521e-07, + "loss": 7.0227, + "step": 220 + }, + { + "epoch": 0.010337252875048455, + "learning_rate": 9.999030360490423e-07, + "loss": 7.7254, + "step": 240 + }, + { + "epoch": 0.011198690614635827, + "learning_rate": 9.998545540735632e-07, + "loss": 7.6063, + "step": 260 + }, + { + "epoch": 0.0120601283542232, + "learning_rate": 9.998060720980845e-07, + "loss": 6.9029, + "step": 280 + }, + { + "epoch": 0.01292156609381057, + "learning_rate": 9.997575901226053e-07, + "loss": 7.0533, + "step": 300 + }, + { + "epoch": 0.013783003833397941, + "learning_rate": 9.997091081471266e-07, + "loss": 6.9369, + "step": 320 + }, + { + "epoch": 0.014644441572985313, + "learning_rate": 9.996606261716477e-07, + "loss": 7.2298, + "step": 340 + }, + { + "epoch": 0.015505879312572683, + "learning_rate": 9.996121441961687e-07, + "loss": 6.9774, + "step": 360 + }, + { + "epoch": 0.016367317052160057, + "learning_rate": 9.9956366222069e-07, + "loss": 6.6153, + "step": 380 + }, + { + "epoch": 0.017228754791747427, + "learning_rate": 9.995151802452109e-07, + "loss": 6.7399, + "step": 400 + }, + { + "epoch": 0.018090192531334797, + "learning_rate": 9.994666982697322e-07, + "loss": 6.5775, + "step": 420 + }, + { + "epoch": 0.01895163027092217, + "learning_rate": 9.994182162942532e-07, + "loss": 6.9794, + "step": 440 + }, + { + "epoch": 0.01981306801050954, + "learning_rate": 9.993697343187743e-07, + "loss": 6.662, + "step": 460 + }, + { + "epoch": 0.02067450575009691, + "learning_rate": 9.993212523432956e-07, + "loss": 6.7392, + "step": 480 + }, + { + "epoch": 0.021535943489684285, + "learning_rate": 9.992727703678165e-07, + "loss": 6.7243, + "step": 500 + }, + { + "epoch": 0.022397381229271655, + "learning_rate": 9.992242883923377e-07, + "loss": 6.9017, + "step": 520 + }, + { + "epoch": 0.023258818968859025, + "learning_rate": 9.991758064168586e-07, + "loss": 6.3385, + "step": 540 + }, + { + "epoch": 0.0241202567084464, + "learning_rate": 9.991273244413799e-07, + "loss": 6.436, + "step": 560 + }, + { + "epoch": 0.02498169444803377, + "learning_rate": 9.99078842465901e-07, + "loss": 6.7753, + "step": 580 + }, + { + "epoch": 0.02584313218762114, + "learning_rate": 9.99030360490422e-07, + "loss": 6.2456, + "step": 600 + }, + { + "epoch": 0.026704569927208512, + "learning_rate": 9.989818785149433e-07, + "loss": 6.6402, + "step": 620 + }, + { + "epoch": 0.027566007666795882, + "learning_rate": 9.989333965394644e-07, + "loss": 6.3705, + "step": 640 + }, + { + "epoch": 0.028427445406383253, + "learning_rate": 9.988849145639854e-07, + "loss": 6.1228, + "step": 660 + }, + { + "epoch": 0.029288883145970626, + "learning_rate": 9.988364325885065e-07, + "loss": 6.377, + "step": 680 + }, + { + "epoch": 0.030150320885557996, + "learning_rate": 9.987879506130276e-07, + "loss": 6.5323, + "step": 700 + }, + { + "epoch": 0.031011758625145366, + "learning_rate": 9.987394686375489e-07, + "loss": 6.5611, + "step": 720 + }, + { + "epoch": 0.03187319636473274, + "learning_rate": 9.986909866620697e-07, + "loss": 6.5353, + "step": 740 + }, + { + "epoch": 0.032734634104320114, + "learning_rate": 9.98642504686591e-07, + "loss": 6.5194, + "step": 760 + }, + { + "epoch": 0.033596071843907484, + "learning_rate": 9.985940227111119e-07, + "loss": 6.4292, + "step": 780 + }, + { + "epoch": 0.034457509583494854, + "learning_rate": 9.985455407356332e-07, + "loss": 6.311, + "step": 800 + }, + { + "epoch": 0.035318947323082224, + "learning_rate": 9.984970587601542e-07, + "loss": 6.4556, + "step": 820 + }, + { + "epoch": 0.036180385062669594, + "learning_rate": 9.984485767846753e-07, + "loss": 6.3878, + "step": 840 + }, + { + "epoch": 0.037041822802256964, + "learning_rate": 9.984000948091966e-07, + "loss": 6.2106, + "step": 860 + }, + { + "epoch": 0.03790326054184434, + "learning_rate": 9.983516128337174e-07, + "loss": 6.4038, + "step": 880 + }, + { + "epoch": 0.03876469828143171, + "learning_rate": 9.983031308582387e-07, + "loss": 6.554, + "step": 900 + }, + { + "epoch": 0.03962613602101908, + "learning_rate": 9.982546488827596e-07, + "loss": 6.5065, + "step": 920 + }, + { + "epoch": 0.04048757376060645, + "learning_rate": 9.982061669072809e-07, + "loss": 6.2372, + "step": 940 + }, + { + "epoch": 0.04134901150019382, + "learning_rate": 9.98157684931802e-07, + "loss": 6.6843, + "step": 960 + }, + { + "epoch": 0.04221044923978119, + "learning_rate": 9.98109202956323e-07, + "loss": 6.3941, + "step": 980 + }, + { + "epoch": 0.04307188697936857, + "learning_rate": 9.980607209808443e-07, + "loss": 6.0508, + "step": 1000 + }, + { + "epoch": 0.04393332471895594, + "learning_rate": 9.980122390053654e-07, + "loss": 6.1723, + "step": 1020 + }, + { + "epoch": 0.04479476245854331, + "learning_rate": 9.979637570298864e-07, + "loss": 6.5495, + "step": 1040 + }, + { + "epoch": 0.04565620019813068, + "learning_rate": 9.979152750544075e-07, + "loss": 6.4215, + "step": 1060 + }, + { + "epoch": 0.04651763793771805, + "learning_rate": 9.978667930789286e-07, + "loss": 6.2316, + "step": 1080 + }, + { + "epoch": 0.04737907567730542, + "learning_rate": 9.978183111034499e-07, + "loss": 6.291, + "step": 1100 + }, + { + "epoch": 0.0482405134168928, + "learning_rate": 9.977698291279707e-07, + "loss": 6.2892, + "step": 1120 + }, + { + "epoch": 0.04910195115648017, + "learning_rate": 9.97721347152492e-07, + "loss": 6.1592, + "step": 1140 + }, + { + "epoch": 0.04996338889606754, + "learning_rate": 9.976728651770129e-07, + "loss": 6.149, + "step": 1160 + }, + { + "epoch": 0.05082482663565491, + "learning_rate": 9.976243832015342e-07, + "loss": 6.3048, + "step": 1180 + }, + { + "epoch": 0.05168626437524228, + "learning_rate": 9.975759012260552e-07, + "loss": 6.4359, + "step": 1200 + }, + { + "epoch": 0.05254770211482965, + "learning_rate": 9.975274192505763e-07, + "loss": 6.1895, + "step": 1220 + }, + { + "epoch": 0.053409139854417025, + "learning_rate": 9.974789372750976e-07, + "loss": 6.0717, + "step": 1240 + }, + { + "epoch": 0.054270577594004395, + "learning_rate": 9.974304552996186e-07, + "loss": 6.4554, + "step": 1260 + }, + { + "epoch": 0.055132015333591765, + "learning_rate": 9.973819733241397e-07, + "loss": 6.3709, + "step": 1280 + }, + { + "epoch": 0.055993453073179135, + "learning_rate": 9.973334913486606e-07, + "loss": 6.3882, + "step": 1300 + }, + { + "epoch": 0.056854890812766505, + "learning_rate": 9.972850093731819e-07, + "loss": 6.272, + "step": 1320 + }, + { + "epoch": 0.057716328552353875, + "learning_rate": 9.97236527397703e-07, + "loss": 6.0779, + "step": 1340 + }, + { + "epoch": 0.05857776629194125, + "learning_rate": 9.97188045422224e-07, + "loss": 6.3757, + "step": 1360 + }, + { + "epoch": 0.05943920403152862, + "learning_rate": 9.971395634467453e-07, + "loss": 5.9969, + "step": 1380 + }, + { + "epoch": 0.06030064177111599, + "learning_rate": 9.970910814712664e-07, + "loss": 6.288, + "step": 1400 + }, + { + "epoch": 0.06116207951070336, + "learning_rate": 9.970425994957874e-07, + "loss": 6.3014, + "step": 1420 + }, + { + "epoch": 0.06202351725029073, + "learning_rate": 9.969941175203085e-07, + "loss": 6.0141, + "step": 1440 + }, + { + "epoch": 0.0628849549898781, + "learning_rate": 9.969456355448296e-07, + "loss": 6.2296, + "step": 1460 + }, + { + "epoch": 0.06374639272946547, + "learning_rate": 9.968971535693509e-07, + "loss": 6.2257, + "step": 1480 + }, + { + "epoch": 0.06460783046905284, + "learning_rate": 9.968486715938717e-07, + "loss": 6.2917, + "step": 1500 + }, + { + "epoch": 0.06546926820864023, + "learning_rate": 9.96800189618393e-07, + "loss": 6.0469, + "step": 1520 + }, + { + "epoch": 0.0663307059482276, + "learning_rate": 9.967517076429139e-07, + "loss": 6.1077, + "step": 1540 + }, + { + "epoch": 0.06719214368781497, + "learning_rate": 9.967032256674351e-07, + "loss": 6.2491, + "step": 1560 + }, + { + "epoch": 0.06805358142740234, + "learning_rate": 9.966547436919562e-07, + "loss": 5.8996, + "step": 1580 + }, + { + "epoch": 0.06891501916698971, + "learning_rate": 9.966062617164773e-07, + "loss": 6.3099, + "step": 1600 + }, + { + "epoch": 0.06977645690657708, + "learning_rate": 9.965577797409986e-07, + "loss": 6.2458, + "step": 1620 + }, + { + "epoch": 0.07063789464616445, + "learning_rate": 9.965092977655196e-07, + "loss": 6.2759, + "step": 1640 + }, + { + "epoch": 0.07149933238575182, + "learning_rate": 9.964608157900407e-07, + "loss": 6.2417, + "step": 1660 + }, + { + "epoch": 0.07236077012533919, + "learning_rate": 9.964123338145618e-07, + "loss": 6.0339, + "step": 1680 + }, + { + "epoch": 0.07322220786492656, + "learning_rate": 9.963638518390829e-07, + "loss": 6.1054, + "step": 1700 + }, + { + "epoch": 0.07408364560451393, + "learning_rate": 9.963153698636041e-07, + "loss": 6.2843, + "step": 1720 + }, + { + "epoch": 0.0749450833441013, + "learning_rate": 9.96266887888125e-07, + "loss": 6.0034, + "step": 1740 + }, + { + "epoch": 0.07580652108368868, + "learning_rate": 9.962184059126463e-07, + "loss": 6.2372, + "step": 1760 + }, + { + "epoch": 0.07666795882327605, + "learning_rate": 9.961699239371674e-07, + "loss": 6.2253, + "step": 1780 + }, + { + "epoch": 0.07752939656286342, + "learning_rate": 9.961214419616884e-07, + "loss": 6.1184, + "step": 1800 + }, + { + "epoch": 0.0783908343024508, + "learning_rate": 9.960729599862095e-07, + "loss": 5.952, + "step": 1820 + }, + { + "epoch": 0.07925227204203816, + "learning_rate": 9.960244780107306e-07, + "loss": 6.1059, + "step": 1840 + }, + { + "epoch": 0.08011370978162553, + "learning_rate": 9.959759960352519e-07, + "loss": 6.0038, + "step": 1860 + }, + { + "epoch": 0.0809751475212129, + "learning_rate": 9.959275140597727e-07, + "loss": 6.197, + "step": 1880 + }, + { + "epoch": 0.08183658526080027, + "learning_rate": 9.95879032084294e-07, + "loss": 6.0109, + "step": 1900 + }, + { + "epoch": 0.08269802300038764, + "learning_rate": 9.95830550108815e-07, + "loss": 6.081, + "step": 1920 + }, + { + "epoch": 0.08355946073997501, + "learning_rate": 9.957820681333361e-07, + "loss": 5.8241, + "step": 1940 + }, + { + "epoch": 0.08442089847956238, + "learning_rate": 9.957335861578572e-07, + "loss": 6.2336, + "step": 1960 + }, + { + "epoch": 0.08528233621914975, + "learning_rate": 9.956851041823783e-07, + "loss": 5.9938, + "step": 1980 + }, + { + "epoch": 0.08614377395873714, + "learning_rate": 9.956366222068996e-07, + "loss": 6.1137, + "step": 2000 + }, + { + "epoch": 0.08700521169832451, + "learning_rate": 9.955881402314206e-07, + "loss": 5.8231, + "step": 2020 + }, + { + "epoch": 0.08786664943791188, + "learning_rate": 9.955396582559417e-07, + "loss": 5.919, + "step": 2040 + }, + { + "epoch": 0.08872808717749925, + "learning_rate": 9.954911762804628e-07, + "loss": 5.9266, + "step": 2060 + }, + { + "epoch": 0.08958952491708662, + "learning_rate": 9.954426943049839e-07, + "loss": 6.0525, + "step": 2080 + }, + { + "epoch": 0.09045096265667399, + "learning_rate": 9.953942123295051e-07, + "loss": 5.8037, + "step": 2100 + }, + { + "epoch": 0.09131240039626136, + "learning_rate": 9.95345730354026e-07, + "loss": 5.8224, + "step": 2120 + }, + { + "epoch": 0.09217383813584873, + "learning_rate": 9.952972483785473e-07, + "loss": 5.9992, + "step": 2140 + }, + { + "epoch": 0.0930352758754361, + "learning_rate": 9.952487664030683e-07, + "loss": 6.222, + "step": 2160 + }, + { + "epoch": 0.09389671361502347, + "learning_rate": 9.952002844275894e-07, + "loss": 6.221, + "step": 2180 + }, + { + "epoch": 0.09475815135461084, + "learning_rate": 9.951518024521105e-07, + "loss": 5.9652, + "step": 2200 + }, + { + "epoch": 0.09561958909419822, + "learning_rate": 9.951033204766316e-07, + "loss": 6.1783, + "step": 2220 + }, + { + "epoch": 0.0964810268337856, + "learning_rate": 9.950548385011528e-07, + "loss": 5.9059, + "step": 2240 + }, + { + "epoch": 0.09734246457337296, + "learning_rate": 9.95006356525674e-07, + "loss": 6.1, + "step": 2260 + }, + { + "epoch": 0.09820390231296033, + "learning_rate": 9.94957874550195e-07, + "loss": 5.9231, + "step": 2280 + }, + { + "epoch": 0.0990653400525477, + "learning_rate": 9.94909392574716e-07, + "loss": 6.0686, + "step": 2300 + }, + { + "epoch": 0.09992677779213507, + "learning_rate": 9.948609105992371e-07, + "loss": 5.8488, + "step": 2320 + }, + { + "epoch": 0.10078821553172244, + "learning_rate": 9.948124286237584e-07, + "loss": 5.7428, + "step": 2340 + }, + { + "epoch": 0.10164965327130981, + "learning_rate": 9.947639466482793e-07, + "loss": 5.8092, + "step": 2360 + }, + { + "epoch": 0.10251109101089718, + "learning_rate": 9.947154646728006e-07, + "loss": 6.0985, + "step": 2380 + }, + { + "epoch": 0.10337252875048455, + "learning_rate": 9.946669826973216e-07, + "loss": 6.14, + "step": 2400 + }, + { + "epoch": 0.10423396649007192, + "learning_rate": 9.946185007218427e-07, + "loss": 5.6721, + "step": 2420 + }, + { + "epoch": 0.1050954042296593, + "learning_rate": 9.945700187463638e-07, + "loss": 5.6273, + "step": 2440 + }, + { + "epoch": 0.10595684196924668, + "learning_rate": 9.945215367708848e-07, + "loss": 6.1214, + "step": 2460 + }, + { + "epoch": 0.10681827970883405, + "learning_rate": 9.944730547954061e-07, + "loss": 5.9658, + "step": 2480 + }, + { + "epoch": 0.10767971744842142, + "learning_rate": 9.94424572819927e-07, + "loss": 5.7439, + "step": 2500 + }, + { + "epoch": 0.10854115518800879, + "learning_rate": 9.943760908444483e-07, + "loss": 5.7759, + "step": 2520 + }, + { + "epoch": 0.10940259292759616, + "learning_rate": 9.943276088689693e-07, + "loss": 5.9705, + "step": 2540 + }, + { + "epoch": 0.11026403066718353, + "learning_rate": 9.942791268934904e-07, + "loss": 5.5893, + "step": 2560 + }, + { + "epoch": 0.1111254684067709, + "learning_rate": 9.942306449180115e-07, + "loss": 5.7964, + "step": 2580 + }, + { + "epoch": 0.11198690614635827, + "learning_rate": 9.941821629425326e-07, + "loss": 5.8011, + "step": 2600 + }, + { + "epoch": 0.11284834388594564, + "learning_rate": 9.941336809670538e-07, + "loss": 5.7291, + "step": 2620 + }, + { + "epoch": 0.11370978162553301, + "learning_rate": 9.94085198991575e-07, + "loss": 5.7857, + "step": 2640 + }, + { + "epoch": 0.11457121936512038, + "learning_rate": 9.94036717016096e-07, + "loss": 5.8249, + "step": 2660 + }, + { + "epoch": 0.11543265710470775, + "learning_rate": 9.93988235040617e-07, + "loss": 5.6865, + "step": 2680 + }, + { + "epoch": 0.11629409484429513, + "learning_rate": 9.939397530651381e-07, + "loss": 5.8647, + "step": 2700 + }, + { + "epoch": 0.1171555325838825, + "learning_rate": 9.938912710896594e-07, + "loss": 5.558, + "step": 2720 + }, + { + "epoch": 0.11801697032346987, + "learning_rate": 9.938427891141803e-07, + "loss": 6.0017, + "step": 2740 + }, + { + "epoch": 0.11887840806305724, + "learning_rate": 9.937943071387016e-07, + "loss": 5.7348, + "step": 2760 + }, + { + "epoch": 0.11973984580264461, + "learning_rate": 9.937458251632226e-07, + "loss": 5.5196, + "step": 2780 + }, + { + "epoch": 0.12060128354223199, + "learning_rate": 9.936973431877437e-07, + "loss": 5.6164, + "step": 2800 + }, + { + "epoch": 0.12146272128181936, + "learning_rate": 9.936488612122648e-07, + "loss": 5.8701, + "step": 2820 + }, + { + "epoch": 0.12232415902140673, + "learning_rate": 9.936003792367858e-07, + "loss": 5.9252, + "step": 2840 + }, + { + "epoch": 0.1231855967609941, + "learning_rate": 9.935518972613071e-07, + "loss": 5.502, + "step": 2860 + }, + { + "epoch": 0.12404703450058147, + "learning_rate": 9.935034152858282e-07, + "loss": 5.8715, + "step": 2880 + }, + { + "epoch": 0.12490847224016884, + "learning_rate": 9.934549333103493e-07, + "loss": 5.5986, + "step": 2900 + }, + { + "epoch": 0.1257699099797562, + "learning_rate": 9.934064513348703e-07, + "loss": 5.7461, + "step": 2920 + }, + { + "epoch": 0.1266313477193436, + "learning_rate": 9.933579693593914e-07, + "loss": 5.921, + "step": 2940 + }, + { + "epoch": 0.12749278545893095, + "learning_rate": 9.933094873839125e-07, + "loss": 5.8295, + "step": 2960 + }, + { + "epoch": 0.12835422319851833, + "learning_rate": 9.932610054084336e-07, + "loss": 5.6581, + "step": 2980 + }, + { + "epoch": 0.1292156609381057, + "learning_rate": 9.932125234329548e-07, + "loss": 5.7304, + "step": 3000 + }, + { + "epoch": 0.13007709867769307, + "learning_rate": 9.93164041457476e-07, + "loss": 5.9998, + "step": 3020 + }, + { + "epoch": 0.13093853641728045, + "learning_rate": 9.93115559481997e-07, + "loss": 6.0369, + "step": 3040 + }, + { + "epoch": 0.1317999741568678, + "learning_rate": 9.930670775065183e-07, + "loss": 5.9676, + "step": 3060 + }, + { + "epoch": 0.1326614118964552, + "learning_rate": 9.930185955310391e-07, + "loss": 5.2663, + "step": 3080 + }, + { + "epoch": 0.13352284963604255, + "learning_rate": 9.929701135555604e-07, + "loss": 5.8159, + "step": 3100 + }, + { + "epoch": 0.13438428737562993, + "learning_rate": 9.929216315800813e-07, + "loss": 5.5133, + "step": 3120 + }, + { + "epoch": 0.1352457251152173, + "learning_rate": 9.928731496046025e-07, + "loss": 5.7083, + "step": 3140 + }, + { + "epoch": 0.13610716285480468, + "learning_rate": 9.928246676291236e-07, + "loss": 5.7072, + "step": 3160 + }, + { + "epoch": 0.13696860059439203, + "learning_rate": 9.927761856536447e-07, + "loss": 5.8228, + "step": 3180 + }, + { + "epoch": 0.13783003833397942, + "learning_rate": 9.927277036781658e-07, + "loss": 5.9269, + "step": 3200 + }, + { + "epoch": 0.13869147607356677, + "learning_rate": 9.926792217026868e-07, + "loss": 5.7547, + "step": 3220 + }, + { + "epoch": 0.13955291381315416, + "learning_rate": 9.926307397272081e-07, + "loss": 5.7518, + "step": 3240 + }, + { + "epoch": 0.1404143515527415, + "learning_rate": 9.925822577517292e-07, + "loss": 5.7172, + "step": 3260 + }, + { + "epoch": 0.1412757892923289, + "learning_rate": 9.925337757762503e-07, + "loss": 5.3164, + "step": 3280 + }, + { + "epoch": 0.14213722703191628, + "learning_rate": 9.924852938007715e-07, + "loss": 5.5814, + "step": 3300 + }, + { + "epoch": 0.14299866477150364, + "learning_rate": 9.924368118252924e-07, + "loss": 5.7014, + "step": 3320 + }, + { + "epoch": 0.14386010251109102, + "learning_rate": 9.923883298498137e-07, + "loss": 5.6091, + "step": 3340 + }, + { + "epoch": 0.14472154025067838, + "learning_rate": 9.923398478743345e-07, + "loss": 5.9346, + "step": 3360 + }, + { + "epoch": 0.14558297799026576, + "learning_rate": 9.922913658988558e-07, + "loss": 5.411, + "step": 3380 + }, + { + "epoch": 0.14644441572985312, + "learning_rate": 9.92242883923377e-07, + "loss": 5.7669, + "step": 3400 + }, + { + "epoch": 0.1473058534694405, + "learning_rate": 9.92194401947898e-07, + "loss": 5.3575, + "step": 3420 + }, + { + "epoch": 0.14816729120902786, + "learning_rate": 9.921459199724193e-07, + "loss": 5.8575, + "step": 3440 + }, + { + "epoch": 0.14902872894861524, + "learning_rate": 9.920974379969401e-07, + "loss": 5.5618, + "step": 3460 + }, + { + "epoch": 0.1498901666882026, + "learning_rate": 9.920489560214614e-07, + "loss": 5.7241, + "step": 3480 + }, + { + "epoch": 0.15075160442778998, + "learning_rate": 9.920004740459823e-07, + "loss": 5.6646, + "step": 3500 + }, + { + "epoch": 0.15161304216737737, + "learning_rate": 9.919519920705035e-07, + "loss": 5.7116, + "step": 3520 + }, + { + "epoch": 0.15247447990696472, + "learning_rate": 9.919035100950246e-07, + "loss": 5.4578, + "step": 3540 + }, + { + "epoch": 0.1533359176465521, + "learning_rate": 9.918550281195457e-07, + "loss": 5.4505, + "step": 3560 + }, + { + "epoch": 0.15419735538613946, + "learning_rate": 9.918065461440668e-07, + "loss": 5.6805, + "step": 3580 + }, + { + "epoch": 0.15505879312572685, + "learning_rate": 9.917580641685878e-07, + "loss": 5.3575, + "step": 3600 + }, + { + "epoch": 0.1559202308653142, + "learning_rate": 9.917095821931091e-07, + "loss": 5.7853, + "step": 3620 + }, + { + "epoch": 0.1567816686049016, + "learning_rate": 9.916611002176302e-07, + "loss": 5.8258, + "step": 3640 + }, + { + "epoch": 0.15764310634448894, + "learning_rate": 9.916126182421513e-07, + "loss": 5.6299, + "step": 3660 + }, + { + "epoch": 0.15850454408407633, + "learning_rate": 9.915641362666725e-07, + "loss": 5.7533, + "step": 3680 + }, + { + "epoch": 0.15936598182366368, + "learning_rate": 9.915156542911934e-07, + "loss": 5.6216, + "step": 3700 + }, + { + "epoch": 0.16022741956325107, + "learning_rate": 9.914671723157147e-07, + "loss": 5.9745, + "step": 3720 + }, + { + "epoch": 0.16108885730283845, + "learning_rate": 9.914186903402355e-07, + "loss": 5.6028, + "step": 3740 + }, + { + "epoch": 0.1619502950424258, + "learning_rate": 9.913702083647568e-07, + "loss": 5.658, + "step": 3760 + }, + { + "epoch": 0.1628117327820132, + "learning_rate": 9.913217263892779e-07, + "loss": 5.4876, + "step": 3780 + }, + { + "epoch": 0.16367317052160055, + "learning_rate": 9.91273244413799e-07, + "loss": 5.5794, + "step": 3800 + }, + { + "epoch": 0.16453460826118793, + "learning_rate": 9.912247624383202e-07, + "loss": 5.6442, + "step": 3820 + }, + { + "epoch": 0.1653960460007753, + "learning_rate": 9.911762804628411e-07, + "loss": 5.5536, + "step": 3840 + }, + { + "epoch": 0.16625748374036267, + "learning_rate": 9.911277984873624e-07, + "loss": 5.5444, + "step": 3860 + }, + { + "epoch": 0.16711892147995003, + "learning_rate": 9.910793165118835e-07, + "loss": 5.7136, + "step": 3880 + }, + { + "epoch": 0.1679803592195374, + "learning_rate": 9.910308345364045e-07, + "loss": 5.2753, + "step": 3900 + }, + { + "epoch": 0.16884179695912477, + "learning_rate": 9.909823525609258e-07, + "loss": 5.4269, + "step": 3920 + }, + { + "epoch": 0.16970323469871215, + "learning_rate": 9.909338705854467e-07, + "loss": 5.4923, + "step": 3940 + }, + { + "epoch": 0.1705646724382995, + "learning_rate": 9.90885388609968e-07, + "loss": 5.5202, + "step": 3960 + }, + { + "epoch": 0.1714261101778869, + "learning_rate": 9.908369066344888e-07, + "loss": 5.5258, + "step": 3980 + }, + { + "epoch": 0.17228754791747428, + "learning_rate": 9.9078842465901e-07, + "loss": 5.5544, + "step": 4000 + }, + { + "epoch": 0.17314898565706163, + "learning_rate": 9.907399426835312e-07, + "loss": 5.5478, + "step": 4020 + }, + { + "epoch": 0.17401042339664902, + "learning_rate": 9.906914607080522e-07, + "loss": 5.5041, + "step": 4040 + }, + { + "epoch": 0.17487186113623637, + "learning_rate": 9.906429787325735e-07, + "loss": 5.4906, + "step": 4060 + }, + { + "epoch": 0.17573329887582376, + "learning_rate": 9.905944967570944e-07, + "loss": 5.4225, + "step": 4080 + }, + { + "epoch": 0.1765947366154111, + "learning_rate": 9.905460147816157e-07, + "loss": 5.6207, + "step": 4100 + }, + { + "epoch": 0.1774561743549985, + "learning_rate": 9.904975328061365e-07, + "loss": 5.5054, + "step": 4120 + }, + { + "epoch": 0.17831761209458585, + "learning_rate": 9.904490508306578e-07, + "loss": 5.4273, + "step": 4140 + }, + { + "epoch": 0.17917904983417324, + "learning_rate": 9.904005688551789e-07, + "loss": 5.6312, + "step": 4160 + }, + { + "epoch": 0.1800404875737606, + "learning_rate": 9.903520868797e-07, + "loss": 5.5304, + "step": 4180 + }, + { + "epoch": 0.18090192531334798, + "learning_rate": 9.903036049042212e-07, + "loss": 5.8041, + "step": 4200 + }, + { + "epoch": 0.18176336305293536, + "learning_rate": 9.90255122928742e-07, + "loss": 5.6389, + "step": 4220 + }, + { + "epoch": 0.18262480079252272, + "learning_rate": 9.902066409532634e-07, + "loss": 5.4039, + "step": 4240 + }, + { + "epoch": 0.1834862385321101, + "learning_rate": 9.901581589777845e-07, + "loss": 5.3949, + "step": 4260 + }, + { + "epoch": 0.18434767627169746, + "learning_rate": 9.901096770023055e-07, + "loss": 5.462, + "step": 4280 + }, + { + "epoch": 0.18520911401128484, + "learning_rate": 9.900611950268268e-07, + "loss": 5.6406, + "step": 4300 + }, + { + "epoch": 0.1860705517508722, + "learning_rate": 9.900127130513477e-07, + "loss": 5.4098, + "step": 4320 + }, + { + "epoch": 0.18693198949045958, + "learning_rate": 9.89964231075869e-07, + "loss": 5.5045, + "step": 4340 + }, + { + "epoch": 0.18779342723004694, + "learning_rate": 9.899157491003898e-07, + "loss": 5.4192, + "step": 4360 + }, + { + "epoch": 0.18865486496963432, + "learning_rate": 9.89867267124911e-07, + "loss": 5.3939, + "step": 4380 + }, + { + "epoch": 0.18951630270922168, + "learning_rate": 9.898187851494322e-07, + "loss": 5.6309, + "step": 4400 + }, + { + "epoch": 0.19037774044880906, + "learning_rate": 9.897703031739532e-07, + "loss": 5.3819, + "step": 4420 + }, + { + "epoch": 0.19123917818839645, + "learning_rate": 9.897218211984745e-07, + "loss": 5.3316, + "step": 4440 + }, + { + "epoch": 0.1921006159279838, + "learning_rate": 9.896733392229954e-07, + "loss": 5.3271, + "step": 4460 + }, + { + "epoch": 0.1929620536675712, + "learning_rate": 9.896248572475167e-07, + "loss": 5.5944, + "step": 4480 + }, + { + "epoch": 0.19382349140715854, + "learning_rate": 9.895763752720377e-07, + "loss": 5.4956, + "step": 4500 + }, + { + "epoch": 0.19468492914674593, + "learning_rate": 9.895278932965588e-07, + "loss": 5.3326, + "step": 4520 + }, + { + "epoch": 0.19554636688633328, + "learning_rate": 9.894794113210799e-07, + "loss": 5.4429, + "step": 4540 + }, + { + "epoch": 0.19640780462592067, + "learning_rate": 9.89430929345601e-07, + "loss": 5.4955, + "step": 4560 + }, + { + "epoch": 0.19726924236550802, + "learning_rate": 9.893824473701222e-07, + "loss": 5.2479, + "step": 4580 + }, + { + "epoch": 0.1981306801050954, + "learning_rate": 9.89333965394643e-07, + "loss": 5.3128, + "step": 4600 + }, + { + "epoch": 0.19899211784468276, + "learning_rate": 9.892854834191644e-07, + "loss": 5.3392, + "step": 4620 + }, + { + "epoch": 0.19985355558427015, + "learning_rate": 9.892370014436854e-07, + "loss": 5.5973, + "step": 4640 + }, + { + "epoch": 0.2007149933238575, + "learning_rate": 9.891885194682065e-07, + "loss": 5.4401, + "step": 4660 + }, + { + "epoch": 0.2015764310634449, + "learning_rate": 9.891400374927278e-07, + "loss": 5.3984, + "step": 4680 + }, + { + "epoch": 0.20243786880303227, + "learning_rate": 9.890915555172487e-07, + "loss": 5.2152, + "step": 4700 + }, + { + "epoch": 0.20329930654261963, + "learning_rate": 9.8904307354177e-07, + "loss": 5.5735, + "step": 4720 + }, + { + "epoch": 0.204160744282207, + "learning_rate": 9.889945915662908e-07, + "loss": 5.4082, + "step": 4740 + }, + { + "epoch": 0.20502218202179437, + "learning_rate": 9.88946109590812e-07, + "loss": 5.2788, + "step": 4760 + }, + { + "epoch": 0.20588361976138175, + "learning_rate": 9.888976276153332e-07, + "loss": 5.384, + "step": 4780 + }, + { + "epoch": 0.2067450575009691, + "learning_rate": 9.888491456398542e-07, + "loss": 5.4814, + "step": 4800 + }, + { + "epoch": 0.2076064952405565, + "learning_rate": 9.888006636643755e-07, + "loss": 5.1962, + "step": 4820 + }, + { + "epoch": 0.20846793298014385, + "learning_rate": 9.887521816888966e-07, + "loss": 5.0913, + "step": 4840 + }, + { + "epoch": 0.20932937071973123, + "learning_rate": 9.887036997134177e-07, + "loss": 5.3056, + "step": 4860 + }, + { + "epoch": 0.2101908084593186, + "learning_rate": 9.886552177379387e-07, + "loss": 5.4644, + "step": 4880 + }, + { + "epoch": 0.21105224619890597, + "learning_rate": 9.886067357624598e-07, + "loss": 5.6686, + "step": 4900 + }, + { + "epoch": 0.21191368393849336, + "learning_rate": 9.88558253786981e-07, + "loss": 5.3201, + "step": 4920 + }, + { + "epoch": 0.21277512167808071, + "learning_rate": 9.88509771811502e-07, + "loss": 5.2799, + "step": 4940 + }, + { + "epoch": 0.2136365594176681, + "learning_rate": 9.884612898360232e-07, + "loss": 5.1967, + "step": 4960 + }, + { + "epoch": 0.21449799715725545, + "learning_rate": 9.88412807860544e-07, + "loss": 5.4358, + "step": 4980 + }, + { + "epoch": 0.21535943489684284, + "learning_rate": 9.883643258850654e-07, + "loss": 5.124, + "step": 5000 + }, + { + "epoch": 0.2162208726364302, + "learning_rate": 9.883158439095864e-07, + "loss": 5.4017, + "step": 5020 + }, + { + "epoch": 0.21708231037601758, + "learning_rate": 9.882673619341075e-07, + "loss": 5.28, + "step": 5040 + }, + { + "epoch": 0.21794374811560493, + "learning_rate": 9.882188799586288e-07, + "loss": 5.427, + "step": 5060 + }, + { + "epoch": 0.21880518585519232, + "learning_rate": 9.881703979831497e-07, + "loss": 5.272, + "step": 5080 + }, + { + "epoch": 0.21966662359477968, + "learning_rate": 9.88121916007671e-07, + "loss": 5.3149, + "step": 5100 + }, + { + "epoch": 0.22052806133436706, + "learning_rate": 9.880734340321918e-07, + "loss": 5.5404, + "step": 5120 + }, + { + "epoch": 0.22138949907395444, + "learning_rate": 9.88024952056713e-07, + "loss": 5.3568, + "step": 5140 + }, + { + "epoch": 0.2222509368135418, + "learning_rate": 9.879764700812342e-07, + "loss": 5.2099, + "step": 5160 + }, + { + "epoch": 0.22311237455312918, + "learning_rate": 9.879279881057552e-07, + "loss": 5.2753, + "step": 5180 + }, + { + "epoch": 0.22397381229271654, + "learning_rate": 9.878795061302765e-07, + "loss": 5.3216, + "step": 5200 + }, + { + "epoch": 0.22483525003230392, + "learning_rate": 9.878310241547976e-07, + "loss": 5.332, + "step": 5220 + }, + { + "epoch": 0.22569668777189128, + "learning_rate": 9.877825421793187e-07, + "loss": 5.1957, + "step": 5240 + }, + { + "epoch": 0.22655812551147866, + "learning_rate": 9.877340602038397e-07, + "loss": 5.493, + "step": 5260 + }, + { + "epoch": 0.22741956325106602, + "learning_rate": 9.876855782283608e-07, + "loss": 5.4048, + "step": 5280 + }, + { + "epoch": 0.2282810009906534, + "learning_rate": 9.87637096252882e-07, + "loss": 5.4582, + "step": 5300 + }, + { + "epoch": 0.22914243873024076, + "learning_rate": 9.87588614277403e-07, + "loss": 5.1358, + "step": 5320 + }, + { + "epoch": 0.23000387646982814, + "learning_rate": 9.875401323019242e-07, + "loss": 5.507, + "step": 5340 + }, + { + "epoch": 0.2308653142094155, + "learning_rate": 9.87491650326445e-07, + "loss": 5.2223, + "step": 5360 + }, + { + "epoch": 0.23172675194900288, + "learning_rate": 9.874431683509664e-07, + "loss": 5.2098, + "step": 5380 + }, + { + "epoch": 0.23258818968859027, + "learning_rate": 9.873946863754874e-07, + "loss": 5.2615, + "step": 5400 + }, + { + "epoch": 0.23344962742817763, + "learning_rate": 9.873462044000085e-07, + "loss": 5.2096, + "step": 5420 + }, + { + "epoch": 0.234311065167765, + "learning_rate": 9.872977224245298e-07, + "loss": 5.1175, + "step": 5440 + }, + { + "epoch": 0.23517250290735237, + "learning_rate": 9.872492404490509e-07, + "loss": 5.2392, + "step": 5460 + }, + { + "epoch": 0.23603394064693975, + "learning_rate": 9.87200758473572e-07, + "loss": 5.1828, + "step": 5480 + }, + { + "epoch": 0.2368953783865271, + "learning_rate": 9.87152276498093e-07, + "loss": 5.1802, + "step": 5500 + }, + { + "epoch": 0.2377568161261145, + "learning_rate": 9.87103794522614e-07, + "loss": 5.2447, + "step": 5520 + }, + { + "epoch": 0.23861825386570185, + "learning_rate": 9.870553125471354e-07, + "loss": 5.0941, + "step": 5540 + }, + { + "epoch": 0.23947969160528923, + "learning_rate": 9.870068305716562e-07, + "loss": 5.0211, + "step": 5560 + }, + { + "epoch": 0.2403411293448766, + "learning_rate": 9.869583485961775e-07, + "loss": 5.4385, + "step": 5580 + }, + { + "epoch": 0.24120256708446397, + "learning_rate": 9.869098666206986e-07, + "loss": 5.5081, + "step": 5600 + }, + { + "epoch": 0.24206400482405135, + "learning_rate": 9.868613846452196e-07, + "loss": 5.5758, + "step": 5620 + }, + { + "epoch": 0.2429254425636387, + "learning_rate": 9.868129026697407e-07, + "loss": 5.379, + "step": 5640 + }, + { + "epoch": 0.2437868803032261, + "learning_rate": 9.867644206942618e-07, + "loss": 5.5049, + "step": 5660 + }, + { + "epoch": 0.24464831804281345, + "learning_rate": 9.86715938718783e-07, + "loss": 5.2212, + "step": 5680 + }, + { + "epoch": 0.24550975578240083, + "learning_rate": 9.86667456743304e-07, + "loss": 5.1315, + "step": 5700 + }, + { + "epoch": 0.2463711935219882, + "learning_rate": 9.866189747678252e-07, + "loss": 4.9211, + "step": 5720 + }, + { + "epoch": 0.24723263126157558, + "learning_rate": 9.86570492792346e-07, + "loss": 5.2577, + "step": 5740 + }, + { + "epoch": 0.24809406900116293, + "learning_rate": 9.865220108168674e-07, + "loss": 5.1484, + "step": 5760 + }, + { + "epoch": 0.24895550674075032, + "learning_rate": 9.864735288413884e-07, + "loss": 5.2857, + "step": 5780 + }, + { + "epoch": 0.24981694448033767, + "learning_rate": 9.864250468659095e-07, + "loss": 5.1734, + "step": 5800 + }, + { + "epoch": 0.25067838221992506, + "learning_rate": 9.863765648904308e-07, + "loss": 5.3354, + "step": 5820 + }, + { + "epoch": 0.2515398199595124, + "learning_rate": 9.863280829149519e-07, + "loss": 5.2673, + "step": 5840 + }, + { + "epoch": 0.2524012576990998, + "learning_rate": 9.86279600939473e-07, + "loss": 5.1835, + "step": 5860 + }, + { + "epoch": 0.2532626954386872, + "learning_rate": 9.86231118963994e-07, + "loss": 5.1701, + "step": 5880 + }, + { + "epoch": 0.25412413317827454, + "learning_rate": 9.86182636988515e-07, + "loss": 5.2417, + "step": 5900 + }, + { + "epoch": 0.2549855709178619, + "learning_rate": 9.861341550130364e-07, + "loss": 4.9244, + "step": 5920 + }, + { + "epoch": 0.2558470086574493, + "learning_rate": 9.860856730375572e-07, + "loss": 5.0248, + "step": 5940 + }, + { + "epoch": 0.25670844639703666, + "learning_rate": 9.860371910620785e-07, + "loss": 5.362, + "step": 5960 + }, + { + "epoch": 0.257569884136624, + "learning_rate": 9.859887090865996e-07, + "loss": 4.9936, + "step": 5980 + }, + { + "epoch": 0.2584313218762114, + "learning_rate": 9.859402271111206e-07, + "loss": 5.5917, + "step": 6000 + }, + { + "epoch": 0.2592927596157988, + "learning_rate": 9.858917451356417e-07, + "loss": 5.1554, + "step": 6020 + }, + { + "epoch": 0.26015419735538614, + "learning_rate": 9.858432631601628e-07, + "loss": 5.3179, + "step": 6040 + }, + { + "epoch": 0.2610156350949735, + "learning_rate": 9.85794781184684e-07, + "loss": 5.2851, + "step": 6060 + }, + { + "epoch": 0.2618770728345609, + "learning_rate": 9.857462992092051e-07, + "loss": 5.2355, + "step": 6080 + }, + { + "epoch": 0.26273851057414827, + "learning_rate": 9.856978172337262e-07, + "loss": 5.2866, + "step": 6100 + }, + { + "epoch": 0.2635999483137356, + "learning_rate": 9.85649335258247e-07, + "loss": 5.2053, + "step": 6120 + }, + { + "epoch": 0.264461386053323, + "learning_rate": 9.856008532827684e-07, + "loss": 5.0226, + "step": 6140 + }, + { + "epoch": 0.2653228237929104, + "learning_rate": 9.855523713072894e-07, + "loss": 5.2522, + "step": 6160 + }, + { + "epoch": 0.26618426153249775, + "learning_rate": 9.855038893318105e-07, + "loss": 5.101, + "step": 6180 + }, + { + "epoch": 0.2670456992720851, + "learning_rate": 9.854554073563318e-07, + "loss": 5.272, + "step": 6200 + }, + { + "epoch": 0.26790713701167246, + "learning_rate": 9.854069253808529e-07, + "loss": 5.2802, + "step": 6220 + }, + { + "epoch": 0.26876857475125987, + "learning_rate": 9.85358443405374e-07, + "loss": 5.3429, + "step": 6240 + }, + { + "epoch": 0.2696300124908472, + "learning_rate": 9.85309961429895e-07, + "loss": 5.3596, + "step": 6260 + }, + { + "epoch": 0.2704914502304346, + "learning_rate": 9.85261479454416e-07, + "loss": 4.9346, + "step": 6280 + }, + { + "epoch": 0.271352887970022, + "learning_rate": 9.852129974789373e-07, + "loss": 5.178, + "step": 6300 + }, + { + "epoch": 0.27221432570960935, + "learning_rate": 9.851645155034582e-07, + "loss": 5.266, + "step": 6320 + }, + { + "epoch": 0.2730757634491967, + "learning_rate": 9.851160335279795e-07, + "loss": 5.205, + "step": 6340 + }, + { + "epoch": 0.27393720118878406, + "learning_rate": 9.850675515525006e-07, + "loss": 5.1011, + "step": 6360 + }, + { + "epoch": 0.2747986389283715, + "learning_rate": 9.850190695770216e-07, + "loss": 5.3448, + "step": 6380 + }, + { + "epoch": 0.27566007666795883, + "learning_rate": 9.849705876015427e-07, + "loss": 5.1696, + "step": 6400 + }, + { + "epoch": 0.2765215144075462, + "learning_rate": 9.849221056260638e-07, + "loss": 4.74, + "step": 6420 + }, + { + "epoch": 0.27738295214713354, + "learning_rate": 9.84873623650585e-07, + "loss": 5.101, + "step": 6440 + }, + { + "epoch": 0.27824438988672096, + "learning_rate": 9.848251416751061e-07, + "loss": 5.1098, + "step": 6460 + }, + { + "epoch": 0.2791058276263083, + "learning_rate": 9.847766596996272e-07, + "loss": 5.2632, + "step": 6480 + }, + { + "epoch": 0.27996726536589567, + "learning_rate": 9.847281777241483e-07, + "loss": 5.2572, + "step": 6500 + }, + { + "epoch": 0.280828703105483, + "learning_rate": 9.846796957486693e-07, + "loss": 5.2582, + "step": 6520 + }, + { + "epoch": 0.28169014084507044, + "learning_rate": 9.846312137731906e-07, + "loss": 5.3315, + "step": 6540 + }, + { + "epoch": 0.2825515785846578, + "learning_rate": 9.845827317977115e-07, + "loss": 4.955, + "step": 6560 + }, + { + "epoch": 0.28341301632424515, + "learning_rate": 9.845342498222328e-07, + "loss": 5.2012, + "step": 6580 + }, + { + "epoch": 0.28427445406383256, + "learning_rate": 9.844857678467538e-07, + "loss": 5.3038, + "step": 6600 + }, + { + "epoch": 0.2851358918034199, + "learning_rate": 9.84437285871275e-07, + "loss": 5.2074, + "step": 6620 + }, + { + "epoch": 0.2859973295430073, + "learning_rate": 9.84388803895796e-07, + "loss": 5.0363, + "step": 6640 + }, + { + "epoch": 0.28685876728259463, + "learning_rate": 9.84340321920317e-07, + "loss": 5.1664, + "step": 6660 + }, + { + "epoch": 0.28772020502218204, + "learning_rate": 9.842918399448383e-07, + "loss": 5.1214, + "step": 6680 + }, + { + "epoch": 0.2885816427617694, + "learning_rate": 9.842433579693592e-07, + "loss": 4.9995, + "step": 6700 + }, + { + "epoch": 0.28944308050135675, + "learning_rate": 9.841948759938805e-07, + "loss": 5.0777, + "step": 6720 + }, + { + "epoch": 0.2903045182409441, + "learning_rate": 9.841463940184016e-07, + "loss": 5.1652, + "step": 6740 + }, + { + "epoch": 0.2911659559805315, + "learning_rate": 9.840979120429226e-07, + "loss": 5.1024, + "step": 6760 + }, + { + "epoch": 0.2920273937201189, + "learning_rate": 9.840494300674437e-07, + "loss": 5.054, + "step": 6780 + }, + { + "epoch": 0.29288883145970623, + "learning_rate": 9.840009480919648e-07, + "loss": 5.2045, + "step": 6800 + }, + { + "epoch": 0.29375026919929365, + "learning_rate": 9.83952466116486e-07, + "loss": 5.3555, + "step": 6820 + }, + { + "epoch": 0.294611706938881, + "learning_rate": 9.839039841410071e-07, + "loss": 5.1176, + "step": 6840 + }, + { + "epoch": 0.29547314467846836, + "learning_rate": 9.838555021655282e-07, + "loss": 5.4848, + "step": 6860 + }, + { + "epoch": 0.2963345824180557, + "learning_rate": 9.838070201900493e-07, + "loss": 5.1506, + "step": 6880 + }, + { + "epoch": 0.2971960201576431, + "learning_rate": 9.837585382145703e-07, + "loss": 5.1673, + "step": 6900 + }, + { + "epoch": 0.2980574578972305, + "learning_rate": 9.837100562390916e-07, + "loss": 5.0694, + "step": 6920 + }, + { + "epoch": 0.29891889563681784, + "learning_rate": 9.836615742636125e-07, + "loss": 5.4286, + "step": 6940 + }, + { + "epoch": 0.2997803333764052, + "learning_rate": 9.836130922881338e-07, + "loss": 4.9144, + "step": 6960 + }, + { + "epoch": 0.3006417711159926, + "learning_rate": 9.835646103126548e-07, + "loss": 4.9661, + "step": 6980 + }, + { + "epoch": 0.30150320885557996, + "learning_rate": 9.83516128337176e-07, + "loss": 5.0778, + "step": 7000 + }, + { + "epoch": 0.3023646465951673, + "learning_rate": 9.83467646361697e-07, + "loss": 5.14, + "step": 7020 + }, + { + "epoch": 0.30322608433475473, + "learning_rate": 9.83419164386218e-07, + "loss": 5.1204, + "step": 7040 + }, + { + "epoch": 0.3040875220743421, + "learning_rate": 9.833706824107393e-07, + "loss": 4.942, + "step": 7060 + }, + { + "epoch": 0.30494895981392944, + "learning_rate": 9.833222004352604e-07, + "loss": 5.08, + "step": 7080 + }, + { + "epoch": 0.3058103975535168, + "learning_rate": 9.832737184597815e-07, + "loss": 5.1513, + "step": 7100 + }, + { + "epoch": 0.3066718352931042, + "learning_rate": 9.832252364843028e-07, + "loss": 5.0832, + "step": 7120 + }, + { + "epoch": 0.30753327303269157, + "learning_rate": 9.831767545088236e-07, + "loss": 5.1246, + "step": 7140 + }, + { + "epoch": 0.3083947107722789, + "learning_rate": 9.83128272533345e-07, + "loss": 5.1067, + "step": 7160 + }, + { + "epoch": 0.3092561485118663, + "learning_rate": 9.830797905578658e-07, + "loss": 5.0547, + "step": 7180 + }, + { + "epoch": 0.3101175862514537, + "learning_rate": 9.83031308582387e-07, + "loss": 5.3209, + "step": 7200 + }, + { + "epoch": 0.31097902399104105, + "learning_rate": 9.829828266069081e-07, + "loss": 5.1226, + "step": 7220 + }, + { + "epoch": 0.3118404617306284, + "learning_rate": 9.829343446314292e-07, + "loss": 5.2013, + "step": 7240 + }, + { + "epoch": 0.3127018994702158, + "learning_rate": 9.828858626559505e-07, + "loss": 5.1918, + "step": 7260 + }, + { + "epoch": 0.3135633372098032, + "learning_rate": 9.828373806804713e-07, + "loss": 5.1001, + "step": 7280 + }, + { + "epoch": 0.31442477494939053, + "learning_rate": 9.827888987049926e-07, + "loss": 5.1812, + "step": 7300 + }, + { + "epoch": 0.3152862126889779, + "learning_rate": 9.827404167295135e-07, + "loss": 4.9552, + "step": 7320 + }, + { + "epoch": 0.3161476504285653, + "learning_rate": 9.826919347540348e-07, + "loss": 5.221, + "step": 7340 + }, + { + "epoch": 0.31700908816815265, + "learning_rate": 9.826434527785558e-07, + "loss": 4.8989, + "step": 7360 + }, + { + "epoch": 0.31787052590774, + "learning_rate": 9.82594970803077e-07, + "loss": 5.2631, + "step": 7380 + }, + { + "epoch": 0.31873196364732737, + "learning_rate": 9.82546488827598e-07, + "loss": 5.0938, + "step": 7400 + }, + { + "epoch": 0.3195934013869148, + "learning_rate": 9.82498006852119e-07, + "loss": 5.0062, + "step": 7420 + }, + { + "epoch": 0.32045483912650213, + "learning_rate": 9.824495248766403e-07, + "loss": 4.9372, + "step": 7440 + }, + { + "epoch": 0.3213162768660895, + "learning_rate": 9.824010429011614e-07, + "loss": 4.8481, + "step": 7460 + }, + { + "epoch": 0.3221777146056769, + "learning_rate": 9.823525609256825e-07, + "loss": 4.8534, + "step": 7480 + }, + { + "epoch": 0.32303915234526426, + "learning_rate": 9.823040789502038e-07, + "loss": 4.9954, + "step": 7500 + }, + { + "epoch": 0.3239005900848516, + "learning_rate": 9.822555969747246e-07, + "loss": 4.8983, + "step": 7520 + }, + { + "epoch": 0.32476202782443897, + "learning_rate": 9.82207114999246e-07, + "loss": 5.0276, + "step": 7540 + }, + { + "epoch": 0.3256234655640264, + "learning_rate": 9.821586330237668e-07, + "loss": 4.787, + "step": 7560 + }, + { + "epoch": 0.32648490330361374, + "learning_rate": 9.82110151048288e-07, + "loss": 5.4519, + "step": 7580 + }, + { + "epoch": 0.3273463410432011, + "learning_rate": 9.820616690728091e-07, + "loss": 5.1657, + "step": 7600 + }, + { + "epoch": 0.32820777878278845, + "learning_rate": 9.820131870973302e-07, + "loss": 5.0934, + "step": 7620 + }, + { + "epoch": 0.32906921652237586, + "learning_rate": 9.819647051218515e-07, + "loss": 5.1082, + "step": 7640 + }, + { + "epoch": 0.3299306542619632, + "learning_rate": 9.819162231463723e-07, + "loss": 4.9206, + "step": 7660 + }, + { + "epoch": 0.3307920920015506, + "learning_rate": 9.818677411708936e-07, + "loss": 5.1762, + "step": 7680 + }, + { + "epoch": 0.331653529741138, + "learning_rate": 9.818192591954147e-07, + "loss": 4.9105, + "step": 7700 + }, + { + "epoch": 0.33251496748072534, + "learning_rate": 9.817707772199358e-07, + "loss": 5.1632, + "step": 7720 + }, + { + "epoch": 0.3333764052203127, + "learning_rate": 9.817222952444568e-07, + "loss": 5.2379, + "step": 7740 + }, + { + "epoch": 0.33423784295990006, + "learning_rate": 9.81673813268978e-07, + "loss": 5.1562, + "step": 7760 + }, + { + "epoch": 0.33509928069948747, + "learning_rate": 9.81625331293499e-07, + "loss": 5.0009, + "step": 7780 + }, + { + "epoch": 0.3359607184390748, + "learning_rate": 9.8157684931802e-07, + "loss": 4.8898, + "step": 7800 + }, + { + "epoch": 0.3368221561786622, + "learning_rate": 9.815283673425413e-07, + "loss": 4.9866, + "step": 7820 + }, + { + "epoch": 0.33768359391824954, + "learning_rate": 9.814798853670624e-07, + "loss": 4.8798, + "step": 7840 + }, + { + "epoch": 0.33854503165783695, + "learning_rate": 9.814314033915835e-07, + "loss": 4.9858, + "step": 7860 + }, + { + "epoch": 0.3394064693974243, + "learning_rate": 9.813829214161047e-07, + "loss": 5.1652, + "step": 7880 + }, + { + "epoch": 0.34026790713701166, + "learning_rate": 9.813344394406256e-07, + "loss": 5.0168, + "step": 7900 + }, + { + "epoch": 0.341129344876599, + "learning_rate": 9.812859574651469e-07, + "loss": 4.9382, + "step": 7920 + }, + { + "epoch": 0.34199078261618643, + "learning_rate": 9.812374754896678e-07, + "loss": 5.1187, + "step": 7940 + }, + { + "epoch": 0.3428522203557738, + "learning_rate": 9.81188993514189e-07, + "loss": 4.8829, + "step": 7960 + }, + { + "epoch": 0.34371365809536114, + "learning_rate": 9.811405115387101e-07, + "loss": 4.8803, + "step": 7980 + }, + { + "epoch": 0.34457509583494855, + "learning_rate": 9.810920295632312e-07, + "loss": 4.9934, + "step": 8000 + }, + { + "epoch": 0.3454365335745359, + "learning_rate": 9.810435475877525e-07, + "loss": 5.0201, + "step": 8020 + }, + { + "epoch": 0.34629797131412327, + "learning_rate": 9.809950656122733e-07, + "loss": 5.2234, + "step": 8040 + }, + { + "epoch": 0.3471594090537106, + "learning_rate": 9.809465836367946e-07, + "loss": 5.102, + "step": 8060 + }, + { + "epoch": 0.34802084679329803, + "learning_rate": 9.808981016613157e-07, + "loss": 5.0268, + "step": 8080 + }, + { + "epoch": 0.3488822845328854, + "learning_rate": 9.808496196858367e-07, + "loss": 4.6825, + "step": 8100 + }, + { + "epoch": 0.34974372227247275, + "learning_rate": 9.80801137710358e-07, + "loss": 5.0273, + "step": 8120 + }, + { + "epoch": 0.3506051600120601, + "learning_rate": 9.807526557348789e-07, + "loss": 4.76, + "step": 8140 + }, + { + "epoch": 0.3514665977516475, + "learning_rate": 9.807041737594002e-07, + "loss": 4.8518, + "step": 8160 + }, + { + "epoch": 0.35232803549123487, + "learning_rate": 9.80655691783921e-07, + "loss": 5.0516, + "step": 8180 + }, + { + "epoch": 0.3531894732308222, + "learning_rate": 9.806072098084423e-07, + "loss": 5.2637, + "step": 8200 + }, + { + "epoch": 0.35405091097040964, + "learning_rate": 9.805587278329634e-07, + "loss": 5.178, + "step": 8220 + }, + { + "epoch": 0.354912348709997, + "learning_rate": 9.805102458574845e-07, + "loss": 5.163, + "step": 8240 + }, + { + "epoch": 0.35577378644958435, + "learning_rate": 9.804617638820057e-07, + "loss": 4.9961, + "step": 8260 + }, + { + "epoch": 0.3566352241891717, + "learning_rate": 9.804132819065266e-07, + "loss": 4.8015, + "step": 8280 + }, + { + "epoch": 0.3574966619287591, + "learning_rate": 9.803647999310479e-07, + "loss": 4.8818, + "step": 8300 + }, + { + "epoch": 0.3583580996683465, + "learning_rate": 9.803163179555687e-07, + "loss": 4.9811, + "step": 8320 + }, + { + "epoch": 0.35921953740793383, + "learning_rate": 9.8026783598009e-07, + "loss": 4.9989, + "step": 8340 + }, + { + "epoch": 0.3600809751475212, + "learning_rate": 9.80219354004611e-07, + "loss": 4.7071, + "step": 8360 + }, + { + "epoch": 0.3609424128871086, + "learning_rate": 9.801708720291322e-07, + "loss": 5.0305, + "step": 8380 + }, + { + "epoch": 0.36180385062669596, + "learning_rate": 9.801223900536535e-07, + "loss": 4.7833, + "step": 8400 + }, + { + "epoch": 0.3626652883662833, + "learning_rate": 9.800739080781743e-07, + "loss": 4.714, + "step": 8420 + }, + { + "epoch": 0.3635267261058707, + "learning_rate": 9.800254261026956e-07, + "loss": 4.7957, + "step": 8440 + }, + { + "epoch": 0.3643881638454581, + "learning_rate": 9.799769441272167e-07, + "loss": 4.9877, + "step": 8460 + }, + { + "epoch": 0.36524960158504544, + "learning_rate": 9.799284621517377e-07, + "loss": 5.1407, + "step": 8480 + }, + { + "epoch": 0.3661110393246328, + "learning_rate": 9.79879980176259e-07, + "loss": 5.0109, + "step": 8500 + }, + { + "epoch": 0.3669724770642202, + "learning_rate": 9.798314982007799e-07, + "loss": 5.0929, + "step": 8520 + }, + { + "epoch": 0.36783391480380756, + "learning_rate": 9.797830162253012e-07, + "loss": 4.8868, + "step": 8540 + }, + { + "epoch": 0.3686953525433949, + "learning_rate": 9.79734534249822e-07, + "loss": 5.0758, + "step": 8560 + }, + { + "epoch": 0.3695567902829823, + "learning_rate": 9.796860522743433e-07, + "loss": 4.7914, + "step": 8580 + }, + { + "epoch": 0.3704182280225697, + "learning_rate": 9.796375702988644e-07, + "loss": 4.9024, + "step": 8600 + }, + { + "epoch": 0.37127966576215704, + "learning_rate": 9.795890883233855e-07, + "loss": 5.1092, + "step": 8620 + }, + { + "epoch": 0.3721411035017444, + "learning_rate": 9.795406063479067e-07, + "loss": 5.0201, + "step": 8640 + }, + { + "epoch": 0.3730025412413318, + "learning_rate": 9.794921243724276e-07, + "loss": 4.9426, + "step": 8660 + }, + { + "epoch": 0.37386397898091916, + "learning_rate": 9.794436423969489e-07, + "loss": 5.0351, + "step": 8680 + }, + { + "epoch": 0.3747254167205065, + "learning_rate": 9.7939516042147e-07, + "loss": 4.855, + "step": 8700 + }, + { + "epoch": 0.3755868544600939, + "learning_rate": 9.79346678445991e-07, + "loss": 4.9019, + "step": 8720 + }, + { + "epoch": 0.3764482921996813, + "learning_rate": 9.792981964705123e-07, + "loss": 4.9067, + "step": 8740 + }, + { + "epoch": 0.37730972993926865, + "learning_rate": 9.792497144950332e-07, + "loss": 4.9602, + "step": 8760 + }, + { + "epoch": 0.378171167678856, + "learning_rate": 9.792012325195544e-07, + "loss": 5.1773, + "step": 8780 + }, + { + "epoch": 0.37903260541844336, + "learning_rate": 9.791527505440753e-07, + "loss": 4.8999, + "step": 8800 + }, + { + "epoch": 0.37989404315803077, + "learning_rate": 9.791042685685966e-07, + "loss": 5.1569, + "step": 8820 + }, + { + "epoch": 0.3807554808976181, + "learning_rate": 9.790557865931177e-07, + "loss": 5.0223, + "step": 8840 + }, + { + "epoch": 0.3816169186372055, + "learning_rate": 9.790073046176387e-07, + "loss": 4.9746, + "step": 8860 + }, + { + "epoch": 0.3824783563767929, + "learning_rate": 9.7895882264216e-07, + "loss": 4.7316, + "step": 8880 + }, + { + "epoch": 0.38333979411638025, + "learning_rate": 9.789103406666809e-07, + "loss": 4.8671, + "step": 8900 + }, + { + "epoch": 0.3842012318559676, + "learning_rate": 9.788618586912022e-07, + "loss": 4.8685, + "step": 8920 + }, + { + "epoch": 0.38506266959555496, + "learning_rate": 9.78813376715723e-07, + "loss": 5.0705, + "step": 8940 + }, + { + "epoch": 0.3859241073351424, + "learning_rate": 9.787648947402443e-07, + "loss": 5.0033, + "step": 8960 + }, + { + "epoch": 0.38678554507472973, + "learning_rate": 9.787164127647654e-07, + "loss": 5.13, + "step": 8980 + }, + { + "epoch": 0.3876469828143171, + "learning_rate": 9.786679307892864e-07, + "loss": 4.9016, + "step": 9000 + }, + { + "epoch": 0.38850842055390444, + "learning_rate": 9.786194488138077e-07, + "loss": 4.5963, + "step": 9020 + }, + { + "epoch": 0.38936985829349186, + "learning_rate": 9.785709668383288e-07, + "loss": 5.1174, + "step": 9040 + }, + { + "epoch": 0.3902312960330792, + "learning_rate": 9.785224848628499e-07, + "loss": 4.6056, + "step": 9060 + }, + { + "epoch": 0.39109273377266657, + "learning_rate": 9.78474002887371e-07, + "loss": 5.1854, + "step": 9080 + }, + { + "epoch": 0.391954171512254, + "learning_rate": 9.78425520911892e-07, + "loss": 5.0354, + "step": 9100 + }, + { + "epoch": 0.39281560925184134, + "learning_rate": 9.783770389364133e-07, + "loss": 4.973, + "step": 9120 + }, + { + "epoch": 0.3936770469914287, + "learning_rate": 9.783285569609342e-07, + "loss": 4.9599, + "step": 9140 + }, + { + "epoch": 0.39453848473101605, + "learning_rate": 9.782800749854554e-07, + "loss": 4.8212, + "step": 9160 + }, + { + "epoch": 0.39539992247060346, + "learning_rate": 9.782315930099763e-07, + "loss": 4.7895, + "step": 9180 + }, + { + "epoch": 0.3962613602101908, + "learning_rate": 9.781831110344976e-07, + "loss": 5.0378, + "step": 9200 + }, + { + "epoch": 0.3971227979497782, + "learning_rate": 9.781346290590187e-07, + "loss": 5.1935, + "step": 9220 + }, + { + "epoch": 0.39798423568936553, + "learning_rate": 9.780861470835397e-07, + "loss": 5.0377, + "step": 9240 + }, + { + "epoch": 0.39884567342895294, + "learning_rate": 9.78037665108061e-07, + "loss": 5.2153, + "step": 9260 + }, + { + "epoch": 0.3997071111685403, + "learning_rate": 9.77989183132582e-07, + "loss": 5.0034, + "step": 9280 + }, + { + "epoch": 0.40056854890812765, + "learning_rate": 9.779407011571032e-07, + "loss": 5.0281, + "step": 9300 + }, + { + "epoch": 0.401429986647715, + "learning_rate": 9.778922191816242e-07, + "loss": 4.5347, + "step": 9320 + }, + { + "epoch": 0.4022914243873024, + "learning_rate": 9.778437372061453e-07, + "loss": 4.7715, + "step": 9340 + }, + { + "epoch": 0.4031528621268898, + "learning_rate": 9.777952552306664e-07, + "loss": 5.0449, + "step": 9360 + }, + { + "epoch": 0.40401429986647713, + "learning_rate": 9.777467732551874e-07, + "loss": 5.1752, + "step": 9380 + }, + { + "epoch": 0.40487573760606455, + "learning_rate": 9.776982912797087e-07, + "loss": 5.063, + "step": 9400 + }, + { + "epoch": 0.4057371753456519, + "learning_rate": 9.776498093042298e-07, + "loss": 4.8293, + "step": 9420 + }, + { + "epoch": 0.40659861308523926, + "learning_rate": 9.776013273287509e-07, + "loss": 5.0506, + "step": 9440 + }, + { + "epoch": 0.4074600508248266, + "learning_rate": 9.77552845353272e-07, + "loss": 5.0681, + "step": 9460 + }, + { + "epoch": 0.408321488564414, + "learning_rate": 9.77504363377793e-07, + "loss": 4.9795, + "step": 9480 + }, + { + "epoch": 0.4091829263040014, + "learning_rate": 9.774558814023143e-07, + "loss": 4.7326, + "step": 9500 + }, + { + "epoch": 0.41004436404358874, + "learning_rate": 9.774073994268352e-07, + "loss": 4.5815, + "step": 9520 + }, + { + "epoch": 0.4109058017831761, + "learning_rate": 9.773589174513564e-07, + "loss": 5.0059, + "step": 9540 + }, + { + "epoch": 0.4117672395227635, + "learning_rate": 9.773104354758773e-07, + "loss": 4.8963, + "step": 9560 + }, + { + "epoch": 0.41262867726235086, + "learning_rate": 9.772619535003986e-07, + "loss": 4.9941, + "step": 9580 + }, + { + "epoch": 0.4134901150019382, + "learning_rate": 9.772134715249196e-07, + "loss": 5.0495, + "step": 9600 + }, + { + "epoch": 0.41435155274152563, + "learning_rate": 9.771649895494407e-07, + "loss": 4.5602, + "step": 9620 + }, + { + "epoch": 0.415212990481113, + "learning_rate": 9.77116507573962e-07, + "loss": 5.1274, + "step": 9640 + }, + { + "epoch": 0.41607442822070034, + "learning_rate": 9.77068025598483e-07, + "loss": 5.2212, + "step": 9660 + }, + { + "epoch": 0.4169358659602877, + "learning_rate": 9.770195436230041e-07, + "loss": 4.8636, + "step": 9680 + }, + { + "epoch": 0.4177973036998751, + "learning_rate": 9.769710616475252e-07, + "loss": 5.0909, + "step": 9700 + }, + { + "epoch": 0.41865874143946247, + "learning_rate": 9.769225796720463e-07, + "loss": 5.0281, + "step": 9720 + }, + { + "epoch": 0.4195201791790498, + "learning_rate": 9.768740976965676e-07, + "loss": 4.9283, + "step": 9740 + }, + { + "epoch": 0.4203816169186372, + "learning_rate": 9.768256157210884e-07, + "loss": 4.6899, + "step": 9760 + }, + { + "epoch": 0.4212430546582246, + "learning_rate": 9.767771337456097e-07, + "loss": 5.1294, + "step": 9780 + }, + { + "epoch": 0.42210449239781195, + "learning_rate": 9.767286517701308e-07, + "loss": 5.0745, + "step": 9800 + }, + { + "epoch": 0.4229659301373993, + "learning_rate": 9.766801697946519e-07, + "loss": 5.1475, + "step": 9820 + }, + { + "epoch": 0.4238273678769867, + "learning_rate": 9.76631687819173e-07, + "loss": 4.9102, + "step": 9840 + }, + { + "epoch": 0.42468880561657407, + "learning_rate": 9.76583205843694e-07, + "loss": 4.8146, + "step": 9860 + }, + { + "epoch": 0.42555024335616143, + "learning_rate": 9.765347238682153e-07, + "loss": 4.8416, + "step": 9880 + }, + { + "epoch": 0.4264116810957488, + "learning_rate": 9.764862418927361e-07, + "loss": 4.9126, + "step": 9900 + }, + { + "epoch": 0.4272731188353362, + "learning_rate": 9.764377599172574e-07, + "loss": 4.7237, + "step": 9920 + }, + { + "epoch": 0.42813455657492355, + "learning_rate": 9.763892779417783e-07, + "loss": 4.93, + "step": 9940 + }, + { + "epoch": 0.4289959943145109, + "learning_rate": 9.763407959662996e-07, + "loss": 4.8873, + "step": 9960 + }, + { + "epoch": 0.42985743205409827, + "learning_rate": 9.762923139908206e-07, + "loss": 4.7226, + "step": 9980 + }, + { + "epoch": 0.4307188697936857, + "learning_rate": 9.762438320153417e-07, + "loss": 5.0115, + "step": 10000 + }, + { + "epoch": 0.43158030753327303, + "learning_rate": 9.76195350039863e-07, + "loss": 4.9169, + "step": 10020 + }, + { + "epoch": 0.4324417452728604, + "learning_rate": 9.76146868064384e-07, + "loss": 4.9183, + "step": 10040 + }, + { + "epoch": 0.4333031830124478, + "learning_rate": 9.760983860889051e-07, + "loss": 4.8978, + "step": 10060 + }, + { + "epoch": 0.43416462075203516, + "learning_rate": 9.760499041134262e-07, + "loss": 4.6671, + "step": 10080 + }, + { + "epoch": 0.4350260584916225, + "learning_rate": 9.760014221379473e-07, + "loss": 4.9651, + "step": 10100 + }, + { + "epoch": 0.43588749623120987, + "learning_rate": 9.759529401624686e-07, + "loss": 4.8849, + "step": 10120 + }, + { + "epoch": 0.4367489339707973, + "learning_rate": 9.759044581869894e-07, + "loss": 4.7284, + "step": 10140 + }, + { + "epoch": 0.43761037171038464, + "learning_rate": 9.758559762115107e-07, + "loss": 4.8962, + "step": 10160 + }, + { + "epoch": 0.438471809449972, + "learning_rate": 9.758074942360318e-07, + "loss": 5.2482, + "step": 10180 + }, + { + "epoch": 0.43933324718955935, + "learning_rate": 9.757590122605529e-07, + "loss": 4.751, + "step": 10200 + }, + { + "epoch": 0.44019468492914676, + "learning_rate": 9.75710530285074e-07, + "loss": 4.873, + "step": 10220 + }, + { + "epoch": 0.4410561226687341, + "learning_rate": 9.75662048309595e-07, + "loss": 4.8341, + "step": 10240 + }, + { + "epoch": 0.4419175604083215, + "learning_rate": 9.756135663341163e-07, + "loss": 4.8262, + "step": 10260 + }, + { + "epoch": 0.4427789981479089, + "learning_rate": 9.755650843586374e-07, + "loss": 5.131, + "step": 10280 + }, + { + "epoch": 0.44364043588749624, + "learning_rate": 9.755166023831584e-07, + "loss": 5.0421, + "step": 10300 + }, + { + "epoch": 0.4445018736270836, + "learning_rate": 9.754681204076795e-07, + "loss": 4.9971, + "step": 10320 + }, + { + "epoch": 0.44536331136667096, + "learning_rate": 9.754196384322006e-07, + "loss": 5.0195, + "step": 10340 + }, + { + "epoch": 0.44622474910625837, + "learning_rate": 9.753711564567218e-07, + "loss": 4.7907, + "step": 10360 + }, + { + "epoch": 0.4470861868458457, + "learning_rate": 9.753226744812427e-07, + "loss": 4.9427, + "step": 10380 + }, + { + "epoch": 0.4479476245854331, + "learning_rate": 9.75274192505764e-07, + "loss": 4.9174, + "step": 10400 + }, + { + "epoch": 0.44880906232502044, + "learning_rate": 9.75225710530285e-07, + "loss": 5.0763, + "step": 10420 + }, + { + "epoch": 0.44967050006460785, + "learning_rate": 9.751772285548061e-07, + "loss": 4.8748, + "step": 10440 + }, + { + "epoch": 0.4505319378041952, + "learning_rate": 9.751287465793272e-07, + "loss": 4.9506, + "step": 10460 + }, + { + "epoch": 0.45139337554378256, + "learning_rate": 9.750802646038483e-07, + "loss": 4.6341, + "step": 10480 + }, + { + "epoch": 0.45225481328336997, + "learning_rate": 9.750317826283696e-07, + "loss": 4.7974, + "step": 10500 + }, + { + "epoch": 0.45311625102295733, + "learning_rate": 9.749833006528904e-07, + "loss": 4.8822, + "step": 10520 + }, + { + "epoch": 0.4539776887625447, + "learning_rate": 9.749348186774117e-07, + "loss": 4.7732, + "step": 10540 + }, + { + "epoch": 0.45483912650213204, + "learning_rate": 9.748863367019328e-07, + "loss": 5.0418, + "step": 10560 + }, + { + "epoch": 0.45570056424171945, + "learning_rate": 9.748378547264538e-07, + "loss": 5.0333, + "step": 10580 + }, + { + "epoch": 0.4565620019813068, + "learning_rate": 9.74789372750975e-07, + "loss": 4.7601, + "step": 10600 + }, + { + "epoch": 0.45742343972089416, + "learning_rate": 9.74740890775496e-07, + "loss": 4.8413, + "step": 10620 + }, + { + "epoch": 0.4582848774604815, + "learning_rate": 9.746924088000173e-07, + "loss": 4.8576, + "step": 10640 + }, + { + "epoch": 0.45914631520006893, + "learning_rate": 9.746439268245383e-07, + "loss": 4.9137, + "step": 10660 + }, + { + "epoch": 0.4600077529396563, + "learning_rate": 9.745954448490594e-07, + "loss": 4.7917, + "step": 10680 + }, + { + "epoch": 0.46086919067924365, + "learning_rate": 9.745469628735805e-07, + "loss": 5.0347, + "step": 10700 + }, + { + "epoch": 0.461730628418831, + "learning_rate": 9.744984808981016e-07, + "loss": 5.0721, + "step": 10720 + }, + { + "epoch": 0.4625920661584184, + "learning_rate": 9.744499989226228e-07, + "loss": 5.223, + "step": 10740 + }, + { + "epoch": 0.46345350389800577, + "learning_rate": 9.744015169471437e-07, + "loss": 4.7617, + "step": 10760 + }, + { + "epoch": 0.4643149416375931, + "learning_rate": 9.74353034971665e-07, + "loss": 4.7315, + "step": 10780 + }, + { + "epoch": 0.46517637937718054, + "learning_rate": 9.74304552996186e-07, + "loss": 5.0133, + "step": 10800 + }, + { + "epoch": 0.4660378171167679, + "learning_rate": 9.742560710207071e-07, + "loss": 4.5951, + "step": 10820 + }, + { + "epoch": 0.46689925485635525, + "learning_rate": 9.742075890452282e-07, + "loss": 5.0109, + "step": 10840 + }, + { + "epoch": 0.4677606925959426, + "learning_rate": 9.741591070697493e-07, + "loss": 4.7951, + "step": 10860 + }, + { + "epoch": 0.46862213033553, + "learning_rate": 9.741106250942706e-07, + "loss": 4.9083, + "step": 10880 + }, + { + "epoch": 0.4694835680751174, + "learning_rate": 9.740621431187916e-07, + "loss": 5.0881, + "step": 10900 + }, + { + "epoch": 0.47034500581470473, + "learning_rate": 9.740136611433127e-07, + "loss": 4.9049, + "step": 10920 + }, + { + "epoch": 0.4712064435542921, + "learning_rate": 9.73965179167834e-07, + "loss": 5.0503, + "step": 10940 + }, + { + "epoch": 0.4720678812938795, + "learning_rate": 9.739166971923548e-07, + "loss": 5.1389, + "step": 10960 + }, + { + "epoch": 0.47292931903346686, + "learning_rate": 9.73868215216876e-07, + "loss": 4.6824, + "step": 10980 + }, + { + "epoch": 0.4737907567730542, + "learning_rate": 9.73819733241397e-07, + "loss": 5.0677, + "step": 11000 + }, + { + "epoch": 0.4746521945126416, + "learning_rate": 9.737712512659183e-07, + "loss": 4.8373, + "step": 11020 + }, + { + "epoch": 0.475513632252229, + "learning_rate": 9.737227692904393e-07, + "loss": 4.7048, + "step": 11040 + }, + { + "epoch": 0.47637506999181634, + "learning_rate": 9.736742873149604e-07, + "loss": 4.7661, + "step": 11060 + }, + { + "epoch": 0.4772365077314037, + "learning_rate": 9.736258053394815e-07, + "loss": 4.5163, + "step": 11080 + }, + { + "epoch": 0.4780979454709911, + "learning_rate": 9.735773233640026e-07, + "loss": 4.8271, + "step": 11100 + }, + { + "epoch": 0.47895938321057846, + "learning_rate": 9.735288413885238e-07, + "loss": 4.9904, + "step": 11120 + }, + { + "epoch": 0.4798208209501658, + "learning_rate": 9.734803594130447e-07, + "loss": 5.1391, + "step": 11140 + }, + { + "epoch": 0.4806822586897532, + "learning_rate": 9.73431877437566e-07, + "loss": 4.9359, + "step": 11160 + }, + { + "epoch": 0.4815436964293406, + "learning_rate": 9.73383395462087e-07, + "loss": 4.7196, + "step": 11180 + }, + { + "epoch": 0.48240513416892794, + "learning_rate": 9.733349134866081e-07, + "loss": 4.8989, + "step": 11200 + }, + { + "epoch": 0.4832665719085153, + "learning_rate": 9.732864315111292e-07, + "loss": 4.5728, + "step": 11220 + }, + { + "epoch": 0.4841280096481027, + "learning_rate": 9.732379495356503e-07, + "loss": 4.8242, + "step": 11240 + }, + { + "epoch": 0.48498944738769006, + "learning_rate": 9.731894675601715e-07, + "loss": 4.9628, + "step": 11260 + }, + { + "epoch": 0.4858508851272774, + "learning_rate": 9.731409855846926e-07, + "loss": 4.9849, + "step": 11280 + }, + { + "epoch": 0.4867123228668648, + "learning_rate": 9.730925036092137e-07, + "loss": 4.7428, + "step": 11300 + }, + { + "epoch": 0.4875737606064522, + "learning_rate": 9.73044021633735e-07, + "loss": 4.84, + "step": 11320 + }, + { + "epoch": 0.48843519834603955, + "learning_rate": 9.729955396582558e-07, + "loss": 4.8301, + "step": 11340 + }, + { + "epoch": 0.4892966360856269, + "learning_rate": 9.729470576827771e-07, + "loss": 4.8208, + "step": 11360 + }, + { + "epoch": 0.49015807382521426, + "learning_rate": 9.72898575707298e-07, + "loss": 4.799, + "step": 11380 + }, + { + "epoch": 0.49101951156480167, + "learning_rate": 9.728500937318193e-07, + "loss": 4.8384, + "step": 11400 + }, + { + "epoch": 0.491880949304389, + "learning_rate": 9.728016117563403e-07, + "loss": 4.8149, + "step": 11420 + }, + { + "epoch": 0.4927423870439764, + "learning_rate": 9.727531297808614e-07, + "loss": 5.0723, + "step": 11440 + }, + { + "epoch": 0.4936038247835638, + "learning_rate": 9.727046478053825e-07, + "loss": 4.8349, + "step": 11460 + }, + { + "epoch": 0.49446526252315115, + "learning_rate": 9.726561658299035e-07, + "loss": 4.8027, + "step": 11480 + }, + { + "epoch": 0.4953267002627385, + "learning_rate": 9.726076838544248e-07, + "loss": 4.4655, + "step": 11500 + }, + { + "epoch": 0.49618813800232586, + "learning_rate": 9.725592018789457e-07, + "loss": 4.6683, + "step": 11520 + }, + { + "epoch": 0.4970495757419133, + "learning_rate": 9.72510719903467e-07, + "loss": 4.3801, + "step": 11540 + }, + { + "epoch": 0.49791101348150063, + "learning_rate": 9.72462237927988e-07, + "loss": 4.8508, + "step": 11560 + }, + { + "epoch": 0.498772451221088, + "learning_rate": 9.724137559525091e-07, + "loss": 4.8787, + "step": 11580 + }, + { + "epoch": 0.49963388896067534, + "learning_rate": 9.723652739770302e-07, + "loss": 4.9498, + "step": 11600 + }, + { + "epoch": 0.5004953267002628, + "learning_rate": 9.723167920015513e-07, + "loss": 5.0344, + "step": 11620 + }, + { + "epoch": 0.5013567644398501, + "learning_rate": 9.722683100260725e-07, + "loss": 5.0325, + "step": 11640 + }, + { + "epoch": 0.5022182021794375, + "learning_rate": 9.722198280505936e-07, + "loss": 4.8234, + "step": 11660 + }, + { + "epoch": 0.5030796399190248, + "learning_rate": 9.721713460751147e-07, + "loss": 4.5882, + "step": 11680 + }, + { + "epoch": 0.5039410776586122, + "learning_rate": 9.72122864099636e-07, + "loss": 4.864, + "step": 11700 + }, + { + "epoch": 0.5048025153981996, + "learning_rate": 9.720743821241568e-07, + "loss": 4.8641, + "step": 11720 + }, + { + "epoch": 0.505663953137787, + "learning_rate": 9.720259001486781e-07, + "loss": 4.8875, + "step": 11740 + }, + { + "epoch": 0.5065253908773744, + "learning_rate": 9.71977418173199e-07, + "loss": 5.13, + "step": 11760 + }, + { + "epoch": 0.5073868286169617, + "learning_rate": 9.719289361977203e-07, + "loss": 4.8252, + "step": 11780 + }, + { + "epoch": 0.5082482663565491, + "learning_rate": 9.718804542222413e-07, + "loss": 4.9937, + "step": 11800 + }, + { + "epoch": 0.5091097040961364, + "learning_rate": 9.718319722467624e-07, + "loss": 4.9656, + "step": 11820 + }, + { + "epoch": 0.5099711418357238, + "learning_rate": 9.717834902712837e-07, + "loss": 4.8202, + "step": 11840 + }, + { + "epoch": 0.5108325795753111, + "learning_rate": 9.717350082958045e-07, + "loss": 4.8806, + "step": 11860 + }, + { + "epoch": 0.5116940173148986, + "learning_rate": 9.716865263203258e-07, + "loss": 4.7003, + "step": 11880 + }, + { + "epoch": 0.512555455054486, + "learning_rate": 9.71638044344847e-07, + "loss": 4.9151, + "step": 11900 + }, + { + "epoch": 0.5134168927940733, + "learning_rate": 9.71589562369368e-07, + "loss": 4.7234, + "step": 11920 + }, + { + "epoch": 0.5142783305336607, + "learning_rate": 9.715410803938892e-07, + "loss": 4.8458, + "step": 11940 + }, + { + "epoch": 0.515139768273248, + "learning_rate": 9.714925984184101e-07, + "loss": 4.7352, + "step": 11960 + }, + { + "epoch": 0.5160012060128354, + "learning_rate": 9.714441164429314e-07, + "loss": 5.0708, + "step": 11980 + }, + { + "epoch": 0.5168626437524227, + "learning_rate": 9.713956344674523e-07, + "loss": 4.8615, + "step": 12000 + }, + { + "epoch": 0.5177240814920102, + "learning_rate": 9.713471524919735e-07, + "loss": 4.8537, + "step": 12020 + }, + { + "epoch": 0.5185855192315976, + "learning_rate": 9.712986705164946e-07, + "loss": 4.6005, + "step": 12040 + }, + { + "epoch": 0.5194469569711849, + "learning_rate": 9.712501885410157e-07, + "loss": 4.7112, + "step": 12060 + }, + { + "epoch": 0.5203083947107723, + "learning_rate": 9.71201706565537e-07, + "loss": 4.7967, + "step": 12080 + }, + { + "epoch": 0.5211698324503596, + "learning_rate": 9.711532245900578e-07, + "loss": 4.8298, + "step": 12100 + }, + { + "epoch": 0.522031270189947, + "learning_rate": 9.71104742614579e-07, + "loss": 4.527, + "step": 12120 + }, + { + "epoch": 0.5228927079295344, + "learning_rate": 9.710562606391e-07, + "loss": 4.7663, + "step": 12140 + }, + { + "epoch": 0.5237541456691218, + "learning_rate": 9.710077786636212e-07, + "loss": 4.8853, + "step": 12160 + }, + { + "epoch": 0.5246155834087092, + "learning_rate": 9.709592966881423e-07, + "loss": 4.6954, + "step": 12180 + }, + { + "epoch": 0.5254770211482965, + "learning_rate": 9.709108147126634e-07, + "loss": 4.7645, + "step": 12200 + }, + { + "epoch": 0.5263384588878839, + "learning_rate": 9.708623327371847e-07, + "loss": 4.6727, + "step": 12220 + }, + { + "epoch": 0.5271998966274712, + "learning_rate": 9.708138507617055e-07, + "loss": 4.5477, + "step": 12240 + }, + { + "epoch": 0.5280613343670586, + "learning_rate": 9.707653687862268e-07, + "loss": 4.6893, + "step": 12260 + }, + { + "epoch": 0.528922772106646, + "learning_rate": 9.707168868107479e-07, + "loss": 4.8461, + "step": 12280 + }, + { + "epoch": 0.5297842098462333, + "learning_rate": 9.70668404835269e-07, + "loss": 4.5713, + "step": 12300 + }, + { + "epoch": 0.5306456475858208, + "learning_rate": 9.706199228597902e-07, + "loss": 4.6324, + "step": 12320 + }, + { + "epoch": 0.5315070853254081, + "learning_rate": 9.70571440884311e-07, + "loss": 4.9228, + "step": 12340 + }, + { + "epoch": 0.5323685230649955, + "learning_rate": 9.705229589088324e-07, + "loss": 4.947, + "step": 12360 + }, + { + "epoch": 0.5332299608045828, + "learning_rate": 9.704744769333532e-07, + "loss": 4.7576, + "step": 12380 + }, + { + "epoch": 0.5340913985441702, + "learning_rate": 9.704259949578745e-07, + "loss": 4.6725, + "step": 12400 + }, + { + "epoch": 0.5349528362837576, + "learning_rate": 9.703775129823956e-07, + "loss": 4.677, + "step": 12420 + }, + { + "epoch": 0.5358142740233449, + "learning_rate": 9.703290310069167e-07, + "loss": 4.7095, + "step": 12440 + }, + { + "epoch": 0.5366757117629324, + "learning_rate": 9.70280549031438e-07, + "loss": 4.8679, + "step": 12460 + }, + { + "epoch": 0.5375371495025197, + "learning_rate": 9.702320670559588e-07, + "loss": 4.7676, + "step": 12480 + }, + { + "epoch": 0.5383985872421071, + "learning_rate": 9.7018358508048e-07, + "loss": 4.7246, + "step": 12500 + }, + { + "epoch": 0.5392600249816945, + "learning_rate": 9.701351031050012e-07, + "loss": 4.7469, + "step": 12520 + }, + { + "epoch": 0.5401214627212818, + "learning_rate": 9.700866211295222e-07, + "loss": 4.7057, + "step": 12540 + }, + { + "epoch": 0.5409829004608692, + "learning_rate": 9.700381391540435e-07, + "loss": 4.5612, + "step": 12560 + }, + { + "epoch": 0.5418443382004565, + "learning_rate": 9.699896571785644e-07, + "loss": 4.6163, + "step": 12580 + }, + { + "epoch": 0.542705775940044, + "learning_rate": 9.699411752030857e-07, + "loss": 4.5884, + "step": 12600 + }, + { + "epoch": 0.5435672136796313, + "learning_rate": 9.698926932276065e-07, + "loss": 4.7224, + "step": 12620 + }, + { + "epoch": 0.5444286514192187, + "learning_rate": 9.698442112521278e-07, + "loss": 4.7125, + "step": 12640 + }, + { + "epoch": 0.5452900891588061, + "learning_rate": 9.697957292766489e-07, + "loss": 4.5919, + "step": 12660 + }, + { + "epoch": 0.5461515268983934, + "learning_rate": 9.6974724730117e-07, + "loss": 4.9423, + "step": 12680 + }, + { + "epoch": 0.5470129646379808, + "learning_rate": 9.696987653256912e-07, + "loss": 4.7632, + "step": 12700 + }, + { + "epoch": 0.5478744023775681, + "learning_rate": 9.69650283350212e-07, + "loss": 4.7262, + "step": 12720 + }, + { + "epoch": 0.5487358401171555, + "learning_rate": 9.696018013747334e-07, + "loss": 4.8491, + "step": 12740 + }, + { + "epoch": 0.549597277856743, + "learning_rate": 9.695533193992542e-07, + "loss": 4.9667, + "step": 12760 + }, + { + "epoch": 0.5504587155963303, + "learning_rate": 9.695048374237755e-07, + "loss": 4.669, + "step": 12780 + }, + { + "epoch": 0.5513201533359177, + "learning_rate": 9.694563554482966e-07, + "loss": 4.7343, + "step": 12800 + }, + { + "epoch": 0.552181591075505, + "learning_rate": 9.694078734728177e-07, + "loss": 5.1538, + "step": 12820 + }, + { + "epoch": 0.5530430288150924, + "learning_rate": 9.69359391497339e-07, + "loss": 4.6751, + "step": 12840 + }, + { + "epoch": 0.5539044665546797, + "learning_rate": 9.693109095218598e-07, + "loss": 4.249, + "step": 12860 + }, + { + "epoch": 0.5547659042942671, + "learning_rate": 9.69262427546381e-07, + "loss": 4.5602, + "step": 12880 + }, + { + "epoch": 0.5556273420338546, + "learning_rate": 9.692139455709022e-07, + "loss": 4.7908, + "step": 12900 + }, + { + "epoch": 0.5564887797734419, + "learning_rate": 9.691654635954232e-07, + "loss": 4.8403, + "step": 12920 + }, + { + "epoch": 0.5573502175130293, + "learning_rate": 9.691169816199445e-07, + "loss": 4.654, + "step": 12940 + }, + { + "epoch": 0.5582116552526166, + "learning_rate": 9.690684996444654e-07, + "loss": 4.9628, + "step": 12960 + }, + { + "epoch": 0.559073092992204, + "learning_rate": 9.690200176689867e-07, + "loss": 4.6785, + "step": 12980 + }, + { + "epoch": 0.5599345307317913, + "learning_rate": 9.689715356935075e-07, + "loss": 4.7578, + "step": 13000 + }, + { + "epoch": 0.5607959684713787, + "learning_rate": 9.689230537180288e-07, + "loss": 4.6124, + "step": 13020 + }, + { + "epoch": 0.561657406210966, + "learning_rate": 9.688745717425499e-07, + "loss": 4.7457, + "step": 13040 + }, + { + "epoch": 0.5625188439505535, + "learning_rate": 9.68826089767071e-07, + "loss": 4.656, + "step": 13060 + }, + { + "epoch": 0.5633802816901409, + "learning_rate": 9.687776077915922e-07, + "loss": 4.6385, + "step": 13080 + }, + { + "epoch": 0.5642417194297282, + "learning_rate": 9.687291258161133e-07, + "loss": 4.791, + "step": 13100 + }, + { + "epoch": 0.5651031571693156, + "learning_rate": 9.686806438406344e-07, + "loss": 4.6335, + "step": 13120 + }, + { + "epoch": 0.5659645949089029, + "learning_rate": 9.686321618651552e-07, + "loss": 4.9381, + "step": 13140 + }, + { + "epoch": 0.5668260326484903, + "learning_rate": 9.685836798896765e-07, + "loss": 4.5365, + "step": 13160 + }, + { + "epoch": 0.5676874703880777, + "learning_rate": 9.685351979141976e-07, + "loss": 4.7251, + "step": 13180 + }, + { + "epoch": 0.5685489081276651, + "learning_rate": 9.684867159387187e-07, + "loss": 4.6211, + "step": 13200 + }, + { + "epoch": 0.5694103458672525, + "learning_rate": 9.6843823396324e-07, + "loss": 4.8273, + "step": 13220 + }, + { + "epoch": 0.5702717836068398, + "learning_rate": 9.683897519877608e-07, + "loss": 4.8895, + "step": 13240 + }, + { + "epoch": 0.5711332213464272, + "learning_rate": 9.68341270012282e-07, + "loss": 5.1973, + "step": 13260 + }, + { + "epoch": 0.5719946590860145, + "learning_rate": 9.682927880368032e-07, + "loss": 4.7367, + "step": 13280 + }, + { + "epoch": 0.5728560968256019, + "learning_rate": 9.682443060613242e-07, + "loss": 4.807, + "step": 13300 + }, + { + "epoch": 0.5737175345651893, + "learning_rate": 9.681958240858455e-07, + "loss": 4.7434, + "step": 13320 + }, + { + "epoch": 0.5745789723047767, + "learning_rate": 9.681473421103664e-07, + "loss": 4.8386, + "step": 13340 + }, + { + "epoch": 0.5754404100443641, + "learning_rate": 9.680988601348877e-07, + "loss": 4.9904, + "step": 13360 + }, + { + "epoch": 0.5763018477839514, + "learning_rate": 9.680503781594085e-07, + "loss": 4.6482, + "step": 13380 + }, + { + "epoch": 0.5771632855235388, + "learning_rate": 9.680018961839298e-07, + "loss": 4.8677, + "step": 13400 + }, + { + "epoch": 0.5780247232631262, + "learning_rate": 9.679534142084509e-07, + "loss": 4.8015, + "step": 13420 + }, + { + "epoch": 0.5788861610027135, + "learning_rate": 9.67904932232972e-07, + "loss": 4.9403, + "step": 13440 + }, + { + "epoch": 0.5797475987423009, + "learning_rate": 9.678564502574932e-07, + "loss": 4.8006, + "step": 13460 + }, + { + "epoch": 0.5806090364818882, + "learning_rate": 9.678079682820143e-07, + "loss": 4.9156, + "step": 13480 + }, + { + "epoch": 0.5814704742214757, + "learning_rate": 9.677594863065354e-07, + "loss": 4.575, + "step": 13500 + }, + { + "epoch": 0.582331911961063, + "learning_rate": 9.677110043310564e-07, + "loss": 4.6135, + "step": 13520 + }, + { + "epoch": 0.5831933497006504, + "learning_rate": 9.676625223555775e-07, + "loss": 4.8542, + "step": 13540 + }, + { + "epoch": 0.5840547874402378, + "learning_rate": 9.676140403800988e-07, + "loss": 4.5898, + "step": 13560 + }, + { + "epoch": 0.5849162251798251, + "learning_rate": 9.675655584046197e-07, + "loss": 4.6785, + "step": 13580 + }, + { + "epoch": 0.5857776629194125, + "learning_rate": 9.67517076429141e-07, + "loss": 4.4886, + "step": 13600 + }, + { + "epoch": 0.5866391006589998, + "learning_rate": 9.67468594453662e-07, + "loss": 4.8392, + "step": 13620 + }, + { + "epoch": 0.5875005383985873, + "learning_rate": 9.67420112478183e-07, + "loss": 4.7093, + "step": 13640 + }, + { + "epoch": 0.5883619761381746, + "learning_rate": 9.673716305027042e-07, + "loss": 4.5766, + "step": 13660 + }, + { + "epoch": 0.589223413877762, + "learning_rate": 9.673231485272252e-07, + "loss": 4.6722, + "step": 13680 + }, + { + "epoch": 0.5900848516173494, + "learning_rate": 9.672746665517465e-07, + "loss": 4.7496, + "step": 13700 + }, + { + "epoch": 0.5909462893569367, + "learning_rate": 9.672261845762674e-07, + "loss": 4.5997, + "step": 13720 + }, + { + "epoch": 0.5918077270965241, + "learning_rate": 9.671777026007886e-07, + "loss": 4.811, + "step": 13740 + }, + { + "epoch": 0.5926691648361114, + "learning_rate": 9.671292206253095e-07, + "loss": 4.7544, + "step": 13760 + }, + { + "epoch": 0.5935306025756989, + "learning_rate": 9.670807386498308e-07, + "loss": 4.872, + "step": 13780 + }, + { + "epoch": 0.5943920403152863, + "learning_rate": 9.670322566743519e-07, + "loss": 4.616, + "step": 13800 + }, + { + "epoch": 0.5952534780548736, + "learning_rate": 9.66983774698873e-07, + "loss": 4.6266, + "step": 13820 + }, + { + "epoch": 0.596114915794461, + "learning_rate": 9.669352927233942e-07, + "loss": 4.8612, + "step": 13840 + }, + { + "epoch": 0.5969763535340483, + "learning_rate": 9.668868107479153e-07, + "loss": 4.4442, + "step": 13860 + }, + { + "epoch": 0.5978377912736357, + "learning_rate": 9.668383287724364e-07, + "loss": 4.6665, + "step": 13880 + }, + { + "epoch": 0.598699229013223, + "learning_rate": 9.667898467969574e-07, + "loss": 4.5713, + "step": 13900 + }, + { + "epoch": 0.5995606667528104, + "learning_rate": 9.667413648214785e-07, + "loss": 4.6818, + "step": 13920 + }, + { + "epoch": 0.6004221044923979, + "learning_rate": 9.666928828459998e-07, + "loss": 4.8311, + "step": 13940 + }, + { + "epoch": 0.6012835422319852, + "learning_rate": 9.666444008705206e-07, + "loss": 4.7409, + "step": 13960 + }, + { + "epoch": 0.6021449799715726, + "learning_rate": 9.66595918895042e-07, + "loss": 4.6599, + "step": 13980 + }, + { + "epoch": 0.6030064177111599, + "learning_rate": 9.66547436919563e-07, + "loss": 4.5503, + "step": 14000 + }, + { + "epoch": 0.6038678554507473, + "learning_rate": 9.66498954944084e-07, + "loss": 4.6705, + "step": 14020 + }, + { + "epoch": 0.6047292931903346, + "learning_rate": 9.664504729686051e-07, + "loss": 4.9944, + "step": 14040 + }, + { + "epoch": 0.605590730929922, + "learning_rate": 9.664019909931262e-07, + "loss": 4.8301, + "step": 14060 + }, + { + "epoch": 0.6064521686695095, + "learning_rate": 9.663535090176475e-07, + "loss": 4.8497, + "step": 14080 + }, + { + "epoch": 0.6073136064090968, + "learning_rate": 9.663050270421686e-07, + "loss": 4.7864, + "step": 14100 + }, + { + "epoch": 0.6081750441486842, + "learning_rate": 9.662565450666896e-07, + "loss": 4.9266, + "step": 14120 + }, + { + "epoch": 0.6090364818882715, + "learning_rate": 9.662080630912107e-07, + "loss": 4.8071, + "step": 14140 + }, + { + "epoch": 0.6098979196278589, + "learning_rate": 9.661595811157318e-07, + "loss": 4.6743, + "step": 14160 + }, + { + "epoch": 0.6107593573674462, + "learning_rate": 9.66111099140253e-07, + "loss": 4.532, + "step": 14180 + }, + { + "epoch": 0.6116207951070336, + "learning_rate": 9.66062617164774e-07, + "loss": 4.9564, + "step": 14200 + }, + { + "epoch": 0.612482232846621, + "learning_rate": 9.660141351892952e-07, + "loss": 4.8359, + "step": 14220 + }, + { + "epoch": 0.6133436705862084, + "learning_rate": 9.659656532138163e-07, + "loss": 4.7172, + "step": 14240 + }, + { + "epoch": 0.6142051083257958, + "learning_rate": 9.659171712383374e-07, + "loss": 4.8027, + "step": 14260 + }, + { + "epoch": 0.6150665460653831, + "learning_rate": 9.658686892628584e-07, + "loss": 4.5783, + "step": 14280 + }, + { + "epoch": 0.6159279838049705, + "learning_rate": 9.658202072873795e-07, + "loss": 4.7292, + "step": 14300 + }, + { + "epoch": 0.6167894215445578, + "learning_rate": 9.657717253119008e-07, + "loss": 4.5805, + "step": 14320 + }, + { + "epoch": 0.6176508592841452, + "learning_rate": 9.657232433364216e-07, + "loss": 4.6905, + "step": 14340 + }, + { + "epoch": 0.6185122970237326, + "learning_rate": 9.65674761360943e-07, + "loss": 5.0062, + "step": 14360 + }, + { + "epoch": 0.61937373476332, + "learning_rate": 9.65626279385464e-07, + "loss": 4.5667, + "step": 14380 + }, + { + "epoch": 0.6202351725029074, + "learning_rate": 9.65577797409985e-07, + "loss": 4.7865, + "step": 14400 + }, + { + "epoch": 0.6210966102424947, + "learning_rate": 9.655293154345061e-07, + "loss": 4.8152, + "step": 14420 + }, + { + "epoch": 0.6219580479820821, + "learning_rate": 9.654808334590272e-07, + "loss": 5.001, + "step": 14440 + }, + { + "epoch": 0.6228194857216695, + "learning_rate": 9.654323514835485e-07, + "loss": 4.568, + "step": 14460 + }, + { + "epoch": 0.6236809234612568, + "learning_rate": 9.653838695080696e-07, + "loss": 5.1036, + "step": 14480 + }, + { + "epoch": 0.6245423612008442, + "learning_rate": 9.653353875325906e-07, + "loss": 4.6376, + "step": 14500 + }, + { + "epoch": 0.6254037989404316, + "learning_rate": 9.652869055571117e-07, + "loss": 4.8825, + "step": 14520 + }, + { + "epoch": 0.626265236680019, + "learning_rate": 9.652384235816328e-07, + "loss": 4.8115, + "step": 14540 + }, + { + "epoch": 0.6271266744196063, + "learning_rate": 9.65189941606154e-07, + "loss": 4.907, + "step": 14560 + }, + { + "epoch": 0.6279881121591937, + "learning_rate": 9.65141459630675e-07, + "loss": 4.7589, + "step": 14580 + }, + { + "epoch": 0.6288495498987811, + "learning_rate": 9.650929776551962e-07, + "loss": 4.6099, + "step": 14600 + }, + { + "epoch": 0.6297109876383684, + "learning_rate": 9.650444956797173e-07, + "loss": 4.7292, + "step": 14620 + }, + { + "epoch": 0.6305724253779558, + "learning_rate": 9.649960137042383e-07, + "loss": 4.8569, + "step": 14640 + }, + { + "epoch": 0.6314338631175431, + "learning_rate": 9.649475317287594e-07, + "loss": 4.476, + "step": 14660 + }, + { + "epoch": 0.6322953008571306, + "learning_rate": 9.648990497532805e-07, + "loss": 5.1189, + "step": 14680 + }, + { + "epoch": 0.633156738596718, + "learning_rate": 9.648505677778018e-07, + "loss": 4.4871, + "step": 14700 + }, + { + "epoch": 0.6340181763363053, + "learning_rate": 9.648020858023228e-07, + "loss": 4.7954, + "step": 14720 + }, + { + "epoch": 0.6348796140758927, + "learning_rate": 9.64753603826844e-07, + "loss": 4.8858, + "step": 14740 + }, + { + "epoch": 0.63574105181548, + "learning_rate": 9.64705121851365e-07, + "loss": 4.7299, + "step": 14760 + }, + { + "epoch": 0.6366024895550674, + "learning_rate": 9.64656639875886e-07, + "loss": 4.6986, + "step": 14780 + }, + { + "epoch": 0.6374639272946547, + "learning_rate": 9.646081579004071e-07, + "loss": 4.716, + "step": 14800 + }, + { + "epoch": 0.6383253650342422, + "learning_rate": 9.645596759249282e-07, + "loss": 4.8712, + "step": 14820 + }, + { + "epoch": 0.6391868027738296, + "learning_rate": 9.645111939494495e-07, + "loss": 4.5896, + "step": 14840 + }, + { + "epoch": 0.6400482405134169, + "learning_rate": 9.644627119739706e-07, + "loss": 4.8666, + "step": 14860 + }, + { + "epoch": 0.6409096782530043, + "learning_rate": 9.644142299984916e-07, + "loss": 4.6907, + "step": 14880 + }, + { + "epoch": 0.6417711159925916, + "learning_rate": 9.643657480230127e-07, + "loss": 4.7755, + "step": 14900 + }, + { + "epoch": 0.642632553732179, + "learning_rate": 9.643172660475338e-07, + "loss": 4.8946, + "step": 14920 + }, + { + "epoch": 0.6434939914717663, + "learning_rate": 9.64268784072055e-07, + "loss": 4.7275, + "step": 14940 + }, + { + "epoch": 0.6443554292113538, + "learning_rate": 9.64220302096576e-07, + "loss": 4.7162, + "step": 14960 + }, + { + "epoch": 0.6452168669509412, + "learning_rate": 9.641718201210972e-07, + "loss": 4.7541, + "step": 14980 + }, + { + "epoch": 0.6460783046905285, + "learning_rate": 9.641233381456183e-07, + "loss": 4.7376, + "step": 15000 + }, + { + "epoch": 0.6469397424301159, + "learning_rate": 9.640748561701393e-07, + "loss": 4.7821, + "step": 15020 + }, + { + "epoch": 0.6478011801697032, + "learning_rate": 9.640263741946604e-07, + "loss": 4.5146, + "step": 15040 + }, + { + "epoch": 0.6486626179092906, + "learning_rate": 9.639778922191815e-07, + "loss": 4.7323, + "step": 15060 + }, + { + "epoch": 0.6495240556488779, + "learning_rate": 9.639294102437028e-07, + "loss": 4.5574, + "step": 15080 + }, + { + "epoch": 0.6503854933884653, + "learning_rate": 9.638809282682238e-07, + "loss": 4.8495, + "step": 15100 + }, + { + "epoch": 0.6512469311280528, + "learning_rate": 9.63832446292745e-07, + "loss": 4.5805, + "step": 15120 + }, + { + "epoch": 0.6521083688676401, + "learning_rate": 9.637839643172662e-07, + "loss": 4.3699, + "step": 15140 + }, + { + "epoch": 0.6529698066072275, + "learning_rate": 9.63735482341787e-07, + "loss": 4.8785, + "step": 15160 + }, + { + "epoch": 0.6538312443468148, + "learning_rate": 9.636870003663083e-07, + "loss": 4.7165, + "step": 15180 + }, + { + "epoch": 0.6546926820864022, + "learning_rate": 9.636385183908292e-07, + "loss": 4.8152, + "step": 15200 + }, + { + "epoch": 0.6555541198259895, + "learning_rate": 9.635900364153505e-07, + "loss": 4.4532, + "step": 15220 + }, + { + "epoch": 0.6564155575655769, + "learning_rate": 9.635415544398716e-07, + "loss": 4.6873, + "step": 15240 + }, + { + "epoch": 0.6572769953051644, + "learning_rate": 9.634930724643926e-07, + "loss": 4.6663, + "step": 15260 + }, + { + "epoch": 0.6581384330447517, + "learning_rate": 9.634445904889137e-07, + "loss": 4.776, + "step": 15280 + }, + { + "epoch": 0.6589998707843391, + "learning_rate": 9.633961085134348e-07, + "loss": 4.6215, + "step": 15300 + }, + { + "epoch": 0.6598613085239264, + "learning_rate": 9.63347626537956e-07, + "loss": 4.921, + "step": 15320 + }, + { + "epoch": 0.6607227462635138, + "learning_rate": 9.63299144562477e-07, + "loss": 4.8543, + "step": 15340 + }, + { + "epoch": 0.6615841840031012, + "learning_rate": 9.632506625869982e-07, + "loss": 4.643, + "step": 15360 + }, + { + "epoch": 0.6624456217426885, + "learning_rate": 9.632021806115193e-07, + "loss": 4.901, + "step": 15380 + }, + { + "epoch": 0.663307059482276, + "learning_rate": 9.631536986360403e-07, + "loss": 4.6514, + "step": 15400 + }, + { + "epoch": 0.6641684972218633, + "learning_rate": 9.631052166605614e-07, + "loss": 4.6956, + "step": 15420 + }, + { + "epoch": 0.6650299349614507, + "learning_rate": 9.630567346850825e-07, + "loss": 4.6573, + "step": 15440 + }, + { + "epoch": 0.665891372701038, + "learning_rate": 9.630082527096038e-07, + "loss": 4.6353, + "step": 15460 + }, + { + "epoch": 0.6667528104406254, + "learning_rate": 9.629597707341248e-07, + "loss": 4.6602, + "step": 15480 + }, + { + "epoch": 0.6676142481802128, + "learning_rate": 9.62911288758646e-07, + "loss": 4.6295, + "step": 15500 + }, + { + "epoch": 0.6684756859198001, + "learning_rate": 9.628628067831672e-07, + "loss": 4.7094, + "step": 15520 + }, + { + "epoch": 0.6693371236593875, + "learning_rate": 9.62814324807688e-07, + "loss": 4.7686, + "step": 15540 + }, + { + "epoch": 0.6701985613989749, + "learning_rate": 9.627658428322093e-07, + "loss": 4.3885, + "step": 15560 + }, + { + "epoch": 0.6710599991385623, + "learning_rate": 9.627173608567302e-07, + "loss": 5.0316, + "step": 15580 + }, + { + "epoch": 0.6719214368781496, + "learning_rate": 9.626688788812515e-07, + "loss": 4.5627, + "step": 15600 + }, + { + "epoch": 0.672782874617737, + "learning_rate": 9.626203969057725e-07, + "loss": 4.3815, + "step": 15620 + }, + { + "epoch": 0.6736443123573244, + "learning_rate": 9.625719149302936e-07, + "loss": 4.74, + "step": 15640 + }, + { + "epoch": 0.6745057500969117, + "learning_rate": 9.625234329548147e-07, + "loss": 4.6831, + "step": 15660 + }, + { + "epoch": 0.6753671878364991, + "learning_rate": 9.624749509793358e-07, + "loss": 4.609, + "step": 15680 + }, + { + "epoch": 0.6762286255760865, + "learning_rate": 9.62426469003857e-07, + "loss": 4.823, + "step": 15700 + }, + { + "epoch": 0.6770900633156739, + "learning_rate": 9.623779870283781e-07, + "loss": 4.9465, + "step": 15720 + }, + { + "epoch": 0.6779515010552613, + "learning_rate": 9.623295050528992e-07, + "loss": 4.7628, + "step": 15740 + }, + { + "epoch": 0.6788129387948486, + "learning_rate": 9.622810230774205e-07, + "loss": 4.5836, + "step": 15760 + }, + { + "epoch": 0.679674376534436, + "learning_rate": 9.622325411019413e-07, + "loss": 4.8521, + "step": 15780 + }, + { + "epoch": 0.6805358142740233, + "learning_rate": 9.621840591264626e-07, + "loss": 4.7569, + "step": 15800 + }, + { + "epoch": 0.6813972520136107, + "learning_rate": 9.621355771509835e-07, + "loss": 4.4686, + "step": 15820 + }, + { + "epoch": 0.682258689753198, + "learning_rate": 9.620870951755048e-07, + "loss": 4.7357, + "step": 15840 + }, + { + "epoch": 0.6831201274927855, + "learning_rate": 9.620386132000258e-07, + "loss": 4.6195, + "step": 15860 + }, + { + "epoch": 0.6839815652323729, + "learning_rate": 9.61990131224547e-07, + "loss": 4.7323, + "step": 15880 + }, + { + "epoch": 0.6848430029719602, + "learning_rate": 9.619416492490682e-07, + "loss": 4.6049, + "step": 15900 + }, + { + "epoch": 0.6857044407115476, + "learning_rate": 9.61893167273589e-07, + "loss": 4.5214, + "step": 15920 + }, + { + "epoch": 0.6865658784511349, + "learning_rate": 9.618446852981103e-07, + "loss": 4.4264, + "step": 15940 + }, + { + "epoch": 0.6874273161907223, + "learning_rate": 9.617962033226312e-07, + "loss": 4.9132, + "step": 15960 + }, + { + "epoch": 0.6882887539303096, + "learning_rate": 9.617477213471525e-07, + "loss": 4.5938, + "step": 15980 + }, + { + "epoch": 0.6891501916698971, + "learning_rate": 9.616992393716735e-07, + "loss": 4.8424, + "step": 16000 + }, + { + "epoch": 0.6900116294094845, + "learning_rate": 9.616507573961946e-07, + "loss": 4.7597, + "step": 16020 + }, + { + "epoch": 0.6908730671490718, + "learning_rate": 9.616022754207159e-07, + "loss": 4.7149, + "step": 16040 + }, + { + "epoch": 0.6917345048886592, + "learning_rate": 9.615537934452368e-07, + "loss": 4.6915, + "step": 16060 + }, + { + "epoch": 0.6925959426282465, + "learning_rate": 9.61505311469758e-07, + "loss": 4.714, + "step": 16080 + }, + { + "epoch": 0.6934573803678339, + "learning_rate": 9.614568294942791e-07, + "loss": 4.497, + "step": 16100 + }, + { + "epoch": 0.6943188181074212, + "learning_rate": 9.614083475188002e-07, + "loss": 4.8497, + "step": 16120 + }, + { + "epoch": 0.6951802558470087, + "learning_rate": 9.613598655433215e-07, + "loss": 4.691, + "step": 16140 + }, + { + "epoch": 0.6960416935865961, + "learning_rate": 9.613113835678423e-07, + "loss": 4.8417, + "step": 16160 + }, + { + "epoch": 0.6969031313261834, + "learning_rate": 9.612629015923636e-07, + "loss": 4.7323, + "step": 16180 + }, + { + "epoch": 0.6977645690657708, + "learning_rate": 9.612144196168845e-07, + "loss": 4.632, + "step": 16200 + }, + { + "epoch": 0.6986260068053581, + "learning_rate": 9.611659376414057e-07, + "loss": 4.8886, + "step": 16220 + }, + { + "epoch": 0.6994874445449455, + "learning_rate": 9.611174556659268e-07, + "loss": 4.3369, + "step": 16240 + }, + { + "epoch": 0.7003488822845328, + "learning_rate": 9.610689736904479e-07, + "loss": 4.6227, + "step": 16260 + }, + { + "epoch": 0.7012103200241202, + "learning_rate": 9.610204917149692e-07, + "loss": 4.5117, + "step": 16280 + }, + { + "epoch": 0.7020717577637077, + "learning_rate": 9.6097200973949e-07, + "loss": 4.7759, + "step": 16300 + }, + { + "epoch": 0.702933195503295, + "learning_rate": 9.609235277640113e-07, + "loss": 4.6303, + "step": 16320 + }, + { + "epoch": 0.7037946332428824, + "learning_rate": 9.608750457885324e-07, + "loss": 4.836, + "step": 16340 + }, + { + "epoch": 0.7046560709824697, + "learning_rate": 9.608265638130535e-07, + "loss": 4.7166, + "step": 16360 + }, + { + "epoch": 0.7055175087220571, + "learning_rate": 9.607780818375745e-07, + "loss": 4.489, + "step": 16380 + }, + { + "epoch": 0.7063789464616445, + "learning_rate": 9.607295998620956e-07, + "loss": 4.6527, + "step": 16400 + }, + { + "epoch": 0.7072403842012318, + "learning_rate": 9.606811178866169e-07, + "loss": 4.4992, + "step": 16420 + }, + { + "epoch": 0.7081018219408193, + "learning_rate": 9.606326359111377e-07, + "loss": 4.9018, + "step": 16440 + }, + { + "epoch": 0.7089632596804066, + "learning_rate": 9.60584153935659e-07, + "loss": 4.854, + "step": 16460 + }, + { + "epoch": 0.709824697419994, + "learning_rate": 9.6053567196018e-07, + "loss": 4.8965, + "step": 16480 + }, + { + "epoch": 0.7106861351595813, + "learning_rate": 9.604871899847012e-07, + "loss": 4.5009, + "step": 16500 + }, + { + "epoch": 0.7115475728991687, + "learning_rate": 9.604387080092225e-07, + "loss": 4.8104, + "step": 16520 + }, + { + "epoch": 0.7124090106387561, + "learning_rate": 9.603902260337433e-07, + "loss": 4.698, + "step": 16540 + }, + { + "epoch": 0.7132704483783434, + "learning_rate": 9.603417440582646e-07, + "loss": 4.7735, + "step": 16560 + }, + { + "epoch": 0.7141318861179309, + "learning_rate": 9.602932620827855e-07, + "loss": 4.6545, + "step": 16580 + }, + { + "epoch": 0.7149933238575182, + "learning_rate": 9.602447801073067e-07, + "loss": 4.988, + "step": 16600 + }, + { + "epoch": 0.7158547615971056, + "learning_rate": 9.601962981318278e-07, + "loss": 4.6552, + "step": 16620 + }, + { + "epoch": 0.716716199336693, + "learning_rate": 9.601478161563489e-07, + "loss": 4.4583, + "step": 16640 + }, + { + "epoch": 0.7175776370762803, + "learning_rate": 9.600993341808702e-07, + "loss": 4.6773, + "step": 16660 + }, + { + "epoch": 0.7184390748158677, + "learning_rate": 9.60050852205391e-07, + "loss": 4.5659, + "step": 16680 + }, + { + "epoch": 0.719300512555455, + "learning_rate": 9.600023702299123e-07, + "loss": 4.866, + "step": 16700 + }, + { + "epoch": 0.7201619502950424, + "learning_rate": 9.599538882544334e-07, + "loss": 4.8379, + "step": 16720 + }, + { + "epoch": 0.7210233880346298, + "learning_rate": 9.599054062789545e-07, + "loss": 4.5252, + "step": 16740 + }, + { + "epoch": 0.7218848257742172, + "learning_rate": 9.598569243034757e-07, + "loss": 4.7735, + "step": 16760 + }, + { + "epoch": 0.7227462635138046, + "learning_rate": 9.598084423279966e-07, + "loss": 4.6124, + "step": 16780 + }, + { + "epoch": 0.7236077012533919, + "learning_rate": 9.597599603525179e-07, + "loss": 4.8224, + "step": 16800 + }, + { + "epoch": 0.7244691389929793, + "learning_rate": 9.597114783770387e-07, + "loss": 4.4931, + "step": 16820 + }, + { + "epoch": 0.7253305767325666, + "learning_rate": 9.5966299640156e-07, + "loss": 4.7042, + "step": 16840 + }, + { + "epoch": 0.726192014472154, + "learning_rate": 9.59614514426081e-07, + "loss": 4.4158, + "step": 16860 + }, + { + "epoch": 0.7270534522117414, + "learning_rate": 9.595660324506022e-07, + "loss": 4.6365, + "step": 16880 + }, + { + "epoch": 0.7279148899513288, + "learning_rate": 9.595175504751234e-07, + "loss": 4.6285, + "step": 16900 + }, + { + "epoch": 0.7287763276909162, + "learning_rate": 9.594690684996443e-07, + "loss": 4.6141, + "step": 16920 + }, + { + "epoch": 0.7296377654305035, + "learning_rate": 9.594205865241656e-07, + "loss": 4.6042, + "step": 16940 + }, + { + "epoch": 0.7304992031700909, + "learning_rate": 9.593721045486865e-07, + "loss": 4.4642, + "step": 16960 + }, + { + "epoch": 0.7313606409096782, + "learning_rate": 9.593236225732077e-07, + "loss": 4.7874, + "step": 16980 + }, + { + "epoch": 0.7322220786492656, + "learning_rate": 9.592751405977288e-07, + "loss": 4.7062, + "step": 17000 + }, + { + "epoch": 0.7330835163888529, + "learning_rate": 9.592266586222499e-07, + "loss": 4.7244, + "step": 17020 + }, + { + "epoch": 0.7339449541284404, + "learning_rate": 9.591781766467712e-07, + "loss": 4.6183, + "step": 17040 + }, + { + "epoch": 0.7348063918680278, + "learning_rate": 9.59129694671292e-07, + "loss": 4.592, + "step": 17060 + }, + { + "epoch": 0.7356678296076151, + "learning_rate": 9.590812126958133e-07, + "loss": 4.8319, + "step": 17080 + }, + { + "epoch": 0.7365292673472025, + "learning_rate": 9.590327307203344e-07, + "loss": 4.8552, + "step": 17100 + }, + { + "epoch": 0.7373907050867898, + "learning_rate": 9.589842487448554e-07, + "loss": 4.9682, + "step": 17120 + }, + { + "epoch": 0.7382521428263772, + "learning_rate": 9.589357667693767e-07, + "loss": 4.8142, + "step": 17140 + }, + { + "epoch": 0.7391135805659645, + "learning_rate": 9.588872847938976e-07, + "loss": 4.4919, + "step": 17160 + }, + { + "epoch": 0.739975018305552, + "learning_rate": 9.588388028184189e-07, + "loss": 4.7011, + "step": 17180 + }, + { + "epoch": 0.7408364560451394, + "learning_rate": 9.587903208429397e-07, + "loss": 4.7958, + "step": 17200 + }, + { + "epoch": 0.7416978937847267, + "learning_rate": 9.58741838867461e-07, + "loss": 4.6178, + "step": 17220 + }, + { + "epoch": 0.7425593315243141, + "learning_rate": 9.58693356891982e-07, + "loss": 4.6442, + "step": 17240 + }, + { + "epoch": 0.7434207692639014, + "learning_rate": 9.586448749165032e-07, + "loss": 4.7595, + "step": 17260 + }, + { + "epoch": 0.7442822070034888, + "learning_rate": 9.585963929410244e-07, + "loss": 4.7747, + "step": 17280 + }, + { + "epoch": 0.7451436447430762, + "learning_rate": 9.585479109655455e-07, + "loss": 4.7121, + "step": 17300 + }, + { + "epoch": 0.7460050824826636, + "learning_rate": 9.584994289900666e-07, + "loss": 4.8845, + "step": 17320 + }, + { + "epoch": 0.746866520222251, + "learning_rate": 9.584509470145877e-07, + "loss": 4.7855, + "step": 17340 + }, + { + "epoch": 0.7477279579618383, + "learning_rate": 9.584024650391087e-07, + "loss": 4.6848, + "step": 17360 + }, + { + "epoch": 0.7485893957014257, + "learning_rate": 9.5835398306363e-07, + "loss": 4.7187, + "step": 17380 + }, + { + "epoch": 0.749450833441013, + "learning_rate": 9.583055010881509e-07, + "loss": 4.5737, + "step": 17400 + }, + { + "epoch": 0.7503122711806004, + "learning_rate": 9.582570191126722e-07, + "loss": 4.9443, + "step": 17420 + }, + { + "epoch": 0.7511737089201878, + "learning_rate": 9.58208537137193e-07, + "loss": 4.9395, + "step": 17440 + }, + { + "epoch": 0.7520351466597751, + "learning_rate": 9.581600551617143e-07, + "loss": 4.5068, + "step": 17460 + }, + { + "epoch": 0.7528965843993626, + "learning_rate": 9.581115731862354e-07, + "loss": 4.9245, + "step": 17480 + }, + { + "epoch": 0.7537580221389499, + "learning_rate": 9.580630912107564e-07, + "loss": 4.6495, + "step": 17500 + }, + { + "epoch": 0.7546194598785373, + "learning_rate": 9.580146092352777e-07, + "loss": 4.4371, + "step": 17520 + }, + { + "epoch": 0.7554808976181246, + "learning_rate": 9.579661272597986e-07, + "loss": 4.5391, + "step": 17540 + }, + { + "epoch": 0.756342335357712, + "learning_rate": 9.579176452843199e-07, + "loss": 4.4885, + "step": 17560 + }, + { + "epoch": 0.7572037730972994, + "learning_rate": 9.578691633088407e-07, + "loss": 4.5693, + "step": 17580 + }, + { + "epoch": 0.7580652108368867, + "learning_rate": 9.57820681333362e-07, + "loss": 4.4952, + "step": 17600 + }, + { + "epoch": 0.7589266485764742, + "learning_rate": 9.57772199357883e-07, + "loss": 4.633, + "step": 17620 + }, + { + "epoch": 0.7597880863160615, + "learning_rate": 9.577237173824042e-07, + "loss": 4.6817, + "step": 17640 + }, + { + "epoch": 0.7606495240556489, + "learning_rate": 9.576752354069254e-07, + "loss": 4.647, + "step": 17660 + }, + { + "epoch": 0.7615109617952363, + "learning_rate": 9.576267534314465e-07, + "loss": 4.6863, + "step": 17680 + }, + { + "epoch": 0.7623723995348236, + "learning_rate": 9.575782714559676e-07, + "loss": 4.8397, + "step": 17700 + }, + { + "epoch": 0.763233837274411, + "learning_rate": 9.575297894804887e-07, + "loss": 4.4226, + "step": 17720 + }, + { + "epoch": 0.7640952750139983, + "learning_rate": 9.574813075050097e-07, + "loss": 4.4665, + "step": 17740 + }, + { + "epoch": 0.7649567127535858, + "learning_rate": 9.57432825529531e-07, + "loss": 4.6331, + "step": 17760 + }, + { + "epoch": 0.7658181504931731, + "learning_rate": 9.573843435540519e-07, + "loss": 4.7533, + "step": 17780 + }, + { + "epoch": 0.7666795882327605, + "learning_rate": 9.573358615785731e-07, + "loss": 4.4459, + "step": 17800 + }, + { + "epoch": 0.7675410259723479, + "learning_rate": 9.572873796030942e-07, + "loss": 4.5337, + "step": 17820 + }, + { + "epoch": 0.7684024637119352, + "learning_rate": 9.572388976276153e-07, + "loss": 4.6425, + "step": 17840 + }, + { + "epoch": 0.7692639014515226, + "learning_rate": 9.571904156521364e-07, + "loss": 4.4908, + "step": 17860 + }, + { + "epoch": 0.7701253391911099, + "learning_rate": 9.571419336766574e-07, + "loss": 4.6324, + "step": 17880 + }, + { + "epoch": 0.7709867769306973, + "learning_rate": 9.570934517011787e-07, + "loss": 4.5503, + "step": 17900 + }, + { + "epoch": 0.7718482146702847, + "learning_rate": 9.570449697256998e-07, + "loss": 4.3948, + "step": 17920 + }, + { + "epoch": 0.7727096524098721, + "learning_rate": 9.569964877502209e-07, + "loss": 4.7714, + "step": 17940 + }, + { + "epoch": 0.7735710901494595, + "learning_rate": 9.56948005774742e-07, + "loss": 4.4958, + "step": 17960 + }, + { + "epoch": 0.7744325278890468, + "learning_rate": 9.56899523799263e-07, + "loss": 4.8341, + "step": 17980 + }, + { + "epoch": 0.7752939656286342, + "learning_rate": 9.56851041823784e-07, + "loss": 4.8692, + "step": 18000 + }, + { + "epoch": 0.7761554033682215, + "learning_rate": 9.568025598483051e-07, + "loss": 4.5919, + "step": 18020 + }, + { + "epoch": 0.7770168411078089, + "learning_rate": 9.567540778728264e-07, + "loss": 4.5073, + "step": 18040 + }, + { + "epoch": 0.7778782788473964, + "learning_rate": 9.567055958973475e-07, + "loss": 4.541, + "step": 18060 + }, + { + "epoch": 0.7787397165869837, + "learning_rate": 9.566571139218686e-07, + "loss": 4.5663, + "step": 18080 + }, + { + "epoch": 0.7796011543265711, + "learning_rate": 9.566086319463896e-07, + "loss": 4.4336, + "step": 18100 + }, + { + "epoch": 0.7804625920661584, + "learning_rate": 9.565601499709107e-07, + "loss": 4.4726, + "step": 18120 + }, + { + "epoch": 0.7813240298057458, + "learning_rate": 9.56511667995432e-07, + "loss": 4.5123, + "step": 18140 + }, + { + "epoch": 0.7821854675453331, + "learning_rate": 9.564631860199529e-07, + "loss": 4.6115, + "step": 18160 + }, + { + "epoch": 0.7830469052849205, + "learning_rate": 9.564147040444741e-07, + "loss": 4.7271, + "step": 18180 + }, + { + "epoch": 0.783908343024508, + "learning_rate": 9.563662220689952e-07, + "loss": 4.815, + "step": 18200 + }, + { + "epoch": 0.7847697807640953, + "learning_rate": 9.563177400935163e-07, + "loss": 4.5781, + "step": 18220 + }, + { + "epoch": 0.7856312185036827, + "learning_rate": 9.562692581180374e-07, + "loss": 4.5923, + "step": 18240 + }, + { + "epoch": 0.78649265624327, + "learning_rate": 9.562207761425584e-07, + "loss": 4.6544, + "step": 18260 + }, + { + "epoch": 0.7873540939828574, + "learning_rate": 9.561722941670797e-07, + "loss": 4.6061, + "step": 18280 + }, + { + "epoch": 0.7882155317224447, + "learning_rate": 9.561238121916008e-07, + "loss": 4.6143, + "step": 18300 + }, + { + "epoch": 0.7890769694620321, + "learning_rate": 9.560753302161219e-07, + "loss": 4.6983, + "step": 18320 + }, + { + "epoch": 0.7899384072016195, + "learning_rate": 9.56026848240643e-07, + "loss": 4.7628, + "step": 18340 + }, + { + "epoch": 0.7907998449412069, + "learning_rate": 9.55978366265164e-07, + "loss": 4.629, + "step": 18360 + }, + { + "epoch": 0.7916612826807943, + "learning_rate": 9.559298842896853e-07, + "loss": 4.643, + "step": 18380 + }, + { + "epoch": 0.7925227204203816, + "learning_rate": 9.558814023142061e-07, + "loss": 4.6185, + "step": 18400 + }, + { + "epoch": 0.793384158159969, + "learning_rate": 9.558329203387274e-07, + "loss": 4.5335, + "step": 18420 + }, + { + "epoch": 0.7942455958995563, + "learning_rate": 9.557844383632485e-07, + "loss": 4.6082, + "step": 18440 + }, + { + "epoch": 0.7951070336391437, + "learning_rate": 9.557359563877696e-07, + "loss": 4.5302, + "step": 18460 + }, + { + "epoch": 0.7959684713787311, + "learning_rate": 9.556874744122906e-07, + "loss": 4.9049, + "step": 18480 + }, + { + "epoch": 0.7968299091183185, + "learning_rate": 9.556389924368117e-07, + "loss": 4.5565, + "step": 18500 + }, + { + "epoch": 0.7976913468579059, + "learning_rate": 9.55590510461333e-07, + "loss": 4.5934, + "step": 18520 + }, + { + "epoch": 0.7985527845974932, + "learning_rate": 9.555420284858539e-07, + "loss": 4.498, + "step": 18540 + }, + { + "epoch": 0.7994142223370806, + "learning_rate": 9.554935465103751e-07, + "loss": 4.8571, + "step": 18560 + }, + { + "epoch": 0.800275660076668, + "learning_rate": 9.554450645348962e-07, + "loss": 4.5185, + "step": 18580 + }, + { + "epoch": 0.8011370978162553, + "learning_rate": 9.553965825594173e-07, + "loss": 4.5657, + "step": 18600 + }, + { + "epoch": 0.8019985355558427, + "learning_rate": 9.553481005839384e-07, + "loss": 4.4812, + "step": 18620 + }, + { + "epoch": 0.80285997329543, + "learning_rate": 9.552996186084594e-07, + "loss": 4.6503, + "step": 18640 + }, + { + "epoch": 0.8037214110350175, + "learning_rate": 9.552511366329807e-07, + "loss": 4.8931, + "step": 18660 + }, + { + "epoch": 0.8045828487746048, + "learning_rate": 9.552026546575018e-07, + "loss": 4.8269, + "step": 18680 + }, + { + "epoch": 0.8054442865141922, + "learning_rate": 9.551541726820228e-07, + "loss": 4.7274, + "step": 18700 + }, + { + "epoch": 0.8063057242537796, + "learning_rate": 9.55105690706544e-07, + "loss": 4.4974, + "step": 18720 + }, + { + "epoch": 0.8071671619933669, + "learning_rate": 9.55057208731065e-07, + "loss": 4.5903, + "step": 18740 + }, + { + "epoch": 0.8080285997329543, + "learning_rate": 9.550087267555863e-07, + "loss": 4.4657, + "step": 18760 + }, + { + "epoch": 0.8088900374725416, + "learning_rate": 9.549602447801071e-07, + "loss": 4.5626, + "step": 18780 + }, + { + "epoch": 0.8097514752121291, + "learning_rate": 9.549117628046284e-07, + "loss": 4.6358, + "step": 18800 + }, + { + "epoch": 0.8106129129517164, + "learning_rate": 9.548632808291495e-07, + "loss": 4.5517, + "step": 18820 + }, + { + "epoch": 0.8114743506913038, + "learning_rate": 9.548147988536706e-07, + "loss": 4.7087, + "step": 18840 + }, + { + "epoch": 0.8123357884308912, + "learning_rate": 9.547663168781916e-07, + "loss": 4.3492, + "step": 18860 + }, + { + "epoch": 0.8131972261704785, + "learning_rate": 9.547178349027127e-07, + "loss": 4.4344, + "step": 18880 + }, + { + "epoch": 0.8140586639100659, + "learning_rate": 9.54669352927234e-07, + "loss": 4.3568, + "step": 18900 + }, + { + "epoch": 0.8149201016496532, + "learning_rate": 9.54620870951755e-07, + "loss": 4.685, + "step": 18920 + }, + { + "epoch": 0.8157815393892407, + "learning_rate": 9.545723889762761e-07, + "loss": 4.6504, + "step": 18940 + }, + { + "epoch": 0.816642977128828, + "learning_rate": 9.545239070007974e-07, + "loss": 4.5637, + "step": 18960 + }, + { + "epoch": 0.8175044148684154, + "learning_rate": 9.544754250253183e-07, + "loss": 4.4504, + "step": 18980 + }, + { + "epoch": 0.8183658526080028, + "learning_rate": 9.544269430498396e-07, + "loss": 4.6673, + "step": 19000 + }, + { + "epoch": 0.8192272903475901, + "learning_rate": 9.543784610743604e-07, + "loss": 4.7172, + "step": 19020 + }, + { + "epoch": 0.8200887280871775, + "learning_rate": 9.543299790988817e-07, + "loss": 4.6677, + "step": 19040 + }, + { + "epoch": 0.8209501658267648, + "learning_rate": 9.542814971234028e-07, + "loss": 4.5032, + "step": 19060 + }, + { + "epoch": 0.8218116035663522, + "learning_rate": 9.542330151479238e-07, + "loss": 4.6686, + "step": 19080 + }, + { + "epoch": 0.8226730413059397, + "learning_rate": 9.54184533172445e-07, + "loss": 4.5099, + "step": 19100 + }, + { + "epoch": 0.823534479045527, + "learning_rate": 9.54136051196966e-07, + "loss": 4.5959, + "step": 19120 + }, + { + "epoch": 0.8243959167851144, + "learning_rate": 9.540875692214873e-07, + "loss": 4.757, + "step": 19140 + }, + { + "epoch": 0.8252573545247017, + "learning_rate": 9.540390872460081e-07, + "loss": 4.6209, + "step": 19160 + }, + { + "epoch": 0.8261187922642891, + "learning_rate": 9.539906052705294e-07, + "loss": 4.6562, + "step": 19180 + }, + { + "epoch": 0.8269802300038764, + "learning_rate": 9.539421232950505e-07, + "loss": 4.5569, + "step": 19200 + }, + { + "epoch": 0.8278416677434638, + "learning_rate": 9.538936413195716e-07, + "loss": 4.5734, + "step": 19220 + }, + { + "epoch": 0.8287031054830513, + "learning_rate": 9.538451593440926e-07, + "loss": 4.4665, + "step": 19240 + }, + { + "epoch": 0.8295645432226386, + "learning_rate": 9.537966773686137e-07, + "loss": 4.8744, + "step": 19260 + }, + { + "epoch": 0.830425980962226, + "learning_rate": 9.53748195393135e-07, + "loss": 4.7477, + "step": 19280 + }, + { + "epoch": 0.8312874187018133, + "learning_rate": 9.53699713417656e-07, + "loss": 4.4835, + "step": 19300 + }, + { + "epoch": 0.8321488564414007, + "learning_rate": 9.536512314421771e-07, + "loss": 4.7603, + "step": 19320 + }, + { + "epoch": 0.833010294180988, + "learning_rate": 9.536027494666983e-07, + "loss": 4.6376, + "step": 19340 + }, + { + "epoch": 0.8338717319205754, + "learning_rate": 9.535542674912193e-07, + "loss": 4.688, + "step": 19360 + }, + { + "epoch": 0.8347331696601629, + "learning_rate": 9.535057855157404e-07, + "loss": 4.4329, + "step": 19380 + }, + { + "epoch": 0.8355946073997502, + "learning_rate": 9.534573035402615e-07, + "loss": 4.7792, + "step": 19400 + }, + { + "epoch": 0.8364560451393376, + "learning_rate": 9.534088215647826e-07, + "loss": 4.6019, + "step": 19420 + }, + { + "epoch": 0.8373174828789249, + "learning_rate": 9.533603395893038e-07, + "loss": 4.6412, + "step": 19440 + }, + { + "epoch": 0.8381789206185123, + "learning_rate": 9.533118576138249e-07, + "loss": 4.8209, + "step": 19460 + }, + { + "epoch": 0.8390403583580996, + "learning_rate": 9.532633756383459e-07, + "loss": 4.7257, + "step": 19480 + }, + { + "epoch": 0.839901796097687, + "learning_rate": 9.532148936628671e-07, + "loss": 4.5974, + "step": 19500 + }, + { + "epoch": 0.8407632338372744, + "learning_rate": 9.531664116873882e-07, + "loss": 4.7499, + "step": 19520 + }, + { + "epoch": 0.8416246715768618, + "learning_rate": 9.531179297119093e-07, + "loss": 4.8645, + "step": 19540 + }, + { + "epoch": 0.8424861093164492, + "learning_rate": 9.530694477364304e-07, + "loss": 4.5787, + "step": 19560 + }, + { + "epoch": 0.8433475470560365, + "learning_rate": 9.530209657609516e-07, + "loss": 4.4854, + "step": 19580 + }, + { + "epoch": 0.8442089847956239, + "learning_rate": 9.529724837854727e-07, + "loss": 4.7621, + "step": 19600 + }, + { + "epoch": 0.8450704225352113, + "learning_rate": 9.529240018099936e-07, + "loss": 4.7263, + "step": 19620 + }, + { + "epoch": 0.8459318602747986, + "learning_rate": 9.528755198345148e-07, + "loss": 4.7101, + "step": 19640 + }, + { + "epoch": 0.846793298014386, + "learning_rate": 9.528270378590359e-07, + "loss": 4.3076, + "step": 19660 + }, + { + "epoch": 0.8476547357539734, + "learning_rate": 9.52778555883557e-07, + "loss": 4.5904, + "step": 19680 + }, + { + "epoch": 0.8485161734935608, + "learning_rate": 9.527300739080781e-07, + "loss": 4.4537, + "step": 19700 + }, + { + "epoch": 0.8493776112331481, + "learning_rate": 9.526815919325993e-07, + "loss": 4.5525, + "step": 19720 + }, + { + "epoch": 0.8502390489727355, + "learning_rate": 9.526331099571202e-07, + "loss": 4.4868, + "step": 19740 + }, + { + "epoch": 0.8511004867123229, + "learning_rate": 9.525846279816414e-07, + "loss": 4.7816, + "step": 19760 + }, + { + "epoch": 0.8519619244519102, + "learning_rate": 9.525361460061625e-07, + "loss": 4.5658, + "step": 19780 + }, + { + "epoch": 0.8528233621914976, + "learning_rate": 9.524876640306837e-07, + "loss": 4.574, + "step": 19800 + }, + { + "epoch": 0.8536847999310849, + "learning_rate": 9.524391820552048e-07, + "loss": 4.7047, + "step": 19820 + }, + { + "epoch": 0.8545462376706724, + "learning_rate": 9.523907000797259e-07, + "loss": 4.353, + "step": 19840 + }, + { + "epoch": 0.8554076754102597, + "learning_rate": 9.523422181042469e-07, + "loss": 4.6372, + "step": 19860 + }, + { + "epoch": 0.8562691131498471, + "learning_rate": 9.522937361287681e-07, + "loss": 4.563, + "step": 19880 + }, + { + "epoch": 0.8571305508894345, + "learning_rate": 9.522452541532892e-07, + "loss": 4.7784, + "step": 19900 + }, + { + "epoch": 0.8579919886290218, + "learning_rate": 9.521967721778103e-07, + "loss": 4.8083, + "step": 19920 + }, + { + "epoch": 0.8588534263686092, + "learning_rate": 9.521482902023314e-07, + "loss": 4.7428, + "step": 19940 + }, + { + "epoch": 0.8597148641081965, + "learning_rate": 9.520998082268526e-07, + "loss": 4.6361, + "step": 19960 + }, + { + "epoch": 0.860576301847784, + "learning_rate": 9.520513262513736e-07, + "loss": 4.2954, + "step": 19980 + }, + { + "epoch": 0.8614377395873714, + "learning_rate": 9.520028442758947e-07, + "loss": 4.3637, + "step": 20000 + }, + { + "epoch": 0.8622991773269587, + "learning_rate": 9.519543623004158e-07, + "loss": 4.6597, + "step": 20020 + }, + { + "epoch": 0.8631606150665461, + "learning_rate": 9.51905880324937e-07, + "loss": 4.5807, + "step": 20040 + }, + { + "epoch": 0.8640220528061334, + "learning_rate": 9.51857398349458e-07, + "loss": 4.4035, + "step": 20060 + }, + { + "epoch": 0.8648834905457208, + "learning_rate": 9.518089163739792e-07, + "loss": 4.5505, + "step": 20080 + }, + { + "epoch": 0.8657449282853081, + "learning_rate": 9.517604343985003e-07, + "loss": 4.4001, + "step": 20100 + }, + { + "epoch": 0.8666063660248956, + "learning_rate": 9.517119524230214e-07, + "loss": 4.5568, + "step": 20120 + }, + { + "epoch": 0.867467803764483, + "learning_rate": 9.516634704475424e-07, + "loss": 4.5367, + "step": 20140 + }, + { + "epoch": 0.8683292415040703, + "learning_rate": 9.516149884720635e-07, + "loss": 4.1851, + "step": 20160 + }, + { + "epoch": 0.8691906792436577, + "learning_rate": 9.515665064965847e-07, + "loss": 4.6914, + "step": 20180 + }, + { + "epoch": 0.870052116983245, + "learning_rate": 9.515180245211058e-07, + "loss": 4.4509, + "step": 20200 + }, + { + "epoch": 0.8709135547228324, + "learning_rate": 9.514695425456269e-07, + "loss": 4.6518, + "step": 20220 + }, + { + "epoch": 0.8717749924624197, + "learning_rate": 9.51421060570148e-07, + "loss": 4.5476, + "step": 20240 + }, + { + "epoch": 0.8726364302020071, + "learning_rate": 9.51372578594669e-07, + "loss": 4.4285, + "step": 20260 + }, + { + "epoch": 0.8734978679415946, + "learning_rate": 9.513240966191901e-07, + "loss": 4.4528, + "step": 20280 + }, + { + "epoch": 0.8743593056811819, + "learning_rate": 9.512756146437113e-07, + "loss": 4.4412, + "step": 20300 + }, + { + "epoch": 0.8752207434207693, + "learning_rate": 9.512271326682324e-07, + "loss": 4.6117, + "step": 20320 + }, + { + "epoch": 0.8760821811603566, + "learning_rate": 9.511786506927536e-07, + "loss": 4.5079, + "step": 20340 + }, + { + "epoch": 0.876943618899944, + "learning_rate": 9.511301687172747e-07, + "loss": 4.5531, + "step": 20360 + }, + { + "epoch": 0.8778050566395313, + "learning_rate": 9.510816867417957e-07, + "loss": 4.5672, + "step": 20380 + }, + { + "epoch": 0.8786664943791187, + "learning_rate": 9.510332047663168e-07, + "loss": 4.581, + "step": 20400 + }, + { + "epoch": 0.8795279321187062, + "learning_rate": 9.50984722790838e-07, + "loss": 4.8276, + "step": 20420 + }, + { + "epoch": 0.8803893698582935, + "learning_rate": 9.50936240815359e-07, + "loss": 4.5643, + "step": 20440 + }, + { + "epoch": 0.8812508075978809, + "learning_rate": 9.508877588398802e-07, + "loss": 4.7807, + "step": 20460 + }, + { + "epoch": 0.8821122453374682, + "learning_rate": 9.508392768644013e-07, + "loss": 4.5882, + "step": 20480 + }, + { + "epoch": 0.8829736830770556, + "learning_rate": 9.507907948889224e-07, + "loss": 4.5565, + "step": 20500 + }, + { + "epoch": 0.883835120816643, + "learning_rate": 9.507423129134434e-07, + "loss": 4.3398, + "step": 20520 + }, + { + "epoch": 0.8846965585562303, + "learning_rate": 9.506938309379646e-07, + "loss": 4.6346, + "step": 20540 + }, + { + "epoch": 0.8855579962958178, + "learning_rate": 9.506453489624857e-07, + "loss": 4.7336, + "step": 20560 + }, + { + "epoch": 0.8864194340354051, + "learning_rate": 9.505968669870069e-07, + "loss": 4.6375, + "step": 20580 + }, + { + "epoch": 0.8872808717749925, + "learning_rate": 9.505483850115279e-07, + "loss": 4.1946, + "step": 20600 + }, + { + "epoch": 0.8881423095145798, + "learning_rate": 9.50499903036049e-07, + "loss": 4.5503, + "step": 20620 + }, + { + "epoch": 0.8890037472541672, + "learning_rate": 9.504514210605701e-07, + "loss": 4.6196, + "step": 20640 + }, + { + "epoch": 0.8898651849937546, + "learning_rate": 9.504029390850912e-07, + "loss": 4.657, + "step": 20660 + }, + { + "epoch": 0.8907266227333419, + "learning_rate": 9.503544571096123e-07, + "loss": 4.7372, + "step": 20680 + }, + { + "epoch": 0.8915880604729293, + "learning_rate": 9.503059751341334e-07, + "loss": 4.7459, + "step": 20700 + }, + { + "epoch": 0.8924494982125167, + "learning_rate": 9.502574931586546e-07, + "loss": 4.275, + "step": 20720 + }, + { + "epoch": 0.8933109359521041, + "learning_rate": 9.502090111831756e-07, + "loss": 4.4509, + "step": 20740 + }, + { + "epoch": 0.8941723736916914, + "learning_rate": 9.501605292076967e-07, + "loss": 4.8113, + "step": 20760 + }, + { + "epoch": 0.8950338114312788, + "learning_rate": 9.501120472322178e-07, + "loss": 4.5494, + "step": 20780 + }, + { + "epoch": 0.8958952491708662, + "learning_rate": 9.50063565256739e-07, + "loss": 4.4905, + "step": 20800 + }, + { + "epoch": 0.8967566869104535, + "learning_rate": 9.5001508328126e-07, + "loss": 4.5792, + "step": 20820 + }, + { + "epoch": 0.8976181246500409, + "learning_rate": 9.499666013057812e-07, + "loss": 4.3705, + "step": 20840 + }, + { + "epoch": 0.8984795623896283, + "learning_rate": 9.499181193303023e-07, + "loss": 4.5957, + "step": 20860 + }, + { + "epoch": 0.8993410001292157, + "learning_rate": 9.498696373548233e-07, + "loss": 4.5509, + "step": 20880 + }, + { + "epoch": 0.900202437868803, + "learning_rate": 9.498211553793444e-07, + "loss": 4.6055, + "step": 20900 + }, + { + "epoch": 0.9010638756083904, + "learning_rate": 9.497726734038656e-07, + "loss": 4.5213, + "step": 20920 + }, + { + "epoch": 0.9019253133479778, + "learning_rate": 9.497241914283867e-07, + "loss": 4.5847, + "step": 20940 + }, + { + "epoch": 0.9027867510875651, + "learning_rate": 9.496757094529078e-07, + "loss": 4.2846, + "step": 20960 + }, + { + "epoch": 0.9036481888271525, + "learning_rate": 9.496272274774289e-07, + "loss": 4.6234, + "step": 20980 + }, + { + "epoch": 0.9045096265667399, + "learning_rate": 9.4957874550195e-07, + "loss": 4.6808, + "step": 21000 + }, + { + "epoch": 0.9053710643063273, + "learning_rate": 9.495302635264711e-07, + "loss": 4.3551, + "step": 21020 + }, + { + "epoch": 0.9062325020459147, + "learning_rate": 9.494817815509922e-07, + "loss": 4.738, + "step": 21040 + }, + { + "epoch": 0.907093939785502, + "learning_rate": 9.494332995755133e-07, + "loss": 4.7525, + "step": 21060 + }, + { + "epoch": 0.9079553775250894, + "learning_rate": 9.493848176000345e-07, + "loss": 4.4851, + "step": 21080 + }, + { + "epoch": 0.9088168152646767, + "learning_rate": 9.493363356245556e-07, + "loss": 4.5402, + "step": 21100 + }, + { + "epoch": 0.9096782530042641, + "learning_rate": 9.492878536490767e-07, + "loss": 4.5958, + "step": 21120 + }, + { + "epoch": 0.9105396907438514, + "learning_rate": 9.492393716735977e-07, + "loss": 4.5298, + "step": 21140 + }, + { + "epoch": 0.9114011284834389, + "learning_rate": 9.491908896981189e-07, + "loss": 4.3762, + "step": 21160 + }, + { + "epoch": 0.9122625662230263, + "learning_rate": 9.4914240772264e-07, + "loss": 4.5817, + "step": 21180 + }, + { + "epoch": 0.9131240039626136, + "learning_rate": 9.49093925747161e-07, + "loss": 4.6998, + "step": 21200 + }, + { + "epoch": 0.913985441702201, + "learning_rate": 9.490454437716822e-07, + "loss": 4.6316, + "step": 21220 + }, + { + "epoch": 0.9148468794417883, + "learning_rate": 9.489969617962033e-07, + "loss": 4.5339, + "step": 21240 + }, + { + "epoch": 0.9157083171813757, + "learning_rate": 9.489484798207243e-07, + "loss": 4.5119, + "step": 21260 + }, + { + "epoch": 0.916569754920963, + "learning_rate": 9.488999978452454e-07, + "loss": 4.674, + "step": 21280 + }, + { + "epoch": 0.9174311926605505, + "learning_rate": 9.488515158697666e-07, + "loss": 4.5183, + "step": 21300 + }, + { + "epoch": 0.9182926304001379, + "learning_rate": 9.488030338942877e-07, + "loss": 4.7068, + "step": 21320 + }, + { + "epoch": 0.9191540681397252, + "learning_rate": 9.487545519188088e-07, + "loss": 4.4489, + "step": 21340 + }, + { + "epoch": 0.9200155058793126, + "learning_rate": 9.487060699433299e-07, + "loss": 4.7522, + "step": 21360 + }, + { + "epoch": 0.9208769436188999, + "learning_rate": 9.486575879678511e-07, + "loss": 4.4866, + "step": 21380 + }, + { + "epoch": 0.9217383813584873, + "learning_rate": 9.486091059923721e-07, + "loss": 4.4634, + "step": 21400 + }, + { + "epoch": 0.9225998190980746, + "learning_rate": 9.485606240168932e-07, + "loss": 4.6163, + "step": 21420 + }, + { + "epoch": 0.923461256837662, + "learning_rate": 9.485121420414143e-07, + "loss": 4.6412, + "step": 21440 + }, + { + "epoch": 0.9243226945772495, + "learning_rate": 9.484636600659355e-07, + "loss": 4.5538, + "step": 21460 + }, + { + "epoch": 0.9251841323168368, + "learning_rate": 9.484151780904566e-07, + "loss": 4.5306, + "step": 21480 + }, + { + "epoch": 0.9260455700564242, + "learning_rate": 9.483666961149777e-07, + "loss": 4.94, + "step": 21500 + }, + { + "epoch": 0.9269070077960115, + "learning_rate": 9.483182141394986e-07, + "loss": 4.3409, + "step": 21520 + }, + { + "epoch": 0.9277684455355989, + "learning_rate": 9.482697321640199e-07, + "loss": 4.9327, + "step": 21540 + }, + { + "epoch": 0.9286298832751863, + "learning_rate": 9.482212501885409e-07, + "loss": 4.6211, + "step": 21560 + }, + { + "epoch": 0.9294913210147736, + "learning_rate": 9.481727682130621e-07, + "loss": 4.3866, + "step": 21580 + }, + { + "epoch": 0.9303527587543611, + "learning_rate": 9.481242862375832e-07, + "loss": 4.4892, + "step": 21600 + }, + { + "epoch": 0.9312141964939484, + "learning_rate": 9.480758042621044e-07, + "loss": 4.7242, + "step": 21620 + }, + { + "epoch": 0.9320756342335358, + "learning_rate": 9.480273222866253e-07, + "loss": 4.789, + "step": 21640 + }, + { + "epoch": 0.9329370719731231, + "learning_rate": 9.479788403111465e-07, + "loss": 4.5236, + "step": 21660 + }, + { + "epoch": 0.9337985097127105, + "learning_rate": 9.479303583356676e-07, + "loss": 4.9787, + "step": 21680 + }, + { + "epoch": 0.9346599474522979, + "learning_rate": 9.478818763601888e-07, + "loss": 4.6006, + "step": 21700 + }, + { + "epoch": 0.9355213851918852, + "learning_rate": 9.478333943847098e-07, + "loss": 4.5336, + "step": 21720 + }, + { + "epoch": 0.9363828229314727, + "learning_rate": 9.47784912409231e-07, + "loss": 4.6637, + "step": 21740 + }, + { + "epoch": 0.93724426067106, + "learning_rate": 9.477364304337521e-07, + "loss": 4.5355, + "step": 21760 + }, + { + "epoch": 0.9381056984106474, + "learning_rate": 9.47687948458273e-07, + "loss": 4.6314, + "step": 21780 + }, + { + "epoch": 0.9389671361502347, + "learning_rate": 9.476394664827942e-07, + "loss": 4.6773, + "step": 21800 + }, + { + "epoch": 0.9398285738898221, + "learning_rate": 9.475909845073153e-07, + "loss": 4.7378, + "step": 21820 + }, + { + "epoch": 0.9406900116294095, + "learning_rate": 9.475425025318365e-07, + "loss": 4.3205, + "step": 21840 + }, + { + "epoch": 0.9415514493689968, + "learning_rate": 9.474940205563575e-07, + "loss": 4.6247, + "step": 21860 + }, + { + "epoch": 0.9424128871085842, + "learning_rate": 9.474455385808787e-07, + "loss": 4.4743, + "step": 21880 + }, + { + "epoch": 0.9432743248481716, + "learning_rate": 9.473970566053997e-07, + "loss": 4.5955, + "step": 21900 + }, + { + "epoch": 0.944135762587759, + "learning_rate": 9.473485746299209e-07, + "loss": 4.4757, + "step": 21920 + }, + { + "epoch": 0.9449972003273464, + "learning_rate": 9.473000926544419e-07, + "loss": 4.6824, + "step": 21940 + }, + { + "epoch": 0.9458586380669337, + "learning_rate": 9.472516106789631e-07, + "loss": 4.4245, + "step": 21960 + }, + { + "epoch": 0.9467200758065211, + "learning_rate": 9.472031287034842e-07, + "loss": 4.7057, + "step": 21980 + }, + { + "epoch": 0.9475815135461084, + "learning_rate": 9.471546467280054e-07, + "loss": 4.2502, + "step": 22000 + }, + { + "epoch": 0.9484429512856958, + "learning_rate": 9.471061647525264e-07, + "loss": 4.4566, + "step": 22020 + }, + { + "epoch": 0.9493043890252832, + "learning_rate": 9.470576827770474e-07, + "loss": 4.3403, + "step": 22040 + }, + { + "epoch": 0.9501658267648706, + "learning_rate": 9.470092008015686e-07, + "loss": 4.7907, + "step": 22060 + }, + { + "epoch": 0.951027264504458, + "learning_rate": 9.469607188260898e-07, + "loss": 4.5686, + "step": 22080 + }, + { + "epoch": 0.9518887022440453, + "learning_rate": 9.469122368506108e-07, + "loss": 4.4133, + "step": 22100 + }, + { + "epoch": 0.9527501399836327, + "learning_rate": 9.46863754875132e-07, + "loss": 4.5255, + "step": 22120 + }, + { + "epoch": 0.95361157772322, + "learning_rate": 9.468152728996532e-07, + "loss": 4.5422, + "step": 22140 + }, + { + "epoch": 0.9544730154628074, + "learning_rate": 9.467667909241741e-07, + "loss": 4.4569, + "step": 22160 + }, + { + "epoch": 0.9553344532023949, + "learning_rate": 9.467183089486952e-07, + "loss": 4.7992, + "step": 22180 + }, + { + "epoch": 0.9561958909419822, + "learning_rate": 9.466698269732164e-07, + "loss": 4.4247, + "step": 22200 + }, + { + "epoch": 0.9570573286815696, + "learning_rate": 9.466213449977375e-07, + "loss": 4.6975, + "step": 22220 + }, + { + "epoch": 0.9579187664211569, + "learning_rate": 9.465728630222586e-07, + "loss": 4.6951, + "step": 22240 + }, + { + "epoch": 0.9587802041607443, + "learning_rate": 9.465243810467797e-07, + "loss": 4.6245, + "step": 22260 + }, + { + "epoch": 0.9596416419003316, + "learning_rate": 9.464758990713008e-07, + "loss": 4.4001, + "step": 22280 + }, + { + "epoch": 0.960503079639919, + "learning_rate": 9.464274170958219e-07, + "loss": 4.4475, + "step": 22300 + }, + { + "epoch": 0.9613645173795063, + "learning_rate": 9.463789351203429e-07, + "loss": 4.576, + "step": 22320 + }, + { + "epoch": 0.9622259551190938, + "learning_rate": 9.463304531448641e-07, + "loss": 4.5425, + "step": 22340 + }, + { + "epoch": 0.9630873928586812, + "learning_rate": 9.462819711693852e-07, + "loss": 4.6569, + "step": 22360 + }, + { + "epoch": 0.9639488305982685, + "learning_rate": 9.462334891939064e-07, + "loss": 4.7177, + "step": 22380 + }, + { + "epoch": 0.9648102683378559, + "learning_rate": 9.461850072184274e-07, + "loss": 4.5029, + "step": 22400 + }, + { + "epoch": 0.9656717060774432, + "learning_rate": 9.461365252429485e-07, + "loss": 4.5453, + "step": 22420 + }, + { + "epoch": 0.9665331438170306, + "learning_rate": 9.460880432674696e-07, + "loss": 4.3993, + "step": 22440 + }, + { + "epoch": 0.967394581556618, + "learning_rate": 9.460395612919907e-07, + "loss": 4.5315, + "step": 22460 + }, + { + "epoch": 0.9682560192962054, + "learning_rate": 9.459910793165118e-07, + "loss": 4.3262, + "step": 22480 + }, + { + "epoch": 0.9691174570357928, + "learning_rate": 9.45942597341033e-07, + "loss": 4.2996, + "step": 22500 + }, + { + "epoch": 0.9699788947753801, + "learning_rate": 9.458941153655541e-07, + "loss": 4.5598, + "step": 22520 + }, + { + "epoch": 0.9708403325149675, + "learning_rate": 9.458456333900751e-07, + "loss": 4.5351, + "step": 22540 + }, + { + "epoch": 0.9717017702545548, + "learning_rate": 9.457971514145962e-07, + "loss": 4.605, + "step": 22560 + }, + { + "epoch": 0.9725632079941422, + "learning_rate": 9.457486694391174e-07, + "loss": 4.4104, + "step": 22580 + }, + { + "epoch": 0.9734246457337296, + "learning_rate": 9.457001874636385e-07, + "loss": 4.5709, + "step": 22600 + }, + { + "epoch": 0.9742860834733169, + "learning_rate": 9.456517054881596e-07, + "loss": 4.475, + "step": 22620 + }, + { + "epoch": 0.9751475212129044, + "learning_rate": 9.456032235126807e-07, + "loss": 4.6957, + "step": 22640 + }, + { + "epoch": 0.9760089589524917, + "learning_rate": 9.455547415372018e-07, + "loss": 4.5696, + "step": 22660 + }, + { + "epoch": 0.9768703966920791, + "learning_rate": 9.455062595617229e-07, + "loss": 4.4252, + "step": 22680 + }, + { + "epoch": 0.9777318344316664, + "learning_rate": 9.45457777586244e-07, + "loss": 4.5615, + "step": 22700 + }, + { + "epoch": 0.9785932721712538, + "learning_rate": 9.454092956107651e-07, + "loss": 4.6245, + "step": 22720 + }, + { + "epoch": 0.9794547099108412, + "learning_rate": 9.453608136352863e-07, + "loss": 4.754, + "step": 22740 + }, + { + "epoch": 0.9803161476504285, + "learning_rate": 9.453123316598073e-07, + "loss": 4.5483, + "step": 22760 + }, + { + "epoch": 0.981177585390016, + "learning_rate": 9.452638496843284e-07, + "loss": 4.817, + "step": 22780 + }, + { + "epoch": 0.9820390231296033, + "learning_rate": 9.452153677088495e-07, + "loss": 4.5536, + "step": 22800 + }, + { + "epoch": 0.9829004608691907, + "learning_rate": 9.451668857333707e-07, + "loss": 4.6521, + "step": 22820 + }, + { + "epoch": 0.983761898608778, + "learning_rate": 9.451184037578917e-07, + "loss": 4.6244, + "step": 22840 + }, + { + "epoch": 0.9846233363483654, + "learning_rate": 9.450699217824128e-07, + "loss": 4.7698, + "step": 22860 + }, + { + "epoch": 0.9854847740879528, + "learning_rate": 9.45021439806934e-07, + "loss": 4.5851, + "step": 22880 + }, + { + "epoch": 0.9863462118275401, + "learning_rate": 9.449729578314551e-07, + "loss": 4.3717, + "step": 22900 + }, + { + "epoch": 0.9872076495671276, + "learning_rate": 9.449244758559761e-07, + "loss": 4.7009, + "step": 22920 + }, + { + "epoch": 0.9880690873067149, + "learning_rate": 9.448759938804972e-07, + "loss": 4.5667, + "step": 22940 + }, + { + "epoch": 0.9889305250463023, + "learning_rate": 9.448275119050184e-07, + "loss": 4.7279, + "step": 22960 + }, + { + "epoch": 0.9897919627858897, + "learning_rate": 9.447790299295395e-07, + "loss": 4.6005, + "step": 22980 + }, + { + "epoch": 0.990653400525477, + "learning_rate": 9.447305479540606e-07, + "loss": 4.875, + "step": 23000 + }, + { + "epoch": 0.9915148382650644, + "learning_rate": 9.446820659785817e-07, + "loss": 4.6195, + "step": 23020 + }, + { + "epoch": 0.9923762760046517, + "learning_rate": 9.446335840031028e-07, + "loss": 4.3456, + "step": 23040 + }, + { + "epoch": 0.9932377137442391, + "learning_rate": 9.445851020276238e-07, + "loss": 4.5356, + "step": 23060 + }, + { + "epoch": 0.9940991514838265, + "learning_rate": 9.44536620052145e-07, + "loss": 4.5905, + "step": 23080 + }, + { + "epoch": 0.9949605892234139, + "learning_rate": 9.444881380766661e-07, + "loss": 4.6106, + "step": 23100 + }, + { + "epoch": 0.9958220269630013, + "learning_rate": 9.444396561011873e-07, + "loss": 4.443, + "step": 23120 + }, + { + "epoch": 0.9966834647025886, + "learning_rate": 9.443911741257083e-07, + "loss": 4.7554, + "step": 23140 + }, + { + "epoch": 0.997544902442176, + "learning_rate": 9.443426921502295e-07, + "loss": 4.4825, + "step": 23160 + }, + { + "epoch": 0.9984063401817633, + "learning_rate": 9.442942101747505e-07, + "loss": 4.8178, + "step": 23180 + }, + { + "epoch": 0.9992677779213507, + "learning_rate": 9.442457281992717e-07, + "loss": 4.51, + "step": 23200 + }, + { + "epoch": 1.000129215660938, + "learning_rate": 9.441972462237927e-07, + "loss": 4.6528, + "step": 23220 + }, + { + "epoch": 1.0009906534005255, + "learning_rate": 9.441487642483139e-07, + "loss": 4.6363, + "step": 23240 + }, + { + "epoch": 1.0018520911401128, + "learning_rate": 9.44100282272835e-07, + "loss": 4.398, + "step": 23260 + }, + { + "epoch": 1.0027135288797002, + "learning_rate": 9.440518002973562e-07, + "loss": 4.5253, + "step": 23280 + }, + { + "epoch": 1.0035749666192877, + "learning_rate": 9.44003318321877e-07, + "loss": 4.3179, + "step": 23300 + }, + { + "epoch": 1.004436404358875, + "learning_rate": 9.439548363463983e-07, + "loss": 4.6132, + "step": 23320 + }, + { + "epoch": 1.0052978420984624, + "learning_rate": 9.439063543709194e-07, + "loss": 4.3085, + "step": 23340 + }, + { + "epoch": 1.0061592798380496, + "learning_rate": 9.438578723954406e-07, + "loss": 4.6764, + "step": 23360 + }, + { + "epoch": 1.0070207175776371, + "learning_rate": 9.438093904199616e-07, + "loss": 4.6113, + "step": 23380 + }, + { + "epoch": 1.0078821553172244, + "learning_rate": 9.437609084444828e-07, + "loss": 4.7279, + "step": 23400 + }, + { + "epoch": 1.0087435930568118, + "learning_rate": 9.437124264690038e-07, + "loss": 4.4206, + "step": 23420 + }, + { + "epoch": 1.0096050307963993, + "learning_rate": 9.436639444935248e-07, + "loss": 4.5451, + "step": 23440 + }, + { + "epoch": 1.0104664685359865, + "learning_rate": 9.43615462518046e-07, + "loss": 4.6053, + "step": 23460 + }, + { + "epoch": 1.011327906275574, + "learning_rate": 9.435669805425671e-07, + "loss": 4.6269, + "step": 23480 + }, + { + "epoch": 1.0121893440151613, + "learning_rate": 9.435184985670883e-07, + "loss": 4.559, + "step": 23500 + }, + { + "epoch": 1.0130507817547487, + "learning_rate": 9.434700165916093e-07, + "loss": 4.4435, + "step": 23520 + }, + { + "epoch": 1.013912219494336, + "learning_rate": 9.434215346161305e-07, + "loss": 4.6021, + "step": 23540 + }, + { + "epoch": 1.0147736572339234, + "learning_rate": 9.433730526406515e-07, + "loss": 4.8653, + "step": 23560 + }, + { + "epoch": 1.015635094973511, + "learning_rate": 9.433245706651727e-07, + "loss": 4.6137, + "step": 23580 + }, + { + "epoch": 1.0164965327130981, + "learning_rate": 9.432760886896937e-07, + "loss": 4.4127, + "step": 23600 + }, + { + "epoch": 1.0173579704526856, + "learning_rate": 9.432276067142149e-07, + "loss": 4.43, + "step": 23620 + }, + { + "epoch": 1.0182194081922729, + "learning_rate": 9.43179124738736e-07, + "loss": 4.5782, + "step": 23640 + }, + { + "epoch": 1.0190808459318603, + "learning_rate": 9.431306427632572e-07, + "loss": 4.6049, + "step": 23660 + }, + { + "epoch": 1.0199422836714476, + "learning_rate": 9.430821607877781e-07, + "loss": 4.6632, + "step": 23680 + }, + { + "epoch": 1.020803721411035, + "learning_rate": 9.430336788122993e-07, + "loss": 4.3574, + "step": 23700 + }, + { + "epoch": 1.0216651591506223, + "learning_rate": 9.429851968368204e-07, + "loss": 4.7483, + "step": 23720 + }, + { + "epoch": 1.0225265968902097, + "learning_rate": 9.429367148613415e-07, + "loss": 4.4544, + "step": 23740 + }, + { + "epoch": 1.0233880346297972, + "learning_rate": 9.428882328858626e-07, + "loss": 4.652, + "step": 23760 + }, + { + "epoch": 1.0242494723693845, + "learning_rate": 9.428397509103838e-07, + "loss": 4.7933, + "step": 23780 + }, + { + "epoch": 1.025110910108972, + "learning_rate": 9.427912689349049e-07, + "loss": 4.6546, + "step": 23800 + }, + { + "epoch": 1.0259723478485592, + "learning_rate": 9.427427869594258e-07, + "loss": 4.5992, + "step": 23820 + }, + { + "epoch": 1.0268337855881466, + "learning_rate": 9.42694304983947e-07, + "loss": 4.3489, + "step": 23840 + }, + { + "epoch": 1.0276952233277339, + "learning_rate": 9.426458230084681e-07, + "loss": 4.5493, + "step": 23860 + }, + { + "epoch": 1.0285566610673214, + "learning_rate": 9.425973410329893e-07, + "loss": 4.4427, + "step": 23880 + }, + { + "epoch": 1.0294180988069088, + "learning_rate": 9.425488590575104e-07, + "loss": 4.4732, + "step": 23900 + }, + { + "epoch": 1.030279536546496, + "learning_rate": 9.425003770820316e-07, + "loss": 4.4412, + "step": 23920 + }, + { + "epoch": 1.0311409742860835, + "learning_rate": 9.424518951065525e-07, + "loss": 4.3106, + "step": 23940 + }, + { + "epoch": 1.0320024120256708, + "learning_rate": 9.424034131310737e-07, + "loss": 4.5784, + "step": 23960 + }, + { + "epoch": 1.0328638497652582, + "learning_rate": 9.423549311555947e-07, + "loss": 4.5837, + "step": 23980 + }, + { + "epoch": 1.0337252875048455, + "learning_rate": 9.423064491801159e-07, + "loss": 4.451, + "step": 24000 + }, + { + "epoch": 1.034586725244433, + "learning_rate": 9.42257967204637e-07, + "loss": 4.52, + "step": 24020 + }, + { + "epoch": 1.0354481629840204, + "learning_rate": 9.422094852291581e-07, + "loss": 4.2861, + "step": 24040 + }, + { + "epoch": 1.0363096007236077, + "learning_rate": 9.421610032536791e-07, + "loss": 4.5557, + "step": 24060 + }, + { + "epoch": 1.0371710384631951, + "learning_rate": 9.421125212782003e-07, + "loss": 4.807, + "step": 24080 + }, + { + "epoch": 1.0380324762027824, + "learning_rate": 9.420640393027214e-07, + "loss": 4.514, + "step": 24100 + }, + { + "epoch": 1.0388939139423699, + "learning_rate": 9.420155573272425e-07, + "loss": 4.4858, + "step": 24120 + }, + { + "epoch": 1.039755351681957, + "learning_rate": 9.419670753517636e-07, + "loss": 4.2943, + "step": 24140 + }, + { + "epoch": 1.0406167894215446, + "learning_rate": 9.419185933762848e-07, + "loss": 4.6211, + "step": 24160 + }, + { + "epoch": 1.041478227161132, + "learning_rate": 9.418701114008059e-07, + "loss": 4.7033, + "step": 24180 + }, + { + "epoch": 1.0423396649007193, + "learning_rate": 9.418216294253269e-07, + "loss": 4.5263, + "step": 24200 + }, + { + "epoch": 1.0432011026403067, + "learning_rate": 9.41773147449848e-07, + "loss": 4.4634, + "step": 24220 + }, + { + "epoch": 1.044062540379894, + "learning_rate": 9.417246654743692e-07, + "loss": 4.6139, + "step": 24240 + }, + { + "epoch": 1.0449239781194815, + "learning_rate": 9.416761834988903e-07, + "loss": 4.2911, + "step": 24260 + }, + { + "epoch": 1.0457854158590687, + "learning_rate": 9.416277015234114e-07, + "loss": 4.5205, + "step": 24280 + }, + { + "epoch": 1.0466468535986562, + "learning_rate": 9.415792195479325e-07, + "loss": 4.5224, + "step": 24300 + }, + { + "epoch": 1.0475082913382436, + "learning_rate": 9.415307375724536e-07, + "loss": 4.4512, + "step": 24320 + }, + { + "epoch": 1.0483697290778309, + "learning_rate": 9.414822555969746e-07, + "loss": 4.7147, + "step": 24340 + }, + { + "epoch": 1.0492311668174183, + "learning_rate": 9.414337736214958e-07, + "loss": 4.7131, + "step": 24360 + }, + { + "epoch": 1.0500926045570056, + "learning_rate": 9.413852916460169e-07, + "loss": 4.4743, + "step": 24380 + }, + { + "epoch": 1.050954042296593, + "learning_rate": 9.41336809670538e-07, + "loss": 4.5406, + "step": 24400 + }, + { + "epoch": 1.0518154800361803, + "learning_rate": 9.412883276950591e-07, + "loss": 4.5443, + "step": 24420 + }, + { + "epoch": 1.0526769177757678, + "learning_rate": 9.412398457195802e-07, + "loss": 4.6274, + "step": 24440 + }, + { + "epoch": 1.053538355515355, + "learning_rate": 9.411913637441013e-07, + "loss": 4.6099, + "step": 24460 + }, + { + "epoch": 1.0543997932549425, + "learning_rate": 9.411428817686224e-07, + "loss": 4.5776, + "step": 24480 + }, + { + "epoch": 1.05526123099453, + "learning_rate": 9.410943997931435e-07, + "loss": 4.6551, + "step": 24500 + }, + { + "epoch": 1.0561226687341172, + "learning_rate": 9.410459178176646e-07, + "loss": 4.713, + "step": 24520 + }, + { + "epoch": 1.0569841064737047, + "learning_rate": 9.409974358421858e-07, + "loss": 4.6046, + "step": 24540 + }, + { + "epoch": 1.057845544213292, + "learning_rate": 9.409489538667069e-07, + "loss": 4.3717, + "step": 24560 + }, + { + "epoch": 1.0587069819528794, + "learning_rate": 9.409004718912279e-07, + "loss": 4.7505, + "step": 24580 + }, + { + "epoch": 1.0595684196924666, + "learning_rate": 9.40851989915749e-07, + "loss": 4.4285, + "step": 24600 + }, + { + "epoch": 1.060429857432054, + "learning_rate": 9.408035079402702e-07, + "loss": 4.3466, + "step": 24620 + }, + { + "epoch": 1.0612912951716416, + "learning_rate": 9.407550259647912e-07, + "loss": 4.7403, + "step": 24640 + }, + { + "epoch": 1.0621527329112288, + "learning_rate": 9.407065439893124e-07, + "loss": 4.5777, + "step": 24660 + }, + { + "epoch": 1.0630141706508163, + "learning_rate": 9.406580620138335e-07, + "loss": 4.4154, + "step": 24680 + }, + { + "epoch": 1.0638756083904035, + "learning_rate": 9.406095800383546e-07, + "loss": 4.6982, + "step": 24700 + }, + { + "epoch": 1.064737046129991, + "learning_rate": 9.405610980628756e-07, + "loss": 4.3179, + "step": 24720 + }, + { + "epoch": 1.0655984838695782, + "learning_rate": 9.405126160873968e-07, + "loss": 4.1368, + "step": 24740 + }, + { + "epoch": 1.0664599216091657, + "learning_rate": 9.404641341119179e-07, + "loss": 4.5744, + "step": 24760 + }, + { + "epoch": 1.0673213593487532, + "learning_rate": 9.404156521364391e-07, + "loss": 4.3126, + "step": 24780 + }, + { + "epoch": 1.0681827970883404, + "learning_rate": 9.403671701609601e-07, + "loss": 4.3316, + "step": 24800 + }, + { + "epoch": 1.0690442348279279, + "learning_rate": 9.403186881854812e-07, + "loss": 4.4101, + "step": 24820 + }, + { + "epoch": 1.0699056725675151, + "learning_rate": 9.402702062100023e-07, + "loss": 4.5575, + "step": 24840 + }, + { + "epoch": 1.0707671103071026, + "learning_rate": 9.402217242345235e-07, + "loss": 4.4639, + "step": 24860 + }, + { + "epoch": 1.0716285480466898, + "learning_rate": 9.401732422590445e-07, + "loss": 4.5161, + "step": 24880 + }, + { + "epoch": 1.0724899857862773, + "learning_rate": 9.401247602835657e-07, + "loss": 4.5473, + "step": 24900 + }, + { + "epoch": 1.0733514235258648, + "learning_rate": 9.400762783080868e-07, + "loss": 4.3648, + "step": 24920 + }, + { + "epoch": 1.074212861265452, + "learning_rate": 9.400277963326078e-07, + "loss": 4.6174, + "step": 24940 + }, + { + "epoch": 1.0750742990050395, + "learning_rate": 9.399793143571289e-07, + "loss": 4.697, + "step": 24960 + }, + { + "epoch": 1.0759357367446267, + "learning_rate": 9.399308323816501e-07, + "loss": 4.4841, + "step": 24980 + }, + { + "epoch": 1.0767971744842142, + "learning_rate": 9.398823504061712e-07, + "loss": 4.5219, + "step": 25000 + }, + { + "epoch": 1.0776586122238014, + "learning_rate": 9.398338684306922e-07, + "loss": 4.6514, + "step": 25020 + }, + { + "epoch": 1.078520049963389, + "learning_rate": 9.397853864552134e-07, + "loss": 4.4543, + "step": 25040 + }, + { + "epoch": 1.0793814877029764, + "learning_rate": 9.397369044797345e-07, + "loss": 4.5329, + "step": 25060 + }, + { + "epoch": 1.0802429254425636, + "learning_rate": 9.396884225042555e-07, + "loss": 4.7501, + "step": 25080 + }, + { + "epoch": 1.081104363182151, + "learning_rate": 9.396399405287766e-07, + "loss": 4.2067, + "step": 25100 + }, + { + "epoch": 1.0819658009217383, + "learning_rate": 9.395914585532978e-07, + "loss": 4.5308, + "step": 25120 + }, + { + "epoch": 1.0828272386613258, + "learning_rate": 9.395429765778189e-07, + "loss": 4.4915, + "step": 25140 + }, + { + "epoch": 1.083688676400913, + "learning_rate": 9.394944946023401e-07, + "loss": 4.5847, + "step": 25160 + }, + { + "epoch": 1.0845501141405005, + "learning_rate": 9.394460126268612e-07, + "loss": 4.4696, + "step": 25180 + }, + { + "epoch": 1.085411551880088, + "learning_rate": 9.393975306513822e-07, + "loss": 4.4294, + "step": 25200 + }, + { + "epoch": 1.0862729896196752, + "learning_rate": 9.393490486759033e-07, + "loss": 4.4869, + "step": 25220 + }, + { + "epoch": 1.0871344273592627, + "learning_rate": 9.393005667004245e-07, + "loss": 4.5689, + "step": 25240 + }, + { + "epoch": 1.08799586509885, + "learning_rate": 9.392520847249455e-07, + "loss": 4.4144, + "step": 25260 + }, + { + "epoch": 1.0888573028384374, + "learning_rate": 9.392036027494667e-07, + "loss": 4.4279, + "step": 25280 + }, + { + "epoch": 1.0897187405780246, + "learning_rate": 9.391551207739878e-07, + "loss": 4.5716, + "step": 25300 + }, + { + "epoch": 1.0905801783176121, + "learning_rate": 9.39106638798509e-07, + "loss": 4.4915, + "step": 25320 + }, + { + "epoch": 1.0914416160571996, + "learning_rate": 9.390581568230299e-07, + "loss": 4.3967, + "step": 25340 + }, + { + "epoch": 1.0923030537967868, + "learning_rate": 9.390096748475511e-07, + "loss": 4.5609, + "step": 25360 + }, + { + "epoch": 1.0931644915363743, + "learning_rate": 9.389611928720722e-07, + "loss": 4.4044, + "step": 25380 + }, + { + "epoch": 1.0940259292759615, + "learning_rate": 9.389127108965933e-07, + "loss": 4.5284, + "step": 25400 + }, + { + "epoch": 1.094887367015549, + "learning_rate": 9.388642289211144e-07, + "loss": 4.558, + "step": 25420 + }, + { + "epoch": 1.0957488047551363, + "learning_rate": 9.388157469456356e-07, + "loss": 4.647, + "step": 25440 + }, + { + "epoch": 1.0966102424947237, + "learning_rate": 9.387672649701566e-07, + "loss": 4.4125, + "step": 25460 + }, + { + "epoch": 1.097471680234311, + "learning_rate": 9.387187829946776e-07, + "loss": 4.3079, + "step": 25480 + }, + { + "epoch": 1.0983331179738984, + "learning_rate": 9.386703010191988e-07, + "loss": 4.5785, + "step": 25500 + }, + { + "epoch": 1.099194555713486, + "learning_rate": 9.3862181904372e-07, + "loss": 4.5369, + "step": 25520 + }, + { + "epoch": 1.1000559934530731, + "learning_rate": 9.38573337068241e-07, + "loss": 4.4112, + "step": 25540 + }, + { + "epoch": 1.1009174311926606, + "learning_rate": 9.385248550927621e-07, + "loss": 4.6571, + "step": 25560 + }, + { + "epoch": 1.1017788689322479, + "learning_rate": 9.384763731172833e-07, + "loss": 4.2386, + "step": 25580 + }, + { + "epoch": 1.1026403066718353, + "learning_rate": 9.384278911418043e-07, + "loss": 4.577, + "step": 25600 + }, + { + "epoch": 1.1035017444114226, + "learning_rate": 9.383794091663254e-07, + "loss": 4.623, + "step": 25620 + }, + { + "epoch": 1.10436318215101, + "learning_rate": 9.383309271908465e-07, + "loss": 4.6539, + "step": 25640 + }, + { + "epoch": 1.1052246198905975, + "learning_rate": 9.382824452153677e-07, + "loss": 4.5414, + "step": 25660 + }, + { + "epoch": 1.1060860576301847, + "learning_rate": 9.382339632398888e-07, + "loss": 4.6351, + "step": 25680 + }, + { + "epoch": 1.1069474953697722, + "learning_rate": 9.3818548126441e-07, + "loss": 4.5367, + "step": 25700 + }, + { + "epoch": 1.1078089331093595, + "learning_rate": 9.381369992889309e-07, + "loss": 4.403, + "step": 25720 + }, + { + "epoch": 1.108670370848947, + "learning_rate": 9.380885173134521e-07, + "loss": 4.4065, + "step": 25740 + }, + { + "epoch": 1.1095318085885342, + "learning_rate": 9.380400353379732e-07, + "loss": 4.4638, + "step": 25760 + }, + { + "epoch": 1.1103932463281216, + "learning_rate": 9.379915533624943e-07, + "loss": 4.5257, + "step": 25780 + }, + { + "epoch": 1.111254684067709, + "learning_rate": 9.379430713870154e-07, + "loss": 4.568, + "step": 25800 + }, + { + "epoch": 1.1121161218072964, + "learning_rate": 9.378945894115366e-07, + "loss": 4.4757, + "step": 25820 + }, + { + "epoch": 1.1129775595468838, + "learning_rate": 9.378461074360575e-07, + "loss": 4.4528, + "step": 25840 + }, + { + "epoch": 1.113838997286471, + "learning_rate": 9.377976254605787e-07, + "loss": 4.5492, + "step": 25860 + }, + { + "epoch": 1.1147004350260585, + "learning_rate": 9.377491434850998e-07, + "loss": 4.3896, + "step": 25880 + }, + { + "epoch": 1.1155618727656458, + "learning_rate": 9.37700661509621e-07, + "loss": 4.6046, + "step": 25900 + }, + { + "epoch": 1.1164233105052332, + "learning_rate": 9.37652179534142e-07, + "loss": 4.516, + "step": 25920 + }, + { + "epoch": 1.1172847482448205, + "learning_rate": 9.376036975586632e-07, + "loss": 4.4047, + "step": 25940 + }, + { + "epoch": 1.118146185984408, + "learning_rate": 9.375552155831843e-07, + "loss": 4.442, + "step": 25960 + }, + { + "epoch": 1.1190076237239954, + "learning_rate": 9.375067336077054e-07, + "loss": 4.5514, + "step": 25980 + }, + { + "epoch": 1.1198690614635827, + "learning_rate": 9.374582516322264e-07, + "loss": 4.9406, + "step": 26000 + }, + { + "epoch": 1.1207304992031701, + "learning_rate": 9.374097696567476e-07, + "loss": 4.5569, + "step": 26020 + }, + { + "epoch": 1.1215919369427574, + "learning_rate": 9.373612876812687e-07, + "loss": 4.5373, + "step": 26040 + }, + { + "epoch": 1.1224533746823449, + "learning_rate": 9.373128057057899e-07, + "loss": 4.4326, + "step": 26060 + }, + { + "epoch": 1.123314812421932, + "learning_rate": 9.372643237303109e-07, + "loss": 4.4376, + "step": 26080 + }, + { + "epoch": 1.1241762501615196, + "learning_rate": 9.372158417548319e-07, + "loss": 4.5073, + "step": 26100 + }, + { + "epoch": 1.125037687901107, + "learning_rate": 9.371673597793531e-07, + "loss": 4.5078, + "step": 26120 + }, + { + "epoch": 1.1258991256406943, + "learning_rate": 9.371188778038741e-07, + "loss": 4.3807, + "step": 26140 + }, + { + "epoch": 1.1267605633802817, + "learning_rate": 9.370703958283953e-07, + "loss": 4.5271, + "step": 26160 + }, + { + "epoch": 1.127622001119869, + "learning_rate": 9.370219138529164e-07, + "loss": 4.5432, + "step": 26180 + }, + { + "epoch": 1.1284834388594565, + "learning_rate": 9.369734318774376e-07, + "loss": 4.5553, + "step": 26200 + }, + { + "epoch": 1.1293448765990437, + "learning_rate": 9.369249499019586e-07, + "loss": 4.6125, + "step": 26220 + }, + { + "epoch": 1.1302063143386312, + "learning_rate": 9.368764679264797e-07, + "loss": 4.6217, + "step": 26240 + }, + { + "epoch": 1.1310677520782186, + "learning_rate": 9.368279859510008e-07, + "loss": 4.5566, + "step": 26260 + }, + { + "epoch": 1.1319291898178059, + "learning_rate": 9.36779503975522e-07, + "loss": 4.4153, + "step": 26280 + }, + { + "epoch": 1.1327906275573933, + "learning_rate": 9.36731022000043e-07, + "loss": 4.3595, + "step": 26300 + }, + { + "epoch": 1.1336520652969806, + "learning_rate": 9.366825400245642e-07, + "loss": 4.5201, + "step": 26320 + }, + { + "epoch": 1.134513503036568, + "learning_rate": 9.366340580490853e-07, + "loss": 4.7035, + "step": 26340 + }, + { + "epoch": 1.1353749407761553, + "learning_rate": 9.365855760736064e-07, + "loss": 4.2116, + "step": 26360 + }, + { + "epoch": 1.1362363785157428, + "learning_rate": 9.365370940981274e-07, + "loss": 4.2687, + "step": 26380 + }, + { + "epoch": 1.1370978162553302, + "learning_rate": 9.364886121226486e-07, + "loss": 4.55, + "step": 26400 + }, + { + "epoch": 1.1379592539949175, + "learning_rate": 9.364401301471697e-07, + "loss": 4.9486, + "step": 26420 + }, + { + "epoch": 1.138820691734505, + "learning_rate": 9.363916481716909e-07, + "loss": 4.3644, + "step": 26440 + }, + { + "epoch": 1.1396821294740922, + "learning_rate": 9.363431661962119e-07, + "loss": 4.6457, + "step": 26460 + }, + { + "epoch": 1.1405435672136797, + "learning_rate": 9.36294684220733e-07, + "loss": 4.5175, + "step": 26480 + }, + { + "epoch": 1.141405004953267, + "learning_rate": 9.362462022452541e-07, + "loss": 4.4466, + "step": 26500 + }, + { + "epoch": 1.1422664426928544, + "learning_rate": 9.361977202697752e-07, + "loss": 4.4972, + "step": 26520 + }, + { + "epoch": 1.1431278804324418, + "learning_rate": 9.361492382942963e-07, + "loss": 4.4487, + "step": 26540 + }, + { + "epoch": 1.143989318172029, + "learning_rate": 9.361007563188174e-07, + "loss": 4.3274, + "step": 26560 + }, + { + "epoch": 1.1448507559116166, + "learning_rate": 9.360522743433386e-07, + "loss": 4.4503, + "step": 26580 + }, + { + "epoch": 1.1457121936512038, + "learning_rate": 9.360037923678596e-07, + "loss": 4.3373, + "step": 26600 + }, + { + "epoch": 1.1465736313907913, + "learning_rate": 9.359553103923807e-07, + "loss": 4.3123, + "step": 26620 + }, + { + "epoch": 1.1474350691303785, + "learning_rate": 9.359068284169018e-07, + "loss": 4.4104, + "step": 26640 + }, + { + "epoch": 1.148296506869966, + "learning_rate": 9.35858346441423e-07, + "loss": 4.7109, + "step": 26660 + }, + { + "epoch": 1.1491579446095535, + "learning_rate": 9.35809864465944e-07, + "loss": 4.5663, + "step": 26680 + }, + { + "epoch": 1.1500193823491407, + "learning_rate": 9.357613824904652e-07, + "loss": 4.3707, + "step": 26700 + }, + { + "epoch": 1.1508808200887282, + "learning_rate": 9.357129005149863e-07, + "loss": 4.3986, + "step": 26720 + }, + { + "epoch": 1.1517422578283154, + "learning_rate": 9.356644185395074e-07, + "loss": 4.4001, + "step": 26740 + }, + { + "epoch": 1.1526036955679029, + "learning_rate": 9.356159365640284e-07, + "loss": 4.4226, + "step": 26760 + }, + { + "epoch": 1.1534651333074901, + "learning_rate": 9.355674545885496e-07, + "loss": 4.8085, + "step": 26780 + }, + { + "epoch": 1.1543265710470776, + "learning_rate": 9.355189726130707e-07, + "loss": 4.5611, + "step": 26800 + }, + { + "epoch": 1.155188008786665, + "learning_rate": 9.354704906375919e-07, + "loss": 4.3543, + "step": 26820 + }, + { + "epoch": 1.1560494465262523, + "learning_rate": 9.354220086621129e-07, + "loss": 4.4754, + "step": 26840 + }, + { + "epoch": 1.1569108842658398, + "learning_rate": 9.353735266866339e-07, + "loss": 4.6213, + "step": 26860 + }, + { + "epoch": 1.157772322005427, + "learning_rate": 9.353250447111551e-07, + "loss": 4.3756, + "step": 26880 + }, + { + "epoch": 1.1586337597450145, + "learning_rate": 9.352765627356762e-07, + "loss": 4.2725, + "step": 26900 + }, + { + "epoch": 1.1594951974846017, + "learning_rate": 9.352280807601973e-07, + "loss": 4.4786, + "step": 26920 + }, + { + "epoch": 1.1603566352241892, + "learning_rate": 9.351795987847185e-07, + "loss": 4.6271, + "step": 26940 + }, + { + "epoch": 1.1612180729637767, + "learning_rate": 9.351311168092397e-07, + "loss": 4.6945, + "step": 26960 + }, + { + "epoch": 1.162079510703364, + "learning_rate": 9.350826348337606e-07, + "loss": 4.5041, + "step": 26980 + }, + { + "epoch": 1.1629409484429514, + "learning_rate": 9.350341528582817e-07, + "loss": 4.6746, + "step": 27000 + }, + { + "epoch": 1.1638023861825386, + "learning_rate": 9.349856708828029e-07, + "loss": 4.5183, + "step": 27020 + }, + { + "epoch": 1.164663823922126, + "learning_rate": 9.34937188907324e-07, + "loss": 4.5165, + "step": 27040 + }, + { + "epoch": 1.1655252616617133, + "learning_rate": 9.348887069318451e-07, + "loss": 4.3108, + "step": 27060 + }, + { + "epoch": 1.1663866994013008, + "learning_rate": 9.348402249563662e-07, + "loss": 4.321, + "step": 27080 + }, + { + "epoch": 1.1672481371408883, + "learning_rate": 9.347917429808873e-07, + "loss": 4.3061, + "step": 27100 + }, + { + "epoch": 1.1681095748804755, + "learning_rate": 9.347432610054083e-07, + "loss": 4.617, + "step": 27120 + }, + { + "epoch": 1.168971012620063, + "learning_rate": 9.346947790299295e-07, + "loss": 4.5026, + "step": 27140 + }, + { + "epoch": 1.1698324503596502, + "learning_rate": 9.346462970544506e-07, + "loss": 4.5797, + "step": 27160 + }, + { + "epoch": 1.1706938880992377, + "learning_rate": 9.345978150789717e-07, + "loss": 4.5256, + "step": 27180 + }, + { + "epoch": 1.171555325838825, + "learning_rate": 9.345493331034928e-07, + "loss": 4.4388, + "step": 27200 + }, + { + "epoch": 1.1724167635784124, + "learning_rate": 9.345008511280139e-07, + "loss": 4.523, + "step": 27220 + }, + { + "epoch": 1.1732782013179996, + "learning_rate": 9.34452369152535e-07, + "loss": 4.4003, + "step": 27240 + }, + { + "epoch": 1.1741396390575871, + "learning_rate": 9.344038871770561e-07, + "loss": 4.1231, + "step": 27260 + }, + { + "epoch": 1.1750010767971744, + "learning_rate": 9.343554052015772e-07, + "loss": 4.4005, + "step": 27280 + }, + { + "epoch": 1.1758625145367618, + "learning_rate": 9.343069232260983e-07, + "loss": 4.5794, + "step": 27300 + }, + { + "epoch": 1.1767239522763493, + "learning_rate": 9.342584412506195e-07, + "loss": 4.4316, + "step": 27320 + }, + { + "epoch": 1.1775853900159365, + "learning_rate": 9.342099592751406e-07, + "loss": 4.5104, + "step": 27340 + }, + { + "epoch": 1.178446827755524, + "learning_rate": 9.341614772996617e-07, + "loss": 4.6388, + "step": 27360 + }, + { + "epoch": 1.1793082654951113, + "learning_rate": 9.341129953241827e-07, + "loss": 4.5311, + "step": 27380 + }, + { + "epoch": 1.1801697032346987, + "learning_rate": 9.340645133487039e-07, + "loss": 4.2994, + "step": 27400 + }, + { + "epoch": 1.181031140974286, + "learning_rate": 9.34016031373225e-07, + "loss": 4.2871, + "step": 27420 + }, + { + "epoch": 1.1818925787138734, + "learning_rate": 9.339675493977461e-07, + "loss": 4.5998, + "step": 27440 + }, + { + "epoch": 1.182754016453461, + "learning_rate": 9.339190674222672e-07, + "loss": 4.6689, + "step": 27460 + }, + { + "epoch": 1.1836154541930481, + "learning_rate": 9.338705854467885e-07, + "loss": 4.5299, + "step": 27480 + }, + { + "epoch": 1.1844768919326356, + "learning_rate": 9.338221034713093e-07, + "loss": 4.5076, + "step": 27500 + }, + { + "epoch": 1.1853383296722229, + "learning_rate": 9.337736214958305e-07, + "loss": 4.6751, + "step": 27520 + }, + { + "epoch": 1.1861997674118103, + "learning_rate": 9.337251395203516e-07, + "loss": 4.4489, + "step": 27540 + }, + { + "epoch": 1.1870612051513976, + "learning_rate": 9.336766575448728e-07, + "loss": 4.5556, + "step": 27560 + }, + { + "epoch": 1.187922642890985, + "learning_rate": 9.336281755693938e-07, + "loss": 4.543, + "step": 27580 + }, + { + "epoch": 1.1887840806305725, + "learning_rate": 9.33579693593915e-07, + "loss": 4.6117, + "step": 27600 + }, + { + "epoch": 1.1896455183701597, + "learning_rate": 9.33531211618436e-07, + "loss": 4.4452, + "step": 27620 + }, + { + "epoch": 1.1905069561097472, + "learning_rate": 9.33482729642957e-07, + "loss": 4.4695, + "step": 27640 + }, + { + "epoch": 1.1913683938493345, + "learning_rate": 9.334342476674782e-07, + "loss": 4.4768, + "step": 27660 + }, + { + "epoch": 1.192229831588922, + "learning_rate": 9.333857656919994e-07, + "loss": 4.4135, + "step": 27680 + }, + { + "epoch": 1.1930912693285092, + "learning_rate": 9.333372837165205e-07, + "loss": 4.4234, + "step": 27700 + }, + { + "epoch": 1.1939527070680966, + "learning_rate": 9.332888017410416e-07, + "loss": 4.7231, + "step": 27720 + }, + { + "epoch": 1.194814144807684, + "learning_rate": 9.332403197655627e-07, + "loss": 4.5024, + "step": 27740 + }, + { + "epoch": 1.1956755825472714, + "learning_rate": 9.331918377900837e-07, + "loss": 4.3848, + "step": 27760 + }, + { + "epoch": 1.1965370202868588, + "learning_rate": 9.331433558146049e-07, + "loss": 4.4045, + "step": 27780 + }, + { + "epoch": 1.197398458026446, + "learning_rate": 9.33094873839126e-07, + "loss": 4.5017, + "step": 27800 + }, + { + "epoch": 1.1982598957660335, + "learning_rate": 9.330463918636471e-07, + "loss": 4.397, + "step": 27820 + }, + { + "epoch": 1.1991213335056208, + "learning_rate": 9.329979098881682e-07, + "loss": 4.4905, + "step": 27840 + }, + { + "epoch": 1.1999827712452082, + "learning_rate": 9.329494279126894e-07, + "loss": 4.3495, + "step": 27860 + }, + { + "epoch": 1.2008442089847957, + "learning_rate": 9.329009459372103e-07, + "loss": 4.4544, + "step": 27880 + }, + { + "epoch": 1.201705646724383, + "learning_rate": 9.328524639617315e-07, + "loss": 4.5536, + "step": 27900 + }, + { + "epoch": 1.2025670844639704, + "learning_rate": 9.328039819862526e-07, + "loss": 4.534, + "step": 27920 + }, + { + "epoch": 1.2034285222035577, + "learning_rate": 9.327555000107738e-07, + "loss": 4.6825, + "step": 27940 + }, + { + "epoch": 1.2042899599431451, + "learning_rate": 9.327070180352948e-07, + "loss": 4.7677, + "step": 27960 + }, + { + "epoch": 1.2051513976827324, + "learning_rate": 9.32658536059816e-07, + "loss": 4.5732, + "step": 27980 + }, + { + "epoch": 1.2060128354223199, + "learning_rate": 9.326100540843371e-07, + "loss": 4.4788, + "step": 28000 + }, + { + "epoch": 1.2068742731619073, + "learning_rate": 9.325615721088582e-07, + "loss": 4.4752, + "step": 28020 + }, + { + "epoch": 1.2077357109014946, + "learning_rate": 9.325130901333792e-07, + "loss": 4.5123, + "step": 28040 + }, + { + "epoch": 1.208597148641082, + "learning_rate": 9.324646081579004e-07, + "loss": 4.2612, + "step": 28060 + }, + { + "epoch": 1.2094585863806693, + "learning_rate": 9.324161261824215e-07, + "loss": 4.7906, + "step": 28080 + }, + { + "epoch": 1.2103200241202567, + "learning_rate": 9.323676442069426e-07, + "loss": 4.4646, + "step": 28100 + }, + { + "epoch": 1.211181461859844, + "learning_rate": 9.323191622314637e-07, + "loss": 4.2376, + "step": 28120 + }, + { + "epoch": 1.2120428995994315, + "learning_rate": 9.322706802559848e-07, + "loss": 4.5163, + "step": 28140 + }, + { + "epoch": 1.212904337339019, + "learning_rate": 9.322221982805059e-07, + "loss": 4.3873, + "step": 28160 + }, + { + "epoch": 1.2137657750786062, + "learning_rate": 9.321737163050269e-07, + "loss": 4.4932, + "step": 28180 + }, + { + "epoch": 1.2146272128181936, + "learning_rate": 9.321252343295481e-07, + "loss": 4.4182, + "step": 28200 + }, + { + "epoch": 1.2154886505577809, + "learning_rate": 9.320767523540693e-07, + "loss": 4.4323, + "step": 28220 + }, + { + "epoch": 1.2163500882973683, + "learning_rate": 9.320282703785904e-07, + "loss": 4.5085, + "step": 28240 + }, + { + "epoch": 1.2172115260369556, + "learning_rate": 9.319797884031113e-07, + "loss": 4.2675, + "step": 28260 + }, + { + "epoch": 1.218072963776543, + "learning_rate": 9.319313064276325e-07, + "loss": 4.4825, + "step": 28280 + }, + { + "epoch": 1.2189344015161305, + "learning_rate": 9.318828244521536e-07, + "loss": 4.7004, + "step": 28300 + }, + { + "epoch": 1.2197958392557178, + "learning_rate": 9.318343424766748e-07, + "loss": 4.1579, + "step": 28320 + }, + { + "epoch": 1.2206572769953052, + "learning_rate": 9.317858605011958e-07, + "loss": 4.4969, + "step": 28340 + }, + { + "epoch": 1.2215187147348925, + "learning_rate": 9.31737378525717e-07, + "loss": 4.8679, + "step": 28360 + }, + { + "epoch": 1.22238015247448, + "learning_rate": 9.316888965502381e-07, + "loss": 4.6333, + "step": 28380 + }, + { + "epoch": 1.2232415902140672, + "learning_rate": 9.316404145747591e-07, + "loss": 4.6174, + "step": 28400 + }, + { + "epoch": 1.2241030279536547, + "learning_rate": 9.315919325992802e-07, + "loss": 4.3189, + "step": 28420 + }, + { + "epoch": 1.2249644656932421, + "learning_rate": 9.315434506238014e-07, + "loss": 4.2966, + "step": 28440 + }, + { + "epoch": 1.2258259034328294, + "learning_rate": 9.314949686483225e-07, + "loss": 4.5531, + "step": 28460 + }, + { + "epoch": 1.2266873411724168, + "learning_rate": 9.314464866728436e-07, + "loss": 4.4412, + "step": 28480 + }, + { + "epoch": 1.227548778912004, + "learning_rate": 9.313980046973647e-07, + "loss": 4.3971, + "step": 28500 + }, + { + "epoch": 1.2284102166515916, + "learning_rate": 9.313495227218858e-07, + "loss": 4.7037, + "step": 28520 + }, + { + "epoch": 1.2292716543911788, + "learning_rate": 9.313010407464069e-07, + "loss": 4.6395, + "step": 28540 + }, + { + "epoch": 1.2301330921307663, + "learning_rate": 9.31252558770928e-07, + "loss": 4.5955, + "step": 28560 + }, + { + "epoch": 1.2309945298703537, + "learning_rate": 9.312040767954491e-07, + "loss": 4.3676, + "step": 28580 + }, + { + "epoch": 1.231855967609941, + "learning_rate": 9.311555948199703e-07, + "loss": 4.6028, + "step": 28600 + }, + { + "epoch": 1.2327174053495285, + "learning_rate": 9.311071128444914e-07, + "loss": 4.3803, + "step": 28620 + }, + { + "epoch": 1.2335788430891157, + "learning_rate": 9.310586308690123e-07, + "loss": 4.583, + "step": 28640 + }, + { + "epoch": 1.2344402808287032, + "learning_rate": 9.310101488935335e-07, + "loss": 4.5455, + "step": 28660 + }, + { + "epoch": 1.2353017185682904, + "learning_rate": 9.309616669180547e-07, + "loss": 4.4278, + "step": 28680 + }, + { + "epoch": 1.2361631563078779, + "learning_rate": 9.309131849425757e-07, + "loss": 4.3653, + "step": 28700 + }, + { + "epoch": 1.2370245940474653, + "learning_rate": 9.308647029670968e-07, + "loss": 4.642, + "step": 28720 + }, + { + "epoch": 1.2378860317870526, + "learning_rate": 9.308162209916181e-07, + "loss": 4.4583, + "step": 28740 + }, + { + "epoch": 1.23874746952664, + "learning_rate": 9.307677390161391e-07, + "loss": 4.3084, + "step": 28760 + }, + { + "epoch": 1.2396089072662273, + "learning_rate": 9.307192570406601e-07, + "loss": 4.2672, + "step": 28780 + }, + { + "epoch": 1.2404703450058148, + "learning_rate": 9.306707750651812e-07, + "loss": 4.5146, + "step": 28800 + }, + { + "epoch": 1.241331782745402, + "learning_rate": 9.306222930897024e-07, + "loss": 4.5027, + "step": 28820 + }, + { + "epoch": 1.2421932204849895, + "learning_rate": 9.305738111142235e-07, + "loss": 4.4661, + "step": 28840 + }, + { + "epoch": 1.2430546582245767, + "learning_rate": 9.305253291387446e-07, + "loss": 4.7078, + "step": 28860 + }, + { + "epoch": 1.2439160959641642, + "learning_rate": 9.304768471632657e-07, + "loss": 4.4902, + "step": 28880 + }, + { + "epoch": 1.2447775337037514, + "learning_rate": 9.304283651877868e-07, + "loss": 4.5009, + "step": 28900 + }, + { + "epoch": 1.245638971443339, + "learning_rate": 9.303798832123079e-07, + "loss": 4.5814, + "step": 28920 + }, + { + "epoch": 1.2465004091829264, + "learning_rate": 9.30331401236829e-07, + "loss": 4.3019, + "step": 28940 + }, + { + "epoch": 1.2473618469225136, + "learning_rate": 9.302829192613501e-07, + "loss": 4.5484, + "step": 28960 + }, + { + "epoch": 1.248223284662101, + "learning_rate": 9.302344372858713e-07, + "loss": 4.2874, + "step": 28980 + }, + { + "epoch": 1.2490847224016883, + "learning_rate": 9.301859553103923e-07, + "loss": 4.6251, + "step": 29000 + }, + { + "epoch": 1.2499461601412758, + "learning_rate": 9.301374733349134e-07, + "loss": 4.4941, + "step": 29020 + }, + { + "epoch": 1.250807597880863, + "learning_rate": 9.300889913594345e-07, + "loss": 4.2387, + "step": 29040 + }, + { + "epoch": 1.2516690356204505, + "learning_rate": 9.300405093839557e-07, + "loss": 4.4437, + "step": 29060 + }, + { + "epoch": 1.252530473360038, + "learning_rate": 9.299920274084767e-07, + "loss": 4.5546, + "step": 29080 + }, + { + "epoch": 1.2533919110996252, + "learning_rate": 9.299435454329979e-07, + "loss": 4.5387, + "step": 29100 + }, + { + "epoch": 1.2542533488392127, + "learning_rate": 9.29895063457519e-07, + "loss": 4.2704, + "step": 29120 + }, + { + "epoch": 1.2551147865788, + "learning_rate": 9.298465814820402e-07, + "loss": 4.4503, + "step": 29140 + }, + { + "epoch": 1.2559762243183874, + "learning_rate": 9.297980995065611e-07, + "loss": 4.4924, + "step": 29160 + }, + { + "epoch": 1.2568376620579746, + "learning_rate": 9.297496175310823e-07, + "loss": 4.8439, + "step": 29180 + }, + { + "epoch": 1.2576990997975621, + "learning_rate": 9.297011355556034e-07, + "loss": 4.4959, + "step": 29200 + }, + { + "epoch": 1.2585605375371496, + "learning_rate": 9.296526535801246e-07, + "loss": 4.6758, + "step": 29220 + }, + { + "epoch": 1.2594219752767368, + "learning_rate": 9.296041716046456e-07, + "loss": 4.8718, + "step": 29240 + }, + { + "epoch": 1.2602834130163243, + "learning_rate": 9.295556896291667e-07, + "loss": 6.2251, + "step": 29260 + }, + { + "epoch": 1.2611448507559115, + "learning_rate": 9.295072076536878e-07, + "loss": 7.7667, + "step": 29280 + }, + { + "epoch": 1.262006288495499, + "learning_rate": 9.29458725678209e-07, + "loss": 7.3613, + "step": 29300 + }, + { + "epoch": 1.2628677262350863, + "learning_rate": 9.2941024370273e-07, + "loss": 7.3938, + "step": 29320 + }, + { + "epoch": 1.2637291639746737, + "learning_rate": 9.293617617272511e-07, + "loss": 7.4492, + "step": 29340 + }, + { + "epoch": 1.2645906017142612, + "learning_rate": 9.293132797517723e-07, + "loss": 7.3229, + "step": 29360 + }, + { + "epoch": 1.2654520394538484, + "learning_rate": 9.292647977762933e-07, + "loss": 7.4968, + "step": 29380 + }, + { + "epoch": 1.266313477193436, + "learning_rate": 9.292163158008144e-07, + "loss": 7.4763, + "step": 29400 + }, + { + "epoch": 1.2671749149330231, + "learning_rate": 9.291678338253355e-07, + "loss": 7.0305, + "step": 29420 + }, + { + "epoch": 1.2680363526726106, + "learning_rate": 9.291193518498567e-07, + "loss": 7.4039, + "step": 29440 + }, + { + "epoch": 1.2688977904121979, + "learning_rate": 9.290708698743777e-07, + "loss": 7.437, + "step": 29460 + }, + { + "epoch": 1.2697592281517853, + "learning_rate": 9.290223878988989e-07, + "loss": 7.1294, + "step": 29480 + }, + { + "epoch": 1.2706206658913728, + "learning_rate": 9.2897390592342e-07, + "loss": 7.3842, + "step": 29500 + }, + { + "epoch": 1.27148210363096, + "learning_rate": 9.289254239479412e-07, + "loss": 7.0857, + "step": 29520 + }, + { + "epoch": 1.2723435413705475, + "learning_rate": 9.288769419724621e-07, + "loss": 6.5619, + "step": 29540 + }, + { + "epoch": 1.2732049791101347, + "learning_rate": 9.288284599969833e-07, + "loss": 6.7763, + "step": 29560 + }, + { + "epoch": 1.2740664168497222, + "learning_rate": 9.287799780215045e-07, + "loss": 6.9909, + "step": 29580 + }, + { + "epoch": 1.2749278545893095, + "learning_rate": 9.287314960460256e-07, + "loss": 6.7757, + "step": 29600 + }, + { + "epoch": 1.275789292328897, + "learning_rate": 9.286830140705466e-07, + "loss": 7.1912, + "step": 29620 + }, + { + "epoch": 1.2766507300684844, + "learning_rate": 9.286345320950678e-07, + "loss": 7.0042, + "step": 29640 + }, + { + "epoch": 1.2775121678080716, + "learning_rate": 9.285860501195888e-07, + "loss": 6.9697, + "step": 29660 + }, + { + "epoch": 1.278373605547659, + "learning_rate": 9.285375681441099e-07, + "loss": 7.4797, + "step": 29680 + }, + { + "epoch": 1.2792350432872464, + "learning_rate": 9.28489086168631e-07, + "loss": 7.2872, + "step": 29700 + }, + { + "epoch": 1.2800964810268338, + "learning_rate": 9.284406041931522e-07, + "loss": 7.1162, + "step": 29720 + }, + { + "epoch": 1.280957918766421, + "learning_rate": 9.283921222176733e-07, + "loss": 7.2223, + "step": 29740 + }, + { + "epoch": 1.2818193565060085, + "learning_rate": 9.283436402421944e-07, + "loss": 7.2239, + "step": 29760 + }, + { + "epoch": 1.282680794245596, + "learning_rate": 9.282951582667155e-07, + "loss": 7.0478, + "step": 29780 + }, + { + "epoch": 1.2835422319851832, + "learning_rate": 9.282466762912365e-07, + "loss": 7.0737, + "step": 29800 + }, + { + "epoch": 1.2844036697247707, + "learning_rate": 9.281981943157577e-07, + "loss": 6.9948, + "step": 29820 + }, + { + "epoch": 1.285265107464358, + "learning_rate": 9.281497123402788e-07, + "loss": 6.7393, + "step": 29840 + }, + { + "epoch": 1.2861265452039454, + "learning_rate": 9.281012303647999e-07, + "loss": 7.1649, + "step": 29860 + }, + { + "epoch": 1.2869879829435327, + "learning_rate": 9.28052748389321e-07, + "loss": 7.1114, + "step": 29880 + }, + { + "epoch": 1.2878494206831201, + "learning_rate": 9.280042664138422e-07, + "loss": 6.8673, + "step": 29900 + }, + { + "epoch": 1.2887108584227076, + "learning_rate": 9.279557844383631e-07, + "loss": 6.8635, + "step": 29920 + }, + { + "epoch": 1.2895722961622949, + "learning_rate": 9.279073024628843e-07, + "loss": 6.8289, + "step": 29940 + }, + { + "epoch": 1.2904337339018823, + "learning_rate": 9.278588204874054e-07, + "loss": 7.1112, + "step": 29960 + }, + { + "epoch": 1.2912951716414696, + "learning_rate": 9.278103385119265e-07, + "loss": 7.1843, + "step": 29980 + }, + { + "epoch": 1.292156609381057, + "learning_rate": 9.277618565364475e-07, + "loss": 7.1475, + "step": 30000 + }, + { + "epoch": 1.2930180471206443, + "learning_rate": 9.277133745609688e-07, + "loss": 7.0125, + "step": 30020 + }, + { + "epoch": 1.2938794848602317, + "learning_rate": 9.276648925854898e-07, + "loss": 7.2633, + "step": 30040 + }, + { + "epoch": 1.2947409225998192, + "learning_rate": 9.276164106100109e-07, + "loss": 6.62, + "step": 30060 + }, + { + "epoch": 1.2956023603394065, + "learning_rate": 9.27567928634532e-07, + "loss": 7.1395, + "step": 30080 + }, + { + "epoch": 1.296463798078994, + "learning_rate": 9.275194466590532e-07, + "loss": 7.0973, + "step": 30100 + }, + { + "epoch": 1.2973252358185812, + "learning_rate": 9.274709646835743e-07, + "loss": 7.0828, + "step": 30120 + }, + { + "epoch": 1.2981866735581686, + "learning_rate": 9.274224827080954e-07, + "loss": 7.079, + "step": 30140 + }, + { + "epoch": 1.2990481112977559, + "learning_rate": 9.273740007326165e-07, + "loss": 6.8067, + "step": 30160 + }, + { + "epoch": 1.2999095490373433, + "learning_rate": 9.273255187571376e-07, + "loss": 6.7055, + "step": 30180 + }, + { + "epoch": 1.3007709867769308, + "learning_rate": 9.272770367816587e-07, + "loss": 7.0344, + "step": 30200 + }, + { + "epoch": 1.301632424516518, + "learning_rate": 9.272285548061798e-07, + "loss": 7.1736, + "step": 30220 + }, + { + "epoch": 1.3024938622561053, + "learning_rate": 9.271800728307009e-07, + "loss": 7.212, + "step": 30240 + }, + { + "epoch": 1.3033552999956928, + "learning_rate": 9.271315908552221e-07, + "loss": 6.8573, + "step": 30260 + }, + { + "epoch": 1.3042167377352802, + "learning_rate": 9.270831088797431e-07, + "loss": 6.196, + "step": 30280 + }, + { + "epoch": 1.3050781754748675, + "learning_rate": 9.270346269042642e-07, + "loss": 6.7491, + "step": 30300 + }, + { + "epoch": 1.305939613214455, + "learning_rate": 9.269861449287853e-07, + "loss": 6.6946, + "step": 30320 + }, + { + "epoch": 1.3068010509540424, + "learning_rate": 9.269376629533064e-07, + "loss": 6.5269, + "step": 30340 + }, + { + "epoch": 1.3076624886936297, + "learning_rate": 9.268891809778275e-07, + "loss": 6.6064, + "step": 30360 + }, + { + "epoch": 1.308523926433217, + "learning_rate": 9.268406990023487e-07, + "loss": 7.2097, + "step": 30380 + }, + { + "epoch": 1.3093853641728044, + "learning_rate": 9.267922170268698e-07, + "loss": 6.6234, + "step": 30400 + }, + { + "epoch": 1.3102468019123918, + "learning_rate": 9.267437350513908e-07, + "loss": 7.0382, + "step": 30420 + }, + { + "epoch": 1.311108239651979, + "learning_rate": 9.266952530759119e-07, + "loss": 6.7611, + "step": 30440 + }, + { + "epoch": 1.3119696773915666, + "learning_rate": 9.26646771100433e-07, + "loss": 6.6191, + "step": 30460 + }, + { + "epoch": 1.312831115131154, + "learning_rate": 9.265982891249542e-07, + "loss": 6.8269, + "step": 30480 + }, + { + "epoch": 1.3136925528707413, + "learning_rate": 9.265498071494753e-07, + "loss": 6.4991, + "step": 30500 + }, + { + "epoch": 1.3145539906103285, + "learning_rate": 9.265013251739965e-07, + "loss": 6.8496, + "step": 30520 + }, + { + "epoch": 1.315415428349916, + "learning_rate": 9.264528431985175e-07, + "loss": 6.6252, + "step": 30540 + }, + { + "epoch": 1.3162768660895035, + "learning_rate": 9.264043612230386e-07, + "loss": 6.5231, + "step": 30560 + }, + { + "epoch": 1.3171383038290907, + "learning_rate": 9.263558792475596e-07, + "loss": 6.6693, + "step": 30580 + }, + { + "epoch": 1.3179997415686782, + "learning_rate": 9.263073972720808e-07, + "loss": 6.6069, + "step": 30600 + }, + { + "epoch": 1.3188611793082656, + "learning_rate": 9.262589152966019e-07, + "loss": 6.7363, + "step": 30620 + }, + { + "epoch": 1.3197226170478529, + "learning_rate": 9.262104333211231e-07, + "loss": 6.4996, + "step": 30640 + }, + { + "epoch": 1.3205840547874401, + "learning_rate": 9.261619513456441e-07, + "loss": 6.7116, + "step": 30660 + }, + { + "epoch": 1.3214454925270276, + "learning_rate": 9.261134693701652e-07, + "loss": 6.7801, + "step": 30680 + }, + { + "epoch": 1.322306930266615, + "learning_rate": 9.260649873946863e-07, + "loss": 6.8897, + "step": 30700 + }, + { + "epoch": 1.3231683680062023, + "learning_rate": 9.260165054192075e-07, + "loss": 6.7371, + "step": 30720 + }, + { + "epoch": 1.3240298057457898, + "learning_rate": 9.259680234437285e-07, + "loss": 6.5691, + "step": 30740 + }, + { + "epoch": 1.324891243485377, + "learning_rate": 9.259195414682497e-07, + "loss": 6.881, + "step": 30760 + }, + { + "epoch": 1.3257526812249645, + "learning_rate": 9.258710594927708e-07, + "loss": 6.8269, + "step": 30780 + }, + { + "epoch": 1.3266141189645517, + "learning_rate": 9.258225775172919e-07, + "loss": 6.5559, + "step": 30800 + }, + { + "epoch": 1.3274755567041392, + "learning_rate": 9.257740955418129e-07, + "loss": 6.5314, + "step": 30820 + }, + { + "epoch": 1.3283369944437267, + "learning_rate": 9.257256135663341e-07, + "loss": 6.7018, + "step": 30840 + }, + { + "epoch": 1.329198432183314, + "learning_rate": 9.256771315908552e-07, + "loss": 6.5448, + "step": 30860 + }, + { + "epoch": 1.3300598699229014, + "learning_rate": 9.256286496153762e-07, + "loss": 6.63, + "step": 30880 + }, + { + "epoch": 1.3309213076624886, + "learning_rate": 9.255801676398974e-07, + "loss": 6.676, + "step": 30900 + }, + { + "epoch": 1.331782745402076, + "learning_rate": 9.255316856644186e-07, + "loss": 6.5936, + "step": 30920 + }, + { + "epoch": 1.3326441831416633, + "learning_rate": 9.254832036889396e-07, + "loss": 6.4651, + "step": 30940 + }, + { + "epoch": 1.3335056208812508, + "learning_rate": 9.254347217134606e-07, + "loss": 6.8695, + "step": 30960 + }, + { + "epoch": 1.3343670586208383, + "learning_rate": 9.253862397379818e-07, + "loss": 6.7311, + "step": 30980 + }, + { + "epoch": 1.3352284963604255, + "learning_rate": 9.253377577625029e-07, + "loss": 6.6761, + "step": 31000 + }, + { + "epoch": 1.336089934100013, + "learning_rate": 9.252892757870241e-07, + "loss": 6.9313, + "step": 31020 + }, + { + "epoch": 1.3369513718396002, + "learning_rate": 9.252407938115451e-07, + "loss": 6.7279, + "step": 31040 + }, + { + "epoch": 1.3378128095791877, + "learning_rate": 9.251923118360662e-07, + "loss": 6.5361, + "step": 31060 + }, + { + "epoch": 1.338674247318775, + "learning_rate": 9.251438298605873e-07, + "loss": 6.6605, + "step": 31080 + }, + { + "epoch": 1.3395356850583624, + "learning_rate": 9.250953478851085e-07, + "loss": 6.0599, + "step": 31100 + }, + { + "epoch": 1.3403971227979499, + "learning_rate": 9.250468659096295e-07, + "loss": 4.8999, + "step": 31120 + }, + { + "epoch": 1.3412585605375371, + "learning_rate": 9.249983839341507e-07, + "loss": 4.8532, + "step": 31140 + }, + { + "epoch": 1.3421199982771246, + "learning_rate": 9.249499019586718e-07, + "loss": 4.5862, + "step": 31160 + }, + { + "epoch": 1.3429814360167118, + "learning_rate": 9.249014199831928e-07, + "loss": 4.7988, + "step": 31180 + }, + { + "epoch": 1.3438428737562993, + "learning_rate": 9.248529380077139e-07, + "loss": 4.4932, + "step": 31200 + }, + { + "epoch": 1.3447043114958865, + "learning_rate": 9.248044560322351e-07, + "loss": 4.8881, + "step": 31220 + }, + { + "epoch": 1.345565749235474, + "learning_rate": 9.247559740567562e-07, + "loss": 5.7502, + "step": 31240 + }, + { + "epoch": 1.3464271869750615, + "learning_rate": 9.247074920812773e-07, + "loss": 5.3209, + "step": 31260 + }, + { + "epoch": 1.3472886247146487, + "learning_rate": 9.246590101057984e-07, + "loss": 5.0991, + "step": 31280 + }, + { + "epoch": 1.3481500624542362, + "learning_rate": 9.246105281303196e-07, + "loss": 4.9333, + "step": 31300 + }, + { + "epoch": 1.3490115001938234, + "learning_rate": 9.245620461548406e-07, + "loss": 5.1862, + "step": 31320 + }, + { + "epoch": 1.349872937933411, + "learning_rate": 9.245135641793617e-07, + "loss": 5.2048, + "step": 31340 + }, + { + "epoch": 1.3507343756729981, + "learning_rate": 9.244650822038829e-07, + "loss": 5.2122, + "step": 31360 + }, + { + "epoch": 1.3515958134125856, + "learning_rate": 9.24416600228404e-07, + "loss": 5.1965, + "step": 31380 + }, + { + "epoch": 1.352457251152173, + "learning_rate": 9.243681182529251e-07, + "loss": 5.2912, + "step": 31400 + }, + { + "epoch": 1.3533186888917603, + "learning_rate": 9.243196362774461e-07, + "loss": 4.911, + "step": 31420 + }, + { + "epoch": 1.3541801266313478, + "learning_rate": 9.242711543019672e-07, + "loss": 4.8417, + "step": 31440 + }, + { + "epoch": 1.355041564370935, + "learning_rate": 9.242226723264884e-07, + "loss": 5.1662, + "step": 31460 + }, + { + "epoch": 1.3559030021105225, + "learning_rate": 9.241741903510094e-07, + "loss": 5.1467, + "step": 31480 + }, + { + "epoch": 1.3567644398501097, + "learning_rate": 9.241257083755305e-07, + "loss": 5.3874, + "step": 31500 + }, + { + "epoch": 1.3576258775896972, + "learning_rate": 9.240772264000517e-07, + "loss": 5.8458, + "step": 31520 + }, + { + "epoch": 1.3584873153292847, + "learning_rate": 9.240287444245728e-07, + "loss": 6.3314, + "step": 31540 + }, + { + "epoch": 1.359348753068872, + "learning_rate": 9.23980262449094e-07, + "loss": 6.3855, + "step": 31560 + }, + { + "epoch": 1.3602101908084594, + "learning_rate": 9.239317804736149e-07, + "loss": 6.4171, + "step": 31580 + }, + { + "epoch": 1.3610716285480466, + "learning_rate": 9.238832984981361e-07, + "loss": 6.7412, + "step": 31600 + }, + { + "epoch": 1.361933066287634, + "learning_rate": 9.238348165226572e-07, + "loss": 6.7147, + "step": 31620 + }, + { + "epoch": 1.3627945040272214, + "learning_rate": 9.237863345471783e-07, + "loss": 6.4155, + "step": 31640 + }, + { + "epoch": 1.3636559417668088, + "learning_rate": 9.237378525716994e-07, + "loss": 6.4637, + "step": 31660 + }, + { + "epoch": 1.3645173795063963, + "learning_rate": 9.236893705962206e-07, + "loss": 6.6824, + "step": 31680 + }, + { + "epoch": 1.3653788172459835, + "learning_rate": 9.236408886207416e-07, + "loss": 6.4347, + "step": 31700 + }, + { + "epoch": 1.366240254985571, + "learning_rate": 9.235924066452627e-07, + "loss": 6.7485, + "step": 31720 + }, + { + "epoch": 1.3671016927251582, + "learning_rate": 9.235439246697838e-07, + "loss": 6.6971, + "step": 31740 + }, + { + "epoch": 1.3679631304647457, + "learning_rate": 9.23495442694305e-07, + "loss": 6.9485, + "step": 31760 + }, + { + "epoch": 1.368824568204333, + "learning_rate": 9.234469607188259e-07, + "loss": 6.6041, + "step": 31780 + }, + { + "epoch": 1.3696860059439204, + "learning_rate": 9.233984787433472e-07, + "loss": 6.6304, + "step": 31800 + }, + { + "epoch": 1.370547443683508, + "learning_rate": 9.233499967678682e-07, + "loss": 6.5932, + "step": 31820 + }, + { + "epoch": 1.3714088814230951, + "learning_rate": 9.233015147923894e-07, + "loss": 6.6949, + "step": 31840 + }, + { + "epoch": 1.3722703191626824, + "learning_rate": 9.232530328169104e-07, + "loss": 6.7267, + "step": 31860 + }, + { + "epoch": 1.3731317569022699, + "learning_rate": 9.232045508414316e-07, + "loss": 6.6524, + "step": 31880 + }, + { + "epoch": 1.3739931946418573, + "learning_rate": 9.231560688659527e-07, + "loss": 6.4257, + "step": 31900 + }, + { + "epoch": 1.3748546323814446, + "learning_rate": 9.231075868904739e-07, + "loss": 6.6167, + "step": 31920 + }, + { + "epoch": 1.375716070121032, + "learning_rate": 9.230591049149949e-07, + "loss": 6.3268, + "step": 31940 + }, + { + "epoch": 1.3765775078606195, + "learning_rate": 9.230106229395159e-07, + "loss": 6.8016, + "step": 31960 + }, + { + "epoch": 1.3774389456002067, + "learning_rate": 9.229621409640371e-07, + "loss": 6.6805, + "step": 31980 + }, + { + "epoch": 1.378300383339794, + "learning_rate": 9.229136589885583e-07, + "loss": 6.501, + "step": 32000 + }, + { + "epoch": 1.3791618210793815, + "learning_rate": 9.228651770130793e-07, + "loss": 6.529, + "step": 32020 + }, + { + "epoch": 1.380023258818969, + "learning_rate": 9.228166950376004e-07, + "loss": 6.2686, + "step": 32040 + }, + { + "epoch": 1.3808846965585562, + "learning_rate": 9.227682130621216e-07, + "loss": 6.5076, + "step": 32060 + }, + { + "epoch": 1.3817461342981436, + "learning_rate": 9.227197310866425e-07, + "loss": 6.3802, + "step": 32080 + }, + { + "epoch": 1.382607572037731, + "learning_rate": 9.226712491111637e-07, + "loss": 6.35, + "step": 32100 + }, + { + "epoch": 1.3834690097773183, + "learning_rate": 9.226227671356848e-07, + "loss": 6.6141, + "step": 32120 + }, + { + "epoch": 1.3843304475169056, + "learning_rate": 9.22574285160206e-07, + "loss": 6.3192, + "step": 32140 + }, + { + "epoch": 1.385191885256493, + "learning_rate": 9.22525803184727e-07, + "loss": 6.6553, + "step": 32160 + }, + { + "epoch": 1.3860533229960805, + "learning_rate": 9.224773212092482e-07, + "loss": 6.4769, + "step": 32180 + }, + { + "epoch": 1.3869147607356678, + "learning_rate": 9.224288392337692e-07, + "loss": 6.4383, + "step": 32200 + }, + { + "epoch": 1.3877761984752552, + "learning_rate": 9.223803572582904e-07, + "loss": 6.4079, + "step": 32220 + }, + { + "epoch": 1.3886376362148427, + "learning_rate": 9.223318752828114e-07, + "loss": 6.6363, + "step": 32240 + }, + { + "epoch": 1.38949907395443, + "learning_rate": 9.222833933073326e-07, + "loss": 6.5362, + "step": 32260 + }, + { + "epoch": 1.3903605116940172, + "learning_rate": 9.222349113318537e-07, + "loss": 6.6245, + "step": 32280 + }, + { + "epoch": 1.3912219494336047, + "learning_rate": 9.22186429356375e-07, + "loss": 6.6903, + "step": 32300 + }, + { + "epoch": 1.3920833871731921, + "learning_rate": 9.221379473808959e-07, + "loss": 6.7031, + "step": 32320 + }, + { + "epoch": 1.3929448249127794, + "learning_rate": 9.22089465405417e-07, + "loss": 6.4752, + "step": 32340 + }, + { + "epoch": 1.3938062626523668, + "learning_rate": 9.220409834299381e-07, + "loss": 6.403, + "step": 32360 + }, + { + "epoch": 1.394667700391954, + "learning_rate": 9.219925014544593e-07, + "loss": 6.3745, + "step": 32380 + }, + { + "epoch": 1.3955291381315416, + "learning_rate": 9.219440194789803e-07, + "loss": 6.3431, + "step": 32400 + }, + { + "epoch": 1.3963905758711288, + "learning_rate": 9.218955375035015e-07, + "loss": 6.1962, + "step": 32420 + }, + { + "epoch": 1.3972520136107163, + "learning_rate": 9.218470555280226e-07, + "loss": 6.4415, + "step": 32440 + }, + { + "epoch": 1.3981134513503037, + "learning_rate": 9.217985735525436e-07, + "loss": 6.5485, + "step": 32460 + }, + { + "epoch": 1.398974889089891, + "learning_rate": 9.217500915770647e-07, + "loss": 6.7211, + "step": 32480 + }, + { + "epoch": 1.3998363268294785, + "learning_rate": 9.217016096015858e-07, + "loss": 6.5814, + "step": 32500 + }, + { + "epoch": 1.4006977645690657, + "learning_rate": 9.21653127626107e-07, + "loss": 6.4893, + "step": 32520 + }, + { + "epoch": 1.4015592023086532, + "learning_rate": 9.216046456506281e-07, + "loss": 6.5722, + "step": 32540 + }, + { + "epoch": 1.4024206400482404, + "learning_rate": 9.215561636751492e-07, + "loss": 6.5738, + "step": 32560 + }, + { + "epoch": 1.4032820777878279, + "learning_rate": 9.215076816996703e-07, + "loss": 6.5566, + "step": 32580 + }, + { + "epoch": 1.4041435155274153, + "learning_rate": 9.214591997241914e-07, + "loss": 6.3732, + "step": 32600 + }, + { + "epoch": 1.4050049532670026, + "learning_rate": 9.214107177487125e-07, + "loss": 6.18, + "step": 32620 + }, + { + "epoch": 1.40586639100659, + "learning_rate": 9.213622357732336e-07, + "loss": 6.2922, + "step": 32640 + }, + { + "epoch": 1.4067278287461773, + "learning_rate": 9.213137537977547e-07, + "loss": 6.5907, + "step": 32660 + }, + { + "epoch": 1.4075892664857648, + "learning_rate": 9.212652718222759e-07, + "loss": 6.4877, + "step": 32680 + }, + { + "epoch": 1.408450704225352, + "learning_rate": 9.212167898467969e-07, + "loss": 6.4984, + "step": 32700 + }, + { + "epoch": 1.4093121419649395, + "learning_rate": 9.21168307871318e-07, + "loss": 6.1901, + "step": 32720 + }, + { + "epoch": 1.410173579704527, + "learning_rate": 9.211198258958391e-07, + "loss": 6.1892, + "step": 32740 + }, + { + "epoch": 1.4110350174441142, + "learning_rate": 9.210713439203602e-07, + "loss": 6.4323, + "step": 32760 + }, + { + "epoch": 1.4118964551837017, + "learning_rate": 9.210228619448813e-07, + "loss": 6.4229, + "step": 32780 + }, + { + "epoch": 1.412757892923289, + "learning_rate": 9.209743799694025e-07, + "loss": 6.315, + "step": 32800 + }, + { + "epoch": 1.4136193306628764, + "learning_rate": 9.209258979939236e-07, + "loss": 6.5691, + "step": 32820 + }, + { + "epoch": 1.4144807684024636, + "learning_rate": 9.208774160184446e-07, + "loss": 6.3881, + "step": 32840 + }, + { + "epoch": 1.415342206142051, + "learning_rate": 9.208289340429657e-07, + "loss": 6.201, + "step": 32860 + }, + { + "epoch": 1.4162036438816386, + "learning_rate": 9.207804520674869e-07, + "loss": 6.5495, + "step": 32880 + }, + { + "epoch": 1.4170650816212258, + "learning_rate": 9.20731970092008e-07, + "loss": 6.2721, + "step": 32900 + }, + { + "epoch": 1.4179265193608133, + "learning_rate": 9.206834881165291e-07, + "loss": 6.8134, + "step": 32920 + }, + { + "epoch": 1.4187879571004005, + "learning_rate": 9.206350061410502e-07, + "loss": 6.4974, + "step": 32940 + }, + { + "epoch": 1.419649394839988, + "learning_rate": 9.205865241655713e-07, + "loss": 6.2932, + "step": 32960 + }, + { + "epoch": 1.4205108325795752, + "learning_rate": 9.205380421900924e-07, + "loss": 6.1613, + "step": 32980 + }, + { + "epoch": 1.4213722703191627, + "learning_rate": 9.204895602146135e-07, + "loss": 6.4778, + "step": 33000 + }, + { + "epoch": 1.4222337080587502, + "learning_rate": 9.204410782391346e-07, + "loss": 6.3327, + "step": 33020 + }, + { + "epoch": 1.4230951457983374, + "learning_rate": 9.203925962636556e-07, + "loss": 6.2487, + "step": 33040 + }, + { + "epoch": 1.4239565835379249, + "learning_rate": 9.203441142881769e-07, + "loss": 6.2188, + "step": 33060 + }, + { + "epoch": 1.4248180212775121, + "learning_rate": 9.20295632312698e-07, + "loss": 6.5313, + "step": 33080 + }, + { + "epoch": 1.4256794590170996, + "learning_rate": 9.20247150337219e-07, + "loss": 6.3588, + "step": 33100 + }, + { + "epoch": 1.4265408967566868, + "learning_rate": 9.201986683617401e-07, + "loss": 6.0301, + "step": 33120 + }, + { + "epoch": 1.4274023344962743, + "learning_rate": 9.201501863862613e-07, + "loss": 6.2546, + "step": 33140 + }, + { + "epoch": 1.4282637722358618, + "learning_rate": 9.201017044107823e-07, + "loss": 6.5632, + "step": 33160 + }, + { + "epoch": 1.429125209975449, + "learning_rate": 9.200532224353035e-07, + "loss": 6.2113, + "step": 33180 + }, + { + "epoch": 1.4299866477150365, + "learning_rate": 9.200047404598246e-07, + "loss": 6.4124, + "step": 33200 + }, + { + "epoch": 1.4308480854546237, + "learning_rate": 9.199562584843456e-07, + "loss": 6.4584, + "step": 33220 + }, + { + "epoch": 1.4317095231942112, + "learning_rate": 9.199077765088667e-07, + "loss": 6.3494, + "step": 33240 + }, + { + "epoch": 1.4325709609337984, + "learning_rate": 9.198592945333879e-07, + "loss": 6.2183, + "step": 33260 + }, + { + "epoch": 1.433432398673386, + "learning_rate": 9.19810812557909e-07, + "loss": 6.5717, + "step": 33280 + }, + { + "epoch": 1.4342938364129734, + "learning_rate": 9.197623305824301e-07, + "loss": 6.2162, + "step": 33300 + }, + { + "epoch": 1.4351552741525606, + "learning_rate": 9.197138486069512e-07, + "loss": 6.4291, + "step": 33320 + }, + { + "epoch": 1.4360167118921479, + "learning_rate": 9.196653666314724e-07, + "loss": 6.6195, + "step": 33340 + }, + { + "epoch": 1.4368781496317353, + "learning_rate": 9.196168846559933e-07, + "loss": 6.5921, + "step": 33360 + }, + { + "epoch": 1.4377395873713228, + "learning_rate": 9.195684026805145e-07, + "loss": 6.6409, + "step": 33380 + }, + { + "epoch": 1.43860102511091, + "learning_rate": 9.195199207050356e-07, + "loss": 5.9695, + "step": 33400 + }, + { + "epoch": 1.4394624628504975, + "learning_rate": 9.194714387295568e-07, + "loss": 6.5148, + "step": 33420 + }, + { + "epoch": 1.440323900590085, + "learning_rate": 9.194229567540778e-07, + "loss": 6.447, + "step": 33440 + }, + { + "epoch": 1.4411853383296722, + "learning_rate": 9.19374474778599e-07, + "loss": 6.2085, + "step": 33460 + }, + { + "epoch": 1.4420467760692595, + "learning_rate": 9.1932599280312e-07, + "loss": 5.4657, + "step": 33480 + }, + { + "epoch": 1.442908213808847, + "learning_rate": 9.192775108276412e-07, + "loss": 4.7962, + "step": 33500 + }, + { + "epoch": 1.4437696515484344, + "learning_rate": 9.192290288521622e-07, + "loss": 4.5683, + "step": 33520 + }, + { + "epoch": 1.4446310892880216, + "learning_rate": 9.191805468766834e-07, + "loss": 4.612, + "step": 33540 + }, + { + "epoch": 1.445492527027609, + "learning_rate": 9.191320649012044e-07, + "loss": 4.7549, + "step": 33560 + }, + { + "epoch": 1.4463539647671966, + "learning_rate": 9.190835829257256e-07, + "loss": 4.7052, + "step": 33580 + }, + { + "epoch": 1.4472154025067838, + "learning_rate": 9.190351009502466e-07, + "loss": 4.5388, + "step": 33600 + }, + { + "epoch": 1.448076840246371, + "learning_rate": 9.189866189747678e-07, + "loss": 4.3169, + "step": 33620 + }, + { + "epoch": 1.4489382779859585, + "learning_rate": 9.189381369992889e-07, + "loss": 4.8239, + "step": 33640 + }, + { + "epoch": 1.449799715725546, + "learning_rate": 9.1888965502381e-07, + "loss": 4.8199, + "step": 33660 + }, + { + "epoch": 1.4506611534651332, + "learning_rate": 9.188411730483311e-07, + "loss": 4.7415, + "step": 33680 + }, + { + "epoch": 1.4515225912047207, + "learning_rate": 9.187926910728522e-07, + "loss": 4.4792, + "step": 33700 + }, + { + "epoch": 1.4523840289443082, + "learning_rate": 9.187442090973734e-07, + "loss": 4.7136, + "step": 33720 + }, + { + "epoch": 1.4532454666838954, + "learning_rate": 9.186957271218943e-07, + "loss": 4.6015, + "step": 33740 + }, + { + "epoch": 1.4541069044234827, + "learning_rate": 9.186472451464155e-07, + "loss": 4.6091, + "step": 33760 + }, + { + "epoch": 1.4549683421630701, + "learning_rate": 9.185987631709366e-07, + "loss": 4.4059, + "step": 33780 + }, + { + "epoch": 1.4558297799026576, + "learning_rate": 9.185502811954578e-07, + "loss": 4.7998, + "step": 33800 + }, + { + "epoch": 1.4566912176422449, + "learning_rate": 9.185017992199788e-07, + "loss": 4.6805, + "step": 33820 + }, + { + "epoch": 1.4575526553818323, + "learning_rate": 9.184533172445e-07, + "loss": 4.7347, + "step": 33840 + }, + { + "epoch": 1.4584140931214198, + "learning_rate": 9.18404835269021e-07, + "loss": 4.4513, + "step": 33860 + }, + { + "epoch": 1.459275530861007, + "learning_rate": 9.183563532935422e-07, + "loss": 4.8156, + "step": 33880 + }, + { + "epoch": 1.4601369686005943, + "learning_rate": 9.183078713180632e-07, + "loss": 4.6449, + "step": 33900 + }, + { + "epoch": 1.4609984063401817, + "learning_rate": 9.182593893425844e-07, + "loss": 4.5142, + "step": 33920 + }, + { + "epoch": 1.4618598440797692, + "learning_rate": 9.182109073671055e-07, + "loss": 4.6287, + "step": 33940 + }, + { + "epoch": 1.4627212818193565, + "learning_rate": 9.181624253916267e-07, + "loss": 4.5074, + "step": 33960 + }, + { + "epoch": 1.463582719558944, + "learning_rate": 9.181139434161476e-07, + "loss": 4.6228, + "step": 33980 + }, + { + "epoch": 1.4644441572985312, + "learning_rate": 9.180654614406688e-07, + "loss": 4.5035, + "step": 34000 + }, + { + "epoch": 1.4653055950381186, + "learning_rate": 9.180169794651899e-07, + "loss": 4.4645, + "step": 34020 + }, + { + "epoch": 1.4661670327777059, + "learning_rate": 9.17968497489711e-07, + "loss": 4.8142, + "step": 34040 + }, + { + "epoch": 1.4670284705172933, + "learning_rate": 9.179200155142321e-07, + "loss": 4.8715, + "step": 34060 + }, + { + "epoch": 1.4678899082568808, + "learning_rate": 9.178715335387534e-07, + "loss": 4.5135, + "step": 34080 + }, + { + "epoch": 1.468751345996468, + "learning_rate": 9.178230515632744e-07, + "loss": 4.5732, + "step": 34100 + }, + { + "epoch": 1.4696127837360555, + "learning_rate": 9.177745695877953e-07, + "loss": 4.7193, + "step": 34120 + }, + { + "epoch": 1.4704742214756428, + "learning_rate": 9.177260876123165e-07, + "loss": 4.6281, + "step": 34140 + }, + { + "epoch": 1.4713356592152302, + "learning_rate": 9.176776056368377e-07, + "loss": 4.5386, + "step": 34160 + }, + { + "epoch": 1.4721970969548175, + "learning_rate": 9.176291236613588e-07, + "loss": 4.58, + "step": 34180 + }, + { + "epoch": 1.473058534694405, + "learning_rate": 9.175806416858798e-07, + "loss": 4.5368, + "step": 34200 + }, + { + "epoch": 1.4739199724339924, + "learning_rate": 9.17532159710401e-07, + "loss": 4.3916, + "step": 34220 + }, + { + "epoch": 1.4747814101735797, + "learning_rate": 9.17483677734922e-07, + "loss": 4.5975, + "step": 34240 + }, + { + "epoch": 1.4756428479131671, + "learning_rate": 9.174351957594432e-07, + "loss": 4.563, + "step": 34260 + }, + { + "epoch": 1.4765042856527544, + "learning_rate": 9.173867137839642e-07, + "loss": 4.615, + "step": 34280 + }, + { + "epoch": 1.4773657233923418, + "learning_rate": 9.173382318084854e-07, + "loss": 4.3919, + "step": 34300 + }, + { + "epoch": 1.478227161131929, + "learning_rate": 9.172897498330065e-07, + "loss": 4.7193, + "step": 34320 + }, + { + "epoch": 1.4790885988715166, + "learning_rate": 9.172412678575276e-07, + "loss": 4.4685, + "step": 34340 + }, + { + "epoch": 1.479950036611104, + "learning_rate": 9.171927858820487e-07, + "loss": 4.6064, + "step": 34360 + }, + { + "epoch": 1.4808114743506913, + "learning_rate": 9.171443039065698e-07, + "loss": 4.5293, + "step": 34380 + }, + { + "epoch": 1.4816729120902787, + "learning_rate": 9.17095821931091e-07, + "loss": 4.4513, + "step": 34400 + }, + { + "epoch": 1.482534349829866, + "learning_rate": 9.17047339955612e-07, + "loss": 4.7872, + "step": 34420 + }, + { + "epoch": 1.4833957875694535, + "learning_rate": 9.169988579801331e-07, + "loss": 4.591, + "step": 34440 + }, + { + "epoch": 1.4842572253090407, + "learning_rate": 9.169503760046543e-07, + "loss": 4.468, + "step": 34460 + }, + { + "epoch": 1.4851186630486282, + "learning_rate": 9.169018940291754e-07, + "loss": 4.569, + "step": 34480 + }, + { + "epoch": 1.4859801007882156, + "learning_rate": 9.168534120536964e-07, + "loss": 4.429, + "step": 34500 + }, + { + "epoch": 1.4868415385278029, + "learning_rate": 9.168049300782175e-07, + "loss": 4.5698, + "step": 34520 + }, + { + "epoch": 1.4877029762673903, + "learning_rate": 9.167564481027387e-07, + "loss": 4.5016, + "step": 34540 + }, + { + "epoch": 1.4885644140069776, + "learning_rate": 9.167079661272598e-07, + "loss": 4.3315, + "step": 34560 + }, + { + "epoch": 1.489425851746565, + "learning_rate": 9.166594841517809e-07, + "loss": 4.2194, + "step": 34580 + }, + { + "epoch": 1.4902872894861523, + "learning_rate": 9.16611002176302e-07, + "loss": 4.4544, + "step": 34600 + }, + { + "epoch": 1.4911487272257398, + "learning_rate": 9.165625202008231e-07, + "loss": 4.239, + "step": 34620 + }, + { + "epoch": 1.4920101649653272, + "learning_rate": 9.165140382253441e-07, + "loss": 4.4725, + "step": 34640 + }, + { + "epoch": 1.4928716027049145, + "learning_rate": 9.164655562498652e-07, + "loss": 4.5558, + "step": 34660 + }, + { + "epoch": 1.493733040444502, + "learning_rate": 9.164170742743864e-07, + "loss": 4.3513, + "step": 34680 + }, + { + "epoch": 1.4945944781840892, + "learning_rate": 9.163685922989076e-07, + "loss": 4.3847, + "step": 34700 + }, + { + "epoch": 1.4954559159236767, + "learning_rate": 9.163201103234286e-07, + "loss": 4.7456, + "step": 34720 + }, + { + "epoch": 1.496317353663264, + "learning_rate": 9.162716283479497e-07, + "loss": 4.7135, + "step": 34740 + }, + { + "epoch": 1.4971787914028514, + "learning_rate": 9.162231463724708e-07, + "loss": 4.7201, + "step": 34760 + }, + { + "epoch": 1.4980402291424388, + "learning_rate": 9.161746643969919e-07, + "loss": 4.3865, + "step": 34780 + }, + { + "epoch": 1.498901666882026, + "learning_rate": 9.16126182421513e-07, + "loss": 4.7127, + "step": 34800 + }, + { + "epoch": 1.4997631046216136, + "learning_rate": 9.16077700446034e-07, + "loss": 4.4754, + "step": 34820 + }, + { + "epoch": 1.5006245423612008, + "learning_rate": 9.160292184705553e-07, + "loss": 4.6578, + "step": 34840 + }, + { + "epoch": 1.5014859801007883, + "learning_rate": 9.159807364950764e-07, + "loss": 4.6887, + "step": 34860 + }, + { + "epoch": 1.5023474178403755, + "learning_rate": 9.159322545195974e-07, + "loss": 4.4038, + "step": 34880 + }, + { + "epoch": 1.503208855579963, + "learning_rate": 9.158837725441185e-07, + "loss": 4.8459, + "step": 34900 + }, + { + "epoch": 1.5040702933195504, + "learning_rate": 9.158352905686398e-07, + "loss": 4.4111, + "step": 34920 + }, + { + "epoch": 1.5049317310591377, + "learning_rate": 9.157868085931607e-07, + "loss": 4.7101, + "step": 34940 + }, + { + "epoch": 1.505793168798725, + "learning_rate": 9.157383266176819e-07, + "loss": 4.6699, + "step": 34960 + }, + { + "epoch": 1.5066546065383124, + "learning_rate": 9.15689844642203e-07, + "loss": 4.5889, + "step": 34980 + }, + { + "epoch": 1.5075160442778999, + "learning_rate": 9.156413626667241e-07, + "loss": 4.6535, + "step": 35000 + }, + { + "epoch": 1.5083774820174871, + "learning_rate": 9.155928806912451e-07, + "loss": 4.5575, + "step": 35020 + }, + { + "epoch": 1.5092389197570746, + "learning_rate": 9.155443987157663e-07, + "loss": 4.5002, + "step": 35040 + }, + { + "epoch": 1.510100357496662, + "learning_rate": 9.154959167402874e-07, + "loss": 4.5789, + "step": 35060 + }, + { + "epoch": 1.5109617952362493, + "learning_rate": 9.154474347648086e-07, + "loss": 4.8286, + "step": 35080 + }, + { + "epoch": 1.5118232329758365, + "learning_rate": 9.153989527893296e-07, + "loss": 4.4564, + "step": 35100 + }, + { + "epoch": 1.512684670715424, + "learning_rate": 9.153504708138508e-07, + "loss": 4.2606, + "step": 35120 + }, + { + "epoch": 1.5135461084550115, + "learning_rate": 9.153019888383718e-07, + "loss": 4.5779, + "step": 35140 + }, + { + "epoch": 1.5144075461945987, + "learning_rate": 9.15253506862893e-07, + "loss": 4.296, + "step": 35160 + }, + { + "epoch": 1.5152689839341862, + "learning_rate": 9.15205024887414e-07, + "loss": 4.5038, + "step": 35180 + }, + { + "epoch": 1.5161304216737737, + "learning_rate": 9.151565429119351e-07, + "loss": 4.6752, + "step": 35200 + }, + { + "epoch": 1.516991859413361, + "learning_rate": 9.151080609364563e-07, + "loss": 4.3909, + "step": 35220 + }, + { + "epoch": 1.5178532971529481, + "learning_rate": 9.150595789609775e-07, + "loss": 4.6122, + "step": 35240 + }, + { + "epoch": 1.5187147348925356, + "learning_rate": 9.150110969854984e-07, + "loss": 4.4034, + "step": 35260 + }, + { + "epoch": 1.519576172632123, + "learning_rate": 9.149626150100195e-07, + "loss": 4.5078, + "step": 35280 + }, + { + "epoch": 1.5204376103717103, + "learning_rate": 9.149141330345407e-07, + "loss": 4.6073, + "step": 35300 + }, + { + "epoch": 1.5212990481112978, + "learning_rate": 9.148656510590617e-07, + "loss": 4.4251, + "step": 35320 + }, + { + "epoch": 1.5221604858508853, + "learning_rate": 9.148171690835828e-07, + "loss": 4.6143, + "step": 35340 + }, + { + "epoch": 1.5230219235904725, + "learning_rate": 9.14768687108104e-07, + "loss": 4.7287, + "step": 35360 + }, + { + "epoch": 1.5238833613300597, + "learning_rate": 9.147202051326251e-07, + "loss": 4.7114, + "step": 35380 + }, + { + "epoch": 1.5247447990696472, + "learning_rate": 9.146717231571461e-07, + "loss": 4.3877, + "step": 35400 + }, + { + "epoch": 1.5256062368092347, + "learning_rate": 9.146232411816673e-07, + "loss": 4.466, + "step": 35420 + }, + { + "epoch": 1.526467674548822, + "learning_rate": 9.145747592061884e-07, + "loss": 4.5891, + "step": 35440 + }, + { + "epoch": 1.5273291122884094, + "learning_rate": 9.145262772307096e-07, + "loss": 4.7254, + "step": 35460 + }, + { + "epoch": 1.5281905500279969, + "learning_rate": 9.144777952552306e-07, + "loss": 4.6304, + "step": 35480 + }, + { + "epoch": 1.529051987767584, + "learning_rate": 9.144293132797518e-07, + "loss": 4.5517, + "step": 35500 + }, + { + "epoch": 1.5299134255071714, + "learning_rate": 9.143808313042728e-07, + "loss": 4.5951, + "step": 35520 + }, + { + "epoch": 1.5307748632467588, + "learning_rate": 9.14332349328794e-07, + "loss": 4.6614, + "step": 35540 + }, + { + "epoch": 1.5316363009863463, + "learning_rate": 9.14283867353315e-07, + "loss": 4.352, + "step": 35560 + }, + { + "epoch": 1.5324977387259335, + "learning_rate": 9.142353853778362e-07, + "loss": 4.2372, + "step": 35580 + }, + { + "epoch": 1.533359176465521, + "learning_rate": 9.141869034023573e-07, + "loss": 4.5929, + "step": 35600 + }, + { + "epoch": 1.5342206142051085, + "learning_rate": 9.141384214268784e-07, + "loss": 4.3707, + "step": 35620 + }, + { + "epoch": 1.5350820519446957, + "learning_rate": 9.140899394513994e-07, + "loss": 4.4415, + "step": 35640 + }, + { + "epoch": 1.535943489684283, + "learning_rate": 9.140414574759206e-07, + "loss": 4.385, + "step": 35660 + }, + { + "epoch": 1.5368049274238704, + "learning_rate": 9.139929755004417e-07, + "loss": 4.3335, + "step": 35680 + }, + { + "epoch": 1.537666365163458, + "learning_rate": 9.139444935249628e-07, + "loss": 4.3402, + "step": 35700 + }, + { + "epoch": 1.5385278029030451, + "learning_rate": 9.138960115494839e-07, + "loss": 4.6116, + "step": 35720 + }, + { + "epoch": 1.5393892406426326, + "learning_rate": 9.13847529574005e-07, + "loss": 4.6895, + "step": 35740 + }, + { + "epoch": 1.54025067838222, + "learning_rate": 9.137990475985261e-07, + "loss": 4.5757, + "step": 35760 + }, + { + "epoch": 1.5411121161218073, + "learning_rate": 9.137505656230472e-07, + "loss": 4.5924, + "step": 35780 + }, + { + "epoch": 1.5419735538613946, + "learning_rate": 9.137020836475683e-07, + "loss": 4.5913, + "step": 35800 + }, + { + "epoch": 1.542834991600982, + "learning_rate": 9.136536016720894e-07, + "loss": 4.5572, + "step": 35820 + }, + { + "epoch": 1.5436964293405695, + "learning_rate": 9.136051196966106e-07, + "loss": 4.6149, + "step": 35840 + }, + { + "epoch": 1.5445578670801567, + "learning_rate": 9.135566377211316e-07, + "loss": 4.5057, + "step": 35860 + }, + { + "epoch": 1.545419304819744, + "learning_rate": 9.135081557456528e-07, + "loss": 4.5371, + "step": 35880 + }, + { + "epoch": 1.5462807425593317, + "learning_rate": 9.134596737701738e-07, + "loss": 4.4889, + "step": 35900 + }, + { + "epoch": 1.547142180298919, + "learning_rate": 9.134111917946949e-07, + "loss": 4.6459, + "step": 35920 + }, + { + "epoch": 1.5480036180385062, + "learning_rate": 9.13362709819216e-07, + "loss": 4.2167, + "step": 35940 + }, + { + "epoch": 1.5488650557780936, + "learning_rate": 9.133142278437372e-07, + "loss": 4.312, + "step": 35960 + }, + { + "epoch": 1.549726493517681, + "learning_rate": 9.132657458682583e-07, + "loss": 4.2124, + "step": 35980 + }, + { + "epoch": 1.5505879312572683, + "learning_rate": 9.132172638927794e-07, + "loss": 4.6083, + "step": 36000 + }, + { + "epoch": 1.5514493689968556, + "learning_rate": 9.131687819173004e-07, + "loss": 4.5389, + "step": 36020 + }, + { + "epoch": 1.5523108067364433, + "learning_rate": 9.131202999418216e-07, + "loss": 4.2441, + "step": 36040 + }, + { + "epoch": 1.5531722444760305, + "learning_rate": 9.130718179663427e-07, + "loss": 4.3556, + "step": 36060 + }, + { + "epoch": 1.5540336822156178, + "learning_rate": 9.130233359908638e-07, + "loss": 4.3249, + "step": 36080 + }, + { + "epoch": 1.5548951199552052, + "learning_rate": 9.129748540153849e-07, + "loss": 4.3299, + "step": 36100 + }, + { + "epoch": 1.5557565576947927, + "learning_rate": 9.129263720399061e-07, + "loss": 4.6517, + "step": 36120 + }, + { + "epoch": 1.55661799543438, + "learning_rate": 9.128778900644272e-07, + "loss": 4.3534, + "step": 36140 + }, + { + "epoch": 1.5574794331739672, + "learning_rate": 9.128294080889482e-07, + "loss": 4.7409, + "step": 36160 + }, + { + "epoch": 1.5583408709135549, + "learning_rate": 9.127809261134694e-07, + "loss": 4.516, + "step": 36180 + }, + { + "epoch": 1.5592023086531421, + "learning_rate": 9.127324441379905e-07, + "loss": 4.1291, + "step": 36200 + }, + { + "epoch": 1.5600637463927294, + "learning_rate": 9.126839621625115e-07, + "loss": 4.5695, + "step": 36220 + }, + { + "epoch": 1.5609251841323168, + "learning_rate": 9.126354801870327e-07, + "loss": 4.2779, + "step": 36240 + }, + { + "epoch": 1.5617866218719043, + "learning_rate": 9.125869982115538e-07, + "loss": 4.4595, + "step": 36260 + }, + { + "epoch": 1.5626480596114916, + "learning_rate": 9.125385162360748e-07, + "loss": 4.5646, + "step": 36280 + }, + { + "epoch": 1.5635094973510788, + "learning_rate": 9.124900342605959e-07, + "loss": 4.3038, + "step": 36300 + }, + { + "epoch": 1.5643709350906663, + "learning_rate": 9.124415522851171e-07, + "loss": 4.6117, + "step": 36320 + }, + { + "epoch": 1.5652323728302537, + "learning_rate": 9.123930703096382e-07, + "loss": 4.528, + "step": 36340 + }, + { + "epoch": 1.566093810569841, + "learning_rate": 9.123445883341593e-07, + "loss": 4.4912, + "step": 36360 + }, + { + "epoch": 1.5669552483094285, + "learning_rate": 9.122961063586804e-07, + "loss": 4.65, + "step": 36380 + }, + { + "epoch": 1.567816686049016, + "learning_rate": 9.122476243832014e-07, + "loss": 4.3715, + "step": 36400 + }, + { + "epoch": 1.5686781237886032, + "learning_rate": 9.121991424077226e-07, + "loss": 4.4528, + "step": 36420 + }, + { + "epoch": 1.5695395615281904, + "learning_rate": 9.121506604322436e-07, + "loss": 4.4725, + "step": 36440 + }, + { + "epoch": 1.5704009992677779, + "learning_rate": 9.121021784567648e-07, + "loss": 4.5761, + "step": 36460 + }, + { + "epoch": 1.5712624370073653, + "learning_rate": 9.120536964812859e-07, + "loss": 4.6145, + "step": 36480 + }, + { + "epoch": 1.5721238747469526, + "learning_rate": 9.120052145058071e-07, + "loss": 4.4612, + "step": 36500 + }, + { + "epoch": 1.57298531248654, + "learning_rate": 9.119567325303281e-07, + "loss": 4.4888, + "step": 36520 + }, + { + "epoch": 1.5738467502261275, + "learning_rate": 9.119082505548492e-07, + "loss": 4.4778, + "step": 36540 + }, + { + "epoch": 1.5747081879657148, + "learning_rate": 9.118597685793703e-07, + "loss": 4.4639, + "step": 36560 + }, + { + "epoch": 1.575569625705302, + "learning_rate": 9.118112866038915e-07, + "loss": 4.4546, + "step": 36580 + }, + { + "epoch": 1.5764310634448895, + "learning_rate": 9.117628046284124e-07, + "loss": 4.6701, + "step": 36600 + }, + { + "epoch": 1.577292501184477, + "learning_rate": 9.117143226529337e-07, + "loss": 4.3391, + "step": 36620 + }, + { + "epoch": 1.5781539389240642, + "learning_rate": 9.116658406774548e-07, + "loss": 4.3337, + "step": 36640 + }, + { + "epoch": 1.5790153766636517, + "learning_rate": 9.116173587019759e-07, + "loss": 4.3595, + "step": 36660 + }, + { + "epoch": 1.5798768144032391, + "learning_rate": 9.115688767264969e-07, + "loss": 4.29, + "step": 36680 + }, + { + "epoch": 1.5807382521428264, + "learning_rate": 9.115203947510182e-07, + "loss": 4.4379, + "step": 36700 + }, + { + "epoch": 1.5815996898824136, + "learning_rate": 9.114719127755392e-07, + "loss": 4.5454, + "step": 36720 + }, + { + "epoch": 1.582461127622001, + "learning_rate": 9.114234308000604e-07, + "loss": 4.5105, + "step": 36740 + }, + { + "epoch": 1.5833225653615886, + "learning_rate": 9.113749488245814e-07, + "loss": 4.5245, + "step": 36760 + }, + { + "epoch": 1.5841840031011758, + "learning_rate": 9.113264668491025e-07, + "loss": 4.3384, + "step": 36780 + }, + { + "epoch": 1.5850454408407633, + "learning_rate": 9.112779848736236e-07, + "loss": 4.6087, + "step": 36800 + }, + { + "epoch": 1.5859068785803507, + "learning_rate": 9.112295028981446e-07, + "loss": 4.4014, + "step": 36820 + }, + { + "epoch": 1.586768316319938, + "learning_rate": 9.111810209226658e-07, + "loss": 4.4017, + "step": 36840 + }, + { + "epoch": 1.5876297540595252, + "learning_rate": 9.11132538947187e-07, + "loss": 4.4921, + "step": 36860 + }, + { + "epoch": 1.5884911917991127, + "learning_rate": 9.110840569717081e-07, + "loss": 4.3694, + "step": 36880 + }, + { + "epoch": 1.5893526295387002, + "learning_rate": 9.110355749962291e-07, + "loss": 4.6063, + "step": 36900 + }, + { + "epoch": 1.5902140672782874, + "learning_rate": 9.109870930207502e-07, + "loss": 4.4014, + "step": 36920 + }, + { + "epoch": 1.5910755050178749, + "learning_rate": 9.109386110452713e-07, + "loss": 4.6915, + "step": 36940 + }, + { + "epoch": 1.5919369427574623, + "learning_rate": 9.108901290697925e-07, + "loss": 4.4906, + "step": 36960 + }, + { + "epoch": 1.5927983804970496, + "learning_rate": 9.108416470943135e-07, + "loss": 4.5105, + "step": 36980 + }, + { + "epoch": 1.5936598182366368, + "learning_rate": 9.107931651188347e-07, + "loss": 4.4614, + "step": 37000 + }, + { + "epoch": 1.5945212559762243, + "learning_rate": 9.107446831433558e-07, + "loss": 4.298, + "step": 37020 + }, + { + "epoch": 1.5953826937158118, + "learning_rate": 9.106962011678769e-07, + "loss": 4.363, + "step": 37040 + }, + { + "epoch": 1.596244131455399, + "learning_rate": 9.106477191923979e-07, + "loss": 4.2414, + "step": 37060 + }, + { + "epoch": 1.5971055691949865, + "learning_rate": 9.105992372169191e-07, + "loss": 4.5554, + "step": 37080 + }, + { + "epoch": 1.597967006934574, + "learning_rate": 9.105507552414402e-07, + "loss": 4.3394, + "step": 37100 + }, + { + "epoch": 1.5988284446741612, + "learning_rate": 9.105022732659612e-07, + "loss": 4.4551, + "step": 37120 + }, + { + "epoch": 1.5996898824137484, + "learning_rate": 9.104537912904824e-07, + "loss": 4.643, + "step": 37140 + }, + { + "epoch": 1.600551320153336, + "learning_rate": 9.104053093150035e-07, + "loss": 4.2845, + "step": 37160 + }, + { + "epoch": 1.6014127578929234, + "learning_rate": 9.103568273395246e-07, + "loss": 4.526, + "step": 37180 + }, + { + "epoch": 1.6022741956325106, + "learning_rate": 9.103083453640457e-07, + "loss": 4.5398, + "step": 37200 + }, + { + "epoch": 1.603135633372098, + "learning_rate": 9.102598633885668e-07, + "loss": 4.3236, + "step": 37220 + }, + { + "epoch": 1.6039970711116855, + "learning_rate": 9.10211381413088e-07, + "loss": 4.4828, + "step": 37240 + }, + { + "epoch": 1.6048585088512728, + "learning_rate": 9.101628994376091e-07, + "loss": 4.5626, + "step": 37260 + }, + { + "epoch": 1.60571994659086, + "learning_rate": 9.101144174621302e-07, + "loss": 4.5117, + "step": 37280 + }, + { + "epoch": 1.6065813843304475, + "learning_rate": 9.100659354866512e-07, + "loss": 4.4454, + "step": 37300 + }, + { + "epoch": 1.607442822070035, + "learning_rate": 9.100174535111724e-07, + "loss": 4.3637, + "step": 37320 + }, + { + "epoch": 1.6083042598096222, + "learning_rate": 9.099689715356935e-07, + "loss": 4.437, + "step": 37340 + }, + { + "epoch": 1.6091656975492097, + "learning_rate": 9.099204895602145e-07, + "loss": 4.1728, + "step": 37360 + }, + { + "epoch": 1.6100271352887972, + "learning_rate": 9.098720075847357e-07, + "loss": 4.491, + "step": 37380 + }, + { + "epoch": 1.6108885730283844, + "learning_rate": 9.098235256092569e-07, + "loss": 4.2619, + "step": 37400 + }, + { + "epoch": 1.6117500107679716, + "learning_rate": 9.097750436337778e-07, + "loss": 4.4321, + "step": 37420 + }, + { + "epoch": 1.612611448507559, + "learning_rate": 9.097265616582988e-07, + "loss": 4.3316, + "step": 37440 + }, + { + "epoch": 1.6134728862471466, + "learning_rate": 9.096780796828201e-07, + "loss": 4.3961, + "step": 37460 + }, + { + "epoch": 1.6143343239867338, + "learning_rate": 9.096295977073412e-07, + "loss": 4.345, + "step": 37480 + }, + { + "epoch": 1.615195761726321, + "learning_rate": 9.095811157318623e-07, + "loss": 4.168, + "step": 37500 + }, + { + "epoch": 1.6160571994659088, + "learning_rate": 9.095326337563834e-07, + "loss": 4.4326, + "step": 37520 + }, + { + "epoch": 1.616918637205496, + "learning_rate": 9.094841517809045e-07, + "loss": 4.2533, + "step": 37540 + }, + { + "epoch": 1.6177800749450832, + "learning_rate": 9.094356698054256e-07, + "loss": 4.3453, + "step": 37560 + }, + { + "epoch": 1.6186415126846707, + "learning_rate": 9.093871878299467e-07, + "loss": 4.2739, + "step": 37580 + }, + { + "epoch": 1.6195029504242582, + "learning_rate": 9.093387058544678e-07, + "loss": 4.3841, + "step": 37600 + }, + { + "epoch": 1.6203643881638454, + "learning_rate": 9.09290223878989e-07, + "loss": 4.3808, + "step": 37620 + }, + { + "epoch": 1.6212258259034327, + "learning_rate": 9.092417419035101e-07, + "loss": 4.366, + "step": 37640 + }, + { + "epoch": 1.6220872636430204, + "learning_rate": 9.091932599280312e-07, + "loss": 4.3987, + "step": 37660 + }, + { + "epoch": 1.6229487013826076, + "learning_rate": 9.091447779525522e-07, + "loss": 4.6227, + "step": 37680 + }, + { + "epoch": 1.6238101391221949, + "learning_rate": 9.090962959770734e-07, + "loss": 4.1742, + "step": 37700 + }, + { + "epoch": 1.6246715768617823, + "learning_rate": 9.090478140015944e-07, + "loss": 4.361, + "step": 37720 + }, + { + "epoch": 1.6255330146013698, + "learning_rate": 9.089993320261156e-07, + "loss": 4.3502, + "step": 37740 + }, + { + "epoch": 1.626394452340957, + "learning_rate": 9.089508500506367e-07, + "loss": 4.4801, + "step": 37760 + }, + { + "epoch": 1.6272558900805443, + "learning_rate": 9.089023680751579e-07, + "loss": 4.412, + "step": 37780 + }, + { + "epoch": 1.628117327820132, + "learning_rate": 9.088538860996788e-07, + "loss": 4.3157, + "step": 37800 + }, + { + "epoch": 1.6289787655597192, + "learning_rate": 9.088054041242e-07, + "loss": 4.2927, + "step": 37820 + }, + { + "epoch": 1.6298402032993065, + "learning_rate": 9.087569221487211e-07, + "loss": 4.4417, + "step": 37840 + }, + { + "epoch": 1.630701641038894, + "learning_rate": 9.087084401732423e-07, + "loss": 4.4633, + "step": 37860 + }, + { + "epoch": 1.6315630787784814, + "learning_rate": 9.086599581977633e-07, + "loss": 4.619, + "step": 37880 + }, + { + "epoch": 1.6324245165180686, + "learning_rate": 9.086114762222844e-07, + "loss": 4.429, + "step": 37900 + }, + { + "epoch": 1.6332859542576559, + "learning_rate": 9.085629942468056e-07, + "loss": 4.3935, + "step": 37920 + }, + { + "epoch": 1.6341473919972433, + "learning_rate": 9.085145122713267e-07, + "loss": 4.5075, + "step": 37940 + }, + { + "epoch": 1.6350088297368308, + "learning_rate": 9.084660302958478e-07, + "loss": 4.2795, + "step": 37960 + }, + { + "epoch": 1.635870267476418, + "learning_rate": 9.084175483203688e-07, + "loss": 4.4882, + "step": 37980 + }, + { + "epoch": 1.6367317052160055, + "learning_rate": 9.0836906634489e-07, + "loss": 4.3903, + "step": 38000 + }, + { + "epoch": 1.637593142955593, + "learning_rate": 9.08320584369411e-07, + "loss": 4.4282, + "step": 38020 + }, + { + "epoch": 1.6384545806951802, + "learning_rate": 9.082721023939322e-07, + "loss": 4.4172, + "step": 38040 + }, + { + "epoch": 1.6393160184347675, + "learning_rate": 9.082236204184532e-07, + "loss": 4.405, + "step": 38060 + }, + { + "epoch": 1.640177456174355, + "learning_rate": 9.081751384429744e-07, + "loss": 4.3862, + "step": 38080 + }, + { + "epoch": 1.6410388939139424, + "learning_rate": 9.081266564674954e-07, + "loss": 4.2601, + "step": 38100 + }, + { + "epoch": 1.6419003316535297, + "learning_rate": 9.080781744920166e-07, + "loss": 4.3856, + "step": 38120 + }, + { + "epoch": 1.6427617693931171, + "learning_rate": 9.080296925165377e-07, + "loss": 4.4117, + "step": 38140 + }, + { + "epoch": 1.6436232071327046, + "learning_rate": 9.079812105410589e-07, + "loss": 4.3225, + "step": 38160 + }, + { + "epoch": 1.6444846448722918, + "learning_rate": 9.079327285655798e-07, + "loss": 4.5712, + "step": 38180 + }, + { + "epoch": 1.645346082611879, + "learning_rate": 9.07884246590101e-07, + "loss": 4.7472, + "step": 38200 + }, + { + "epoch": 1.6462075203514666, + "learning_rate": 9.078357646146221e-07, + "loss": 4.463, + "step": 38220 + }, + { + "epoch": 1.647068958091054, + "learning_rate": 9.077872826391433e-07, + "loss": 4.3645, + "step": 38240 + }, + { + "epoch": 1.6479303958306413, + "learning_rate": 9.077388006636643e-07, + "loss": 4.1832, + "step": 38260 + }, + { + "epoch": 1.6487918335702287, + "learning_rate": 9.076903186881855e-07, + "loss": 4.2881, + "step": 38280 + }, + { + "epoch": 1.6496532713098162, + "learning_rate": 9.076418367127066e-07, + "loss": 4.5145, + "step": 38300 + }, + { + "epoch": 1.6505147090494035, + "learning_rate": 9.075933547372277e-07, + "loss": 4.46, + "step": 38320 + }, + { + "epoch": 1.6513761467889907, + "learning_rate": 9.075448727617487e-07, + "loss": 4.4185, + "step": 38340 + }, + { + "epoch": 1.6522375845285782, + "learning_rate": 9.074963907862699e-07, + "loss": 4.4524, + "step": 38360 + }, + { + "epoch": 1.6530990222681656, + "learning_rate": 9.074479088107909e-07, + "loss": 4.54, + "step": 38380 + }, + { + "epoch": 1.6539604600077529, + "learning_rate": 9.073994268353122e-07, + "loss": 4.2419, + "step": 38400 + }, + { + "epoch": 1.6548218977473403, + "learning_rate": 9.073509448598332e-07, + "loss": 4.4214, + "step": 38420 + }, + { + "epoch": 1.6556833354869278, + "learning_rate": 9.073024628843542e-07, + "loss": 4.5933, + "step": 38440 + }, + { + "epoch": 1.656544773226515, + "learning_rate": 9.072539809088754e-07, + "loss": 4.4869, + "step": 38460 + }, + { + "epoch": 1.6574062109661023, + "learning_rate": 9.072054989333966e-07, + "loss": 4.3976, + "step": 38480 + }, + { + "epoch": 1.6582676487056898, + "learning_rate": 9.071570169579176e-07, + "loss": 4.3852, + "step": 38500 + }, + { + "epoch": 1.6591290864452772, + "learning_rate": 9.071085349824387e-07, + "loss": 4.641, + "step": 38520 + }, + { + "epoch": 1.6599905241848645, + "learning_rate": 9.070600530069599e-07, + "loss": 4.4362, + "step": 38540 + }, + { + "epoch": 1.660851961924452, + "learning_rate": 9.070115710314809e-07, + "loss": 4.6085, + "step": 38560 + }, + { + "epoch": 1.6617133996640394, + "learning_rate": 9.06963089056002e-07, + "loss": 4.3794, + "step": 38580 + }, + { + "epoch": 1.6625748374036267, + "learning_rate": 9.069146070805231e-07, + "loss": 4.4347, + "step": 38600 + }, + { + "epoch": 1.663436275143214, + "learning_rate": 9.068661251050443e-07, + "loss": 4.5453, + "step": 38620 + }, + { + "epoch": 1.6642977128828014, + "learning_rate": 9.068176431295653e-07, + "loss": 4.2614, + "step": 38640 + }, + { + "epoch": 1.6651591506223888, + "learning_rate": 9.067691611540865e-07, + "loss": 4.4148, + "step": 38660 + }, + { + "epoch": 1.666020588361976, + "learning_rate": 9.067206791786076e-07, + "loss": 4.2945, + "step": 38680 + }, + { + "epoch": 1.6668820261015636, + "learning_rate": 9.066721972031286e-07, + "loss": 4.3848, + "step": 38700 + }, + { + "epoch": 1.667743463841151, + "learning_rate": 9.066237152276497e-07, + "loss": 4.2856, + "step": 38720 + }, + { + "epoch": 1.6686049015807383, + "learning_rate": 9.065752332521709e-07, + "loss": 4.4474, + "step": 38740 + }, + { + "epoch": 1.6694663393203255, + "learning_rate": 9.06526751276692e-07, + "loss": 4.3869, + "step": 38760 + }, + { + "epoch": 1.670327777059913, + "learning_rate": 9.064782693012131e-07, + "loss": 4.5912, + "step": 38780 + }, + { + "epoch": 1.6711892147995004, + "learning_rate": 9.064297873257342e-07, + "loss": 4.7353, + "step": 38800 + }, + { + "epoch": 1.6720506525390877, + "learning_rate": 9.063813053502553e-07, + "loss": 4.3667, + "step": 38820 + }, + { + "epoch": 1.6729120902786752, + "learning_rate": 9.063328233747764e-07, + "loss": 4.4119, + "step": 38840 + }, + { + "epoch": 1.6737735280182626, + "learning_rate": 9.062843413992975e-07, + "loss": 4.4805, + "step": 38860 + }, + { + "epoch": 1.6746349657578499, + "learning_rate": 9.062358594238186e-07, + "loss": 4.3972, + "step": 38880 + }, + { + "epoch": 1.6754964034974371, + "learning_rate": 9.061873774483397e-07, + "loss": 4.5487, + "step": 38900 + }, + { + "epoch": 1.6763578412370246, + "learning_rate": 9.061388954728609e-07, + "loss": 4.4102, + "step": 38920 + }, + { + "epoch": 1.677219278976612, + "learning_rate": 9.060904134973819e-07, + "loss": 4.1832, + "step": 38940 + }, + { + "epoch": 1.6780807167161993, + "learning_rate": 9.06041931521903e-07, + "loss": 4.5223, + "step": 38960 + }, + { + "epoch": 1.6789421544557868, + "learning_rate": 9.059934495464241e-07, + "loss": 4.8365, + "step": 38980 + }, + { + "epoch": 1.6798035921953742, + "learning_rate": 9.059449675709452e-07, + "loss": 4.2166, + "step": 39000 + }, + { + "epoch": 1.6806650299349615, + "learning_rate": 9.058964855954664e-07, + "loss": 4.1617, + "step": 39020 + }, + { + "epoch": 1.6815264676745487, + "learning_rate": 9.058480036199875e-07, + "loss": 4.5641, + "step": 39040 + }, + { + "epoch": 1.6823879054141362, + "learning_rate": 9.057995216445086e-07, + "loss": 4.455, + "step": 39060 + }, + { + "epoch": 1.6832493431537237, + "learning_rate": 9.057510396690296e-07, + "loss": 4.2939, + "step": 39080 + }, + { + "epoch": 1.684110780893311, + "learning_rate": 9.057025576935507e-07, + "loss": 4.3436, + "step": 39100 + }, + { + "epoch": 1.6849722186328981, + "learning_rate": 9.056540757180719e-07, + "loss": 4.2504, + "step": 39120 + }, + { + "epoch": 1.6858336563724858, + "learning_rate": 9.05605593742593e-07, + "loss": 4.3646, + "step": 39140 + }, + { + "epoch": 1.686695094112073, + "learning_rate": 9.055571117671141e-07, + "loss": 4.4256, + "step": 39160 + }, + { + "epoch": 1.6875565318516603, + "learning_rate": 9.055086297916352e-07, + "loss": 4.3223, + "step": 39180 + }, + { + "epoch": 1.6884179695912478, + "learning_rate": 9.054601478161563e-07, + "loss": 4.3405, + "step": 39200 + }, + { + "epoch": 1.6892794073308353, + "learning_rate": 9.054116658406772e-07, + "loss": 4.5136, + "step": 39220 + }, + { + "epoch": 1.6901408450704225, + "learning_rate": 9.053631838651985e-07, + "loss": 4.3134, + "step": 39240 + }, + { + "epoch": 1.6910022828100097, + "learning_rate": 9.053147018897196e-07, + "loss": 4.4582, + "step": 39260 + }, + { + "epoch": 1.6918637205495974, + "learning_rate": 9.052662199142408e-07, + "loss": 4.5434, + "step": 39280 + }, + { + "epoch": 1.6927251582891847, + "learning_rate": 9.052177379387618e-07, + "loss": 4.4312, + "step": 39300 + }, + { + "epoch": 1.693586596028772, + "learning_rate": 9.051692559632829e-07, + "loss": 4.6348, + "step": 39320 + }, + { + "epoch": 1.6944480337683594, + "learning_rate": 9.05120773987804e-07, + "loss": 4.4803, + "step": 39340 + }, + { + "epoch": 1.6953094715079469, + "learning_rate": 9.050722920123252e-07, + "loss": 4.2973, + "step": 39360 + }, + { + "epoch": 1.696170909247534, + "learning_rate": 9.050238100368462e-07, + "loss": 4.3818, + "step": 39380 + }, + { + "epoch": 1.6970323469871214, + "learning_rate": 9.049753280613674e-07, + "loss": 4.5338, + "step": 39400 + }, + { + "epoch": 1.697893784726709, + "learning_rate": 9.049268460858885e-07, + "loss": 4.2399, + "step": 39420 + }, + { + "epoch": 1.6987552224662963, + "learning_rate": 9.048783641104097e-07, + "loss": 4.3713, + "step": 39440 + }, + { + "epoch": 1.6996166602058835, + "learning_rate": 9.048298821349306e-07, + "loss": 4.2661, + "step": 39460 + }, + { + "epoch": 1.700478097945471, + "learning_rate": 9.047814001594518e-07, + "loss": 4.1642, + "step": 39480 + }, + { + "epoch": 1.7013395356850585, + "learning_rate": 9.047329181839729e-07, + "loss": 4.239, + "step": 39500 + }, + { + "epoch": 1.7022009734246457, + "learning_rate": 9.04684436208494e-07, + "loss": 4.3927, + "step": 39520 + }, + { + "epoch": 1.703062411164233, + "learning_rate": 9.046359542330151e-07, + "loss": 4.4087, + "step": 39540 + }, + { + "epoch": 1.7039238489038204, + "learning_rate": 9.045874722575363e-07, + "loss": 4.5352, + "step": 39560 + }, + { + "epoch": 1.704785286643408, + "learning_rate": 9.045389902820573e-07, + "loss": 4.5012, + "step": 39580 + }, + { + "epoch": 1.7056467243829951, + "learning_rate": 9.044905083065783e-07, + "loss": 4.5037, + "step": 39600 + }, + { + "epoch": 1.7065081621225826, + "learning_rate": 9.044420263310995e-07, + "loss": 4.3658, + "step": 39620 + }, + { + "epoch": 1.70736959986217, + "learning_rate": 9.043935443556205e-07, + "loss": 4.3553, + "step": 39640 + }, + { + "epoch": 1.7082310376017573, + "learning_rate": 9.043450623801418e-07, + "loss": 4.344, + "step": 39660 + }, + { + "epoch": 1.7090924753413446, + "learning_rate": 9.042965804046628e-07, + "loss": 4.2964, + "step": 39680 + }, + { + "epoch": 1.709953913080932, + "learning_rate": 9.04248098429184e-07, + "loss": 4.0981, + "step": 39700 + }, + { + "epoch": 1.7108153508205195, + "learning_rate": 9.04199616453705e-07, + "loss": 4.2457, + "step": 39720 + }, + { + "epoch": 1.7116767885601067, + "learning_rate": 9.041511344782263e-07, + "loss": 4.0537, + "step": 39740 + }, + { + "epoch": 1.7125382262996942, + "learning_rate": 9.041026525027472e-07, + "loss": 4.5998, + "step": 39760 + }, + { + "epoch": 1.7133996640392817, + "learning_rate": 9.040541705272684e-07, + "loss": 4.5172, + "step": 39780 + }, + { + "epoch": 1.714261101778869, + "learning_rate": 9.040056885517895e-07, + "loss": 4.3175, + "step": 39800 + }, + { + "epoch": 1.7151225395184562, + "learning_rate": 9.039572065763107e-07, + "loss": 4.5334, + "step": 39820 + }, + { + "epoch": 1.7159839772580436, + "learning_rate": 9.039087246008316e-07, + "loss": 4.4264, + "step": 39840 + }, + { + "epoch": 1.716845414997631, + "learning_rate": 9.038602426253528e-07, + "loss": 4.4285, + "step": 39860 + }, + { + "epoch": 1.7177068527372183, + "learning_rate": 9.038117606498739e-07, + "loss": 4.2603, + "step": 39880 + }, + { + "epoch": 1.7185682904768058, + "learning_rate": 9.037632786743951e-07, + "loss": 4.4473, + "step": 39900 + }, + { + "epoch": 1.7194297282163933, + "learning_rate": 9.037147966989161e-07, + "loss": 4.3832, + "step": 39920 + }, + { + "epoch": 1.7202911659559805, + "learning_rate": 9.036663147234373e-07, + "loss": 4.2055, + "step": 39940 + }, + { + "epoch": 1.7211526036955678, + "learning_rate": 9.036178327479583e-07, + "loss": 4.4615, + "step": 39960 + }, + { + "epoch": 1.7220140414351552, + "learning_rate": 9.035693507724794e-07, + "loss": 4.5087, + "step": 39980 + }, + { + "epoch": 1.7228754791747427, + "learning_rate": 9.035208687970005e-07, + "loss": 4.2625, + "step": 40000 + }, + { + "epoch": 1.72373691691433, + "learning_rate": 9.034723868215217e-07, + "loss": 4.1945, + "step": 40020 + }, + { + "epoch": 1.7245983546539174, + "learning_rate": 9.034239048460428e-07, + "loss": 4.6889, + "step": 40040 + }, + { + "epoch": 1.7254597923935049, + "learning_rate": 9.033754228705638e-07, + "loss": 4.3781, + "step": 40060 + }, + { + "epoch": 1.7263212301330921, + "learning_rate": 9.03326940895085e-07, + "loss": 4.4345, + "step": 40080 + }, + { + "epoch": 1.7271826678726794, + "learning_rate": 9.032784589196061e-07, + "loss": 4.315, + "step": 40100 + }, + { + "epoch": 1.7280441056122668, + "learning_rate": 9.032299769441272e-07, + "loss": 4.5759, + "step": 40120 + }, + { + "epoch": 1.7289055433518543, + "learning_rate": 9.031814949686482e-07, + "loss": 4.4615, + "step": 40140 + }, + { + "epoch": 1.7297669810914416, + "learning_rate": 9.031330129931693e-07, + "loss": 4.6207, + "step": 40160 + }, + { + "epoch": 1.730628418831029, + "learning_rate": 9.030845310176905e-07, + "loss": 4.4212, + "step": 40180 + }, + { + "epoch": 1.7314898565706165, + "learning_rate": 9.030360490422117e-07, + "loss": 4.3989, + "step": 40200 + }, + { + "epoch": 1.7323512943102037, + "learning_rate": 9.029875670667326e-07, + "loss": 4.4788, + "step": 40220 + }, + { + "epoch": 1.733212732049791, + "learning_rate": 9.029390850912538e-07, + "loss": 4.4364, + "step": 40240 + }, + { + "epoch": 1.7340741697893785, + "learning_rate": 9.028906031157749e-07, + "loss": 4.4474, + "step": 40260 + }, + { + "epoch": 1.734935607528966, + "learning_rate": 9.02842121140296e-07, + "loss": 4.4528, + "step": 40280 + }, + { + "epoch": 1.7357970452685532, + "learning_rate": 9.027936391648171e-07, + "loss": 4.3944, + "step": 40300 + }, + { + "epoch": 1.7366584830081406, + "learning_rate": 9.027451571893383e-07, + "loss": 4.3709, + "step": 40320 + }, + { + "epoch": 1.737519920747728, + "learning_rate": 9.026966752138594e-07, + "loss": 4.5746, + "step": 40340 + }, + { + "epoch": 1.7383813584873153, + "learning_rate": 9.026481932383804e-07, + "loss": 4.3533, + "step": 40360 + }, + { + "epoch": 1.7392427962269026, + "learning_rate": 9.025997112629015e-07, + "loss": 4.4538, + "step": 40380 + }, + { + "epoch": 1.74010423396649, + "learning_rate": 9.025512292874227e-07, + "loss": 4.2207, + "step": 40400 + }, + { + "epoch": 1.7409656717060775, + "learning_rate": 9.025027473119438e-07, + "loss": 4.4206, + "step": 40420 + }, + { + "epoch": 1.7418271094456648, + "learning_rate": 9.024542653364649e-07, + "loss": 4.4268, + "step": 40440 + }, + { + "epoch": 1.7426885471852522, + "learning_rate": 9.02405783360986e-07, + "loss": 4.4761, + "step": 40460 + }, + { + "epoch": 1.7435499849248397, + "learning_rate": 9.023573013855071e-07, + "loss": 4.5912, + "step": 40480 + }, + { + "epoch": 1.744411422664427, + "learning_rate": 9.023088194100282e-07, + "loss": 4.1854, + "step": 40500 + }, + { + "epoch": 1.7452728604040142, + "learning_rate": 9.022603374345493e-07, + "loss": 4.4955, + "step": 40520 + }, + { + "epoch": 1.7461342981436017, + "learning_rate": 9.022118554590704e-07, + "loss": 4.2738, + "step": 40540 + }, + { + "epoch": 1.7469957358831891, + "learning_rate": 9.021633734835916e-07, + "loss": 4.4197, + "step": 40560 + }, + { + "epoch": 1.7478571736227764, + "learning_rate": 9.021148915081126e-07, + "loss": 4.4153, + "step": 40580 + }, + { + "epoch": 1.7487186113623638, + "learning_rate": 9.020664095326336e-07, + "loss": 4.3795, + "step": 40600 + }, + { + "epoch": 1.7495800491019513, + "learning_rate": 9.020179275571548e-07, + "loss": 4.3038, + "step": 40620 + }, + { + "epoch": 1.7504414868415386, + "learning_rate": 9.01969445581676e-07, + "loss": 4.4357, + "step": 40640 + }, + { + "epoch": 1.7513029245811258, + "learning_rate": 9.01920963606197e-07, + "loss": 4.5771, + "step": 40660 + }, + { + "epoch": 1.7521643623207133, + "learning_rate": 9.018724816307181e-07, + "loss": 4.1847, + "step": 40680 + }, + { + "epoch": 1.7530258000603007, + "learning_rate": 9.018239996552393e-07, + "loss": 4.3277, + "step": 40700 + }, + { + "epoch": 1.753887237799888, + "learning_rate": 9.017755176797604e-07, + "loss": 4.2053, + "step": 40720 + }, + { + "epoch": 1.7547486755394752, + "learning_rate": 9.017270357042814e-07, + "loss": 4.4161, + "step": 40740 + }, + { + "epoch": 1.755610113279063, + "learning_rate": 9.016785537288025e-07, + "loss": 4.4419, + "step": 40760 + }, + { + "epoch": 1.7564715510186502, + "learning_rate": 9.016300717533237e-07, + "loss": 4.3885, + "step": 40780 + }, + { + "epoch": 1.7573329887582374, + "learning_rate": 9.015815897778448e-07, + "loss": 4.3935, + "step": 40800 + }, + { + "epoch": 1.7581944264978249, + "learning_rate": 9.015331078023659e-07, + "loss": 4.4418, + "step": 40820 + }, + { + "epoch": 1.7590558642374123, + "learning_rate": 9.01484625826887e-07, + "loss": 4.517, + "step": 40840 + }, + { + "epoch": 1.7599173019769996, + "learning_rate": 9.014361438514081e-07, + "loss": 4.2625, + "step": 40860 + }, + { + "epoch": 1.7607787397165868, + "learning_rate": 9.013876618759291e-07, + "loss": 4.5596, + "step": 40880 + }, + { + "epoch": 1.7616401774561745, + "learning_rate": 9.013391799004503e-07, + "loss": 4.3313, + "step": 40900 + }, + { + "epoch": 1.7625016151957618, + "learning_rate": 9.012906979249714e-07, + "loss": 4.4232, + "step": 40920 + }, + { + "epoch": 1.763363052935349, + "learning_rate": 9.012422159494926e-07, + "loss": 4.2839, + "step": 40940 + }, + { + "epoch": 1.7642244906749365, + "learning_rate": 9.011937339740136e-07, + "loss": 4.5854, + "step": 40960 + }, + { + "epoch": 1.765085928414524, + "learning_rate": 9.011452519985347e-07, + "loss": 4.561, + "step": 40980 + }, + { + "epoch": 1.7659473661541112, + "learning_rate": 9.010967700230557e-07, + "loss": 4.4233, + "step": 41000 + }, + { + "epoch": 1.7668088038936984, + "learning_rate": 9.01048288047577e-07, + "loss": 4.2088, + "step": 41020 + }, + { + "epoch": 1.767670241633286, + "learning_rate": 9.00999806072098e-07, + "loss": 4.473, + "step": 41040 + }, + { + "epoch": 1.7685316793728734, + "learning_rate": 9.009513240966192e-07, + "loss": 4.3715, + "step": 41060 + }, + { + "epoch": 1.7693931171124606, + "learning_rate": 9.009028421211403e-07, + "loss": 4.3307, + "step": 41080 + }, + { + "epoch": 1.770254554852048, + "learning_rate": 9.008543601456614e-07, + "loss": 4.4411, + "step": 41100 + }, + { + "epoch": 1.7711159925916355, + "learning_rate": 9.008058781701824e-07, + "loss": 4.5383, + "step": 41120 + }, + { + "epoch": 1.7719774303312228, + "learning_rate": 9.007573961947035e-07, + "loss": 4.4853, + "step": 41140 + }, + { + "epoch": 1.77283886807081, + "learning_rate": 9.007089142192247e-07, + "loss": 4.2566, + "step": 41160 + }, + { + "epoch": 1.7737003058103975, + "learning_rate": 9.006604322437459e-07, + "loss": 4.3662, + "step": 41180 + }, + { + "epoch": 1.774561743549985, + "learning_rate": 9.006119502682669e-07, + "loss": 4.293, + "step": 41200 + }, + { + "epoch": 1.7754231812895722, + "learning_rate": 9.00563468292788e-07, + "loss": 4.442, + "step": 41220 + }, + { + "epoch": 1.7762846190291597, + "learning_rate": 9.005149863173091e-07, + "loss": 4.4136, + "step": 41240 + }, + { + "epoch": 1.7771460567687472, + "learning_rate": 9.004665043418301e-07, + "loss": 4.4029, + "step": 41260 + }, + { + "epoch": 1.7780074945083344, + "learning_rate": 9.004180223663513e-07, + "loss": 4.3309, + "step": 41280 + }, + { + "epoch": 1.7788689322479216, + "learning_rate": 9.003695403908724e-07, + "loss": 4.5112, + "step": 41300 + }, + { + "epoch": 1.779730369987509, + "learning_rate": 9.003210584153936e-07, + "loss": 4.2248, + "step": 41320 + }, + { + "epoch": 1.7805918077270966, + "learning_rate": 9.002725764399146e-07, + "loss": 4.4054, + "step": 41340 + }, + { + "epoch": 1.7814532454666838, + "learning_rate": 9.002240944644357e-07, + "loss": 4.3054, + "step": 41360 + }, + { + "epoch": 1.7823146832062713, + "learning_rate": 9.001756124889568e-07, + "loss": 4.4377, + "step": 41380 + }, + { + "epoch": 1.7831761209458588, + "learning_rate": 9.00127130513478e-07, + "loss": 4.6227, + "step": 41400 + }, + { + "epoch": 1.784037558685446, + "learning_rate": 9.000786485379989e-07, + "loss": 4.4275, + "step": 41420 + }, + { + "epoch": 1.7848989964250332, + "learning_rate": 9.000301665625202e-07, + "loss": 4.2365, + "step": 41440 + }, + { + "epoch": 1.7857604341646207, + "learning_rate": 8.999816845870413e-07, + "loss": 4.2151, + "step": 41460 + }, + { + "epoch": 1.7866218719042082, + "learning_rate": 8.999332026115625e-07, + "loss": 4.4628, + "step": 41480 + }, + { + "epoch": 1.7874833096437954, + "learning_rate": 8.998847206360834e-07, + "loss": 4.1836, + "step": 41500 + }, + { + "epoch": 1.788344747383383, + "learning_rate": 8.998362386606047e-07, + "loss": 4.2967, + "step": 41520 + }, + { + "epoch": 1.7892061851229704, + "learning_rate": 8.997877566851257e-07, + "loss": 4.4711, + "step": 41540 + }, + { + "epoch": 1.7900676228625576, + "learning_rate": 8.997392747096468e-07, + "loss": 4.3517, + "step": 41560 + }, + { + "epoch": 1.7909290606021449, + "learning_rate": 8.996907927341679e-07, + "loss": 4.4242, + "step": 41580 + }, + { + "epoch": 1.7917904983417323, + "learning_rate": 8.996423107586891e-07, + "loss": 4.2808, + "step": 41600 + }, + { + "epoch": 1.7926519360813198, + "learning_rate": 8.995938287832101e-07, + "loss": 4.2009, + "step": 41620 + }, + { + "epoch": 1.793513373820907, + "learning_rate": 8.995453468077312e-07, + "loss": 4.3003, + "step": 41640 + }, + { + "epoch": 1.7943748115604945, + "learning_rate": 8.994968648322523e-07, + "loss": 4.3452, + "step": 41660 + }, + { + "epoch": 1.795236249300082, + "learning_rate": 8.994483828567734e-07, + "loss": 4.2786, + "step": 41680 + }, + { + "epoch": 1.7960976870396692, + "learning_rate": 8.993999008812946e-07, + "loss": 4.2059, + "step": 41700 + }, + { + "epoch": 1.7969591247792565, + "learning_rate": 8.993514189058157e-07, + "loss": 4.2892, + "step": 41720 + }, + { + "epoch": 1.797820562518844, + "learning_rate": 8.993029369303367e-07, + "loss": 4.4366, + "step": 41740 + }, + { + "epoch": 1.7986820002584314, + "learning_rate": 8.992544549548578e-07, + "loss": 4.2924, + "step": 41760 + }, + { + "epoch": 1.7995434379980186, + "learning_rate": 8.99205972979379e-07, + "loss": 4.4312, + "step": 41780 + }, + { + "epoch": 1.800404875737606, + "learning_rate": 8.991574910039e-07, + "loss": 4.2705, + "step": 41800 + }, + { + "epoch": 1.8012663134771936, + "learning_rate": 8.991090090284212e-07, + "loss": 4.5428, + "step": 41820 + }, + { + "epoch": 1.8021277512167808, + "learning_rate": 8.990605270529423e-07, + "loss": 4.2956, + "step": 41840 + }, + { + "epoch": 1.802989188956368, + "learning_rate": 8.990120450774634e-07, + "loss": 4.3577, + "step": 41860 + }, + { + "epoch": 1.8038506266959555, + "learning_rate": 8.989635631019844e-07, + "loss": 4.543, + "step": 41880 + }, + { + "epoch": 1.804712064435543, + "learning_rate": 8.989150811265056e-07, + "loss": 4.7283, + "step": 41900 + }, + { + "epoch": 1.8055735021751302, + "learning_rate": 8.988665991510267e-07, + "loss": 4.2655, + "step": 41920 + }, + { + "epoch": 1.8064349399147177, + "learning_rate": 8.988181171755477e-07, + "loss": 4.3562, + "step": 41940 + }, + { + "epoch": 1.8072963776543052, + "learning_rate": 8.987696352000689e-07, + "loss": 4.3716, + "step": 41960 + }, + { + "epoch": 1.8081578153938924, + "learning_rate": 8.987211532245901e-07, + "loss": 4.4538, + "step": 41980 + }, + { + "epoch": 1.8090192531334797, + "learning_rate": 8.986726712491111e-07, + "loss": 4.5135, + "step": 42000 + }, + { + "epoch": 1.8098806908730671, + "learning_rate": 8.986241892736322e-07, + "loss": 4.2465, + "step": 42020 + }, + { + "epoch": 1.8107421286126546, + "learning_rate": 8.985757072981533e-07, + "loss": 4.6041, + "step": 42040 + }, + { + "epoch": 1.8116035663522418, + "learning_rate": 8.985272253226745e-07, + "loss": 4.7304, + "step": 42060 + }, + { + "epoch": 1.8124650040918293, + "learning_rate": 8.984787433471956e-07, + "loss": 4.4824, + "step": 42080 + }, + { + "epoch": 1.8133264418314168, + "learning_rate": 8.984302613717167e-07, + "loss": 4.4615, + "step": 42100 + }, + { + "epoch": 1.814187879571004, + "learning_rate": 8.983817793962378e-07, + "loss": 4.3076, + "step": 42120 + }, + { + "epoch": 1.8150493173105913, + "learning_rate": 8.983332974207589e-07, + "loss": 4.4122, + "step": 42140 + }, + { + "epoch": 1.8159107550501787, + "learning_rate": 8.982848154452799e-07, + "loss": 4.2969, + "step": 42160 + }, + { + "epoch": 1.8167721927897662, + "learning_rate": 8.982363334698011e-07, + "loss": 4.3677, + "step": 42180 + }, + { + "epoch": 1.8176336305293535, + "learning_rate": 8.981878514943222e-07, + "loss": 4.5101, + "step": 42200 + }, + { + "epoch": 1.818495068268941, + "learning_rate": 8.981393695188433e-07, + "loss": 4.4752, + "step": 42220 + }, + { + "epoch": 1.8193565060085284, + "learning_rate": 8.980908875433644e-07, + "loss": 4.7016, + "step": 42240 + }, + { + "epoch": 1.8202179437481156, + "learning_rate": 8.980424055678855e-07, + "loss": 4.6251, + "step": 42260 + }, + { + "epoch": 1.8210793814877029, + "learning_rate": 8.979939235924066e-07, + "loss": 4.4877, + "step": 42280 + }, + { + "epoch": 1.8219408192272903, + "learning_rate": 8.979454416169277e-07, + "loss": 4.2176, + "step": 42300 + }, + { + "epoch": 1.8228022569668778, + "learning_rate": 8.978969596414488e-07, + "loss": 4.5683, + "step": 42320 + }, + { + "epoch": 1.823663694706465, + "learning_rate": 8.978484776659699e-07, + "loss": 4.3706, + "step": 42340 + }, + { + "epoch": 1.8245251324460523, + "learning_rate": 8.977999956904911e-07, + "loss": 4.3926, + "step": 42360 + }, + { + "epoch": 1.82538657018564, + "learning_rate": 8.97751513715012e-07, + "loss": 4.4718, + "step": 42380 + }, + { + "epoch": 1.8262480079252272, + "learning_rate": 8.977030317395332e-07, + "loss": 4.2747, + "step": 42400 + }, + { + "epoch": 1.8271094456648145, + "learning_rate": 8.976545497640543e-07, + "loss": 4.4391, + "step": 42420 + }, + { + "epoch": 1.827970883404402, + "learning_rate": 8.976060677885755e-07, + "loss": 4.5646, + "step": 42440 + }, + { + "epoch": 1.8288323211439894, + "learning_rate": 8.975575858130965e-07, + "loss": 4.2667, + "step": 42460 + }, + { + "epoch": 1.8296937588835767, + "learning_rate": 8.975091038376177e-07, + "loss": 4.4766, + "step": 42480 + }, + { + "epoch": 1.830555196623164, + "learning_rate": 8.974606218621388e-07, + "loss": 4.2121, + "step": 42500 + }, + { + "epoch": 1.8314166343627516, + "learning_rate": 8.974121398866599e-07, + "loss": 4.4128, + "step": 42520 + }, + { + "epoch": 1.8322780721023388, + "learning_rate": 8.973636579111809e-07, + "loss": 4.2765, + "step": 42540 + }, + { + "epoch": 1.833139509841926, + "learning_rate": 8.973151759357021e-07, + "loss": 4.5529, + "step": 42560 + }, + { + "epoch": 1.8340009475815136, + "learning_rate": 8.972666939602232e-07, + "loss": 4.3912, + "step": 42580 + }, + { + "epoch": 1.834862385321101, + "learning_rate": 8.972182119847444e-07, + "loss": 4.5823, + "step": 42600 + }, + { + "epoch": 1.8357238230606883, + "learning_rate": 8.971697300092654e-07, + "loss": 4.3639, + "step": 42620 + }, + { + "epoch": 1.8365852608002755, + "learning_rate": 8.971212480337865e-07, + "loss": 4.301, + "step": 42640 + }, + { + "epoch": 1.837446698539863, + "learning_rate": 8.970727660583076e-07, + "loss": 4.282, + "step": 42660 + }, + { + "epoch": 1.8383081362794504, + "learning_rate": 8.970242840828288e-07, + "loss": 4.4277, + "step": 42680 + }, + { + "epoch": 1.8391695740190377, + "learning_rate": 8.969758021073498e-07, + "loss": 4.5123, + "step": 42700 + }, + { + "epoch": 1.8400310117586252, + "learning_rate": 8.96927320131871e-07, + "loss": 4.4411, + "step": 42720 + }, + { + "epoch": 1.8408924494982126, + "learning_rate": 8.968788381563921e-07, + "loss": 4.4547, + "step": 42740 + }, + { + "epoch": 1.8417538872377999, + "learning_rate": 8.968303561809131e-07, + "loss": 4.2648, + "step": 42760 + }, + { + "epoch": 1.8426153249773871, + "learning_rate": 8.967818742054341e-07, + "loss": 4.4489, + "step": 42780 + }, + { + "epoch": 1.8434767627169746, + "learning_rate": 8.967333922299554e-07, + "loss": 4.4945, + "step": 42800 + }, + { + "epoch": 1.844338200456562, + "learning_rate": 8.966849102544765e-07, + "loss": 4.2532, + "step": 42820 + }, + { + "epoch": 1.8451996381961493, + "learning_rate": 8.966364282789975e-07, + "loss": 4.2342, + "step": 42840 + }, + { + "epoch": 1.8460610759357368, + "learning_rate": 8.965879463035187e-07, + "loss": 4.677, + "step": 42860 + }, + { + "epoch": 1.8469225136753242, + "learning_rate": 8.965394643280399e-07, + "loss": 4.2939, + "step": 42880 + }, + { + "epoch": 1.8477839514149115, + "learning_rate": 8.964909823525609e-07, + "loss": 4.4327, + "step": 42900 + }, + { + "epoch": 1.8486453891544987, + "learning_rate": 8.964425003770819e-07, + "loss": 4.5367, + "step": 42920 + }, + { + "epoch": 1.8495068268940862, + "learning_rate": 8.963940184016031e-07, + "loss": 4.4483, + "step": 42940 + }, + { + "epoch": 1.8503682646336737, + "learning_rate": 8.963455364261242e-07, + "loss": 4.3877, + "step": 42960 + }, + { + "epoch": 1.851229702373261, + "learning_rate": 8.962970544506454e-07, + "loss": 4.3075, + "step": 42980 + }, + { + "epoch": 1.8520911401128484, + "learning_rate": 8.962485724751664e-07, + "loss": 4.4414, + "step": 43000 + }, + { + "epoch": 1.8529525778524358, + "learning_rate": 8.962000904996875e-07, + "loss": 4.2825, + "step": 43020 + }, + { + "epoch": 1.853814015592023, + "learning_rate": 8.961516085242086e-07, + "loss": 4.3867, + "step": 43040 + }, + { + "epoch": 1.8546754533316103, + "learning_rate": 8.961031265487297e-07, + "loss": 4.3749, + "step": 43060 + }, + { + "epoch": 1.8555368910711978, + "learning_rate": 8.960546445732508e-07, + "loss": 4.4202, + "step": 43080 + }, + { + "epoch": 1.8563983288107853, + "learning_rate": 8.96006162597772e-07, + "loss": 4.3727, + "step": 43100 + }, + { + "epoch": 1.8572597665503725, + "learning_rate": 8.959576806222931e-07, + "loss": 4.4348, + "step": 43120 + }, + { + "epoch": 1.85812120428996, + "learning_rate": 8.959091986468141e-07, + "loss": 4.1969, + "step": 43140 + }, + { + "epoch": 1.8589826420295474, + "learning_rate": 8.958607166713352e-07, + "loss": 4.2814, + "step": 43160 + }, + { + "epoch": 1.8598440797691347, + "learning_rate": 8.958122346958564e-07, + "loss": 4.4414, + "step": 43180 + }, + { + "epoch": 1.860705517508722, + "learning_rate": 8.957637527203775e-07, + "loss": 4.2574, + "step": 43200 + }, + { + "epoch": 1.8615669552483094, + "learning_rate": 8.957152707448986e-07, + "loss": 4.3493, + "step": 43220 + }, + { + "epoch": 1.8624283929878969, + "learning_rate": 8.956667887694197e-07, + "loss": 4.3951, + "step": 43240 + }, + { + "epoch": 1.863289830727484, + "learning_rate": 8.956183067939409e-07, + "loss": 4.2069, + "step": 43260 + }, + { + "epoch": 1.8641512684670716, + "learning_rate": 8.955698248184619e-07, + "loss": 4.2305, + "step": 43280 + }, + { + "epoch": 1.865012706206659, + "learning_rate": 8.955213428429829e-07, + "loss": 4.2136, + "step": 43300 + }, + { + "epoch": 1.8658741439462463, + "learning_rate": 8.954728608675041e-07, + "loss": 4.3903, + "step": 43320 + }, + { + "epoch": 1.8667355816858335, + "learning_rate": 8.954243788920253e-07, + "loss": 4.6111, + "step": 43340 + }, + { + "epoch": 1.867597019425421, + "learning_rate": 8.953758969165464e-07, + "loss": 4.4162, + "step": 43360 + }, + { + "epoch": 1.8684584571650085, + "learning_rate": 8.953274149410674e-07, + "loss": 4.438, + "step": 43380 + }, + { + "epoch": 1.8693198949045957, + "learning_rate": 8.952789329655885e-07, + "loss": 4.2809, + "step": 43400 + }, + { + "epoch": 1.8701813326441832, + "learning_rate": 8.952304509901096e-07, + "loss": 4.2481, + "step": 43420 + }, + { + "epoch": 1.8710427703837706, + "learning_rate": 8.951819690146307e-07, + "loss": 4.3533, + "step": 43440 + }, + { + "epoch": 1.871904208123358, + "learning_rate": 8.951334870391518e-07, + "loss": 4.3064, + "step": 43460 + }, + { + "epoch": 1.8727656458629451, + "learning_rate": 8.95085005063673e-07, + "loss": 4.3509, + "step": 43480 + }, + { + "epoch": 1.8736270836025326, + "learning_rate": 8.950365230881941e-07, + "loss": 4.2292, + "step": 43500 + }, + { + "epoch": 1.87448852134212, + "learning_rate": 8.949880411127151e-07, + "loss": 4.321, + "step": 43520 + }, + { + "epoch": 1.8753499590817073, + "learning_rate": 8.949395591372362e-07, + "loss": 4.2077, + "step": 43540 + }, + { + "epoch": 1.8762113968212948, + "learning_rate": 8.948910771617574e-07, + "loss": 4.2244, + "step": 43560 + }, + { + "epoch": 1.8770728345608823, + "learning_rate": 8.948425951862785e-07, + "loss": 4.3698, + "step": 43580 + }, + { + "epoch": 1.8779342723004695, + "learning_rate": 8.947941132107996e-07, + "loss": 4.1308, + "step": 43600 + }, + { + "epoch": 1.8787957100400567, + "learning_rate": 8.947456312353207e-07, + "loss": 4.2336, + "step": 43620 + }, + { + "epoch": 1.8796571477796442, + "learning_rate": 8.946971492598419e-07, + "loss": 4.3006, + "step": 43640 + }, + { + "epoch": 1.8805185855192317, + "learning_rate": 8.946486672843628e-07, + "loss": 4.3912, + "step": 43660 + }, + { + "epoch": 1.881380023258819, + "learning_rate": 8.94600185308884e-07, + "loss": 4.3259, + "step": 43680 + }, + { + "epoch": 1.8822414609984064, + "learning_rate": 8.945517033334051e-07, + "loss": 4.456, + "step": 43700 + }, + { + "epoch": 1.8831028987379939, + "learning_rate": 8.945032213579262e-07, + "loss": 4.2251, + "step": 43720 + }, + { + "epoch": 1.883964336477581, + "learning_rate": 8.944547393824473e-07, + "loss": 4.5059, + "step": 43740 + }, + { + "epoch": 1.8848257742171683, + "learning_rate": 8.944062574069685e-07, + "loss": 4.3112, + "step": 43760 + }, + { + "epoch": 1.8856872119567558, + "learning_rate": 8.943577754314895e-07, + "loss": 4.353, + "step": 43780 + }, + { + "epoch": 1.8865486496963433, + "learning_rate": 8.943092934560107e-07, + "loss": 4.3783, + "step": 43800 + }, + { + "epoch": 1.8874100874359305, + "learning_rate": 8.942608114805317e-07, + "loss": 4.3289, + "step": 43820 + }, + { + "epoch": 1.8882715251755178, + "learning_rate": 8.942123295050528e-07, + "loss": 4.519, + "step": 43840 + }, + { + "epoch": 1.8891329629151055, + "learning_rate": 8.94163847529574e-07, + "loss": 4.2126, + "step": 43860 + }, + { + "epoch": 1.8899944006546927, + "learning_rate": 8.941153655540952e-07, + "loss": 4.2742, + "step": 43880 + }, + { + "epoch": 1.89085583839428, + "learning_rate": 8.940668835786162e-07, + "loss": 4.3156, + "step": 43900 + }, + { + "epoch": 1.8917172761338674, + "learning_rate": 8.940184016031372e-07, + "loss": 4.2147, + "step": 43920 + }, + { + "epoch": 1.8925787138734549, + "learning_rate": 8.939699196276584e-07, + "loss": 4.5395, + "step": 43940 + }, + { + "epoch": 1.8934401516130421, + "learning_rate": 8.939214376521794e-07, + "loss": 4.2764, + "step": 43960 + }, + { + "epoch": 1.8943015893526294, + "learning_rate": 8.938729556767006e-07, + "loss": 4.4255, + "step": 43980 + }, + { + "epoch": 1.895163027092217, + "learning_rate": 8.938244737012217e-07, + "loss": 4.4142, + "step": 44000 + }, + { + "epoch": 1.8960244648318043, + "learning_rate": 8.937759917257429e-07, + "loss": 4.6664, + "step": 44020 + }, + { + "epoch": 1.8968859025713916, + "learning_rate": 8.937275097502637e-07, + "loss": 4.4711, + "step": 44040 + }, + { + "epoch": 1.897747340310979, + "learning_rate": 8.93679027774785e-07, + "loss": 4.2542, + "step": 44060 + }, + { + "epoch": 1.8986087780505665, + "learning_rate": 8.936305457993061e-07, + "loss": 4.2999, + "step": 44080 + }, + { + "epoch": 1.8994702157901537, + "learning_rate": 8.935820638238273e-07, + "loss": 4.5732, + "step": 44100 + }, + { + "epoch": 1.900331653529741, + "learning_rate": 8.935335818483483e-07, + "loss": 4.4929, + "step": 44120 + }, + { + "epoch": 1.9011930912693287, + "learning_rate": 8.934850998728695e-07, + "loss": 4.2182, + "step": 44140 + }, + { + "epoch": 1.902054529008916, + "learning_rate": 8.934366178973905e-07, + "loss": 4.4137, + "step": 44160 + }, + { + "epoch": 1.9029159667485032, + "learning_rate": 8.933881359219117e-07, + "loss": 4.3164, + "step": 44180 + }, + { + "epoch": 1.9037774044880906, + "learning_rate": 8.933396539464327e-07, + "loss": 4.1988, + "step": 44200 + }, + { + "epoch": 1.904638842227678, + "learning_rate": 8.932911719709539e-07, + "loss": 4.281, + "step": 44220 + }, + { + "epoch": 1.9055002799672653, + "learning_rate": 8.93242689995475e-07, + "loss": 4.5675, + "step": 44240 + }, + { + "epoch": 1.9063617177068526, + "learning_rate": 8.931942080199962e-07, + "loss": 4.4851, + "step": 44260 + }, + { + "epoch": 1.90722315544644, + "learning_rate": 8.931457260445172e-07, + "loss": 4.5483, + "step": 44280 + }, + { + "epoch": 1.9080845931860275, + "learning_rate": 8.930972440690383e-07, + "loss": 4.3183, + "step": 44300 + }, + { + "epoch": 1.9089460309256148, + "learning_rate": 8.930487620935594e-07, + "loss": 4.5662, + "step": 44320 + }, + { + "epoch": 1.9098074686652022, + "learning_rate": 8.930002801180805e-07, + "loss": 4.1618, + "step": 44340 + }, + { + "epoch": 1.9106689064047897, + "learning_rate": 8.929517981426016e-07, + "loss": 4.5918, + "step": 44360 + }, + { + "epoch": 1.911530344144377, + "learning_rate": 8.929033161671227e-07, + "loss": 4.41, + "step": 44380 + }, + { + "epoch": 1.9123917818839642, + "learning_rate": 8.928548341916439e-07, + "loss": 4.3782, + "step": 44400 + }, + { + "epoch": 1.9132532196235517, + "learning_rate": 8.928063522161649e-07, + "loss": 4.518, + "step": 44420 + }, + { + "epoch": 1.9141146573631391, + "learning_rate": 8.92757870240686e-07, + "loss": 4.4662, + "step": 44440 + }, + { + "epoch": 1.9149760951027264, + "learning_rate": 8.927093882652071e-07, + "loss": 4.2872, + "step": 44460 + }, + { + "epoch": 1.9158375328423138, + "learning_rate": 8.926609062897283e-07, + "loss": 4.3866, + "step": 44480 + }, + { + "epoch": 1.9166989705819013, + "learning_rate": 8.926124243142493e-07, + "loss": 4.436, + "step": 44500 + }, + { + "epoch": 1.9175604083214886, + "learning_rate": 8.925639423387705e-07, + "loss": 4.3438, + "step": 44520 + }, + { + "epoch": 1.9184218460610758, + "learning_rate": 8.925154603632916e-07, + "loss": 4.4794, + "step": 44540 + }, + { + "epoch": 1.9192832838006633, + "learning_rate": 8.924669783878125e-07, + "loss": 4.2837, + "step": 44560 + }, + { + "epoch": 1.9201447215402507, + "learning_rate": 8.924184964123337e-07, + "loss": 4.1781, + "step": 44580 + }, + { + "epoch": 1.921006159279838, + "learning_rate": 8.923700144368549e-07, + "loss": 4.244, + "step": 44600 + }, + { + "epoch": 1.9218675970194254, + "learning_rate": 8.92321532461376e-07, + "loss": 4.2507, + "step": 44620 + }, + { + "epoch": 1.922729034759013, + "learning_rate": 8.922730504858971e-07, + "loss": 4.3202, + "step": 44640 + }, + { + "epoch": 1.9235904724986002, + "learning_rate": 8.922245685104183e-07, + "loss": 4.3428, + "step": 44660 + }, + { + "epoch": 1.9244519102381874, + "learning_rate": 8.921760865349393e-07, + "loss": 4.2586, + "step": 44680 + }, + { + "epoch": 1.9253133479777749, + "learning_rate": 8.921276045594604e-07, + "loss": 4.2734, + "step": 44700 + }, + { + "epoch": 1.9261747857173623, + "learning_rate": 8.920791225839815e-07, + "loss": 4.2984, + "step": 44720 + }, + { + "epoch": 1.9270362234569496, + "learning_rate": 8.920306406085026e-07, + "loss": 4.4429, + "step": 44740 + }, + { + "epoch": 1.927897661196537, + "learning_rate": 8.919821586330238e-07, + "loss": 4.0907, + "step": 44760 + }, + { + "epoch": 1.9287590989361245, + "learning_rate": 8.919336766575449e-07, + "loss": 4.4002, + "step": 44780 + }, + { + "epoch": 1.9296205366757118, + "learning_rate": 8.918851946820659e-07, + "loss": 4.2448, + "step": 44800 + }, + { + "epoch": 1.930481974415299, + "learning_rate": 8.91836712706587e-07, + "loss": 4.302, + "step": 44820 + }, + { + "epoch": 1.9313434121548865, + "learning_rate": 8.917882307311082e-07, + "loss": 4.322, + "step": 44840 + }, + { + "epoch": 1.932204849894474, + "learning_rate": 8.917397487556293e-07, + "loss": 4.2744, + "step": 44860 + }, + { + "epoch": 1.9330662876340612, + "learning_rate": 8.916912667801504e-07, + "loss": 4.5504, + "step": 44880 + }, + { + "epoch": 1.9339277253736487, + "learning_rate": 8.916427848046715e-07, + "loss": 4.3332, + "step": 44900 + }, + { + "epoch": 1.9347891631132361, + "learning_rate": 8.915943028291926e-07, + "loss": 4.4607, + "step": 44920 + }, + { + "epoch": 1.9356506008528234, + "learning_rate": 8.915458208537136e-07, + "loss": 4.293, + "step": 44940 + }, + { + "epoch": 1.9365120385924106, + "learning_rate": 8.914973388782348e-07, + "loss": 4.3819, + "step": 44960 + }, + { + "epoch": 1.937373476331998, + "learning_rate": 8.914488569027559e-07, + "loss": 4.5778, + "step": 44980 + }, + { + "epoch": 1.9382349140715855, + "learning_rate": 8.91400374927277e-07, + "loss": 4.4136, + "step": 45000 + }, + { + "epoch": 1.9390963518111728, + "learning_rate": 8.913518929517981e-07, + "loss": 4.3509, + "step": 45020 + }, + { + "epoch": 1.9399577895507603, + "learning_rate": 8.913034109763192e-07, + "loss": 4.5572, + "step": 45040 + }, + { + "epoch": 1.9408192272903477, + "learning_rate": 8.912549290008403e-07, + "loss": 4.479, + "step": 45060 + }, + { + "epoch": 1.941680665029935, + "learning_rate": 8.912064470253614e-07, + "loss": 4.2881, + "step": 45080 + }, + { + "epoch": 1.9425421027695222, + "learning_rate": 8.911579650498825e-07, + "loss": 4.3303, + "step": 45100 + }, + { + "epoch": 1.9434035405091097, + "learning_rate": 8.911094830744036e-07, + "loss": 4.2722, + "step": 45120 + }, + { + "epoch": 1.9442649782486972, + "learning_rate": 8.910610010989248e-07, + "loss": 4.4041, + "step": 45140 + }, + { + "epoch": 1.9451264159882844, + "learning_rate": 8.910125191234459e-07, + "loss": 4.3474, + "step": 45160 + }, + { + "epoch": 1.9459878537278719, + "learning_rate": 8.909640371479669e-07, + "loss": 4.4535, + "step": 45180 + }, + { + "epoch": 1.9468492914674593, + "learning_rate": 8.90915555172488e-07, + "loss": 4.5766, + "step": 45200 + }, + { + "epoch": 1.9477107292070466, + "learning_rate": 8.908670731970092e-07, + "loss": 4.4793, + "step": 45220 + }, + { + "epoch": 1.9485721669466338, + "learning_rate": 8.908185912215302e-07, + "loss": 4.3069, + "step": 45240 + }, + { + "epoch": 1.9494336046862213, + "learning_rate": 8.907701092460514e-07, + "loss": 4.3347, + "step": 45260 + }, + { + "epoch": 1.9502950424258088, + "learning_rate": 8.907216272705725e-07, + "loss": 4.3836, + "step": 45280 + }, + { + "epoch": 1.951156480165396, + "learning_rate": 8.906731452950936e-07, + "loss": 4.2522, + "step": 45300 + }, + { + "epoch": 1.9520179179049835, + "learning_rate": 8.906246633196146e-07, + "loss": 4.1439, + "step": 45320 + }, + { + "epoch": 1.952879355644571, + "learning_rate": 8.905761813441358e-07, + "loss": 4.3673, + "step": 45340 + }, + { + "epoch": 1.9537407933841582, + "learning_rate": 8.905276993686569e-07, + "loss": 4.3393, + "step": 45360 + }, + { + "epoch": 1.9546022311237454, + "learning_rate": 8.904792173931781e-07, + "loss": 4.4877, + "step": 45380 + }, + { + "epoch": 1.955463668863333, + "learning_rate": 8.904307354176991e-07, + "loss": 4.6196, + "step": 45400 + }, + { + "epoch": 1.9563251066029204, + "learning_rate": 8.903822534422203e-07, + "loss": 4.3089, + "step": 45420 + }, + { + "epoch": 1.9571865443425076, + "learning_rate": 8.903337714667413e-07, + "loss": 4.363, + "step": 45440 + }, + { + "epoch": 1.9580479820820949, + "learning_rate": 8.902852894912624e-07, + "loss": 4.329, + "step": 45460 + }, + { + "epoch": 1.9589094198216825, + "learning_rate": 8.902368075157835e-07, + "loss": 4.4342, + "step": 45480 + }, + { + "epoch": 1.9597708575612698, + "learning_rate": 8.901883255403046e-07, + "loss": 4.4552, + "step": 45500 + }, + { + "epoch": 1.960632295300857, + "learning_rate": 8.901398435648258e-07, + "loss": 4.3011, + "step": 45520 + }, + { + "epoch": 1.9614937330404445, + "learning_rate": 8.900913615893468e-07, + "loss": 4.3669, + "step": 45540 + }, + { + "epoch": 1.962355170780032, + "learning_rate": 8.900428796138679e-07, + "loss": 4.2394, + "step": 45560 + }, + { + "epoch": 1.9632166085196192, + "learning_rate": 8.89994397638389e-07, + "loss": 4.2049, + "step": 45580 + }, + { + "epoch": 1.9640780462592065, + "learning_rate": 8.899459156629102e-07, + "loss": 4.2717, + "step": 45600 + }, + { + "epoch": 1.9649394839987941, + "learning_rate": 8.898974336874312e-07, + "loss": 4.5402, + "step": 45620 + }, + { + "epoch": 1.9658009217383814, + "learning_rate": 8.898489517119524e-07, + "loss": 4.304, + "step": 45640 + }, + { + "epoch": 1.9666623594779686, + "learning_rate": 8.898004697364735e-07, + "loss": 4.3852, + "step": 45660 + }, + { + "epoch": 1.967523797217556, + "learning_rate": 8.897519877609947e-07, + "loss": 4.3731, + "step": 45680 + }, + { + "epoch": 1.9683852349571436, + "learning_rate": 8.897035057855156e-07, + "loss": 4.3931, + "step": 45700 + }, + { + "epoch": 1.9692466726967308, + "learning_rate": 8.896550238100368e-07, + "loss": 4.2081, + "step": 45720 + }, + { + "epoch": 1.970108110436318, + "learning_rate": 8.896065418345579e-07, + "loss": 4.1342, + "step": 45740 + }, + { + "epoch": 1.9709695481759058, + "learning_rate": 8.895580598590791e-07, + "loss": 4.3196, + "step": 45760 + }, + { + "epoch": 1.971830985915493, + "learning_rate": 8.895095778836001e-07, + "loss": 4.2608, + "step": 45780 + }, + { + "epoch": 1.9726924236550802, + "learning_rate": 8.894610959081213e-07, + "loss": 4.2443, + "step": 45800 + }, + { + "epoch": 1.9735538613946677, + "learning_rate": 8.894126139326422e-07, + "loss": 4.5963, + "step": 45820 + }, + { + "epoch": 1.9744152991342552, + "learning_rate": 8.893641319571635e-07, + "loss": 4.1681, + "step": 45840 + }, + { + "epoch": 1.9752767368738424, + "learning_rate": 8.893156499816845e-07, + "loss": 4.5358, + "step": 45860 + }, + { + "epoch": 1.9761381746134297, + "learning_rate": 8.892671680062057e-07, + "loss": 4.2574, + "step": 45880 + }, + { + "epoch": 1.9769996123530171, + "learning_rate": 8.892186860307268e-07, + "loss": 4.2747, + "step": 45900 + }, + { + "epoch": 1.9778610500926046, + "learning_rate": 8.89170204055248e-07, + "loss": 4.4428, + "step": 45920 + }, + { + "epoch": 1.9787224878321918, + "learning_rate": 8.891217220797689e-07, + "loss": 4.3901, + "step": 45940 + }, + { + "epoch": 1.9795839255717793, + "learning_rate": 8.890732401042901e-07, + "loss": 4.508, + "step": 45960 + }, + { + "epoch": 1.9804453633113668, + "learning_rate": 8.890247581288112e-07, + "loss": 4.3826, + "step": 45980 + }, + { + "epoch": 1.981306801050954, + "learning_rate": 8.889762761533322e-07, + "loss": 4.355, + "step": 46000 + }, + { + "epoch": 1.9821682387905413, + "learning_rate": 8.889277941778534e-07, + "loss": 4.2947, + "step": 46020 + }, + { + "epoch": 1.9830296765301287, + "learning_rate": 8.888793122023746e-07, + "loss": 4.5955, + "step": 46040 + }, + { + "epoch": 1.9838911142697162, + "learning_rate": 8.888308302268957e-07, + "loss": 4.3466, + "step": 46060 + }, + { + "epoch": 1.9847525520093035, + "learning_rate": 8.887823482514166e-07, + "loss": 4.2408, + "step": 46080 + }, + { + "epoch": 1.985613989748891, + "learning_rate": 8.887338662759378e-07, + "loss": 4.5384, + "step": 46100 + }, + { + "epoch": 1.9864754274884784, + "learning_rate": 8.886853843004589e-07, + "loss": 4.2042, + "step": 46120 + }, + { + "epoch": 1.9873368652280656, + "learning_rate": 8.8863690232498e-07, + "loss": 4.2729, + "step": 46140 + }, + { + "epoch": 1.9881983029676529, + "learning_rate": 8.885884203495011e-07, + "loss": 4.3113, + "step": 46160 + }, + { + "epoch": 1.9890597407072403, + "learning_rate": 8.885399383740223e-07, + "loss": 4.5347, + "step": 46180 + }, + { + "epoch": 1.9899211784468278, + "learning_rate": 8.884914563985433e-07, + "loss": 4.6352, + "step": 46200 + }, + { + "epoch": 1.990782616186415, + "learning_rate": 8.884429744230644e-07, + "loss": 4.1104, + "step": 46220 + }, + { + "epoch": 1.9916440539260025, + "learning_rate": 8.883944924475855e-07, + "loss": 4.314, + "step": 46240 + }, + { + "epoch": 1.99250549166559, + "learning_rate": 8.883460104721067e-07, + "loss": 4.3375, + "step": 46260 + }, + { + "epoch": 1.9933669294051772, + "learning_rate": 8.882975284966278e-07, + "loss": 4.2552, + "step": 46280 + }, + { + "epoch": 1.9942283671447645, + "learning_rate": 8.882490465211489e-07, + "loss": 4.4266, + "step": 46300 + }, + { + "epoch": 1.995089804884352, + "learning_rate": 8.8820056454567e-07, + "loss": 4.2751, + "step": 46320 + }, + { + "epoch": 1.9959512426239394, + "learning_rate": 8.88152082570191e-07, + "loss": 4.1229, + "step": 46340 + }, + { + "epoch": 1.9968126803635267, + "learning_rate": 8.881036005947122e-07, + "loss": 4.6671, + "step": 46360 + }, + { + "epoch": 1.9976741181031141, + "learning_rate": 8.880551186192333e-07, + "loss": 4.4372, + "step": 46380 + }, + { + "epoch": 1.9985355558427016, + "learning_rate": 8.880066366437544e-07, + "loss": 4.2064, + "step": 46400 + }, + { + "epoch": 1.9993969935822888, + "learning_rate": 8.879581546682756e-07, + "loss": 4.3167, + "step": 46420 + } + ], + "logging_steps": 20, + "max_steps": 371472, + "num_input_tokens_seen": 0, + "num_train_epochs": 16, + "save_steps": 10000.0, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 5.079387107794944e+16, + "train_batch_size": 2, + "trial_name": null, + "trial_params": null +}