{ "best_metric": null, "best_model_checkpoint": null, "epoch": 20.0, "global_step": 6909440, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 4.999927924694332e-05, "loss": 3.4997, "step": 500 }, { "epoch": 0.0, "learning_rate": 4.999855559929604e-05, "loss": 3.2767, "step": 1000 }, { "epoch": 0.0, "learning_rate": 4.999783195164876e-05, "loss": 3.1477, "step": 1500 }, { "epoch": 0.01, "learning_rate": 4.9997108304001485e-05, "loss": 3.131, "step": 2000 }, { "epoch": 0.01, "learning_rate": 4.99963861036495e-05, "loss": 3.073, "step": 2500 }, { "epoch": 0.01, "learning_rate": 4.999566245600222e-05, "loss": 3.0248, "step": 3000 }, { "epoch": 0.01, "learning_rate": 4.9994938808354945e-05, "loss": 3.0096, "step": 3500 }, { "epoch": 0.01, "learning_rate": 4.999421660800297e-05, "loss": 2.9718, "step": 4000 }, { "epoch": 0.01, "learning_rate": 4.999349296035569e-05, "loss": 2.9779, "step": 4500 }, { "epoch": 0.01, "learning_rate": 4.999276931270842e-05, "loss": 2.9819, "step": 5000 }, { "epoch": 0.02, "learning_rate": 4.999204566506114e-05, "loss": 2.9197, "step": 5500 }, { "epoch": 0.02, "learning_rate": 4.999132201741386e-05, "loss": 2.9364, "step": 6000 }, { "epoch": 0.02, "learning_rate": 4.9990598369766585e-05, "loss": 2.9215, "step": 6500 }, { "epoch": 0.02, "learning_rate": 4.998987472211931e-05, "loss": 2.9021, "step": 7000 }, { "epoch": 0.02, "learning_rate": 4.998915107447203e-05, "loss": 2.8957, "step": 7500 }, { "epoch": 0.02, "learning_rate": 4.998842742682475e-05, "loss": 2.8654, "step": 8000 }, { "epoch": 0.02, "learning_rate": 4.9987703779177474e-05, "loss": 2.8845, "step": 8500 }, { "epoch": 0.03, "learning_rate": 4.9986980131530196e-05, "loss": 2.8847, "step": 9000 }, { "epoch": 0.03, "learning_rate": 4.998625648388292e-05, "loss": 2.8642, "step": 9500 }, { "epoch": 0.03, "learning_rate": 4.998553428353094e-05, "loss": 2.883, "step": 10000 }, { "epoch": 0.03, "learning_rate": 4.998481063588366e-05, "loss": 2.8344, "step": 10500 }, { "epoch": 0.03, "learning_rate": 4.9984086988236385e-05, "loss": 2.8419, "step": 11000 }, { "epoch": 0.03, "learning_rate": 4.99833647878844e-05, "loss": 2.853, "step": 11500 }, { "epoch": 0.03, "learning_rate": 4.998264114023713e-05, "loss": 2.8367, "step": 12000 }, { "epoch": 0.04, "learning_rate": 4.998191749258985e-05, "loss": 2.8295, "step": 12500 }, { "epoch": 0.04, "learning_rate": 4.9981193844942574e-05, "loss": 2.8572, "step": 13000 }, { "epoch": 0.04, "learning_rate": 4.9980470197295297e-05, "loss": 2.8238, "step": 13500 }, { "epoch": 0.04, "learning_rate": 4.997974654964802e-05, "loss": 2.8145, "step": 14000 }, { "epoch": 0.04, "learning_rate": 4.997902290200074e-05, "loss": 2.8248, "step": 14500 }, { "epoch": 0.04, "learning_rate": 4.997829925435347e-05, "loss": 2.8202, "step": 15000 }, { "epoch": 0.04, "learning_rate": 4.997757850129678e-05, "loss": 2.7887, "step": 15500 }, { "epoch": 0.05, "learning_rate": 4.99768548536495e-05, "loss": 2.7912, "step": 16000 }, { "epoch": 0.05, "learning_rate": 4.997613120600222e-05, "loss": 2.7774, "step": 16500 }, { "epoch": 0.05, "learning_rate": 4.9975407558354946e-05, "loss": 2.7762, "step": 17000 }, { "epoch": 0.05, "learning_rate": 4.997468391070767e-05, "loss": 2.7691, "step": 17500 }, { "epoch": 0.05, "learning_rate": 4.997396026306039e-05, "loss": 2.7753, "step": 18000 }, { "epoch": 0.05, "learning_rate": 4.9973240957299e-05, "loss": 2.788, "step": 18500 }, { "epoch": 0.05, "learning_rate": 4.997251730965172e-05, "loss": 2.78, "step": 19000 }, { "epoch": 0.06, "learning_rate": 4.9971793662004443e-05, "loss": 2.7617, "step": 19500 }, { "epoch": 0.06, "learning_rate": 4.997107001435717e-05, "loss": 2.7686, "step": 20000 }, { "epoch": 0.06, "learning_rate": 4.9970346366709895e-05, "loss": 2.7487, "step": 20500 }, { "epoch": 0.06, "learning_rate": 4.996962271906262e-05, "loss": 2.7864, "step": 21000 }, { "epoch": 0.06, "learning_rate": 4.9968899071415346e-05, "loss": 2.7937, "step": 21500 }, { "epoch": 0.06, "learning_rate": 4.996817542376807e-05, "loss": 2.7423, "step": 22000 }, { "epoch": 0.07, "learning_rate": 4.996745177612079e-05, "loss": 2.7574, "step": 22500 }, { "epoch": 0.07, "learning_rate": 4.996672812847351e-05, "loss": 2.7589, "step": 23000 }, { "epoch": 0.07, "learning_rate": 4.9966004480826235e-05, "loss": 2.772, "step": 23500 }, { "epoch": 0.07, "learning_rate": 4.996528083317896e-05, "loss": 2.7527, "step": 24000 }, { "epoch": 0.07, "learning_rate": 4.996455718553168e-05, "loss": 2.7626, "step": 24500 }, { "epoch": 0.07, "learning_rate": 4.99638335378844e-05, "loss": 2.7416, "step": 25000 }, { "epoch": 0.07, "learning_rate": 4.996311133753242e-05, "loss": 2.744, "step": 25500 }, { "epoch": 0.08, "learning_rate": 4.9962387689885146e-05, "loss": 2.7605, "step": 26000 }, { "epoch": 0.08, "learning_rate": 4.996166548953316e-05, "loss": 2.7441, "step": 26500 }, { "epoch": 0.08, "learning_rate": 4.996094184188589e-05, "loss": 2.7389, "step": 27000 }, { "epoch": 0.08, "learning_rate": 4.996021819423861e-05, "loss": 2.7474, "step": 27500 }, { "epoch": 0.08, "learning_rate": 4.9959494546591335e-05, "loss": 2.7306, "step": 28000 }, { "epoch": 0.08, "learning_rate": 4.995877089894406e-05, "loss": 2.7143, "step": 28500 }, { "epoch": 0.08, "learning_rate": 4.995804725129678e-05, "loss": 2.7321, "step": 29000 }, { "epoch": 0.09, "learning_rate": 4.99573236036495e-05, "loss": 2.7652, "step": 29500 }, { "epoch": 0.09, "learning_rate": 4.9956599956002224e-05, "loss": 2.7128, "step": 30000 }, { "epoch": 0.09, "learning_rate": 4.9955876308354946e-05, "loss": 2.7794, "step": 30500 }, { "epoch": 0.09, "learning_rate": 4.995515266070767e-05, "loss": 2.7457, "step": 31000 }, { "epoch": 0.09, "learning_rate": 4.995443046035569e-05, "loss": 2.7409, "step": 31500 }, { "epoch": 0.09, "learning_rate": 4.995370681270841e-05, "loss": 2.7397, "step": 32000 }, { "epoch": 0.09, "learning_rate": 4.9952983165061135e-05, "loss": 2.7085, "step": 32500 }, { "epoch": 0.1, "learning_rate": 4.995225951741386e-05, "loss": 2.7524, "step": 33000 }, { "epoch": 0.1, "learning_rate": 4.995153586976658e-05, "loss": 2.7253, "step": 33500 }, { "epoch": 0.1, "learning_rate": 4.9950813669414595e-05, "loss": 2.7287, "step": 34000 }, { "epoch": 0.1, "learning_rate": 4.9950090021767324e-05, "loss": 2.7169, "step": 34500 }, { "epoch": 0.1, "learning_rate": 4.994936782141535e-05, "loss": 2.7318, "step": 35000 }, { "epoch": 0.1, "learning_rate": 4.994864417376807e-05, "loss": 2.7368, "step": 35500 }, { "epoch": 0.1, "learning_rate": 4.994792052612079e-05, "loss": 2.7312, "step": 36000 }, { "epoch": 0.11, "learning_rate": 4.9947196878473513e-05, "loss": 2.7155, "step": 36500 }, { "epoch": 0.11, "learning_rate": 4.9946473230826236e-05, "loss": 2.7206, "step": 37000 }, { "epoch": 0.11, "learning_rate": 4.994574958317896e-05, "loss": 2.7177, "step": 37500 }, { "epoch": 0.11, "learning_rate": 4.994502593553168e-05, "loss": 2.6947, "step": 38000 }, { "epoch": 0.11, "learning_rate": 4.99443022878844e-05, "loss": 2.6902, "step": 38500 }, { "epoch": 0.11, "learning_rate": 4.994358008753242e-05, "loss": 2.7243, "step": 39000 }, { "epoch": 0.11, "learning_rate": 4.994285643988515e-05, "loss": 2.6992, "step": 39500 }, { "epoch": 0.12, "learning_rate": 4.994213279223787e-05, "loss": 2.7074, "step": 40000 }, { "epoch": 0.12, "learning_rate": 4.994140914459059e-05, "loss": 2.7062, "step": 40500 }, { "epoch": 0.12, "learning_rate": 4.9940685496943314e-05, "loss": 2.6884, "step": 41000 }, { "epoch": 0.12, "learning_rate": 4.9939961849296036e-05, "loss": 2.6899, "step": 41500 }, { "epoch": 0.12, "learning_rate": 4.9939238201648765e-05, "loss": 2.7145, "step": 42000 }, { "epoch": 0.12, "learning_rate": 4.993851600129678e-05, "loss": 2.7053, "step": 42500 }, { "epoch": 0.12, "learning_rate": 4.99377923536495e-05, "loss": 2.7103, "step": 43000 }, { "epoch": 0.13, "learning_rate": 4.993707015329752e-05, "loss": 2.6959, "step": 43500 }, { "epoch": 0.13, "learning_rate": 4.993634650565025e-05, "loss": 2.6981, "step": 44000 }, { "epoch": 0.13, "learning_rate": 4.993562285800297e-05, "loss": 2.6876, "step": 44500 }, { "epoch": 0.13, "learning_rate": 4.993489921035569e-05, "loss": 2.6825, "step": 45000 }, { "epoch": 0.13, "learning_rate": 4.9934175562708414e-05, "loss": 2.6952, "step": 45500 }, { "epoch": 0.13, "learning_rate": 4.9933451915061136e-05, "loss": 2.6979, "step": 46000 }, { "epoch": 0.13, "learning_rate": 4.993272826741386e-05, "loss": 2.6938, "step": 46500 }, { "epoch": 0.14, "learning_rate": 4.993200461976658e-05, "loss": 2.7004, "step": 47000 }, { "epoch": 0.14, "learning_rate": 4.9931282419414596e-05, "loss": 2.6848, "step": 47500 }, { "epoch": 0.14, "learning_rate": 4.993055877176732e-05, "loss": 2.6907, "step": 48000 }, { "epoch": 0.14, "learning_rate": 4.992983512412005e-05, "loss": 2.7073, "step": 48500 }, { "epoch": 0.14, "learning_rate": 4.992911147647277e-05, "loss": 2.6378, "step": 49000 }, { "epoch": 0.14, "learning_rate": 4.992838927612079e-05, "loss": 2.6931, "step": 49500 }, { "epoch": 0.14, "learning_rate": 4.9927665628473514e-05, "loss": 2.6709, "step": 50000 }, { "epoch": 0.15, "learning_rate": 4.9926941980826236e-05, "loss": 2.7085, "step": 50500 }, { "epoch": 0.15, "learning_rate": 4.992621978047425e-05, "loss": 2.6938, "step": 51000 }, { "epoch": 0.15, "learning_rate": 4.9925496132826974e-05, "loss": 2.6894, "step": 51500 }, { "epoch": 0.15, "learning_rate": 4.9924772485179697e-05, "loss": 2.6854, "step": 52000 }, { "epoch": 0.15, "learning_rate": 4.9924048837532426e-05, "loss": 2.6828, "step": 52500 }, { "epoch": 0.15, "learning_rate": 4.992332518988515e-05, "loss": 2.7017, "step": 53000 }, { "epoch": 0.15, "learning_rate": 4.992260154223787e-05, "loss": 2.677, "step": 53500 }, { "epoch": 0.16, "learning_rate": 4.992187789459059e-05, "loss": 2.6788, "step": 54000 }, { "epoch": 0.16, "learning_rate": 4.9921154246943314e-05, "loss": 2.6774, "step": 54500 }, { "epoch": 0.16, "learning_rate": 4.992043059929604e-05, "loss": 2.6771, "step": 55000 }, { "epoch": 0.16, "learning_rate": 4.991970695164876e-05, "loss": 2.6716, "step": 55500 }, { "epoch": 0.16, "learning_rate": 4.991898330400148e-05, "loss": 2.6622, "step": 56000 }, { "epoch": 0.16, "learning_rate": 4.991825965635421e-05, "loss": 2.6711, "step": 56500 }, { "epoch": 0.16, "learning_rate": 4.991753600870693e-05, "loss": 2.7045, "step": 57000 }, { "epoch": 0.17, "learning_rate": 4.9916812361059655e-05, "loss": 2.6518, "step": 57500 }, { "epoch": 0.17, "learning_rate": 4.991608871341238e-05, "loss": 2.684, "step": 58000 }, { "epoch": 0.17, "learning_rate": 4.99153650657651e-05, "loss": 2.6713, "step": 58500 }, { "epoch": 0.17, "learning_rate": 4.991464141811782e-05, "loss": 2.6608, "step": 59000 }, { "epoch": 0.17, "learning_rate": 4.9913919217765844e-05, "loss": 2.6881, "step": 59500 }, { "epoch": 0.17, "learning_rate": 4.9913195570118566e-05, "loss": 2.6478, "step": 60000 }, { "epoch": 0.18, "learning_rate": 4.991247192247129e-05, "loss": 2.6859, "step": 60500 }, { "epoch": 0.18, "learning_rate": 4.991174827482401e-05, "loss": 2.6577, "step": 61000 }, { "epoch": 0.18, "learning_rate": 4.991102462717673e-05, "loss": 2.6815, "step": 61500 }, { "epoch": 0.18, "learning_rate": 4.991030242682475e-05, "loss": 2.6627, "step": 62000 }, { "epoch": 0.18, "learning_rate": 4.990957877917748e-05, "loss": 2.6699, "step": 62500 }, { "epoch": 0.18, "learning_rate": 4.99088551315302e-05, "loss": 2.6567, "step": 63000 }, { "epoch": 0.18, "learning_rate": 4.9908132931178215e-05, "loss": 2.6464, "step": 63500 }, { "epoch": 0.19, "learning_rate": 4.990740928353094e-05, "loss": 2.6627, "step": 64000 }, { "epoch": 0.19, "learning_rate": 4.9906685635883666e-05, "loss": 2.6536, "step": 64500 }, { "epoch": 0.19, "learning_rate": 4.990596198823639e-05, "loss": 2.6682, "step": 65000 }, { "epoch": 0.19, "learning_rate": 4.990523834058911e-05, "loss": 2.6684, "step": 65500 }, { "epoch": 0.19, "learning_rate": 4.9904516140237126e-05, "loss": 2.6456, "step": 66000 }, { "epoch": 0.19, "learning_rate": 4.990379249258985e-05, "loss": 2.6441, "step": 66500 }, { "epoch": 0.19, "learning_rate": 4.990306884494258e-05, "loss": 2.636, "step": 67000 }, { "epoch": 0.2, "learning_rate": 4.99023451972953e-05, "loss": 2.6335, "step": 67500 }, { "epoch": 0.2, "learning_rate": 4.990162154964802e-05, "loss": 2.6513, "step": 68000 }, { "epoch": 0.2, "learning_rate": 4.9900897902000744e-05, "loss": 2.6286, "step": 68500 }, { "epoch": 0.2, "learning_rate": 4.990017570164876e-05, "loss": 2.653, "step": 69000 }, { "epoch": 0.2, "learning_rate": 4.9899453501296775e-05, "loss": 2.6234, "step": 69500 }, { "epoch": 0.2, "learning_rate": 4.98987298536495e-05, "loss": 2.6718, "step": 70000 }, { "epoch": 0.2, "learning_rate": 4.9898006206002227e-05, "loss": 2.6521, "step": 70500 }, { "epoch": 0.21, "learning_rate": 4.989728255835495e-05, "loss": 2.6371, "step": 71000 }, { "epoch": 0.21, "learning_rate": 4.989655891070767e-05, "loss": 2.6477, "step": 71500 }, { "epoch": 0.21, "learning_rate": 4.98958352630604e-05, "loss": 2.6493, "step": 72000 }, { "epoch": 0.21, "learning_rate": 4.989511161541312e-05, "loss": 2.64, "step": 72500 }, { "epoch": 0.21, "learning_rate": 4.9894387967765844e-05, "loss": 2.6353, "step": 73000 }, { "epoch": 0.21, "learning_rate": 4.989366432011857e-05, "loss": 2.6465, "step": 73500 }, { "epoch": 0.21, "learning_rate": 4.989294211976658e-05, "loss": 2.6686, "step": 74000 }, { "epoch": 0.22, "learning_rate": 4.9892218472119304e-05, "loss": 2.6793, "step": 74500 }, { "epoch": 0.22, "learning_rate": 4.989149482447203e-05, "loss": 2.6164, "step": 75000 }, { "epoch": 0.22, "learning_rate": 4.989077117682475e-05, "loss": 2.6425, "step": 75500 }, { "epoch": 0.22, "learning_rate": 4.989004752917748e-05, "loss": 2.6471, "step": 76000 }, { "epoch": 0.22, "learning_rate": 4.98893238815302e-05, "loss": 2.6553, "step": 76500 }, { "epoch": 0.22, "learning_rate": 4.988860023388292e-05, "loss": 2.6485, "step": 77000 }, { "epoch": 0.22, "learning_rate": 4.9887876586235645e-05, "loss": 2.6462, "step": 77500 }, { "epoch": 0.23, "learning_rate": 4.988715293858837e-05, "loss": 2.6275, "step": 78000 }, { "epoch": 0.23, "learning_rate": 4.988643073823638e-05, "loss": 2.6418, "step": 78500 }, { "epoch": 0.23, "learning_rate": 4.988570709058911e-05, "loss": 2.6582, "step": 79000 }, { "epoch": 0.23, "learning_rate": 4.9884983442941834e-05, "loss": 2.6366, "step": 79500 }, { "epoch": 0.23, "learning_rate": 4.9884259795294556e-05, "loss": 2.6325, "step": 80000 }, { "epoch": 0.23, "learning_rate": 4.988353614764728e-05, "loss": 2.6434, "step": 80500 }, { "epoch": 0.23, "learning_rate": 4.98828125e-05, "loss": 2.624, "step": 81000 }, { "epoch": 0.24, "learning_rate": 4.988209029964802e-05, "loss": 2.6362, "step": 81500 }, { "epoch": 0.24, "learning_rate": 4.9881366652000745e-05, "loss": 2.6123, "step": 82000 }, { "epoch": 0.24, "learning_rate": 4.988064300435347e-05, "loss": 2.6453, "step": 82500 }, { "epoch": 0.24, "learning_rate": 4.987991935670619e-05, "loss": 2.6372, "step": 83000 }, { "epoch": 0.24, "learning_rate": 4.987919570905891e-05, "loss": 2.6045, "step": 83500 }, { "epoch": 0.24, "learning_rate": 4.987847350870693e-05, "loss": 2.6328, "step": 84000 }, { "epoch": 0.24, "learning_rate": 4.987774986105965e-05, "loss": 2.665, "step": 84500 }, { "epoch": 0.25, "learning_rate": 4.987702621341238e-05, "loss": 2.6354, "step": 85000 }, { "epoch": 0.25, "learning_rate": 4.98763025657651e-05, "loss": 2.6335, "step": 85500 }, { "epoch": 0.25, "learning_rate": 4.9875580365413116e-05, "loss": 2.6547, "step": 86000 }, { "epoch": 0.25, "learning_rate": 4.9874856717765845e-05, "loss": 2.6419, "step": 86500 }, { "epoch": 0.25, "learning_rate": 4.987413451741386e-05, "loss": 2.6194, "step": 87000 }, { "epoch": 0.25, "learning_rate": 4.987341086976658e-05, "loss": 2.6221, "step": 87500 }, { "epoch": 0.25, "learning_rate": 4.9872687222119305e-05, "loss": 2.6137, "step": 88000 }, { "epoch": 0.26, "learning_rate": 4.987196357447203e-05, "loss": 2.6274, "step": 88500 }, { "epoch": 0.26, "learning_rate": 4.9871239926824757e-05, "loss": 2.6297, "step": 89000 }, { "epoch": 0.26, "learning_rate": 4.987051627917748e-05, "loss": 2.6419, "step": 89500 }, { "epoch": 0.26, "learning_rate": 4.98697926315302e-05, "loss": 2.6202, "step": 90000 }, { "epoch": 0.26, "learning_rate": 4.9869070431178217e-05, "loss": 2.6238, "step": 90500 }, { "epoch": 0.26, "learning_rate": 4.986834678353094e-05, "loss": 2.6561, "step": 91000 }, { "epoch": 0.26, "learning_rate": 4.986762313588366e-05, "loss": 2.6071, "step": 91500 }, { "epoch": 0.27, "learning_rate": 4.986689948823638e-05, "loss": 2.6369, "step": 92000 }, { "epoch": 0.27, "learning_rate": 4.9866177287884406e-05, "loss": 2.6028, "step": 92500 }, { "epoch": 0.27, "learning_rate": 4.986545364023713e-05, "loss": 2.6313, "step": 93000 }, { "epoch": 0.27, "learning_rate": 4.986472999258985e-05, "loss": 2.5917, "step": 93500 }, { "epoch": 0.27, "learning_rate": 4.986400634494257e-05, "loss": 2.6462, "step": 94000 }, { "epoch": 0.27, "learning_rate": 4.98632826972953e-05, "loss": 2.6301, "step": 94500 }, { "epoch": 0.27, "learning_rate": 4.9862559049648024e-05, "loss": 2.6154, "step": 95000 }, { "epoch": 0.28, "learning_rate": 4.986183684929604e-05, "loss": 2.6074, "step": 95500 }, { "epoch": 0.28, "learning_rate": 4.986111320164876e-05, "loss": 2.6039, "step": 96000 }, { "epoch": 0.28, "learning_rate": 4.9860389554001484e-05, "loss": 2.6335, "step": 96500 }, { "epoch": 0.28, "learning_rate": 4.9859665906354206e-05, "loss": 2.6242, "step": 97000 }, { "epoch": 0.28, "learning_rate": 4.985894225870693e-05, "loss": 2.6268, "step": 97500 }, { "epoch": 0.28, "learning_rate": 4.985821861105966e-05, "loss": 2.6285, "step": 98000 }, { "epoch": 0.29, "learning_rate": 4.985749496341238e-05, "loss": 2.6378, "step": 98500 }, { "epoch": 0.29, "learning_rate": 4.98567713157651e-05, "loss": 2.6083, "step": 99000 }, { "epoch": 0.29, "learning_rate": 4.9856047668117824e-05, "loss": 2.5959, "step": 99500 }, { "epoch": 0.29, "learning_rate": 4.985532546776584e-05, "loss": 2.6288, "step": 100000 }, { "epoch": 0.29, "learning_rate": 4.9854603267413855e-05, "loss": 2.6411, "step": 100500 }, { "epoch": 0.29, "learning_rate": 4.985387961976658e-05, "loss": 2.6273, "step": 101000 }, { "epoch": 0.29, "learning_rate": 4.9853155972119306e-05, "loss": 2.6014, "step": 101500 }, { "epoch": 0.3, "learning_rate": 4.985243232447203e-05, "loss": 2.6173, "step": 102000 }, { "epoch": 0.3, "learning_rate": 4.985170867682476e-05, "loss": 2.6032, "step": 102500 }, { "epoch": 0.3, "learning_rate": 4.985098502917748e-05, "loss": 2.627, "step": 103000 }, { "epoch": 0.3, "learning_rate": 4.98502613815302e-05, "loss": 2.6256, "step": 103500 }, { "epoch": 0.3, "learning_rate": 4.984953918117822e-05, "loss": 2.6208, "step": 104000 }, { "epoch": 0.3, "learning_rate": 4.984881553353094e-05, "loss": 2.6073, "step": 104500 }, { "epoch": 0.3, "learning_rate": 4.984809188588366e-05, "loss": 2.6002, "step": 105000 }, { "epoch": 0.31, "learning_rate": 4.9847368238236384e-05, "loss": 2.6262, "step": 105500 }, { "epoch": 0.31, "learning_rate": 4.9846644590589106e-05, "loss": 2.6144, "step": 106000 }, { "epoch": 0.31, "learning_rate": 4.984592094294183e-05, "loss": 2.6177, "step": 106500 }, { "epoch": 0.31, "learning_rate": 4.984519729529456e-05, "loss": 2.6107, "step": 107000 }, { "epoch": 0.31, "learning_rate": 4.984447364764728e-05, "loss": 2.6033, "step": 107500 }, { "epoch": 0.31, "learning_rate": 4.984375e-05, "loss": 2.6085, "step": 108000 }, { "epoch": 0.31, "learning_rate": 4.9843026352352724e-05, "loss": 2.5907, "step": 108500 }, { "epoch": 0.32, "learning_rate": 4.9842304152000747e-05, "loss": 2.6222, "step": 109000 }, { "epoch": 0.32, "learning_rate": 4.984158050435347e-05, "loss": 2.5994, "step": 109500 }, { "epoch": 0.32, "learning_rate": 4.984085685670619e-05, "loss": 2.6151, "step": 110000 }, { "epoch": 0.32, "learning_rate": 4.984013320905891e-05, "loss": 2.6356, "step": 110500 }, { "epoch": 0.32, "learning_rate": 4.9839409561411635e-05, "loss": 2.6248, "step": 111000 }, { "epoch": 0.32, "learning_rate": 4.983868591376436e-05, "loss": 2.5996, "step": 111500 }, { "epoch": 0.32, "learning_rate": 4.983796226611708e-05, "loss": 2.6143, "step": 112000 }, { "epoch": 0.33, "learning_rate": 4.983723861846981e-05, "loss": 2.6101, "step": 112500 }, { "epoch": 0.33, "learning_rate": 4.9836516418117825e-05, "loss": 2.6164, "step": 113000 }, { "epoch": 0.33, "learning_rate": 4.983579421776584e-05, "loss": 2.6186, "step": 113500 }, { "epoch": 0.33, "learning_rate": 4.983507057011856e-05, "loss": 2.585, "step": 114000 }, { "epoch": 0.33, "learning_rate": 4.983434981706188e-05, "loss": 2.6116, "step": 114500 }, { "epoch": 0.33, "learning_rate": 4.98336261694146e-05, "loss": 2.5995, "step": 115000 }, { "epoch": 0.33, "learning_rate": 4.983290252176732e-05, "loss": 2.6086, "step": 115500 }, { "epoch": 0.34, "learning_rate": 4.9832178874120045e-05, "loss": 2.6049, "step": 116000 }, { "epoch": 0.34, "learning_rate": 4.983145522647277e-05, "loss": 2.6249, "step": 116500 }, { "epoch": 0.34, "learning_rate": 4.9830731578825496e-05, "loss": 2.592, "step": 117000 }, { "epoch": 0.34, "learning_rate": 4.983000793117822e-05, "loss": 2.6163, "step": 117500 }, { "epoch": 0.34, "learning_rate": 4.982928428353094e-05, "loss": 2.5989, "step": 118000 }, { "epoch": 0.34, "learning_rate": 4.982856063588366e-05, "loss": 2.6092, "step": 118500 }, { "epoch": 0.34, "learning_rate": 4.9827836988236385e-05, "loss": 2.6214, "step": 119000 }, { "epoch": 0.35, "learning_rate": 4.982711334058911e-05, "loss": 2.6576, "step": 119500 }, { "epoch": 0.35, "learning_rate": 4.982639114023713e-05, "loss": 2.6243, "step": 120000 }, { "epoch": 0.35, "learning_rate": 4.9825668939885145e-05, "loss": 2.6128, "step": 120500 }, { "epoch": 0.35, "learning_rate": 4.982494529223787e-05, "loss": 2.588, "step": 121000 }, { "epoch": 0.35, "learning_rate": 4.982422164459059e-05, "loss": 2.6178, "step": 121500 }, { "epoch": 0.35, "learning_rate": 4.982349799694331e-05, "loss": 2.6046, "step": 122000 }, { "epoch": 0.35, "learning_rate": 4.9822774349296034e-05, "loss": 2.5921, "step": 122500 }, { "epoch": 0.36, "learning_rate": 4.9822050701648756e-05, "loss": 2.6107, "step": 123000 }, { "epoch": 0.36, "learning_rate": 4.9821327054001485e-05, "loss": 2.5997, "step": 123500 }, { "epoch": 0.36, "learning_rate": 4.98206048536495e-05, "loss": 2.5989, "step": 124000 }, { "epoch": 0.36, "learning_rate": 4.981988120600223e-05, "loss": 2.6019, "step": 124500 }, { "epoch": 0.36, "learning_rate": 4.981915755835495e-05, "loss": 2.617, "step": 125000 }, { "epoch": 0.36, "learning_rate": 4.9818433910707674e-05, "loss": 2.5885, "step": 125500 }, { "epoch": 0.36, "learning_rate": 4.9817710263060396e-05, "loss": 2.6094, "step": 126000 }, { "epoch": 0.37, "learning_rate": 4.981698661541312e-05, "loss": 2.592, "step": 126500 }, { "epoch": 0.37, "learning_rate": 4.981626296776584e-05, "loss": 2.5981, "step": 127000 }, { "epoch": 0.37, "learning_rate": 4.981553932011856e-05, "loss": 2.6105, "step": 127500 }, { "epoch": 0.37, "learning_rate": 4.9814815672471285e-05, "loss": 2.6168, "step": 128000 }, { "epoch": 0.37, "learning_rate": 4.981409202482401e-05, "loss": 2.6152, "step": 128500 }, { "epoch": 0.37, "learning_rate": 4.9813368377176737e-05, "loss": 2.592, "step": 129000 }, { "epoch": 0.37, "learning_rate": 4.981264472952946e-05, "loss": 2.611, "step": 129500 }, { "epoch": 0.38, "learning_rate": 4.981192108188218e-05, "loss": 2.5882, "step": 130000 }, { "epoch": 0.38, "learning_rate": 4.9811198881530197e-05, "loss": 2.617, "step": 130500 }, { "epoch": 0.38, "learning_rate": 4.981047523388292e-05, "loss": 2.595, "step": 131000 }, { "epoch": 0.38, "learning_rate": 4.980975158623565e-05, "loss": 2.6077, "step": 131500 }, { "epoch": 0.38, "learning_rate": 4.9809029385883663e-05, "loss": 2.6118, "step": 132000 }, { "epoch": 0.38, "learning_rate": 4.9808305738236386e-05, "loss": 2.612, "step": 132500 }, { "epoch": 0.38, "learning_rate": 4.980758209058911e-05, "loss": 2.6042, "step": 133000 }, { "epoch": 0.39, "learning_rate": 4.980685844294184e-05, "loss": 2.5783, "step": 133500 }, { "epoch": 0.39, "learning_rate": 4.980613479529456e-05, "loss": 2.5667, "step": 134000 }, { "epoch": 0.39, "learning_rate": 4.980541114764728e-05, "loss": 2.5936, "step": 134500 }, { "epoch": 0.39, "learning_rate": 4.9804687500000004e-05, "loss": 2.6037, "step": 135000 }, { "epoch": 0.39, "learning_rate": 4.9803963852352726e-05, "loss": 2.607, "step": 135500 }, { "epoch": 0.39, "learning_rate": 4.980324165200074e-05, "loss": 2.595, "step": 136000 }, { "epoch": 0.4, "learning_rate": 4.9802519451648764e-05, "loss": 2.5824, "step": 136500 }, { "epoch": 0.4, "learning_rate": 4.9801795804001486e-05, "loss": 2.581, "step": 137000 }, { "epoch": 0.4, "learning_rate": 4.980107215635421e-05, "loss": 2.613, "step": 137500 }, { "epoch": 0.4, "learning_rate": 4.9800349956002224e-05, "loss": 2.5996, "step": 138000 }, { "epoch": 0.4, "learning_rate": 4.9799626308354946e-05, "loss": 2.5965, "step": 138500 }, { "epoch": 0.4, "learning_rate": 4.979890266070767e-05, "loss": 2.6006, "step": 139000 }, { "epoch": 0.4, "learning_rate": 4.97981790130604e-05, "loss": 2.6041, "step": 139500 }, { "epoch": 0.41, "learning_rate": 4.979745536541312e-05, "loss": 2.5862, "step": 140000 }, { "epoch": 0.41, "learning_rate": 4.979673171776584e-05, "loss": 2.578, "step": 140500 }, { "epoch": 0.41, "learning_rate": 4.9796008070118564e-05, "loss": 2.5761, "step": 141000 }, { "epoch": 0.41, "learning_rate": 4.9795284422471286e-05, "loss": 2.5802, "step": 141500 }, { "epoch": 0.41, "learning_rate": 4.979456077482401e-05, "loss": 2.6018, "step": 142000 }, { "epoch": 0.41, "learning_rate": 4.979383712717674e-05, "loss": 2.6103, "step": 142500 }, { "epoch": 0.41, "learning_rate": 4.979311347952946e-05, "loss": 2.606, "step": 143000 }, { "epoch": 0.42, "learning_rate": 4.9792391279177475e-05, "loss": 2.6264, "step": 143500 }, { "epoch": 0.42, "learning_rate": 4.97916676315302e-05, "loss": 2.5784, "step": 144000 }, { "epoch": 0.42, "learning_rate": 4.979094398388292e-05, "loss": 2.6203, "step": 144500 }, { "epoch": 0.42, "learning_rate": 4.979022033623564e-05, "loss": 2.6052, "step": 145000 }, { "epoch": 0.42, "learning_rate": 4.9789496688588364e-05, "loss": 2.5849, "step": 145500 }, { "epoch": 0.42, "learning_rate": 4.9788773040941086e-05, "loss": 2.6116, "step": 146000 }, { "epoch": 0.42, "learning_rate": 4.9788049393293815e-05, "loss": 2.5938, "step": 146500 }, { "epoch": 0.43, "learning_rate": 4.978732574564654e-05, "loss": 2.5817, "step": 147000 }, { "epoch": 0.43, "learning_rate": 4.978660354529456e-05, "loss": 2.5906, "step": 147500 }, { "epoch": 0.43, "learning_rate": 4.978587989764728e-05, "loss": 2.5725, "step": 148000 }, { "epoch": 0.43, "learning_rate": 4.9785156250000004e-05, "loss": 2.6172, "step": 148500 }, { "epoch": 0.43, "learning_rate": 4.9784432602352727e-05, "loss": 2.5772, "step": 149000 }, { "epoch": 0.43, "learning_rate": 4.978370895470545e-05, "loss": 2.6032, "step": 149500 }, { "epoch": 0.43, "learning_rate": 4.978298530705817e-05, "loss": 2.6136, "step": 150000 }, { "epoch": 0.44, "learning_rate": 4.978226165941089e-05, "loss": 2.5862, "step": 150500 }, { "epoch": 0.44, "learning_rate": 4.9781538011763616e-05, "loss": 2.6046, "step": 151000 }, { "epoch": 0.44, "learning_rate": 4.978081725870693e-05, "loss": 2.5817, "step": 151500 }, { "epoch": 0.44, "learning_rate": 4.978009505835495e-05, "loss": 2.5955, "step": 152000 }, { "epoch": 0.44, "learning_rate": 4.977937141070767e-05, "loss": 2.595, "step": 152500 }, { "epoch": 0.44, "learning_rate": 4.977864776306039e-05, "loss": 2.5895, "step": 153000 }, { "epoch": 0.44, "learning_rate": 4.9777924115413113e-05, "loss": 2.5952, "step": 153500 }, { "epoch": 0.45, "learning_rate": 4.977720046776584e-05, "loss": 2.5721, "step": 154000 }, { "epoch": 0.45, "learning_rate": 4.9776476820118565e-05, "loss": 2.6026, "step": 154500 }, { "epoch": 0.45, "learning_rate": 4.977575317247129e-05, "loss": 2.5926, "step": 155000 }, { "epoch": 0.45, "learning_rate": 4.9775029524824016e-05, "loss": 2.6018, "step": 155500 }, { "epoch": 0.45, "learning_rate": 4.977430587717674e-05, "loss": 2.5689, "step": 156000 }, { "epoch": 0.45, "learning_rate": 4.977358222952946e-05, "loss": 2.6006, "step": 156500 }, { "epoch": 0.45, "learning_rate": 4.977285858188218e-05, "loss": 2.6144, "step": 157000 }, { "epoch": 0.46, "learning_rate": 4.9772134934234905e-05, "loss": 2.5735, "step": 157500 }, { "epoch": 0.46, "learning_rate": 4.977141128658763e-05, "loss": 2.5917, "step": 158000 }, { "epoch": 0.46, "learning_rate": 4.977068908623564e-05, "loss": 2.5926, "step": 158500 }, { "epoch": 0.46, "learning_rate": 4.9769965438588365e-05, "loss": 2.5988, "step": 159000 }, { "epoch": 0.46, "learning_rate": 4.976924179094109e-05, "loss": 2.5623, "step": 159500 }, { "epoch": 0.46, "learning_rate": 4.9768518143293816e-05, "loss": 2.5749, "step": 160000 }, { "epoch": 0.46, "learning_rate": 4.976779449564654e-05, "loss": 2.606, "step": 160500 }, { "epoch": 0.47, "learning_rate": 4.9767072295294554e-05, "loss": 2.5791, "step": 161000 }, { "epoch": 0.47, "learning_rate": 4.976634864764728e-05, "loss": 2.5861, "step": 161500 }, { "epoch": 0.47, "learning_rate": 4.9765625000000005e-05, "loss": 2.5721, "step": 162000 }, { "epoch": 0.47, "learning_rate": 4.976490135235273e-05, "loss": 2.6001, "step": 162500 }, { "epoch": 0.47, "learning_rate": 4.976417915200074e-05, "loss": 2.5877, "step": 163000 }, { "epoch": 0.47, "learning_rate": 4.9763455504353465e-05, "loss": 2.5896, "step": 163500 }, { "epoch": 0.47, "learning_rate": 4.976273185670619e-05, "loss": 2.5604, "step": 164000 }, { "epoch": 0.48, "learning_rate": 4.9762008209058916e-05, "loss": 2.5859, "step": 164500 }, { "epoch": 0.48, "learning_rate": 4.976128456141164e-05, "loss": 2.5724, "step": 165000 }, { "epoch": 0.48, "learning_rate": 4.9760562361059654e-05, "loss": 2.5876, "step": 165500 }, { "epoch": 0.48, "learning_rate": 4.9759838713412376e-05, "loss": 2.5885, "step": 166000 }, { "epoch": 0.48, "learning_rate": 4.97591150657651e-05, "loss": 2.582, "step": 166500 }, { "epoch": 0.48, "learning_rate": 4.9758392865413114e-05, "loss": 2.573, "step": 167000 }, { "epoch": 0.48, "learning_rate": 4.975766921776584e-05, "loss": 2.5841, "step": 167500 }, { "epoch": 0.49, "learning_rate": 4.9756945570118565e-05, "loss": 2.5913, "step": 168000 }, { "epoch": 0.49, "learning_rate": 4.975622192247129e-05, "loss": 2.588, "step": 168500 }, { "epoch": 0.49, "learning_rate": 4.975549827482402e-05, "loss": 2.6033, "step": 169000 }, { "epoch": 0.49, "learning_rate": 4.975477607447203e-05, "loss": 2.575, "step": 169500 }, { "epoch": 0.49, "learning_rate": 4.9754052426824755e-05, "loss": 2.5825, "step": 170000 }, { "epoch": 0.49, "learning_rate": 4.975332877917748e-05, "loss": 2.5677, "step": 170500 }, { "epoch": 0.49, "learning_rate": 4.97526051315302e-05, "loss": 2.579, "step": 171000 }, { "epoch": 0.5, "learning_rate": 4.975188148388292e-05, "loss": 2.5699, "step": 171500 }, { "epoch": 0.5, "learning_rate": 4.9751157836235643e-05, "loss": 2.5901, "step": 172000 }, { "epoch": 0.5, "learning_rate": 4.9750434188588366e-05, "loss": 2.5933, "step": 172500 }, { "epoch": 0.5, "learning_rate": 4.974971054094109e-05, "loss": 2.5922, "step": 173000 }, { "epoch": 0.5, "learning_rate": 4.974898689329382e-05, "loss": 2.5713, "step": 173500 }, { "epoch": 0.5, "learning_rate": 4.974826469294183e-05, "loss": 2.6005, "step": 174000 }, { "epoch": 0.51, "learning_rate": 4.9747541045294555e-05, "loss": 2.6035, "step": 174500 }, { "epoch": 0.51, "learning_rate": 4.974681739764728e-05, "loss": 2.6029, "step": 175000 }, { "epoch": 0.51, "learning_rate": 4.974609375e-05, "loss": 2.5654, "step": 175500 }, { "epoch": 0.51, "learning_rate": 4.9745372996943315e-05, "loss": 2.5823, "step": 176000 }, { "epoch": 0.51, "learning_rate": 4.974464934929604e-05, "loss": 2.5894, "step": 176500 }, { "epoch": 0.51, "learning_rate": 4.9743925701648766e-05, "loss": 2.5976, "step": 177000 }, { "epoch": 0.51, "learning_rate": 4.974320205400149e-05, "loss": 2.5442, "step": 177500 }, { "epoch": 0.52, "learning_rate": 4.974247840635421e-05, "loss": 2.5717, "step": 178000 }, { "epoch": 0.52, "learning_rate": 4.974175475870693e-05, "loss": 2.5774, "step": 178500 }, { "epoch": 0.52, "learning_rate": 4.974103255835495e-05, "loss": 2.584, "step": 179000 }, { "epoch": 0.52, "learning_rate": 4.974030891070767e-05, "loss": 2.5976, "step": 179500 }, { "epoch": 0.52, "learning_rate": 4.973958526306039e-05, "loss": 2.6014, "step": 180000 }, { "epoch": 0.52, "learning_rate": 4.9738861615413115e-05, "loss": 2.6014, "step": 180500 }, { "epoch": 0.52, "learning_rate": 4.9738137967765844e-05, "loss": 2.5905, "step": 181000 }, { "epoch": 0.53, "learning_rate": 4.9737414320118566e-05, "loss": 2.5585, "step": 181500 }, { "epoch": 0.53, "learning_rate": 4.973669211976658e-05, "loss": 2.5887, "step": 182000 }, { "epoch": 0.53, "learning_rate": 4.9735968472119304e-05, "loss": 2.5591, "step": 182500 }, { "epoch": 0.53, "learning_rate": 4.973524627176732e-05, "loss": 2.5815, "step": 183000 }, { "epoch": 0.53, "learning_rate": 4.973452262412004e-05, "loss": 2.5682, "step": 183500 }, { "epoch": 0.53, "learning_rate": 4.9733798976472764e-05, "loss": 2.567, "step": 184000 }, { "epoch": 0.53, "learning_rate": 4.973307532882549e-05, "loss": 2.5902, "step": 184500 }, { "epoch": 0.54, "learning_rate": 4.9732351681178215e-05, "loss": 2.5673, "step": 185000 }, { "epoch": 0.54, "learning_rate": 4.9731628033530944e-05, "loss": 2.5725, "step": 185500 }, { "epoch": 0.54, "learning_rate": 4.9730904385883667e-05, "loss": 2.5808, "step": 186000 }, { "epoch": 0.54, "learning_rate": 4.973018218553168e-05, "loss": 2.5914, "step": 186500 }, { "epoch": 0.54, "learning_rate": 4.9729458537884404e-05, "loss": 2.5639, "step": 187000 }, { "epoch": 0.54, "learning_rate": 4.9728734890237127e-05, "loss": 2.5918, "step": 187500 }, { "epoch": 0.54, "learning_rate": 4.972801124258985e-05, "loss": 2.5788, "step": 188000 }, { "epoch": 0.55, "learning_rate": 4.972728759494257e-05, "loss": 2.5749, "step": 188500 }, { "epoch": 0.55, "learning_rate": 4.972656394729529e-05, "loss": 2.5681, "step": 189000 }, { "epoch": 0.55, "learning_rate": 4.9725840299648016e-05, "loss": 2.5967, "step": 189500 }, { "epoch": 0.55, "learning_rate": 4.9725116652000745e-05, "loss": 2.5961, "step": 190000 }, { "epoch": 0.55, "learning_rate": 4.972439300435347e-05, "loss": 2.5669, "step": 190500 }, { "epoch": 0.55, "learning_rate": 4.972366935670619e-05, "loss": 2.5713, "step": 191000 }, { "epoch": 0.55, "learning_rate": 4.972294570905892e-05, "loss": 2.5539, "step": 191500 }, { "epoch": 0.56, "learning_rate": 4.972222206141164e-05, "loss": 2.5615, "step": 192000 }, { "epoch": 0.56, "learning_rate": 4.9721499861059656e-05, "loss": 2.5902, "step": 192500 }, { "epoch": 0.56, "learning_rate": 4.972077621341238e-05, "loss": 2.5476, "step": 193000 }, { "epoch": 0.56, "learning_rate": 4.9720054013060394e-05, "loss": 2.5784, "step": 193500 }, { "epoch": 0.56, "learning_rate": 4.971933036541312e-05, "loss": 2.5748, "step": 194000 }, { "epoch": 0.56, "learning_rate": 4.9718606717765845e-05, "loss": 2.605, "step": 194500 }, { "epoch": 0.56, "learning_rate": 4.971788451741386e-05, "loss": 2.576, "step": 195000 }, { "epoch": 0.57, "learning_rate": 4.971716086976658e-05, "loss": 2.5688, "step": 195500 }, { "epoch": 0.57, "learning_rate": 4.9716437222119305e-05, "loss": 2.5431, "step": 196000 }, { "epoch": 0.57, "learning_rate": 4.971571357447203e-05, "loss": 2.589, "step": 196500 }, { "epoch": 0.57, "learning_rate": 4.971498992682475e-05, "loss": 2.5671, "step": 197000 }, { "epoch": 0.57, "learning_rate": 4.971426627917747e-05, "loss": 2.5773, "step": 197500 }, { "epoch": 0.57, "learning_rate": 4.9713542631530194e-05, "loss": 2.5651, "step": 198000 }, { "epoch": 0.57, "learning_rate": 4.971281898388292e-05, "loss": 2.5874, "step": 198500 }, { "epoch": 0.58, "learning_rate": 4.9712095336235645e-05, "loss": 2.5541, "step": 199000 }, { "epoch": 0.58, "learning_rate": 4.9711371688588374e-05, "loss": 2.6027, "step": 199500 }, { "epoch": 0.58, "learning_rate": 4.9710648040941096e-05, "loss": 2.5907, "step": 200000 }, { "epoch": 0.58, "learning_rate": 4.970992439329382e-05, "loss": 2.5391, "step": 200500 }, { "epoch": 0.58, "learning_rate": 4.970920074564654e-05, "loss": 2.5756, "step": 201000 }, { "epoch": 0.58, "learning_rate": 4.970847999258985e-05, "loss": 2.5635, "step": 201500 }, { "epoch": 0.58, "learning_rate": 4.970775634494257e-05, "loss": 2.56, "step": 202000 }, { "epoch": 0.59, "learning_rate": 4.9707032697295294e-05, "loss": 2.5507, "step": 202500 }, { "epoch": 0.59, "learning_rate": 4.970630904964802e-05, "loss": 2.5677, "step": 203000 }, { "epoch": 0.59, "learning_rate": 4.9705585402000745e-05, "loss": 2.5626, "step": 203500 }, { "epoch": 0.59, "learning_rate": 4.970486320164876e-05, "loss": 2.5593, "step": 204000 }, { "epoch": 0.59, "learning_rate": 4.970413955400148e-05, "loss": 2.5737, "step": 204500 }, { "epoch": 0.59, "learning_rate": 4.9703415906354205e-05, "loss": 2.5599, "step": 205000 }, { "epoch": 0.59, "learning_rate": 4.970269225870693e-05, "loss": 2.5798, "step": 205500 }, { "epoch": 0.6, "learning_rate": 4.970196861105965e-05, "loss": 2.5696, "step": 206000 }, { "epoch": 0.6, "learning_rate": 4.970124496341238e-05, "loss": 2.5569, "step": 206500 }, { "epoch": 0.6, "learning_rate": 4.97005213157651e-05, "loss": 2.5668, "step": 207000 }, { "epoch": 0.6, "learning_rate": 4.969979766811782e-05, "loss": 2.5624, "step": 207500 }, { "epoch": 0.6, "learning_rate": 4.9699075467765846e-05, "loss": 2.5666, "step": 208000 }, { "epoch": 0.6, "learning_rate": 4.969835182011857e-05, "loss": 2.5669, "step": 208500 }, { "epoch": 0.6, "learning_rate": 4.969762817247129e-05, "loss": 2.5701, "step": 209000 }, { "epoch": 0.61, "learning_rate": 4.969690452482401e-05, "loss": 2.5646, "step": 209500 }, { "epoch": 0.61, "learning_rate": 4.9696180877176735e-05, "loss": 2.5781, "step": 210000 }, { "epoch": 0.61, "learning_rate": 4.969545722952946e-05, "loss": 2.5663, "step": 210500 }, { "epoch": 0.61, "learning_rate": 4.969473502917747e-05, "loss": 2.5697, "step": 211000 }, { "epoch": 0.61, "learning_rate": 4.9694012828825495e-05, "loss": 2.5585, "step": 211500 }, { "epoch": 0.61, "learning_rate": 4.969328918117822e-05, "loss": 2.543, "step": 212000 }, { "epoch": 0.62, "learning_rate": 4.969256553353094e-05, "loss": 2.5626, "step": 212500 }, { "epoch": 0.62, "learning_rate": 4.969184188588366e-05, "loss": 2.5703, "step": 213000 }, { "epoch": 0.62, "learning_rate": 4.969111968553168e-05, "loss": 2.5592, "step": 213500 }, { "epoch": 0.62, "learning_rate": 4.96903960378844e-05, "loss": 2.55, "step": 214000 }, { "epoch": 0.62, "learning_rate": 4.968967239023713e-05, "loss": 2.5781, "step": 214500 }, { "epoch": 0.62, "learning_rate": 4.968894874258985e-05, "loss": 2.5719, "step": 215000 }, { "epoch": 0.62, "learning_rate": 4.968822509494257e-05, "loss": 2.5836, "step": 215500 }, { "epoch": 0.63, "learning_rate": 4.9687501447295295e-05, "loss": 2.5637, "step": 216000 }, { "epoch": 0.63, "learning_rate": 4.9686777799648024e-05, "loss": 2.5533, "step": 216500 }, { "epoch": 0.63, "learning_rate": 4.968605559929604e-05, "loss": 2.5651, "step": 217000 }, { "epoch": 0.63, "learning_rate": 4.968533195164876e-05, "loss": 2.5562, "step": 217500 }, { "epoch": 0.63, "learning_rate": 4.9684608304001484e-05, "loss": 2.5557, "step": 218000 }, { "epoch": 0.63, "learning_rate": 4.9683884656354206e-05, "loss": 2.5495, "step": 218500 }, { "epoch": 0.63, "learning_rate": 4.968316100870693e-05, "loss": 2.5603, "step": 219000 }, { "epoch": 0.64, "learning_rate": 4.968243736105965e-05, "loss": 2.5799, "step": 219500 }, { "epoch": 0.64, "learning_rate": 4.968171371341237e-05, "loss": 2.5503, "step": 220000 }, { "epoch": 0.64, "learning_rate": 4.9680990065765095e-05, "loss": 2.5524, "step": 220500 }, { "epoch": 0.64, "learning_rate": 4.968026786541312e-05, "loss": 2.5719, "step": 221000 }, { "epoch": 0.64, "learning_rate": 4.9679544217765846e-05, "loss": 2.5378, "step": 221500 }, { "epoch": 0.64, "learning_rate": 4.967882057011857e-05, "loss": 2.5751, "step": 222000 }, { "epoch": 0.64, "learning_rate": 4.967809692247129e-05, "loss": 2.5557, "step": 222500 }, { "epoch": 0.65, "learning_rate": 4.967737327482401e-05, "loss": 2.5859, "step": 223000 }, { "epoch": 0.65, "learning_rate": 4.967665107447203e-05, "loss": 2.5642, "step": 223500 }, { "epoch": 0.65, "learning_rate": 4.967592742682475e-05, "loss": 2.5683, "step": 224000 }, { "epoch": 0.65, "learning_rate": 4.967520377917747e-05, "loss": 2.552, "step": 224500 }, { "epoch": 0.65, "learning_rate": 4.9674481578825495e-05, "loss": 2.5785, "step": 225000 }, { "epoch": 0.65, "learning_rate": 4.967375793117822e-05, "loss": 2.564, "step": 225500 }, { "epoch": 0.65, "learning_rate": 4.967303428353094e-05, "loss": 2.5478, "step": 226000 }, { "epoch": 0.66, "learning_rate": 4.967231063588366e-05, "loss": 2.5504, "step": 226500 }, { "epoch": 0.66, "learning_rate": 4.9671586988236384e-05, "loss": 2.5643, "step": 227000 }, { "epoch": 0.66, "learning_rate": 4.967086334058911e-05, "loss": 2.5322, "step": 227500 }, { "epoch": 0.66, "learning_rate": 4.967013969294183e-05, "loss": 2.562, "step": 228000 }, { "epoch": 0.66, "learning_rate": 4.966941604529455e-05, "loss": 2.5706, "step": 228500 }, { "epoch": 0.66, "learning_rate": 4.966869239764728e-05, "loss": 2.5509, "step": 229000 }, { "epoch": 0.66, "learning_rate": 4.96679701972953e-05, "loss": 2.5523, "step": 229500 }, { "epoch": 0.67, "learning_rate": 4.9667246549648025e-05, "loss": 2.5591, "step": 230000 }, { "epoch": 0.67, "learning_rate": 4.966652290200075e-05, "loss": 2.56, "step": 230500 }, { "epoch": 0.67, "learning_rate": 4.966579925435347e-05, "loss": 2.5508, "step": 231000 }, { "epoch": 0.67, "learning_rate": 4.9665077054001485e-05, "loss": 2.5699, "step": 231500 }, { "epoch": 0.67, "learning_rate": 4.966435340635421e-05, "loss": 2.5605, "step": 232000 }, { "epoch": 0.67, "learning_rate": 4.966362975870693e-05, "loss": 2.5706, "step": 232500 }, { "epoch": 0.67, "learning_rate": 4.966290611105965e-05, "loss": 2.5612, "step": 233000 }, { "epoch": 0.68, "learning_rate": 4.9662182463412374e-05, "loss": 2.5758, "step": 233500 }, { "epoch": 0.68, "learning_rate": 4.96614588157651e-05, "loss": 2.5689, "step": 234000 }, { "epoch": 0.68, "learning_rate": 4.9660735168117825e-05, "loss": 2.5324, "step": 234500 }, { "epoch": 0.68, "learning_rate": 4.966001152047055e-05, "loss": 2.5659, "step": 235000 }, { "epoch": 0.68, "learning_rate": 4.965928932011856e-05, "loss": 2.5712, "step": 235500 }, { "epoch": 0.68, "learning_rate": 4.965856711976658e-05, "loss": 2.5641, "step": 236000 }, { "epoch": 0.68, "learning_rate": 4.96578434721193e-05, "loss": 2.564, "step": 236500 }, { "epoch": 0.69, "learning_rate": 4.965711982447203e-05, "loss": 2.5625, "step": 237000 }, { "epoch": 0.69, "learning_rate": 4.965639617682475e-05, "loss": 2.5742, "step": 237500 }, { "epoch": 0.69, "learning_rate": 4.9655672529177474e-05, "loss": 2.5691, "step": 238000 }, { "epoch": 0.69, "learning_rate": 4.96549488815302e-05, "loss": 2.5498, "step": 238500 }, { "epoch": 0.69, "learning_rate": 4.9654225233882925e-05, "loss": 2.5589, "step": 239000 }, { "epoch": 0.69, "learning_rate": 4.965350158623565e-05, "loss": 2.548, "step": 239500 }, { "epoch": 0.69, "learning_rate": 4.965277938588366e-05, "loss": 2.5458, "step": 240000 }, { "epoch": 0.7, "learning_rate": 4.965205718553168e-05, "loss": 2.563, "step": 240500 }, { "epoch": 0.7, "learning_rate": 4.96513335378844e-05, "loss": 2.5474, "step": 241000 }, { "epoch": 0.7, "learning_rate": 4.965060989023712e-05, "loss": 2.5495, "step": 241500 }, { "epoch": 0.7, "learning_rate": 4.964988624258985e-05, "loss": 2.5573, "step": 242000 }, { "epoch": 0.7, "learning_rate": 4.9649162594942574e-05, "loss": 2.5471, "step": 242500 }, { "epoch": 0.7, "learning_rate": 4.9648438947295296e-05, "loss": 2.5519, "step": 243000 }, { "epoch": 0.7, "learning_rate": 4.964771674694331e-05, "loss": 2.5537, "step": 243500 }, { "epoch": 0.71, "learning_rate": 4.9646993099296034e-05, "loss": 2.5506, "step": 244000 }, { "epoch": 0.71, "learning_rate": 4.964626945164876e-05, "loss": 2.5406, "step": 244500 }, { "epoch": 0.71, "learning_rate": 4.9645545804001486e-05, "loss": 2.5416, "step": 245000 }, { "epoch": 0.71, "learning_rate": 4.964482215635421e-05, "loss": 2.5441, "step": 245500 }, { "epoch": 0.71, "learning_rate": 4.964409850870693e-05, "loss": 2.5514, "step": 246000 }, { "epoch": 0.71, "learning_rate": 4.964337486105965e-05, "loss": 2.5569, "step": 246500 }, { "epoch": 0.71, "learning_rate": 4.9642651213412374e-05, "loss": 2.5481, "step": 247000 }, { "epoch": 0.72, "learning_rate": 4.9641927565765103e-05, "loss": 2.5606, "step": 247500 }, { "epoch": 0.72, "learning_rate": 4.964120536541312e-05, "loss": 2.5593, "step": 248000 }, { "epoch": 0.72, "learning_rate": 4.964048171776584e-05, "loss": 2.5548, "step": 248500 }, { "epoch": 0.72, "learning_rate": 4.9639758070118563e-05, "loss": 2.5462, "step": 249000 }, { "epoch": 0.72, "learning_rate": 4.9639034422471286e-05, "loss": 2.5785, "step": 249500 }, { "epoch": 0.72, "learning_rate": 4.963831077482401e-05, "loss": 2.563, "step": 250000 }, { "epoch": 0.73, "learning_rate": 4.963758712717673e-05, "loss": 2.5651, "step": 250500 }, { "epoch": 0.73, "learning_rate": 4.963686492682475e-05, "loss": 2.5509, "step": 251000 }, { "epoch": 0.73, "learning_rate": 4.963614127917748e-05, "loss": 2.5435, "step": 251500 }, { "epoch": 0.73, "learning_rate": 4.9635417631530204e-05, "loss": 2.5479, "step": 252000 }, { "epoch": 0.73, "learning_rate": 4.9634693983882926e-05, "loss": 2.5208, "step": 252500 }, { "epoch": 0.73, "learning_rate": 4.963397178353094e-05, "loss": 2.5491, "step": 253000 }, { "epoch": 0.73, "learning_rate": 4.9633248135883664e-05, "loss": 2.5547, "step": 253500 }, { "epoch": 0.74, "learning_rate": 4.963252593553168e-05, "loss": 2.5425, "step": 254000 }, { "epoch": 0.74, "learning_rate": 4.96318022878844e-05, "loss": 2.5343, "step": 254500 }, { "epoch": 0.74, "learning_rate": 4.963107864023713e-05, "loss": 2.5502, "step": 255000 }, { "epoch": 0.74, "learning_rate": 4.963035499258985e-05, "loss": 2.5575, "step": 255500 }, { "epoch": 0.74, "learning_rate": 4.962963423953316e-05, "loss": 2.5557, "step": 256000 }, { "epoch": 0.74, "learning_rate": 4.9628910591885884e-05, "loss": 2.5413, "step": 256500 }, { "epoch": 0.74, "learning_rate": 4.9628188391533906e-05, "loss": 2.552, "step": 257000 }, { "epoch": 0.75, "learning_rate": 4.962746474388663e-05, "loss": 2.5499, "step": 257500 }, { "epoch": 0.75, "learning_rate": 4.962674109623935e-05, "loss": 2.5515, "step": 258000 }, { "epoch": 0.75, "learning_rate": 4.962601744859207e-05, "loss": 2.5387, "step": 258500 }, { "epoch": 0.75, "learning_rate": 4.9625293800944795e-05, "loss": 2.5693, "step": 259000 }, { "epoch": 0.75, "learning_rate": 4.9624570153297524e-05, "loss": 2.5589, "step": 259500 }, { "epoch": 0.75, "learning_rate": 4.9623846505650246e-05, "loss": 2.523, "step": 260000 }, { "epoch": 0.75, "learning_rate": 4.962312285800297e-05, "loss": 2.5451, "step": 260500 }, { "epoch": 0.76, "learning_rate": 4.9622400657650984e-05, "loss": 2.5508, "step": 261000 }, { "epoch": 0.76, "learning_rate": 4.9621677010003706e-05, "loss": 2.5668, "step": 261500 }, { "epoch": 0.76, "learning_rate": 4.962095336235643e-05, "loss": 2.5312, "step": 262000 }, { "epoch": 0.76, "learning_rate": 4.962022971470916e-05, "loss": 2.5467, "step": 262500 }, { "epoch": 0.76, "learning_rate": 4.961950606706188e-05, "loss": 2.5604, "step": 263000 }, { "epoch": 0.76, "learning_rate": 4.96187824194146e-05, "loss": 2.5361, "step": 263500 }, { "epoch": 0.76, "learning_rate": 4.9618058771767324e-05, "loss": 2.5436, "step": 264000 }, { "epoch": 0.77, "learning_rate": 4.961733512412005e-05, "loss": 2.5387, "step": 264500 }, { "epoch": 0.77, "learning_rate": 4.961661147647277e-05, "loss": 2.5314, "step": 265000 }, { "epoch": 0.77, "learning_rate": 4.961588782882549e-05, "loss": 2.5349, "step": 265500 }, { "epoch": 0.77, "learning_rate": 4.961516418117821e-05, "loss": 2.5463, "step": 266000 }, { "epoch": 0.77, "learning_rate": 4.9614440533530936e-05, "loss": 2.5531, "step": 266500 }, { "epoch": 0.77, "learning_rate": 4.961371833317896e-05, "loss": 2.5421, "step": 267000 }, { "epoch": 0.77, "learning_rate": 4.961299613282698e-05, "loss": 2.548, "step": 267500 }, { "epoch": 0.78, "learning_rate": 4.96122724851797e-05, "loss": 2.5439, "step": 268000 }, { "epoch": 0.78, "learning_rate": 4.9611548837532425e-05, "loss": 2.5345, "step": 268500 }, { "epoch": 0.78, "learning_rate": 4.961082518988515e-05, "loss": 2.5571, "step": 269000 }, { "epoch": 0.78, "learning_rate": 4.961010154223787e-05, "loss": 2.5308, "step": 269500 }, { "epoch": 0.78, "learning_rate": 4.960937789459059e-05, "loss": 2.5601, "step": 270000 }, { "epoch": 0.78, "learning_rate": 4.960865569423861e-05, "loss": 2.5552, "step": 270500 }, { "epoch": 0.78, "learning_rate": 4.960793204659133e-05, "loss": 2.5522, "step": 271000 }, { "epoch": 0.79, "learning_rate": 4.960720839894406e-05, "loss": 2.5313, "step": 271500 }, { "epoch": 0.79, "learning_rate": 4.960648475129678e-05, "loss": 2.533, "step": 272000 }, { "epoch": 0.79, "learning_rate": 4.9605762550944796e-05, "loss": 2.5278, "step": 272500 }, { "epoch": 0.79, "learning_rate": 4.960504035059281e-05, "loss": 2.5569, "step": 273000 }, { "epoch": 0.79, "learning_rate": 4.9604316702945534e-05, "loss": 2.5368, "step": 273500 }, { "epoch": 0.79, "learning_rate": 4.9603593055298256e-05, "loss": 2.5321, "step": 274000 }, { "epoch": 0.79, "learning_rate": 4.960286940765098e-05, "loss": 2.5491, "step": 274500 }, { "epoch": 0.8, "learning_rate": 4.960214576000371e-05, "loss": 2.5417, "step": 275000 }, { "epoch": 0.8, "learning_rate": 4.960142211235643e-05, "loss": 2.53, "step": 275500 }, { "epoch": 0.8, "learning_rate": 4.960069846470916e-05, "loss": 2.5556, "step": 276000 }, { "epoch": 0.8, "learning_rate": 4.959997481706188e-05, "loss": 2.5469, "step": 276500 }, { "epoch": 0.8, "learning_rate": 4.95992511694146e-05, "loss": 2.5585, "step": 277000 }, { "epoch": 0.8, "learning_rate": 4.9598527521767325e-05, "loss": 2.5562, "step": 277500 }, { "epoch": 0.8, "learning_rate": 4.959780387412005e-05, "loss": 2.5432, "step": 278000 }, { "epoch": 0.81, "learning_rate": 4.959708167376806e-05, "loss": 2.53, "step": 278500 }, { "epoch": 0.81, "learning_rate": 4.959635947341608e-05, "loss": 2.5359, "step": 279000 }, { "epoch": 0.81, "learning_rate": 4.959563582576881e-05, "loss": 2.5695, "step": 279500 }, { "epoch": 0.81, "learning_rate": 4.959491217812153e-05, "loss": 2.5665, "step": 280000 }, { "epoch": 0.81, "learning_rate": 4.959418853047425e-05, "loss": 2.5413, "step": 280500 }, { "epoch": 0.81, "learning_rate": 4.9593464882826974e-05, "loss": 2.547, "step": 281000 }, { "epoch": 0.81, "learning_rate": 4.9592741235179696e-05, "loss": 2.5466, "step": 281500 }, { "epoch": 0.82, "learning_rate": 4.9592017587532425e-05, "loss": 2.5729, "step": 282000 }, { "epoch": 0.82, "learning_rate": 4.959129393988515e-05, "loss": 2.5552, "step": 282500 }, { "epoch": 0.82, "learning_rate": 4.959057029223787e-05, "loss": 2.5592, "step": 283000 }, { "epoch": 0.82, "learning_rate": 4.958984664459059e-05, "loss": 2.5439, "step": 283500 }, { "epoch": 0.82, "learning_rate": 4.9589122996943314e-05, "loss": 2.5459, "step": 284000 }, { "epoch": 0.82, "learning_rate": 4.958839934929604e-05, "loss": 2.5536, "step": 284500 }, { "epoch": 0.82, "learning_rate": 4.958767714894406e-05, "loss": 2.5262, "step": 285000 }, { "epoch": 0.83, "learning_rate": 4.958695350129678e-05, "loss": 2.5212, "step": 285500 }, { "epoch": 0.83, "learning_rate": 4.95862313009448e-05, "loss": 2.5396, "step": 286000 }, { "epoch": 0.83, "learning_rate": 4.958550765329752e-05, "loss": 2.5437, "step": 286500 }, { "epoch": 0.83, "learning_rate": 4.958478400565024e-05, "loss": 2.5319, "step": 287000 }, { "epoch": 0.83, "learning_rate": 4.9584060358002963e-05, "loss": 2.5542, "step": 287500 }, { "epoch": 0.83, "learning_rate": 4.9583336710355686e-05, "loss": 2.5505, "step": 288000 }, { "epoch": 0.84, "learning_rate": 4.958261306270841e-05, "loss": 2.5352, "step": 288500 }, { "epoch": 0.84, "learning_rate": 4.958189086235643e-05, "loss": 2.5431, "step": 289000 }, { "epoch": 0.84, "learning_rate": 4.958116721470916e-05, "loss": 2.5431, "step": 289500 }, { "epoch": 0.84, "learning_rate": 4.958044356706188e-05, "loss": 2.5352, "step": 290000 }, { "epoch": 0.84, "learning_rate": 4.9579719919414604e-05, "loss": 2.5251, "step": 290500 }, { "epoch": 0.84, "learning_rate": 4.9578996271767326e-05, "loss": 2.5557, "step": 291000 }, { "epoch": 0.84, "learning_rate": 4.957827407141534e-05, "loss": 2.5366, "step": 291500 }, { "epoch": 0.85, "learning_rate": 4.9577550423768064e-05, "loss": 2.5192, "step": 292000 }, { "epoch": 0.85, "learning_rate": 4.9576826776120786e-05, "loss": 2.5426, "step": 292500 }, { "epoch": 0.85, "learning_rate": 4.957610312847351e-05, "loss": 2.5335, "step": 293000 }, { "epoch": 0.85, "learning_rate": 4.957538092812153e-05, "loss": 2.5064, "step": 293500 }, { "epoch": 0.85, "learning_rate": 4.957465728047425e-05, "loss": 2.5572, "step": 294000 }, { "epoch": 0.85, "learning_rate": 4.9573933632826975e-05, "loss": 2.5598, "step": 294500 }, { "epoch": 0.85, "learning_rate": 4.95732099851797e-05, "loss": 2.5467, "step": 295000 }, { "epoch": 0.86, "learning_rate": 4.957248633753242e-05, "loss": 2.5209, "step": 295500 }, { "epoch": 0.86, "learning_rate": 4.9571764137180435e-05, "loss": 2.5475, "step": 296000 }, { "epoch": 0.86, "learning_rate": 4.957104048953316e-05, "loss": 2.5553, "step": 296500 }, { "epoch": 0.86, "learning_rate": 4.9570316841885886e-05, "loss": 2.5091, "step": 297000 }, { "epoch": 0.86, "learning_rate": 4.956959319423861e-05, "loss": 2.5599, "step": 297500 }, { "epoch": 0.86, "learning_rate": 4.956886954659134e-05, "loss": 2.5294, "step": 298000 }, { "epoch": 0.86, "learning_rate": 4.956814589894406e-05, "loss": 2.536, "step": 298500 }, { "epoch": 0.87, "learning_rate": 4.956742225129678e-05, "loss": 2.5513, "step": 299000 }, { "epoch": 0.87, "learning_rate": 4.9566698603649504e-05, "loss": 2.5353, "step": 299500 }, { "epoch": 0.87, "learning_rate": 4.9565974956002226e-05, "loss": 2.5504, "step": 300000 }, { "epoch": 0.87, "learning_rate": 4.956525275565024e-05, "loss": 2.518, "step": 300500 }, { "epoch": 0.87, "learning_rate": 4.9564529108002964e-05, "loss": 2.5467, "step": 301000 }, { "epoch": 0.87, "learning_rate": 4.956380690765099e-05, "loss": 2.5237, "step": 301500 }, { "epoch": 0.87, "learning_rate": 4.956308326000371e-05, "loss": 2.5409, "step": 302000 }, { "epoch": 0.88, "learning_rate": 4.956235961235643e-05, "loss": 2.52, "step": 302500 }, { "epoch": 0.88, "learning_rate": 4.956163596470915e-05, "loss": 2.5535, "step": 303000 }, { "epoch": 0.88, "learning_rate": 4.956091376435717e-05, "loss": 2.5086, "step": 303500 }, { "epoch": 0.88, "learning_rate": 4.956019011670989e-05, "loss": 2.5657, "step": 304000 }, { "epoch": 0.88, "learning_rate": 4.955946646906262e-05, "loss": 2.5526, "step": 304500 }, { "epoch": 0.88, "learning_rate": 4.955874282141534e-05, "loss": 2.5273, "step": 305000 }, { "epoch": 0.88, "learning_rate": 4.9558019173768065e-05, "loss": 2.5482, "step": 305500 }, { "epoch": 0.89, "learning_rate": 4.955729552612079e-05, "loss": 2.5281, "step": 306000 }, { "epoch": 0.89, "learning_rate": 4.955657187847351e-05, "loss": 2.5439, "step": 306500 }, { "epoch": 0.89, "learning_rate": 4.955584823082624e-05, "loss": 2.5476, "step": 307000 }, { "epoch": 0.89, "learning_rate": 4.955512458317896e-05, "loss": 2.5357, "step": 307500 }, { "epoch": 0.89, "learning_rate": 4.955440093553168e-05, "loss": 2.5434, "step": 308000 }, { "epoch": 0.89, "learning_rate": 4.9553677287884405e-05, "loss": 2.545, "step": 308500 }, { "epoch": 0.89, "learning_rate": 4.955295364023713e-05, "loss": 2.5369, "step": 309000 }, { "epoch": 0.9, "learning_rate": 4.955223143988514e-05, "loss": 2.5486, "step": 309500 }, { "epoch": 0.9, "learning_rate": 4.9551507792237865e-05, "loss": 2.5518, "step": 310000 }, { "epoch": 0.9, "learning_rate": 4.955078414459059e-05, "loss": 2.5394, "step": 310500 }, { "epoch": 0.9, "learning_rate": 4.955006049694331e-05, "loss": 2.5424, "step": 311000 }, { "epoch": 0.9, "learning_rate": 4.954933684929604e-05, "loss": 2.5329, "step": 311500 }, { "epoch": 0.9, "learning_rate": 4.954861320164876e-05, "loss": 2.5612, "step": 312000 }, { "epoch": 0.9, "learning_rate": 4.954788955400149e-05, "loss": 2.5105, "step": 312500 }, { "epoch": 0.91, "learning_rate": 4.9547167353649505e-05, "loss": 2.5164, "step": 313000 }, { "epoch": 0.91, "learning_rate": 4.954644370600223e-05, "loss": 2.5203, "step": 313500 }, { "epoch": 0.91, "learning_rate": 4.954572005835495e-05, "loss": 2.4984, "step": 314000 }, { "epoch": 0.91, "learning_rate": 4.954499641070767e-05, "loss": 2.5298, "step": 314500 }, { "epoch": 0.91, "learning_rate": 4.9544272763060394e-05, "loss": 2.5382, "step": 315000 }, { "epoch": 0.91, "learning_rate": 4.9543549115413116e-05, "loss": 2.5289, "step": 315500 }, { "epoch": 0.91, "learning_rate": 4.954282546776584e-05, "loss": 2.554, "step": 316000 }, { "epoch": 0.92, "learning_rate": 4.954210182011856e-05, "loss": 2.5542, "step": 316500 }, { "epoch": 0.92, "learning_rate": 4.954137961976658e-05, "loss": 2.5502, "step": 317000 }, { "epoch": 0.92, "learning_rate": 4.954065886670989e-05, "loss": 2.5347, "step": 317500 }, { "epoch": 0.92, "learning_rate": 4.9539935219062614e-05, "loss": 2.5296, "step": 318000 }, { "epoch": 0.92, "learning_rate": 4.9539211571415336e-05, "loss": 2.528, "step": 318500 }, { "epoch": 0.92, "learning_rate": 4.9538487923768065e-05, "loss": 2.54, "step": 319000 }, { "epoch": 0.92, "learning_rate": 4.953776427612079e-05, "loss": 2.5157, "step": 319500 }, { "epoch": 0.93, "learning_rate": 4.953704062847352e-05, "loss": 2.5307, "step": 320000 }, { "epoch": 0.93, "learning_rate": 4.953631698082624e-05, "loss": 2.5318, "step": 320500 }, { "epoch": 0.93, "learning_rate": 4.9535594780474254e-05, "loss": 2.5449, "step": 321000 }, { "epoch": 0.93, "learning_rate": 4.953487113282698e-05, "loss": 2.5484, "step": 321500 }, { "epoch": 0.93, "learning_rate": 4.95341474851797e-05, "loss": 2.5369, "step": 322000 }, { "epoch": 0.93, "learning_rate": 4.953342383753242e-05, "loss": 2.5524, "step": 322500 }, { "epoch": 0.93, "learning_rate": 4.953270018988514e-05, "loss": 2.5162, "step": 323000 }, { "epoch": 0.94, "learning_rate": 4.9531976542237866e-05, "loss": 2.5119, "step": 323500 }, { "epoch": 0.94, "learning_rate": 4.953125289459059e-05, "loss": 2.5199, "step": 324000 }, { "epoch": 0.94, "learning_rate": 4.953052924694332e-05, "loss": 2.5306, "step": 324500 }, { "epoch": 0.94, "learning_rate": 4.952980704659133e-05, "loss": 2.5014, "step": 325000 }, { "epoch": 0.94, "learning_rate": 4.9529083398944055e-05, "loss": 2.512, "step": 325500 }, { "epoch": 0.94, "learning_rate": 4.952835975129678e-05, "loss": 2.553, "step": 326000 }, { "epoch": 0.95, "learning_rate": 4.95276361036495e-05, "loss": 2.5293, "step": 326500 }, { "epoch": 0.95, "learning_rate": 4.952691245600223e-05, "loss": 2.528, "step": 327000 }, { "epoch": 0.95, "learning_rate": 4.952618880835495e-05, "loss": 2.5372, "step": 327500 }, { "epoch": 0.95, "learning_rate": 4.9525466608002966e-05, "loss": 2.5085, "step": 328000 }, { "epoch": 0.95, "learning_rate": 4.952474296035569e-05, "loss": 2.536, "step": 328500 }, { "epoch": 0.95, "learning_rate": 4.952401931270842e-05, "loss": 2.5238, "step": 329000 }, { "epoch": 0.95, "learning_rate": 4.952329566506114e-05, "loss": 2.5466, "step": 329500 }, { "epoch": 0.96, "learning_rate": 4.952257201741386e-05, "loss": 2.546, "step": 330000 }, { "epoch": 0.96, "learning_rate": 4.9521848369766584e-05, "loss": 2.5203, "step": 330500 }, { "epoch": 0.96, "learning_rate": 4.95211261694146e-05, "loss": 2.541, "step": 331000 }, { "epoch": 0.96, "learning_rate": 4.952040252176732e-05, "loss": 2.5226, "step": 331500 }, { "epoch": 0.96, "learning_rate": 4.9519678874120044e-05, "loss": 2.5345, "step": 332000 }, { "epoch": 0.96, "learning_rate": 4.9518955226472766e-05, "loss": 2.5377, "step": 332500 }, { "epoch": 0.96, "learning_rate": 4.951823157882549e-05, "loss": 2.5331, "step": 333000 }, { "epoch": 0.97, "learning_rate": 4.951750793117822e-05, "loss": 2.5586, "step": 333500 }, { "epoch": 0.97, "learning_rate": 4.951678573082623e-05, "loss": 2.5186, "step": 334000 }, { "epoch": 0.97, "learning_rate": 4.9516063530474255e-05, "loss": 2.5191, "step": 334500 }, { "epoch": 0.97, "learning_rate": 4.951533988282698e-05, "loss": 2.5165, "step": 335000 }, { "epoch": 0.97, "learning_rate": 4.95146162351797e-05, "loss": 2.533, "step": 335500 }, { "epoch": 0.97, "learning_rate": 4.951389258753242e-05, "loss": 2.5351, "step": 336000 }, { "epoch": 0.97, "learning_rate": 4.9513168939885144e-05, "loss": 2.5606, "step": 336500 }, { "epoch": 0.98, "learning_rate": 4.9512445292237866e-05, "loss": 2.5051, "step": 337000 }, { "epoch": 0.98, "learning_rate": 4.951172164459059e-05, "loss": 2.5284, "step": 337500 }, { "epoch": 0.98, "learning_rate": 4.951099799694332e-05, "loss": 2.5166, "step": 338000 }, { "epoch": 0.98, "learning_rate": 4.951027434929604e-05, "loss": 2.523, "step": 338500 }, { "epoch": 0.98, "learning_rate": 4.950955070164876e-05, "loss": 2.5179, "step": 339000 }, { "epoch": 0.98, "learning_rate": 4.9508827054001484e-05, "loss": 2.5472, "step": 339500 }, { "epoch": 0.98, "learning_rate": 4.9508103406354207e-05, "loss": 2.5212, "step": 340000 }, { "epoch": 0.99, "learning_rate": 4.950738120600222e-05, "loss": 2.5265, "step": 340500 }, { "epoch": 0.99, "learning_rate": 4.9506657558354944e-05, "loss": 2.5355, "step": 341000 }, { "epoch": 0.99, "learning_rate": 4.9505933910707667e-05, "loss": 2.5299, "step": 341500 }, { "epoch": 0.99, "learning_rate": 4.9505210263060396e-05, "loss": 2.5259, "step": 342000 }, { "epoch": 0.99, "learning_rate": 4.950448951000371e-05, "loss": 2.5472, "step": 342500 }, { "epoch": 0.99, "learning_rate": 4.950376730965173e-05, "loss": 2.542, "step": 343000 }, { "epoch": 0.99, "learning_rate": 4.950304366200445e-05, "loss": 2.5357, "step": 343500 }, { "epoch": 1.0, "learning_rate": 4.950232001435717e-05, "loss": 2.5297, "step": 344000 }, { "epoch": 1.0, "learning_rate": 4.9501596366709893e-05, "loss": 2.5294, "step": 344500 }, { "epoch": 1.0, "learning_rate": 4.9500874166357916e-05, "loss": 2.525, "step": 345000 }, { "epoch": 1.0, "eval_accuracy": 0.6262474735680433, "eval_accuracy_mlm": 0.5877085333313983, "eval_accuracy_nsp": 0.8331431347476775, "eval_loss": 2.4722180366516113, "eval_runtime": 330.2224, "eval_samples_per_second": 1321.491, "eval_steps_per_second": 55.063, "step": 345472 }, { "epoch": 1.0, "learning_rate": 4.950015051871064e-05, "loss": 2.5138, "step": 345500 }, { "epoch": 1.0, "learning_rate": 4.949942687106336e-05, "loss": 2.5146, "step": 346000 }, { "epoch": 1.0, "learning_rate": 4.949870322341608e-05, "loss": 2.5179, "step": 346500 }, { "epoch": 1.0, "learning_rate": 4.9497979575768805e-05, "loss": 2.5145, "step": 347000 }, { "epoch": 1.01, "learning_rate": 4.949725592812153e-05, "loss": 2.4945, "step": 347500 }, { "epoch": 1.01, "learning_rate": 4.949653228047425e-05, "loss": 2.5222, "step": 348000 }, { "epoch": 1.01, "learning_rate": 4.949580863282697e-05, "loss": 2.5155, "step": 348500 }, { "epoch": 1.01, "learning_rate": 4.9495084985179694e-05, "loss": 2.5079, "step": 349000 }, { "epoch": 1.01, "learning_rate": 4.9494362784827716e-05, "loss": 2.5266, "step": 349500 }, { "epoch": 1.01, "learning_rate": 4.9493639137180445e-05, "loss": 2.5136, "step": 350000 }, { "epoch": 1.01, "learning_rate": 4.949291548953317e-05, "loss": 2.5107, "step": 350500 }, { "epoch": 1.02, "learning_rate": 4.949219184188589e-05, "loss": 2.5021, "step": 351000 }, { "epoch": 1.02, "learning_rate": 4.949146819423861e-05, "loss": 2.5204, "step": 351500 }, { "epoch": 1.02, "learning_rate": 4.949074599388663e-05, "loss": 2.5008, "step": 352000 }, { "epoch": 1.02, "learning_rate": 4.949002234623935e-05, "loss": 2.5348, "step": 352500 }, { "epoch": 1.02, "learning_rate": 4.948929869859207e-05, "loss": 2.5246, "step": 353000 }, { "epoch": 1.02, "learning_rate": 4.9488575050944794e-05, "loss": 2.5132, "step": 353500 }, { "epoch": 1.02, "learning_rate": 4.9487851403297516e-05, "loss": 2.518, "step": 354000 }, { "epoch": 1.03, "learning_rate": 4.9487127755650245e-05, "loss": 2.4789, "step": 354500 }, { "epoch": 1.03, "learning_rate": 4.948640555529826e-05, "loss": 2.5195, "step": 355000 }, { "epoch": 1.03, "learning_rate": 4.948568190765098e-05, "loss": 2.5053, "step": 355500 }, { "epoch": 1.03, "learning_rate": 4.9484958260003705e-05, "loss": 2.5159, "step": 356000 }, { "epoch": 1.03, "learning_rate": 4.948423461235643e-05, "loss": 2.4797, "step": 356500 }, { "epoch": 1.03, "learning_rate": 4.948351241200444e-05, "loss": 2.5152, "step": 357000 }, { "epoch": 1.03, "learning_rate": 4.948279021165247e-05, "loss": 2.4891, "step": 357500 }, { "epoch": 1.04, "learning_rate": 4.9482066564005194e-05, "loss": 2.5081, "step": 358000 }, { "epoch": 1.04, "learning_rate": 4.948134291635792e-05, "loss": 2.5037, "step": 358500 }, { "epoch": 1.04, "learning_rate": 4.948061926871064e-05, "loss": 2.5016, "step": 359000 }, { "epoch": 1.04, "learning_rate": 4.947989562106336e-05, "loss": 2.4997, "step": 359500 }, { "epoch": 1.04, "learning_rate": 4.947917197341608e-05, "loss": 2.4921, "step": 360000 }, { "epoch": 1.04, "learning_rate": 4.9478448325768806e-05, "loss": 2.5125, "step": 360500 }, { "epoch": 1.04, "learning_rate": 4.947772612541682e-05, "loss": 2.5305, "step": 361000 }, { "epoch": 1.05, "learning_rate": 4.947700247776954e-05, "loss": 2.5027, "step": 361500 }, { "epoch": 1.05, "learning_rate": 4.947627883012227e-05, "loss": 2.51, "step": 362000 }, { "epoch": 1.05, "learning_rate": 4.9475555182474995e-05, "loss": 2.5035, "step": 362500 }, { "epoch": 1.05, "learning_rate": 4.947483153482772e-05, "loss": 2.5229, "step": 363000 }, { "epoch": 1.05, "learning_rate": 4.947410788718044e-05, "loss": 2.5101, "step": 363500 }, { "epoch": 1.05, "learning_rate": 4.947338423953316e-05, "loss": 2.5042, "step": 364000 }, { "epoch": 1.06, "learning_rate": 4.947266059188589e-05, "loss": 2.5228, "step": 364500 }, { "epoch": 1.06, "learning_rate": 4.947193694423861e-05, "loss": 2.501, "step": 365000 }, { "epoch": 1.06, "learning_rate": 4.947121619118192e-05, "loss": 2.5274, "step": 365500 }, { "epoch": 1.06, "learning_rate": 4.9470492543534644e-05, "loss": 2.5239, "step": 366000 }, { "epoch": 1.06, "learning_rate": 4.946976889588737e-05, "loss": 2.5091, "step": 366500 }, { "epoch": 1.06, "learning_rate": 4.9469045248240095e-05, "loss": 2.4958, "step": 367000 }, { "epoch": 1.06, "learning_rate": 4.946832160059282e-05, "loss": 2.5151, "step": 367500 }, { "epoch": 1.07, "learning_rate": 4.946759940024083e-05, "loss": 2.4916, "step": 368000 }, { "epoch": 1.07, "learning_rate": 4.9466875752593555e-05, "loss": 2.5197, "step": 368500 }, { "epoch": 1.07, "learning_rate": 4.946615210494628e-05, "loss": 2.5194, "step": 369000 }, { "epoch": 1.07, "learning_rate": 4.9465428457299e-05, "loss": 2.4897, "step": 369500 }, { "epoch": 1.07, "learning_rate": 4.946470480965172e-05, "loss": 2.4969, "step": 370000 }, { "epoch": 1.07, "learning_rate": 4.9463981162004444e-05, "loss": 2.5156, "step": 370500 }, { "epoch": 1.07, "learning_rate": 4.946325751435717e-05, "loss": 2.5165, "step": 371000 }, { "epoch": 1.08, "learning_rate": 4.9462533866709895e-05, "loss": 2.5243, "step": 371500 }, { "epoch": 1.08, "learning_rate": 4.9461810219062624e-05, "loss": 2.5281, "step": 372000 }, { "epoch": 1.08, "learning_rate": 4.946108801871064e-05, "loss": 2.5226, "step": 372500 }, { "epoch": 1.08, "learning_rate": 4.946036437106336e-05, "loss": 2.5179, "step": 373000 }, { "epoch": 1.08, "learning_rate": 4.9459640723416084e-05, "loss": 2.5278, "step": 373500 }, { "epoch": 1.08, "learning_rate": 4.9458917075768806e-05, "loss": 2.5329, "step": 374000 }, { "epoch": 1.08, "learning_rate": 4.945819487541682e-05, "loss": 2.5171, "step": 374500 }, { "epoch": 1.09, "learning_rate": 4.9457471227769544e-05, "loss": 2.5229, "step": 375000 }, { "epoch": 1.09, "learning_rate": 4.945674758012227e-05, "loss": 2.5151, "step": 375500 }, { "epoch": 1.09, "learning_rate": 4.9456023932474995e-05, "loss": 2.5138, "step": 376000 }, { "epoch": 1.09, "learning_rate": 4.945530028482772e-05, "loss": 2.5092, "step": 376500 }, { "epoch": 1.09, "learning_rate": 4.945457663718044e-05, "loss": 2.5097, "step": 377000 }, { "epoch": 1.09, "learning_rate": 4.9453854436828455e-05, "loss": 2.5012, "step": 377500 }, { "epoch": 1.09, "learning_rate": 4.945313078918118e-05, "loss": 2.5067, "step": 378000 }, { "epoch": 1.1, "learning_rate": 4.94524071415339e-05, "loss": 2.5044, "step": 378500 }, { "epoch": 1.1, "learning_rate": 4.945168349388662e-05, "loss": 2.5119, "step": 379000 }, { "epoch": 1.1, "learning_rate": 4.9450959846239344e-05, "loss": 2.4923, "step": 379500 }, { "epoch": 1.1, "learning_rate": 4.945023619859207e-05, "loss": 2.4909, "step": 380000 }, { "epoch": 1.1, "learning_rate": 4.9449513998240096e-05, "loss": 2.5173, "step": 380500 }, { "epoch": 1.1, "learning_rate": 4.944879035059282e-05, "loss": 2.5205, "step": 381000 }, { "epoch": 1.1, "learning_rate": 4.944806670294554e-05, "loss": 2.4839, "step": 381500 }, { "epoch": 1.11, "learning_rate": 4.944734305529826e-05, "loss": 2.5309, "step": 382000 }, { "epoch": 1.11, "learning_rate": 4.9446619407650985e-05, "loss": 2.5114, "step": 382500 }, { "epoch": 1.11, "learning_rate": 4.944589576000371e-05, "loss": 2.5328, "step": 383000 }, { "epoch": 1.11, "learning_rate": 4.944517211235643e-05, "loss": 2.54, "step": 383500 }, { "epoch": 1.11, "learning_rate": 4.9444449912004445e-05, "loss": 2.4924, "step": 384000 }, { "epoch": 1.11, "learning_rate": 4.9443726264357174e-05, "loss": 2.5132, "step": 384500 }, { "epoch": 1.11, "learning_rate": 4.9443002616709896e-05, "loss": 2.5103, "step": 385000 }, { "epoch": 1.12, "learning_rate": 4.944227896906262e-05, "loss": 2.5259, "step": 385500 }, { "epoch": 1.12, "learning_rate": 4.944155532141534e-05, "loss": 2.4902, "step": 386000 }, { "epoch": 1.12, "learning_rate": 4.9440833121063356e-05, "loss": 2.5077, "step": 386500 }, { "epoch": 1.12, "learning_rate": 4.944010947341608e-05, "loss": 2.4976, "step": 387000 }, { "epoch": 1.12, "learning_rate": 4.943938582576881e-05, "loss": 2.5192, "step": 387500 }, { "epoch": 1.12, "learning_rate": 4.943866217812153e-05, "loss": 2.5017, "step": 388000 }, { "epoch": 1.12, "learning_rate": 4.943793853047425e-05, "loss": 2.4885, "step": 388500 }, { "epoch": 1.13, "learning_rate": 4.9437214882826974e-05, "loss": 2.4972, "step": 389000 }, { "epoch": 1.13, "learning_rate": 4.9436491235179696e-05, "loss": 2.5149, "step": 389500 }, { "epoch": 1.13, "learning_rate": 4.943576903482772e-05, "loss": 2.4801, "step": 390000 }, { "epoch": 1.13, "learning_rate": 4.943504538718044e-05, "loss": 2.5214, "step": 390500 }, { "epoch": 1.13, "learning_rate": 4.943432173953316e-05, "loss": 2.5126, "step": 391000 }, { "epoch": 1.13, "learning_rate": 4.9433598091885885e-05, "loss": 2.5125, "step": 391500 }, { "epoch": 1.13, "learning_rate": 4.943287444423861e-05, "loss": 2.5096, "step": 392000 }, { "epoch": 1.14, "learning_rate": 4.943215224388662e-05, "loss": 2.5078, "step": 392500 }, { "epoch": 1.14, "learning_rate": 4.943142859623935e-05, "loss": 2.4889, "step": 393000 }, { "epoch": 1.14, "learning_rate": 4.9430704948592074e-05, "loss": 2.5152, "step": 393500 }, { "epoch": 1.14, "learning_rate": 4.9429981300944796e-05, "loss": 2.4852, "step": 394000 }, { "epoch": 1.14, "learning_rate": 4.9429257653297525e-05, "loss": 2.4944, "step": 394500 }, { "epoch": 1.14, "learning_rate": 4.942853545294554e-05, "loss": 2.4867, "step": 395000 }, { "epoch": 1.14, "learning_rate": 4.942781180529826e-05, "loss": 2.5099, "step": 395500 }, { "epoch": 1.15, "learning_rate": 4.9427088157650985e-05, "loss": 2.5048, "step": 396000 }, { "epoch": 1.15, "learning_rate": 4.942636451000371e-05, "loss": 2.5001, "step": 396500 }, { "epoch": 1.15, "learning_rate": 4.942564230965172e-05, "loss": 2.4917, "step": 397000 }, { "epoch": 1.15, "learning_rate": 4.942491866200445e-05, "loss": 2.4789, "step": 397500 }, { "epoch": 1.15, "learning_rate": 4.9424195014357174e-05, "loss": 2.4923, "step": 398000 }, { "epoch": 1.15, "learning_rate": 4.94234713667099e-05, "loss": 2.5209, "step": 398500 }, { "epoch": 1.15, "learning_rate": 4.942274771906262e-05, "loss": 2.4976, "step": 399000 }, { "epoch": 1.16, "learning_rate": 4.942202407141534e-05, "loss": 2.5105, "step": 399500 }, { "epoch": 1.16, "learning_rate": 4.942130187106336e-05, "loss": 2.4869, "step": 400000 }, { "epoch": 1.16, "learning_rate": 4.942057822341608e-05, "loss": 2.5045, "step": 400500 }, { "epoch": 1.16, "learning_rate": 4.94198545757688e-05, "loss": 2.4824, "step": 401000 }, { "epoch": 1.16, "learning_rate": 4.9419130928121523e-05, "loss": 2.5137, "step": 401500 }, { "epoch": 1.16, "learning_rate": 4.941840728047425e-05, "loss": 2.5157, "step": 402000 }, { "epoch": 1.17, "learning_rate": 4.9417685080122275e-05, "loss": 2.4984, "step": 402500 }, { "epoch": 1.17, "learning_rate": 4.9416961432475e-05, "loss": 2.5037, "step": 403000 }, { "epoch": 1.17, "learning_rate": 4.941623778482772e-05, "loss": 2.5096, "step": 403500 }, { "epoch": 1.17, "learning_rate": 4.941551413718044e-05, "loss": 2.5093, "step": 404000 }, { "epoch": 1.17, "learning_rate": 4.9414790489533164e-05, "loss": 2.5198, "step": 404500 }, { "epoch": 1.17, "learning_rate": 4.941406828918118e-05, "loss": 2.5293, "step": 405000 }, { "epoch": 1.17, "learning_rate": 4.94133446415339e-05, "loss": 2.4997, "step": 405500 }, { "epoch": 1.18, "learning_rate": 4.9412620993886624e-05, "loss": 2.5142, "step": 406000 }, { "epoch": 1.18, "learning_rate": 4.941189734623935e-05, "loss": 2.5088, "step": 406500 }, { "epoch": 1.18, "learning_rate": 4.9411173698592075e-05, "loss": 2.4991, "step": 407000 }, { "epoch": 1.18, "learning_rate": 4.94104500509448e-05, "loss": 2.5031, "step": 407500 }, { "epoch": 1.18, "learning_rate": 4.940972785059281e-05, "loss": 2.5247, "step": 408000 }, { "epoch": 1.18, "learning_rate": 4.9409004202945535e-05, "loss": 2.5288, "step": 408500 }, { "epoch": 1.18, "learning_rate": 4.940828344988885e-05, "loss": 2.5248, "step": 409000 }, { "epoch": 1.19, "learning_rate": 4.940755980224157e-05, "loss": 2.5121, "step": 409500 }, { "epoch": 1.19, "learning_rate": 4.94068361545943e-05, "loss": 2.4909, "step": 410000 }, { "epoch": 1.19, "learning_rate": 4.9406112506947024e-05, "loss": 2.4942, "step": 410500 }, { "epoch": 1.19, "learning_rate": 4.9405388859299746e-05, "loss": 2.5046, "step": 411000 }, { "epoch": 1.19, "learning_rate": 4.940466521165247e-05, "loss": 2.4941, "step": 411500 }, { "epoch": 1.19, "learning_rate": 4.940394156400519e-05, "loss": 2.5106, "step": 412000 }, { "epoch": 1.19, "learning_rate": 4.940321791635791e-05, "loss": 2.511, "step": 412500 }, { "epoch": 1.2, "learning_rate": 4.9402494268710635e-05, "loss": 2.5029, "step": 413000 }, { "epoch": 1.2, "learning_rate": 4.940177206835865e-05, "loss": 2.5084, "step": 413500 }, { "epoch": 1.2, "learning_rate": 4.940104842071138e-05, "loss": 2.495, "step": 414000 }, { "epoch": 1.2, "learning_rate": 4.94003247730641e-05, "loss": 2.5141, "step": 414500 }, { "epoch": 1.2, "learning_rate": 4.9399601125416824e-05, "loss": 2.4925, "step": 415000 }, { "epoch": 1.2, "learning_rate": 4.9398877477769547e-05, "loss": 2.5013, "step": 415500 }, { "epoch": 1.2, "learning_rate": 4.939815383012227e-05, "loss": 2.5177, "step": 416000 }, { "epoch": 1.21, "learning_rate": 4.939743018247499e-05, "loss": 2.4974, "step": 416500 }, { "epoch": 1.21, "learning_rate": 4.939670653482771e-05, "loss": 2.5108, "step": 417000 }, { "epoch": 1.21, "learning_rate": 4.939598288718044e-05, "loss": 2.4925, "step": 417500 }, { "epoch": 1.21, "learning_rate": 4.9395259239533164e-05, "loss": 2.5317, "step": 418000 }, { "epoch": 1.21, "learning_rate": 4.939453703918118e-05, "loss": 2.5045, "step": 418500 }, { "epoch": 1.21, "learning_rate": 4.93938133915339e-05, "loss": 2.4958, "step": 419000 }, { "epoch": 1.21, "learning_rate": 4.939308974388663e-05, "loss": 2.4963, "step": 419500 }, { "epoch": 1.22, "learning_rate": 4.939236754353465e-05, "loss": 2.4783, "step": 420000 }, { "epoch": 1.22, "learning_rate": 4.939164389588737e-05, "loss": 2.497, "step": 420500 }, { "epoch": 1.22, "learning_rate": 4.939092024824009e-05, "loss": 2.4957, "step": 421000 }, { "epoch": 1.22, "learning_rate": 4.9390196600592814e-05, "loss": 2.4938, "step": 421500 }, { "epoch": 1.22, "learning_rate": 4.9389472952945536e-05, "loss": 2.4833, "step": 422000 }, { "epoch": 1.22, "learning_rate": 4.938874930529826e-05, "loss": 2.5055, "step": 422500 }, { "epoch": 1.22, "learning_rate": 4.938802565765098e-05, "loss": 2.5022, "step": 423000 }, { "epoch": 1.23, "learning_rate": 4.93873020100037e-05, "loss": 2.5036, "step": 423500 }, { "epoch": 1.23, "learning_rate": 4.938657836235643e-05, "loss": 2.5116, "step": 424000 }, { "epoch": 1.23, "learning_rate": 4.938585616200445e-05, "loss": 2.5032, "step": 424500 }, { "epoch": 1.23, "learning_rate": 4.9385132514357176e-05, "loss": 2.5109, "step": 425000 }, { "epoch": 1.23, "learning_rate": 4.938441031400519e-05, "loss": 2.5154, "step": 425500 }, { "epoch": 1.23, "learning_rate": 4.9383686666357914e-05, "loss": 2.4932, "step": 426000 }, { "epoch": 1.23, "learning_rate": 4.9382963018710636e-05, "loss": 2.4841, "step": 426500 }, { "epoch": 1.24, "learning_rate": 4.938223937106336e-05, "loss": 2.5052, "step": 427000 }, { "epoch": 1.24, "learning_rate": 4.938151572341608e-05, "loss": 2.4979, "step": 427500 }, { "epoch": 1.24, "learning_rate": 4.93807920757688e-05, "loss": 2.5041, "step": 428000 }, { "epoch": 1.24, "learning_rate": 4.9380069875416825e-05, "loss": 2.5115, "step": 428500 }, { "epoch": 1.24, "learning_rate": 4.937934622776955e-05, "loss": 2.501, "step": 429000 }, { "epoch": 1.24, "learning_rate": 4.937862258012227e-05, "loss": 2.4892, "step": 429500 }, { "epoch": 1.24, "learning_rate": 4.937789893247499e-05, "loss": 2.474, "step": 430000 }, { "epoch": 1.25, "learning_rate": 4.9377175284827714e-05, "loss": 2.482, "step": 430500 }, { "epoch": 1.25, "learning_rate": 4.9376451637180436e-05, "loss": 2.5014, "step": 431000 }, { "epoch": 1.25, "learning_rate": 4.937572798953316e-05, "loss": 2.4921, "step": 431500 }, { "epoch": 1.25, "learning_rate": 4.937500434188589e-05, "loss": 2.4943, "step": 432000 }, { "epoch": 1.25, "learning_rate": 4.937428069423861e-05, "loss": 2.4741, "step": 432500 }, { "epoch": 1.25, "learning_rate": 4.937355849388663e-05, "loss": 2.4568, "step": 433000 }, { "epoch": 1.25, "learning_rate": 4.9372834846239354e-05, "loss": 2.4895, "step": 433500 }, { "epoch": 1.26, "learning_rate": 4.9372111198592077e-05, "loss": 2.5086, "step": 434000 }, { "epoch": 1.26, "learning_rate": 4.93713875509448e-05, "loss": 2.5169, "step": 434500 }, { "epoch": 1.26, "learning_rate": 4.937066390329752e-05, "loss": 2.4948, "step": 435000 }, { "epoch": 1.26, "learning_rate": 4.936994025565024e-05, "loss": 2.4773, "step": 435500 }, { "epoch": 1.26, "learning_rate": 4.9369216608002965e-05, "loss": 2.4763, "step": 436000 }, { "epoch": 1.26, "learning_rate": 4.936849296035569e-05, "loss": 2.4989, "step": 436500 }, { "epoch": 1.26, "learning_rate": 4.936776931270841e-05, "loss": 2.4928, "step": 437000 }, { "epoch": 1.27, "learning_rate": 4.9367048559651726e-05, "loss": 2.4956, "step": 437500 }, { "epoch": 1.27, "learning_rate": 4.936632635929974e-05, "loss": 2.5082, "step": 438000 }, { "epoch": 1.27, "learning_rate": 4.936560271165246e-05, "loss": 2.5149, "step": 438500 }, { "epoch": 1.27, "learning_rate": 4.9364879064005186e-05, "loss": 2.5122, "step": 439000 }, { "epoch": 1.27, "learning_rate": 4.936415541635791e-05, "loss": 2.483, "step": 439500 }, { "epoch": 1.27, "learning_rate": 4.936343176871064e-05, "loss": 2.5168, "step": 440000 }, { "epoch": 1.28, "learning_rate": 4.936270812106336e-05, "loss": 2.4978, "step": 440500 }, { "epoch": 1.28, "learning_rate": 4.936198447341608e-05, "loss": 2.4748, "step": 441000 }, { "epoch": 1.28, "learning_rate": 4.936126082576881e-05, "loss": 2.5069, "step": 441500 }, { "epoch": 1.28, "learning_rate": 4.936053717812153e-05, "loss": 2.4774, "step": 442000 }, { "epoch": 1.28, "learning_rate": 4.935981497776955e-05, "loss": 2.4984, "step": 442500 }, { "epoch": 1.28, "learning_rate": 4.935909133012227e-05, "loss": 2.5064, "step": 443000 }, { "epoch": 1.28, "learning_rate": 4.935836768247499e-05, "loss": 2.4752, "step": 443500 }, { "epoch": 1.29, "learning_rate": 4.9357644034827715e-05, "loss": 2.4882, "step": 444000 }, { "epoch": 1.29, "learning_rate": 4.935692038718044e-05, "loss": 2.4752, "step": 444500 }, { "epoch": 1.29, "learning_rate": 4.935619673953316e-05, "loss": 2.4859, "step": 445000 }, { "epoch": 1.29, "learning_rate": 4.935547309188588e-05, "loss": 2.5133, "step": 445500 }, { "epoch": 1.29, "learning_rate": 4.9354750891533904e-05, "loss": 2.5026, "step": 446000 }, { "epoch": 1.29, "learning_rate": 4.9354027243886626e-05, "loss": 2.4923, "step": 446500 }, { "epoch": 1.29, "learning_rate": 4.935330359623935e-05, "loss": 2.484, "step": 447000 }, { "epoch": 1.3, "learning_rate": 4.935257994859208e-05, "loss": 2.496, "step": 447500 }, { "epoch": 1.3, "learning_rate": 4.93518563009448e-05, "loss": 2.5054, "step": 448000 }, { "epoch": 1.3, "learning_rate": 4.935113265329752e-05, "loss": 2.4784, "step": 448500 }, { "epoch": 1.3, "learning_rate": 4.9350409005650244e-05, "loss": 2.488, "step": 449000 }, { "epoch": 1.3, "learning_rate": 4.934968680529826e-05, "loss": 2.5065, "step": 449500 }, { "epoch": 1.3, "learning_rate": 4.934896315765098e-05, "loss": 2.5161, "step": 450000 }, { "epoch": 1.3, "learning_rate": 4.934823951000371e-05, "loss": 2.4963, "step": 450500 }, { "epoch": 1.31, "learning_rate": 4.9347517309651726e-05, "loss": 2.5131, "step": 451000 }, { "epoch": 1.31, "learning_rate": 4.934679510929974e-05, "loss": 2.4986, "step": 451500 }, { "epoch": 1.31, "learning_rate": 4.9346071461652464e-05, "loss": 2.4783, "step": 452000 }, { "epoch": 1.31, "learning_rate": 4.9345347814005186e-05, "loss": 2.5201, "step": 452500 }, { "epoch": 1.31, "learning_rate": 4.934462416635791e-05, "loss": 2.4925, "step": 453000 }, { "epoch": 1.31, "learning_rate": 4.934390051871063e-05, "loss": 2.5166, "step": 453500 }, { "epoch": 1.31, "learning_rate": 4.934317687106336e-05, "loss": 2.5019, "step": 454000 }, { "epoch": 1.32, "learning_rate": 4.934245322341608e-05, "loss": 2.4894, "step": 454500 }, { "epoch": 1.32, "learning_rate": 4.934172957576881e-05, "loss": 2.4914, "step": 455000 }, { "epoch": 1.32, "learning_rate": 4.934100737541683e-05, "loss": 2.5387, "step": 455500 }, { "epoch": 1.32, "learning_rate": 4.934028372776955e-05, "loss": 2.4868, "step": 456000 }, { "epoch": 1.32, "learning_rate": 4.933956008012227e-05, "loss": 2.4947, "step": 456500 }, { "epoch": 1.32, "learning_rate": 4.933883787977029e-05, "loss": 2.5142, "step": 457000 }, { "epoch": 1.32, "learning_rate": 4.933811423212301e-05, "loss": 2.5149, "step": 457500 }, { "epoch": 1.33, "learning_rate": 4.933739058447573e-05, "loss": 2.5074, "step": 458000 }, { "epoch": 1.33, "learning_rate": 4.933666693682846e-05, "loss": 2.514, "step": 458500 }, { "epoch": 1.33, "learning_rate": 4.933594328918118e-05, "loss": 2.5022, "step": 459000 }, { "epoch": 1.33, "learning_rate": 4.9335219641533905e-05, "loss": 2.4818, "step": 459500 }, { "epoch": 1.33, "learning_rate": 4.933449599388663e-05, "loss": 2.4612, "step": 460000 }, { "epoch": 1.33, "learning_rate": 4.933377234623935e-05, "loss": 2.511, "step": 460500 }, { "epoch": 1.33, "learning_rate": 4.933304869859207e-05, "loss": 2.5015, "step": 461000 }, { "epoch": 1.34, "learning_rate": 4.9332325050944794e-05, "loss": 2.5214, "step": 461500 }, { "epoch": 1.34, "learning_rate": 4.933160140329752e-05, "loss": 2.4951, "step": 462000 }, { "epoch": 1.34, "learning_rate": 4.933087920294554e-05, "loss": 2.4968, "step": 462500 }, { "epoch": 1.34, "learning_rate": 4.933015555529826e-05, "loss": 2.4917, "step": 463000 }, { "epoch": 1.34, "learning_rate": 4.932943190765098e-05, "loss": 2.4832, "step": 463500 }, { "epoch": 1.34, "learning_rate": 4.932870826000371e-05, "loss": 2.5156, "step": 464000 }, { "epoch": 1.34, "learning_rate": 4.9327984612356434e-05, "loss": 2.5042, "step": 464500 }, { "epoch": 1.35, "learning_rate": 4.9327260964709156e-05, "loss": 2.5103, "step": 465000 }, { "epoch": 1.35, "learning_rate": 4.932653731706188e-05, "loss": 2.5106, "step": 465500 }, { "epoch": 1.35, "learning_rate": 4.93258136694146e-05, "loss": 2.4987, "step": 466000 }, { "epoch": 1.35, "learning_rate": 4.932509002176732e-05, "loss": 2.5337, "step": 466500 }, { "epoch": 1.35, "learning_rate": 4.9324366374120045e-05, "loss": 2.5077, "step": 467000 }, { "epoch": 1.35, "learning_rate": 4.932364272647277e-05, "loss": 2.5011, "step": 467500 }, { "epoch": 1.35, "learning_rate": 4.932291907882549e-05, "loss": 2.4958, "step": 468000 }, { "epoch": 1.36, "learning_rate": 4.932219687847351e-05, "loss": 2.5053, "step": 468500 }, { "epoch": 1.36, "learning_rate": 4.9321473230826234e-05, "loss": 2.5022, "step": 469000 }, { "epoch": 1.36, "learning_rate": 4.932074958317896e-05, "loss": 2.4982, "step": 469500 }, { "epoch": 1.36, "learning_rate": 4.9320025935531685e-05, "loss": 2.4773, "step": 470000 }, { "epoch": 1.36, "learning_rate": 4.9319305182474994e-05, "loss": 2.5151, "step": 470500 }, { "epoch": 1.36, "learning_rate": 4.9318581534827716e-05, "loss": 2.5076, "step": 471000 }, { "epoch": 1.36, "learning_rate": 4.931785788718044e-05, "loss": 2.501, "step": 471500 }, { "epoch": 1.37, "learning_rate": 4.931713423953316e-05, "loss": 2.4904, "step": 472000 }, { "epoch": 1.37, "learning_rate": 4.931641059188589e-05, "loss": 2.5191, "step": 472500 }, { "epoch": 1.37, "learning_rate": 4.931568694423861e-05, "loss": 2.5079, "step": 473000 }, { "epoch": 1.37, "learning_rate": 4.9314963296591334e-05, "loss": 2.5204, "step": 473500 }, { "epoch": 1.37, "learning_rate": 4.9314239648944057e-05, "loss": 2.4859, "step": 474000 }, { "epoch": 1.37, "learning_rate": 4.931351600129678e-05, "loss": 2.4855, "step": 474500 }, { "epoch": 1.37, "learning_rate": 4.93127923536495e-05, "loss": 2.518, "step": 475000 }, { "epoch": 1.38, "learning_rate": 4.931206870600222e-05, "loss": 2.488, "step": 475500 }, { "epoch": 1.38, "learning_rate": 4.931134650565024e-05, "loss": 2.4985, "step": 476000 }, { "epoch": 1.38, "learning_rate": 4.931062285800296e-05, "loss": 2.5175, "step": 476500 }, { "epoch": 1.38, "learning_rate": 4.930989921035569e-05, "loss": 2.4883, "step": 477000 }, { "epoch": 1.38, "learning_rate": 4.930917556270841e-05, "loss": 2.4961, "step": 477500 }, { "epoch": 1.38, "learning_rate": 4.9308451915061135e-05, "loss": 2.5053, "step": 478000 }, { "epoch": 1.39, "learning_rate": 4.930772971470916e-05, "loss": 2.5313, "step": 478500 }, { "epoch": 1.39, "learning_rate": 4.930700606706188e-05, "loss": 2.4957, "step": 479000 }, { "epoch": 1.39, "learning_rate": 4.93062824194146e-05, "loss": 2.4974, "step": 479500 }, { "epoch": 1.39, "learning_rate": 4.930556021906262e-05, "loss": 2.4726, "step": 480000 }, { "epoch": 1.39, "learning_rate": 4.930483657141534e-05, "loss": 2.5018, "step": 480500 }, { "epoch": 1.39, "learning_rate": 4.930411292376806e-05, "loss": 2.522, "step": 481000 }, { "epoch": 1.39, "learning_rate": 4.930338927612079e-05, "loss": 2.491, "step": 481500 }, { "epoch": 1.4, "learning_rate": 4.930266562847351e-05, "loss": 2.499, "step": 482000 }, { "epoch": 1.4, "learning_rate": 4.930194342812153e-05, "loss": 2.4959, "step": 482500 }, { "epoch": 1.4, "learning_rate": 4.930121978047425e-05, "loss": 2.5091, "step": 483000 }, { "epoch": 1.4, "learning_rate": 4.930049613282697e-05, "loss": 2.4756, "step": 483500 }, { "epoch": 1.4, "learning_rate": 4.9299772485179695e-05, "loss": 2.4984, "step": 484000 }, { "epoch": 1.4, "learning_rate": 4.9299048837532424e-05, "loss": 2.5107, "step": 484500 }, { "epoch": 1.4, "learning_rate": 4.9298325189885146e-05, "loss": 2.5076, "step": 485000 }, { "epoch": 1.41, "learning_rate": 4.929760154223787e-05, "loss": 2.4887, "step": 485500 }, { "epoch": 1.41, "learning_rate": 4.929687789459059e-05, "loss": 2.496, "step": 486000 }, { "epoch": 1.41, "learning_rate": 4.929615569423861e-05, "loss": 2.4606, "step": 486500 }, { "epoch": 1.41, "learning_rate": 4.9295432046591335e-05, "loss": 2.5043, "step": 487000 }, { "epoch": 1.41, "learning_rate": 4.929470839894406e-05, "loss": 2.5159, "step": 487500 }, { "epoch": 1.41, "learning_rate": 4.929398475129678e-05, "loss": 2.5117, "step": 488000 }, { "epoch": 1.41, "learning_rate": 4.92932611036495e-05, "loss": 2.5025, "step": 488500 }, { "epoch": 1.42, "learning_rate": 4.9292537456002224e-05, "loss": 2.4962, "step": 489000 }, { "epoch": 1.42, "learning_rate": 4.9291813808354946e-05, "loss": 2.5245, "step": 489500 }, { "epoch": 1.42, "learning_rate": 4.929109016070767e-05, "loss": 2.4995, "step": 490000 }, { "epoch": 1.42, "learning_rate": 4.929036796035569e-05, "loss": 2.5143, "step": 490500 }, { "epoch": 1.42, "learning_rate": 4.928964431270841e-05, "loss": 2.489, "step": 491000 }, { "epoch": 1.42, "learning_rate": 4.9288920665061135e-05, "loss": 2.5129, "step": 491500 }, { "epoch": 1.42, "learning_rate": 4.9288197017413864e-05, "loss": 2.49, "step": 492000 }, { "epoch": 1.43, "learning_rate": 4.9287473369766587e-05, "loss": 2.481, "step": 492500 }, { "epoch": 1.43, "learning_rate": 4.92867511694146e-05, "loss": 2.4899, "step": 493000 }, { "epoch": 1.43, "learning_rate": 4.928602896906262e-05, "loss": 2.4908, "step": 493500 }, { "epoch": 1.43, "learning_rate": 4.928530532141534e-05, "loss": 2.491, "step": 494000 }, { "epoch": 1.43, "learning_rate": 4.928458167376806e-05, "loss": 2.4896, "step": 494500 }, { "epoch": 1.43, "learning_rate": 4.928385802612079e-05, "loss": 2.5265, "step": 495000 }, { "epoch": 1.43, "learning_rate": 4.9283134378473513e-05, "loss": 2.4916, "step": 495500 }, { "epoch": 1.44, "learning_rate": 4.9282410730826236e-05, "loss": 2.4934, "step": 496000 }, { "epoch": 1.44, "learning_rate": 4.928168853047425e-05, "loss": 2.4993, "step": 496500 }, { "epoch": 1.44, "learning_rate": 4.928096633012227e-05, "loss": 2.4813, "step": 497000 }, { "epoch": 1.44, "learning_rate": 4.928024268247499e-05, "loss": 2.5088, "step": 497500 }, { "epoch": 1.44, "learning_rate": 4.927951903482772e-05, "loss": 2.5156, "step": 498000 }, { "epoch": 1.44, "learning_rate": 4.927879538718044e-05, "loss": 2.4897, "step": 498500 }, { "epoch": 1.44, "learning_rate": 4.927807173953316e-05, "loss": 2.5155, "step": 499000 }, { "epoch": 1.45, "learning_rate": 4.927734809188589e-05, "loss": 2.4775, "step": 499500 }, { "epoch": 1.45, "learning_rate": 4.9276624444238614e-05, "loss": 2.4957, "step": 500000 }, { "epoch": 1.45, "learning_rate": 4.9275900796591336e-05, "loss": 2.5048, "step": 500500 }, { "epoch": 1.45, "learning_rate": 4.927517714894406e-05, "loss": 2.477, "step": 501000 }, { "epoch": 1.45, "learning_rate": 4.927445350129678e-05, "loss": 2.4807, "step": 501500 }, { "epoch": 1.45, "learning_rate": 4.92737298536495e-05, "loss": 2.5127, "step": 502000 }, { "epoch": 1.45, "learning_rate": 4.9273006206002225e-05, "loss": 2.4951, "step": 502500 }, { "epoch": 1.46, "learning_rate": 4.927228400565024e-05, "loss": 2.5041, "step": 503000 }, { "epoch": 1.46, "learning_rate": 4.927156035800297e-05, "loss": 2.5055, "step": 503500 }, { "epoch": 1.46, "learning_rate": 4.927083671035569e-05, "loss": 2.4868, "step": 504000 }, { "epoch": 1.46, "learning_rate": 4.9270113062708414e-05, "loss": 2.4818, "step": 504500 }, { "epoch": 1.46, "learning_rate": 4.9269389415061136e-05, "loss": 2.5032, "step": 505000 }, { "epoch": 1.46, "learning_rate": 4.926866576741386e-05, "loss": 2.4784, "step": 505500 }, { "epoch": 1.46, "learning_rate": 4.926794211976658e-05, "loss": 2.4953, "step": 506000 }, { "epoch": 1.47, "learning_rate": 4.92672184721193e-05, "loss": 2.4901, "step": 506500 }, { "epoch": 1.47, "learning_rate": 4.9266496271767325e-05, "loss": 2.4891, "step": 507000 }, { "epoch": 1.47, "learning_rate": 4.926577262412005e-05, "loss": 2.5102, "step": 507500 }, { "epoch": 1.47, "learning_rate": 4.926504897647277e-05, "loss": 2.5084, "step": 508000 }, { "epoch": 1.47, "learning_rate": 4.926432532882549e-05, "loss": 2.5296, "step": 508500 }, { "epoch": 1.47, "learning_rate": 4.926360168117822e-05, "loss": 2.5006, "step": 509000 }, { "epoch": 1.47, "learning_rate": 4.926287803353094e-05, "loss": 2.4797, "step": 509500 }, { "epoch": 1.48, "learning_rate": 4.9262154385883665e-05, "loss": 2.4872, "step": 510000 }, { "epoch": 1.48, "learning_rate": 4.926143073823639e-05, "loss": 2.4826, "step": 510500 }, { "epoch": 1.48, "learning_rate": 4.926070709058911e-05, "loss": 2.4968, "step": 511000 }, { "epoch": 1.48, "learning_rate": 4.9259984890237125e-05, "loss": 2.4826, "step": 511500 }, { "epoch": 1.48, "learning_rate": 4.925926268988514e-05, "loss": 2.485, "step": 512000 }, { "epoch": 1.48, "learning_rate": 4.925853904223787e-05, "loss": 2.4727, "step": 512500 }, { "epoch": 1.48, "learning_rate": 4.925781539459059e-05, "loss": 2.5098, "step": 513000 }, { "epoch": 1.49, "learning_rate": 4.9257091746943314e-05, "loss": 2.4909, "step": 513500 }, { "epoch": 1.49, "learning_rate": 4.925636809929604e-05, "loss": 2.4925, "step": 514000 }, { "epoch": 1.49, "learning_rate": 4.9255644451648766e-05, "loss": 2.4956, "step": 514500 }, { "epoch": 1.49, "learning_rate": 4.925492080400149e-05, "loss": 2.4979, "step": 515000 }, { "epoch": 1.49, "learning_rate": 4.9254198603649503e-05, "loss": 2.5061, "step": 515500 }, { "epoch": 1.49, "learning_rate": 4.9253474956002226e-05, "loss": 2.4945, "step": 516000 }, { "epoch": 1.5, "learning_rate": 4.925275130835495e-05, "loss": 2.4895, "step": 516500 }, { "epoch": 1.5, "learning_rate": 4.925202766070767e-05, "loss": 2.4949, "step": 517000 }, { "epoch": 1.5, "learning_rate": 4.925130401306039e-05, "loss": 2.4787, "step": 517500 }, { "epoch": 1.5, "learning_rate": 4.925058036541312e-05, "loss": 2.4998, "step": 518000 }, { "epoch": 1.5, "learning_rate": 4.9249856717765844e-05, "loss": 2.4783, "step": 518500 }, { "epoch": 1.5, "learning_rate": 4.9249133070118566e-05, "loss": 2.4817, "step": 519000 }, { "epoch": 1.5, "learning_rate": 4.924840942247129e-05, "loss": 2.5147, "step": 519500 }, { "epoch": 1.51, "learning_rate": 4.9247687222119304e-05, "loss": 2.4879, "step": 520000 }, { "epoch": 1.51, "learning_rate": 4.9246963574472026e-05, "loss": 2.4775, "step": 520500 }, { "epoch": 1.51, "learning_rate": 4.924623992682475e-05, "loss": 2.5011, "step": 521000 }, { "epoch": 1.51, "learning_rate": 4.924551627917748e-05, "loss": 2.4795, "step": 521500 }, { "epoch": 1.51, "learning_rate": 4.92447926315302e-05, "loss": 2.4879, "step": 522000 }, { "epoch": 1.51, "learning_rate": 4.924406898388292e-05, "loss": 2.4898, "step": 522500 }, { "epoch": 1.51, "learning_rate": 4.9243346783530944e-05, "loss": 2.4919, "step": 523000 }, { "epoch": 1.52, "learning_rate": 4.924262458317896e-05, "loss": 2.4747, "step": 523500 }, { "epoch": 1.52, "learning_rate": 4.924190093553168e-05, "loss": 2.5038, "step": 524000 }, { "epoch": 1.52, "learning_rate": 4.9241177287884404e-05, "loss": 2.482, "step": 524500 }, { "epoch": 1.52, "learning_rate": 4.9240453640237126e-05, "loss": 2.4987, "step": 525000 }, { "epoch": 1.52, "learning_rate": 4.923972999258985e-05, "loss": 2.5086, "step": 525500 }, { "epoch": 1.52, "learning_rate": 4.9239009239533164e-05, "loss": 2.4829, "step": 526000 }, { "epoch": 1.52, "learning_rate": 4.9238285591885886e-05, "loss": 2.495, "step": 526500 }, { "epoch": 1.53, "learning_rate": 4.923756194423861e-05, "loss": 2.4826, "step": 527000 }, { "epoch": 1.53, "learning_rate": 4.923683829659133e-05, "loss": 2.495, "step": 527500 }, { "epoch": 1.53, "learning_rate": 4.9236116096239346e-05, "loss": 2.5171, "step": 528000 }, { "epoch": 1.53, "learning_rate": 4.923539244859207e-05, "loss": 2.4922, "step": 528500 }, { "epoch": 1.53, "learning_rate": 4.92346688009448e-05, "loss": 2.5008, "step": 529000 }, { "epoch": 1.53, "learning_rate": 4.923394515329752e-05, "loss": 2.4647, "step": 529500 }, { "epoch": 1.53, "learning_rate": 4.923322150565025e-05, "loss": 2.5083, "step": 530000 }, { "epoch": 1.54, "learning_rate": 4.923249785800297e-05, "loss": 2.5027, "step": 530500 }, { "epoch": 1.54, "learning_rate": 4.923177421035569e-05, "loss": 2.4817, "step": 531000 }, { "epoch": 1.54, "learning_rate": 4.9231050562708416e-05, "loss": 2.4812, "step": 531500 }, { "epoch": 1.54, "learning_rate": 4.923032691506114e-05, "loss": 2.4984, "step": 532000 }, { "epoch": 1.54, "learning_rate": 4.922960326741386e-05, "loss": 2.4879, "step": 532500 }, { "epoch": 1.54, "learning_rate": 4.9228881067061876e-05, "loss": 2.4955, "step": 533000 }, { "epoch": 1.54, "learning_rate": 4.92281574194146e-05, "loss": 2.5079, "step": 533500 }, { "epoch": 1.55, "learning_rate": 4.922743377176732e-05, "loss": 2.494, "step": 534000 }, { "epoch": 1.55, "learning_rate": 4.922671012412005e-05, "loss": 2.484, "step": 534500 }, { "epoch": 1.55, "learning_rate": 4.922598647647277e-05, "loss": 2.4868, "step": 535000 }, { "epoch": 1.55, "learning_rate": 4.9225262828825493e-05, "loss": 2.4798, "step": 535500 }, { "epoch": 1.55, "learning_rate": 4.9224539181178216e-05, "loss": 2.4837, "step": 536000 }, { "epoch": 1.55, "learning_rate": 4.922381553353094e-05, "loss": 2.4827, "step": 536500 }, { "epoch": 1.55, "learning_rate": 4.922309188588367e-05, "loss": 2.4863, "step": 537000 }, { "epoch": 1.56, "learning_rate": 4.922236968553168e-05, "loss": 2.4668, "step": 537500 }, { "epoch": 1.56, "learning_rate": 4.9221646037884405e-05, "loss": 2.4885, "step": 538000 }, { "epoch": 1.56, "learning_rate": 4.922092239023713e-05, "loss": 2.4987, "step": 538500 }, { "epoch": 1.56, "learning_rate": 4.922019874258985e-05, "loss": 2.4963, "step": 539000 }, { "epoch": 1.56, "learning_rate": 4.921947509494257e-05, "loss": 2.4776, "step": 539500 }, { "epoch": 1.56, "learning_rate": 4.9218752894590594e-05, "loss": 2.5057, "step": 540000 }, { "epoch": 1.56, "learning_rate": 4.921803069423861e-05, "loss": 2.5057, "step": 540500 }, { "epoch": 1.57, "learning_rate": 4.921730704659133e-05, "loss": 2.4833, "step": 541000 }, { "epoch": 1.57, "learning_rate": 4.9216583398944054e-05, "loss": 2.5018, "step": 541500 }, { "epoch": 1.57, "learning_rate": 4.9215859751296776e-05, "loss": 2.4705, "step": 542000 }, { "epoch": 1.57, "learning_rate": 4.92151361036495e-05, "loss": 2.518, "step": 542500 }, { "epoch": 1.57, "learning_rate": 4.921441245600222e-05, "loss": 2.4667, "step": 543000 }, { "epoch": 1.57, "learning_rate": 4.921368880835495e-05, "loss": 2.4827, "step": 543500 }, { "epoch": 1.57, "learning_rate": 4.921296516070767e-05, "loss": 2.4745, "step": 544000 }, { "epoch": 1.58, "learning_rate": 4.92122415130604e-05, "loss": 2.4953, "step": 544500 }, { "epoch": 1.58, "learning_rate": 4.921151786541312e-05, "loss": 2.4967, "step": 545000 }, { "epoch": 1.58, "learning_rate": 4.9210794217765845e-05, "loss": 2.4962, "step": 545500 }, { "epoch": 1.58, "learning_rate": 4.921007057011857e-05, "loss": 2.5051, "step": 546000 }, { "epoch": 1.58, "learning_rate": 4.920934836976658e-05, "loss": 2.4943, "step": 546500 }, { "epoch": 1.58, "learning_rate": 4.9208624722119305e-05, "loss": 2.4863, "step": 547000 }, { "epoch": 1.58, "learning_rate": 4.920790107447203e-05, "loss": 2.4967, "step": 547500 }, { "epoch": 1.59, "learning_rate": 4.920717887412005e-05, "loss": 2.4824, "step": 548000 }, { "epoch": 1.59, "learning_rate": 4.9206456673768065e-05, "loss": 2.4566, "step": 548500 }, { "epoch": 1.59, "learning_rate": 4.920573302612079e-05, "loss": 2.5106, "step": 549000 }, { "epoch": 1.59, "learning_rate": 4.920500937847351e-05, "loss": 2.4874, "step": 549500 }, { "epoch": 1.59, "learning_rate": 4.920428573082623e-05, "loss": 2.4767, "step": 550000 }, { "epoch": 1.59, "learning_rate": 4.9203562083178954e-05, "loss": 2.4894, "step": 550500 }, { "epoch": 1.59, "learning_rate": 4.9202838435531677e-05, "loss": 2.4815, "step": 551000 }, { "epoch": 1.6, "learning_rate": 4.92021147878844e-05, "loss": 2.5012, "step": 551500 }, { "epoch": 1.6, "learning_rate": 4.920139114023713e-05, "loss": 2.5052, "step": 552000 }, { "epoch": 1.6, "learning_rate": 4.920066749258985e-05, "loss": 2.4719, "step": 552500 }, { "epoch": 1.6, "learning_rate": 4.919994384494257e-05, "loss": 2.5186, "step": 553000 }, { "epoch": 1.6, "learning_rate": 4.9199221644590595e-05, "loss": 2.4941, "step": 553500 }, { "epoch": 1.6, "learning_rate": 4.919849944423861e-05, "loss": 2.4848, "step": 554000 }, { "epoch": 1.61, "learning_rate": 4.919777579659133e-05, "loss": 2.4758, "step": 554500 }, { "epoch": 1.61, "learning_rate": 4.9197052148944055e-05, "loss": 2.4923, "step": 555000 }, { "epoch": 1.61, "learning_rate": 4.919632850129678e-05, "loss": 2.4876, "step": 555500 }, { "epoch": 1.61, "learning_rate": 4.91956048536495e-05, "loss": 2.4918, "step": 556000 }, { "epoch": 1.61, "learning_rate": 4.919488120600222e-05, "loss": 2.4641, "step": 556500 }, { "epoch": 1.61, "learning_rate": 4.919415755835495e-05, "loss": 2.4979, "step": 557000 }, { "epoch": 1.61, "learning_rate": 4.919343391070767e-05, "loss": 2.499, "step": 557500 }, { "epoch": 1.62, "learning_rate": 4.919271171035569e-05, "loss": 2.4795, "step": 558000 }, { "epoch": 1.62, "learning_rate": 4.919198806270841e-05, "loss": 2.4906, "step": 558500 }, { "epoch": 1.62, "learning_rate": 4.919126441506113e-05, "loss": 2.486, "step": 559000 }, { "epoch": 1.62, "learning_rate": 4.919054076741386e-05, "loss": 2.4906, "step": 559500 }, { "epoch": 1.62, "learning_rate": 4.9189817119766584e-05, "loss": 2.4984, "step": 560000 }, { "epoch": 1.62, "learning_rate": 4.91890949194146e-05, "loss": 2.4984, "step": 560500 }, { "epoch": 1.62, "learning_rate": 4.918837127176733e-05, "loss": 2.4949, "step": 561000 }, { "epoch": 1.63, "learning_rate": 4.918764762412005e-05, "loss": 2.4666, "step": 561500 }, { "epoch": 1.63, "learning_rate": 4.918692397647277e-05, "loss": 2.4936, "step": 562000 }, { "epoch": 1.63, "learning_rate": 4.9186200328825495e-05, "loss": 2.4996, "step": 562500 }, { "epoch": 1.63, "learning_rate": 4.918547668117822e-05, "loss": 2.5063, "step": 563000 }, { "epoch": 1.63, "learning_rate": 4.918475448082623e-05, "loss": 2.4814, "step": 563500 }, { "epoch": 1.63, "learning_rate": 4.9184030833178955e-05, "loss": 2.4874, "step": 564000 }, { "epoch": 1.63, "learning_rate": 4.918330718553168e-05, "loss": 2.4583, "step": 564500 }, { "epoch": 1.64, "learning_rate": 4.91825835378844e-05, "loss": 2.4563, "step": 565000 }, { "epoch": 1.64, "learning_rate": 4.918185989023713e-05, "loss": 2.4901, "step": 565500 }, { "epoch": 1.64, "learning_rate": 4.9181137689885144e-05, "loss": 2.4948, "step": 566000 }, { "epoch": 1.64, "learning_rate": 4.9180414042237866e-05, "loss": 2.5025, "step": 566500 }, { "epoch": 1.64, "learning_rate": 4.9179690394590595e-05, "loss": 2.4818, "step": 567000 }, { "epoch": 1.64, "learning_rate": 4.917896674694332e-05, "loss": 2.502, "step": 567500 }, { "epoch": 1.64, "learning_rate": 4.917824309929604e-05, "loss": 2.5016, "step": 568000 }, { "epoch": 1.65, "learning_rate": 4.9177520898944055e-05, "loss": 2.4501, "step": 568500 }, { "epoch": 1.65, "learning_rate": 4.917679725129678e-05, "loss": 2.4832, "step": 569000 }, { "epoch": 1.65, "learning_rate": 4.91760736036495e-05, "loss": 2.4943, "step": 569500 }, { "epoch": 1.65, "learning_rate": 4.917534995600223e-05, "loss": 2.4702, "step": 570000 }, { "epoch": 1.65, "learning_rate": 4.917462630835495e-05, "loss": 2.4958, "step": 570500 }, { "epoch": 1.65, "learning_rate": 4.917390266070767e-05, "loss": 2.4763, "step": 571000 }, { "epoch": 1.65, "learning_rate": 4.9173179013060396e-05, "loss": 2.4955, "step": 571500 }, { "epoch": 1.66, "learning_rate": 4.917245536541312e-05, "loss": 2.4843, "step": 572000 }, { "epoch": 1.66, "learning_rate": 4.917173316506113e-05, "loss": 2.5, "step": 572500 }, { "epoch": 1.66, "learning_rate": 4.917101241200445e-05, "loss": 2.4998, "step": 573000 }, { "epoch": 1.66, "learning_rate": 4.917028876435717e-05, "loss": 2.5153, "step": 573500 }, { "epoch": 1.66, "learning_rate": 4.9169565116709893e-05, "loss": 2.4344, "step": 574000 }, { "epoch": 1.66, "learning_rate": 4.916884146906262e-05, "loss": 2.4757, "step": 574500 }, { "epoch": 1.66, "learning_rate": 4.9168117821415345e-05, "loss": 2.4799, "step": 575000 }, { "epoch": 1.67, "learning_rate": 4.916739562106336e-05, "loss": 2.4955, "step": 575500 }, { "epoch": 1.67, "learning_rate": 4.916667197341608e-05, "loss": 2.4672, "step": 576000 }, { "epoch": 1.67, "learning_rate": 4.9165948325768805e-05, "loss": 2.4709, "step": 576500 }, { "epoch": 1.67, "learning_rate": 4.916522467812153e-05, "loss": 2.4897, "step": 577000 }, { "epoch": 1.67, "learning_rate": 4.9164501030474256e-05, "loss": 2.4983, "step": 577500 }, { "epoch": 1.67, "learning_rate": 4.916377738282698e-05, "loss": 2.4891, "step": 578000 }, { "epoch": 1.67, "learning_rate": 4.91630537351797e-05, "loss": 2.4642, "step": 578500 }, { "epoch": 1.68, "learning_rate": 4.916233008753242e-05, "loss": 2.4995, "step": 579000 }, { "epoch": 1.68, "learning_rate": 4.9161606439885145e-05, "loss": 2.4767, "step": 579500 }, { "epoch": 1.68, "learning_rate": 4.916088279223787e-05, "loss": 2.4562, "step": 580000 }, { "epoch": 1.68, "learning_rate": 4.916015914459059e-05, "loss": 2.4799, "step": 580500 }, { "epoch": 1.68, "learning_rate": 4.915943549694331e-05, "loss": 2.489, "step": 581000 }, { "epoch": 1.68, "learning_rate": 4.9158711849296034e-05, "loss": 2.4999, "step": 581500 }, { "epoch": 1.68, "learning_rate": 4.915799109623935e-05, "loss": 2.5148, "step": 582000 }, { "epoch": 1.69, "learning_rate": 4.915726744859208e-05, "loss": 2.476, "step": 582500 }, { "epoch": 1.69, "learning_rate": 4.91565438009448e-05, "loss": 2.4783, "step": 583000 }, { "epoch": 1.69, "learning_rate": 4.915582015329752e-05, "loss": 2.4555, "step": 583500 }, { "epoch": 1.69, "learning_rate": 4.915509795294554e-05, "loss": 2.4898, "step": 584000 }, { "epoch": 1.69, "learning_rate": 4.915437430529826e-05, "loss": 2.4522, "step": 584500 }, { "epoch": 1.69, "learning_rate": 4.9153652104946276e-05, "loss": 2.4888, "step": 585000 }, { "epoch": 1.69, "learning_rate": 4.9152928457299005e-05, "loss": 2.5128, "step": 585500 }, { "epoch": 1.7, "learning_rate": 4.915220480965173e-05, "loss": 2.5045, "step": 586000 }, { "epoch": 1.7, "learning_rate": 4.915148116200445e-05, "loss": 2.4815, "step": 586500 }, { "epoch": 1.7, "learning_rate": 4.915075751435717e-05, "loss": 2.4852, "step": 587000 }, { "epoch": 1.7, "learning_rate": 4.9150033866709894e-05, "loss": 2.4794, "step": 587500 }, { "epoch": 1.7, "learning_rate": 4.9149310219062616e-05, "loss": 2.4932, "step": 588000 }, { "epoch": 1.7, "learning_rate": 4.914858657141534e-05, "loss": 2.5042, "step": 588500 }, { "epoch": 1.7, "learning_rate": 4.9147864371063354e-05, "loss": 2.4954, "step": 589000 }, { "epoch": 1.71, "learning_rate": 4.9147140723416077e-05, "loss": 2.4832, "step": 589500 }, { "epoch": 1.71, "learning_rate": 4.9146417075768806e-05, "loss": 2.5055, "step": 590000 }, { "epoch": 1.71, "learning_rate": 4.914569342812153e-05, "loss": 2.5143, "step": 590500 }, { "epoch": 1.71, "learning_rate": 4.914497122776955e-05, "loss": 2.4649, "step": 591000 }, { "epoch": 1.71, "learning_rate": 4.9144249027417566e-05, "loss": 2.4511, "step": 591500 }, { "epoch": 1.71, "learning_rate": 4.914352537977029e-05, "loss": 2.4861, "step": 592000 }, { "epoch": 1.72, "learning_rate": 4.914280173212301e-05, "loss": 2.4928, "step": 592500 }, { "epoch": 1.72, "learning_rate": 4.914207808447573e-05, "loss": 2.4933, "step": 593000 }, { "epoch": 1.72, "learning_rate": 4.9141354436828455e-05, "loss": 2.4866, "step": 593500 }, { "epoch": 1.72, "learning_rate": 4.914063223647648e-05, "loss": 2.48, "step": 594000 }, { "epoch": 1.72, "learning_rate": 4.913991003612449e-05, "loss": 2.4971, "step": 594500 }, { "epoch": 1.72, "learning_rate": 4.9139186388477215e-05, "loss": 2.4701, "step": 595000 }, { "epoch": 1.72, "learning_rate": 4.913846274082994e-05, "loss": 2.4785, "step": 595500 }, { "epoch": 1.73, "learning_rate": 4.913773909318266e-05, "loss": 2.4776, "step": 596000 }, { "epoch": 1.73, "learning_rate": 4.913701544553538e-05, "loss": 2.4903, "step": 596500 }, { "epoch": 1.73, "learning_rate": 4.9136291797888104e-05, "loss": 2.4744, "step": 597000 }, { "epoch": 1.73, "learning_rate": 4.9135569597536126e-05, "loss": 2.4594, "step": 597500 }, { "epoch": 1.73, "learning_rate": 4.9134845949888855e-05, "loss": 2.4831, "step": 598000 }, { "epoch": 1.73, "learning_rate": 4.913412230224158e-05, "loss": 2.4806, "step": 598500 }, { "epoch": 1.73, "learning_rate": 4.91333986545943e-05, "loss": 2.5012, "step": 599000 }, { "epoch": 1.74, "learning_rate": 4.913267500694702e-05, "loss": 2.4777, "step": 599500 }, { "epoch": 1.74, "learning_rate": 4.9131951359299744e-05, "loss": 2.4872, "step": 600000 }, { "epoch": 1.74, "learning_rate": 4.9131227711652466e-05, "loss": 2.4898, "step": 600500 }, { "epoch": 1.74, "learning_rate": 4.913050406400519e-05, "loss": 2.5102, "step": 601000 }, { "epoch": 1.74, "learning_rate": 4.912978041635791e-05, "loss": 2.5119, "step": 601500 }, { "epoch": 1.74, "learning_rate": 4.912905676871063e-05, "loss": 2.4605, "step": 602000 }, { "epoch": 1.74, "learning_rate": 4.9128334568358655e-05, "loss": 2.5017, "step": 602500 }, { "epoch": 1.75, "learning_rate": 4.912761092071138e-05, "loss": 2.4838, "step": 603000 }, { "epoch": 1.75, "learning_rate": 4.91268872730641e-05, "loss": 2.4753, "step": 603500 }, { "epoch": 1.75, "learning_rate": 4.912616362541682e-05, "loss": 2.502, "step": 604000 }, { "epoch": 1.75, "learning_rate": 4.9125439977769544e-05, "loss": 2.4865, "step": 604500 }, { "epoch": 1.75, "learning_rate": 4.912471633012227e-05, "loss": 2.5068, "step": 605000 }, { "epoch": 1.75, "learning_rate": 4.9123992682474995e-05, "loss": 2.4926, "step": 605500 }, { "epoch": 1.75, "learning_rate": 4.912327048212301e-05, "loss": 2.5047, "step": 606000 }, { "epoch": 1.76, "learning_rate": 4.912254683447573e-05, "loss": 2.4919, "step": 606500 }, { "epoch": 1.76, "learning_rate": 4.9121824634123755e-05, "loss": 2.4874, "step": 607000 }, { "epoch": 1.76, "learning_rate": 4.912110098647648e-05, "loss": 2.4928, "step": 607500 }, { "epoch": 1.76, "learning_rate": 4.91203773388292e-05, "loss": 2.4781, "step": 608000 }, { "epoch": 1.76, "learning_rate": 4.911965369118192e-05, "loss": 2.4617, "step": 608500 }, { "epoch": 1.76, "learning_rate": 4.9118930043534644e-05, "loss": 2.4693, "step": 609000 }, { "epoch": 1.76, "learning_rate": 4.911820639588737e-05, "loss": 2.4976, "step": 609500 }, { "epoch": 1.77, "learning_rate": 4.911748274824009e-05, "loss": 2.4636, "step": 610000 }, { "epoch": 1.77, "learning_rate": 4.911675910059281e-05, "loss": 2.4877, "step": 610500 }, { "epoch": 1.77, "learning_rate": 4.911603545294553e-05, "loss": 2.4891, "step": 611000 }, { "epoch": 1.77, "learning_rate": 4.9115313252593556e-05, "loss": 2.4957, "step": 611500 }, { "epoch": 1.77, "learning_rate": 4.911458960494628e-05, "loss": 2.5055, "step": 612000 }, { "epoch": 1.77, "learning_rate": 4.911386595729901e-05, "loss": 2.5022, "step": 612500 }, { "epoch": 1.77, "learning_rate": 4.911314230965173e-05, "loss": 2.4679, "step": 613000 }, { "epoch": 1.78, "learning_rate": 4.911241866200445e-05, "loss": 2.4918, "step": 613500 }, { "epoch": 1.78, "learning_rate": 4.911169646165247e-05, "loss": 2.483, "step": 614000 }, { "epoch": 1.78, "learning_rate": 4.911097281400519e-05, "loss": 2.4976, "step": 614500 }, { "epoch": 1.78, "learning_rate": 4.911024916635791e-05, "loss": 2.4811, "step": 615000 }, { "epoch": 1.78, "learning_rate": 4.9109526966005934e-05, "loss": 2.468, "step": 615500 }, { "epoch": 1.78, "learning_rate": 4.9108803318358656e-05, "loss": 2.481, "step": 616000 }, { "epoch": 1.78, "learning_rate": 4.910807967071138e-05, "loss": 2.4925, "step": 616500 }, { "epoch": 1.79, "learning_rate": 4.91073560230641e-05, "loss": 2.4752, "step": 617000 }, { "epoch": 1.79, "learning_rate": 4.910663237541682e-05, "loss": 2.4989, "step": 617500 }, { "epoch": 1.79, "learning_rate": 4.9105908727769545e-05, "loss": 2.4888, "step": 618000 }, { "epoch": 1.79, "learning_rate": 4.910518508012227e-05, "loss": 2.4675, "step": 618500 }, { "epoch": 1.79, "learning_rate": 4.910446143247499e-05, "loss": 2.4727, "step": 619000 }, { "epoch": 1.79, "learning_rate": 4.910373778482771e-05, "loss": 2.4667, "step": 619500 }, { "epoch": 1.79, "learning_rate": 4.910301413718044e-05, "loss": 2.4961, "step": 620000 }, { "epoch": 1.8, "learning_rate": 4.910229048953316e-05, "loss": 2.4695, "step": 620500 }, { "epoch": 1.8, "learning_rate": 4.9101566841885885e-05, "loss": 2.4772, "step": 621000 }, { "epoch": 1.8, "learning_rate": 4.910084464153391e-05, "loss": 2.476, "step": 621500 }, { "epoch": 1.8, "learning_rate": 4.910012099388663e-05, "loss": 2.492, "step": 622000 }, { "epoch": 1.8, "learning_rate": 4.909939734623935e-05, "loss": 2.4669, "step": 622500 }, { "epoch": 1.8, "learning_rate": 4.9098673698592074e-05, "loss": 2.4866, "step": 623000 }, { "epoch": 1.8, "learning_rate": 4.9097950050944796e-05, "loss": 2.4787, "step": 623500 }, { "epoch": 1.81, "learning_rate": 4.909722785059281e-05, "loss": 2.4945, "step": 624000 }, { "epoch": 1.81, "learning_rate": 4.9096504202945534e-05, "loss": 2.4766, "step": 624500 }, { "epoch": 1.81, "learning_rate": 4.9095780555298256e-05, "loss": 2.488, "step": 625000 }, { "epoch": 1.81, "learning_rate": 4.9095056907650985e-05, "loss": 2.4855, "step": 625500 }, { "epoch": 1.81, "learning_rate": 4.909433326000371e-05, "loss": 2.4832, "step": 626000 }, { "epoch": 1.81, "learning_rate": 4.909360961235643e-05, "loss": 2.4698, "step": 626500 }, { "epoch": 1.81, "learning_rate": 4.909288885929974e-05, "loss": 2.4649, "step": 627000 }, { "epoch": 1.82, "learning_rate": 4.909216521165247e-05, "loss": 2.4727, "step": 627500 }, { "epoch": 1.82, "learning_rate": 4.909144156400519e-05, "loss": 2.5006, "step": 628000 }, { "epoch": 1.82, "learning_rate": 4.909071791635791e-05, "loss": 2.4958, "step": 628500 }, { "epoch": 1.82, "learning_rate": 4.9089994268710634e-05, "loss": 2.4839, "step": 629000 }, { "epoch": 1.82, "learning_rate": 4.9089270621063363e-05, "loss": 2.4677, "step": 629500 }, { "epoch": 1.82, "learning_rate": 4.9088546973416086e-05, "loss": 2.5128, "step": 630000 }, { "epoch": 1.83, "learning_rate": 4.908782332576881e-05, "loss": 2.4619, "step": 630500 }, { "epoch": 1.83, "learning_rate": 4.908709967812153e-05, "loss": 2.4832, "step": 631000 }, { "epoch": 1.83, "learning_rate": 4.908637603047425e-05, "loss": 2.4842, "step": 631500 }, { "epoch": 1.83, "learning_rate": 4.9085652382826975e-05, "loss": 2.4805, "step": 632000 }, { "epoch": 1.83, "learning_rate": 4.90849287351797e-05, "loss": 2.4968, "step": 632500 }, { "epoch": 1.83, "learning_rate": 4.908420653482771e-05, "loss": 2.474, "step": 633000 }, { "epoch": 1.83, "learning_rate": 4.9083482887180435e-05, "loss": 2.4662, "step": 633500 }, { "epoch": 1.84, "learning_rate": 4.9082759239533164e-05, "loss": 2.4548, "step": 634000 }, { "epoch": 1.84, "learning_rate": 4.9082035591885886e-05, "loss": 2.4702, "step": 634500 }, { "epoch": 1.84, "learning_rate": 4.9081311944238615e-05, "loss": 2.4821, "step": 635000 }, { "epoch": 1.84, "learning_rate": 4.908058829659134e-05, "loss": 2.4687, "step": 635500 }, { "epoch": 1.84, "learning_rate": 4.907986609623935e-05, "loss": 2.4818, "step": 636000 }, { "epoch": 1.84, "learning_rate": 4.9079142448592075e-05, "loss": 2.4792, "step": 636500 }, { "epoch": 1.84, "learning_rate": 4.90784188009448e-05, "loss": 2.4861, "step": 637000 }, { "epoch": 1.85, "learning_rate": 4.907769515329752e-05, "loss": 2.4768, "step": 637500 }, { "epoch": 1.85, "learning_rate": 4.907697150565024e-05, "loss": 2.4473, "step": 638000 }, { "epoch": 1.85, "learning_rate": 4.9076247858002964e-05, "loss": 2.4703, "step": 638500 }, { "epoch": 1.85, "learning_rate": 4.9075524210355686e-05, "loss": 2.4754, "step": 639000 }, { "epoch": 1.85, "learning_rate": 4.9074800562708415e-05, "loss": 2.4752, "step": 639500 }, { "epoch": 1.85, "learning_rate": 4.9074079809651724e-05, "loss": 2.4809, "step": 640000 }, { "epoch": 1.85, "learning_rate": 4.9073356162004446e-05, "loss": 2.4762, "step": 640500 }, { "epoch": 1.86, "learning_rate": 4.907263251435717e-05, "loss": 2.4845, "step": 641000 }, { "epoch": 1.86, "learning_rate": 4.907190886670989e-05, "loss": 2.4835, "step": 641500 }, { "epoch": 1.86, "learning_rate": 4.907118666635791e-05, "loss": 2.4753, "step": 642000 }, { "epoch": 1.86, "learning_rate": 4.9070463018710635e-05, "loss": 2.5147, "step": 642500 }, { "epoch": 1.86, "learning_rate": 4.9069739371063364e-05, "loss": 2.4759, "step": 643000 }, { "epoch": 1.86, "learning_rate": 4.9069015723416086e-05, "loss": 2.4877, "step": 643500 }, { "epoch": 1.86, "learning_rate": 4.906829207576881e-05, "loss": 2.4614, "step": 644000 }, { "epoch": 1.87, "learning_rate": 4.906756842812153e-05, "loss": 2.4912, "step": 644500 }, { "epoch": 1.87, "learning_rate": 4.906684478047425e-05, "loss": 2.4848, "step": 645000 }, { "epoch": 1.87, "learning_rate": 4.9066121132826975e-05, "loss": 2.4814, "step": 645500 }, { "epoch": 1.87, "learning_rate": 4.90653974851797e-05, "loss": 2.4908, "step": 646000 }, { "epoch": 1.87, "learning_rate": 4.906467528482771e-05, "loss": 2.4888, "step": 646500 }, { "epoch": 1.87, "learning_rate": 4.9063953084475736e-05, "loss": 2.4565, "step": 647000 }, { "epoch": 1.87, "learning_rate": 4.906322943682846e-05, "loss": 2.4566, "step": 647500 }, { "epoch": 1.88, "learning_rate": 4.906250578918118e-05, "loss": 2.4845, "step": 648000 }, { "epoch": 1.88, "learning_rate": 4.90617821415339e-05, "loss": 2.4821, "step": 648500 }, { "epoch": 1.88, "learning_rate": 4.9061058493886624e-05, "loss": 2.4841, "step": 649000 }, { "epoch": 1.88, "learning_rate": 4.906033484623935e-05, "loss": 2.4614, "step": 649500 }, { "epoch": 1.88, "learning_rate": 4.9059611198592076e-05, "loss": 2.5012, "step": 650000 }, { "epoch": 1.88, "learning_rate": 4.90588875509448e-05, "loss": 2.4752, "step": 650500 }, { "epoch": 1.88, "learning_rate": 4.905816390329752e-05, "loss": 2.4913, "step": 651000 }, { "epoch": 1.89, "learning_rate": 4.9057441702945536e-05, "loss": 2.4803, "step": 651500 }, { "epoch": 1.89, "learning_rate": 4.905671950259356e-05, "loss": 2.4744, "step": 652000 }, { "epoch": 1.89, "learning_rate": 4.905599585494628e-05, "loss": 2.4641, "step": 652500 }, { "epoch": 1.89, "learning_rate": 4.9055272207299e-05, "loss": 2.4697, "step": 653000 }, { "epoch": 1.89, "learning_rate": 4.9054548559651725e-05, "loss": 2.4737, "step": 653500 }, { "epoch": 1.89, "learning_rate": 4.905382491200445e-05, "loss": 2.4853, "step": 654000 }, { "epoch": 1.89, "learning_rate": 4.905310126435717e-05, "loss": 2.4855, "step": 654500 }, { "epoch": 1.9, "learning_rate": 4.905237761670989e-05, "loss": 2.4808, "step": 655000 }, { "epoch": 1.9, "learning_rate": 4.9051655416357914e-05, "loss": 2.456, "step": 655500 }, { "epoch": 1.9, "learning_rate": 4.9050931768710636e-05, "loss": 2.4938, "step": 656000 }, { "epoch": 1.9, "learning_rate": 4.905020812106336e-05, "loss": 2.4884, "step": 656500 }, { "epoch": 1.9, "learning_rate": 4.904948447341608e-05, "loss": 2.5071, "step": 657000 }, { "epoch": 1.9, "learning_rate": 4.904876082576881e-05, "loss": 2.4802, "step": 657500 }, { "epoch": 1.9, "learning_rate": 4.904803717812153e-05, "loss": 2.4809, "step": 658000 }, { "epoch": 1.91, "learning_rate": 4.9047313530474254e-05, "loss": 2.4812, "step": 658500 }, { "epoch": 1.91, "learning_rate": 4.9046589882826976e-05, "loss": 2.4638, "step": 659000 }, { "epoch": 1.91, "learning_rate": 4.90458662351797e-05, "loss": 2.4786, "step": 659500 }, { "epoch": 1.91, "learning_rate": 4.9045144034827714e-05, "loss": 2.4768, "step": 660000 }, { "epoch": 1.91, "learning_rate": 4.904442038718044e-05, "loss": 2.4629, "step": 660500 }, { "epoch": 1.91, "learning_rate": 4.9043696739533165e-05, "loss": 2.484, "step": 661000 }, { "epoch": 1.91, "learning_rate": 4.904297309188589e-05, "loss": 2.4887, "step": 661500 }, { "epoch": 1.92, "learning_rate": 4.90422508915339e-05, "loss": 2.4757, "step": 662000 }, { "epoch": 1.92, "learning_rate": 4.9041527243886625e-05, "loss": 2.4783, "step": 662500 }, { "epoch": 1.92, "learning_rate": 4.904080359623935e-05, "loss": 2.462, "step": 663000 }, { "epoch": 1.92, "learning_rate": 4.904007994859207e-05, "loss": 2.4724, "step": 663500 }, { "epoch": 1.92, "learning_rate": 4.903935630094479e-05, "loss": 2.4859, "step": 664000 }, { "epoch": 1.92, "learning_rate": 4.903863265329752e-05, "loss": 2.4667, "step": 664500 }, { "epoch": 1.92, "learning_rate": 4.903790900565024e-05, "loss": 2.4517, "step": 665000 }, { "epoch": 1.93, "learning_rate": 4.9037185358002965e-05, "loss": 2.5009, "step": 665500 }, { "epoch": 1.93, "learning_rate": 4.903646315765099e-05, "loss": 2.4788, "step": 666000 }, { "epoch": 1.93, "learning_rate": 4.903573951000371e-05, "loss": 2.4601, "step": 666500 }, { "epoch": 1.93, "learning_rate": 4.903501586235643e-05, "loss": 2.4836, "step": 667000 }, { "epoch": 1.93, "learning_rate": 4.9034292214709154e-05, "loss": 2.4827, "step": 667500 }, { "epoch": 1.93, "learning_rate": 4.903356856706188e-05, "loss": 2.4738, "step": 668000 }, { "epoch": 1.94, "learning_rate": 4.90328449194146e-05, "loss": 2.4936, "step": 668500 }, { "epoch": 1.94, "learning_rate": 4.903212127176732e-05, "loss": 2.4621, "step": 669000 }, { "epoch": 1.94, "learning_rate": 4.9031397624120043e-05, "loss": 2.494, "step": 669500 }, { "epoch": 1.94, "learning_rate": 4.9030675423768066e-05, "loss": 2.4628, "step": 670000 }, { "epoch": 1.94, "learning_rate": 4.902995322341608e-05, "loss": 2.4776, "step": 670500 }, { "epoch": 1.94, "learning_rate": 4.9029229575768804e-05, "loss": 2.4852, "step": 671000 }, { "epoch": 1.94, "learning_rate": 4.9028505928121526e-05, "loss": 2.4855, "step": 671500 }, { "epoch": 1.95, "learning_rate": 4.9027782280474255e-05, "loss": 2.4762, "step": 672000 }, { "epoch": 1.95, "learning_rate": 4.902705863282698e-05, "loss": 2.4773, "step": 672500 }, { "epoch": 1.95, "learning_rate": 4.90263349851797e-05, "loss": 2.4942, "step": 673000 }, { "epoch": 1.95, "learning_rate": 4.902561133753242e-05, "loss": 2.4752, "step": 673500 }, { "epoch": 1.95, "learning_rate": 4.9024889137180444e-05, "loss": 2.4601, "step": 674000 }, { "epoch": 1.95, "learning_rate": 4.9024165489533166e-05, "loss": 2.4394, "step": 674500 }, { "epoch": 1.95, "learning_rate": 4.902344184188589e-05, "loss": 2.4907, "step": 675000 }, { "epoch": 1.96, "learning_rate": 4.902271819423861e-05, "loss": 2.4857, "step": 675500 }, { "epoch": 1.96, "learning_rate": 4.902199454659133e-05, "loss": 2.4761, "step": 676000 }, { "epoch": 1.96, "learning_rate": 4.9021270898944055e-05, "loss": 2.4873, "step": 676500 }, { "epoch": 1.96, "learning_rate": 4.902054725129678e-05, "loss": 2.4789, "step": 677000 }, { "epoch": 1.96, "learning_rate": 4.901982505094479e-05, "loss": 2.4626, "step": 677500 }, { "epoch": 1.96, "learning_rate": 4.9019102850592815e-05, "loss": 2.4559, "step": 678000 }, { "epoch": 1.96, "learning_rate": 4.901837920294554e-05, "loss": 2.5011, "step": 678500 }, { "epoch": 1.97, "learning_rate": 4.901765555529826e-05, "loss": 2.4815, "step": 679000 }, { "epoch": 1.97, "learning_rate": 4.901693190765098e-05, "loss": 2.4702, "step": 679500 }, { "epoch": 1.97, "learning_rate": 4.9016209707299004e-05, "loss": 2.484, "step": 680000 }, { "epoch": 1.97, "learning_rate": 4.9015486059651726e-05, "loss": 2.4513, "step": 680500 }, { "epoch": 1.97, "learning_rate": 4.901476241200445e-05, "loss": 2.4704, "step": 681000 }, { "epoch": 1.97, "learning_rate": 4.901403876435717e-05, "loss": 2.4706, "step": 681500 }, { "epoch": 1.97, "learning_rate": 4.901331511670989e-05, "loss": 2.4342, "step": 682000 }, { "epoch": 1.98, "learning_rate": 4.9012591469062615e-05, "loss": 2.4872, "step": 682500 }, { "epoch": 1.98, "learning_rate": 4.9011867821415344e-05, "loss": 2.4518, "step": 683000 }, { "epoch": 1.98, "learning_rate": 4.9011144173768067e-05, "loss": 2.4546, "step": 683500 }, { "epoch": 1.98, "learning_rate": 4.901042052612079e-05, "loss": 2.4902, "step": 684000 }, { "epoch": 1.98, "learning_rate": 4.9009698325768804e-05, "loss": 2.4703, "step": 684500 }, { "epoch": 1.98, "learning_rate": 4.9008974678121527e-05, "loss": 2.4694, "step": 685000 }, { "epoch": 1.98, "learning_rate": 4.900825103047425e-05, "loss": 2.4974, "step": 685500 }, { "epoch": 1.99, "learning_rate": 4.900752738282697e-05, "loss": 2.4718, "step": 686000 }, { "epoch": 1.99, "learning_rate": 4.900680373517969e-05, "loss": 2.4765, "step": 686500 }, { "epoch": 1.99, "learning_rate": 4.900608008753242e-05, "loss": 2.4749, "step": 687000 }, { "epoch": 1.99, "learning_rate": 4.9005356439885145e-05, "loss": 2.4696, "step": 687500 }, { "epoch": 1.99, "learning_rate": 4.900463279223787e-05, "loss": 2.4695, "step": 688000 }, { "epoch": 1.99, "learning_rate": 4.900391059188589e-05, "loss": 2.4779, "step": 688500 }, { "epoch": 1.99, "learning_rate": 4.900318694423861e-05, "loss": 2.4661, "step": 689000 }, { "epoch": 2.0, "learning_rate": 4.900246474388663e-05, "loss": 2.4856, "step": 689500 }, { "epoch": 2.0, "learning_rate": 4.900174109623935e-05, "loss": 2.4432, "step": 690000 }, { "epoch": 2.0, "learning_rate": 4.900101744859207e-05, "loss": 2.4816, "step": 690500 }, { "epoch": 2.0, "eval_accuracy": 0.631185162487384, "eval_accuracy_mlm": 0.5930999275902505, "eval_accuracy_nsp": 0.8354805149569418, "eval_loss": 2.435854196548462, "eval_runtime": 330.8554, "eval_samples_per_second": 1318.963, "eval_steps_per_second": 54.958, "step": 690944 }, { "epoch": 2.0, "learning_rate": 4.9000293800944794e-05, "loss": 2.4789, "step": 691000 }, { "epoch": 2.0, "learning_rate": 4.899957015329752e-05, "loss": 2.4598, "step": 691500 }, { "epoch": 2.0, "learning_rate": 4.8998846505650245e-05, "loss": 2.454, "step": 692000 }, { "epoch": 2.0, "learning_rate": 4.899812430529826e-05, "loss": 2.4464, "step": 692500 }, { "epoch": 2.01, "learning_rate": 4.899740065765098e-05, "loss": 2.4518, "step": 693000 }, { "epoch": 2.01, "learning_rate": 4.8996677010003705e-05, "loss": 2.4586, "step": 693500 }, { "epoch": 2.01, "learning_rate": 4.899595336235643e-05, "loss": 2.461, "step": 694000 }, { "epoch": 2.01, "learning_rate": 4.8995229714709156e-05, "loss": 2.4471, "step": 694500 }, { "epoch": 2.01, "learning_rate": 4.899450606706188e-05, "loss": 2.4592, "step": 695000 }, { "epoch": 2.01, "learning_rate": 4.89937824194146e-05, "loss": 2.4399, "step": 695500 }, { "epoch": 2.01, "learning_rate": 4.899305877176732e-05, "loss": 2.4515, "step": 696000 }, { "epoch": 2.02, "learning_rate": 4.8992336571415345e-05, "loss": 2.4645, "step": 696500 }, { "epoch": 2.02, "learning_rate": 4.899161292376807e-05, "loss": 2.4637, "step": 697000 }, { "epoch": 2.02, "learning_rate": 4.899088927612079e-05, "loss": 2.4408, "step": 697500 }, { "epoch": 2.02, "learning_rate": 4.899016562847351e-05, "loss": 2.4525, "step": 698000 }, { "epoch": 2.02, "learning_rate": 4.8989441980826234e-05, "loss": 2.4656, "step": 698500 }, { "epoch": 2.02, "learning_rate": 4.8988718333178956e-05, "loss": 2.4563, "step": 699000 }, { "epoch": 2.02, "learning_rate": 4.898799468553168e-05, "loss": 2.4592, "step": 699500 }, { "epoch": 2.03, "learning_rate": 4.89872710378844e-05, "loss": 2.4711, "step": 700000 }, { "epoch": 2.03, "learning_rate": 4.898654739023712e-05, "loss": 2.4476, "step": 700500 }, { "epoch": 2.03, "learning_rate": 4.8985823742589845e-05, "loss": 2.4665, "step": 701000 }, { "epoch": 2.03, "learning_rate": 4.898510154223787e-05, "loss": 2.4621, "step": 701500 }, { "epoch": 2.03, "learning_rate": 4.8984377894590597e-05, "loss": 2.4613, "step": 702000 }, { "epoch": 2.03, "learning_rate": 4.898365424694332e-05, "loss": 2.4539, "step": 702500 }, { "epoch": 2.03, "learning_rate": 4.898293059929604e-05, "loss": 2.4689, "step": 703000 }, { "epoch": 2.04, "learning_rate": 4.898220695164876e-05, "loss": 2.4776, "step": 703500 }, { "epoch": 2.04, "learning_rate": 4.8981483304001485e-05, "loss": 2.4484, "step": 704000 }, { "epoch": 2.04, "learning_rate": 4.89807611036495e-05, "loss": 2.4657, "step": 704500 }, { "epoch": 2.04, "learning_rate": 4.898003745600222e-05, "loss": 2.4293, "step": 705000 }, { "epoch": 2.04, "learning_rate": 4.8979313808354945e-05, "loss": 2.4566, "step": 705500 }, { "epoch": 2.04, "learning_rate": 4.8978590160707675e-05, "loss": 2.4353, "step": 706000 }, { "epoch": 2.05, "learning_rate": 4.897786940765098e-05, "loss": 2.4369, "step": 706500 }, { "epoch": 2.05, "learning_rate": 4.8977145760003706e-05, "loss": 2.4478, "step": 707000 }, { "epoch": 2.05, "learning_rate": 4.897642211235643e-05, "loss": 2.4426, "step": 707500 }, { "epoch": 2.05, "learning_rate": 4.897569846470915e-05, "loss": 2.4564, "step": 708000 }, { "epoch": 2.05, "learning_rate": 4.897497481706187e-05, "loss": 2.464, "step": 708500 }, { "epoch": 2.05, "learning_rate": 4.8974251169414595e-05, "loss": 2.4649, "step": 709000 }, { "epoch": 2.05, "learning_rate": 4.8973527521767324e-05, "loss": 2.4279, "step": 709500 }, { "epoch": 2.06, "learning_rate": 4.8972803874120046e-05, "loss": 2.4731, "step": 710000 }, { "epoch": 2.06, "learning_rate": 4.8972080226472775e-05, "loss": 2.4758, "step": 710500 }, { "epoch": 2.06, "learning_rate": 4.89713565788255e-05, "loss": 2.4639, "step": 711000 }, { "epoch": 2.06, "learning_rate": 4.897063293117822e-05, "loss": 2.4463, "step": 711500 }, { "epoch": 2.06, "learning_rate": 4.896990928353094e-05, "loss": 2.4581, "step": 712000 }, { "epoch": 2.06, "learning_rate": 4.896918708317896e-05, "loss": 2.4639, "step": 712500 }, { "epoch": 2.06, "learning_rate": 4.896846343553168e-05, "loss": 2.4564, "step": 713000 }, { "epoch": 2.07, "learning_rate": 4.89677397878844e-05, "loss": 2.4583, "step": 713500 }, { "epoch": 2.07, "learning_rate": 4.8967016140237124e-05, "loss": 2.4617, "step": 714000 }, { "epoch": 2.07, "learning_rate": 4.8966292492589846e-05, "loss": 2.4357, "step": 714500 }, { "epoch": 2.07, "learning_rate": 4.8965568844942575e-05, "loss": 2.4506, "step": 715000 }, { "epoch": 2.07, "learning_rate": 4.89648451972953e-05, "loss": 2.4512, "step": 715500 }, { "epoch": 2.07, "learning_rate": 4.896412299694331e-05, "loss": 2.4475, "step": 716000 }, { "epoch": 2.07, "learning_rate": 4.8963399349296035e-05, "loss": 2.4574, "step": 716500 }, { "epoch": 2.08, "learning_rate": 4.8962675701648764e-05, "loss": 2.4479, "step": 717000 }, { "epoch": 2.08, "learning_rate": 4.8961952054001486e-05, "loss": 2.4697, "step": 717500 }, { "epoch": 2.08, "learning_rate": 4.89612298536495e-05, "loss": 2.4785, "step": 718000 }, { "epoch": 2.08, "learning_rate": 4.8960506206002224e-05, "loss": 2.4566, "step": 718500 }, { "epoch": 2.08, "learning_rate": 4.8959782558354946e-05, "loss": 2.4424, "step": 719000 }, { "epoch": 2.08, "learning_rate": 4.8959058910707675e-05, "loss": 2.4512, "step": 719500 }, { "epoch": 2.08, "learning_rate": 4.89583352630604e-05, "loss": 2.4534, "step": 720000 }, { "epoch": 2.09, "learning_rate": 4.895761161541312e-05, "loss": 2.4666, "step": 720500 }, { "epoch": 2.09, "learning_rate": 4.895688796776584e-05, "loss": 2.4413, "step": 721000 }, { "epoch": 2.09, "learning_rate": 4.8956164320118564e-05, "loss": 2.489, "step": 721500 }, { "epoch": 2.09, "learning_rate": 4.895544211976658e-05, "loss": 2.4536, "step": 722000 }, { "epoch": 2.09, "learning_rate": 4.89547184721193e-05, "loss": 2.4472, "step": 722500 }, { "epoch": 2.09, "learning_rate": 4.8953994824472024e-05, "loss": 2.452, "step": 723000 }, { "epoch": 2.09, "learning_rate": 4.8953271176824746e-05, "loss": 2.4544, "step": 723500 }, { "epoch": 2.1, "learning_rate": 4.8952547529177476e-05, "loss": 2.4361, "step": 724000 }, { "epoch": 2.1, "learning_rate": 4.895182677612079e-05, "loss": 2.4403, "step": 724500 }, { "epoch": 2.1, "learning_rate": 4.8951103128473513e-05, "loss": 2.4623, "step": 725000 }, { "epoch": 2.1, "learning_rate": 4.8950379480826236e-05, "loss": 2.448, "step": 725500 }, { "epoch": 2.1, "learning_rate": 4.894965583317896e-05, "loss": 2.4598, "step": 726000 }, { "epoch": 2.1, "learning_rate": 4.8948933632826973e-05, "loss": 2.4239, "step": 726500 }, { "epoch": 2.1, "learning_rate": 4.89482099851797e-05, "loss": 2.4484, "step": 727000 }, { "epoch": 2.11, "learning_rate": 4.8947486337532425e-05, "loss": 2.463, "step": 727500 }, { "epoch": 2.11, "learning_rate": 4.894676268988515e-05, "loss": 2.4606, "step": 728000 }, { "epoch": 2.11, "learning_rate": 4.894603904223787e-05, "loss": 2.4716, "step": 728500 }, { "epoch": 2.11, "learning_rate": 4.8945316841885885e-05, "loss": 2.4688, "step": 729000 }, { "epoch": 2.11, "learning_rate": 4.894459319423861e-05, "loss": 2.471, "step": 729500 }, { "epoch": 2.11, "learning_rate": 4.894387099388662e-05, "loss": 2.4771, "step": 730000 }, { "epoch": 2.11, "learning_rate": 4.894314734623935e-05, "loss": 2.4761, "step": 730500 }, { "epoch": 2.12, "learning_rate": 4.894242514588737e-05, "loss": 2.4848, "step": 731000 }, { "epoch": 2.12, "learning_rate": 4.894170149824009e-05, "loss": 2.5034, "step": 731500 }, { "epoch": 2.12, "learning_rate": 4.894097785059281e-05, "loss": 2.4692, "step": 732000 }, { "epoch": 2.12, "learning_rate": 4.894025420294554e-05, "loss": 2.4524, "step": 732500 }, { "epoch": 2.12, "learning_rate": 4.893953055529826e-05, "loss": 2.4573, "step": 733000 }, { "epoch": 2.12, "learning_rate": 4.8938806907650985e-05, "loss": 2.4928, "step": 733500 }, { "epoch": 2.12, "learning_rate": 4.893808326000371e-05, "loss": 2.4969, "step": 734000 }, { "epoch": 2.13, "learning_rate": 4.893735961235643e-05, "loss": 2.4951, "step": 734500 }, { "epoch": 2.13, "learning_rate": 4.893663596470915e-05, "loss": 2.4963, "step": 735000 }, { "epoch": 2.13, "learning_rate": 4.8935912317061874e-05, "loss": 2.4626, "step": 735500 }, { "epoch": 2.13, "learning_rate": 4.89351886694146e-05, "loss": 2.4454, "step": 736000 }, { "epoch": 2.13, "learning_rate": 4.8934465021767325e-05, "loss": 2.4879, "step": 736500 }, { "epoch": 2.13, "learning_rate": 4.893374137412005e-05, "loss": 2.4734, "step": 737000 }, { "epoch": 2.13, "learning_rate": 4.893301772647277e-05, "loss": 2.4603, "step": 737500 }, { "epoch": 2.14, "learning_rate": 4.893229407882549e-05, "loss": 2.4828, "step": 738000 }, { "epoch": 2.14, "learning_rate": 4.8931570431178214e-05, "loss": 2.4814, "step": 738500 }, { "epoch": 2.14, "learning_rate": 4.893084823082623e-05, "loss": 2.4689, "step": 739000 }, { "epoch": 2.14, "learning_rate": 4.893012458317896e-05, "loss": 2.4681, "step": 739500 }, { "epoch": 2.14, "learning_rate": 4.892940093553168e-05, "loss": 2.4555, "step": 740000 }, { "epoch": 2.14, "learning_rate": 4.89286772878844e-05, "loss": 2.4646, "step": 740500 }, { "epoch": 2.14, "learning_rate": 4.8927953640237125e-05, "loss": 2.4376, "step": 741000 }, { "epoch": 2.15, "learning_rate": 4.892723143988515e-05, "loss": 2.4509, "step": 741500 }, { "epoch": 2.15, "learning_rate": 4.892650923953316e-05, "loss": 2.4518, "step": 742000 }, { "epoch": 2.15, "learning_rate": 4.8925785591885885e-05, "loss": 2.4623, "step": 742500 }, { "epoch": 2.15, "learning_rate": 4.892506194423861e-05, "loss": 2.4877, "step": 743000 }, { "epoch": 2.15, "learning_rate": 4.892433829659133e-05, "loss": 2.4892, "step": 743500 }, { "epoch": 2.15, "learning_rate": 4.892361464894405e-05, "loss": 2.5121, "step": 744000 }, { "epoch": 2.16, "learning_rate": 4.892289100129678e-05, "loss": 2.4631, "step": 744500 }, { "epoch": 2.16, "learning_rate": 4.89221688009448e-05, "loss": 2.4688, "step": 745000 }, { "epoch": 2.16, "learning_rate": 4.892144515329752e-05, "loss": 2.4775, "step": 745500 }, { "epoch": 2.16, "learning_rate": 4.892072150565024e-05, "loss": 2.475, "step": 746000 }, { "epoch": 2.16, "learning_rate": 4.8919997858002963e-05, "loss": 2.4943, "step": 746500 }, { "epoch": 2.16, "learning_rate": 4.891927421035569e-05, "loss": 2.4642, "step": 747000 }, { "epoch": 2.16, "learning_rate": 4.8918550562708415e-05, "loss": 2.4447, "step": 747500 }, { "epoch": 2.17, "learning_rate": 4.891782691506114e-05, "loss": 2.4554, "step": 748000 }, { "epoch": 2.17, "learning_rate": 4.891710326741386e-05, "loss": 2.4652, "step": 748500 }, { "epoch": 2.17, "learning_rate": 4.891637961976658e-05, "loss": 2.4624, "step": 749000 }, { "epoch": 2.17, "learning_rate": 4.8915657419414604e-05, "loss": 2.4517, "step": 749500 }, { "epoch": 2.17, "learning_rate": 4.8914933771767326e-05, "loss": 2.4722, "step": 750000 }, { "epoch": 2.17, "learning_rate": 4.891421012412005e-05, "loss": 2.4652, "step": 750500 }, { "epoch": 2.17, "learning_rate": 4.891348647647277e-05, "loss": 2.4866, "step": 751000 }, { "epoch": 2.18, "learning_rate": 4.891276282882549e-05, "loss": 2.5011, "step": 751500 }, { "epoch": 2.18, "learning_rate": 4.8912039181178215e-05, "loss": 2.4952, "step": 752000 }, { "epoch": 2.18, "learning_rate": 4.891131553353094e-05, "loss": 2.4565, "step": 752500 }, { "epoch": 2.18, "learning_rate": 4.891059188588366e-05, "loss": 2.4535, "step": 753000 }, { "epoch": 2.18, "learning_rate": 4.890986968553168e-05, "loss": 2.4375, "step": 753500 }, { "epoch": 2.18, "learning_rate": 4.8909146037884404e-05, "loss": 2.4754, "step": 754000 }, { "epoch": 2.18, "learning_rate": 4.890842239023713e-05, "loss": 2.4667, "step": 754500 }, { "epoch": 2.19, "learning_rate": 4.8907698742589855e-05, "loss": 2.4556, "step": 755000 }, { "epoch": 2.19, "learning_rate": 4.890697509494258e-05, "loss": 2.4461, "step": 755500 }, { "epoch": 2.19, "learning_rate": 4.890625289459059e-05, "loss": 2.4475, "step": 756000 }, { "epoch": 2.19, "learning_rate": 4.8905529246943315e-05, "loss": 2.4501, "step": 756500 }, { "epoch": 2.19, "learning_rate": 4.890480559929604e-05, "loss": 2.4606, "step": 757000 }, { "epoch": 2.19, "learning_rate": 4.890408195164876e-05, "loss": 2.4686, "step": 757500 }, { "epoch": 2.19, "learning_rate": 4.890335830400148e-05, "loss": 2.4598, "step": 758000 }, { "epoch": 2.2, "learning_rate": 4.8902636103649504e-05, "loss": 2.4641, "step": 758500 }, { "epoch": 2.2, "learning_rate": 4.8901912456002226e-05, "loss": 2.4744, "step": 759000 }, { "epoch": 2.2, "learning_rate": 4.890118880835495e-05, "loss": 2.4543, "step": 759500 }, { "epoch": 2.2, "learning_rate": 4.890046516070767e-05, "loss": 2.4908, "step": 760000 }, { "epoch": 2.2, "learning_rate": 4.889974151306039e-05, "loss": 2.4717, "step": 760500 }, { "epoch": 2.2, "learning_rate": 4.889901931270841e-05, "loss": 2.4567, "step": 761000 }, { "epoch": 2.2, "learning_rate": 4.8898298559651724e-05, "loss": 2.4823, "step": 761500 }, { "epoch": 2.21, "learning_rate": 4.8897574912004447e-05, "loss": 2.4703, "step": 762000 }, { "epoch": 2.21, "learning_rate": 4.8896851264357176e-05, "loss": 2.4668, "step": 762500 }, { "epoch": 2.21, "learning_rate": 4.88961276167099e-05, "loss": 2.4704, "step": 763000 }, { "epoch": 2.21, "learning_rate": 4.889540396906262e-05, "loss": 2.4578, "step": 763500 }, { "epoch": 2.21, "learning_rate": 4.889468032141534e-05, "loss": 2.4705, "step": 764000 }, { "epoch": 2.21, "learning_rate": 4.8893956673768065e-05, "loss": 2.4727, "step": 764500 }, { "epoch": 2.21, "learning_rate": 4.889323302612079e-05, "loss": 2.4628, "step": 765000 }, { "epoch": 2.22, "learning_rate": 4.889250937847351e-05, "loss": 2.4639, "step": 765500 }, { "epoch": 2.22, "learning_rate": 4.889179007271212e-05, "loss": 2.4698, "step": 766000 }, { "epoch": 2.22, "learning_rate": 4.889106642506484e-05, "loss": 2.4549, "step": 766500 }, { "epoch": 2.22, "learning_rate": 4.889034277741756e-05, "loss": 2.4666, "step": 767000 }, { "epoch": 2.22, "learning_rate": 4.8889619129770285e-05, "loss": 2.4899, "step": 767500 }, { "epoch": 2.22, "learning_rate": 4.888889548212301e-05, "loss": 2.4596, "step": 768000 }, { "epoch": 2.22, "learning_rate": 4.888817328177103e-05, "loss": 2.4762, "step": 768500 }, { "epoch": 2.23, "learning_rate": 4.888744963412375e-05, "loss": 2.496, "step": 769000 }, { "epoch": 2.23, "learning_rate": 4.8886725986476474e-05, "loss": 2.4773, "step": 769500 }, { "epoch": 2.23, "learning_rate": 4.88860023388292e-05, "loss": 2.4481, "step": 770000 }, { "epoch": 2.23, "learning_rate": 4.8885278691181925e-05, "loss": 2.4701, "step": 770500 }, { "epoch": 2.23, "learning_rate": 4.888455504353465e-05, "loss": 2.454, "step": 771000 }, { "epoch": 2.23, "learning_rate": 4.888383139588737e-05, "loss": 2.4689, "step": 771500 }, { "epoch": 2.23, "learning_rate": 4.888310774824009e-05, "loss": 2.4477, "step": 772000 }, { "epoch": 2.24, "learning_rate": 4.888238554788811e-05, "loss": 2.475, "step": 772500 }, { "epoch": 2.24, "learning_rate": 4.888166190024083e-05, "loss": 2.4597, "step": 773000 }, { "epoch": 2.24, "learning_rate": 4.888093825259356e-05, "loss": 2.4765, "step": 773500 }, { "epoch": 2.24, "learning_rate": 4.888021460494628e-05, "loss": 2.4601, "step": 774000 }, { "epoch": 2.24, "learning_rate": 4.8879490957299e-05, "loss": 2.4864, "step": 774500 }, { "epoch": 2.24, "learning_rate": 4.8878767309651725e-05, "loss": 2.442, "step": 775000 }, { "epoch": 2.24, "learning_rate": 4.887804366200445e-05, "loss": 2.4807, "step": 775500 }, { "epoch": 2.25, "learning_rate": 4.887732001435717e-05, "loss": 2.4628, "step": 776000 }, { "epoch": 2.25, "learning_rate": 4.887659636670989e-05, "loss": 2.4546, "step": 776500 }, { "epoch": 2.25, "learning_rate": 4.887587271906262e-05, "loss": 2.475, "step": 777000 }, { "epoch": 2.25, "learning_rate": 4.887514907141534e-05, "loss": 2.4683, "step": 777500 }, { "epoch": 2.25, "learning_rate": 4.887442687106336e-05, "loss": 2.4653, "step": 778000 }, { "epoch": 2.25, "learning_rate": 4.887370467071138e-05, "loss": 2.4586, "step": 778500 }, { "epoch": 2.25, "learning_rate": 4.88729810230641e-05, "loss": 2.4554, "step": 779000 }, { "epoch": 2.26, "learning_rate": 4.887225882271212e-05, "loss": 2.4355, "step": 779500 }, { "epoch": 2.26, "learning_rate": 4.887153517506484e-05, "loss": 2.458, "step": 780000 }, { "epoch": 2.26, "learning_rate": 4.887081152741756e-05, "loss": 2.4656, "step": 780500 }, { "epoch": 2.26, "learning_rate": 4.8870087879770285e-05, "loss": 2.4528, "step": 781000 }, { "epoch": 2.26, "learning_rate": 4.886936423212301e-05, "loss": 2.4758, "step": 781500 }, { "epoch": 2.26, "learning_rate": 4.886864058447574e-05, "loss": 2.4729, "step": 782000 }, { "epoch": 2.27, "learning_rate": 4.886791693682846e-05, "loss": 2.4626, "step": 782500 }, { "epoch": 2.27, "learning_rate": 4.886719328918118e-05, "loss": 2.4532, "step": 783000 }, { "epoch": 2.27, "learning_rate": 4.8866469641533903e-05, "loss": 2.4532, "step": 783500 }, { "epoch": 2.27, "learning_rate": 4.8865745993886626e-05, "loss": 2.4658, "step": 784000 }, { "epoch": 2.27, "learning_rate": 4.886502234623935e-05, "loss": 2.4934, "step": 784500 }, { "epoch": 2.27, "learning_rate": 4.886429869859208e-05, "loss": 2.4615, "step": 785000 }, { "epoch": 2.27, "learning_rate": 4.88635750509448e-05, "loss": 2.4628, "step": 785500 }, { "epoch": 2.28, "learning_rate": 4.886285140329752e-05, "loss": 2.4608, "step": 786000 }, { "epoch": 2.28, "learning_rate": 4.8862127755650244e-05, "loss": 2.4669, "step": 786500 }, { "epoch": 2.28, "learning_rate": 4.8861404108002966e-05, "loss": 2.4682, "step": 787000 }, { "epoch": 2.28, "learning_rate": 4.886068046035569e-05, "loss": 2.4519, "step": 787500 }, { "epoch": 2.28, "learning_rate": 4.885995681270841e-05, "loss": 2.4596, "step": 788000 }, { "epoch": 2.28, "learning_rate": 4.885923316506113e-05, "loss": 2.4557, "step": 788500 }, { "epoch": 2.28, "learning_rate": 4.885850951741386e-05, "loss": 2.4598, "step": 789000 }, { "epoch": 2.29, "learning_rate": 4.885778731706188e-05, "loss": 2.4646, "step": 789500 }, { "epoch": 2.29, "learning_rate": 4.885706511670989e-05, "loss": 2.4645, "step": 790000 }, { "epoch": 2.29, "learning_rate": 4.8856341469062615e-05, "loss": 2.4555, "step": 790500 }, { "epoch": 2.29, "learning_rate": 4.885561782141534e-05, "loss": 2.4786, "step": 791000 }, { "epoch": 2.29, "learning_rate": 4.885489417376806e-05, "loss": 2.452, "step": 791500 }, { "epoch": 2.29, "learning_rate": 4.885417052612079e-05, "loss": 2.4585, "step": 792000 }, { "epoch": 2.29, "learning_rate": 4.885344687847351e-05, "loss": 2.4583, "step": 792500 }, { "epoch": 2.3, "learning_rate": 4.885272323082623e-05, "loss": 2.4383, "step": 793000 }, { "epoch": 2.3, "learning_rate": 4.885199958317896e-05, "loss": 2.4437, "step": 793500 }, { "epoch": 2.3, "learning_rate": 4.885127738282698e-05, "loss": 2.4578, "step": 794000 }, { "epoch": 2.3, "learning_rate": 4.88505537351797e-05, "loss": 2.4858, "step": 794500 }, { "epoch": 2.3, "learning_rate": 4.884983008753242e-05, "loss": 2.4365, "step": 795000 }, { "epoch": 2.3, "learning_rate": 4.8849106439885144e-05, "loss": 2.4526, "step": 795500 }, { "epoch": 2.3, "learning_rate": 4.8848382792237866e-05, "loss": 2.4697, "step": 796000 }, { "epoch": 2.31, "learning_rate": 4.884766059188589e-05, "loss": 2.4756, "step": 796500 }, { "epoch": 2.31, "learning_rate": 4.884693694423861e-05, "loss": 2.4617, "step": 797000 }, { "epoch": 2.31, "learning_rate": 4.884621329659133e-05, "loss": 2.4737, "step": 797500 }, { "epoch": 2.31, "learning_rate": 4.8845489648944055e-05, "loss": 2.4507, "step": 798000 }, { "epoch": 2.31, "learning_rate": 4.8844768895887364e-05, "loss": 2.485, "step": 798500 }, { "epoch": 2.31, "learning_rate": 4.8844045248240086e-05, "loss": 2.4573, "step": 799000 }, { "epoch": 2.31, "learning_rate": 4.884332160059281e-05, "loss": 2.4446, "step": 799500 }, { "epoch": 2.32, "learning_rate": 4.884259795294554e-05, "loss": 2.4701, "step": 800000 }, { "epoch": 2.32, "learning_rate": 4.884187430529826e-05, "loss": 2.4703, "step": 800500 }, { "epoch": 2.32, "learning_rate": 4.884115065765099e-05, "loss": 2.4766, "step": 801000 }, { "epoch": 2.32, "learning_rate": 4.884042701000371e-05, "loss": 2.451, "step": 801500 }, { "epoch": 2.32, "learning_rate": 4.8839703362356433e-05, "loss": 2.4538, "step": 802000 }, { "epoch": 2.32, "learning_rate": 4.883898116200445e-05, "loss": 2.4579, "step": 802500 }, { "epoch": 2.32, "learning_rate": 4.883825751435717e-05, "loss": 2.4677, "step": 803000 }, { "epoch": 2.33, "learning_rate": 4.8837533866709893e-05, "loss": 2.4384, "step": 803500 }, { "epoch": 2.33, "learning_rate": 4.8836810219062616e-05, "loss": 2.4621, "step": 804000 }, { "epoch": 2.33, "learning_rate": 4.883608657141534e-05, "loss": 2.4513, "step": 804500 }, { "epoch": 2.33, "learning_rate": 4.883536292376806e-05, "loss": 2.4515, "step": 805000 }, { "epoch": 2.33, "learning_rate": 4.883463927612079e-05, "loss": 2.4732, "step": 805500 }, { "epoch": 2.33, "learning_rate": 4.883391562847351e-05, "loss": 2.4462, "step": 806000 }, { "epoch": 2.33, "learning_rate": 4.8833191980826234e-05, "loss": 2.4348, "step": 806500 }, { "epoch": 2.34, "learning_rate": 4.883247122776954e-05, "loss": 2.4618, "step": 807000 }, { "epoch": 2.34, "learning_rate": 4.883174758012227e-05, "loss": 2.4726, "step": 807500 }, { "epoch": 2.34, "learning_rate": 4.8831023932474994e-05, "loss": 2.4495, "step": 808000 }, { "epoch": 2.34, "learning_rate": 4.8830300284827716e-05, "loss": 2.466, "step": 808500 }, { "epoch": 2.34, "learning_rate": 4.882957663718044e-05, "loss": 2.4551, "step": 809000 }, { "epoch": 2.34, "learning_rate": 4.882885298953316e-05, "loss": 2.464, "step": 809500 }, { "epoch": 2.34, "learning_rate": 4.882812934188589e-05, "loss": 2.4649, "step": 810000 }, { "epoch": 2.35, "learning_rate": 4.882740569423861e-05, "loss": 2.4627, "step": 810500 }, { "epoch": 2.35, "learning_rate": 4.8826682046591334e-05, "loss": 2.4657, "step": 811000 }, { "epoch": 2.35, "learning_rate": 4.8825958398944056e-05, "loss": 2.4704, "step": 811500 }, { "epoch": 2.35, "learning_rate": 4.882523475129678e-05, "loss": 2.4645, "step": 812000 }, { "epoch": 2.35, "learning_rate": 4.88245111036495e-05, "loss": 2.436, "step": 812500 }, { "epoch": 2.35, "learning_rate": 4.8823788903297516e-05, "loss": 2.4777, "step": 813000 }, { "epoch": 2.35, "learning_rate": 4.882306525565024e-05, "loss": 2.4749, "step": 813500 }, { "epoch": 2.36, "learning_rate": 4.882234160800296e-05, "loss": 2.449, "step": 814000 }, { "epoch": 2.36, "learning_rate": 4.882161940765098e-05, "loss": 2.4526, "step": 814500 }, { "epoch": 2.36, "learning_rate": 4.882089576000371e-05, "loss": 2.456, "step": 815000 }, { "epoch": 2.36, "learning_rate": 4.8820172112356434e-05, "loss": 2.469, "step": 815500 }, { "epoch": 2.36, "learning_rate": 4.8819448464709156e-05, "loss": 2.4608, "step": 816000 }, { "epoch": 2.36, "learning_rate": 4.881872626435717e-05, "loss": 2.4611, "step": 816500 }, { "epoch": 2.36, "learning_rate": 4.8818002616709894e-05, "loss": 2.4812, "step": 817000 }, { "epoch": 2.37, "learning_rate": 4.8817278969062616e-05, "loss": 2.4608, "step": 817500 }, { "epoch": 2.37, "learning_rate": 4.881655532141534e-05, "loss": 2.4905, "step": 818000 }, { "epoch": 2.37, "learning_rate": 4.881583167376806e-05, "loss": 2.4559, "step": 818500 }, { "epoch": 2.37, "learning_rate": 4.881510947341608e-05, "loss": 2.4667, "step": 819000 }, { "epoch": 2.37, "learning_rate": 4.8814385825768806e-05, "loss": 2.4609, "step": 819500 }, { "epoch": 2.37, "learning_rate": 4.881366217812153e-05, "loss": 2.4696, "step": 820000 }, { "epoch": 2.38, "learning_rate": 4.881293853047425e-05, "loss": 2.4453, "step": 820500 }, { "epoch": 2.38, "learning_rate": 4.881221488282697e-05, "loss": 2.4563, "step": 821000 }, { "epoch": 2.38, "learning_rate": 4.881149268247499e-05, "loss": 2.474, "step": 821500 }, { "epoch": 2.38, "learning_rate": 4.881077048212301e-05, "loss": 2.4627, "step": 822000 }, { "epoch": 2.38, "learning_rate": 4.881004683447574e-05, "loss": 2.4507, "step": 822500 }, { "epoch": 2.38, "learning_rate": 4.880932318682846e-05, "loss": 2.4714, "step": 823000 }, { "epoch": 2.38, "learning_rate": 4.8808599539181184e-05, "loss": 2.4486, "step": 823500 }, { "epoch": 2.39, "learning_rate": 4.8807875891533906e-05, "loss": 2.4582, "step": 824000 }, { "epoch": 2.39, "learning_rate": 4.880715224388663e-05, "loss": 2.4489, "step": 824500 }, { "epoch": 2.39, "learning_rate": 4.880642859623935e-05, "loss": 2.4812, "step": 825000 }, { "epoch": 2.39, "learning_rate": 4.880570494859207e-05, "loss": 2.4713, "step": 825500 }, { "epoch": 2.39, "learning_rate": 4.8804981300944795e-05, "loss": 2.4539, "step": 826000 }, { "epoch": 2.39, "learning_rate": 4.880425765329752e-05, "loss": 2.4657, "step": 826500 }, { "epoch": 2.39, "learning_rate": 4.880353400565024e-05, "loss": 2.4488, "step": 827000 }, { "epoch": 2.4, "learning_rate": 4.880281035800297e-05, "loss": 2.4482, "step": 827500 }, { "epoch": 2.4, "learning_rate": 4.8802088157650984e-05, "loss": 2.4655, "step": 828000 }, { "epoch": 2.4, "learning_rate": 4.8801365957299e-05, "loss": 2.472, "step": 828500 }, { "epoch": 2.4, "learning_rate": 4.880064230965172e-05, "loss": 2.4784, "step": 829000 }, { "epoch": 2.4, "learning_rate": 4.8799918662004444e-05, "loss": 2.4726, "step": 829500 }, { "epoch": 2.4, "learning_rate": 4.8799196461652466e-05, "loss": 2.4602, "step": 830000 }, { "epoch": 2.4, "learning_rate": 4.879847281400519e-05, "loss": 2.4414, "step": 830500 }, { "epoch": 2.41, "learning_rate": 4.879774916635792e-05, "loss": 2.4778, "step": 831000 }, { "epoch": 2.41, "learning_rate": 4.879702551871064e-05, "loss": 2.4769, "step": 831500 }, { "epoch": 2.41, "learning_rate": 4.879630187106336e-05, "loss": 2.4492, "step": 832000 }, { "epoch": 2.41, "learning_rate": 4.8795578223416084e-05, "loss": 2.4726, "step": 832500 }, { "epoch": 2.41, "learning_rate": 4.8794854575768806e-05, "loss": 2.4534, "step": 833000 }, { "epoch": 2.41, "learning_rate": 4.879413092812153e-05, "loss": 2.4727, "step": 833500 }, { "epoch": 2.41, "learning_rate": 4.8793408727769544e-05, "loss": 2.4681, "step": 834000 }, { "epoch": 2.42, "learning_rate": 4.8792685080122266e-05, "loss": 2.467, "step": 834500 }, { "epoch": 2.42, "learning_rate": 4.879196143247499e-05, "loss": 2.4551, "step": 835000 }, { "epoch": 2.42, "learning_rate": 4.879123923212301e-05, "loss": 2.4706, "step": 835500 }, { "epoch": 2.42, "learning_rate": 4.879051558447573e-05, "loss": 2.4576, "step": 836000 }, { "epoch": 2.42, "learning_rate": 4.8789791936828455e-05, "loss": 2.4657, "step": 836500 }, { "epoch": 2.42, "learning_rate": 4.878906828918118e-05, "loss": 2.4434, "step": 837000 }, { "epoch": 2.42, "learning_rate": 4.8788344641533907e-05, "loss": 2.4745, "step": 837500 }, { "epoch": 2.43, "learning_rate": 4.878762099388663e-05, "loss": 2.4514, "step": 838000 }, { "epoch": 2.43, "learning_rate": 4.878689734623935e-05, "loss": 2.4496, "step": 838500 }, { "epoch": 2.43, "learning_rate": 4.878617369859207e-05, "loss": 2.4691, "step": 839000 }, { "epoch": 2.43, "learning_rate": 4.8785450050944796e-05, "loss": 2.484, "step": 839500 }, { "epoch": 2.43, "learning_rate": 4.878472640329752e-05, "loss": 2.4308, "step": 840000 }, { "epoch": 2.43, "learning_rate": 4.878400420294554e-05, "loss": 2.4559, "step": 840500 }, { "epoch": 2.43, "learning_rate": 4.878328055529826e-05, "loss": 2.4676, "step": 841000 }, { "epoch": 2.44, "learning_rate": 4.8782556907650985e-05, "loss": 2.4656, "step": 841500 }, { "epoch": 2.44, "learning_rate": 4.878183326000371e-05, "loss": 2.4534, "step": 842000 }, { "epoch": 2.44, "learning_rate": 4.878110961235643e-05, "loss": 2.4556, "step": 842500 }, { "epoch": 2.44, "learning_rate": 4.878038596470915e-05, "loss": 2.4398, "step": 843000 }, { "epoch": 2.44, "learning_rate": 4.8779662317061874e-05, "loss": 2.452, "step": 843500 }, { "epoch": 2.44, "learning_rate": 4.8778938669414596e-05, "loss": 2.4619, "step": 844000 }, { "epoch": 2.44, "learning_rate": 4.877821646906262e-05, "loss": 2.4557, "step": 844500 }, { "epoch": 2.45, "learning_rate": 4.877749282141535e-05, "loss": 2.4486, "step": 845000 }, { "epoch": 2.45, "learning_rate": 4.877676917376807e-05, "loss": 2.4471, "step": 845500 }, { "epoch": 2.45, "learning_rate": 4.877604552612079e-05, "loss": 2.4459, "step": 846000 }, { "epoch": 2.45, "learning_rate": 4.8775321878473514e-05, "loss": 2.4626, "step": 846500 }, { "epoch": 2.45, "learning_rate": 4.877459967812153e-05, "loss": 2.4616, "step": 847000 }, { "epoch": 2.45, "learning_rate": 4.877387603047425e-05, "loss": 2.4736, "step": 847500 }, { "epoch": 2.45, "learning_rate": 4.8773152382826974e-05, "loss": 2.4513, "step": 848000 }, { "epoch": 2.46, "learning_rate": 4.8772428735179696e-05, "loss": 2.4405, "step": 848500 }, { "epoch": 2.46, "learning_rate": 4.877170508753242e-05, "loss": 2.4534, "step": 849000 }, { "epoch": 2.46, "learning_rate": 4.877098143988515e-05, "loss": 2.4626, "step": 849500 }, { "epoch": 2.46, "learning_rate": 4.877025779223787e-05, "loss": 2.4684, "step": 850000 }, { "epoch": 2.46, "learning_rate": 4.876953414459059e-05, "loss": 2.4783, "step": 850500 }, { "epoch": 2.46, "learning_rate": 4.876881194423861e-05, "loss": 2.4883, "step": 851000 }, { "epoch": 2.46, "learning_rate": 4.876808829659133e-05, "loss": 2.4863, "step": 851500 }, { "epoch": 2.47, "learning_rate": 4.876736464894406e-05, "loss": 2.4668, "step": 852000 }, { "epoch": 2.47, "learning_rate": 4.8766642448592074e-05, "loss": 2.4822, "step": 852500 }, { "epoch": 2.47, "learning_rate": 4.8765918800944796e-05, "loss": 2.441, "step": 853000 }, { "epoch": 2.47, "learning_rate": 4.876519515329752e-05, "loss": 2.4355, "step": 853500 }, { "epoch": 2.47, "learning_rate": 4.876447150565025e-05, "loss": 2.4657, "step": 854000 }, { "epoch": 2.47, "learning_rate": 4.876374785800297e-05, "loss": 2.4465, "step": 854500 }, { "epoch": 2.47, "learning_rate": 4.876302421035569e-05, "loss": 2.4633, "step": 855000 }, { "epoch": 2.48, "learning_rate": 4.8762300562708414e-05, "loss": 2.4369, "step": 855500 }, { "epoch": 2.48, "learning_rate": 4.8761576915061137e-05, "loss": 2.4499, "step": 856000 }, { "epoch": 2.48, "learning_rate": 4.876085326741386e-05, "loss": 2.4599, "step": 856500 }, { "epoch": 2.48, "learning_rate": 4.876012961976658e-05, "loss": 2.4461, "step": 857000 }, { "epoch": 2.48, "learning_rate": 4.87594059721193e-05, "loss": 2.4432, "step": 857500 }, { "epoch": 2.48, "learning_rate": 4.8758682324472025e-05, "loss": 2.4752, "step": 858000 }, { "epoch": 2.49, "learning_rate": 4.875795867682475e-05, "loss": 2.4624, "step": 858500 }, { "epoch": 2.49, "learning_rate": 4.875723647647277e-05, "loss": 2.4722, "step": 859000 }, { "epoch": 2.49, "learning_rate": 4.87565128288255e-05, "loss": 2.5101, "step": 859500 }, { "epoch": 2.49, "learning_rate": 4.875578918117822e-05, "loss": 2.4928, "step": 860000 }, { "epoch": 2.49, "learning_rate": 4.8755065533530943e-05, "loss": 2.4738, "step": 860500 }, { "epoch": 2.49, "learning_rate": 4.875434333317896e-05, "loss": 2.4671, "step": 861000 }, { "epoch": 2.49, "learning_rate": 4.8753621132826975e-05, "loss": 2.4745, "step": 861500 }, { "epoch": 2.5, "learning_rate": 4.87528974851797e-05, "loss": 2.4534, "step": 862000 }, { "epoch": 2.5, "learning_rate": 4.875217383753242e-05, "loss": 2.4471, "step": 862500 }, { "epoch": 2.5, "learning_rate": 4.875145018988515e-05, "loss": 2.4532, "step": 863000 }, { "epoch": 2.5, "learning_rate": 4.875072654223787e-05, "loss": 2.4462, "step": 863500 }, { "epoch": 2.5, "learning_rate": 4.875000289459059e-05, "loss": 2.4864, "step": 864000 }, { "epoch": 2.5, "learning_rate": 4.8749279246943315e-05, "loss": 2.4572, "step": 864500 }, { "epoch": 2.5, "learning_rate": 4.874855559929604e-05, "loss": 2.4308, "step": 865000 }, { "epoch": 2.51, "learning_rate": 4.874783195164876e-05, "loss": 2.4404, "step": 865500 }, { "epoch": 2.51, "learning_rate": 4.8747109751296775e-05, "loss": 2.4205, "step": 866000 }, { "epoch": 2.51, "learning_rate": 4.87463861036495e-05, "loss": 2.462, "step": 866500 }, { "epoch": 2.51, "learning_rate": 4.8745662456002226e-05, "loss": 2.4615, "step": 867000 }, { "epoch": 2.51, "learning_rate": 4.874493880835495e-05, "loss": 2.4668, "step": 867500 }, { "epoch": 2.51, "learning_rate": 4.8744218055298264e-05, "loss": 2.4706, "step": 868000 }, { "epoch": 2.51, "learning_rate": 4.8743494407650986e-05, "loss": 2.463, "step": 868500 }, { "epoch": 2.52, "learning_rate": 4.874277076000371e-05, "loss": 2.4379, "step": 869000 }, { "epoch": 2.52, "learning_rate": 4.874204711235643e-05, "loss": 2.4478, "step": 869500 }, { "epoch": 2.52, "learning_rate": 4.874132346470915e-05, "loss": 2.4451, "step": 870000 }, { "epoch": 2.52, "learning_rate": 4.8740599817061875e-05, "loss": 2.4479, "step": 870500 }, { "epoch": 2.52, "learning_rate": 4.87398761694146e-05, "loss": 2.4983, "step": 871000 }, { "epoch": 2.52, "learning_rate": 4.873915252176732e-05, "loss": 2.4713, "step": 871500 }, { "epoch": 2.52, "learning_rate": 4.873842887412005e-05, "loss": 2.4586, "step": 872000 }, { "epoch": 2.53, "learning_rate": 4.873770522647277e-05, "loss": 2.4466, "step": 872500 }, { "epoch": 2.53, "learning_rate": 4.8736983026120786e-05, "loss": 2.4379, "step": 873000 }, { "epoch": 2.53, "learning_rate": 4.873625937847351e-05, "loss": 2.4507, "step": 873500 }, { "epoch": 2.53, "learning_rate": 4.873553573082623e-05, "loss": 2.4723, "step": 874000 }, { "epoch": 2.53, "learning_rate": 4.873481208317896e-05, "loss": 2.4562, "step": 874500 }, { "epoch": 2.53, "learning_rate": 4.8734089882826975e-05, "loss": 2.4718, "step": 875000 }, { "epoch": 2.53, "learning_rate": 4.87333662351797e-05, "loss": 2.453, "step": 875500 }, { "epoch": 2.54, "learning_rate": 4.873264258753243e-05, "loss": 2.4409, "step": 876000 }, { "epoch": 2.54, "learning_rate": 4.873191893988515e-05, "loss": 2.4678, "step": 876500 }, { "epoch": 2.54, "learning_rate": 4.873119529223787e-05, "loss": 2.4712, "step": 877000 }, { "epoch": 2.54, "learning_rate": 4.873047164459059e-05, "loss": 2.4599, "step": 877500 }, { "epoch": 2.54, "learning_rate": 4.8729747996943316e-05, "loss": 2.4334, "step": 878000 }, { "epoch": 2.54, "learning_rate": 4.872902434929604e-05, "loss": 2.4179, "step": 878500 }, { "epoch": 2.54, "learning_rate": 4.872830214894405e-05, "loss": 2.4463, "step": 879000 }, { "epoch": 2.55, "learning_rate": 4.8727578501296776e-05, "loss": 2.4667, "step": 879500 }, { "epoch": 2.55, "learning_rate": 4.87268548536495e-05, "loss": 2.475, "step": 880000 }, { "epoch": 2.55, "learning_rate": 4.872613120600223e-05, "loss": 2.4829, "step": 880500 }, { "epoch": 2.55, "learning_rate": 4.872540755835495e-05, "loss": 2.4636, "step": 881000 }, { "epoch": 2.55, "learning_rate": 4.872468391070767e-05, "loss": 2.4525, "step": 881500 }, { "epoch": 2.55, "learning_rate": 4.87239602630604e-05, "loss": 2.4419, "step": 882000 }, { "epoch": 2.55, "learning_rate": 4.872323661541312e-05, "loss": 2.4189, "step": 882500 }, { "epoch": 2.56, "learning_rate": 4.872251441506114e-05, "loss": 2.4375, "step": 883000 }, { "epoch": 2.56, "learning_rate": 4.872179076741386e-05, "loss": 2.4487, "step": 883500 }, { "epoch": 2.56, "learning_rate": 4.872106711976658e-05, "loss": 2.4688, "step": 884000 }, { "epoch": 2.56, "learning_rate": 4.8720343472119305e-05, "loss": 2.4355, "step": 884500 }, { "epoch": 2.56, "learning_rate": 4.871961982447203e-05, "loss": 2.4716, "step": 885000 }, { "epoch": 2.56, "learning_rate": 4.871889617682475e-05, "loss": 2.4849, "step": 885500 }, { "epoch": 2.56, "learning_rate": 4.871817397647277e-05, "loss": 2.4473, "step": 886000 }, { "epoch": 2.57, "learning_rate": 4.8717450328825494e-05, "loss": 2.4466, "step": 886500 }, { "epoch": 2.57, "learning_rate": 4.871672812847351e-05, "loss": 2.441, "step": 887000 }, { "epoch": 2.57, "learning_rate": 4.871600448082623e-05, "loss": 2.4405, "step": 887500 }, { "epoch": 2.57, "learning_rate": 4.8715280833178954e-05, "loss": 2.4461, "step": 888000 }, { "epoch": 2.57, "learning_rate": 4.8714557185531676e-05, "loss": 2.4505, "step": 888500 }, { "epoch": 2.57, "learning_rate": 4.87138335378844e-05, "loss": 2.4672, "step": 889000 }, { "epoch": 2.57, "learning_rate": 4.871310989023713e-05, "loss": 2.4518, "step": 889500 }, { "epoch": 2.58, "learning_rate": 4.871238624258985e-05, "loss": 2.4466, "step": 890000 }, { "epoch": 2.58, "learning_rate": 4.871166259494258e-05, "loss": 2.4747, "step": 890500 }, { "epoch": 2.58, "learning_rate": 4.87109389472953e-05, "loss": 2.4575, "step": 891000 }, { "epoch": 2.58, "learning_rate": 4.871021819423861e-05, "loss": 2.4662, "step": 891500 }, { "epoch": 2.58, "learning_rate": 4.870949454659133e-05, "loss": 2.4587, "step": 892000 }, { "epoch": 2.58, "learning_rate": 4.8708770898944054e-05, "loss": 2.4539, "step": 892500 }, { "epoch": 2.58, "learning_rate": 4.8708047251296776e-05, "loss": 2.4213, "step": 893000 }, { "epoch": 2.59, "learning_rate": 4.87073250509448e-05, "loss": 2.4429, "step": 893500 }, { "epoch": 2.59, "learning_rate": 4.870660140329752e-05, "loss": 2.4597, "step": 894000 }, { "epoch": 2.59, "learning_rate": 4.870587775565024e-05, "loss": 2.4739, "step": 894500 }, { "epoch": 2.59, "learning_rate": 4.8705154108002965e-05, "loss": 2.4595, "step": 895000 }, { "epoch": 2.59, "learning_rate": 4.870443046035569e-05, "loss": 2.4374, "step": 895500 }, { "epoch": 2.59, "learning_rate": 4.87037082600037e-05, "loss": 2.4726, "step": 896000 }, { "epoch": 2.6, "learning_rate": 4.8702984612356425e-05, "loss": 2.4594, "step": 896500 }, { "epoch": 2.6, "learning_rate": 4.870226241200445e-05, "loss": 2.4582, "step": 897000 }, { "epoch": 2.6, "learning_rate": 4.870153876435718e-05, "loss": 2.4465, "step": 897500 }, { "epoch": 2.6, "learning_rate": 4.87008151167099e-05, "loss": 2.4757, "step": 898000 }, { "epoch": 2.6, "learning_rate": 4.870009146906262e-05, "loss": 2.4431, "step": 898500 }, { "epoch": 2.6, "learning_rate": 4.8699367821415343e-05, "loss": 2.4492, "step": 899000 }, { "epoch": 2.6, "learning_rate": 4.8698644173768066e-05, "loss": 2.4233, "step": 899500 }, { "epoch": 2.61, "learning_rate": 4.869792052612079e-05, "loss": 2.4649, "step": 900000 }, { "epoch": 2.61, "learning_rate": 4.869719687847351e-05, "loss": 2.481, "step": 900500 }, { "epoch": 2.61, "learning_rate": 4.869647323082623e-05, "loss": 2.4592, "step": 901000 }, { "epoch": 2.61, "learning_rate": 4.8695749583178955e-05, "loss": 2.4467, "step": 901500 }, { "epoch": 2.61, "learning_rate": 4.869502593553168e-05, "loss": 2.4453, "step": 902000 }, { "epoch": 2.61, "learning_rate": 4.86943022878844e-05, "loss": 2.4858, "step": 902500 }, { "epoch": 2.61, "learning_rate": 4.869357864023713e-05, "loss": 2.4187, "step": 903000 }, { "epoch": 2.62, "learning_rate": 4.869285499258985e-05, "loss": 2.4596, "step": 903500 }, { "epoch": 2.62, "learning_rate": 4.869213134494257e-05, "loss": 2.468, "step": 904000 }, { "epoch": 2.62, "learning_rate": 4.86914076972953e-05, "loss": 2.428, "step": 904500 }, { "epoch": 2.62, "learning_rate": 4.8690684049648024e-05, "loss": 2.4579, "step": 905000 }, { "epoch": 2.62, "learning_rate": 4.8689960402000746e-05, "loss": 2.4583, "step": 905500 }, { "epoch": 2.62, "learning_rate": 4.868923820164876e-05, "loss": 2.4549, "step": 906000 }, { "epoch": 2.62, "learning_rate": 4.8688514554001484e-05, "loss": 2.4625, "step": 906500 }, { "epoch": 2.63, "learning_rate": 4.8687792353649506e-05, "loss": 2.461, "step": 907000 }, { "epoch": 2.63, "learning_rate": 4.868706870600223e-05, "loss": 2.4467, "step": 907500 }, { "epoch": 2.63, "learning_rate": 4.868634505835495e-05, "loss": 2.4873, "step": 908000 }, { "epoch": 2.63, "learning_rate": 4.868562141070767e-05, "loss": 2.4587, "step": 908500 }, { "epoch": 2.63, "learning_rate": 4.8684897763060395e-05, "loss": 2.4456, "step": 909000 }, { "epoch": 2.63, "learning_rate": 4.868417556270841e-05, "loss": 2.4724, "step": 909500 }, { "epoch": 2.63, "learning_rate": 4.868345191506113e-05, "loss": 2.4145, "step": 910000 }, { "epoch": 2.64, "learning_rate": 4.8682728267413855e-05, "loss": 2.4492, "step": 910500 }, { "epoch": 2.64, "learning_rate": 4.868200461976658e-05, "loss": 2.4593, "step": 911000 }, { "epoch": 2.64, "learning_rate": 4.86812824194146e-05, "loss": 2.4748, "step": 911500 }, { "epoch": 2.64, "learning_rate": 4.868055877176733e-05, "loss": 2.4635, "step": 912000 }, { "epoch": 2.64, "learning_rate": 4.867983512412005e-05, "loss": 2.4113, "step": 912500 }, { "epoch": 2.64, "learning_rate": 4.867911147647277e-05, "loss": 2.4451, "step": 913000 }, { "epoch": 2.64, "learning_rate": 4.8678387828825495e-05, "loss": 2.4659, "step": 913500 }, { "epoch": 2.65, "learning_rate": 4.867766418117822e-05, "loss": 2.4472, "step": 914000 }, { "epoch": 2.65, "learning_rate": 4.867694053353094e-05, "loss": 2.4232, "step": 914500 }, { "epoch": 2.65, "learning_rate": 4.867621688588366e-05, "loss": 2.4305, "step": 915000 }, { "epoch": 2.65, "learning_rate": 4.8675493238236384e-05, "loss": 2.4543, "step": 915500 }, { "epoch": 2.65, "learning_rate": 4.867477103788441e-05, "loss": 2.4772, "step": 916000 }, { "epoch": 2.65, "learning_rate": 4.867404739023713e-05, "loss": 2.4502, "step": 916500 }, { "epoch": 2.65, "learning_rate": 4.867332374258985e-05, "loss": 2.4392, "step": 917000 }, { "epoch": 2.66, "learning_rate": 4.8672600094942573e-05, "loss": 2.425, "step": 917500 }, { "epoch": 2.66, "learning_rate": 4.8671876447295296e-05, "loss": 2.4573, "step": 918000 }, { "epoch": 2.66, "learning_rate": 4.867115279964802e-05, "loss": 2.4606, "step": 918500 }, { "epoch": 2.66, "learning_rate": 4.867042915200075e-05, "loss": 2.4845, "step": 919000 }, { "epoch": 2.66, "learning_rate": 4.866970550435347e-05, "loss": 2.4546, "step": 919500 }, { "epoch": 2.66, "learning_rate": 4.8668983304001485e-05, "loss": 2.4337, "step": 920000 }, { "epoch": 2.66, "learning_rate": 4.866826110364951e-05, "loss": 2.4631, "step": 920500 }, { "epoch": 2.67, "learning_rate": 4.866753745600223e-05, "loss": 2.4756, "step": 921000 }, { "epoch": 2.67, "learning_rate": 4.866681380835495e-05, "loss": 2.4742, "step": 921500 }, { "epoch": 2.67, "learning_rate": 4.8666090160707674e-05, "loss": 2.4588, "step": 922000 }, { "epoch": 2.67, "learning_rate": 4.8665366513060396e-05, "loss": 2.4369, "step": 922500 }, { "epoch": 2.67, "learning_rate": 4.866464286541312e-05, "loss": 2.4665, "step": 923000 }, { "epoch": 2.67, "learning_rate": 4.866391921776584e-05, "loss": 2.4556, "step": 923500 }, { "epoch": 2.67, "learning_rate": 4.866319557011856e-05, "loss": 2.4519, "step": 924000 }, { "epoch": 2.68, "learning_rate": 4.866247336976658e-05, "loss": 2.4419, "step": 924500 }, { "epoch": 2.68, "learning_rate": 4.866174972211931e-05, "loss": 2.4378, "step": 925000 }, { "epoch": 2.68, "learning_rate": 4.866102752176732e-05, "loss": 2.4506, "step": 925500 }, { "epoch": 2.68, "learning_rate": 4.8660303874120045e-05, "loss": 2.4561, "step": 926000 }, { "epoch": 2.68, "learning_rate": 4.865958022647277e-05, "loss": 2.4601, "step": 926500 }, { "epoch": 2.68, "learning_rate": 4.8658856578825496e-05, "loss": 2.4544, "step": 927000 }, { "epoch": 2.68, "learning_rate": 4.865813293117822e-05, "loss": 2.434, "step": 927500 }, { "epoch": 2.69, "learning_rate": 4.865740928353094e-05, "loss": 2.4714, "step": 928000 }, { "epoch": 2.69, "learning_rate": 4.865668563588366e-05, "loss": 2.4271, "step": 928500 }, { "epoch": 2.69, "learning_rate": 4.8655961988236385e-05, "loss": 2.4399, "step": 929000 }, { "epoch": 2.69, "learning_rate": 4.865523834058911e-05, "loss": 2.4481, "step": 929500 }, { "epoch": 2.69, "learning_rate": 4.865451614023713e-05, "loss": 2.4669, "step": 930000 }, { "epoch": 2.69, "learning_rate": 4.865379249258985e-05, "loss": 2.4572, "step": 930500 }, { "epoch": 2.69, "learning_rate": 4.8653068844942574e-05, "loss": 2.4523, "step": 931000 }, { "epoch": 2.7, "learning_rate": 4.8652345197295296e-05, "loss": 2.4464, "step": 931500 }, { "epoch": 2.7, "learning_rate": 4.865162299694331e-05, "loss": 2.4635, "step": 932000 }, { "epoch": 2.7, "learning_rate": 4.8650900796591334e-05, "loss": 2.4644, "step": 932500 }, { "epoch": 2.7, "learning_rate": 4.8650177148944057e-05, "loss": 2.4424, "step": 933000 }, { "epoch": 2.7, "learning_rate": 4.864945350129678e-05, "loss": 2.4443, "step": 933500 }, { "epoch": 2.7, "learning_rate": 4.86487298536495e-05, "loss": 2.4308, "step": 934000 }, { "epoch": 2.7, "learning_rate": 4.864800620600223e-05, "loss": 2.4478, "step": 934500 }, { "epoch": 2.71, "learning_rate": 4.864728255835495e-05, "loss": 2.4514, "step": 935000 }, { "epoch": 2.71, "learning_rate": 4.8646558910707674e-05, "loss": 2.4682, "step": 935500 }, { "epoch": 2.71, "learning_rate": 4.86458352630604e-05, "loss": 2.4719, "step": 936000 }, { "epoch": 2.71, "learning_rate": 4.864511306270841e-05, "loss": 2.465, "step": 936500 }, { "epoch": 2.71, "learning_rate": 4.8644389415061135e-05, "loss": 2.4763, "step": 937000 }, { "epoch": 2.71, "learning_rate": 4.864366576741386e-05, "loss": 2.4436, "step": 937500 }, { "epoch": 2.72, "learning_rate": 4.8642942119766586e-05, "loss": 2.4721, "step": 938000 }, { "epoch": 2.72, "learning_rate": 4.8642221366709895e-05, "loss": 2.4606, "step": 938500 }, { "epoch": 2.72, "learning_rate": 4.864149771906262e-05, "loss": 2.4603, "step": 939000 }, { "epoch": 2.72, "learning_rate": 4.864077407141534e-05, "loss": 2.4776, "step": 939500 }, { "epoch": 2.72, "learning_rate": 4.864005042376806e-05, "loss": 2.4467, "step": 940000 }, { "epoch": 2.72, "learning_rate": 4.8639326776120784e-05, "loss": 2.455, "step": 940500 }, { "epoch": 2.72, "learning_rate": 4.8638603128473506e-05, "loss": 2.4479, "step": 941000 }, { "epoch": 2.73, "learning_rate": 4.8637879480826235e-05, "loss": 2.4769, "step": 941500 }, { "epoch": 2.73, "learning_rate": 4.863715583317896e-05, "loss": 2.4656, "step": 942000 }, { "epoch": 2.73, "learning_rate": 4.8636432185531686e-05, "loss": 2.4547, "step": 942500 }, { "epoch": 2.73, "learning_rate": 4.863570853788441e-05, "loss": 2.4633, "step": 943000 }, { "epoch": 2.73, "learning_rate": 4.863498489023713e-05, "loss": 2.4582, "step": 943500 }, { "epoch": 2.73, "learning_rate": 4.863426124258985e-05, "loss": 2.4589, "step": 944000 }, { "epoch": 2.73, "learning_rate": 4.8633537594942575e-05, "loss": 2.4509, "step": 944500 }, { "epoch": 2.74, "learning_rate": 4.863281539459059e-05, "loss": 2.4781, "step": 945000 }, { "epoch": 2.74, "learning_rate": 4.863209174694331e-05, "loss": 2.4617, "step": 945500 }, { "epoch": 2.74, "learning_rate": 4.8631369546591335e-05, "loss": 2.4691, "step": 946000 }, { "epoch": 2.74, "learning_rate": 4.863064589894406e-05, "loss": 2.4536, "step": 946500 }, { "epoch": 2.74, "learning_rate": 4.862992225129678e-05, "loss": 2.4258, "step": 947000 }, { "epoch": 2.74, "learning_rate": 4.86291986036495e-05, "loss": 2.4414, "step": 947500 }, { "epoch": 2.74, "learning_rate": 4.8628474956002224e-05, "loss": 2.4639, "step": 948000 }, { "epoch": 2.75, "learning_rate": 4.8627751308354946e-05, "loss": 2.4517, "step": 948500 }, { "epoch": 2.75, "learning_rate": 4.862702766070767e-05, "loss": 2.454, "step": 949000 }, { "epoch": 2.75, "learning_rate": 4.86263040130604e-05, "loss": 2.448, "step": 949500 }, { "epoch": 2.75, "learning_rate": 4.862558036541312e-05, "loss": 2.4216, "step": 950000 }, { "epoch": 2.75, "learning_rate": 4.8624858165061135e-05, "loss": 2.4298, "step": 950500 }, { "epoch": 2.75, "learning_rate": 4.862413451741386e-05, "loss": 2.4333, "step": 951000 }, { "epoch": 2.75, "learning_rate": 4.8623410869766587e-05, "loss": 2.4494, "step": 951500 }, { "epoch": 2.76, "learning_rate": 4.862268722211931e-05, "loss": 2.4553, "step": 952000 }, { "epoch": 2.76, "learning_rate": 4.8621965021767324e-05, "loss": 2.4588, "step": 952500 }, { "epoch": 2.76, "learning_rate": 4.8621241374120047e-05, "loss": 2.4468, "step": 953000 }, { "epoch": 2.76, "learning_rate": 4.862051772647277e-05, "loss": 2.4701, "step": 953500 }, { "epoch": 2.76, "learning_rate": 4.861979407882549e-05, "loss": 2.4475, "step": 954000 }, { "epoch": 2.76, "learning_rate": 4.861907043117821e-05, "loss": 2.4553, "step": 954500 }, { "epoch": 2.76, "learning_rate": 4.8618346783530936e-05, "loss": 2.4246, "step": 955000 }, { "epoch": 2.77, "learning_rate": 4.861762313588366e-05, "loss": 2.4622, "step": 955500 }, { "epoch": 2.77, "learning_rate": 4.861689948823639e-05, "loss": 2.4683, "step": 956000 }, { "epoch": 2.77, "learning_rate": 4.8616178735179696e-05, "loss": 2.4612, "step": 956500 }, { "epoch": 2.77, "learning_rate": 4.8615455087532425e-05, "loss": 2.4624, "step": 957000 }, { "epoch": 2.77, "learning_rate": 4.861473143988515e-05, "loss": 2.4681, "step": 957500 }, { "epoch": 2.77, "learning_rate": 4.861400779223787e-05, "loss": 2.477, "step": 958000 }, { "epoch": 2.77, "learning_rate": 4.861328414459059e-05, "loss": 2.4291, "step": 958500 }, { "epoch": 2.78, "learning_rate": 4.8612561944238614e-05, "loss": 2.4417, "step": 959000 }, { "epoch": 2.78, "learning_rate": 4.8611838296591336e-05, "loss": 2.4448, "step": 959500 }, { "epoch": 2.78, "learning_rate": 4.861111464894406e-05, "loss": 2.42, "step": 960000 }, { "epoch": 2.78, "learning_rate": 4.861039100129678e-05, "loss": 2.4458, "step": 960500 }, { "epoch": 2.78, "learning_rate": 4.86096673536495e-05, "loss": 2.4689, "step": 961000 }, { "epoch": 2.78, "learning_rate": 4.8608943706002225e-05, "loss": 2.45, "step": 961500 }, { "epoch": 2.78, "learning_rate": 4.860822005835495e-05, "loss": 2.439, "step": 962000 }, { "epoch": 2.79, "learning_rate": 4.860749641070767e-05, "loss": 2.4277, "step": 962500 }, { "epoch": 2.79, "learning_rate": 4.8606775657650985e-05, "loss": 2.4394, "step": 963000 }, { "epoch": 2.79, "learning_rate": 4.860605201000371e-05, "loss": 2.446, "step": 963500 }, { "epoch": 2.79, "learning_rate": 4.860532836235643e-05, "loss": 2.4517, "step": 964000 }, { "epoch": 2.79, "learning_rate": 4.860460471470916e-05, "loss": 2.4274, "step": 964500 }, { "epoch": 2.79, "learning_rate": 4.860388106706188e-05, "loss": 2.4332, "step": 965000 }, { "epoch": 2.79, "learning_rate": 4.86031574194146e-05, "loss": 2.4417, "step": 965500 }, { "epoch": 2.8, "learning_rate": 4.8602433771767325e-05, "loss": 2.4321, "step": 966000 }, { "epoch": 2.8, "learning_rate": 4.860171012412005e-05, "loss": 2.4553, "step": 966500 }, { "epoch": 2.8, "learning_rate": 4.860098792376806e-05, "loss": 2.467, "step": 967000 }, { "epoch": 2.8, "learning_rate": 4.8600264276120785e-05, "loss": 2.4659, "step": 967500 }, { "epoch": 2.8, "learning_rate": 4.8599540628473514e-05, "loss": 2.4483, "step": 968000 }, { "epoch": 2.8, "learning_rate": 4.8598816980826236e-05, "loss": 2.4443, "step": 968500 }, { "epoch": 2.8, "learning_rate": 4.859809333317896e-05, "loss": 2.4585, "step": 969000 }, { "epoch": 2.81, "learning_rate": 4.859736968553168e-05, "loss": 2.4495, "step": 969500 }, { "epoch": 2.81, "learning_rate": 4.859664893247499e-05, "loss": 2.4341, "step": 970000 }, { "epoch": 2.81, "learning_rate": 4.859592528482771e-05, "loss": 2.4353, "step": 970500 }, { "epoch": 2.81, "learning_rate": 4.8595201637180434e-05, "loss": 2.4307, "step": 971000 }, { "epoch": 2.81, "learning_rate": 4.859447798953316e-05, "loss": 2.4511, "step": 971500 }, { "epoch": 2.81, "learning_rate": 4.8593754341885885e-05, "loss": 2.4669, "step": 972000 }, { "epoch": 2.81, "learning_rate": 4.8593030694238614e-05, "loss": 2.4567, "step": 972500 }, { "epoch": 2.82, "learning_rate": 4.859230704659134e-05, "loss": 2.4674, "step": 973000 }, { "epoch": 2.82, "learning_rate": 4.859158339894406e-05, "loss": 2.4895, "step": 973500 }, { "epoch": 2.82, "learning_rate": 4.859085975129678e-05, "loss": 2.4793, "step": 974000 }, { "epoch": 2.82, "learning_rate": 4.85901375509448e-05, "loss": 2.4346, "step": 974500 }, { "epoch": 2.82, "learning_rate": 4.858941390329752e-05, "loss": 2.4512, "step": 975000 }, { "epoch": 2.82, "learning_rate": 4.858869025565024e-05, "loss": 2.455, "step": 975500 }, { "epoch": 2.83, "learning_rate": 4.8587966608002963e-05, "loss": 2.4627, "step": 976000 }, { "epoch": 2.83, "learning_rate": 4.8587244407650986e-05, "loss": 2.4585, "step": 976500 }, { "epoch": 2.83, "learning_rate": 4.858652076000371e-05, "loss": 2.4504, "step": 977000 }, { "epoch": 2.83, "learning_rate": 4.8585798559651724e-05, "loss": 2.4254, "step": 977500 }, { "epoch": 2.83, "learning_rate": 4.8585074912004446e-05, "loss": 2.438, "step": 978000 }, { "epoch": 2.83, "learning_rate": 4.858435126435717e-05, "loss": 2.4483, "step": 978500 }, { "epoch": 2.83, "learning_rate": 4.858362761670989e-05, "loss": 2.4106, "step": 979000 }, { "epoch": 2.84, "learning_rate": 4.858290396906262e-05, "loss": 2.4446, "step": 979500 }, { "epoch": 2.84, "learning_rate": 4.858218032141534e-05, "loss": 2.4544, "step": 980000 }, { "epoch": 2.84, "learning_rate": 4.8581456673768064e-05, "loss": 2.4631, "step": 980500 }, { "epoch": 2.84, "learning_rate": 4.858073302612079e-05, "loss": 2.4517, "step": 981000 }, { "epoch": 2.84, "learning_rate": 4.8580009378473515e-05, "loss": 2.4564, "step": 981500 }, { "epoch": 2.84, "learning_rate": 4.857928717812153e-05, "loss": 2.4526, "step": 982000 }, { "epoch": 2.84, "learning_rate": 4.857856353047425e-05, "loss": 2.4403, "step": 982500 }, { "epoch": 2.85, "learning_rate": 4.8577839882826975e-05, "loss": 2.4405, "step": 983000 }, { "epoch": 2.85, "learning_rate": 4.85771162351797e-05, "loss": 2.4396, "step": 983500 }, { "epoch": 2.85, "learning_rate": 4.857639403482771e-05, "loss": 2.4476, "step": 984000 }, { "epoch": 2.85, "learning_rate": 4.857567038718044e-05, "loss": 2.4527, "step": 984500 }, { "epoch": 2.85, "learning_rate": 4.8574946739533164e-05, "loss": 2.4661, "step": 985000 }, { "epoch": 2.85, "learning_rate": 4.8574223091885886e-05, "loss": 2.4549, "step": 985500 }, { "epoch": 2.85, "learning_rate": 4.857349944423861e-05, "loss": 2.4619, "step": 986000 }, { "epoch": 2.86, "learning_rate": 4.8572777243886624e-05, "loss": 2.4462, "step": 986500 }, { "epoch": 2.86, "learning_rate": 4.8572053596239346e-05, "loss": 2.4388, "step": 987000 }, { "epoch": 2.86, "learning_rate": 4.8571329948592075e-05, "loss": 2.453, "step": 987500 }, { "epoch": 2.86, "learning_rate": 4.85706063009448e-05, "loss": 2.4359, "step": 988000 }, { "epoch": 2.86, "learning_rate": 4.856988410059281e-05, "loss": 2.458, "step": 988500 }, { "epoch": 2.86, "learning_rate": 4.8569161900240835e-05, "loss": 2.4579, "step": 989000 }, { "epoch": 2.86, "learning_rate": 4.856843825259356e-05, "loss": 2.4353, "step": 989500 }, { "epoch": 2.87, "learning_rate": 4.856771605224157e-05, "loss": 2.4816, "step": 990000 }, { "epoch": 2.87, "learning_rate": 4.8566992404594295e-05, "loss": 2.4242, "step": 990500 }, { "epoch": 2.87, "learning_rate": 4.856626875694702e-05, "loss": 2.4759, "step": 991000 }, { "epoch": 2.87, "learning_rate": 4.856554510929974e-05, "loss": 2.4336, "step": 991500 }, { "epoch": 2.87, "learning_rate": 4.856482146165246e-05, "loss": 2.4736, "step": 992000 }, { "epoch": 2.87, "learning_rate": 4.856409781400519e-05, "loss": 2.4378, "step": 992500 }, { "epoch": 2.87, "learning_rate": 4.856337416635791e-05, "loss": 2.4494, "step": 993000 }, { "epoch": 2.88, "learning_rate": 4.8562650518710636e-05, "loss": 2.4686, "step": 993500 }, { "epoch": 2.88, "learning_rate": 4.856192687106336e-05, "loss": 2.4552, "step": 994000 }, { "epoch": 2.88, "learning_rate": 4.8561204670711373e-05, "loss": 2.4392, "step": 994500 }, { "epoch": 2.88, "learning_rate": 4.8560482470359396e-05, "loss": 2.444, "step": 995000 }, { "epoch": 2.88, "learning_rate": 4.855975882271212e-05, "loss": 2.4615, "step": 995500 }, { "epoch": 2.88, "learning_rate": 4.855903517506484e-05, "loss": 2.4409, "step": 996000 }, { "epoch": 2.88, "learning_rate": 4.855831152741757e-05, "loss": 2.4721, "step": 996500 }, { "epoch": 2.89, "learning_rate": 4.855758787977029e-05, "loss": 2.4188, "step": 997000 }, { "epoch": 2.89, "learning_rate": 4.8556864232123014e-05, "loss": 2.4752, "step": 997500 }, { "epoch": 2.89, "learning_rate": 4.8556140584475736e-05, "loss": 2.4554, "step": 998000 }, { "epoch": 2.89, "learning_rate": 4.855541693682846e-05, "loss": 2.4502, "step": 998500 }, { "epoch": 2.89, "learning_rate": 4.855469328918118e-05, "loss": 2.469, "step": 999000 }, { "epoch": 2.89, "learning_rate": 4.85539696415339e-05, "loss": 2.4287, "step": 999500 }, { "epoch": 2.89, "learning_rate": 4.8553245993886625e-05, "loss": 2.4789, "step": 1000000 }, { "epoch": 2.9, "learning_rate": 4.855252234623935e-05, "loss": 2.4481, "step": 1000500 }, { "epoch": 2.9, "learning_rate": 4.855179869859207e-05, "loss": 2.4394, "step": 1001000 }, { "epoch": 2.9, "learning_rate": 4.855107505094479e-05, "loss": 2.4331, "step": 1001500 }, { "epoch": 2.9, "learning_rate": 4.855035140329752e-05, "loss": 2.4554, "step": 1002000 }, { "epoch": 2.9, "learning_rate": 4.854962775565024e-05, "loss": 2.4142, "step": 1002500 }, { "epoch": 2.9, "learning_rate": 4.8548904108002965e-05, "loss": 2.4857, "step": 1003000 }, { "epoch": 2.9, "learning_rate": 4.8548180460355694e-05, "loss": 2.437, "step": 1003500 }, { "epoch": 2.91, "learning_rate": 4.854745826000371e-05, "loss": 2.4008, "step": 1004000 }, { "epoch": 2.91, "learning_rate": 4.854673461235643e-05, "loss": 2.4431, "step": 1004500 }, { "epoch": 2.91, "learning_rate": 4.8546010964709154e-05, "loss": 2.4424, "step": 1005000 }, { "epoch": 2.91, "learning_rate": 4.8545287317061876e-05, "loss": 2.454, "step": 1005500 }, { "epoch": 2.91, "learning_rate": 4.85445636694146e-05, "loss": 2.4359, "step": 1006000 }, { "epoch": 2.91, "learning_rate": 4.854384002176732e-05, "loss": 2.4597, "step": 1006500 }, { "epoch": 2.91, "learning_rate": 4.854311782141534e-05, "loss": 2.4399, "step": 1007000 }, { "epoch": 2.92, "learning_rate": 4.8542394173768065e-05, "loss": 2.4619, "step": 1007500 }, { "epoch": 2.92, "learning_rate": 4.854167052612079e-05, "loss": 2.4393, "step": 1008000 }, { "epoch": 2.92, "learning_rate": 4.85409483257688e-05, "loss": 2.4616, "step": 1008500 }, { "epoch": 2.92, "learning_rate": 4.8540224678121525e-05, "loss": 2.4497, "step": 1009000 }, { "epoch": 2.92, "learning_rate": 4.8539501030474254e-05, "loss": 2.4246, "step": 1009500 }, { "epoch": 2.92, "learning_rate": 4.8538777382826977e-05, "loss": 2.4532, "step": 1010000 }, { "epoch": 2.92, "learning_rate": 4.85380537351797e-05, "loss": 2.4217, "step": 1010500 }, { "epoch": 2.93, "learning_rate": 4.853733153482772e-05, "loss": 2.4443, "step": 1011000 }, { "epoch": 2.93, "learning_rate": 4.853660788718044e-05, "loss": 2.4778, "step": 1011500 }, { "epoch": 2.93, "learning_rate": 4.8535884239533166e-05, "loss": 2.4233, "step": 1012000 }, { "epoch": 2.93, "learning_rate": 4.853516059188589e-05, "loss": 2.4459, "step": 1012500 }, { "epoch": 2.93, "learning_rate": 4.853443694423861e-05, "loss": 2.4646, "step": 1013000 }, { "epoch": 2.93, "learning_rate": 4.853371329659133e-05, "loss": 2.4486, "step": 1013500 }, { "epoch": 2.94, "learning_rate": 4.8532989648944055e-05, "loss": 2.461, "step": 1014000 }, { "epoch": 2.94, "learning_rate": 4.853226600129678e-05, "loss": 2.4285, "step": 1014500 }, { "epoch": 2.94, "learning_rate": 4.853154380094479e-05, "loss": 2.4527, "step": 1015000 }, { "epoch": 2.94, "learning_rate": 4.853082015329752e-05, "loss": 2.431, "step": 1015500 }, { "epoch": 2.94, "learning_rate": 4.8530096505650244e-05, "loss": 2.4355, "step": 1016000 }, { "epoch": 2.94, "learning_rate": 4.8529372858002966e-05, "loss": 2.4793, "step": 1016500 }, { "epoch": 2.94, "learning_rate": 4.8528649210355695e-05, "loss": 2.4321, "step": 1017000 }, { "epoch": 2.95, "learning_rate": 4.852792556270842e-05, "loss": 2.4442, "step": 1017500 }, { "epoch": 2.95, "learning_rate": 4.852720336235643e-05, "loss": 2.4496, "step": 1018000 }, { "epoch": 2.95, "learning_rate": 4.8526479714709155e-05, "loss": 2.4377, "step": 1018500 }, { "epoch": 2.95, "learning_rate": 4.852575606706188e-05, "loss": 2.4384, "step": 1019000 }, { "epoch": 2.95, "learning_rate": 4.85250324194146e-05, "loss": 2.4626, "step": 1019500 }, { "epoch": 2.95, "learning_rate": 4.852430877176732e-05, "loss": 2.438, "step": 1020000 }, { "epoch": 2.95, "learning_rate": 4.8523585124120044e-05, "loss": 2.4431, "step": 1020500 }, { "epoch": 2.96, "learning_rate": 4.852286147647277e-05, "loss": 2.4611, "step": 1021000 }, { "epoch": 2.96, "learning_rate": 4.8522137828825495e-05, "loss": 2.4586, "step": 1021500 }, { "epoch": 2.96, "learning_rate": 4.852141562847351e-05, "loss": 2.4446, "step": 1022000 }, { "epoch": 2.96, "learning_rate": 4.852069198082623e-05, "loss": 2.4435, "step": 1022500 }, { "epoch": 2.96, "learning_rate": 4.8519968333178955e-05, "loss": 2.4525, "step": 1023000 }, { "epoch": 2.96, "learning_rate": 4.851924468553168e-05, "loss": 2.4259, "step": 1023500 }, { "epoch": 2.96, "learning_rate": 4.85185210378844e-05, "loss": 2.4355, "step": 1024000 }, { "epoch": 2.97, "learning_rate": 4.851779739023713e-05, "loss": 2.4419, "step": 1024500 }, { "epoch": 2.97, "learning_rate": 4.851707374258985e-05, "loss": 2.45, "step": 1025000 }, { "epoch": 2.97, "learning_rate": 4.851635009494257e-05, "loss": 2.4389, "step": 1025500 }, { "epoch": 2.97, "learning_rate": 4.8515627894590595e-05, "loss": 2.4503, "step": 1026000 }, { "epoch": 2.97, "learning_rate": 4.851490424694332e-05, "loss": 2.4466, "step": 1026500 }, { "epoch": 2.97, "learning_rate": 4.851418059929604e-05, "loss": 2.4771, "step": 1027000 }, { "epoch": 2.97, "learning_rate": 4.851345695164876e-05, "loss": 2.4416, "step": 1027500 }, { "epoch": 2.98, "learning_rate": 4.8512733304001484e-05, "loss": 2.4252, "step": 1028000 }, { "epoch": 2.98, "learning_rate": 4.85120111036495e-05, "loss": 2.4382, "step": 1028500 }, { "epoch": 2.98, "learning_rate": 4.851128745600222e-05, "loss": 2.4497, "step": 1029000 }, { "epoch": 2.98, "learning_rate": 4.8510563808354944e-05, "loss": 2.4864, "step": 1029500 }, { "epoch": 2.98, "learning_rate": 4.850984016070767e-05, "loss": 2.4526, "step": 1030000 }, { "epoch": 2.98, "learning_rate": 4.8509116513060396e-05, "loss": 2.4475, "step": 1030500 }, { "epoch": 2.98, "learning_rate": 4.850839286541312e-05, "loss": 2.4792, "step": 1031000 }, { "epoch": 2.99, "learning_rate": 4.850766921776585e-05, "loss": 2.4451, "step": 1031500 }, { "epoch": 2.99, "learning_rate": 4.850694557011857e-05, "loss": 2.4516, "step": 1032000 }, { "epoch": 2.99, "learning_rate": 4.850622481706188e-05, "loss": 2.4405, "step": 1032500 }, { "epoch": 2.99, "learning_rate": 4.85055011694146e-05, "loss": 2.4382, "step": 1033000 }, { "epoch": 2.99, "learning_rate": 4.850477896906262e-05, "loss": 2.4564, "step": 1033500 }, { "epoch": 2.99, "learning_rate": 4.8504055321415345e-05, "loss": 2.4371, "step": 1034000 }, { "epoch": 2.99, "learning_rate": 4.850333167376807e-05, "loss": 2.4276, "step": 1034500 }, { "epoch": 3.0, "learning_rate": 4.850260802612079e-05, "loss": 2.4658, "step": 1035000 }, { "epoch": 3.0, "learning_rate": 4.850188437847351e-05, "loss": 2.4655, "step": 1035500 }, { "epoch": 3.0, "learning_rate": 4.8501160730826234e-05, "loss": 2.4413, "step": 1036000 }, { "epoch": 3.0, "eval_accuracy": 0.6356301080160491, "eval_accuracy_mlm": 0.5974497137172354, "eval_accuracy_nsp": 0.840544838743681, "eval_loss": 2.409207344055176, "eval_runtime": 330.6147, "eval_samples_per_second": 1319.923, "eval_steps_per_second": 54.998, "step": 1036416 }, { "epoch": 3.0, "learning_rate": 4.850043853047425e-05, "loss": 2.4452, "step": 1036500 }, { "epoch": 3.0, "learning_rate": 4.849971488282697e-05, "loss": 2.3943, "step": 1037000 }, { "epoch": 3.0, "learning_rate": 4.84989912351797e-05, "loss": 2.4045, "step": 1037500 }, { "epoch": 3.0, "learning_rate": 4.849826758753242e-05, "loss": 2.417, "step": 1038000 }, { "epoch": 3.01, "learning_rate": 4.8497543939885145e-05, "loss": 2.414, "step": 1038500 }, { "epoch": 3.01, "learning_rate": 4.849682029223787e-05, "loss": 2.4318, "step": 1039000 }, { "epoch": 3.01, "learning_rate": 4.8496096644590596e-05, "loss": 2.4007, "step": 1039500 }, { "epoch": 3.01, "learning_rate": 4.849537444423861e-05, "loss": 2.4196, "step": 1040000 }, { "epoch": 3.01, "learning_rate": 4.8494650796591334e-05, "loss": 2.4055, "step": 1040500 }, { "epoch": 3.01, "learning_rate": 4.8493927148944056e-05, "loss": 2.3992, "step": 1041000 }, { "epoch": 3.01, "learning_rate": 4.849320350129678e-05, "loss": 2.4361, "step": 1041500 }, { "epoch": 3.02, "learning_rate": 4.84924798536495e-05, "loss": 2.4479, "step": 1042000 }, { "epoch": 3.02, "learning_rate": 4.849175620600222e-05, "loss": 2.432, "step": 1042500 }, { "epoch": 3.02, "learning_rate": 4.849103255835495e-05, "loss": 2.4275, "step": 1043000 }, { "epoch": 3.02, "learning_rate": 4.8490308910707674e-05, "loss": 2.4418, "step": 1043500 }, { "epoch": 3.02, "learning_rate": 4.8489585263060396e-05, "loss": 2.4165, "step": 1044000 }, { "epoch": 3.02, "learning_rate": 4.848886161541312e-05, "loss": 2.4225, "step": 1044500 }, { "epoch": 3.02, "learning_rate": 4.8488139415061134e-05, "loss": 2.4336, "step": 1045000 }, { "epoch": 3.03, "learning_rate": 4.8487415767413856e-05, "loss": 2.4391, "step": 1045500 }, { "epoch": 3.03, "learning_rate": 4.848669211976658e-05, "loss": 2.418, "step": 1046000 }, { "epoch": 3.03, "learning_rate": 4.84859699194146e-05, "loss": 2.4402, "step": 1046500 }, { "epoch": 3.03, "learning_rate": 4.848524627176732e-05, "loss": 2.4287, "step": 1047000 }, { "epoch": 3.03, "learning_rate": 4.848452262412005e-05, "loss": 2.4331, "step": 1047500 }, { "epoch": 3.03, "learning_rate": 4.8483798976472774e-05, "loss": 2.4332, "step": 1048000 }, { "epoch": 3.03, "learning_rate": 4.8483075328825497e-05, "loss": 2.4311, "step": 1048500 }, { "epoch": 3.04, "learning_rate": 4.848235168117822e-05, "loss": 2.4193, "step": 1049000 }, { "epoch": 3.04, "learning_rate": 4.848162803353094e-05, "loss": 2.4216, "step": 1049500 }, { "epoch": 3.04, "learning_rate": 4.848090438588366e-05, "loss": 2.4298, "step": 1050000 }, { "epoch": 3.04, "learning_rate": 4.848018218553168e-05, "loss": 2.4346, "step": 1050500 }, { "epoch": 3.04, "learning_rate": 4.84794599851797e-05, "loss": 2.4377, "step": 1051000 }, { "epoch": 3.04, "learning_rate": 4.8478736337532423e-05, "loss": 2.4087, "step": 1051500 }, { "epoch": 3.05, "learning_rate": 4.8478012689885146e-05, "loss": 2.4181, "step": 1052000 }, { "epoch": 3.05, "learning_rate": 4.847728904223787e-05, "loss": 2.4182, "step": 1052500 }, { "epoch": 3.05, "learning_rate": 4.847656539459059e-05, "loss": 2.4013, "step": 1053000 }, { "epoch": 3.05, "learning_rate": 4.847584174694331e-05, "loss": 2.4271, "step": 1053500 }, { "epoch": 3.05, "learning_rate": 4.8475118099296035e-05, "loss": 2.4351, "step": 1054000 }, { "epoch": 3.05, "learning_rate": 4.8474394451648764e-05, "loss": 2.4035, "step": 1054500 }, { "epoch": 3.05, "learning_rate": 4.847367225129678e-05, "loss": 2.4358, "step": 1055000 }, { "epoch": 3.06, "learning_rate": 4.84729486036495e-05, "loss": 2.4037, "step": 1055500 }, { "epoch": 3.06, "learning_rate": 4.8472224956002224e-05, "loss": 2.4436, "step": 1056000 }, { "epoch": 3.06, "learning_rate": 4.847150130835495e-05, "loss": 2.4227, "step": 1056500 }, { "epoch": 3.06, "learning_rate": 4.8470777660707675e-05, "loss": 2.4031, "step": 1057000 }, { "epoch": 3.06, "learning_rate": 4.84700540130604e-05, "loss": 2.4215, "step": 1057500 }, { "epoch": 3.06, "learning_rate": 4.846933036541312e-05, "loss": 2.4342, "step": 1058000 }, { "epoch": 3.06, "learning_rate": 4.846860671776584e-05, "loss": 2.423, "step": 1058500 }, { "epoch": 3.07, "learning_rate": 4.846788451741386e-05, "loss": 2.4351, "step": 1059000 }, { "epoch": 3.07, "learning_rate": 4.846716086976658e-05, "loss": 2.4426, "step": 1059500 }, { "epoch": 3.07, "learning_rate": 4.84664372221193e-05, "loss": 2.4297, "step": 1060000 }, { "epoch": 3.07, "learning_rate": 4.8465713574472024e-05, "loss": 2.434, "step": 1060500 }, { "epoch": 3.07, "learning_rate": 4.8464991374120046e-05, "loss": 2.4293, "step": 1061000 }, { "epoch": 3.07, "learning_rate": 4.8464270621063355e-05, "loss": 2.4548, "step": 1061500 }, { "epoch": 3.07, "learning_rate": 4.846354697341608e-05, "loss": 2.4127, "step": 1062000 }, { "epoch": 3.08, "learning_rate": 4.8462823325768806e-05, "loss": 2.4312, "step": 1062500 }, { "epoch": 3.08, "learning_rate": 4.846209967812153e-05, "loss": 2.4466, "step": 1063000 }, { "epoch": 3.08, "learning_rate": 4.846137603047425e-05, "loss": 2.4456, "step": 1063500 }, { "epoch": 3.08, "learning_rate": 4.846065238282698e-05, "loss": 2.4025, "step": 1064000 }, { "epoch": 3.08, "learning_rate": 4.84599287351797e-05, "loss": 2.4439, "step": 1064500 }, { "epoch": 3.08, "learning_rate": 4.8459205087532424e-05, "loss": 2.4462, "step": 1065000 }, { "epoch": 3.08, "learning_rate": 4.845848288718044e-05, "loss": 2.4142, "step": 1065500 }, { "epoch": 3.09, "learning_rate": 4.845775923953316e-05, "loss": 2.4313, "step": 1066000 }, { "epoch": 3.09, "learning_rate": 4.8457035591885884e-05, "loss": 2.4297, "step": 1066500 }, { "epoch": 3.09, "learning_rate": 4.8456311944238606e-05, "loss": 2.4325, "step": 1067000 }, { "epoch": 3.09, "learning_rate": 4.845558829659133e-05, "loss": 2.4212, "step": 1067500 }, { "epoch": 3.09, "learning_rate": 4.845486609623935e-05, "loss": 2.4053, "step": 1068000 }, { "epoch": 3.09, "learning_rate": 4.845414244859207e-05, "loss": 2.4469, "step": 1068500 }, { "epoch": 3.09, "learning_rate": 4.845342024824009e-05, "loss": 2.4292, "step": 1069000 }, { "epoch": 3.1, "learning_rate": 4.845269660059281e-05, "loss": 2.4385, "step": 1069500 }, { "epoch": 3.1, "learning_rate": 4.845197295294554e-05, "loss": 2.4314, "step": 1070000 }, { "epoch": 3.1, "learning_rate": 4.845124930529826e-05, "loss": 2.4111, "step": 1070500 }, { "epoch": 3.1, "learning_rate": 4.8450525657650985e-05, "loss": 2.4339, "step": 1071000 }, { "epoch": 3.1, "learning_rate": 4.844980201000371e-05, "loss": 2.4236, "step": 1071500 }, { "epoch": 3.1, "learning_rate": 4.844907836235643e-05, "loss": 2.4201, "step": 1072000 }, { "epoch": 3.1, "learning_rate": 4.844835471470915e-05, "loss": 2.4308, "step": 1072500 }, { "epoch": 3.11, "learning_rate": 4.844763106706188e-05, "loss": 2.415, "step": 1073000 }, { "epoch": 3.11, "learning_rate": 4.84469074194146e-05, "loss": 2.4171, "step": 1073500 }, { "epoch": 3.11, "learning_rate": 4.8446183771767325e-05, "loss": 2.4266, "step": 1074000 }, { "epoch": 3.11, "learning_rate": 4.844546012412005e-05, "loss": 2.4144, "step": 1074500 }, { "epoch": 3.11, "learning_rate": 4.844473792376806e-05, "loss": 2.4512, "step": 1075000 }, { "epoch": 3.11, "learning_rate": 4.8444014276120785e-05, "loss": 2.4423, "step": 1075500 }, { "epoch": 3.11, "learning_rate": 4.844329062847351e-05, "loss": 2.4201, "step": 1076000 }, { "epoch": 3.12, "learning_rate": 4.844256698082623e-05, "loss": 2.4084, "step": 1076500 }, { "epoch": 3.12, "learning_rate": 4.844184333317896e-05, "loss": 2.42, "step": 1077000 }, { "epoch": 3.12, "learning_rate": 4.844112113282698e-05, "loss": 2.4122, "step": 1077500 }, { "epoch": 3.12, "learning_rate": 4.84403974851797e-05, "loss": 2.408, "step": 1078000 }, { "epoch": 3.12, "learning_rate": 4.8439673837532425e-05, "loss": 2.4049, "step": 1078500 }, { "epoch": 3.12, "learning_rate": 4.843895018988515e-05, "loss": 2.4484, "step": 1079000 }, { "epoch": 3.12, "learning_rate": 4.843822654223787e-05, "loss": 2.4219, "step": 1079500 }, { "epoch": 3.13, "learning_rate": 4.843750289459059e-05, "loss": 2.4253, "step": 1080000 }, { "epoch": 3.13, "learning_rate": 4.843678069423861e-05, "loss": 2.4329, "step": 1080500 }, { "epoch": 3.13, "learning_rate": 4.843605704659133e-05, "loss": 2.4303, "step": 1081000 }, { "epoch": 3.13, "learning_rate": 4.843533339894405e-05, "loss": 2.4306, "step": 1081500 }, { "epoch": 3.13, "learning_rate": 4.843460975129678e-05, "loss": 2.436, "step": 1082000 }, { "epoch": 3.13, "learning_rate": 4.84338861036495e-05, "loss": 2.4217, "step": 1082500 }, { "epoch": 3.13, "learning_rate": 4.8433162456002225e-05, "loss": 2.4221, "step": 1083000 }, { "epoch": 3.14, "learning_rate": 4.843243880835495e-05, "loss": 2.4217, "step": 1083500 }, { "epoch": 3.14, "learning_rate": 4.843171660800296e-05, "loss": 2.4286, "step": 1084000 }, { "epoch": 3.14, "learning_rate": 4.843099296035569e-05, "loss": 2.4251, "step": 1084500 }, { "epoch": 3.14, "learning_rate": 4.8430269312708414e-05, "loss": 2.4343, "step": 1085000 }, { "epoch": 3.14, "learning_rate": 4.8429545665061136e-05, "loss": 2.4556, "step": 1085500 }, { "epoch": 3.14, "learning_rate": 4.842882346470916e-05, "loss": 2.4046, "step": 1086000 }, { "epoch": 3.14, "learning_rate": 4.8428101264357174e-05, "loss": 2.4043, "step": 1086500 }, { "epoch": 3.15, "learning_rate": 4.8427377616709897e-05, "loss": 2.4409, "step": 1087000 }, { "epoch": 3.15, "learning_rate": 4.842665396906262e-05, "loss": 2.4579, "step": 1087500 }, { "epoch": 3.15, "learning_rate": 4.842593032141534e-05, "loss": 2.4121, "step": 1088000 }, { "epoch": 3.15, "learning_rate": 4.842520667376806e-05, "loss": 2.4464, "step": 1088500 }, { "epoch": 3.15, "learning_rate": 4.8424483026120786e-05, "loss": 2.4233, "step": 1089000 }, { "epoch": 3.15, "learning_rate": 4.842376082576881e-05, "loss": 2.4584, "step": 1089500 }, { "epoch": 3.16, "learning_rate": 4.842303717812153e-05, "loss": 2.4302, "step": 1090000 }, { "epoch": 3.16, "learning_rate": 4.842231353047425e-05, "loss": 2.4151, "step": 1090500 }, { "epoch": 3.16, "learning_rate": 4.8421589882826975e-05, "loss": 2.43, "step": 1091000 }, { "epoch": 3.16, "learning_rate": 4.84208662351797e-05, "loss": 2.4354, "step": 1091500 }, { "epoch": 3.16, "learning_rate": 4.8420142587532426e-05, "loss": 2.4394, "step": 1092000 }, { "epoch": 3.16, "learning_rate": 4.841941893988515e-05, "loss": 2.4332, "step": 1092500 }, { "epoch": 3.16, "learning_rate": 4.841869529223787e-05, "loss": 2.4345, "step": 1093000 }, { "epoch": 3.17, "learning_rate": 4.8417973091885886e-05, "loss": 2.4262, "step": 1093500 }, { "epoch": 3.17, "learning_rate": 4.841724944423861e-05, "loss": 2.4161, "step": 1094000 }, { "epoch": 3.17, "learning_rate": 4.841652724388663e-05, "loss": 2.4201, "step": 1094500 }, { "epoch": 3.17, "learning_rate": 4.841580359623935e-05, "loss": 2.432, "step": 1095000 }, { "epoch": 3.17, "learning_rate": 4.8415079948592075e-05, "loss": 2.4254, "step": 1095500 }, { "epoch": 3.17, "learning_rate": 4.84143563009448e-05, "loss": 2.4094, "step": 1096000 }, { "epoch": 3.17, "learning_rate": 4.841363265329752e-05, "loss": 2.4188, "step": 1096500 }, { "epoch": 3.18, "learning_rate": 4.841290900565024e-05, "loss": 2.4185, "step": 1097000 }, { "epoch": 3.18, "learning_rate": 4.841218680529826e-05, "loss": 2.4332, "step": 1097500 }, { "epoch": 3.18, "learning_rate": 4.841146315765098e-05, "loss": 2.4131, "step": 1098000 }, { "epoch": 3.18, "learning_rate": 4.841073951000371e-05, "loss": 2.4287, "step": 1098500 }, { "epoch": 3.18, "learning_rate": 4.841001586235643e-05, "loss": 2.451, "step": 1099000 }, { "epoch": 3.18, "learning_rate": 4.840929221470916e-05, "loss": 2.4399, "step": 1099500 }, { "epoch": 3.18, "learning_rate": 4.840856856706188e-05, "loss": 2.4287, "step": 1100000 }, { "epoch": 3.19, "learning_rate": 4.8407844919414604e-05, "loss": 2.4255, "step": 1100500 }, { "epoch": 3.19, "learning_rate": 4.8407121271767326e-05, "loss": 2.4205, "step": 1101000 }, { "epoch": 3.19, "learning_rate": 4.840639762412005e-05, "loss": 2.4328, "step": 1101500 }, { "epoch": 3.19, "learning_rate": 4.840567397647277e-05, "loss": 2.4476, "step": 1102000 }, { "epoch": 3.19, "learning_rate": 4.840495032882549e-05, "loss": 2.4451, "step": 1102500 }, { "epoch": 3.19, "learning_rate": 4.840422812847351e-05, "loss": 2.4246, "step": 1103000 }, { "epoch": 3.19, "learning_rate": 4.840350448082623e-05, "loss": 2.4083, "step": 1103500 }, { "epoch": 3.2, "learning_rate": 4.840278083317896e-05, "loss": 2.4306, "step": 1104000 }, { "epoch": 3.2, "learning_rate": 4.840205718553168e-05, "loss": 2.4391, "step": 1104500 }, { "epoch": 3.2, "learning_rate": 4.8401333537884404e-05, "loss": 2.4241, "step": 1105000 }, { "epoch": 3.2, "learning_rate": 4.8400609890237127e-05, "loss": 2.4067, "step": 1105500 }, { "epoch": 3.2, "learning_rate": 4.839988624258985e-05, "loss": 2.4417, "step": 1106000 }, { "epoch": 3.2, "learning_rate": 4.839916259494257e-05, "loss": 2.4489, "step": 1106500 }, { "epoch": 3.2, "learning_rate": 4.83984389472953e-05, "loss": 2.4237, "step": 1107000 }, { "epoch": 3.21, "learning_rate": 4.839771529964802e-05, "loss": 2.4424, "step": 1107500 }, { "epoch": 3.21, "learning_rate": 4.839699309929604e-05, "loss": 2.4198, "step": 1108000 }, { "epoch": 3.21, "learning_rate": 4.839626945164876e-05, "loss": 2.436, "step": 1108500 }, { "epoch": 3.21, "learning_rate": 4.839554580400148e-05, "loss": 2.4163, "step": 1109000 }, { "epoch": 3.21, "learning_rate": 4.839482215635421e-05, "loss": 2.4078, "step": 1109500 }, { "epoch": 3.21, "learning_rate": 4.8394098508706933e-05, "loss": 2.4216, "step": 1110000 }, { "epoch": 3.21, "learning_rate": 4.839337630835495e-05, "loss": 2.4037, "step": 1110500 }, { "epoch": 3.22, "learning_rate": 4.839265266070767e-05, "loss": 2.4294, "step": 1111000 }, { "epoch": 3.22, "learning_rate": 4.839193046035569e-05, "loss": 2.4004, "step": 1111500 }, { "epoch": 3.22, "learning_rate": 4.839120681270841e-05, "loss": 2.3921, "step": 1112000 }, { "epoch": 3.22, "learning_rate": 4.839048316506113e-05, "loss": 2.4397, "step": 1112500 }, { "epoch": 3.22, "learning_rate": 4.838975951741386e-05, "loss": 2.4385, "step": 1113000 }, { "epoch": 3.22, "learning_rate": 4.838903586976658e-05, "loss": 2.4428, "step": 1113500 }, { "epoch": 3.22, "learning_rate": 4.83883136694146e-05, "loss": 2.4383, "step": 1114000 }, { "epoch": 3.23, "learning_rate": 4.838759002176733e-05, "loss": 2.4422, "step": 1114500 }, { "epoch": 3.23, "learning_rate": 4.838686637412005e-05, "loss": 2.4426, "step": 1115000 }, { "epoch": 3.23, "learning_rate": 4.838614272647277e-05, "loss": 2.4225, "step": 1115500 }, { "epoch": 3.23, "learning_rate": 4.8385419078825494e-05, "loss": 2.4309, "step": 1116000 }, { "epoch": 3.23, "learning_rate": 4.838469687847351e-05, "loss": 2.4215, "step": 1116500 }, { "epoch": 3.23, "learning_rate": 4.838397323082624e-05, "loss": 2.4181, "step": 1117000 }, { "epoch": 3.23, "learning_rate": 4.838324958317896e-05, "loss": 2.3943, "step": 1117500 }, { "epoch": 3.24, "learning_rate": 4.838252593553168e-05, "loss": 2.4258, "step": 1118000 }, { "epoch": 3.24, "learning_rate": 4.83818037351797e-05, "loss": 2.4429, "step": 1118500 }, { "epoch": 3.24, "learning_rate": 4.838108008753242e-05, "loss": 2.4329, "step": 1119000 }, { "epoch": 3.24, "learning_rate": 4.838035643988514e-05, "loss": 2.4246, "step": 1119500 }, { "epoch": 3.24, "learning_rate": 4.8379632792237865e-05, "loss": 2.4321, "step": 1120000 }, { "epoch": 3.24, "learning_rate": 4.837890914459059e-05, "loss": 2.4159, "step": 1120500 }, { "epoch": 3.24, "learning_rate": 4.837818549694331e-05, "loss": 2.4185, "step": 1121000 }, { "epoch": 3.25, "learning_rate": 4.837746329659133e-05, "loss": 2.414, "step": 1121500 }, { "epoch": 3.25, "learning_rate": 4.837673964894406e-05, "loss": 2.4198, "step": 1122000 }, { "epoch": 3.25, "learning_rate": 4.837601600129678e-05, "loss": 2.4408, "step": 1122500 }, { "epoch": 3.25, "learning_rate": 4.83752938009448e-05, "loss": 2.4407, "step": 1123000 }, { "epoch": 3.25, "learning_rate": 4.837457015329752e-05, "loss": 2.4362, "step": 1123500 }, { "epoch": 3.25, "learning_rate": 4.837384650565024e-05, "loss": 2.4417, "step": 1124000 }, { "epoch": 3.25, "learning_rate": 4.8373122858002965e-05, "loss": 2.4111, "step": 1124500 }, { "epoch": 3.26, "learning_rate": 4.837240065765099e-05, "loss": 2.4214, "step": 1125000 }, { "epoch": 3.26, "learning_rate": 4.837167701000371e-05, "loss": 2.4135, "step": 1125500 }, { "epoch": 3.26, "learning_rate": 4.837095336235643e-05, "loss": 2.4503, "step": 1126000 }, { "epoch": 3.26, "learning_rate": 4.8370229714709154e-05, "loss": 2.4433, "step": 1126500 }, { "epoch": 3.26, "learning_rate": 4.836950606706188e-05, "loss": 2.4278, "step": 1127000 }, { "epoch": 3.26, "learning_rate": 4.83687824194146e-05, "loss": 2.4666, "step": 1127500 }, { "epoch": 3.27, "learning_rate": 4.836805877176732e-05, "loss": 2.4303, "step": 1128000 }, { "epoch": 3.27, "learning_rate": 4.836733512412004e-05, "loss": 2.3959, "step": 1128500 }, { "epoch": 3.27, "learning_rate": 4.8366611476472766e-05, "loss": 2.3983, "step": 1129000 }, { "epoch": 3.27, "learning_rate": 4.8365887828825495e-05, "loss": 2.4182, "step": 1129500 }, { "epoch": 3.27, "learning_rate": 4.836516418117822e-05, "loss": 2.4196, "step": 1130000 }, { "epoch": 3.27, "learning_rate": 4.836444198082624e-05, "loss": 2.4087, "step": 1130500 }, { "epoch": 3.27, "learning_rate": 4.8363719780474255e-05, "loss": 2.4292, "step": 1131000 }, { "epoch": 3.28, "learning_rate": 4.836299613282698e-05, "loss": 2.4288, "step": 1131500 }, { "epoch": 3.28, "learning_rate": 4.83622724851797e-05, "loss": 2.411, "step": 1132000 }, { "epoch": 3.28, "learning_rate": 4.836154883753242e-05, "loss": 2.4312, "step": 1132500 }, { "epoch": 3.28, "learning_rate": 4.8360825189885144e-05, "loss": 2.4307, "step": 1133000 }, { "epoch": 3.28, "learning_rate": 4.8360101542237866e-05, "loss": 2.4319, "step": 1133500 }, { "epoch": 3.28, "learning_rate": 4.835937789459059e-05, "loss": 2.4174, "step": 1134000 }, { "epoch": 3.28, "learning_rate": 4.835865424694331e-05, "loss": 2.4448, "step": 1134500 }, { "epoch": 3.29, "learning_rate": 4.835793059929604e-05, "loss": 2.4286, "step": 1135000 }, { "epoch": 3.29, "learning_rate": 4.835720695164876e-05, "loss": 2.4344, "step": 1135500 }, { "epoch": 3.29, "learning_rate": 4.835648475129678e-05, "loss": 2.3902, "step": 1136000 }, { "epoch": 3.29, "learning_rate": 4.83557611036495e-05, "loss": 2.4348, "step": 1136500 }, { "epoch": 3.29, "learning_rate": 4.835503890329752e-05, "loss": 2.4417, "step": 1137000 }, { "epoch": 3.29, "learning_rate": 4.8354315255650244e-05, "loss": 2.4511, "step": 1137500 }, { "epoch": 3.29, "learning_rate": 4.8353591608002966e-05, "loss": 2.4133, "step": 1138000 }, { "epoch": 3.3, "learning_rate": 4.835286796035569e-05, "loss": 2.4115, "step": 1138500 }, { "epoch": 3.3, "learning_rate": 4.835214431270841e-05, "loss": 2.4133, "step": 1139000 }, { "epoch": 3.3, "learning_rate": 4.835142066506114e-05, "loss": 2.4293, "step": 1139500 }, { "epoch": 3.3, "learning_rate": 4.835069701741386e-05, "loss": 2.4113, "step": 1140000 }, { "epoch": 3.3, "learning_rate": 4.8349973369766584e-05, "loss": 2.4502, "step": 1140500 }, { "epoch": 3.3, "learning_rate": 4.8349249722119306e-05, "loss": 2.4369, "step": 1141000 }, { "epoch": 3.3, "learning_rate": 4.834852752176732e-05, "loss": 2.4519, "step": 1141500 }, { "epoch": 3.31, "learning_rate": 4.8347803874120044e-05, "loss": 2.454, "step": 1142000 }, { "epoch": 3.31, "learning_rate": 4.8347080226472766e-05, "loss": 2.4145, "step": 1142500 }, { "epoch": 3.31, "learning_rate": 4.834635657882549e-05, "loss": 2.4184, "step": 1143000 }, { "epoch": 3.31, "learning_rate": 4.834563293117821e-05, "loss": 2.4382, "step": 1143500 }, { "epoch": 3.31, "learning_rate": 4.834490928353094e-05, "loss": 2.4054, "step": 1144000 }, { "epoch": 3.31, "learning_rate": 4.834418563588366e-05, "loss": 2.4211, "step": 1144500 }, { "epoch": 3.31, "learning_rate": 4.834346198823639e-05, "loss": 2.4061, "step": 1145000 }, { "epoch": 3.32, "learning_rate": 4.834273978788441e-05, "loss": 2.4193, "step": 1145500 }, { "epoch": 3.32, "learning_rate": 4.834201614023713e-05, "loss": 2.4293, "step": 1146000 }, { "epoch": 3.32, "learning_rate": 4.834129249258985e-05, "loss": 2.4431, "step": 1146500 }, { "epoch": 3.32, "learning_rate": 4.834056884494257e-05, "loss": 2.4383, "step": 1147000 }, { "epoch": 3.32, "learning_rate": 4.8339845197295296e-05, "loss": 2.411, "step": 1147500 }, { "epoch": 3.32, "learning_rate": 4.833912154964802e-05, "loss": 2.4335, "step": 1148000 }, { "epoch": 3.32, "learning_rate": 4.833839790200074e-05, "loss": 2.4163, "step": 1148500 }, { "epoch": 3.33, "learning_rate": 4.833767425435346e-05, "loss": 2.4231, "step": 1149000 }, { "epoch": 3.33, "learning_rate": 4.8336952054001485e-05, "loss": 2.4378, "step": 1149500 }, { "epoch": 3.33, "learning_rate": 4.833622840635421e-05, "loss": 2.4059, "step": 1150000 }, { "epoch": 3.33, "learning_rate": 4.833550475870693e-05, "loss": 2.4374, "step": 1150500 }, { "epoch": 3.33, "learning_rate": 4.833478111105965e-05, "loss": 2.4324, "step": 1151000 }, { "epoch": 3.33, "learning_rate": 4.833405746341238e-05, "loss": 2.4312, "step": 1151500 }, { "epoch": 3.33, "learning_rate": 4.8333335263060396e-05, "loss": 2.4337, "step": 1152000 }, { "epoch": 3.34, "learning_rate": 4.833261161541312e-05, "loss": 2.4512, "step": 1152500 }, { "epoch": 3.34, "learning_rate": 4.833188796776584e-05, "loss": 2.4142, "step": 1153000 }, { "epoch": 3.34, "learning_rate": 4.833116432011857e-05, "loss": 2.4261, "step": 1153500 }, { "epoch": 3.34, "learning_rate": 4.8330442119766585e-05, "loss": 2.4344, "step": 1154000 }, { "epoch": 3.34, "learning_rate": 4.832971847211931e-05, "loss": 2.4287, "step": 1154500 }, { "epoch": 3.34, "learning_rate": 4.832899482447203e-05, "loss": 2.4232, "step": 1155000 }, { "epoch": 3.34, "learning_rate": 4.832827117682475e-05, "loss": 2.4272, "step": 1155500 }, { "epoch": 3.35, "learning_rate": 4.8327547529177474e-05, "loss": 2.4335, "step": 1156000 }, { "epoch": 3.35, "learning_rate": 4.832682532882549e-05, "loss": 2.4176, "step": 1156500 }, { "epoch": 3.35, "learning_rate": 4.832610168117822e-05, "loss": 2.4352, "step": 1157000 }, { "epoch": 3.35, "learning_rate": 4.832537803353094e-05, "loss": 2.4218, "step": 1157500 }, { "epoch": 3.35, "learning_rate": 4.832465438588366e-05, "loss": 2.4126, "step": 1158000 }, { "epoch": 3.35, "learning_rate": 4.8323930738236385e-05, "loss": 2.4224, "step": 1158500 }, { "epoch": 3.35, "learning_rate": 4.8323207090589114e-05, "loss": 2.4306, "step": 1159000 }, { "epoch": 3.36, "learning_rate": 4.8322483442941836e-05, "loss": 2.435, "step": 1159500 }, { "epoch": 3.36, "learning_rate": 4.8321762689885145e-05, "loss": 2.4143, "step": 1160000 }, { "epoch": 3.36, "learning_rate": 4.832103904223787e-05, "loss": 2.4402, "step": 1160500 }, { "epoch": 3.36, "learning_rate": 4.832031539459059e-05, "loss": 2.4185, "step": 1161000 }, { "epoch": 3.36, "learning_rate": 4.831959174694332e-05, "loss": 2.4112, "step": 1161500 }, { "epoch": 3.36, "learning_rate": 4.831886809929604e-05, "loss": 2.4638, "step": 1162000 }, { "epoch": 3.36, "learning_rate": 4.831814445164876e-05, "loss": 2.4319, "step": 1162500 }, { "epoch": 3.37, "learning_rate": 4.8317420804001485e-05, "loss": 2.4416, "step": 1163000 }, { "epoch": 3.37, "learning_rate": 4.831669715635421e-05, "loss": 2.4471, "step": 1163500 }, { "epoch": 3.37, "learning_rate": 4.831597350870693e-05, "loss": 2.436, "step": 1164000 }, { "epoch": 3.37, "learning_rate": 4.8315251308354945e-05, "loss": 2.4093, "step": 1164500 }, { "epoch": 3.37, "learning_rate": 4.831452910800297e-05, "loss": 2.4335, "step": 1165000 }, { "epoch": 3.37, "learning_rate": 4.831380546035569e-05, "loss": 2.4101, "step": 1165500 }, { "epoch": 3.38, "learning_rate": 4.831308181270841e-05, "loss": 2.4077, "step": 1166000 }, { "epoch": 3.38, "learning_rate": 4.8312358165061134e-05, "loss": 2.4213, "step": 1166500 }, { "epoch": 3.38, "learning_rate": 4.8311634517413863e-05, "loss": 2.4343, "step": 1167000 }, { "epoch": 3.38, "learning_rate": 4.8310910869766586e-05, "loss": 2.4155, "step": 1167500 }, { "epoch": 3.38, "learning_rate": 4.831018722211931e-05, "loss": 2.418, "step": 1168000 }, { "epoch": 3.38, "learning_rate": 4.830946357447203e-05, "loss": 2.4335, "step": 1168500 }, { "epoch": 3.38, "learning_rate": 4.830873992682475e-05, "loss": 2.434, "step": 1169000 }, { "epoch": 3.39, "learning_rate": 4.830801772647277e-05, "loss": 2.4335, "step": 1169500 }, { "epoch": 3.39, "learning_rate": 4.830729842071138e-05, "loss": 2.4446, "step": 1170000 }, { "epoch": 3.39, "learning_rate": 4.83065747730641e-05, "loss": 2.4432, "step": 1170500 }, { "epoch": 3.39, "learning_rate": 4.830585112541682e-05, "loss": 2.447, "step": 1171000 }, { "epoch": 3.39, "learning_rate": 4.8305127477769544e-05, "loss": 2.4355, "step": 1171500 }, { "epoch": 3.39, "learning_rate": 4.8304403830122266e-05, "loss": 2.4295, "step": 1172000 }, { "epoch": 3.39, "learning_rate": 4.8303680182474995e-05, "loss": 2.4018, "step": 1172500 }, { "epoch": 3.4, "learning_rate": 4.830295653482772e-05, "loss": 2.4415, "step": 1173000 }, { "epoch": 3.4, "learning_rate": 4.830223288718044e-05, "loss": 2.4028, "step": 1173500 }, { "epoch": 3.4, "learning_rate": 4.830150923953316e-05, "loss": 2.4311, "step": 1174000 }, { "epoch": 3.4, "learning_rate": 4.830078559188589e-05, "loss": 2.444, "step": 1174500 }, { "epoch": 3.4, "learning_rate": 4.830006194423861e-05, "loss": 2.4408, "step": 1175000 }, { "epoch": 3.4, "learning_rate": 4.8299338296591335e-05, "loss": 2.46, "step": 1175500 }, { "epoch": 3.4, "learning_rate": 4.829861464894406e-05, "loss": 2.4365, "step": 1176000 }, { "epoch": 3.41, "learning_rate": 4.829789100129678e-05, "loss": 2.4417, "step": 1176500 }, { "epoch": 3.41, "learning_rate": 4.82971673536495e-05, "loss": 2.4429, "step": 1177000 }, { "epoch": 3.41, "learning_rate": 4.8296443706002224e-05, "loss": 2.4227, "step": 1177500 }, { "epoch": 3.41, "learning_rate": 4.8295721505650246e-05, "loss": 2.4069, "step": 1178000 }, { "epoch": 3.41, "learning_rate": 4.829499785800297e-05, "loss": 2.4409, "step": 1178500 }, { "epoch": 3.41, "learning_rate": 4.829427421035569e-05, "loss": 2.426, "step": 1179000 }, { "epoch": 3.41, "learning_rate": 4.829355056270841e-05, "loss": 2.426, "step": 1179500 }, { "epoch": 3.42, "learning_rate": 4.8292826915061135e-05, "loss": 2.4345, "step": 1180000 }, { "epoch": 3.42, "learning_rate": 4.829210326741386e-05, "loss": 2.4392, "step": 1180500 }, { "epoch": 3.42, "learning_rate": 4.829138106706187e-05, "loss": 2.4197, "step": 1181000 }, { "epoch": 3.42, "learning_rate": 4.8290657419414595e-05, "loss": 2.4217, "step": 1181500 }, { "epoch": 3.42, "learning_rate": 4.8289933771767324e-05, "loss": 2.4195, "step": 1182000 }, { "epoch": 3.42, "learning_rate": 4.828921157141535e-05, "loss": 2.4332, "step": 1182500 }, { "epoch": 3.42, "learning_rate": 4.828848937106336e-05, "loss": 2.4353, "step": 1183000 }, { "epoch": 3.43, "learning_rate": 4.8287765723416084e-05, "loss": 2.4537, "step": 1183500 }, { "epoch": 3.43, "learning_rate": 4.828704207576881e-05, "loss": 2.4294, "step": 1184000 }, { "epoch": 3.43, "learning_rate": 4.828631842812153e-05, "loss": 2.4077, "step": 1184500 }, { "epoch": 3.43, "learning_rate": 4.828559478047425e-05, "loss": 2.4219, "step": 1185000 }, { "epoch": 3.43, "learning_rate": 4.828487113282697e-05, "loss": 2.4401, "step": 1185500 }, { "epoch": 3.43, "learning_rate": 4.8284147485179696e-05, "loss": 2.4377, "step": 1186000 }, { "epoch": 3.43, "learning_rate": 4.828342383753242e-05, "loss": 2.4103, "step": 1186500 }, { "epoch": 3.44, "learning_rate": 4.828270018988515e-05, "loss": 2.4433, "step": 1187000 }, { "epoch": 3.44, "learning_rate": 4.828197798953316e-05, "loss": 2.4175, "step": 1187500 }, { "epoch": 3.44, "learning_rate": 4.8281254341885885e-05, "loss": 2.438, "step": 1188000 }, { "epoch": 3.44, "learning_rate": 4.828053069423861e-05, "loss": 2.4471, "step": 1188500 }, { "epoch": 3.44, "learning_rate": 4.827980704659133e-05, "loss": 2.4164, "step": 1189000 }, { "epoch": 3.44, "learning_rate": 4.827908339894406e-05, "loss": 2.4303, "step": 1189500 }, { "epoch": 3.44, "learning_rate": 4.827835975129678e-05, "loss": 2.4189, "step": 1190000 }, { "epoch": 3.45, "learning_rate": 4.82776361036495e-05, "loss": 2.4233, "step": 1190500 }, { "epoch": 3.45, "learning_rate": 4.8276912456002225e-05, "loss": 2.4196, "step": 1191000 }, { "epoch": 3.45, "learning_rate": 4.827618880835495e-05, "loss": 2.4229, "step": 1191500 }, { "epoch": 3.45, "learning_rate": 4.827546660800297e-05, "loss": 2.4396, "step": 1192000 }, { "epoch": 3.45, "learning_rate": 4.8274744407650985e-05, "loss": 2.4434, "step": 1192500 }, { "epoch": 3.45, "learning_rate": 4.827402076000371e-05, "loss": 2.4126, "step": 1193000 }, { "epoch": 3.45, "learning_rate": 4.827329711235643e-05, "loss": 2.4403, "step": 1193500 }, { "epoch": 3.46, "learning_rate": 4.827257346470915e-05, "loss": 2.4368, "step": 1194000 }, { "epoch": 3.46, "learning_rate": 4.8271849817061874e-05, "loss": 2.4116, "step": 1194500 }, { "epoch": 3.46, "learning_rate": 4.8271127616709896e-05, "loss": 2.451, "step": 1195000 }, { "epoch": 3.46, "learning_rate": 4.827040396906262e-05, "loss": 2.4131, "step": 1195500 }, { "epoch": 3.46, "learning_rate": 4.826968032141534e-05, "loss": 2.425, "step": 1196000 }, { "epoch": 3.46, "learning_rate": 4.826895667376806e-05, "loss": 2.4327, "step": 1196500 }, { "epoch": 3.46, "learning_rate": 4.826823447341608e-05, "loss": 2.4246, "step": 1197000 }, { "epoch": 3.47, "learning_rate": 4.826751082576881e-05, "loss": 2.4216, "step": 1197500 }, { "epoch": 3.47, "learning_rate": 4.826678717812153e-05, "loss": 2.4227, "step": 1198000 }, { "epoch": 3.47, "learning_rate": 4.826606353047425e-05, "loss": 2.4432, "step": 1198500 }, { "epoch": 3.47, "learning_rate": 4.8265339882826974e-05, "loss": 2.4037, "step": 1199000 }, { "epoch": 3.47, "learning_rate": 4.8264616235179696e-05, "loss": 2.423, "step": 1199500 }, { "epoch": 3.47, "learning_rate": 4.826389403482772e-05, "loss": 2.4477, "step": 1200000 }, { "epoch": 3.47, "learning_rate": 4.826317038718044e-05, "loss": 2.4238, "step": 1200500 }, { "epoch": 3.48, "learning_rate": 4.826244673953316e-05, "loss": 2.4316, "step": 1201000 }, { "epoch": 3.48, "learning_rate": 4.8261723091885885e-05, "loss": 2.4349, "step": 1201500 }, { "epoch": 3.48, "learning_rate": 4.826099944423861e-05, "loss": 2.4383, "step": 1202000 }, { "epoch": 3.48, "learning_rate": 4.826027579659133e-05, "loss": 2.4125, "step": 1202500 }, { "epoch": 3.48, "learning_rate": 4.825955214894405e-05, "loss": 2.4545, "step": 1203000 }, { "epoch": 3.48, "learning_rate": 4.8258828501296774e-05, "loss": 2.4122, "step": 1203500 }, { "epoch": 3.49, "learning_rate": 4.8258104853649497e-05, "loss": 2.4183, "step": 1204000 }, { "epoch": 3.49, "learning_rate": 4.8257381206002226e-05, "loss": 2.4182, "step": 1204500 }, { "epoch": 3.49, "learning_rate": 4.825665900565025e-05, "loss": 2.4351, "step": 1205000 }, { "epoch": 3.49, "learning_rate": 4.825593535800297e-05, "loss": 2.4204, "step": 1205500 }, { "epoch": 3.49, "learning_rate": 4.825521171035569e-05, "loss": 2.416, "step": 1206000 }, { "epoch": 3.49, "learning_rate": 4.8254488062708415e-05, "loss": 2.431, "step": 1206500 }, { "epoch": 3.49, "learning_rate": 4.825376586235643e-05, "loss": 2.4293, "step": 1207000 }, { "epoch": 3.5, "learning_rate": 4.825304221470915e-05, "loss": 2.4246, "step": 1207500 }, { "epoch": 3.5, "learning_rate": 4.8252318567061875e-05, "loss": 2.4058, "step": 1208000 }, { "epoch": 3.5, "learning_rate": 4.82515949194146e-05, "loss": 2.4322, "step": 1208500 }, { "epoch": 3.5, "learning_rate": 4.8250871271767326e-05, "loss": 2.4452, "step": 1209000 }, { "epoch": 3.5, "learning_rate": 4.825014762412005e-05, "loss": 2.4331, "step": 1209500 }, { "epoch": 3.5, "learning_rate": 4.8249425423768064e-05, "loss": 2.435, "step": 1210000 }, { "epoch": 3.5, "learning_rate": 4.8248701776120786e-05, "loss": 2.4215, "step": 1210500 }, { "epoch": 3.51, "learning_rate": 4.824797812847351e-05, "loss": 2.424, "step": 1211000 }, { "epoch": 3.51, "learning_rate": 4.824725448082623e-05, "loss": 2.4337, "step": 1211500 }, { "epoch": 3.51, "learning_rate": 4.824653083317896e-05, "loss": 2.4117, "step": 1212000 }, { "epoch": 3.51, "learning_rate": 4.824580718553168e-05, "loss": 2.4298, "step": 1212500 }, { "epoch": 3.51, "learning_rate": 4.82450849851797e-05, "loss": 2.4275, "step": 1213000 }, { "epoch": 3.51, "learning_rate": 4.8244361337532426e-05, "loss": 2.4143, "step": 1213500 }, { "epoch": 3.51, "learning_rate": 4.824363768988515e-05, "loss": 2.4143, "step": 1214000 }, { "epoch": 3.52, "learning_rate": 4.824291404223787e-05, "loss": 2.4756, "step": 1214500 }, { "epoch": 3.52, "learning_rate": 4.8242191841885886e-05, "loss": 2.4193, "step": 1215000 }, { "epoch": 3.52, "learning_rate": 4.824146819423861e-05, "loss": 2.4218, "step": 1215500 }, { "epoch": 3.52, "learning_rate": 4.8240745993886624e-05, "loss": 2.4228, "step": 1216000 }, { "epoch": 3.52, "learning_rate": 4.824002234623935e-05, "loss": 2.4055, "step": 1216500 }, { "epoch": 3.52, "learning_rate": 4.8239298698592075e-05, "loss": 2.461, "step": 1217000 }, { "epoch": 3.52, "learning_rate": 4.82385750509448e-05, "loss": 2.424, "step": 1217500 }, { "epoch": 3.53, "learning_rate": 4.823785140329752e-05, "loss": 2.4423, "step": 1218000 }, { "epoch": 3.53, "learning_rate": 4.8237129202945535e-05, "loss": 2.421, "step": 1218500 }, { "epoch": 3.53, "learning_rate": 4.823640700259355e-05, "loss": 2.4237, "step": 1219000 }, { "epoch": 3.53, "learning_rate": 4.823568335494627e-05, "loss": 2.4117, "step": 1219500 }, { "epoch": 3.53, "learning_rate": 4.8234959707299e-05, "loss": 2.4164, "step": 1220000 }, { "epoch": 3.53, "learning_rate": 4.8234236059651724e-05, "loss": 2.4516, "step": 1220500 }, { "epoch": 3.53, "learning_rate": 4.823351241200445e-05, "loss": 2.4339, "step": 1221000 }, { "epoch": 3.54, "learning_rate": 4.8232788764357176e-05, "loss": 2.4274, "step": 1221500 }, { "epoch": 3.54, "learning_rate": 4.82320651167099e-05, "loss": 2.4402, "step": 1222000 }, { "epoch": 3.54, "learning_rate": 4.823134146906262e-05, "loss": 2.4327, "step": 1222500 }, { "epoch": 3.54, "learning_rate": 4.8230619268710636e-05, "loss": 2.4276, "step": 1223000 }, { "epoch": 3.54, "learning_rate": 4.822989562106336e-05, "loss": 2.4445, "step": 1223500 }, { "epoch": 3.54, "learning_rate": 4.822917197341608e-05, "loss": 2.404, "step": 1224000 }, { "epoch": 3.54, "learning_rate": 4.82284483257688e-05, "loss": 2.4296, "step": 1224500 }, { "epoch": 3.55, "learning_rate": 4.8227724678121525e-05, "loss": 2.4391, "step": 1225000 }, { "epoch": 3.55, "learning_rate": 4.8227001030474254e-05, "loss": 2.4312, "step": 1225500 }, { "epoch": 3.55, "learning_rate": 4.8226277382826976e-05, "loss": 2.4328, "step": 1226000 }, { "epoch": 3.55, "learning_rate": 4.82255537351797e-05, "loss": 2.4149, "step": 1226500 }, { "epoch": 3.55, "learning_rate": 4.822483008753243e-05, "loss": 2.4247, "step": 1227000 }, { "epoch": 3.55, "learning_rate": 4.822410788718044e-05, "loss": 2.4215, "step": 1227500 }, { "epoch": 3.55, "learning_rate": 4.8223384239533165e-05, "loss": 2.4079, "step": 1228000 }, { "epoch": 3.56, "learning_rate": 4.822266059188589e-05, "loss": 2.4283, "step": 1228500 }, { "epoch": 3.56, "learning_rate": 4.822193694423861e-05, "loss": 2.4042, "step": 1229000 }, { "epoch": 3.56, "learning_rate": 4.822121329659133e-05, "loss": 2.3948, "step": 1229500 }, { "epoch": 3.56, "learning_rate": 4.8220489648944054e-05, "loss": 2.4235, "step": 1230000 }, { "epoch": 3.56, "learning_rate": 4.8219766001296776e-05, "loss": 2.4266, "step": 1230500 }, { "epoch": 3.56, "learning_rate": 4.8219042353649505e-05, "loss": 2.4235, "step": 1231000 }, { "epoch": 3.56, "learning_rate": 4.821832015329752e-05, "loss": 2.3996, "step": 1231500 }, { "epoch": 3.57, "learning_rate": 4.8217597952945536e-05, "loss": 2.436, "step": 1232000 }, { "epoch": 3.57, "learning_rate": 4.821687575259355e-05, "loss": 2.4567, "step": 1232500 }, { "epoch": 3.57, "learning_rate": 4.8216152104946274e-05, "loss": 2.444, "step": 1233000 }, { "epoch": 3.57, "learning_rate": 4.8215428457299e-05, "loss": 2.433, "step": 1233500 }, { "epoch": 3.57, "learning_rate": 4.8214704809651725e-05, "loss": 2.4231, "step": 1234000 }, { "epoch": 3.57, "learning_rate": 4.821398116200445e-05, "loss": 2.4273, "step": 1234500 }, { "epoch": 3.57, "learning_rate": 4.8213257514357176e-05, "loss": 2.417, "step": 1235000 }, { "epoch": 3.58, "learning_rate": 4.82125338667099e-05, "loss": 2.4329, "step": 1235500 }, { "epoch": 3.58, "learning_rate": 4.821181021906262e-05, "loss": 2.3982, "step": 1236000 }, { "epoch": 3.58, "learning_rate": 4.8211088018710636e-05, "loss": 2.423, "step": 1236500 }, { "epoch": 3.58, "learning_rate": 4.821036437106336e-05, "loss": 2.4177, "step": 1237000 }, { "epoch": 3.58, "learning_rate": 4.820964072341608e-05, "loss": 2.4401, "step": 1237500 }, { "epoch": 3.58, "learning_rate": 4.82089170757688e-05, "loss": 2.4101, "step": 1238000 }, { "epoch": 3.58, "learning_rate": 4.8208193428121525e-05, "loss": 2.4292, "step": 1238500 }, { "epoch": 3.59, "learning_rate": 4.8207469780474254e-05, "loss": 2.4192, "step": 1239000 }, { "epoch": 3.59, "learning_rate": 4.8206746132826977e-05, "loss": 2.417, "step": 1239500 }, { "epoch": 3.59, "learning_rate": 4.82060224851797e-05, "loss": 2.445, "step": 1240000 }, { "epoch": 3.59, "learning_rate": 4.820530173212301e-05, "loss": 2.4244, "step": 1240500 }, { "epoch": 3.59, "learning_rate": 4.820457953177103e-05, "loss": 2.4335, "step": 1241000 }, { "epoch": 3.59, "learning_rate": 4.820385588412375e-05, "loss": 2.4446, "step": 1241500 }, { "epoch": 3.6, "learning_rate": 4.8203132236476474e-05, "loss": 2.4007, "step": 1242000 }, { "epoch": 3.6, "learning_rate": 4.8202408588829203e-05, "loss": 2.4339, "step": 1242500 }, { "epoch": 3.6, "learning_rate": 4.8201684941181926e-05, "loss": 2.4158, "step": 1243000 }, { "epoch": 3.6, "learning_rate": 4.820096129353465e-05, "loss": 2.4197, "step": 1243500 }, { "epoch": 3.6, "learning_rate": 4.8200239093182663e-05, "loss": 2.4168, "step": 1244000 }, { "epoch": 3.6, "learning_rate": 4.8199515445535386e-05, "loss": 2.4229, "step": 1244500 }, { "epoch": 3.6, "learning_rate": 4.819879179788811e-05, "loss": 2.4032, "step": 1245000 }, { "epoch": 3.61, "learning_rate": 4.819806815024083e-05, "loss": 2.4437, "step": 1245500 }, { "epoch": 3.61, "learning_rate": 4.819734594988885e-05, "loss": 2.4356, "step": 1246000 }, { "epoch": 3.61, "learning_rate": 4.8196622302241575e-05, "loss": 2.4037, "step": 1246500 }, { "epoch": 3.61, "learning_rate": 4.81958986545943e-05, "loss": 2.4229, "step": 1247000 }, { "epoch": 3.61, "learning_rate": 4.819517500694702e-05, "loss": 2.4333, "step": 1247500 }, { "epoch": 3.61, "learning_rate": 4.819445135929974e-05, "loss": 2.4445, "step": 1248000 }, { "epoch": 3.61, "learning_rate": 4.8193727711652464e-05, "loss": 2.4335, "step": 1248500 }, { "epoch": 3.62, "learning_rate": 4.819300551130048e-05, "loss": 2.4567, "step": 1249000 }, { "epoch": 3.62, "learning_rate": 4.81922818636532e-05, "loss": 2.4338, "step": 1249500 }, { "epoch": 3.62, "learning_rate": 4.819155821600593e-05, "loss": 2.4366, "step": 1250000 }, { "epoch": 3.62, "learning_rate": 4.819083456835865e-05, "loss": 2.4331, "step": 1250500 }, { "epoch": 3.62, "learning_rate": 4.8190112368006675e-05, "loss": 2.4288, "step": 1251000 }, { "epoch": 3.62, "learning_rate": 4.81893887203594e-05, "loss": 2.425, "step": 1251500 }, { "epoch": 3.62, "learning_rate": 4.818866507271212e-05, "loss": 2.4575, "step": 1252000 }, { "epoch": 3.63, "learning_rate": 4.818794142506484e-05, "loss": 2.4143, "step": 1252500 }, { "epoch": 3.63, "learning_rate": 4.8187217777417564e-05, "loss": 2.4364, "step": 1253000 }, { "epoch": 3.63, "learning_rate": 4.8186494129770286e-05, "loss": 2.4026, "step": 1253500 }, { "epoch": 3.63, "learning_rate": 4.818577048212301e-05, "loss": 2.4256, "step": 1254000 }, { "epoch": 3.63, "learning_rate": 4.818504683447573e-05, "loss": 2.4331, "step": 1254500 }, { "epoch": 3.63, "learning_rate": 4.818432318682845e-05, "loss": 2.4328, "step": 1255000 }, { "epoch": 3.63, "learning_rate": 4.818359953918118e-05, "loss": 2.4224, "step": 1255500 }, { "epoch": 3.64, "learning_rate": 4.81828773388292e-05, "loss": 2.4352, "step": 1256000 }, { "epoch": 3.64, "learning_rate": 4.818215369118192e-05, "loss": 2.4238, "step": 1256500 }, { "epoch": 3.64, "learning_rate": 4.8181431490829935e-05, "loss": 2.422, "step": 1257000 }, { "epoch": 3.64, "learning_rate": 4.8180707843182664e-05, "loss": 2.4191, "step": 1257500 }, { "epoch": 3.64, "learning_rate": 4.8179984195535387e-05, "loss": 2.4395, "step": 1258000 }, { "epoch": 3.64, "learning_rate": 4.817926054788811e-05, "loss": 2.4337, "step": 1258500 }, { "epoch": 3.64, "learning_rate": 4.817853690024083e-05, "loss": 2.4176, "step": 1259000 }, { "epoch": 3.65, "learning_rate": 4.817781469988885e-05, "loss": 2.4236, "step": 1259500 }, { "epoch": 3.65, "learning_rate": 4.8177091052241576e-05, "loss": 2.4154, "step": 1260000 }, { "epoch": 3.65, "learning_rate": 4.817636885188959e-05, "loss": 2.4269, "step": 1260500 }, { "epoch": 3.65, "learning_rate": 4.817564520424231e-05, "loss": 2.4117, "step": 1261000 }, { "epoch": 3.65, "learning_rate": 4.8174921556595036e-05, "loss": 2.4116, "step": 1261500 }, { "epoch": 3.65, "learning_rate": 4.817419790894776e-05, "loss": 2.4261, "step": 1262000 }, { "epoch": 3.65, "learning_rate": 4.817347426130048e-05, "loss": 2.4448, "step": 1262500 }, { "epoch": 3.66, "learning_rate": 4.817275061365321e-05, "loss": 2.447, "step": 1263000 }, { "epoch": 3.66, "learning_rate": 4.817202696600593e-05, "loss": 2.4519, "step": 1263500 }, { "epoch": 3.66, "learning_rate": 4.8171303318358654e-05, "loss": 2.4223, "step": 1264000 }, { "epoch": 3.66, "learning_rate": 4.8170579670711376e-05, "loss": 2.4382, "step": 1264500 }, { "epoch": 3.66, "learning_rate": 4.8169856023064105e-05, "loss": 2.4353, "step": 1265000 }, { "epoch": 3.66, "learning_rate": 4.816913237541683e-05, "loss": 2.4163, "step": 1265500 }, { "epoch": 3.66, "learning_rate": 4.816840872776955e-05, "loss": 2.433, "step": 1266000 }, { "epoch": 3.67, "learning_rate": 4.8167686527417565e-05, "loss": 2.4141, "step": 1266500 }, { "epoch": 3.67, "learning_rate": 4.816696287977029e-05, "loss": 2.4328, "step": 1267000 }, { "epoch": 3.67, "learning_rate": 4.816623923212301e-05, "loss": 2.4732, "step": 1267500 }, { "epoch": 3.67, "learning_rate": 4.816551558447573e-05, "loss": 2.4206, "step": 1268000 }, { "epoch": 3.67, "learning_rate": 4.816479193682846e-05, "loss": 2.4173, "step": 1268500 }, { "epoch": 3.67, "learning_rate": 4.816406828918118e-05, "loss": 2.4316, "step": 1269000 }, { "epoch": 3.67, "learning_rate": 4.81633460888292e-05, "loss": 2.4174, "step": 1269500 }, { "epoch": 3.68, "learning_rate": 4.816262244118192e-05, "loss": 2.434, "step": 1270000 }, { "epoch": 3.68, "learning_rate": 4.816189879353464e-05, "loss": 2.4408, "step": 1270500 }, { "epoch": 3.68, "learning_rate": 4.8161175145887365e-05, "loss": 2.4026, "step": 1271000 }, { "epoch": 3.68, "learning_rate": 4.816045149824009e-05, "loss": 2.4296, "step": 1271500 }, { "epoch": 3.68, "learning_rate": 4.815972929788811e-05, "loss": 2.3987, "step": 1272000 }, { "epoch": 3.68, "learning_rate": 4.815900565024083e-05, "loss": 2.4176, "step": 1272500 }, { "epoch": 3.68, "learning_rate": 4.815828200259356e-05, "loss": 2.423, "step": 1273000 }, { "epoch": 3.69, "learning_rate": 4.815755835494628e-05, "loss": 2.4257, "step": 1273500 }, { "epoch": 3.69, "learning_rate": 4.8156834707299005e-05, "loss": 2.4294, "step": 1274000 }, { "epoch": 3.69, "learning_rate": 4.815611105965173e-05, "loss": 2.429, "step": 1274500 }, { "epoch": 3.69, "learning_rate": 4.815538741200445e-05, "loss": 2.4067, "step": 1275000 }, { "epoch": 3.69, "learning_rate": 4.815466376435717e-05, "loss": 2.4233, "step": 1275500 }, { "epoch": 3.69, "learning_rate": 4.815394156400519e-05, "loss": 2.3878, "step": 1276000 }, { "epoch": 3.69, "learning_rate": 4.815321791635791e-05, "loss": 2.4257, "step": 1276500 }, { "epoch": 3.7, "learning_rate": 4.815249426871063e-05, "loss": 2.4187, "step": 1277000 }, { "epoch": 3.7, "learning_rate": 4.815177062106336e-05, "loss": 2.458, "step": 1277500 }, { "epoch": 3.7, "learning_rate": 4.8151048420711377e-05, "loss": 2.4246, "step": 1278000 }, { "epoch": 3.7, "learning_rate": 4.81503247730641e-05, "loss": 2.4596, "step": 1278500 }, { "epoch": 3.7, "learning_rate": 4.814960112541682e-05, "loss": 2.4157, "step": 1279000 }, { "epoch": 3.7, "learning_rate": 4.814887747776954e-05, "loss": 2.4042, "step": 1279500 }, { "epoch": 3.71, "learning_rate": 4.814815383012227e-05, "loss": 2.4086, "step": 1280000 }, { "epoch": 3.71, "learning_rate": 4.814743162977029e-05, "loss": 2.4344, "step": 1280500 }, { "epoch": 3.71, "learning_rate": 4.814670798212301e-05, "loss": 2.4476, "step": 1281000 }, { "epoch": 3.71, "learning_rate": 4.814598578177103e-05, "loss": 2.4386, "step": 1281500 }, { "epoch": 3.71, "learning_rate": 4.8145262134123755e-05, "loss": 2.4211, "step": 1282000 }, { "epoch": 3.71, "learning_rate": 4.814453848647648e-05, "loss": 2.4258, "step": 1282500 }, { "epoch": 3.71, "learning_rate": 4.81438148388292e-05, "loss": 2.415, "step": 1283000 }, { "epoch": 3.72, "learning_rate": 4.814309119118192e-05, "loss": 2.4173, "step": 1283500 }, { "epoch": 3.72, "learning_rate": 4.8142367543534644e-05, "loss": 2.4412, "step": 1284000 }, { "epoch": 3.72, "learning_rate": 4.814164534318266e-05, "loss": 2.4238, "step": 1284500 }, { "epoch": 3.72, "learning_rate": 4.814092169553539e-05, "loss": 2.4145, "step": 1285000 }, { "epoch": 3.72, "learning_rate": 4.814019804788811e-05, "loss": 2.4287, "step": 1285500 }, { "epoch": 3.72, "learning_rate": 4.813947440024083e-05, "loss": 2.4186, "step": 1286000 }, { "epoch": 3.72, "learning_rate": 4.8138750752593555e-05, "loss": 2.4485, "step": 1286500 }, { "epoch": 3.73, "learning_rate": 4.813802710494628e-05, "loss": 2.4074, "step": 1287000 }, { "epoch": 3.73, "learning_rate": 4.8137303457299006e-05, "loss": 2.4174, "step": 1287500 }, { "epoch": 3.73, "learning_rate": 4.813657980965173e-05, "loss": 2.43, "step": 1288000 }, { "epoch": 3.73, "learning_rate": 4.813585616200445e-05, "loss": 2.3858, "step": 1288500 }, { "epoch": 3.73, "learning_rate": 4.813513251435717e-05, "loss": 2.4399, "step": 1289000 }, { "epoch": 3.73, "learning_rate": 4.8134408866709895e-05, "loss": 2.4221, "step": 1289500 }, { "epoch": 3.73, "learning_rate": 4.813368666635791e-05, "loss": 2.4263, "step": 1290000 }, { "epoch": 3.74, "learning_rate": 4.813296301871064e-05, "loss": 2.4412, "step": 1290500 }, { "epoch": 3.74, "learning_rate": 4.813223937106336e-05, "loss": 2.4292, "step": 1291000 }, { "epoch": 3.74, "learning_rate": 4.8131515723416084e-05, "loss": 2.4214, "step": 1291500 }, { "epoch": 3.74, "learning_rate": 4.8130792075768806e-05, "loss": 2.4352, "step": 1292000 }, { "epoch": 3.74, "learning_rate": 4.813006842812153e-05, "loss": 2.4199, "step": 1292500 }, { "epoch": 3.74, "learning_rate": 4.812934478047425e-05, "loss": 2.4381, "step": 1293000 }, { "epoch": 3.74, "learning_rate": 4.812862113282697e-05, "loss": 2.4331, "step": 1293500 }, { "epoch": 3.75, "learning_rate": 4.8127897485179695e-05, "loss": 2.4357, "step": 1294000 }, { "epoch": 3.75, "learning_rate": 4.812717528482771e-05, "loss": 2.4215, "step": 1294500 }, { "epoch": 3.75, "learning_rate": 4.812645163718044e-05, "loss": 2.415, "step": 1295000 }, { "epoch": 3.75, "learning_rate": 4.812572798953316e-05, "loss": 2.4224, "step": 1295500 }, { "epoch": 3.75, "learning_rate": 4.8125004341885884e-05, "loss": 2.446, "step": 1296000 }, { "epoch": 3.75, "learning_rate": 4.8124282141533907e-05, "loss": 2.4502, "step": 1296500 }, { "epoch": 3.75, "learning_rate": 4.812355849388663e-05, "loss": 2.4429, "step": 1297000 }, { "epoch": 3.76, "learning_rate": 4.812283484623935e-05, "loss": 2.4396, "step": 1297500 }, { "epoch": 3.76, "learning_rate": 4.812211119859207e-05, "loss": 2.4352, "step": 1298000 }, { "epoch": 3.76, "learning_rate": 4.8121387550944795e-05, "loss": 2.4425, "step": 1298500 }, { "epoch": 3.76, "learning_rate": 4.812066390329752e-05, "loss": 2.4317, "step": 1299000 }, { "epoch": 3.76, "learning_rate": 4.811994025565024e-05, "loss": 2.44, "step": 1299500 }, { "epoch": 3.76, "learning_rate": 4.811921660800296e-05, "loss": 2.4341, "step": 1300000 }, { "epoch": 3.76, "learning_rate": 4.8118492960355684e-05, "loss": 2.4326, "step": 1300500 }, { "epoch": 3.77, "learning_rate": 4.8117769312708413e-05, "loss": 2.4014, "step": 1301000 }, { "epoch": 3.77, "learning_rate": 4.8117045665061136e-05, "loss": 2.4141, "step": 1301500 }, { "epoch": 3.77, "learning_rate": 4.811632346470916e-05, "loss": 2.4218, "step": 1302000 }, { "epoch": 3.77, "learning_rate": 4.8115601264357174e-05, "loss": 2.4317, "step": 1302500 }, { "epoch": 3.77, "learning_rate": 4.8114877616709896e-05, "loss": 2.4316, "step": 1303000 }, { "epoch": 3.77, "learning_rate": 4.811415396906262e-05, "loss": 2.4366, "step": 1303500 }, { "epoch": 3.77, "learning_rate": 4.811343176871064e-05, "loss": 2.4316, "step": 1304000 }, { "epoch": 3.78, "learning_rate": 4.811270812106336e-05, "loss": 2.4032, "step": 1304500 }, { "epoch": 3.78, "learning_rate": 4.8111984473416085e-05, "loss": 2.4203, "step": 1305000 }, { "epoch": 3.78, "learning_rate": 4.811126082576881e-05, "loss": 2.4244, "step": 1305500 }, { "epoch": 3.78, "learning_rate": 4.811053717812153e-05, "loss": 2.4367, "step": 1306000 }, { "epoch": 3.78, "learning_rate": 4.810981353047425e-05, "loss": 2.4347, "step": 1306500 }, { "epoch": 3.78, "learning_rate": 4.8109089882826974e-05, "loss": 2.4255, "step": 1307000 }, { "epoch": 3.78, "learning_rate": 4.810836768247499e-05, "loss": 2.4211, "step": 1307500 }, { "epoch": 3.79, "learning_rate": 4.810764403482771e-05, "loss": 2.428, "step": 1308000 }, { "epoch": 3.79, "learning_rate": 4.810692038718044e-05, "loss": 2.4268, "step": 1308500 }, { "epoch": 3.79, "learning_rate": 4.810619673953316e-05, "loss": 2.4191, "step": 1309000 }, { "epoch": 3.79, "learning_rate": 4.810547309188589e-05, "loss": 2.4293, "step": 1309500 }, { "epoch": 3.79, "learning_rate": 4.8104749444238614e-05, "loss": 2.4387, "step": 1310000 }, { "epoch": 3.79, "learning_rate": 4.8104025796591336e-05, "loss": 2.4398, "step": 1310500 }, { "epoch": 3.79, "learning_rate": 4.810330214894406e-05, "loss": 2.4295, "step": 1311000 }, { "epoch": 3.8, "learning_rate": 4.810257850129678e-05, "loss": 2.441, "step": 1311500 }, { "epoch": 3.8, "learning_rate": 4.81018548536495e-05, "loss": 2.4312, "step": 1312000 }, { "epoch": 3.8, "learning_rate": 4.8101131206002225e-05, "loss": 2.4329, "step": 1312500 }, { "epoch": 3.8, "learning_rate": 4.810040755835495e-05, "loss": 2.4419, "step": 1313000 }, { "epoch": 3.8, "learning_rate": 4.809968535800296e-05, "loss": 2.421, "step": 1313500 }, { "epoch": 3.8, "learning_rate": 4.8098963157650985e-05, "loss": 2.4071, "step": 1314000 }, { "epoch": 3.8, "learning_rate": 4.809823951000371e-05, "loss": 2.4252, "step": 1314500 }, { "epoch": 3.81, "learning_rate": 4.809751586235643e-05, "loss": 2.417, "step": 1315000 }, { "epoch": 3.81, "learning_rate": 4.809679221470915e-05, "loss": 2.4127, "step": 1315500 }, { "epoch": 3.81, "learning_rate": 4.8096068567061874e-05, "loss": 2.428, "step": 1316000 }, { "epoch": 3.81, "learning_rate": 4.8095344919414596e-05, "loss": 2.4273, "step": 1316500 }, { "epoch": 3.81, "learning_rate": 4.8094621271767325e-05, "loss": 2.4146, "step": 1317000 }, { "epoch": 3.81, "learning_rate": 4.809389762412005e-05, "loss": 2.4411, "step": 1317500 }, { "epoch": 3.82, "learning_rate": 4.809317542376806e-05, "loss": 2.4393, "step": 1318000 }, { "epoch": 3.82, "learning_rate": 4.8092453223416086e-05, "loss": 2.4286, "step": 1318500 }, { "epoch": 3.82, "learning_rate": 4.80917310230641e-05, "loss": 2.3934, "step": 1319000 }, { "epoch": 3.82, "learning_rate": 4.8091007375416823e-05, "loss": 2.4268, "step": 1319500 }, { "epoch": 3.82, "learning_rate": 4.8090283727769546e-05, "loss": 2.4266, "step": 1320000 }, { "epoch": 3.82, "learning_rate": 4.808956008012227e-05, "loss": 2.3891, "step": 1320500 }, { "epoch": 3.82, "learning_rate": 4.808883643247499e-05, "loss": 2.422, "step": 1321000 }, { "epoch": 3.83, "learning_rate": 4.808811278482772e-05, "loss": 2.4301, "step": 1321500 }, { "epoch": 3.83, "learning_rate": 4.8087390584475735e-05, "loss": 2.4249, "step": 1322000 }, { "epoch": 3.83, "learning_rate": 4.808666693682846e-05, "loss": 2.4192, "step": 1322500 }, { "epoch": 3.83, "learning_rate": 4.808594328918118e-05, "loss": 2.421, "step": 1323000 }, { "epoch": 3.83, "learning_rate": 4.80852196415339e-05, "loss": 2.4379, "step": 1323500 }, { "epoch": 3.83, "learning_rate": 4.8084495993886624e-05, "loss": 2.4508, "step": 1324000 }, { "epoch": 3.83, "learning_rate": 4.8083772346239346e-05, "loss": 2.4288, "step": 1324500 }, { "epoch": 3.84, "learning_rate": 4.8083048698592075e-05, "loss": 2.4091, "step": 1325000 }, { "epoch": 3.84, "learning_rate": 4.80823250509448e-05, "loss": 2.4314, "step": 1325500 }, { "epoch": 3.84, "learning_rate": 4.808160140329752e-05, "loss": 2.4258, "step": 1326000 }, { "epoch": 3.84, "learning_rate": 4.808087920294554e-05, "loss": 2.4267, "step": 1326500 }, { "epoch": 3.84, "learning_rate": 4.8080155555298264e-05, "loss": 2.4119, "step": 1327000 }, { "epoch": 3.84, "learning_rate": 4.8079431907650986e-05, "loss": 2.4082, "step": 1327500 }, { "epoch": 3.84, "learning_rate": 4.807870826000371e-05, "loss": 2.3922, "step": 1328000 }, { "epoch": 3.85, "learning_rate": 4.807798461235643e-05, "loss": 2.4193, "step": 1328500 }, { "epoch": 3.85, "learning_rate": 4.807726096470915e-05, "loss": 2.4151, "step": 1329000 }, { "epoch": 3.85, "learning_rate": 4.8076537317061875e-05, "loss": 2.396, "step": 1329500 }, { "epoch": 3.85, "learning_rate": 4.807581511670989e-05, "loss": 2.4071, "step": 1330000 }, { "epoch": 3.85, "learning_rate": 4.807509146906262e-05, "loss": 2.4629, "step": 1330500 }, { "epoch": 3.85, "learning_rate": 4.807436782141534e-05, "loss": 2.4507, "step": 1331000 }, { "epoch": 3.85, "learning_rate": 4.8073644173768064e-05, "loss": 2.4299, "step": 1331500 }, { "epoch": 3.86, "learning_rate": 4.807292052612079e-05, "loss": 2.4352, "step": 1332000 }, { "epoch": 3.86, "learning_rate": 4.8072196878473515e-05, "loss": 2.417, "step": 1332500 }, { "epoch": 3.86, "learning_rate": 4.807147323082624e-05, "loss": 2.3957, "step": 1333000 }, { "epoch": 3.86, "learning_rate": 4.807074958317896e-05, "loss": 2.4212, "step": 1333500 }, { "epoch": 3.86, "learning_rate": 4.807002593553168e-05, "loss": 2.411, "step": 1334000 }, { "epoch": 3.86, "learning_rate": 4.8069302287884404e-05, "loss": 2.4211, "step": 1334500 }, { "epoch": 3.86, "learning_rate": 4.8068578640237126e-05, "loss": 2.4157, "step": 1335000 }, { "epoch": 3.87, "learning_rate": 4.806785788718044e-05, "loss": 2.4459, "step": 1335500 }, { "epoch": 3.87, "learning_rate": 4.8067134239533164e-05, "loss": 2.4213, "step": 1336000 }, { "epoch": 3.87, "learning_rate": 4.8066410591885887e-05, "loss": 2.4198, "step": 1336500 }, { "epoch": 3.87, "learning_rate": 4.806568694423861e-05, "loss": 2.4189, "step": 1337000 }, { "epoch": 3.87, "learning_rate": 4.806496329659133e-05, "loss": 2.4328, "step": 1337500 }, { "epoch": 3.87, "learning_rate": 4.806423964894405e-05, "loss": 2.4289, "step": 1338000 }, { "epoch": 3.87, "learning_rate": 4.8063516001296776e-05, "loss": 2.414, "step": 1338500 }, { "epoch": 3.88, "learning_rate": 4.80627923536495e-05, "loss": 2.3961, "step": 1339000 }, { "epoch": 3.88, "learning_rate": 4.806206870600223e-05, "loss": 2.4342, "step": 1339500 }, { "epoch": 3.88, "learning_rate": 4.806134505835495e-05, "loss": 2.4338, "step": 1340000 }, { "epoch": 3.88, "learning_rate": 4.806062285800297e-05, "loss": 2.3912, "step": 1340500 }, { "epoch": 3.88, "learning_rate": 4.8059899210355694e-05, "loss": 2.4179, "step": 1341000 }, { "epoch": 3.88, "learning_rate": 4.8059175562708416e-05, "loss": 2.4317, "step": 1341500 }, { "epoch": 3.88, "learning_rate": 4.805845191506114e-05, "loss": 2.419, "step": 1342000 }, { "epoch": 3.89, "learning_rate": 4.805772826741386e-05, "loss": 2.4045, "step": 1342500 }, { "epoch": 3.89, "learning_rate": 4.8057006067061876e-05, "loss": 2.4215, "step": 1343000 }, { "epoch": 3.89, "learning_rate": 4.80562824194146e-05, "loss": 2.4569, "step": 1343500 }, { "epoch": 3.89, "learning_rate": 4.805555877176732e-05, "loss": 2.4238, "step": 1344000 }, { "epoch": 3.89, "learning_rate": 4.805483512412004e-05, "loss": 2.4439, "step": 1344500 }, { "epoch": 3.89, "learning_rate": 4.805411147647277e-05, "loss": 2.4197, "step": 1345000 }, { "epoch": 3.89, "learning_rate": 4.805338927612079e-05, "loss": 2.4259, "step": 1345500 }, { "epoch": 3.9, "learning_rate": 4.805266562847351e-05, "loss": 2.4058, "step": 1346000 }, { "epoch": 3.9, "learning_rate": 4.805194198082623e-05, "loss": 2.3936, "step": 1346500 }, { "epoch": 3.9, "learning_rate": 4.805121833317896e-05, "loss": 2.4384, "step": 1347000 }, { "epoch": 3.9, "learning_rate": 4.8050496132826976e-05, "loss": 2.4298, "step": 1347500 }, { "epoch": 3.9, "learning_rate": 4.80497724851797e-05, "loss": 2.4311, "step": 1348000 }, { "epoch": 3.9, "learning_rate": 4.804905028482772e-05, "loss": 2.4213, "step": 1348500 }, { "epoch": 3.9, "learning_rate": 4.804832663718044e-05, "loss": 2.4476, "step": 1349000 }, { "epoch": 3.91, "learning_rate": 4.8047602989533165e-05, "loss": 2.4244, "step": 1349500 }, { "epoch": 3.91, "learning_rate": 4.804687934188589e-05, "loss": 2.4237, "step": 1350000 }, { "epoch": 3.91, "learning_rate": 4.804615569423861e-05, "loss": 2.4254, "step": 1350500 }, { "epoch": 3.91, "learning_rate": 4.8045433493886625e-05, "loss": 2.4384, "step": 1351000 }, { "epoch": 3.91, "learning_rate": 4.804470984623935e-05, "loss": 2.4046, "step": 1351500 }, { "epoch": 3.91, "learning_rate": 4.804398619859207e-05, "loss": 2.4058, "step": 1352000 }, { "epoch": 3.91, "learning_rate": 4.80432625509448e-05, "loss": 2.4128, "step": 1352500 }, { "epoch": 3.92, "learning_rate": 4.804253890329752e-05, "loss": 2.4224, "step": 1353000 }, { "epoch": 3.92, "learning_rate": 4.8041816702945536e-05, "loss": 2.4236, "step": 1353500 }, { "epoch": 3.92, "learning_rate": 4.8041095949888845e-05, "loss": 2.4291, "step": 1354000 }, { "epoch": 3.92, "learning_rate": 4.804037230224157e-05, "loss": 2.4215, "step": 1354500 }, { "epoch": 3.92, "learning_rate": 4.8039648654594297e-05, "loss": 2.4117, "step": 1355000 }, { "epoch": 3.92, "learning_rate": 4.803892500694702e-05, "loss": 2.4358, "step": 1355500 }, { "epoch": 3.93, "learning_rate": 4.803820135929975e-05, "loss": 2.4003, "step": 1356000 }, { "epoch": 3.93, "learning_rate": 4.803747771165247e-05, "loss": 2.4192, "step": 1356500 }, { "epoch": 3.93, "learning_rate": 4.803675406400519e-05, "loss": 2.4209, "step": 1357000 }, { "epoch": 3.93, "learning_rate": 4.8036030416357915e-05, "loss": 2.425, "step": 1357500 }, { "epoch": 3.93, "learning_rate": 4.803530676871064e-05, "loss": 2.4545, "step": 1358000 }, { "epoch": 3.93, "learning_rate": 4.803458312106336e-05, "loss": 2.4534, "step": 1358500 }, { "epoch": 3.93, "learning_rate": 4.803385947341608e-05, "loss": 2.4285, "step": 1359000 }, { "epoch": 3.94, "learning_rate": 4.8033135825768803e-05, "loss": 2.4247, "step": 1359500 }, { "epoch": 3.94, "learning_rate": 4.8032412178121526e-05, "loss": 2.441, "step": 1360000 }, { "epoch": 3.94, "learning_rate": 4.803168853047425e-05, "loss": 2.4374, "step": 1360500 }, { "epoch": 3.94, "learning_rate": 4.803096488282697e-05, "loss": 2.43, "step": 1361000 }, { "epoch": 3.94, "learning_rate": 4.80302412351797e-05, "loss": 2.4067, "step": 1361500 }, { "epoch": 3.94, "learning_rate": 4.802951758753242e-05, "loss": 2.4285, "step": 1362000 }, { "epoch": 3.94, "learning_rate": 4.8028795387180444e-05, "loss": 2.3931, "step": 1362500 }, { "epoch": 3.95, "learning_rate": 4.8028071739533166e-05, "loss": 2.4061, "step": 1363000 }, { "epoch": 3.95, "learning_rate": 4.802734953918118e-05, "loss": 2.426, "step": 1363500 }, { "epoch": 3.95, "learning_rate": 4.8026625891533904e-05, "loss": 2.4208, "step": 1364000 }, { "epoch": 3.95, "learning_rate": 4.8025902243886626e-05, "loss": 2.4332, "step": 1364500 }, { "epoch": 3.95, "learning_rate": 4.802517859623935e-05, "loss": 2.4315, "step": 1365000 }, { "epoch": 3.95, "learning_rate": 4.802445639588737e-05, "loss": 2.411, "step": 1365500 }, { "epoch": 3.95, "learning_rate": 4.802373274824009e-05, "loss": 2.4098, "step": 1366000 }, { "epoch": 3.96, "learning_rate": 4.8023009100592815e-05, "loss": 2.4216, "step": 1366500 }, { "epoch": 3.96, "learning_rate": 4.802228545294554e-05, "loss": 2.4097, "step": 1367000 }, { "epoch": 3.96, "learning_rate": 4.802156180529826e-05, "loss": 2.396, "step": 1367500 }, { "epoch": 3.96, "learning_rate": 4.802083815765098e-05, "loss": 2.4259, "step": 1368000 }, { "epoch": 3.96, "learning_rate": 4.8020114510003704e-05, "loss": 2.3974, "step": 1368500 }, { "epoch": 3.96, "learning_rate": 4.8019390862356426e-05, "loss": 2.4331, "step": 1369000 }, { "epoch": 3.96, "learning_rate": 4.8018667214709155e-05, "loss": 2.4195, "step": 1369500 }, { "epoch": 3.97, "learning_rate": 4.801794356706188e-05, "loss": 2.4285, "step": 1370000 }, { "epoch": 3.97, "learning_rate": 4.80172199194146e-05, "loss": 2.4201, "step": 1370500 }, { "epoch": 3.97, "learning_rate": 4.801649771906262e-05, "loss": 2.4078, "step": 1371000 }, { "epoch": 3.97, "learning_rate": 4.8015774071415344e-05, "loss": 2.4173, "step": 1371500 }, { "epoch": 3.97, "learning_rate": 4.801505187106336e-05, "loss": 2.4188, "step": 1372000 }, { "epoch": 3.97, "learning_rate": 4.801432822341608e-05, "loss": 2.3722, "step": 1372500 }, { "epoch": 3.97, "learning_rate": 4.8013604575768804e-05, "loss": 2.4505, "step": 1373000 }, { "epoch": 3.98, "learning_rate": 4.8012880928121526e-05, "loss": 2.4144, "step": 1373500 }, { "epoch": 3.98, "learning_rate": 4.801215728047425e-05, "loss": 2.4428, "step": 1374000 }, { "epoch": 3.98, "learning_rate": 4.801143363282697e-05, "loss": 2.415, "step": 1374500 }, { "epoch": 3.98, "learning_rate": 4.80107099851797e-05, "loss": 2.4256, "step": 1375000 }, { "epoch": 3.98, "learning_rate": 4.800998633753242e-05, "loss": 2.424, "step": 1375500 }, { "epoch": 3.98, "learning_rate": 4.8009262689885144e-05, "loss": 2.4306, "step": 1376000 }, { "epoch": 3.98, "learning_rate": 4.800854048953316e-05, "loss": 2.4234, "step": 1376500 }, { "epoch": 3.99, "learning_rate": 4.8007818289181176e-05, "loss": 2.4003, "step": 1377000 }, { "epoch": 3.99, "learning_rate": 4.8007094641533905e-05, "loss": 2.4053, "step": 1377500 }, { "epoch": 3.99, "learning_rate": 4.800637099388663e-05, "loss": 2.4395, "step": 1378000 }, { "epoch": 3.99, "learning_rate": 4.800564734623935e-05, "loss": 2.4306, "step": 1378500 }, { "epoch": 3.99, "learning_rate": 4.800492369859208e-05, "loss": 2.4261, "step": 1379000 }, { "epoch": 3.99, "learning_rate": 4.80042000509448e-05, "loss": 2.3887, "step": 1379500 }, { "epoch": 3.99, "learning_rate": 4.800347640329752e-05, "loss": 2.4228, "step": 1380000 }, { "epoch": 4.0, "learning_rate": 4.8002752755650245e-05, "loss": 2.4308, "step": 1380500 }, { "epoch": 4.0, "learning_rate": 4.800202910800297e-05, "loss": 2.4063, "step": 1381000 }, { "epoch": 4.0, "learning_rate": 4.800130546035569e-05, "loss": 2.4291, "step": 1381500 }, { "epoch": 4.0, "eval_accuracy": 0.6380546923174228, "eval_accuracy_mlm": 0.5996378678679578, "eval_accuracy_nsp": 0.8439111245548666, "eval_loss": 2.3823225498199463, "eval_runtime": 330.6953, "eval_samples_per_second": 1319.602, "eval_steps_per_second": 54.984, "step": 1381888 }, { "epoch": 4.0, "learning_rate": 4.800058181270841e-05, "loss": 2.4164, "step": 1382000 }, { "epoch": 4.0, "learning_rate": 4.7999858165061134e-05, "loss": 2.3971, "step": 1382500 }, { "epoch": 4.0, "learning_rate": 4.799913741200445e-05, "loss": 2.4212, "step": 1383000 }, { "epoch": 4.0, "learning_rate": 4.799841376435717e-05, "loss": 2.3961, "step": 1383500 }, { "epoch": 4.01, "learning_rate": 4.799769156400519e-05, "loss": 2.4252, "step": 1384000 }, { "epoch": 4.01, "learning_rate": 4.799696791635791e-05, "loss": 2.4034, "step": 1384500 }, { "epoch": 4.01, "learning_rate": 4.799624426871064e-05, "loss": 2.4007, "step": 1385000 }, { "epoch": 4.01, "learning_rate": 4.799552062106336e-05, "loss": 2.4086, "step": 1385500 }, { "epoch": 4.01, "learning_rate": 4.799479697341608e-05, "loss": 2.3956, "step": 1386000 }, { "epoch": 4.01, "learning_rate": 4.7994073325768805e-05, "loss": 2.4184, "step": 1386500 }, { "epoch": 4.01, "learning_rate": 4.799334967812153e-05, "loss": 2.3853, "step": 1387000 }, { "epoch": 4.02, "learning_rate": 4.799262603047425e-05, "loss": 2.4257, "step": 1387500 }, { "epoch": 4.02, "learning_rate": 4.799190383012227e-05, "loss": 2.4009, "step": 1388000 }, { "epoch": 4.02, "learning_rate": 4.7991180182474994e-05, "loss": 2.4059, "step": 1388500 }, { "epoch": 4.02, "learning_rate": 4.7990456534827716e-05, "loss": 2.402, "step": 1389000 }, { "epoch": 4.02, "learning_rate": 4.798973433447573e-05, "loss": 2.3876, "step": 1389500 }, { "epoch": 4.02, "learning_rate": 4.7989010686828454e-05, "loss": 2.4002, "step": 1390000 }, { "epoch": 4.02, "learning_rate": 4.7988287039181176e-05, "loss": 2.3913, "step": 1390500 }, { "epoch": 4.03, "learning_rate": 4.79875633915339e-05, "loss": 2.3864, "step": 1391000 }, { "epoch": 4.03, "learning_rate": 4.798683974388663e-05, "loss": 2.4068, "step": 1391500 }, { "epoch": 4.03, "learning_rate": 4.798611609623935e-05, "loss": 2.3933, "step": 1392000 }, { "epoch": 4.03, "learning_rate": 4.798539389588737e-05, "loss": 2.4108, "step": 1392500 }, { "epoch": 4.03, "learning_rate": 4.7984670248240094e-05, "loss": 2.3859, "step": 1393000 }, { "epoch": 4.03, "learning_rate": 4.798394660059282e-05, "loss": 2.373, "step": 1393500 }, { "epoch": 4.04, "learning_rate": 4.798322295294554e-05, "loss": 2.3733, "step": 1394000 }, { "epoch": 4.04, "learning_rate": 4.798249930529826e-05, "loss": 2.3831, "step": 1394500 }, { "epoch": 4.04, "learning_rate": 4.798177565765098e-05, "loss": 2.3858, "step": 1395000 }, { "epoch": 4.04, "learning_rate": 4.7981052010003706e-05, "loss": 2.3947, "step": 1395500 }, { "epoch": 4.04, "learning_rate": 4.798032836235643e-05, "loss": 2.4082, "step": 1396000 }, { "epoch": 4.04, "learning_rate": 4.797960471470915e-05, "loss": 2.3939, "step": 1396500 }, { "epoch": 4.04, "learning_rate": 4.797888251435717e-05, "loss": 2.4033, "step": 1397000 }, { "epoch": 4.05, "learning_rate": 4.7978158866709895e-05, "loss": 2.4142, "step": 1397500 }, { "epoch": 4.05, "learning_rate": 4.797743521906262e-05, "loss": 2.3937, "step": 1398000 }, { "epoch": 4.05, "learning_rate": 4.797671301871063e-05, "loss": 2.4138, "step": 1398500 }, { "epoch": 4.05, "learning_rate": 4.7975989371063355e-05, "loss": 2.4142, "step": 1399000 }, { "epoch": 4.05, "learning_rate": 4.797526572341608e-05, "loss": 2.4306, "step": 1399500 }, { "epoch": 4.05, "learning_rate": 4.7974542075768806e-05, "loss": 2.4093, "step": 1400000 }, { "epoch": 4.05, "learning_rate": 4.797381842812153e-05, "loss": 2.3929, "step": 1400500 }, { "epoch": 4.06, "learning_rate": 4.797309622776955e-05, "loss": 2.4118, "step": 1401000 }, { "epoch": 4.06, "learning_rate": 4.797237258012227e-05, "loss": 2.3764, "step": 1401500 }, { "epoch": 4.06, "learning_rate": 4.7971648932474995e-05, "loss": 2.4231, "step": 1402000 }, { "epoch": 4.06, "learning_rate": 4.797092528482772e-05, "loss": 2.3849, "step": 1402500 }, { "epoch": 4.06, "learning_rate": 4.797020163718044e-05, "loss": 2.3885, "step": 1403000 }, { "epoch": 4.06, "learning_rate": 4.7969479436828455e-05, "loss": 2.3948, "step": 1403500 }, { "epoch": 4.06, "learning_rate": 4.796875578918118e-05, "loss": 2.4065, "step": 1404000 }, { "epoch": 4.07, "learning_rate": 4.7968032141533906e-05, "loss": 2.3936, "step": 1404500 }, { "epoch": 4.07, "learning_rate": 4.796730849388663e-05, "loss": 2.3983, "step": 1405000 }, { "epoch": 4.07, "learning_rate": 4.796658484623935e-05, "loss": 2.3947, "step": 1405500 }, { "epoch": 4.07, "learning_rate": 4.796586119859207e-05, "loss": 2.4273, "step": 1406000 }, { "epoch": 4.07, "learning_rate": 4.7965137550944795e-05, "loss": 2.3968, "step": 1406500 }, { "epoch": 4.07, "learning_rate": 4.796441535059281e-05, "loss": 2.4137, "step": 1407000 }, { "epoch": 4.07, "learning_rate": 4.796369170294554e-05, "loss": 2.4183, "step": 1407500 }, { "epoch": 4.08, "learning_rate": 4.796296805529826e-05, "loss": 2.3906, "step": 1408000 }, { "epoch": 4.08, "learning_rate": 4.7962244407650984e-05, "loss": 2.3973, "step": 1408500 }, { "epoch": 4.08, "learning_rate": 4.7961520760003706e-05, "loss": 2.4204, "step": 1409000 }, { "epoch": 4.08, "learning_rate": 4.796079711235643e-05, "loss": 2.392, "step": 1409500 }, { "epoch": 4.08, "learning_rate": 4.796007346470916e-05, "loss": 2.402, "step": 1410000 }, { "epoch": 4.08, "learning_rate": 4.795934981706188e-05, "loss": 2.4066, "step": 1410500 }, { "epoch": 4.08, "learning_rate": 4.79586261694146e-05, "loss": 2.3839, "step": 1411000 }, { "epoch": 4.09, "learning_rate": 4.7957902521767324e-05, "loss": 2.3936, "step": 1411500 }, { "epoch": 4.09, "learning_rate": 4.7957178874120047e-05, "loss": 2.3889, "step": 1412000 }, { "epoch": 4.09, "learning_rate": 4.795645667376806e-05, "loss": 2.4001, "step": 1412500 }, { "epoch": 4.09, "learning_rate": 4.7955733026120784e-05, "loss": 2.3953, "step": 1413000 }, { "epoch": 4.09, "learning_rate": 4.7955009378473507e-05, "loss": 2.3988, "step": 1413500 }, { "epoch": 4.09, "learning_rate": 4.795428717812153e-05, "loss": 2.3983, "step": 1414000 }, { "epoch": 4.09, "learning_rate": 4.795356353047426e-05, "loss": 2.4095, "step": 1414500 }, { "epoch": 4.1, "learning_rate": 4.795283988282698e-05, "loss": 2.3946, "step": 1415000 }, { "epoch": 4.1, "learning_rate": 4.79521162351797e-05, "loss": 2.4018, "step": 1415500 }, { "epoch": 4.1, "learning_rate": 4.7951392587532425e-05, "loss": 2.4063, "step": 1416000 }, { "epoch": 4.1, "learning_rate": 4.795067038718044e-05, "loss": 2.42, "step": 1416500 }, { "epoch": 4.1, "learning_rate": 4.794994673953316e-05, "loss": 2.4007, "step": 1417000 }, { "epoch": 4.1, "learning_rate": 4.7949223091885885e-05, "loss": 2.4054, "step": 1417500 }, { "epoch": 4.1, "learning_rate": 4.794849944423861e-05, "loss": 2.412, "step": 1418000 }, { "epoch": 4.11, "learning_rate": 4.794777579659133e-05, "loss": 2.4087, "step": 1418500 }, { "epoch": 4.11, "learning_rate": 4.794705214894406e-05, "loss": 2.4149, "step": 1419000 }, { "epoch": 4.11, "learning_rate": 4.7946329948592074e-05, "loss": 2.399, "step": 1419500 }, { "epoch": 4.11, "learning_rate": 4.7945606300944796e-05, "loss": 2.4041, "step": 1420000 }, { "epoch": 4.11, "learning_rate": 4.794488265329752e-05, "loss": 2.3879, "step": 1420500 }, { "epoch": 4.11, "learning_rate": 4.794415900565024e-05, "loss": 2.4068, "step": 1421000 }, { "epoch": 4.11, "learning_rate": 4.794343535800296e-05, "loss": 2.4068, "step": 1421500 }, { "epoch": 4.12, "learning_rate": 4.794271171035569e-05, "loss": 2.3957, "step": 1422000 }, { "epoch": 4.12, "learning_rate": 4.7941988062708414e-05, "loss": 2.3911, "step": 1422500 }, { "epoch": 4.12, "learning_rate": 4.7941264415061136e-05, "loss": 2.4026, "step": 1423000 }, { "epoch": 4.12, "learning_rate": 4.794054076741386e-05, "loss": 2.4124, "step": 1423500 }, { "epoch": 4.12, "learning_rate": 4.793981711976658e-05, "loss": 2.4108, "step": 1424000 }, { "epoch": 4.12, "learning_rate": 4.79390949194146e-05, "loss": 2.3982, "step": 1424500 }, { "epoch": 4.12, "learning_rate": 4.7938371271767325e-05, "loss": 2.399, "step": 1425000 }, { "epoch": 4.13, "learning_rate": 4.793764762412005e-05, "loss": 2.4116, "step": 1425500 }, { "epoch": 4.13, "learning_rate": 4.793692397647277e-05, "loss": 2.3898, "step": 1426000 }, { "epoch": 4.13, "learning_rate": 4.793620032882549e-05, "loss": 2.4199, "step": 1426500 }, { "epoch": 4.13, "learning_rate": 4.793547812847351e-05, "loss": 2.4235, "step": 1427000 }, { "epoch": 4.13, "learning_rate": 4.793475448082623e-05, "loss": 2.4074, "step": 1427500 }, { "epoch": 4.13, "learning_rate": 4.793403083317896e-05, "loss": 2.4129, "step": 1428000 }, { "epoch": 4.13, "learning_rate": 4.793330718553168e-05, "loss": 2.3994, "step": 1428500 }, { "epoch": 4.14, "learning_rate": 4.79325835378844e-05, "loss": 2.3804, "step": 1429000 }, { "epoch": 4.14, "learning_rate": 4.7931861337532425e-05, "loss": 2.4101, "step": 1429500 }, { "epoch": 4.14, "learning_rate": 4.793113768988515e-05, "loss": 2.4108, "step": 1430000 }, { "epoch": 4.14, "learning_rate": 4.793041404223787e-05, "loss": 2.3996, "step": 1430500 }, { "epoch": 4.14, "learning_rate": 4.792969039459059e-05, "loss": 2.3899, "step": 1431000 }, { "epoch": 4.14, "learning_rate": 4.7928966746943314e-05, "loss": 2.3757, "step": 1431500 }, { "epoch": 4.15, "learning_rate": 4.792824454659133e-05, "loss": 2.3744, "step": 1432000 }, { "epoch": 4.15, "learning_rate": 4.792752089894406e-05, "loss": 2.428, "step": 1432500 }, { "epoch": 4.15, "learning_rate": 4.792679725129678e-05, "loss": 2.4109, "step": 1433000 }, { "epoch": 4.15, "learning_rate": 4.79260736036495e-05, "loss": 2.4095, "step": 1433500 }, { "epoch": 4.15, "learning_rate": 4.792535140329752e-05, "loss": 2.3963, "step": 1434000 }, { "epoch": 4.15, "learning_rate": 4.792462775565024e-05, "loss": 2.3924, "step": 1434500 }, { "epoch": 4.15, "learning_rate": 4.792390410800296e-05, "loss": 2.4288, "step": 1435000 }, { "epoch": 4.16, "learning_rate": 4.7923180460355686e-05, "loss": 2.4117, "step": 1435500 }, { "epoch": 4.16, "learning_rate": 4.792245681270841e-05, "loss": 2.4063, "step": 1436000 }, { "epoch": 4.16, "learning_rate": 4.792173461235643e-05, "loss": 2.3963, "step": 1436500 }, { "epoch": 4.16, "learning_rate": 4.792101096470916e-05, "loss": 2.4123, "step": 1437000 }, { "epoch": 4.16, "learning_rate": 4.792028731706188e-05, "loss": 2.3983, "step": 1437500 }, { "epoch": 4.16, "learning_rate": 4.7919563669414604e-05, "loss": 2.4186, "step": 1438000 }, { "epoch": 4.16, "learning_rate": 4.7918840021767326e-05, "loss": 2.3993, "step": 1438500 }, { "epoch": 4.17, "learning_rate": 4.791811637412005e-05, "loss": 2.4046, "step": 1439000 }, { "epoch": 4.17, "learning_rate": 4.791739272647277e-05, "loss": 2.4206, "step": 1439500 }, { "epoch": 4.17, "learning_rate": 4.791666907882549e-05, "loss": 2.4095, "step": 1440000 }, { "epoch": 4.17, "learning_rate": 4.7915945431178215e-05, "loss": 2.4123, "step": 1440500 }, { "epoch": 4.17, "learning_rate": 4.791522467812153e-05, "loss": 2.3863, "step": 1441000 }, { "epoch": 4.17, "learning_rate": 4.791450103047425e-05, "loss": 2.4229, "step": 1441500 }, { "epoch": 4.17, "learning_rate": 4.7913777382826975e-05, "loss": 2.3915, "step": 1442000 }, { "epoch": 4.18, "learning_rate": 4.79130537351797e-05, "loss": 2.3945, "step": 1442500 }, { "epoch": 4.18, "learning_rate": 4.791233008753242e-05, "loss": 2.3893, "step": 1443000 }, { "epoch": 4.18, "learning_rate": 4.791160643988514e-05, "loss": 2.4127, "step": 1443500 }, { "epoch": 4.18, "learning_rate": 4.791088423953316e-05, "loss": 2.4116, "step": 1444000 }, { "epoch": 4.18, "learning_rate": 4.7910160591885886e-05, "loss": 2.3936, "step": 1444500 }, { "epoch": 4.18, "learning_rate": 4.790943694423861e-05, "loss": 2.4095, "step": 1445000 }, { "epoch": 4.18, "learning_rate": 4.790871329659134e-05, "loss": 2.4139, "step": 1445500 }, { "epoch": 4.19, "learning_rate": 4.790798964894406e-05, "loss": 2.4085, "step": 1446000 }, { "epoch": 4.19, "learning_rate": 4.790726600129678e-05, "loss": 2.373, "step": 1446500 }, { "epoch": 4.19, "learning_rate": 4.7906542353649504e-05, "loss": 2.4134, "step": 1447000 }, { "epoch": 4.19, "learning_rate": 4.7905818706002226e-05, "loss": 2.4054, "step": 1447500 }, { "epoch": 4.19, "learning_rate": 4.790509505835495e-05, "loss": 2.4086, "step": 1448000 }, { "epoch": 4.19, "learning_rate": 4.7904372858002964e-05, "loss": 2.4124, "step": 1448500 }, { "epoch": 4.19, "learning_rate": 4.7903649210355686e-05, "loss": 2.4193, "step": 1449000 }, { "epoch": 4.2, "learning_rate": 4.790292701000371e-05, "loss": 2.4019, "step": 1449500 }, { "epoch": 4.2, "learning_rate": 4.7902204809651724e-05, "loss": 2.4089, "step": 1450000 }, { "epoch": 4.2, "learning_rate": 4.7901481162004447e-05, "loss": 2.4057, "step": 1450500 }, { "epoch": 4.2, "learning_rate": 4.790075751435717e-05, "loss": 2.4023, "step": 1451000 }, { "epoch": 4.2, "learning_rate": 4.790003386670989e-05, "loss": 2.4455, "step": 1451500 }, { "epoch": 4.2, "learning_rate": 4.789931021906262e-05, "loss": 2.3947, "step": 1452000 }, { "epoch": 4.2, "learning_rate": 4.789858657141534e-05, "loss": 2.4088, "step": 1452500 }, { "epoch": 4.21, "learning_rate": 4.7897862923768064e-05, "loss": 2.4084, "step": 1453000 }, { "epoch": 4.21, "learning_rate": 4.789713927612079e-05, "loss": 2.4271, "step": 1453500 }, { "epoch": 4.21, "learning_rate": 4.789641707576881e-05, "loss": 2.4157, "step": 1454000 }, { "epoch": 4.21, "learning_rate": 4.789569342812153e-05, "loss": 2.413, "step": 1454500 }, { "epoch": 4.21, "learning_rate": 4.7894969780474253e-05, "loss": 2.408, "step": 1455000 }, { "epoch": 4.21, "learning_rate": 4.7894246132826976e-05, "loss": 2.4071, "step": 1455500 }, { "epoch": 4.21, "learning_rate": 4.78935224851797e-05, "loss": 2.3904, "step": 1456000 }, { "epoch": 4.22, "learning_rate": 4.7892800284827714e-05, "loss": 2.3726, "step": 1456500 }, { "epoch": 4.22, "learning_rate": 4.7892076637180436e-05, "loss": 2.4006, "step": 1457000 }, { "epoch": 4.22, "learning_rate": 4.7891352989533165e-05, "loss": 2.4204, "step": 1457500 }, { "epoch": 4.22, "learning_rate": 4.789062934188589e-05, "loss": 2.4124, "step": 1458000 }, { "epoch": 4.22, "learning_rate": 4.788990569423861e-05, "loss": 2.3924, "step": 1458500 }, { "epoch": 4.22, "learning_rate": 4.7889183493886625e-05, "loss": 2.3998, "step": 1459000 }, { "epoch": 4.22, "learning_rate": 4.788845984623935e-05, "loss": 2.4241, "step": 1459500 }, { "epoch": 4.23, "learning_rate": 4.7887736198592076e-05, "loss": 2.4058, "step": 1460000 }, { "epoch": 4.23, "learning_rate": 4.78870125509448e-05, "loss": 2.409, "step": 1460500 }, { "epoch": 4.23, "learning_rate": 4.788628890329752e-05, "loss": 2.3869, "step": 1461000 }, { "epoch": 4.23, "learning_rate": 4.788556525565024e-05, "loss": 2.3828, "step": 1461500 }, { "epoch": 4.23, "learning_rate": 4.7884841608002965e-05, "loss": 2.402, "step": 1462000 }, { "epoch": 4.23, "learning_rate": 4.788411796035569e-05, "loss": 2.4244, "step": 1462500 }, { "epoch": 4.23, "learning_rate": 4.788339431270841e-05, "loss": 2.3978, "step": 1463000 }, { "epoch": 4.24, "learning_rate": 4.788267066506114e-05, "loss": 2.3862, "step": 1463500 }, { "epoch": 4.24, "learning_rate": 4.7881948464709154e-05, "loss": 2.4021, "step": 1464000 }, { "epoch": 4.24, "learning_rate": 4.7881224817061876e-05, "loss": 2.4202, "step": 1464500 }, { "epoch": 4.24, "learning_rate": 4.78805011694146e-05, "loss": 2.4003, "step": 1465000 }, { "epoch": 4.24, "learning_rate": 4.787977752176732e-05, "loss": 2.3728, "step": 1465500 }, { "epoch": 4.24, "learning_rate": 4.787905387412004e-05, "loss": 2.3955, "step": 1466000 }, { "epoch": 4.24, "learning_rate": 4.7878331673768065e-05, "loss": 2.3951, "step": 1466500 }, { "epoch": 4.25, "learning_rate": 4.787760802612079e-05, "loss": 2.4074, "step": 1467000 }, { "epoch": 4.25, "learning_rate": 4.7876884378473517e-05, "loss": 2.4202, "step": 1467500 }, { "epoch": 4.25, "learning_rate": 4.787616217812153e-05, "loss": 2.4177, "step": 1468000 }, { "epoch": 4.25, "learning_rate": 4.7875438530474254e-05, "loss": 2.4062, "step": 1468500 }, { "epoch": 4.25, "learning_rate": 4.7874714882826977e-05, "loss": 2.4015, "step": 1469000 }, { "epoch": 4.25, "learning_rate": 4.78739912351797e-05, "loss": 2.3816, "step": 1469500 }, { "epoch": 4.26, "learning_rate": 4.787326758753242e-05, "loss": 2.3834, "step": 1470000 }, { "epoch": 4.26, "learning_rate": 4.787254393988514e-05, "loss": 2.4231, "step": 1470500 }, { "epoch": 4.26, "learning_rate": 4.7871821739533166e-05, "loss": 2.3844, "step": 1471000 }, { "epoch": 4.26, "learning_rate": 4.787109809188589e-05, "loss": 2.4058, "step": 1471500 }, { "epoch": 4.26, "learning_rate": 4.787037444423861e-05, "loss": 2.4149, "step": 1472000 }, { "epoch": 4.26, "learning_rate": 4.786965079659133e-05, "loss": 2.4195, "step": 1472500 }, { "epoch": 4.26, "learning_rate": 4.7868927148944054e-05, "loss": 2.4304, "step": 1473000 }, { "epoch": 4.27, "learning_rate": 4.786820350129678e-05, "loss": 2.3994, "step": 1473500 }, { "epoch": 4.27, "learning_rate": 4.7867482748240086e-05, "loss": 2.3795, "step": 1474000 }, { "epoch": 4.27, "learning_rate": 4.7866759100592815e-05, "loss": 2.3872, "step": 1474500 }, { "epoch": 4.27, "learning_rate": 4.786603545294554e-05, "loss": 2.4111, "step": 1475000 }, { "epoch": 4.27, "learning_rate": 4.786531325259356e-05, "loss": 2.431, "step": 1475500 }, { "epoch": 4.27, "learning_rate": 4.786458960494628e-05, "loss": 2.4072, "step": 1476000 }, { "epoch": 4.27, "learning_rate": 4.7863865957299004e-05, "loss": 2.3743, "step": 1476500 }, { "epoch": 4.28, "learning_rate": 4.7863142309651726e-05, "loss": 2.3986, "step": 1477000 }, { "epoch": 4.28, "learning_rate": 4.786241866200445e-05, "loss": 2.408, "step": 1477500 }, { "epoch": 4.28, "learning_rate": 4.786169501435717e-05, "loss": 2.408, "step": 1478000 }, { "epoch": 4.28, "learning_rate": 4.786097136670989e-05, "loss": 2.3984, "step": 1478500 }, { "epoch": 4.28, "learning_rate": 4.7860249166357915e-05, "loss": 2.398, "step": 1479000 }, { "epoch": 4.28, "learning_rate": 4.785952551871064e-05, "loss": 2.4052, "step": 1479500 }, { "epoch": 4.28, "learning_rate": 4.785880187106336e-05, "loss": 2.4227, "step": 1480000 }, { "epoch": 4.29, "learning_rate": 4.785807822341608e-05, "loss": 2.4046, "step": 1480500 }, { "epoch": 4.29, "learning_rate": 4.7857354575768804e-05, "loss": 2.3982, "step": 1481000 }, { "epoch": 4.29, "learning_rate": 4.7856630928121526e-05, "loss": 2.3867, "step": 1481500 }, { "epoch": 4.29, "learning_rate": 4.7855907280474255e-05, "loss": 2.4008, "step": 1482000 }, { "epoch": 4.29, "learning_rate": 4.785518363282698e-05, "loss": 2.4027, "step": 1482500 }, { "epoch": 4.29, "learning_rate": 4.78544599851797e-05, "loss": 2.4033, "step": 1483000 }, { "epoch": 4.29, "learning_rate": 4.785373633753242e-05, "loss": 2.3946, "step": 1483500 }, { "epoch": 4.3, "learning_rate": 4.7853012689885144e-05, "loss": 2.4017, "step": 1484000 }, { "epoch": 4.3, "learning_rate": 4.7852289042237866e-05, "loss": 2.4037, "step": 1484500 }, { "epoch": 4.3, "learning_rate": 4.785156684188589e-05, "loss": 2.4074, "step": 1485000 }, { "epoch": 4.3, "learning_rate": 4.7850844641533904e-05, "loss": 2.3921, "step": 1485500 }, { "epoch": 4.3, "learning_rate": 4.7850120993886626e-05, "loss": 2.3932, "step": 1486000 }, { "epoch": 4.3, "learning_rate": 4.784939734623935e-05, "loss": 2.4037, "step": 1486500 }, { "epoch": 4.3, "learning_rate": 4.784867369859207e-05, "loss": 2.4039, "step": 1487000 }, { "epoch": 4.31, "learning_rate": 4.784795005094479e-05, "loss": 2.4231, "step": 1487500 }, { "epoch": 4.31, "learning_rate": 4.7847226403297515e-05, "loss": 2.4135, "step": 1488000 }, { "epoch": 4.31, "learning_rate": 4.7846502755650244e-05, "loss": 2.4141, "step": 1488500 }, { "epoch": 4.31, "learning_rate": 4.7845779108002967e-05, "loss": 2.399, "step": 1489000 }, { "epoch": 4.31, "learning_rate": 4.7845055460355696e-05, "loss": 2.3927, "step": 1489500 }, { "epoch": 4.31, "learning_rate": 4.784433181270842e-05, "loss": 2.4216, "step": 1490000 }, { "epoch": 4.31, "learning_rate": 4.784360816506114e-05, "loss": 2.4004, "step": 1490500 }, { "epoch": 4.32, "learning_rate": 4.784288451741386e-05, "loss": 2.4099, "step": 1491000 }, { "epoch": 4.32, "learning_rate": 4.7842160869766584e-05, "loss": 2.4077, "step": 1491500 }, { "epoch": 4.32, "learning_rate": 4.784143722211931e-05, "loss": 2.4071, "step": 1492000 }, { "epoch": 4.32, "learning_rate": 4.784071357447203e-05, "loss": 2.4223, "step": 1492500 }, { "epoch": 4.32, "learning_rate": 4.783998992682475e-05, "loss": 2.3678, "step": 1493000 }, { "epoch": 4.32, "learning_rate": 4.783926772647277e-05, "loss": 2.4064, "step": 1493500 }, { "epoch": 4.32, "learning_rate": 4.7838544078825496e-05, "loss": 2.3764, "step": 1494000 }, { "epoch": 4.33, "learning_rate": 4.783782043117822e-05, "loss": 2.3944, "step": 1494500 }, { "epoch": 4.33, "learning_rate": 4.783709678353094e-05, "loss": 2.4337, "step": 1495000 }, { "epoch": 4.33, "learning_rate": 4.783637313588366e-05, "loss": 2.3923, "step": 1495500 }, { "epoch": 4.33, "learning_rate": 4.7835649488236385e-05, "loss": 2.4043, "step": 1496000 }, { "epoch": 4.33, "learning_rate": 4.78349272878844e-05, "loss": 2.3757, "step": 1496500 }, { "epoch": 4.33, "learning_rate": 4.783420364023713e-05, "loss": 2.3823, "step": 1497000 }, { "epoch": 4.33, "learning_rate": 4.783347999258985e-05, "loss": 2.4062, "step": 1497500 }, { "epoch": 4.34, "learning_rate": 4.7832756344942574e-05, "loss": 2.4109, "step": 1498000 }, { "epoch": 4.34, "learning_rate": 4.7832032697295296e-05, "loss": 2.4158, "step": 1498500 }, { "epoch": 4.34, "learning_rate": 4.783131049694332e-05, "loss": 2.4295, "step": 1499000 }, { "epoch": 4.34, "learning_rate": 4.783058684929604e-05, "loss": 2.3893, "step": 1499500 }, { "epoch": 4.34, "learning_rate": 4.782986320164876e-05, "loss": 2.3914, "step": 1500000 }, { "epoch": 4.34, "learning_rate": 4.7829139554001485e-05, "loss": 2.4094, "step": 1500500 }, { "epoch": 4.34, "learning_rate": 4.78284173536495e-05, "loss": 2.4106, "step": 1501000 }, { "epoch": 4.35, "learning_rate": 4.782769370600222e-05, "loss": 2.4123, "step": 1501500 }, { "epoch": 4.35, "learning_rate": 4.7826970058354945e-05, "loss": 2.3907, "step": 1502000 }, { "epoch": 4.35, "learning_rate": 4.782624641070767e-05, "loss": 2.3986, "step": 1502500 }, { "epoch": 4.35, "learning_rate": 4.782552421035569e-05, "loss": 2.4186, "step": 1503000 }, { "epoch": 4.35, "learning_rate": 4.782480056270841e-05, "loss": 2.3867, "step": 1503500 }, { "epoch": 4.35, "learning_rate": 4.7824076915061134e-05, "loss": 2.4112, "step": 1504000 }, { "epoch": 4.35, "learning_rate": 4.782335326741386e-05, "loss": 2.409, "step": 1504500 }, { "epoch": 4.36, "learning_rate": 4.7822629619766585e-05, "loss": 2.4011, "step": 1505000 }, { "epoch": 4.36, "learning_rate": 4.782190597211931e-05, "loss": 2.3981, "step": 1505500 }, { "epoch": 4.36, "learning_rate": 4.782118377176732e-05, "loss": 2.4429, "step": 1506000 }, { "epoch": 4.36, "learning_rate": 4.7820461571415345e-05, "loss": 2.3963, "step": 1506500 }, { "epoch": 4.36, "learning_rate": 4.781973792376807e-05, "loss": 2.4017, "step": 1507000 }, { "epoch": 4.36, "learning_rate": 4.781901427612079e-05, "loss": 2.3928, "step": 1507500 }, { "epoch": 4.37, "learning_rate": 4.781829062847351e-05, "loss": 2.4359, "step": 1508000 }, { "epoch": 4.37, "learning_rate": 4.7817566980826234e-05, "loss": 2.428, "step": 1508500 }, { "epoch": 4.37, "learning_rate": 4.7816843333178957e-05, "loss": 2.4102, "step": 1509000 }, { "epoch": 4.37, "learning_rate": 4.781612113282697e-05, "loss": 2.4251, "step": 1509500 }, { "epoch": 4.37, "learning_rate": 4.7815397485179694e-05, "loss": 2.4143, "step": 1510000 }, { "epoch": 4.37, "learning_rate": 4.7814673837532417e-05, "loss": 2.3965, "step": 1510500 }, { "epoch": 4.37, "learning_rate": 4.7813950189885146e-05, "loss": 2.4148, "step": 1511000 }, { "epoch": 4.38, "learning_rate": 4.781322654223787e-05, "loss": 2.4207, "step": 1511500 }, { "epoch": 4.38, "learning_rate": 4.78125028945906e-05, "loss": 2.4228, "step": 1512000 }, { "epoch": 4.38, "learning_rate": 4.781177924694332e-05, "loss": 2.4213, "step": 1512500 }, { "epoch": 4.38, "learning_rate": 4.781105559929604e-05, "loss": 2.4023, "step": 1513000 }, { "epoch": 4.38, "learning_rate": 4.7810331951648764e-05, "loss": 2.4147, "step": 1513500 }, { "epoch": 4.38, "learning_rate": 4.7809608304001486e-05, "loss": 2.3836, "step": 1514000 }, { "epoch": 4.38, "learning_rate": 4.78088861036495e-05, "loss": 2.3771, "step": 1514500 }, { "epoch": 4.39, "learning_rate": 4.7808162456002224e-05, "loss": 2.3647, "step": 1515000 }, { "epoch": 4.39, "learning_rate": 4.7807438808354946e-05, "loss": 2.407, "step": 1515500 }, { "epoch": 4.39, "learning_rate": 4.780671516070767e-05, "loss": 2.4139, "step": 1516000 }, { "epoch": 4.39, "learning_rate": 4.78059915130604e-05, "loss": 2.4004, "step": 1516500 }, { "epoch": 4.39, "learning_rate": 4.780526786541312e-05, "loss": 2.3937, "step": 1517000 }, { "epoch": 4.39, "learning_rate": 4.780454421776584e-05, "loss": 2.3948, "step": 1517500 }, { "epoch": 4.39, "learning_rate": 4.7803820570118564e-05, "loss": 2.393, "step": 1518000 }, { "epoch": 4.4, "learning_rate": 4.780309836976658e-05, "loss": 2.4086, "step": 1518500 }, { "epoch": 4.4, "learning_rate": 4.78023747221193e-05, "loss": 2.3971, "step": 1519000 }, { "epoch": 4.4, "learning_rate": 4.780165107447203e-05, "loss": 2.4093, "step": 1519500 }, { "epoch": 4.4, "learning_rate": 4.780092742682475e-05, "loss": 2.4049, "step": 1520000 }, { "epoch": 4.4, "learning_rate": 4.7800203779177475e-05, "loss": 2.4124, "step": 1520500 }, { "epoch": 4.4, "learning_rate": 4.77994815788255e-05, "loss": 2.4371, "step": 1521000 }, { "epoch": 4.4, "learning_rate": 4.779875937847351e-05, "loss": 2.4112, "step": 1521500 }, { "epoch": 4.41, "learning_rate": 4.7798035730826235e-05, "loss": 2.4119, "step": 1522000 }, { "epoch": 4.41, "learning_rate": 4.779731208317896e-05, "loss": 2.3831, "step": 1522500 }, { "epoch": 4.41, "learning_rate": 4.779658988282697e-05, "loss": 2.4081, "step": 1523000 }, { "epoch": 4.41, "learning_rate": 4.7795866235179695e-05, "loss": 2.4268, "step": 1523500 }, { "epoch": 4.41, "learning_rate": 4.7795142587532424e-05, "loss": 2.3927, "step": 1524000 }, { "epoch": 4.41, "learning_rate": 4.7794418939885146e-05, "loss": 2.3992, "step": 1524500 }, { "epoch": 4.41, "learning_rate": 4.779369529223787e-05, "loss": 2.4229, "step": 1525000 }, { "epoch": 4.42, "learning_rate": 4.779297164459059e-05, "loss": 2.4264, "step": 1525500 }, { "epoch": 4.42, "learning_rate": 4.779224799694331e-05, "loss": 2.4176, "step": 1526000 }, { "epoch": 4.42, "learning_rate": 4.7791524349296035e-05, "loss": 2.3986, "step": 1526500 }, { "epoch": 4.42, "learning_rate": 4.7790800701648764e-05, "loss": 2.3926, "step": 1527000 }, { "epoch": 4.42, "learning_rate": 4.7790077054001487e-05, "loss": 2.4146, "step": 1527500 }, { "epoch": 4.42, "learning_rate": 4.778935340635421e-05, "loss": 2.3776, "step": 1528000 }, { "epoch": 4.42, "learning_rate": 4.778862975870693e-05, "loss": 2.4156, "step": 1528500 }, { "epoch": 4.43, "learning_rate": 4.778790611105965e-05, "loss": 2.4318, "step": 1529000 }, { "epoch": 4.43, "learning_rate": 4.7787182463412376e-05, "loss": 2.399, "step": 1529500 }, { "epoch": 4.43, "learning_rate": 4.77864588157651e-05, "loss": 2.4095, "step": 1530000 }, { "epoch": 4.43, "learning_rate": 4.778573516811782e-05, "loss": 2.3916, "step": 1530500 }, { "epoch": 4.43, "learning_rate": 4.778501296776584e-05, "loss": 2.3924, "step": 1531000 }, { "epoch": 4.43, "learning_rate": 4.7784289320118565e-05, "loss": 2.409, "step": 1531500 }, { "epoch": 4.43, "learning_rate": 4.778356711976658e-05, "loss": 2.4005, "step": 1532000 }, { "epoch": 4.44, "learning_rate": 4.77828434721193e-05, "loss": 2.4065, "step": 1532500 }, { "epoch": 4.44, "learning_rate": 4.7782119824472025e-05, "loss": 2.3815, "step": 1533000 }, { "epoch": 4.44, "learning_rate": 4.778139617682475e-05, "loss": 2.3923, "step": 1533500 }, { "epoch": 4.44, "learning_rate": 4.778067397647277e-05, "loss": 2.4268, "step": 1534000 }, { "epoch": 4.44, "learning_rate": 4.77799503288255e-05, "loss": 2.4016, "step": 1534500 }, { "epoch": 4.44, "learning_rate": 4.777922668117822e-05, "loss": 2.4032, "step": 1535000 }, { "epoch": 4.44, "learning_rate": 4.777850303353094e-05, "loss": 2.4165, "step": 1535500 }, { "epoch": 4.45, "learning_rate": 4.777778083317896e-05, "loss": 2.4157, "step": 1536000 }, { "epoch": 4.45, "learning_rate": 4.777705718553168e-05, "loss": 2.414, "step": 1536500 }, { "epoch": 4.45, "learning_rate": 4.77763335378844e-05, "loss": 2.4169, "step": 1537000 }, { "epoch": 4.45, "learning_rate": 4.7775609890237125e-05, "loss": 2.3847, "step": 1537500 }, { "epoch": 4.45, "learning_rate": 4.777488768988515e-05, "loss": 2.4039, "step": 1538000 }, { "epoch": 4.45, "learning_rate": 4.777416548953316e-05, "loss": 2.4156, "step": 1538500 }, { "epoch": 4.45, "learning_rate": 4.7773441841885885e-05, "loss": 2.41, "step": 1539000 }, { "epoch": 4.46, "learning_rate": 4.777271819423861e-05, "loss": 2.4072, "step": 1539500 }, { "epoch": 4.46, "learning_rate": 4.777199454659133e-05, "loss": 2.4014, "step": 1540000 }, { "epoch": 4.46, "learning_rate": 4.777127089894405e-05, "loss": 2.368, "step": 1540500 }, { "epoch": 4.46, "learning_rate": 4.7770547251296774e-05, "loss": 2.3863, "step": 1541000 }, { "epoch": 4.46, "learning_rate": 4.7769825050944796e-05, "loss": 2.4064, "step": 1541500 }, { "epoch": 4.46, "learning_rate": 4.776910285059281e-05, "loss": 2.4054, "step": 1542000 }, { "epoch": 4.46, "learning_rate": 4.776837920294554e-05, "loss": 2.3758, "step": 1542500 }, { "epoch": 4.47, "learning_rate": 4.776765555529826e-05, "loss": 2.4019, "step": 1543000 }, { "epoch": 4.47, "learning_rate": 4.7766931907650985e-05, "loss": 2.4036, "step": 1543500 }, { "epoch": 4.47, "learning_rate": 4.776620826000371e-05, "loss": 2.3779, "step": 1544000 }, { "epoch": 4.47, "learning_rate": 4.776548461235643e-05, "loss": 2.4146, "step": 1544500 }, { "epoch": 4.47, "learning_rate": 4.776476096470915e-05, "loss": 2.4077, "step": 1545000 }, { "epoch": 4.47, "learning_rate": 4.7764037317061874e-05, "loss": 2.395, "step": 1545500 }, { "epoch": 4.48, "learning_rate": 4.77633136694146e-05, "loss": 2.4024, "step": 1546000 }, { "epoch": 4.48, "learning_rate": 4.7762590021767325e-05, "loss": 2.4123, "step": 1546500 }, { "epoch": 4.48, "learning_rate": 4.776186637412005e-05, "loss": 2.3997, "step": 1547000 }, { "epoch": 4.48, "learning_rate": 4.776114417376806e-05, "loss": 2.4081, "step": 1547500 }, { "epoch": 4.48, "learning_rate": 4.7760420526120785e-05, "loss": 2.4073, "step": 1548000 }, { "epoch": 4.48, "learning_rate": 4.775969687847351e-05, "loss": 2.4255, "step": 1548500 }, { "epoch": 4.48, "learning_rate": 4.775897323082623e-05, "loss": 2.4003, "step": 1549000 }, { "epoch": 4.49, "learning_rate": 4.775825103047425e-05, "loss": 2.4371, "step": 1549500 }, { "epoch": 4.49, "learning_rate": 4.7757527382826975e-05, "loss": 2.3938, "step": 1550000 }, { "epoch": 4.49, "learning_rate": 4.7756803735179704e-05, "loss": 2.4154, "step": 1550500 }, { "epoch": 4.49, "learning_rate": 4.7756080087532426e-05, "loss": 2.4271, "step": 1551000 }, { "epoch": 4.49, "learning_rate": 4.775535643988515e-05, "loss": 2.4115, "step": 1551500 }, { "epoch": 4.49, "learning_rate": 4.775463279223787e-05, "loss": 2.4289, "step": 1552000 }, { "epoch": 4.49, "learning_rate": 4.775390914459059e-05, "loss": 2.4203, "step": 1552500 }, { "epoch": 4.5, "learning_rate": 4.7753185496943315e-05, "loss": 2.3844, "step": 1553000 }, { "epoch": 4.5, "learning_rate": 4.775246184929604e-05, "loss": 2.41, "step": 1553500 }, { "epoch": 4.5, "learning_rate": 4.775173964894405e-05, "loss": 2.4078, "step": 1554000 }, { "epoch": 4.5, "learning_rate": 4.7751017448592075e-05, "loss": 2.3975, "step": 1554500 }, { "epoch": 4.5, "learning_rate": 4.77502938009448e-05, "loss": 2.4143, "step": 1555000 }, { "epoch": 4.5, "learning_rate": 4.774957015329752e-05, "loss": 2.4097, "step": 1555500 }, { "epoch": 4.5, "learning_rate": 4.774884650565024e-05, "loss": 2.4038, "step": 1556000 }, { "epoch": 4.51, "learning_rate": 4.7748122858002964e-05, "loss": 2.4089, "step": 1556500 }, { "epoch": 4.51, "learning_rate": 4.774740065765098e-05, "loss": 2.4178, "step": 1557000 }, { "epoch": 4.51, "learning_rate": 4.774667701000371e-05, "loss": 2.3995, "step": 1557500 }, { "epoch": 4.51, "learning_rate": 4.774595336235643e-05, "loss": 2.3735, "step": 1558000 }, { "epoch": 4.51, "learning_rate": 4.774522971470915e-05, "loss": 2.4168, "step": 1558500 }, { "epoch": 4.51, "learning_rate": 4.7744506067061875e-05, "loss": 2.4159, "step": 1559000 }, { "epoch": 4.51, "learning_rate": 4.7743782419414604e-05, "loss": 2.375, "step": 1559500 }, { "epoch": 4.52, "learning_rate": 4.774306021906262e-05, "loss": 2.377, "step": 1560000 }, { "epoch": 4.52, "learning_rate": 4.774233657141534e-05, "loss": 2.3908, "step": 1560500 }, { "epoch": 4.52, "learning_rate": 4.7741612923768064e-05, "loss": 2.407, "step": 1561000 }, { "epoch": 4.52, "learning_rate": 4.7740889276120786e-05, "loss": 2.4081, "step": 1561500 }, { "epoch": 4.52, "learning_rate": 4.774016562847351e-05, "loss": 2.4069, "step": 1562000 }, { "epoch": 4.52, "learning_rate": 4.773944198082623e-05, "loss": 2.3855, "step": 1562500 }, { "epoch": 4.52, "learning_rate": 4.773871978047425e-05, "loss": 2.4028, "step": 1563000 }, { "epoch": 4.53, "learning_rate": 4.7737996132826975e-05, "loss": 2.4095, "step": 1563500 }, { "epoch": 4.53, "learning_rate": 4.77372724851797e-05, "loss": 2.3938, "step": 1564000 }, { "epoch": 4.53, "learning_rate": 4.7736548837532427e-05, "loss": 2.4144, "step": 1564500 }, { "epoch": 4.53, "learning_rate": 4.773582518988515e-05, "loss": 2.4065, "step": 1565000 }, { "epoch": 4.53, "learning_rate": 4.773510154223787e-05, "loss": 2.42, "step": 1565500 }, { "epoch": 4.53, "learning_rate": 4.773437789459059e-05, "loss": 2.402, "step": 1566000 }, { "epoch": 4.53, "learning_rate": 4.7733654246943315e-05, "loss": 2.4092, "step": 1566500 }, { "epoch": 4.54, "learning_rate": 4.773293059929604e-05, "loss": 2.4149, "step": 1567000 }, { "epoch": 4.54, "learning_rate": 4.773220695164876e-05, "loss": 2.3853, "step": 1567500 }, { "epoch": 4.54, "learning_rate": 4.773148330400148e-05, "loss": 2.4162, "step": 1568000 }, { "epoch": 4.54, "learning_rate": 4.7730759656354204e-05, "loss": 2.3906, "step": 1568500 }, { "epoch": 4.54, "learning_rate": 4.773003890329752e-05, "loss": 2.4269, "step": 1569000 }, { "epoch": 4.54, "learning_rate": 4.772931525565024e-05, "loss": 2.4229, "step": 1569500 }, { "epoch": 4.54, "learning_rate": 4.7728591608002965e-05, "loss": 2.4178, "step": 1570000 }, { "epoch": 4.55, "learning_rate": 4.772786796035569e-05, "loss": 2.3728, "step": 1570500 }, { "epoch": 4.55, "learning_rate": 4.772714431270841e-05, "loss": 2.4172, "step": 1571000 }, { "epoch": 4.55, "learning_rate": 4.772642211235643e-05, "loss": 2.3942, "step": 1571500 }, { "epoch": 4.55, "learning_rate": 4.7725698464709154e-05, "loss": 2.4161, "step": 1572000 }, { "epoch": 4.55, "learning_rate": 4.772497481706188e-05, "loss": 2.4211, "step": 1572500 }, { "epoch": 4.55, "learning_rate": 4.7724251169414605e-05, "loss": 2.3863, "step": 1573000 }, { "epoch": 4.55, "learning_rate": 4.772352752176733e-05, "loss": 2.4052, "step": 1573500 }, { "epoch": 4.56, "learning_rate": 4.772280387412005e-05, "loss": 2.4065, "step": 1574000 }, { "epoch": 4.56, "learning_rate": 4.772208022647277e-05, "loss": 2.4304, "step": 1574500 }, { "epoch": 4.56, "learning_rate": 4.7721356578825494e-05, "loss": 2.3987, "step": 1575000 }, { "epoch": 4.56, "learning_rate": 4.77206372730641e-05, "loss": 2.4176, "step": 1575500 }, { "epoch": 4.56, "learning_rate": 4.7719913625416825e-05, "loss": 2.3861, "step": 1576000 }, { "epoch": 4.56, "learning_rate": 4.771918997776955e-05, "loss": 2.4126, "step": 1576500 }, { "epoch": 4.56, "learning_rate": 4.771846633012227e-05, "loss": 2.4089, "step": 1577000 }, { "epoch": 4.57, "learning_rate": 4.771774268247499e-05, "loss": 2.3926, "step": 1577500 }, { "epoch": 4.57, "learning_rate": 4.7717019034827714e-05, "loss": 2.4098, "step": 1578000 }, { "epoch": 4.57, "learning_rate": 4.771629683447573e-05, "loss": 2.3958, "step": 1578500 }, { "epoch": 4.57, "learning_rate": 4.771557318682845e-05, "loss": 2.4016, "step": 1579000 }, { "epoch": 4.57, "learning_rate": 4.771484953918118e-05, "loss": 2.3863, "step": 1579500 }, { "epoch": 4.57, "learning_rate": 4.77141258915339e-05, "loss": 2.4079, "step": 1580000 }, { "epoch": 4.57, "learning_rate": 4.771340224388663e-05, "loss": 2.4021, "step": 1580500 }, { "epoch": 4.58, "learning_rate": 4.7712678596239354e-05, "loss": 2.3927, "step": 1581000 }, { "epoch": 4.58, "learning_rate": 4.7711954948592076e-05, "loss": 2.3973, "step": 1581500 }, { "epoch": 4.58, "learning_rate": 4.77112313009448e-05, "loss": 2.3911, "step": 1582000 }, { "epoch": 4.58, "learning_rate": 4.771050765329752e-05, "loss": 2.3867, "step": 1582500 }, { "epoch": 4.58, "learning_rate": 4.770978400565024e-05, "loss": 2.4236, "step": 1583000 }, { "epoch": 4.58, "learning_rate": 4.770906325259356e-05, "loss": 2.3837, "step": 1583500 }, { "epoch": 4.59, "learning_rate": 4.770833960494628e-05, "loss": 2.423, "step": 1584000 }, { "epoch": 4.59, "learning_rate": 4.7707615957299e-05, "loss": 2.3865, "step": 1584500 }, { "epoch": 4.59, "learning_rate": 4.7706892309651725e-05, "loss": 2.4223, "step": 1585000 }, { "epoch": 4.59, "learning_rate": 4.770616866200445e-05, "loss": 2.4254, "step": 1585500 }, { "epoch": 4.59, "learning_rate": 4.770544501435717e-05, "loss": 2.4118, "step": 1586000 }, { "epoch": 4.59, "learning_rate": 4.770472136670989e-05, "loss": 2.3966, "step": 1586500 }, { "epoch": 4.59, "learning_rate": 4.770399771906262e-05, "loss": 2.3925, "step": 1587000 }, { "epoch": 4.6, "learning_rate": 4.770327551871064e-05, "loss": 2.4005, "step": 1587500 }, { "epoch": 4.6, "learning_rate": 4.770255187106336e-05, "loss": 2.4169, "step": 1588000 }, { "epoch": 4.6, "learning_rate": 4.770182822341608e-05, "loss": 2.4099, "step": 1588500 }, { "epoch": 4.6, "learning_rate": 4.770110457576881e-05, "loss": 2.4093, "step": 1589000 }, { "epoch": 4.6, "learning_rate": 4.770038092812153e-05, "loss": 2.4065, "step": 1589500 }, { "epoch": 4.6, "learning_rate": 4.7699657280474255e-05, "loss": 2.3872, "step": 1590000 }, { "epoch": 4.6, "learning_rate": 4.769893363282698e-05, "loss": 2.4044, "step": 1590500 }, { "epoch": 4.61, "learning_rate": 4.76982099851797e-05, "loss": 2.4164, "step": 1591000 }, { "epoch": 4.61, "learning_rate": 4.769748633753242e-05, "loss": 2.4102, "step": 1591500 }, { "epoch": 4.61, "learning_rate": 4.7696762689885144e-05, "loss": 2.3964, "step": 1592000 }, { "epoch": 4.61, "learning_rate": 4.7696039042237866e-05, "loss": 2.3916, "step": 1592500 }, { "epoch": 4.61, "learning_rate": 4.769531828918118e-05, "loss": 2.3872, "step": 1593000 }, { "epoch": 4.61, "learning_rate": 4.7694594641533904e-05, "loss": 2.3998, "step": 1593500 }, { "epoch": 4.61, "learning_rate": 4.7693870993886626e-05, "loss": 2.377, "step": 1594000 }, { "epoch": 4.62, "learning_rate": 4.769314734623935e-05, "loss": 2.4061, "step": 1594500 }, { "epoch": 4.62, "learning_rate": 4.769242369859208e-05, "loss": 2.4111, "step": 1595000 }, { "epoch": 4.62, "learning_rate": 4.76917000509448e-05, "loss": 2.4019, "step": 1595500 }, { "epoch": 4.62, "learning_rate": 4.769097640329752e-05, "loss": 2.4048, "step": 1596000 }, { "epoch": 4.62, "learning_rate": 4.7690252755650244e-05, "loss": 2.415, "step": 1596500 }, { "epoch": 4.62, "learning_rate": 4.7689529108002966e-05, "loss": 2.3938, "step": 1597000 }, { "epoch": 4.62, "learning_rate": 4.768880690765098e-05, "loss": 2.3854, "step": 1597500 }, { "epoch": 4.63, "learning_rate": 4.768808326000371e-05, "loss": 2.4076, "step": 1598000 }, { "epoch": 4.63, "learning_rate": 4.768735961235643e-05, "loss": 2.4007, "step": 1598500 }, { "epoch": 4.63, "learning_rate": 4.7686635964709155e-05, "loss": 2.4099, "step": 1599000 }, { "epoch": 4.63, "learning_rate": 4.768591231706188e-05, "loss": 2.4026, "step": 1599500 }, { "epoch": 4.63, "learning_rate": 4.768519011670989e-05, "loss": 2.4239, "step": 1600000 }, { "epoch": 4.63, "learning_rate": 4.7684466469062615e-05, "loss": 2.4118, "step": 1600500 }, { "epoch": 4.63, "learning_rate": 4.768374282141534e-05, "loss": 2.4259, "step": 1601000 }, { "epoch": 4.64, "learning_rate": 4.768301917376806e-05, "loss": 2.3864, "step": 1601500 }, { "epoch": 4.64, "learning_rate": 4.768229552612079e-05, "loss": 2.4108, "step": 1602000 }, { "epoch": 4.64, "learning_rate": 4.768157332576881e-05, "loss": 2.4319, "step": 1602500 }, { "epoch": 4.64, "learning_rate": 4.768084967812153e-05, "loss": 2.4033, "step": 1603000 }, { "epoch": 4.64, "learning_rate": 4.7680126030474255e-05, "loss": 2.4052, "step": 1603500 }, { "epoch": 4.64, "learning_rate": 4.767940238282698e-05, "loss": 2.4218, "step": 1604000 }, { "epoch": 4.64, "learning_rate": 4.767868018247499e-05, "loss": 2.4048, "step": 1604500 }, { "epoch": 4.65, "learning_rate": 4.767795798212301e-05, "loss": 2.3961, "step": 1605000 }, { "epoch": 4.65, "learning_rate": 4.767723433447573e-05, "loss": 2.4039, "step": 1605500 }, { "epoch": 4.65, "learning_rate": 4.767651068682846e-05, "loss": 2.4094, "step": 1606000 }, { "epoch": 4.65, "learning_rate": 4.767578703918118e-05, "loss": 2.4187, "step": 1606500 }, { "epoch": 4.65, "learning_rate": 4.7675063391533905e-05, "loss": 2.3923, "step": 1607000 }, { "epoch": 4.65, "learning_rate": 4.767433974388663e-05, "loss": 2.4008, "step": 1607500 }, { "epoch": 4.65, "learning_rate": 4.767361609623935e-05, "loss": 2.3778, "step": 1608000 }, { "epoch": 4.66, "learning_rate": 4.767289244859207e-05, "loss": 2.3893, "step": 1608500 }, { "epoch": 4.66, "learning_rate": 4.767217169553539e-05, "loss": 2.4255, "step": 1609000 }, { "epoch": 4.66, "learning_rate": 4.767144804788811e-05, "loss": 2.4109, "step": 1609500 }, { "epoch": 4.66, "learning_rate": 4.767072440024084e-05, "loss": 2.4215, "step": 1610000 }, { "epoch": 4.66, "learning_rate": 4.767000075259356e-05, "loss": 2.3852, "step": 1610500 }, { "epoch": 4.66, "learning_rate": 4.766927710494628e-05, "loss": 2.3951, "step": 1611000 }, { "epoch": 4.66, "learning_rate": 4.7668553457299005e-05, "loss": 2.4263, "step": 1611500 }, { "epoch": 4.67, "learning_rate": 4.766782980965173e-05, "loss": 2.3964, "step": 1612000 }, { "epoch": 4.67, "learning_rate": 4.766710760929974e-05, "loss": 2.4086, "step": 1612500 }, { "epoch": 4.67, "learning_rate": 4.7666383961652465e-05, "loss": 2.423, "step": 1613000 }, { "epoch": 4.67, "learning_rate": 4.766566031400519e-05, "loss": 2.3995, "step": 1613500 }, { "epoch": 4.67, "learning_rate": 4.766493666635791e-05, "loss": 2.4126, "step": 1614000 }, { "epoch": 4.67, "learning_rate": 4.766421301871064e-05, "loss": 2.3919, "step": 1614500 }, { "epoch": 4.67, "learning_rate": 4.7663490818358654e-05, "loss": 2.399, "step": 1615000 }, { "epoch": 4.68, "learning_rate": 4.7662767170711376e-05, "loss": 2.4192, "step": 1615500 }, { "epoch": 4.68, "learning_rate": 4.76620435230641e-05, "loss": 2.4094, "step": 1616000 }, { "epoch": 4.68, "learning_rate": 4.766131987541682e-05, "loss": 2.396, "step": 1616500 }, { "epoch": 4.68, "learning_rate": 4.766059622776954e-05, "loss": 2.4232, "step": 1617000 }, { "epoch": 4.68, "learning_rate": 4.765987258012227e-05, "loss": 2.3948, "step": 1617500 }, { "epoch": 4.68, "learning_rate": 4.7659148932474994e-05, "loss": 2.4086, "step": 1618000 }, { "epoch": 4.68, "learning_rate": 4.7658425284827716e-05, "loss": 2.4057, "step": 1618500 }, { "epoch": 4.69, "learning_rate": 4.765770308447574e-05, "loss": 2.4064, "step": 1619000 }, { "epoch": 4.69, "learning_rate": 4.765697943682846e-05, "loss": 2.4137, "step": 1619500 }, { "epoch": 4.69, "learning_rate": 4.765625578918118e-05, "loss": 2.3835, "step": 1620000 }, { "epoch": 4.69, "learning_rate": 4.7655532141533905e-05, "loss": 2.39, "step": 1620500 }, { "epoch": 4.69, "learning_rate": 4.765480849388663e-05, "loss": 2.3955, "step": 1621000 }, { "epoch": 4.69, "learning_rate": 4.765408484623935e-05, "loss": 2.3853, "step": 1621500 }, { "epoch": 4.7, "learning_rate": 4.765336119859207e-05, "loss": 2.4171, "step": 1622000 }, { "epoch": 4.7, "learning_rate": 4.7652637550944794e-05, "loss": 2.3885, "step": 1622500 }, { "epoch": 4.7, "learning_rate": 4.7651913903297516e-05, "loss": 2.3954, "step": 1623000 }, { "epoch": 4.7, "learning_rate": 4.765119315024083e-05, "loss": 2.4048, "step": 1623500 }, { "epoch": 4.7, "learning_rate": 4.7650469502593554e-05, "loss": 2.3949, "step": 1624000 }, { "epoch": 4.7, "learning_rate": 4.764974585494628e-05, "loss": 2.4042, "step": 1624500 }, { "epoch": 4.7, "learning_rate": 4.7649022207299006e-05, "loss": 2.4083, "step": 1625000 }, { "epoch": 4.71, "learning_rate": 4.764829855965173e-05, "loss": 2.3785, "step": 1625500 }, { "epoch": 4.71, "learning_rate": 4.764757491200445e-05, "loss": 2.3923, "step": 1626000 }, { "epoch": 4.71, "learning_rate": 4.764685126435717e-05, "loss": 2.3973, "step": 1626500 }, { "epoch": 4.71, "learning_rate": 4.7646127616709895e-05, "loss": 2.4261, "step": 1627000 }, { "epoch": 4.71, "learning_rate": 4.764540396906262e-05, "loss": 2.4099, "step": 1627500 }, { "epoch": 4.71, "learning_rate": 4.764468176871064e-05, "loss": 2.4242, "step": 1628000 }, { "epoch": 4.71, "learning_rate": 4.764395812106336e-05, "loss": 2.4028, "step": 1628500 }, { "epoch": 4.72, "learning_rate": 4.7643234473416084e-05, "loss": 2.3986, "step": 1629000 }, { "epoch": 4.72, "learning_rate": 4.7642510825768806e-05, "loss": 2.3936, "step": 1629500 }, { "epoch": 4.72, "learning_rate": 4.764178717812153e-05, "loss": 2.402, "step": 1630000 }, { "epoch": 4.72, "learning_rate": 4.764106353047425e-05, "loss": 2.4277, "step": 1630500 }, { "epoch": 4.72, "learning_rate": 4.7640342777417566e-05, "loss": 2.4316, "step": 1631000 }, { "epoch": 4.72, "learning_rate": 4.763961912977029e-05, "loss": 2.4045, "step": 1631500 }, { "epoch": 4.72, "learning_rate": 4.763889548212301e-05, "loss": 2.381, "step": 1632000 }, { "epoch": 4.73, "learning_rate": 4.763817183447574e-05, "loss": 2.4049, "step": 1632500 }, { "epoch": 4.73, "learning_rate": 4.763744818682846e-05, "loss": 2.4047, "step": 1633000 }, { "epoch": 4.73, "learning_rate": 4.7636724539181184e-05, "loss": 2.4086, "step": 1633500 }, { "epoch": 4.73, "learning_rate": 4.7636000891533906e-05, "loss": 2.4135, "step": 1634000 }, { "epoch": 4.73, "learning_rate": 4.763527724388663e-05, "loss": 2.3905, "step": 1634500 }, { "epoch": 4.73, "learning_rate": 4.763455359623935e-05, "loss": 2.4004, "step": 1635000 }, { "epoch": 4.73, "learning_rate": 4.7633831395887366e-05, "loss": 2.4013, "step": 1635500 }, { "epoch": 4.74, "learning_rate": 4.7633112090125975e-05, "loss": 2.39, "step": 1636000 }, { "epoch": 4.74, "learning_rate": 4.76323884424787e-05, "loss": 2.3971, "step": 1636500 }, { "epoch": 4.74, "learning_rate": 4.763166624212671e-05, "loss": 2.4329, "step": 1637000 }, { "epoch": 4.74, "learning_rate": 4.7630944041774735e-05, "loss": 2.3996, "step": 1637500 }, { "epoch": 4.74, "learning_rate": 4.763022039412746e-05, "loss": 2.4248, "step": 1638000 }, { "epoch": 4.74, "learning_rate": 4.762949674648018e-05, "loss": 2.4253, "step": 1638500 }, { "epoch": 4.74, "learning_rate": 4.76287730988329e-05, "loss": 2.3683, "step": 1639000 }, { "epoch": 4.75, "learning_rate": 4.7628049451185624e-05, "loss": 2.3863, "step": 1639500 }, { "epoch": 4.75, "learning_rate": 4.7627325803538346e-05, "loss": 2.3909, "step": 1640000 }, { "epoch": 4.75, "learning_rate": 4.7626602155891075e-05, "loss": 2.4019, "step": 1640500 }, { "epoch": 4.75, "learning_rate": 4.76258785082438e-05, "loss": 2.4288, "step": 1641000 }, { "epoch": 4.75, "learning_rate": 4.762515486059652e-05, "loss": 2.4021, "step": 1641500 }, { "epoch": 4.75, "learning_rate": 4.762443121294924e-05, "loss": 2.4128, "step": 1642000 }, { "epoch": 4.75, "learning_rate": 4.7623707565301964e-05, "loss": 2.3846, "step": 1642500 }, { "epoch": 4.76, "learning_rate": 4.7622983917654687e-05, "loss": 2.3924, "step": 1643000 }, { "epoch": 4.76, "learning_rate": 4.762226171730271e-05, "loss": 2.3907, "step": 1643500 }, { "epoch": 4.76, "learning_rate": 4.762153806965543e-05, "loss": 2.3863, "step": 1644000 }, { "epoch": 4.76, "learning_rate": 4.762081442200815e-05, "loss": 2.4026, "step": 1644500 }, { "epoch": 4.76, "learning_rate": 4.7620090774360876e-05, "loss": 2.4063, "step": 1645000 }, { "epoch": 4.76, "learning_rate": 4.76193671267136e-05, "loss": 2.3891, "step": 1645500 }, { "epoch": 4.76, "learning_rate": 4.761864347906632e-05, "loss": 2.4053, "step": 1646000 }, { "epoch": 4.77, "learning_rate": 4.761791983141904e-05, "loss": 2.3859, "step": 1646500 }, { "epoch": 4.77, "learning_rate": 4.7617196183771765e-05, "loss": 2.3951, "step": 1647000 }, { "epoch": 4.77, "learning_rate": 4.761647253612449e-05, "loss": 2.4227, "step": 1647500 }, { "epoch": 4.77, "learning_rate": 4.7615748888477216e-05, "loss": 2.4123, "step": 1648000 }, { "epoch": 4.77, "learning_rate": 4.761502524082994e-05, "loss": 2.3933, "step": 1648500 }, { "epoch": 4.77, "learning_rate": 4.761430159318267e-05, "loss": 2.4027, "step": 1649000 }, { "epoch": 4.77, "learning_rate": 4.761357794553539e-05, "loss": 2.407, "step": 1649500 }, { "epoch": 4.78, "learning_rate": 4.761285429788811e-05, "loss": 2.3933, "step": 1650000 }, { "epoch": 4.78, "learning_rate": 4.7612130650240834e-05, "loss": 2.385, "step": 1650500 }, { "epoch": 4.78, "learning_rate": 4.7611407002593556e-05, "loss": 2.3984, "step": 1651000 }, { "epoch": 4.78, "learning_rate": 4.761068335494628e-05, "loss": 2.4191, "step": 1651500 }, { "epoch": 4.78, "learning_rate": 4.7609959707299e-05, "loss": 2.4072, "step": 1652000 }, { "epoch": 4.78, "learning_rate": 4.760923605965172e-05, "loss": 2.3882, "step": 1652500 }, { "epoch": 4.78, "learning_rate": 4.7608512412004445e-05, "loss": 2.4072, "step": 1653000 }, { "epoch": 4.79, "learning_rate": 4.760778876435717e-05, "loss": 2.3693, "step": 1653500 }, { "epoch": 4.79, "learning_rate": 4.760706656400519e-05, "loss": 2.3927, "step": 1654000 }, { "epoch": 4.79, "learning_rate": 4.760634291635791e-05, "loss": 2.4096, "step": 1654500 }, { "epoch": 4.79, "learning_rate": 4.7605620716005934e-05, "loss": 2.3886, "step": 1655000 }, { "epoch": 4.79, "learning_rate": 4.7604897068358656e-05, "loss": 2.411, "step": 1655500 }, { "epoch": 4.79, "learning_rate": 4.760417342071138e-05, "loss": 2.4072, "step": 1656000 }, { "epoch": 4.79, "learning_rate": 4.76034497730641e-05, "loss": 2.3964, "step": 1656500 }, { "epoch": 4.8, "learning_rate": 4.760272612541682e-05, "loss": 2.3904, "step": 1657000 }, { "epoch": 4.8, "learning_rate": 4.7602002477769545e-05, "loss": 2.4005, "step": 1657500 }, { "epoch": 4.8, "learning_rate": 4.760127883012227e-05, "loss": 2.3994, "step": 1658000 }, { "epoch": 4.8, "learning_rate": 4.760055518247499e-05, "loss": 2.397, "step": 1658500 }, { "epoch": 4.8, "learning_rate": 4.759983153482772e-05, "loss": 2.3958, "step": 1659000 }, { "epoch": 4.8, "learning_rate": 4.759910788718044e-05, "loss": 2.4105, "step": 1659500 }, { "epoch": 4.81, "learning_rate": 4.7598385686828456e-05, "loss": 2.3894, "step": 1660000 }, { "epoch": 4.81, "learning_rate": 4.759766203918118e-05, "loss": 2.3845, "step": 1660500 }, { "epoch": 4.81, "learning_rate": 4.7596939838829194e-05, "loss": 2.429, "step": 1661000 }, { "epoch": 4.81, "learning_rate": 4.7596216191181916e-05, "loss": 2.3914, "step": 1661500 }, { "epoch": 4.81, "learning_rate": 4.7595492543534646e-05, "loss": 2.4105, "step": 1662000 }, { "epoch": 4.81, "learning_rate": 4.759476889588737e-05, "loss": 2.4035, "step": 1662500 }, { "epoch": 4.81, "learning_rate": 4.75940452482401e-05, "loss": 2.3958, "step": 1663000 }, { "epoch": 4.82, "learning_rate": 4.759332160059282e-05, "loss": 2.4198, "step": 1663500 }, { "epoch": 4.82, "learning_rate": 4.7592599400240835e-05, "loss": 2.426, "step": 1664000 }, { "epoch": 4.82, "learning_rate": 4.759187575259356e-05, "loss": 2.4335, "step": 1664500 }, { "epoch": 4.82, "learning_rate": 4.759115210494628e-05, "loss": 2.4085, "step": 1665000 }, { "epoch": 4.82, "learning_rate": 4.7590428457299e-05, "loss": 2.41, "step": 1665500 }, { "epoch": 4.82, "learning_rate": 4.7589704809651723e-05, "loss": 2.4087, "step": 1666000 }, { "epoch": 4.82, "learning_rate": 4.7588981162004446e-05, "loss": 2.4285, "step": 1666500 }, { "epoch": 4.83, "learning_rate": 4.758825751435717e-05, "loss": 2.3893, "step": 1667000 }, { "epoch": 4.83, "learning_rate": 4.758753386670989e-05, "loss": 2.3787, "step": 1667500 }, { "epoch": 4.83, "learning_rate": 4.758681021906262e-05, "loss": 2.3992, "step": 1668000 }, { "epoch": 4.83, "learning_rate": 4.7586088018710635e-05, "loss": 2.3929, "step": 1668500 }, { "epoch": 4.83, "learning_rate": 4.758536437106336e-05, "loss": 2.3992, "step": 1669000 }, { "epoch": 4.83, "learning_rate": 4.758464072341608e-05, "loss": 2.4003, "step": 1669500 }, { "epoch": 4.83, "learning_rate": 4.758391707576881e-05, "loss": 2.3888, "step": 1670000 }, { "epoch": 4.84, "learning_rate": 4.7583194875416824e-05, "loss": 2.3795, "step": 1670500 }, { "epoch": 4.84, "learning_rate": 4.7582471227769546e-05, "loss": 2.4026, "step": 1671000 }, { "epoch": 4.84, "learning_rate": 4.758174758012227e-05, "loss": 2.4039, "step": 1671500 }, { "epoch": 4.84, "learning_rate": 4.758102537977029e-05, "loss": 2.4054, "step": 1672000 }, { "epoch": 4.84, "learning_rate": 4.758030173212301e-05, "loss": 2.3865, "step": 1672500 }, { "epoch": 4.84, "learning_rate": 4.7579578084475735e-05, "loss": 2.3997, "step": 1673000 }, { "epoch": 4.84, "learning_rate": 4.757885443682846e-05, "loss": 2.4018, "step": 1673500 }, { "epoch": 4.85, "learning_rate": 4.757813223647647e-05, "loss": 2.3884, "step": 1674000 }, { "epoch": 4.85, "learning_rate": 4.7577408588829195e-05, "loss": 2.4059, "step": 1674500 }, { "epoch": 4.85, "learning_rate": 4.757668638847722e-05, "loss": 2.3905, "step": 1675000 }, { "epoch": 4.85, "learning_rate": 4.757596274082994e-05, "loss": 2.4172, "step": 1675500 }, { "epoch": 4.85, "learning_rate": 4.757523909318266e-05, "loss": 2.393, "step": 1676000 }, { "epoch": 4.85, "learning_rate": 4.7574515445535384e-05, "loss": 2.4145, "step": 1676500 }, { "epoch": 4.85, "learning_rate": 4.7573791797888106e-05, "loss": 2.3721, "step": 1677000 }, { "epoch": 4.86, "learning_rate": 4.7573068150240835e-05, "loss": 2.4038, "step": 1677500 }, { "epoch": 4.86, "learning_rate": 4.757234450259356e-05, "loss": 2.3879, "step": 1678000 }, { "epoch": 4.86, "learning_rate": 4.757162085494628e-05, "loss": 2.4002, "step": 1678500 }, { "epoch": 4.86, "learning_rate": 4.7570897207299e-05, "loss": 2.399, "step": 1679000 }, { "epoch": 4.86, "learning_rate": 4.7570173559651724e-05, "loss": 2.3991, "step": 1679500 }, { "epoch": 4.86, "learning_rate": 4.7569449912004446e-05, "loss": 2.3853, "step": 1680000 }, { "epoch": 4.86, "learning_rate": 4.756872626435717e-05, "loss": 2.4077, "step": 1680500 }, { "epoch": 4.87, "learning_rate": 4.75680026167099e-05, "loss": 2.3924, "step": 1681000 }, { "epoch": 4.87, "learning_rate": 4.756728041635791e-05, "loss": 2.3925, "step": 1681500 }, { "epoch": 4.87, "learning_rate": 4.7566556768710636e-05, "loss": 2.4028, "step": 1682000 }, { "epoch": 4.87, "learning_rate": 4.756583456835865e-05, "loss": 2.4064, "step": 1682500 }, { "epoch": 4.87, "learning_rate": 4.756511092071137e-05, "loss": 2.3987, "step": 1683000 }, { "epoch": 4.87, "learning_rate": 4.7564387273064096e-05, "loss": 2.4013, "step": 1683500 }, { "epoch": 4.87, "learning_rate": 4.756366507271212e-05, "loss": 2.3857, "step": 1684000 }, { "epoch": 4.88, "learning_rate": 4.756294142506484e-05, "loss": 2.4104, "step": 1684500 }, { "epoch": 4.88, "learning_rate": 4.756221777741757e-05, "loss": 2.3847, "step": 1685000 }, { "epoch": 4.88, "learning_rate": 4.756149412977029e-05, "loss": 2.3875, "step": 1685500 }, { "epoch": 4.88, "learning_rate": 4.7560770482123014e-05, "loss": 2.3956, "step": 1686000 }, { "epoch": 4.88, "learning_rate": 4.7560046834475736e-05, "loss": 2.3835, "step": 1686500 }, { "epoch": 4.88, "learning_rate": 4.755932318682846e-05, "loss": 2.4119, "step": 1687000 }, { "epoch": 4.88, "learning_rate": 4.755859953918118e-05, "loss": 2.4088, "step": 1687500 }, { "epoch": 4.89, "learning_rate": 4.7557877338829196e-05, "loss": 2.3904, "step": 1688000 }, { "epoch": 4.89, "learning_rate": 4.7557153691181925e-05, "loss": 2.3946, "step": 1688500 }, { "epoch": 4.89, "learning_rate": 4.755643004353465e-05, "loss": 2.395, "step": 1689000 }, { "epoch": 4.89, "learning_rate": 4.755570639588737e-05, "loss": 2.3964, "step": 1689500 }, { "epoch": 4.89, "learning_rate": 4.755498274824009e-05, "loss": 2.3735, "step": 1690000 }, { "epoch": 4.89, "learning_rate": 4.755426054788811e-05, "loss": 2.3745, "step": 1690500 }, { "epoch": 4.89, "learning_rate": 4.755353690024083e-05, "loss": 2.4106, "step": 1691000 }, { "epoch": 4.9, "learning_rate": 4.755281325259355e-05, "loss": 2.4186, "step": 1691500 }, { "epoch": 4.9, "learning_rate": 4.7552089604946274e-05, "loss": 2.3922, "step": 1692000 }, { "epoch": 4.9, "learning_rate": 4.7551367404594296e-05, "loss": 2.4133, "step": 1692500 }, { "epoch": 4.9, "learning_rate": 4.7550643756947025e-05, "loss": 2.3982, "step": 1693000 }, { "epoch": 4.9, "learning_rate": 4.754992010929975e-05, "loss": 2.3984, "step": 1693500 }, { "epoch": 4.9, "learning_rate": 4.754919646165247e-05, "loss": 2.414, "step": 1694000 }, { "epoch": 4.9, "learning_rate": 4.754847281400519e-05, "loss": 2.3721, "step": 1694500 }, { "epoch": 4.91, "learning_rate": 4.7547749166357914e-05, "loss": 2.4146, "step": 1695000 }, { "epoch": 4.91, "learning_rate": 4.7547025518710636e-05, "loss": 2.4154, "step": 1695500 }, { "epoch": 4.91, "learning_rate": 4.754630187106336e-05, "loss": 2.3934, "step": 1696000 }, { "epoch": 4.91, "learning_rate": 4.7545579670711374e-05, "loss": 2.3936, "step": 1696500 }, { "epoch": 4.91, "learning_rate": 4.7544856023064096e-05, "loss": 2.3903, "step": 1697000 }, { "epoch": 4.91, "learning_rate": 4.7544132375416825e-05, "loss": 2.3697, "step": 1697500 }, { "epoch": 4.92, "learning_rate": 4.754341017506484e-05, "loss": 2.4071, "step": 1698000 }, { "epoch": 4.92, "learning_rate": 4.754268652741756e-05, "loss": 2.413, "step": 1698500 }, { "epoch": 4.92, "learning_rate": 4.7541962879770285e-05, "loss": 2.4157, "step": 1699000 }, { "epoch": 4.92, "learning_rate": 4.754123923212301e-05, "loss": 2.3772, "step": 1699500 }, { "epoch": 4.92, "learning_rate": 4.754051558447574e-05, "loss": 2.407, "step": 1700000 }, { "epoch": 4.92, "learning_rate": 4.753979193682846e-05, "loss": 2.414, "step": 1700500 }, { "epoch": 4.92, "learning_rate": 4.753906828918118e-05, "loss": 2.3995, "step": 1701000 }, { "epoch": 4.93, "learning_rate": 4.75383446415339e-05, "loss": 2.4001, "step": 1701500 }, { "epoch": 4.93, "learning_rate": 4.7537620993886626e-05, "loss": 2.4058, "step": 1702000 }, { "epoch": 4.93, "learning_rate": 4.753689734623935e-05, "loss": 2.4071, "step": 1702500 }, { "epoch": 4.93, "learning_rate": 4.753617369859208e-05, "loss": 2.3934, "step": 1703000 }, { "epoch": 4.93, "learning_rate": 4.75354500509448e-05, "loss": 2.3817, "step": 1703500 }, { "epoch": 4.93, "learning_rate": 4.7534727850592815e-05, "loss": 2.3877, "step": 1704000 }, { "epoch": 4.93, "learning_rate": 4.753400420294554e-05, "loss": 2.4008, "step": 1704500 }, { "epoch": 4.94, "learning_rate": 4.753328055529826e-05, "loss": 2.4216, "step": 1705000 }, { "epoch": 4.94, "learning_rate": 4.753255690765098e-05, "loss": 2.3879, "step": 1705500 }, { "epoch": 4.94, "learning_rate": 4.7531833260003704e-05, "loss": 2.3824, "step": 1706000 }, { "epoch": 4.94, "learning_rate": 4.7531109612356426e-05, "loss": 2.4059, "step": 1706500 }, { "epoch": 4.94, "learning_rate": 4.7530385964709155e-05, "loss": 2.4005, "step": 1707000 }, { "epoch": 4.94, "learning_rate": 4.752966376435718e-05, "loss": 2.3979, "step": 1707500 }, { "epoch": 4.94, "learning_rate": 4.75289401167099e-05, "loss": 2.3794, "step": 1708000 }, { "epoch": 4.95, "learning_rate": 4.752821646906262e-05, "loss": 2.3907, "step": 1708500 }, { "epoch": 4.95, "learning_rate": 4.7527492821415344e-05, "loss": 2.4196, "step": 1709000 }, { "epoch": 4.95, "learning_rate": 4.7526769173768066e-05, "loss": 2.4044, "step": 1709500 }, { "epoch": 4.95, "learning_rate": 4.752604697341608e-05, "loss": 2.3837, "step": 1710000 }, { "epoch": 4.95, "learning_rate": 4.75253247730641e-05, "loss": 2.3791, "step": 1710500 }, { "epoch": 4.95, "learning_rate": 4.7524601125416826e-05, "loss": 2.3985, "step": 1711000 }, { "epoch": 4.95, "learning_rate": 4.752387747776955e-05, "loss": 2.3697, "step": 1711500 }, { "epoch": 4.96, "learning_rate": 4.752315383012227e-05, "loss": 2.3993, "step": 1712000 }, { "epoch": 4.96, "learning_rate": 4.752243018247499e-05, "loss": 2.398, "step": 1712500 }, { "epoch": 4.96, "learning_rate": 4.7521706534827715e-05, "loss": 2.4138, "step": 1713000 }, { "epoch": 4.96, "learning_rate": 4.752098288718044e-05, "loss": 2.4165, "step": 1713500 }, { "epoch": 4.96, "learning_rate": 4.752025923953316e-05, "loss": 2.386, "step": 1714000 }, { "epoch": 4.96, "learning_rate": 4.751953559188589e-05, "loss": 2.394, "step": 1714500 }, { "epoch": 4.96, "learning_rate": 4.751881194423861e-05, "loss": 2.3911, "step": 1715000 }, { "epoch": 4.97, "learning_rate": 4.7518089743886626e-05, "loss": 2.3885, "step": 1715500 }, { "epoch": 4.97, "learning_rate": 4.751736609623935e-05, "loss": 2.4196, "step": 1716000 }, { "epoch": 4.97, "learning_rate": 4.751664244859208e-05, "loss": 2.3817, "step": 1716500 }, { "epoch": 4.97, "learning_rate": 4.75159188009448e-05, "loss": 2.3664, "step": 1717000 }, { "epoch": 4.97, "learning_rate": 4.751519515329752e-05, "loss": 2.3953, "step": 1717500 }, { "epoch": 4.97, "learning_rate": 4.751447295294554e-05, "loss": 2.3918, "step": 1718000 }, { "epoch": 4.97, "learning_rate": 4.751374930529826e-05, "loss": 2.3905, "step": 1718500 }, { "epoch": 4.98, "learning_rate": 4.751302565765098e-05, "loss": 2.3795, "step": 1719000 }, { "epoch": 4.98, "learning_rate": 4.7512303457299004e-05, "loss": 2.3891, "step": 1719500 }, { "epoch": 4.98, "learning_rate": 4.751157980965173e-05, "loss": 2.4072, "step": 1720000 }, { "epoch": 4.98, "learning_rate": 4.751085616200445e-05, "loss": 2.3928, "step": 1720500 }, { "epoch": 4.98, "learning_rate": 4.751013251435717e-05, "loss": 2.4127, "step": 1721000 }, { "epoch": 4.98, "learning_rate": 4.750940886670989e-05, "loss": 2.3841, "step": 1721500 }, { "epoch": 4.98, "learning_rate": 4.750868521906262e-05, "loss": 2.3948, "step": 1722000 }, { "epoch": 4.99, "learning_rate": 4.7507961571415345e-05, "loss": 2.4033, "step": 1722500 }, { "epoch": 4.99, "learning_rate": 4.750723792376807e-05, "loss": 2.3875, "step": 1723000 }, { "epoch": 4.99, "learning_rate": 4.750651427612079e-05, "loss": 2.4035, "step": 1723500 }, { "epoch": 4.99, "learning_rate": 4.750579062847351e-05, "loss": 2.3926, "step": 1724000 }, { "epoch": 4.99, "learning_rate": 4.750506842812153e-05, "loss": 2.4168, "step": 1724500 }, { "epoch": 4.99, "learning_rate": 4.7504344780474256e-05, "loss": 2.3791, "step": 1725000 }, { "epoch": 4.99, "learning_rate": 4.750362113282698e-05, "loss": 2.4007, "step": 1725500 }, { "epoch": 5.0, "learning_rate": 4.7502898932474994e-05, "loss": 2.4131, "step": 1726000 }, { "epoch": 5.0, "learning_rate": 4.7502175284827716e-05, "loss": 2.4227, "step": 1726500 }, { "epoch": 5.0, "learning_rate": 4.750145163718044e-05, "loss": 2.3993, "step": 1727000 }, { "epoch": 5.0, "eval_accuracy": 0.6404864024640362, "eval_accuracy_mlm": 0.6024742252199724, "eval_accuracy_nsp": 0.8445596329854762, "eval_loss": 2.3650872707366943, "eval_runtime": 330.6241, "eval_samples_per_second": 1319.885, "eval_steps_per_second": 54.996, "step": 1727360 }, { "epoch": 5.0, "learning_rate": 4.7500729436828454e-05, "loss": 2.4071, "step": 1727500 }, { "epoch": 5.0, "learning_rate": 4.7500005789181176e-05, "loss": 2.367, "step": 1728000 }, { "epoch": 5.0, "learning_rate": 4.7499282141533905e-05, "loss": 2.3755, "step": 1728500 }, { "epoch": 5.0, "learning_rate": 4.749855849388663e-05, "loss": 2.3854, "step": 1729000 }, { "epoch": 5.01, "learning_rate": 4.749783484623935e-05, "loss": 2.3872, "step": 1729500 }, { "epoch": 5.01, "learning_rate": 4.749711119859208e-05, "loss": 2.3991, "step": 1730000 }, { "epoch": 5.01, "learning_rate": 4.74963875509448e-05, "loss": 2.3943, "step": 1730500 }, { "epoch": 5.01, "learning_rate": 4.749566390329752e-05, "loss": 2.3519, "step": 1731000 }, { "epoch": 5.01, "learning_rate": 4.7494940255650245e-05, "loss": 2.383, "step": 1731500 }, { "epoch": 5.01, "learning_rate": 4.749421660800297e-05, "loss": 2.3782, "step": 1732000 }, { "epoch": 5.01, "learning_rate": 4.749349296035569e-05, "loss": 2.3797, "step": 1732500 }, { "epoch": 5.02, "learning_rate": 4.749276931270841e-05, "loss": 2.3814, "step": 1733000 }, { "epoch": 5.02, "learning_rate": 4.7492045665061134e-05, "loss": 2.3829, "step": 1733500 }, { "epoch": 5.02, "learning_rate": 4.7491323464709156e-05, "loss": 2.3858, "step": 1734000 }, { "epoch": 5.02, "learning_rate": 4.749059981706188e-05, "loss": 2.3658, "step": 1734500 }, { "epoch": 5.02, "learning_rate": 4.7489877616709894e-05, "loss": 2.378, "step": 1735000 }, { "epoch": 5.02, "learning_rate": 4.748915541635791e-05, "loss": 2.3784, "step": 1735500 }, { "epoch": 5.03, "learning_rate": 4.748843176871063e-05, "loss": 2.3898, "step": 1736000 }, { "epoch": 5.03, "learning_rate": 4.7487708121063354e-05, "loss": 2.3917, "step": 1736500 }, { "epoch": 5.03, "learning_rate": 4.7486984473416076e-05, "loss": 2.3947, "step": 1737000 }, { "epoch": 5.03, "learning_rate": 4.7486260825768805e-05, "loss": 2.3529, "step": 1737500 }, { "epoch": 5.03, "learning_rate": 4.748553717812153e-05, "loss": 2.3713, "step": 1738000 }, { "epoch": 5.03, "learning_rate": 4.748481497776955e-05, "loss": 2.3602, "step": 1738500 }, { "epoch": 5.03, "learning_rate": 4.748409133012227e-05, "loss": 2.346, "step": 1739000 }, { "epoch": 5.04, "learning_rate": 4.7483367682474994e-05, "loss": 2.3819, "step": 1739500 }, { "epoch": 5.04, "learning_rate": 4.748264403482772e-05, "loss": 2.3741, "step": 1740000 }, { "epoch": 5.04, "learning_rate": 4.748192038718044e-05, "loss": 2.3783, "step": 1740500 }, { "epoch": 5.04, "learning_rate": 4.748119673953316e-05, "loss": 2.3638, "step": 1741000 }, { "epoch": 5.04, "learning_rate": 4.748047309188588e-05, "loss": 2.3874, "step": 1741500 }, { "epoch": 5.04, "learning_rate": 4.7479749444238606e-05, "loss": 2.3663, "step": 1742000 }, { "epoch": 5.04, "learning_rate": 4.747902724388663e-05, "loss": 2.3445, "step": 1742500 }, { "epoch": 5.05, "learning_rate": 4.747830359623935e-05, "loss": 2.3853, "step": 1743000 }, { "epoch": 5.05, "learning_rate": 4.747757994859207e-05, "loss": 2.3965, "step": 1743500 }, { "epoch": 5.05, "learning_rate": 4.7476856300944795e-05, "loss": 2.4022, "step": 1744000 }, { "epoch": 5.05, "learning_rate": 4.7476132653297524e-05, "loss": 2.3874, "step": 1744500 }, { "epoch": 5.05, "learning_rate": 4.747541045294554e-05, "loss": 2.3794, "step": 1745000 }, { "epoch": 5.05, "learning_rate": 4.747468680529826e-05, "loss": 2.3763, "step": 1745500 }, { "epoch": 5.05, "learning_rate": 4.7473963157650984e-05, "loss": 2.3632, "step": 1746000 }, { "epoch": 5.06, "learning_rate": 4.7473239510003706e-05, "loss": 2.3596, "step": 1746500 }, { "epoch": 5.06, "learning_rate": 4.747251586235643e-05, "loss": 2.3961, "step": 1747000 }, { "epoch": 5.06, "learning_rate": 4.747179221470916e-05, "loss": 2.3573, "step": 1747500 }, { "epoch": 5.06, "learning_rate": 4.747106856706188e-05, "loss": 2.3902, "step": 1748000 }, { "epoch": 5.06, "learning_rate": 4.74703449194146e-05, "loss": 2.3707, "step": 1748500 }, { "epoch": 5.06, "learning_rate": 4.7469621271767324e-05, "loss": 2.3878, "step": 1749000 }, { "epoch": 5.06, "learning_rate": 4.746889907141534e-05, "loss": 2.3955, "step": 1749500 }, { "epoch": 5.07, "learning_rate": 4.746817542376806e-05, "loss": 2.3746, "step": 1750000 }, { "epoch": 5.07, "learning_rate": 4.7467453223416084e-05, "loss": 2.3771, "step": 1750500 }, { "epoch": 5.07, "learning_rate": 4.7466729575768806e-05, "loss": 2.3907, "step": 1751000 }, { "epoch": 5.07, "learning_rate": 4.746600592812153e-05, "loss": 2.3933, "step": 1751500 }, { "epoch": 5.07, "learning_rate": 4.746528228047426e-05, "loss": 2.3892, "step": 1752000 }, { "epoch": 5.07, "learning_rate": 4.746455863282698e-05, "loss": 2.3967, "step": 1752500 }, { "epoch": 5.07, "learning_rate": 4.74638349851797e-05, "loss": 2.4095, "step": 1753000 }, { "epoch": 5.08, "learning_rate": 4.7463111337532424e-05, "loss": 2.3727, "step": 1753500 }, { "epoch": 5.08, "learning_rate": 4.7462387689885146e-05, "loss": 2.3861, "step": 1754000 }, { "epoch": 5.08, "learning_rate": 4.746166548953316e-05, "loss": 2.3774, "step": 1754500 }, { "epoch": 5.08, "learning_rate": 4.7460941841885884e-05, "loss": 2.4048, "step": 1755000 }, { "epoch": 5.08, "learning_rate": 4.7460218194238606e-05, "loss": 2.384, "step": 1755500 }, { "epoch": 5.08, "learning_rate": 4.7459494546591335e-05, "loss": 2.3666, "step": 1756000 }, { "epoch": 5.08, "learning_rate": 4.745877234623935e-05, "loss": 2.3775, "step": 1756500 }, { "epoch": 5.09, "learning_rate": 4.745804869859207e-05, "loss": 2.3725, "step": 1757000 }, { "epoch": 5.09, "learning_rate": 4.7457325050944795e-05, "loss": 2.3778, "step": 1757500 }, { "epoch": 5.09, "learning_rate": 4.745660285059281e-05, "loss": 2.3716, "step": 1758000 }, { "epoch": 5.09, "learning_rate": 4.745587920294553e-05, "loss": 2.3939, "step": 1758500 }, { "epoch": 5.09, "learning_rate": 4.7455155555298255e-05, "loss": 2.3838, "step": 1759000 }, { "epoch": 5.09, "learning_rate": 4.7454431907650984e-05, "loss": 2.3606, "step": 1759500 }, { "epoch": 5.09, "learning_rate": 4.745370826000371e-05, "loss": 2.374, "step": 1760000 }, { "epoch": 5.1, "learning_rate": 4.745298605965173e-05, "loss": 2.3835, "step": 1760500 }, { "epoch": 5.1, "learning_rate": 4.745226241200445e-05, "loss": 2.3654, "step": 1761000 }, { "epoch": 5.1, "learning_rate": 4.7451538764357174e-05, "loss": 2.3893, "step": 1761500 }, { "epoch": 5.1, "learning_rate": 4.7450815116709896e-05, "loss": 2.3515, "step": 1762000 }, { "epoch": 5.1, "learning_rate": 4.745009146906262e-05, "loss": 2.353, "step": 1762500 }, { "epoch": 5.1, "learning_rate": 4.744936782141534e-05, "loss": 2.3741, "step": 1763000 }, { "epoch": 5.1, "learning_rate": 4.744864417376806e-05, "loss": 2.3632, "step": 1763500 }, { "epoch": 5.11, "learning_rate": 4.7447920526120785e-05, "loss": 2.3711, "step": 1764000 }, { "epoch": 5.11, "learning_rate": 4.744719832576881e-05, "loss": 2.3788, "step": 1764500 }, { "epoch": 5.11, "learning_rate": 4.744647467812153e-05, "loss": 2.3975, "step": 1765000 }, { "epoch": 5.11, "learning_rate": 4.744575103047425e-05, "loss": 2.3832, "step": 1765500 }, { "epoch": 5.11, "learning_rate": 4.7445027382826974e-05, "loss": 2.3882, "step": 1766000 }, { "epoch": 5.11, "learning_rate": 4.7444303735179696e-05, "loss": 2.4107, "step": 1766500 }, { "epoch": 5.11, "learning_rate": 4.7443580087532425e-05, "loss": 2.3764, "step": 1767000 }, { "epoch": 5.12, "learning_rate": 4.744285643988515e-05, "loss": 2.3906, "step": 1767500 }, { "epoch": 5.12, "learning_rate": 4.744213423953316e-05, "loss": 2.3683, "step": 1768000 }, { "epoch": 5.12, "learning_rate": 4.7441412039181185e-05, "loss": 2.3758, "step": 1768500 }, { "epoch": 5.12, "learning_rate": 4.744068839153391e-05, "loss": 2.3808, "step": 1769000 }, { "epoch": 5.12, "learning_rate": 4.743996474388663e-05, "loss": 2.3902, "step": 1769500 }, { "epoch": 5.12, "learning_rate": 4.743924109623935e-05, "loss": 2.3682, "step": 1770000 }, { "epoch": 5.12, "learning_rate": 4.7438517448592074e-05, "loss": 2.3682, "step": 1770500 }, { "epoch": 5.13, "learning_rate": 4.7437793800944796e-05, "loss": 2.3859, "step": 1771000 }, { "epoch": 5.13, "learning_rate": 4.743707015329752e-05, "loss": 2.3927, "step": 1771500 }, { "epoch": 5.13, "learning_rate": 4.743634650565024e-05, "loss": 2.3812, "step": 1772000 }, { "epoch": 5.13, "learning_rate": 4.743562285800296e-05, "loss": 2.3971, "step": 1772500 }, { "epoch": 5.13, "learning_rate": 4.7434899210355685e-05, "loss": 2.3739, "step": 1773000 }, { "epoch": 5.13, "learning_rate": 4.743417701000371e-05, "loss": 2.3691, "step": 1773500 }, { "epoch": 5.14, "learning_rate": 4.743345336235643e-05, "loss": 2.395, "step": 1774000 }, { "epoch": 5.14, "learning_rate": 4.743272971470916e-05, "loss": 2.3592, "step": 1774500 }, { "epoch": 5.14, "learning_rate": 4.743200606706188e-05, "loss": 2.4117, "step": 1775000 }, { "epoch": 5.14, "learning_rate": 4.74312824194146e-05, "loss": 2.3694, "step": 1775500 }, { "epoch": 5.14, "learning_rate": 4.7430558771767325e-05, "loss": 2.3732, "step": 1776000 }, { "epoch": 5.14, "learning_rate": 4.742983512412005e-05, "loss": 2.3718, "step": 1776500 }, { "epoch": 5.14, "learning_rate": 4.742911147647277e-05, "loss": 2.3877, "step": 1777000 }, { "epoch": 5.15, "learning_rate": 4.742838782882549e-05, "loss": 2.3647, "step": 1777500 }, { "epoch": 5.15, "learning_rate": 4.742766562847351e-05, "loss": 2.3959, "step": 1778000 }, { "epoch": 5.15, "learning_rate": 4.742694342812153e-05, "loss": 2.3962, "step": 1778500 }, { "epoch": 5.15, "learning_rate": 4.742621978047425e-05, "loss": 2.3691, "step": 1779000 }, { "epoch": 5.15, "learning_rate": 4.7425496132826974e-05, "loss": 2.401, "step": 1779500 }, { "epoch": 5.15, "learning_rate": 4.742477393247499e-05, "loss": 2.3888, "step": 1780000 }, { "epoch": 5.15, "learning_rate": 4.742405028482771e-05, "loss": 2.3976, "step": 1780500 }, { "epoch": 5.16, "learning_rate": 4.7423326637180435e-05, "loss": 2.3761, "step": 1781000 }, { "epoch": 5.16, "learning_rate": 4.7422602989533164e-05, "loss": 2.367, "step": 1781500 }, { "epoch": 5.16, "learning_rate": 4.7421879341885886e-05, "loss": 2.4191, "step": 1782000 }, { "epoch": 5.16, "learning_rate": 4.742115714153391e-05, "loss": 2.3683, "step": 1782500 }, { "epoch": 5.16, "learning_rate": 4.742043349388663e-05, "loss": 2.3726, "step": 1783000 }, { "epoch": 5.16, "learning_rate": 4.741970984623935e-05, "loss": 2.3932, "step": 1783500 }, { "epoch": 5.16, "learning_rate": 4.7418986198592075e-05, "loss": 2.3841, "step": 1784000 }, { "epoch": 5.17, "learning_rate": 4.74182625509448e-05, "loss": 2.3792, "step": 1784500 }, { "epoch": 5.17, "learning_rate": 4.741753890329752e-05, "loss": 2.3805, "step": 1785000 }, { "epoch": 5.17, "learning_rate": 4.741681525565024e-05, "loss": 2.383, "step": 1785500 }, { "epoch": 5.17, "learning_rate": 4.7416091608002964e-05, "loss": 2.389, "step": 1786000 }, { "epoch": 5.17, "learning_rate": 4.7415367960355686e-05, "loss": 2.3876, "step": 1786500 }, { "epoch": 5.17, "learning_rate": 4.7414644312708415e-05, "loss": 2.3426, "step": 1787000 }, { "epoch": 5.17, "learning_rate": 4.741392066506114e-05, "loss": 2.3482, "step": 1787500 }, { "epoch": 5.18, "learning_rate": 4.741319701741386e-05, "loss": 2.3731, "step": 1788000 }, { "epoch": 5.18, "learning_rate": 4.741247336976658e-05, "loss": 2.3767, "step": 1788500 }, { "epoch": 5.18, "learning_rate": 4.74117511694146e-05, "loss": 2.3832, "step": 1789000 }, { "epoch": 5.18, "learning_rate": 4.7411027521767326e-05, "loss": 2.3954, "step": 1789500 }, { "epoch": 5.18, "learning_rate": 4.741030532141534e-05, "loss": 2.3708, "step": 1790000 }, { "epoch": 5.18, "learning_rate": 4.7409581673768064e-05, "loss": 2.3777, "step": 1790500 }, { "epoch": 5.18, "learning_rate": 4.7408858026120786e-05, "loss": 2.388, "step": 1791000 }, { "epoch": 5.19, "learning_rate": 4.7408134378473515e-05, "loss": 2.3924, "step": 1791500 }, { "epoch": 5.19, "learning_rate": 4.740741073082624e-05, "loss": 2.4062, "step": 1792000 }, { "epoch": 5.19, "learning_rate": 4.740668708317896e-05, "loss": 2.3952, "step": 1792500 }, { "epoch": 5.19, "learning_rate": 4.740596343553168e-05, "loss": 2.3561, "step": 1793000 }, { "epoch": 5.19, "learning_rate": 4.7405239787884404e-05, "loss": 2.3949, "step": 1793500 }, { "epoch": 5.19, "learning_rate": 4.740451758753242e-05, "loss": 2.3595, "step": 1794000 }, { "epoch": 5.19, "learning_rate": 4.740379393988514e-05, "loss": 2.3916, "step": 1794500 }, { "epoch": 5.2, "learning_rate": 4.7403070292237864e-05, "loss": 2.3747, "step": 1795000 }, { "epoch": 5.2, "learning_rate": 4.7402346644590586e-05, "loss": 2.3892, "step": 1795500 }, { "epoch": 5.2, "learning_rate": 4.740162444423861e-05, "loss": 2.3946, "step": 1796000 }, { "epoch": 5.2, "learning_rate": 4.740090079659133e-05, "loss": 2.3741, "step": 1796500 }, { "epoch": 5.2, "learning_rate": 4.740017714894406e-05, "loss": 2.3738, "step": 1797000 }, { "epoch": 5.2, "learning_rate": 4.739945350129678e-05, "loss": 2.3968, "step": 1797500 }, { "epoch": 5.2, "learning_rate": 4.7398729853649505e-05, "loss": 2.3736, "step": 1798000 }, { "epoch": 5.21, "learning_rate": 4.739800765329752e-05, "loss": 2.3853, "step": 1798500 }, { "epoch": 5.21, "learning_rate": 4.739728400565024e-05, "loss": 2.368, "step": 1799000 }, { "epoch": 5.21, "learning_rate": 4.7396560358002965e-05, "loss": 2.4036, "step": 1799500 }, { "epoch": 5.21, "learning_rate": 4.739583671035569e-05, "loss": 2.3831, "step": 1800000 }, { "epoch": 5.21, "learning_rate": 4.7395113062708416e-05, "loss": 2.3612, "step": 1800500 }, { "epoch": 5.21, "learning_rate": 4.739438941506114e-05, "loss": 2.3826, "step": 1801000 }, { "epoch": 5.21, "learning_rate": 4.739366576741386e-05, "loss": 2.4293, "step": 1801500 }, { "epoch": 5.22, "learning_rate": 4.739294501435717e-05, "loss": 2.3633, "step": 1802000 }, { "epoch": 5.22, "learning_rate": 4.739222281400519e-05, "loss": 2.3606, "step": 1802500 }, { "epoch": 5.22, "learning_rate": 4.7391499166357914e-05, "loss": 2.4079, "step": 1803000 }, { "epoch": 5.22, "learning_rate": 4.7390775518710636e-05, "loss": 2.363, "step": 1803500 }, { "epoch": 5.22, "learning_rate": 4.739005187106336e-05, "loss": 2.3774, "step": 1804000 }, { "epoch": 5.22, "learning_rate": 4.738932822341608e-05, "loss": 2.3716, "step": 1804500 }, { "epoch": 5.22, "learning_rate": 4.738860457576881e-05, "loss": 2.374, "step": 1805000 }, { "epoch": 5.23, "learning_rate": 4.738788092812153e-05, "loss": 2.3518, "step": 1805500 }, { "epoch": 5.23, "learning_rate": 4.7387157280474254e-05, "loss": 2.3775, "step": 1806000 }, { "epoch": 5.23, "learning_rate": 4.738643508012227e-05, "loss": 2.3628, "step": 1806500 }, { "epoch": 5.23, "learning_rate": 4.738571143247499e-05, "loss": 2.3742, "step": 1807000 }, { "epoch": 5.23, "learning_rate": 4.7384987784827714e-05, "loss": 2.3834, "step": 1807500 }, { "epoch": 5.23, "learning_rate": 4.738426413718044e-05, "loss": 2.3827, "step": 1808000 }, { "epoch": 5.23, "learning_rate": 4.7383540489533165e-05, "loss": 2.3519, "step": 1808500 }, { "epoch": 5.24, "learning_rate": 4.738281684188589e-05, "loss": 2.399, "step": 1809000 }, { "epoch": 5.24, "learning_rate": 4.738209319423861e-05, "loss": 2.3733, "step": 1809500 }, { "epoch": 5.24, "learning_rate": 4.738136954659133e-05, "loss": 2.3998, "step": 1810000 }, { "epoch": 5.24, "learning_rate": 4.7380645898944054e-05, "loss": 2.3917, "step": 1810500 }, { "epoch": 5.24, "learning_rate": 4.7379922251296776e-05, "loss": 2.3691, "step": 1811000 }, { "epoch": 5.24, "learning_rate": 4.73791986036495e-05, "loss": 2.3995, "step": 1811500 }, { "epoch": 5.24, "learning_rate": 4.737847495600223e-05, "loss": 2.3581, "step": 1812000 }, { "epoch": 5.25, "learning_rate": 4.737775130835495e-05, "loss": 2.4082, "step": 1812500 }, { "epoch": 5.25, "learning_rate": 4.737702766070767e-05, "loss": 2.3743, "step": 1813000 }, { "epoch": 5.25, "learning_rate": 4.7376305460355694e-05, "loss": 2.3733, "step": 1813500 }, { "epoch": 5.25, "learning_rate": 4.7375581812708417e-05, "loss": 2.3884, "step": 1814000 }, { "epoch": 5.25, "learning_rate": 4.737485816506114e-05, "loss": 2.3524, "step": 1814500 }, { "epoch": 5.25, "learning_rate": 4.737413451741386e-05, "loss": 2.3757, "step": 1815000 }, { "epoch": 5.26, "learning_rate": 4.737341086976658e-05, "loss": 2.3976, "step": 1815500 }, { "epoch": 5.26, "learning_rate": 4.7372687222119305e-05, "loss": 2.3632, "step": 1816000 }, { "epoch": 5.26, "learning_rate": 4.737196357447203e-05, "loss": 2.372, "step": 1816500 }, { "epoch": 5.26, "learning_rate": 4.737123992682475e-05, "loss": 2.3764, "step": 1817000 }, { "epoch": 5.26, "learning_rate": 4.7370517726472766e-05, "loss": 2.3828, "step": 1817500 }, { "epoch": 5.26, "learning_rate": 4.7369794078825495e-05, "loss": 2.3834, "step": 1818000 }, { "epoch": 5.26, "learning_rate": 4.736907043117822e-05, "loss": 2.3817, "step": 1818500 }, { "epoch": 5.27, "learning_rate": 4.736834678353094e-05, "loss": 2.3768, "step": 1819000 }, { "epoch": 5.27, "learning_rate": 4.736762458317896e-05, "loss": 2.3905, "step": 1819500 }, { "epoch": 5.27, "learning_rate": 4.7366900935531684e-05, "loss": 2.3846, "step": 1820000 }, { "epoch": 5.27, "learning_rate": 4.7366177287884406e-05, "loss": 2.3619, "step": 1820500 }, { "epoch": 5.27, "learning_rate": 4.736545364023713e-05, "loss": 2.3772, "step": 1821000 }, { "epoch": 5.27, "learning_rate": 4.736472999258985e-05, "loss": 2.3972, "step": 1821500 }, { "epoch": 5.27, "learning_rate": 4.736400634494257e-05, "loss": 2.3525, "step": 1822000 }, { "epoch": 5.28, "learning_rate": 4.7363282697295295e-05, "loss": 2.3757, "step": 1822500 }, { "epoch": 5.28, "learning_rate": 4.736256049694332e-05, "loss": 2.3885, "step": 1823000 }, { "epoch": 5.28, "learning_rate": 4.736183684929604e-05, "loss": 2.3797, "step": 1823500 }, { "epoch": 5.28, "learning_rate": 4.736111320164876e-05, "loss": 2.3754, "step": 1824000 }, { "epoch": 5.28, "learning_rate": 4.7360389554001484e-05, "loss": 2.396, "step": 1824500 }, { "epoch": 5.28, "learning_rate": 4.7359665906354206e-05, "loss": 2.3785, "step": 1825000 }, { "epoch": 5.28, "learning_rate": 4.735894225870693e-05, "loss": 2.3814, "step": 1825500 }, { "epoch": 5.29, "learning_rate": 4.735821861105965e-05, "loss": 2.3737, "step": 1826000 }, { "epoch": 5.29, "learning_rate": 4.735749496341238e-05, "loss": 2.3669, "step": 1826500 }, { "epoch": 5.29, "learning_rate": 4.73567713157651e-05, "loss": 2.3985, "step": 1827000 }, { "epoch": 5.29, "learning_rate": 4.735604911541312e-05, "loss": 2.3641, "step": 1827500 }, { "epoch": 5.29, "learning_rate": 4.735532691506114e-05, "loss": 2.4, "step": 1828000 }, { "epoch": 5.29, "learning_rate": 4.735460326741386e-05, "loss": 2.4089, "step": 1828500 }, { "epoch": 5.29, "learning_rate": 4.735388106706188e-05, "loss": 2.3796, "step": 1829000 }, { "epoch": 5.3, "learning_rate": 4.73531574194146e-05, "loss": 2.3769, "step": 1829500 }, { "epoch": 5.3, "learning_rate": 4.735243377176732e-05, "loss": 2.3571, "step": 1830000 }, { "epoch": 5.3, "learning_rate": 4.7351710124120044e-05, "loss": 2.3712, "step": 1830500 }, { "epoch": 5.3, "learning_rate": 4.7350986476472766e-05, "loss": 2.3894, "step": 1831000 }, { "epoch": 5.3, "learning_rate": 4.7350262828825495e-05, "loss": 2.3837, "step": 1831500 }, { "epoch": 5.3, "learning_rate": 4.734953918117822e-05, "loss": 2.3639, "step": 1832000 }, { "epoch": 5.3, "learning_rate": 4.734881553353094e-05, "loss": 2.3925, "step": 1832500 }, { "epoch": 5.31, "learning_rate": 4.7348093333178955e-05, "loss": 2.3843, "step": 1833000 }, { "epoch": 5.31, "learning_rate": 4.734736968553168e-05, "loss": 2.3865, "step": 1833500 }, { "epoch": 5.31, "learning_rate": 4.73466460378844e-05, "loss": 2.3784, "step": 1834000 }, { "epoch": 5.31, "learning_rate": 4.734592239023713e-05, "loss": 2.366, "step": 1834500 }, { "epoch": 5.31, "learning_rate": 4.734519874258985e-05, "loss": 2.3607, "step": 1835000 }, { "epoch": 5.31, "learning_rate": 4.734447509494257e-05, "loss": 2.3726, "step": 1835500 }, { "epoch": 5.31, "learning_rate": 4.7343751447295296e-05, "loss": 2.3853, "step": 1836000 }, { "epoch": 5.32, "learning_rate": 4.734302779964802e-05, "loss": 2.3979, "step": 1836500 }, { "epoch": 5.32, "learning_rate": 4.734230415200075e-05, "loss": 2.4087, "step": 1837000 }, { "epoch": 5.32, "learning_rate": 4.734158195164876e-05, "loss": 2.3815, "step": 1837500 }, { "epoch": 5.32, "learning_rate": 4.7340858304001485e-05, "loss": 2.3579, "step": 1838000 }, { "epoch": 5.32, "learning_rate": 4.73401361036495e-05, "loss": 2.367, "step": 1838500 }, { "epoch": 5.32, "learning_rate": 4.733941245600222e-05, "loss": 2.376, "step": 1839000 }, { "epoch": 5.32, "learning_rate": 4.7338688808354945e-05, "loss": 2.3939, "step": 1839500 }, { "epoch": 5.33, "learning_rate": 4.733796516070767e-05, "loss": 2.3648, "step": 1840000 }, { "epoch": 5.33, "learning_rate": 4.7337241513060396e-05, "loss": 2.37, "step": 1840500 }, { "epoch": 5.33, "learning_rate": 4.733651786541312e-05, "loss": 2.3816, "step": 1841000 }, { "epoch": 5.33, "learning_rate": 4.7335795665061134e-05, "loss": 2.393, "step": 1841500 }, { "epoch": 5.33, "learning_rate": 4.733507201741386e-05, "loss": 2.3828, "step": 1842000 }, { "epoch": 5.33, "learning_rate": 4.7334348369766585e-05, "loss": 2.3752, "step": 1842500 }, { "epoch": 5.33, "learning_rate": 4.733362472211931e-05, "loss": 2.3883, "step": 1843000 }, { "epoch": 5.34, "learning_rate": 4.733290396906262e-05, "loss": 2.3784, "step": 1843500 }, { "epoch": 5.34, "learning_rate": 4.7332180321415345e-05, "loss": 2.3734, "step": 1844000 }, { "epoch": 5.34, "learning_rate": 4.733145667376807e-05, "loss": 2.3978, "step": 1844500 }, { "epoch": 5.34, "learning_rate": 4.733073302612079e-05, "loss": 2.3879, "step": 1845000 }, { "epoch": 5.34, "learning_rate": 4.733000937847351e-05, "loss": 2.3861, "step": 1845500 }, { "epoch": 5.34, "learning_rate": 4.7329285730826234e-05, "loss": 2.3928, "step": 1846000 }, { "epoch": 5.34, "learning_rate": 4.7328562083178956e-05, "loss": 2.3909, "step": 1846500 }, { "epoch": 5.35, "learning_rate": 4.732783843553168e-05, "loss": 2.3773, "step": 1847000 }, { "epoch": 5.35, "learning_rate": 4.7327116235179694e-05, "loss": 2.3936, "step": 1847500 }, { "epoch": 5.35, "learning_rate": 4.732639258753242e-05, "loss": 2.3855, "step": 1848000 }, { "epoch": 5.35, "learning_rate": 4.7325668939885145e-05, "loss": 2.4164, "step": 1848500 }, { "epoch": 5.35, "learning_rate": 4.732494529223787e-05, "loss": 2.34, "step": 1849000 }, { "epoch": 5.35, "learning_rate": 4.7324221644590596e-05, "loss": 2.3758, "step": 1849500 }, { "epoch": 5.35, "learning_rate": 4.732349799694332e-05, "loss": 2.3595, "step": 1850000 }, { "epoch": 5.36, "learning_rate": 4.732277434929604e-05, "loss": 2.3902, "step": 1850500 }, { "epoch": 5.36, "learning_rate": 4.732205070164876e-05, "loss": 2.3759, "step": 1851000 }, { "epoch": 5.36, "learning_rate": 4.7321327054001485e-05, "loss": 2.3805, "step": 1851500 }, { "epoch": 5.36, "learning_rate": 4.73206048536495e-05, "loss": 2.3842, "step": 1852000 }, { "epoch": 5.36, "learning_rate": 4.731988120600222e-05, "loss": 2.3776, "step": 1852500 }, { "epoch": 5.36, "learning_rate": 4.7319157558354945e-05, "loss": 2.3794, "step": 1853000 }, { "epoch": 5.37, "learning_rate": 4.7318433910707674e-05, "loss": 2.3803, "step": 1853500 }, { "epoch": 5.37, "learning_rate": 4.731771171035569e-05, "loss": 2.3943, "step": 1854000 }, { "epoch": 5.37, "learning_rate": 4.731698806270841e-05, "loss": 2.3779, "step": 1854500 }, { "epoch": 5.37, "learning_rate": 4.731626586235643e-05, "loss": 2.3947, "step": 1855000 }, { "epoch": 5.37, "learning_rate": 4.731554221470915e-05, "loss": 2.3792, "step": 1855500 }, { "epoch": 5.37, "learning_rate": 4.731481856706187e-05, "loss": 2.3694, "step": 1856000 }, { "epoch": 5.37, "learning_rate": 4.7314094919414594e-05, "loss": 2.3817, "step": 1856500 }, { "epoch": 5.38, "learning_rate": 4.7313371271767323e-05, "loss": 2.3686, "step": 1857000 }, { "epoch": 5.38, "learning_rate": 4.7312647624120046e-05, "loss": 2.3854, "step": 1857500 }, { "epoch": 5.38, "learning_rate": 4.7311923976472775e-05, "loss": 2.4116, "step": 1858000 }, { "epoch": 5.38, "learning_rate": 4.73112003288255e-05, "loss": 2.3985, "step": 1858500 }, { "epoch": 5.38, "learning_rate": 4.731047812847351e-05, "loss": 2.3651, "step": 1859000 }, { "epoch": 5.38, "learning_rate": 4.7309754480826235e-05, "loss": 2.4042, "step": 1859500 }, { "epoch": 5.38, "learning_rate": 4.730903083317896e-05, "loss": 2.3556, "step": 1860000 }, { "epoch": 5.39, "learning_rate": 4.730830718553168e-05, "loss": 2.3837, "step": 1860500 }, { "epoch": 5.39, "learning_rate": 4.73075849851797e-05, "loss": 2.3746, "step": 1861000 }, { "epoch": 5.39, "learning_rate": 4.7306861337532424e-05, "loss": 2.3957, "step": 1861500 }, { "epoch": 5.39, "learning_rate": 4.7306137689885146e-05, "loss": 2.3825, "step": 1862000 }, { "epoch": 5.39, "learning_rate": 4.730541548953316e-05, "loss": 2.3972, "step": 1862500 }, { "epoch": 5.39, "learning_rate": 4.7304691841885884e-05, "loss": 2.3815, "step": 1863000 }, { "epoch": 5.39, "learning_rate": 4.7303968194238606e-05, "loss": 2.3895, "step": 1863500 }, { "epoch": 5.4, "learning_rate": 4.730324599388662e-05, "loss": 2.3755, "step": 1864000 }, { "epoch": 5.4, "learning_rate": 4.730252234623935e-05, "loss": 2.3808, "step": 1864500 }, { "epoch": 5.4, "learning_rate": 4.730179869859207e-05, "loss": 2.3661, "step": 1865000 }, { "epoch": 5.4, "learning_rate": 4.73010750509448e-05, "loss": 2.3536, "step": 1865500 }, { "epoch": 5.4, "learning_rate": 4.7300351403297524e-05, "loss": 2.3845, "step": 1866000 }, { "epoch": 5.4, "learning_rate": 4.7299627755650246e-05, "loss": 2.3924, "step": 1866500 }, { "epoch": 5.4, "learning_rate": 4.729890410800297e-05, "loss": 2.3821, "step": 1867000 }, { "epoch": 5.41, "learning_rate": 4.729818046035569e-05, "loss": 2.392, "step": 1867500 }, { "epoch": 5.41, "learning_rate": 4.729745681270841e-05, "loss": 2.3652, "step": 1868000 }, { "epoch": 5.41, "learning_rate": 4.7296733165061135e-05, "loss": 2.3777, "step": 1868500 }, { "epoch": 5.41, "learning_rate": 4.729600951741386e-05, "loss": 2.3909, "step": 1869000 }, { "epoch": 5.41, "learning_rate": 4.729528586976658e-05, "loss": 2.3707, "step": 1869500 }, { "epoch": 5.41, "learning_rate": 4.72945636694146e-05, "loss": 2.3875, "step": 1870000 }, { "epoch": 5.41, "learning_rate": 4.7293840021767324e-05, "loss": 2.3904, "step": 1870500 }, { "epoch": 5.42, "learning_rate": 4.7293116374120046e-05, "loss": 2.4019, "step": 1871000 }, { "epoch": 5.42, "learning_rate": 4.729239272647277e-05, "loss": 2.4042, "step": 1871500 }, { "epoch": 5.42, "learning_rate": 4.72916690788255e-05, "loss": 2.3815, "step": 1872000 }, { "epoch": 5.42, "learning_rate": 4.729094543117822e-05, "loss": 2.3943, "step": 1872500 }, { "epoch": 5.42, "learning_rate": 4.729022178353094e-05, "loss": 2.4012, "step": 1873000 }, { "epoch": 5.42, "learning_rate": 4.7289498135883664e-05, "loss": 2.3796, "step": 1873500 }, { "epoch": 5.42, "learning_rate": 4.728877448823639e-05, "loss": 2.3847, "step": 1874000 }, { "epoch": 5.43, "learning_rate": 4.72880522878844e-05, "loss": 2.3902, "step": 1874500 }, { "epoch": 5.43, "learning_rate": 4.7287328640237124e-05, "loss": 2.4118, "step": 1875000 }, { "epoch": 5.43, "learning_rate": 4.7286604992589853e-05, "loss": 2.3796, "step": 1875500 }, { "epoch": 5.43, "learning_rate": 4.728588279223787e-05, "loss": 2.389, "step": 1876000 }, { "epoch": 5.43, "learning_rate": 4.728515914459059e-05, "loss": 2.39, "step": 1876500 }, { "epoch": 5.43, "learning_rate": 4.7284435496943313e-05, "loss": 2.3849, "step": 1877000 }, { "epoch": 5.43, "learning_rate": 4.7283711849296036e-05, "loss": 2.3932, "step": 1877500 }, { "epoch": 5.44, "learning_rate": 4.728298820164876e-05, "loss": 2.3834, "step": 1878000 }, { "epoch": 5.44, "learning_rate": 4.728226455400148e-05, "loss": 2.3769, "step": 1878500 }, { "epoch": 5.44, "learning_rate": 4.728154090635421e-05, "loss": 2.3834, "step": 1879000 }, { "epoch": 5.44, "learning_rate": 4.728081725870693e-05, "loss": 2.3822, "step": 1879500 }, { "epoch": 5.44, "learning_rate": 4.7280095058354954e-05, "loss": 2.3963, "step": 1880000 }, { "epoch": 5.44, "learning_rate": 4.7279371410707676e-05, "loss": 2.3573, "step": 1880500 }, { "epoch": 5.44, "learning_rate": 4.727864921035569e-05, "loss": 2.3737, "step": 1881000 }, { "epoch": 5.45, "learning_rate": 4.7277925562708414e-05, "loss": 2.3784, "step": 1881500 }, { "epoch": 5.45, "learning_rate": 4.7277201915061136e-05, "loss": 2.392, "step": 1882000 }, { "epoch": 5.45, "learning_rate": 4.727647826741386e-05, "loss": 2.4019, "step": 1882500 }, { "epoch": 5.45, "learning_rate": 4.727575461976658e-05, "loss": 2.3869, "step": 1883000 }, { "epoch": 5.45, "learning_rate": 4.72750309721193e-05, "loss": 2.3843, "step": 1883500 }, { "epoch": 5.45, "learning_rate": 4.7274307324472025e-05, "loss": 2.3981, "step": 1884000 }, { "epoch": 5.45, "learning_rate": 4.7273583676824754e-05, "loss": 2.3824, "step": 1884500 }, { "epoch": 5.46, "learning_rate": 4.7272860029177476e-05, "loss": 2.3845, "step": 1885000 }, { "epoch": 5.46, "learning_rate": 4.727213782882549e-05, "loss": 2.4037, "step": 1885500 }, { "epoch": 5.46, "learning_rate": 4.7271414181178214e-05, "loss": 2.3769, "step": 1886000 }, { "epoch": 5.46, "learning_rate": 4.7270690533530936e-05, "loss": 2.3452, "step": 1886500 }, { "epoch": 5.46, "learning_rate": 4.7269966885883665e-05, "loss": 2.3751, "step": 1887000 }, { "epoch": 5.46, "learning_rate": 4.726924468553168e-05, "loss": 2.3725, "step": 1887500 }, { "epoch": 5.46, "learning_rate": 4.72685210378844e-05, "loss": 2.3723, "step": 1888000 }, { "epoch": 5.47, "learning_rate": 4.7267797390237125e-05, "loss": 2.3817, "step": 1888500 }, { "epoch": 5.47, "learning_rate": 4.7267073742589854e-05, "loss": 2.4071, "step": 1889000 }, { "epoch": 5.47, "learning_rate": 4.7266350094942576e-05, "loss": 2.3741, "step": 1889500 }, { "epoch": 5.47, "learning_rate": 4.72656264472953e-05, "loss": 2.3813, "step": 1890000 }, { "epoch": 5.47, "learning_rate": 4.726490279964802e-05, "loss": 2.3977, "step": 1890500 }, { "epoch": 5.47, "learning_rate": 4.726417915200074e-05, "loss": 2.4075, "step": 1891000 }, { "epoch": 5.48, "learning_rate": 4.726345695164876e-05, "loss": 2.38, "step": 1891500 }, { "epoch": 5.48, "learning_rate": 4.726273330400148e-05, "loss": 2.3625, "step": 1892000 }, { "epoch": 5.48, "learning_rate": 4.72620096563542e-05, "loss": 2.3791, "step": 1892500 }, { "epoch": 5.48, "learning_rate": 4.7261286008706925e-05, "loss": 2.3769, "step": 1893000 }, { "epoch": 5.48, "learning_rate": 4.726056380835495e-05, "loss": 2.3913, "step": 1893500 }, { "epoch": 5.48, "learning_rate": 4.725984016070767e-05, "loss": 2.3991, "step": 1894000 }, { "epoch": 5.48, "learning_rate": 4.72591165130604e-05, "loss": 2.3851, "step": 1894500 }, { "epoch": 5.49, "learning_rate": 4.725839286541312e-05, "loss": 2.3767, "step": 1895000 }, { "epoch": 5.49, "learning_rate": 4.7257669217765843e-05, "loss": 2.3751, "step": 1895500 }, { "epoch": 5.49, "learning_rate": 4.725694701741386e-05, "loss": 2.3781, "step": 1896000 }, { "epoch": 5.49, "learning_rate": 4.725622336976658e-05, "loss": 2.3955, "step": 1896500 }, { "epoch": 5.49, "learning_rate": 4.7255499722119303e-05, "loss": 2.395, "step": 1897000 }, { "epoch": 5.49, "learning_rate": 4.725477607447203e-05, "loss": 2.376, "step": 1897500 }, { "epoch": 5.49, "learning_rate": 4.725405387412005e-05, "loss": 2.3773, "step": 1898000 }, { "epoch": 5.5, "learning_rate": 4.725333022647277e-05, "loss": 2.3621, "step": 1898500 }, { "epoch": 5.5, "learning_rate": 4.725260657882549e-05, "loss": 2.3761, "step": 1899000 }, { "epoch": 5.5, "learning_rate": 4.7251882931178215e-05, "loss": 2.4153, "step": 1899500 }, { "epoch": 5.5, "learning_rate": 4.725115928353094e-05, "loss": 2.3971, "step": 1900000 }, { "epoch": 5.5, "learning_rate": 4.725043708317895e-05, "loss": 2.3833, "step": 1900500 }, { "epoch": 5.5, "learning_rate": 4.724971343553168e-05, "loss": 2.3857, "step": 1901000 }, { "epoch": 5.5, "learning_rate": 4.7248989787884404e-05, "loss": 2.3729, "step": 1901500 }, { "epoch": 5.51, "learning_rate": 4.724826614023713e-05, "loss": 2.3998, "step": 1902000 }, { "epoch": 5.51, "learning_rate": 4.7247542492589855e-05, "loss": 2.4016, "step": 1902500 }, { "epoch": 5.51, "learning_rate": 4.724681884494258e-05, "loss": 2.3877, "step": 1903000 }, { "epoch": 5.51, "learning_rate": 4.724609664459059e-05, "loss": 2.3807, "step": 1903500 }, { "epoch": 5.51, "learning_rate": 4.7245372996943315e-05, "loss": 2.3852, "step": 1904000 }, { "epoch": 5.51, "learning_rate": 4.724464934929604e-05, "loss": 2.358, "step": 1904500 }, { "epoch": 5.51, "learning_rate": 4.724392570164876e-05, "loss": 2.3786, "step": 1905000 }, { "epoch": 5.52, "learning_rate": 4.724320205400148e-05, "loss": 2.399, "step": 1905500 }, { "epoch": 5.52, "learning_rate": 4.7242478406354204e-05, "loss": 2.3834, "step": 1906000 }, { "epoch": 5.52, "learning_rate": 4.724175475870693e-05, "loss": 2.3744, "step": 1906500 }, { "epoch": 5.52, "learning_rate": 4.7241031111059655e-05, "loss": 2.3933, "step": 1907000 }, { "epoch": 5.52, "learning_rate": 4.7240310358002964e-05, "loss": 2.3643, "step": 1907500 }, { "epoch": 5.52, "learning_rate": 4.7239586710355686e-05, "loss": 2.4048, "step": 1908000 }, { "epoch": 5.52, "learning_rate": 4.723886306270841e-05, "loss": 2.3791, "step": 1908500 }, { "epoch": 5.53, "learning_rate": 4.723813941506113e-05, "loss": 2.3894, "step": 1909000 }, { "epoch": 5.53, "learning_rate": 4.723741576741386e-05, "loss": 2.3747, "step": 1909500 }, { "epoch": 5.53, "learning_rate": 4.723669211976658e-05, "loss": 2.3744, "step": 1910000 }, { "epoch": 5.53, "learning_rate": 4.7235969919414604e-05, "loss": 2.3994, "step": 1910500 }, { "epoch": 5.53, "learning_rate": 4.723524627176733e-05, "loss": 2.3694, "step": 1911000 }, { "epoch": 5.53, "learning_rate": 4.723452262412005e-05, "loss": 2.3967, "step": 1911500 }, { "epoch": 5.53, "learning_rate": 4.723379897647277e-05, "loss": 2.3869, "step": 1912000 }, { "epoch": 5.54, "learning_rate": 4.723307532882549e-05, "loss": 2.3946, "step": 1912500 }, { "epoch": 5.54, "learning_rate": 4.7232351681178216e-05, "loss": 2.3746, "step": 1913000 }, { "epoch": 5.54, "learning_rate": 4.723162803353094e-05, "loss": 2.3918, "step": 1913500 }, { "epoch": 5.54, "learning_rate": 4.723090438588366e-05, "loss": 2.3544, "step": 1914000 }, { "epoch": 5.54, "learning_rate": 4.723018218553168e-05, "loss": 2.3579, "step": 1914500 }, { "epoch": 5.54, "learning_rate": 4.7229458537884405e-05, "loss": 2.4125, "step": 1915000 }, { "epoch": 5.54, "learning_rate": 4.722873489023713e-05, "loss": 2.3694, "step": 1915500 }, { "epoch": 5.55, "learning_rate": 4.722801124258985e-05, "loss": 2.3747, "step": 1916000 }, { "epoch": 5.55, "learning_rate": 4.722728759494257e-05, "loss": 2.3482, "step": 1916500 }, { "epoch": 5.55, "learning_rate": 4.7226565394590594e-05, "loss": 2.3728, "step": 1917000 }, { "epoch": 5.55, "learning_rate": 4.7225841746943316e-05, "loss": 2.3837, "step": 1917500 }, { "epoch": 5.55, "learning_rate": 4.722511809929604e-05, "loss": 2.372, "step": 1918000 }, { "epoch": 5.55, "learning_rate": 4.722439445164876e-05, "loss": 2.4121, "step": 1918500 }, { "epoch": 5.55, "learning_rate": 4.722367080400148e-05, "loss": 2.3956, "step": 1919000 }, { "epoch": 5.56, "learning_rate": 4.7222947156354205e-05, "loss": 2.3946, "step": 1919500 }, { "epoch": 5.56, "learning_rate": 4.722222495600223e-05, "loss": 2.3633, "step": 1920000 }, { "epoch": 5.56, "learning_rate": 4.722150130835495e-05, "loss": 2.3888, "step": 1920500 }, { "epoch": 5.56, "learning_rate": 4.722077766070767e-05, "loss": 2.3929, "step": 1921000 }, { "epoch": 5.56, "learning_rate": 4.7220054013060394e-05, "loss": 2.3743, "step": 1921500 }, { "epoch": 5.56, "learning_rate": 4.7219330365413116e-05, "loss": 2.3567, "step": 1922000 }, { "epoch": 5.56, "learning_rate": 4.721860671776584e-05, "loss": 2.3792, "step": 1922500 }, { "epoch": 5.57, "learning_rate": 4.721788307011856e-05, "loss": 2.3948, "step": 1923000 }, { "epoch": 5.57, "learning_rate": 4.721715942247128e-05, "loss": 2.3983, "step": 1923500 }, { "epoch": 5.57, "learning_rate": 4.7216437222119305e-05, "loss": 2.3829, "step": 1924000 }, { "epoch": 5.57, "learning_rate": 4.7215713574472034e-05, "loss": 2.358, "step": 1924500 }, { "epoch": 5.57, "learning_rate": 4.7214989926824756e-05, "loss": 2.3706, "step": 1925000 }, { "epoch": 5.57, "learning_rate": 4.721426627917748e-05, "loss": 2.3924, "step": 1925500 }, { "epoch": 5.57, "learning_rate": 4.72135426315302e-05, "loss": 2.3907, "step": 1926000 }, { "epoch": 5.58, "learning_rate": 4.721281898388292e-05, "loss": 2.3856, "step": 1926500 }, { "epoch": 5.58, "learning_rate": 4.721209678353094e-05, "loss": 2.3961, "step": 1927000 }, { "epoch": 5.58, "learning_rate": 4.721137313588366e-05, "loss": 2.3946, "step": 1927500 }, { "epoch": 5.58, "learning_rate": 4.721064948823638e-05, "loss": 2.3719, "step": 1928000 }, { "epoch": 5.58, "learning_rate": 4.720992584058911e-05, "loss": 2.387, "step": 1928500 }, { "epoch": 5.58, "learning_rate": 4.720920364023713e-05, "loss": 2.3995, "step": 1929000 }, { "epoch": 5.59, "learning_rate": 4.720847999258985e-05, "loss": 2.3858, "step": 1929500 }, { "epoch": 5.59, "learning_rate": 4.7207757792237865e-05, "loss": 2.3955, "step": 1930000 }, { "epoch": 5.59, "learning_rate": 4.720703414459059e-05, "loss": 2.3766, "step": 1930500 }, { "epoch": 5.59, "learning_rate": 4.720631049694331e-05, "loss": 2.363, "step": 1931000 }, { "epoch": 5.59, "learning_rate": 4.720558684929603e-05, "loss": 2.384, "step": 1931500 }, { "epoch": 5.59, "learning_rate": 4.720486320164876e-05, "loss": 2.3996, "step": 1932000 }, { "epoch": 5.59, "learning_rate": 4.720413955400148e-05, "loss": 2.3959, "step": 1932500 }, { "epoch": 5.6, "learning_rate": 4.720341590635421e-05, "loss": 2.3983, "step": 1933000 }, { "epoch": 5.6, "learning_rate": 4.7202692258706935e-05, "loss": 2.3745, "step": 1933500 }, { "epoch": 5.6, "learning_rate": 4.720196861105966e-05, "loss": 2.3597, "step": 1934000 }, { "epoch": 5.6, "learning_rate": 4.720124641070767e-05, "loss": 2.3677, "step": 1934500 }, { "epoch": 5.6, "learning_rate": 4.720052421035569e-05, "loss": 2.3778, "step": 1935000 }, { "epoch": 5.6, "learning_rate": 4.719980056270841e-05, "loss": 2.372, "step": 1935500 }, { "epoch": 5.6, "learning_rate": 4.719907691506113e-05, "loss": 2.3909, "step": 1936000 }, { "epoch": 5.61, "learning_rate": 4.719835326741386e-05, "loss": 2.3738, "step": 1936500 }, { "epoch": 5.61, "learning_rate": 4.7197629619766584e-05, "loss": 2.3847, "step": 1937000 }, { "epoch": 5.61, "learning_rate": 4.7196905972119306e-05, "loss": 2.3871, "step": 1937500 }, { "epoch": 5.61, "learning_rate": 4.719618377176732e-05, "loss": 2.3885, "step": 1938000 }, { "epoch": 5.61, "learning_rate": 4.7195460124120044e-05, "loss": 2.3941, "step": 1938500 }, { "epoch": 5.61, "learning_rate": 4.7194736476472766e-05, "loss": 2.3825, "step": 1939000 }, { "epoch": 5.61, "learning_rate": 4.719401572341608e-05, "loss": 2.3999, "step": 1939500 }, { "epoch": 5.62, "learning_rate": 4.719329207576881e-05, "loss": 2.3694, "step": 1940000 }, { "epoch": 5.62, "learning_rate": 4.719256842812153e-05, "loss": 2.3837, "step": 1940500 }, { "epoch": 5.62, "learning_rate": 4.7191844780474255e-05, "loss": 2.3803, "step": 1941000 }, { "epoch": 5.62, "learning_rate": 4.719112113282698e-05, "loss": 2.4023, "step": 1941500 }, { "epoch": 5.62, "learning_rate": 4.71903974851797e-05, "loss": 2.3822, "step": 1942000 }, { "epoch": 5.62, "learning_rate": 4.718967383753242e-05, "loss": 2.3926, "step": 1942500 }, { "epoch": 5.62, "learning_rate": 4.7188950189885144e-05, "loss": 2.3649, "step": 1943000 }, { "epoch": 5.63, "learning_rate": 4.7188226542237866e-05, "loss": 2.3778, "step": 1943500 }, { "epoch": 5.63, "learning_rate": 4.718750434188589e-05, "loss": 2.3736, "step": 1944000 }, { "epoch": 5.63, "learning_rate": 4.718678069423861e-05, "loss": 2.4025, "step": 1944500 }, { "epoch": 5.63, "learning_rate": 4.718605704659133e-05, "loss": 2.3807, "step": 1945000 }, { "epoch": 5.63, "learning_rate": 4.7185333398944055e-05, "loss": 2.3834, "step": 1945500 }, { "epoch": 5.63, "learning_rate": 4.718460975129678e-05, "loss": 2.4152, "step": 1946000 }, { "epoch": 5.63, "learning_rate": 4.71838861036495e-05, "loss": 2.4023, "step": 1946500 }, { "epoch": 5.64, "learning_rate": 4.718316245600223e-05, "loss": 2.4071, "step": 1947000 }, { "epoch": 5.64, "learning_rate": 4.718243880835495e-05, "loss": 2.4014, "step": 1947500 }, { "epoch": 5.64, "learning_rate": 4.718171516070767e-05, "loss": 2.3668, "step": 1948000 }, { "epoch": 5.64, "learning_rate": 4.718099440765099e-05, "loss": 2.3775, "step": 1948500 }, { "epoch": 5.64, "learning_rate": 4.7180272207299004e-05, "loss": 2.4065, "step": 1949000 }, { "epoch": 5.64, "learning_rate": 4.717954855965173e-05, "loss": 2.3939, "step": 1949500 }, { "epoch": 5.64, "learning_rate": 4.717882491200445e-05, "loss": 2.3973, "step": 1950000 }, { "epoch": 5.65, "learning_rate": 4.717810126435717e-05, "loss": 2.3794, "step": 1950500 }, { "epoch": 5.65, "learning_rate": 4.717737761670989e-05, "loss": 2.4148, "step": 1951000 }, { "epoch": 5.65, "learning_rate": 4.7176653969062616e-05, "loss": 2.3663, "step": 1951500 }, { "epoch": 5.65, "learning_rate": 4.717593032141534e-05, "loss": 2.3833, "step": 1952000 }, { "epoch": 5.65, "learning_rate": 4.717520667376806e-05, "loss": 2.3735, "step": 1952500 }, { "epoch": 5.65, "learning_rate": 4.717448302612079e-05, "loss": 2.4025, "step": 1953000 }, { "epoch": 5.65, "learning_rate": 4.717375937847351e-05, "loss": 2.3985, "step": 1953500 }, { "epoch": 5.66, "learning_rate": 4.7173035730826234e-05, "loss": 2.3914, "step": 1954000 }, { "epoch": 5.66, "learning_rate": 4.717231208317896e-05, "loss": 2.3794, "step": 1954500 }, { "epoch": 5.66, "learning_rate": 4.7171588435531685e-05, "loss": 2.4027, "step": 1955000 }, { "epoch": 5.66, "learning_rate": 4.71708662351797e-05, "loss": 2.3727, "step": 1955500 }, { "epoch": 5.66, "learning_rate": 4.717014258753242e-05, "loss": 2.4064, "step": 1956000 }, { "epoch": 5.66, "learning_rate": 4.716942038718044e-05, "loss": 2.3803, "step": 1956500 }, { "epoch": 5.66, "learning_rate": 4.716869673953316e-05, "loss": 2.3729, "step": 1957000 }, { "epoch": 5.67, "learning_rate": 4.716797309188589e-05, "loss": 2.3676, "step": 1957500 }, { "epoch": 5.67, "learning_rate": 4.716724944423861e-05, "loss": 2.3585, "step": 1958000 }, { "epoch": 5.67, "learning_rate": 4.7166525796591334e-05, "loss": 2.3636, "step": 1958500 }, { "epoch": 5.67, "learning_rate": 4.7165802148944056e-05, "loss": 2.3827, "step": 1959000 }, { "epoch": 5.67, "learning_rate": 4.716507850129678e-05, "loss": 2.4058, "step": 1959500 }, { "epoch": 5.67, "learning_rate": 4.71643548536495e-05, "loss": 2.3796, "step": 1960000 }, { "epoch": 5.67, "learning_rate": 4.716363120600222e-05, "loss": 2.3967, "step": 1960500 }, { "epoch": 5.68, "learning_rate": 4.7162907558354945e-05, "loss": 2.3791, "step": 1961000 }, { "epoch": 5.68, "learning_rate": 4.716218391070767e-05, "loss": 2.3689, "step": 1961500 }, { "epoch": 5.68, "learning_rate": 4.7161460263060396e-05, "loss": 2.3546, "step": 1962000 }, { "epoch": 5.68, "learning_rate": 4.716073806270841e-05, "loss": 2.3829, "step": 1962500 }, { "epoch": 5.68, "learning_rate": 4.716001441506114e-05, "loss": 2.4013, "step": 1963000 }, { "epoch": 5.68, "learning_rate": 4.715929076741386e-05, "loss": 2.3984, "step": 1963500 }, { "epoch": 5.68, "learning_rate": 4.7158567119766585e-05, "loss": 2.3762, "step": 1964000 }, { "epoch": 5.69, "learning_rate": 4.715784347211931e-05, "loss": 2.3814, "step": 1964500 }, { "epoch": 5.69, "learning_rate": 4.715711982447203e-05, "loss": 2.3857, "step": 1965000 }, { "epoch": 5.69, "learning_rate": 4.715639617682475e-05, "loss": 2.3932, "step": 1965500 }, { "epoch": 5.69, "learning_rate": 4.7155672529177474e-05, "loss": 2.3902, "step": 1966000 }, { "epoch": 5.69, "learning_rate": 4.715495032882549e-05, "loss": 2.3814, "step": 1966500 }, { "epoch": 5.69, "learning_rate": 4.715422668117821e-05, "loss": 2.3788, "step": 1967000 }, { "epoch": 5.7, "learning_rate": 4.715350303353094e-05, "loss": 2.3552, "step": 1967500 }, { "epoch": 5.7, "learning_rate": 4.715277938588366e-05, "loss": 2.4134, "step": 1968000 }, { "epoch": 5.7, "learning_rate": 4.7152055738236385e-05, "loss": 2.3887, "step": 1968500 }, { "epoch": 5.7, "learning_rate": 4.71513335378844e-05, "loss": 2.38, "step": 1969000 }, { "epoch": 5.7, "learning_rate": 4.715060989023713e-05, "loss": 2.3779, "step": 1969500 }, { "epoch": 5.7, "learning_rate": 4.714988624258985e-05, "loss": 2.4008, "step": 1970000 }, { "epoch": 5.7, "learning_rate": 4.7149162594942574e-05, "loss": 2.3748, "step": 1970500 }, { "epoch": 5.71, "learning_rate": 4.71484389472953e-05, "loss": 2.3949, "step": 1971000 }, { "epoch": 5.71, "learning_rate": 4.714771819423861e-05, "loss": 2.3804, "step": 1971500 }, { "epoch": 5.71, "learning_rate": 4.7146994546591335e-05, "loss": 2.3868, "step": 1972000 }, { "epoch": 5.71, "learning_rate": 4.714627089894406e-05, "loss": 2.3698, "step": 1972500 }, { "epoch": 5.71, "learning_rate": 4.714554725129678e-05, "loss": 2.3875, "step": 1973000 }, { "epoch": 5.71, "learning_rate": 4.71448236036495e-05, "loss": 2.3954, "step": 1973500 }, { "epoch": 5.71, "learning_rate": 4.7144099956002224e-05, "loss": 2.3576, "step": 1974000 }, { "epoch": 5.72, "learning_rate": 4.7143376308354946e-05, "loss": 2.3737, "step": 1974500 }, { "epoch": 5.72, "learning_rate": 4.714265410800297e-05, "loss": 2.3711, "step": 1975000 }, { "epoch": 5.72, "learning_rate": 4.714193046035569e-05, "loss": 2.3996, "step": 1975500 }, { "epoch": 5.72, "learning_rate": 4.714120681270841e-05, "loss": 2.3747, "step": 1976000 }, { "epoch": 5.72, "learning_rate": 4.7140483165061135e-05, "loss": 2.4001, "step": 1976500 }, { "epoch": 5.72, "learning_rate": 4.7139759517413864e-05, "loss": 2.4039, "step": 1977000 }, { "epoch": 5.72, "learning_rate": 4.7139035869766586e-05, "loss": 2.4134, "step": 1977500 }, { "epoch": 5.73, "learning_rate": 4.713831222211931e-05, "loss": 2.408, "step": 1978000 }, { "epoch": 5.73, "learning_rate": 4.713758857447203e-05, "loss": 2.3788, "step": 1978500 }, { "epoch": 5.73, "learning_rate": 4.713686492682475e-05, "loss": 2.3784, "step": 1979000 }, { "epoch": 5.73, "learning_rate": 4.7136141279177475e-05, "loss": 2.4177, "step": 1979500 }, { "epoch": 5.73, "learning_rate": 4.71354176315302e-05, "loss": 2.368, "step": 1980000 }, { "epoch": 5.73, "learning_rate": 4.713469543117822e-05, "loss": 2.3758, "step": 1980500 }, { "epoch": 5.73, "learning_rate": 4.713397178353094e-05, "loss": 2.4088, "step": 1981000 }, { "epoch": 5.74, "learning_rate": 4.7133248135883664e-05, "loss": 2.3667, "step": 1981500 }, { "epoch": 5.74, "learning_rate": 4.7132524488236386e-05, "loss": 2.3874, "step": 1982000 }, { "epoch": 5.74, "learning_rate": 4.713180084058911e-05, "loss": 2.3792, "step": 1982500 }, { "epoch": 5.74, "learning_rate": 4.713107719294183e-05, "loss": 2.3587, "step": 1983000 }, { "epoch": 5.74, "learning_rate": 4.713035354529455e-05, "loss": 2.3993, "step": 1983500 }, { "epoch": 5.74, "learning_rate": 4.712963134494257e-05, "loss": 2.3943, "step": 1984000 }, { "epoch": 5.74, "learning_rate": 4.712890914459059e-05, "loss": 2.3887, "step": 1984500 }, { "epoch": 5.75, "learning_rate": 4.712818549694332e-05, "loss": 2.3722, "step": 1985000 }, { "epoch": 5.75, "learning_rate": 4.712746184929604e-05, "loss": 2.3769, "step": 1985500 }, { "epoch": 5.75, "learning_rate": 4.712674109623935e-05, "loss": 2.3837, "step": 1986000 }, { "epoch": 5.75, "learning_rate": 4.712601744859207e-05, "loss": 2.3808, "step": 1986500 }, { "epoch": 5.75, "learning_rate": 4.7125293800944795e-05, "loss": 2.3969, "step": 1987000 }, { "epoch": 5.75, "learning_rate": 4.712457015329752e-05, "loss": 2.3683, "step": 1987500 }, { "epoch": 5.75, "learning_rate": 4.712384650565024e-05, "loss": 2.3694, "step": 1988000 }, { "epoch": 5.76, "learning_rate": 4.712312285800297e-05, "loss": 2.3858, "step": 1988500 }, { "epoch": 5.76, "learning_rate": 4.712239921035569e-05, "loss": 2.376, "step": 1989000 }, { "epoch": 5.76, "learning_rate": 4.712167556270841e-05, "loss": 2.3844, "step": 1989500 }, { "epoch": 5.76, "learning_rate": 4.7120951915061136e-05, "loss": 2.3661, "step": 1990000 }, { "epoch": 5.76, "learning_rate": 4.712022826741386e-05, "loss": 2.4116, "step": 1990500 }, { "epoch": 5.76, "learning_rate": 4.711950461976658e-05, "loss": 2.3662, "step": 1991000 }, { "epoch": 5.76, "learning_rate": 4.71187809721193e-05, "loss": 2.3853, "step": 1991500 }, { "epoch": 5.77, "learning_rate": 4.711805732447203e-05, "loss": 2.3857, "step": 1992000 }, { "epoch": 5.77, "learning_rate": 4.7117333676824754e-05, "loss": 2.3943, "step": 1992500 }, { "epoch": 5.77, "learning_rate": 4.7116610029177476e-05, "loss": 2.3485, "step": 1993000 }, { "epoch": 5.77, "learning_rate": 4.71158863815302e-05, "loss": 2.394, "step": 1993500 }, { "epoch": 5.77, "learning_rate": 4.711516273388292e-05, "loss": 2.3716, "step": 1994000 }, { "epoch": 5.77, "learning_rate": 4.711444053353094e-05, "loss": 2.3826, "step": 1994500 }, { "epoch": 5.77, "learning_rate": 4.7113716885883665e-05, "loss": 2.4211, "step": 1995000 }, { "epoch": 5.78, "learning_rate": 4.711299468553168e-05, "loss": 2.3832, "step": 1995500 }, { "epoch": 5.78, "learning_rate": 4.71122710378844e-05, "loss": 2.3848, "step": 1996000 }, { "epoch": 5.78, "learning_rate": 4.711154883753242e-05, "loss": 2.3673, "step": 1996500 }, { "epoch": 5.78, "learning_rate": 4.711082518988515e-05, "loss": 2.4016, "step": 1997000 }, { "epoch": 5.78, "learning_rate": 4.711010154223787e-05, "loss": 2.3936, "step": 1997500 }, { "epoch": 5.78, "learning_rate": 4.710937789459059e-05, "loss": 2.3675, "step": 1998000 }, { "epoch": 5.78, "learning_rate": 4.7108654246943314e-05, "loss": 2.4028, "step": 1998500 }, { "epoch": 5.79, "learning_rate": 4.7107930599296036e-05, "loss": 2.3641, "step": 1999000 }, { "epoch": 5.79, "learning_rate": 4.7107206951648765e-05, "loss": 2.3918, "step": 1999500 }, { "epoch": 5.79, "learning_rate": 4.710648330400149e-05, "loss": 2.3992, "step": 2000000 }, { "epoch": 5.79, "learning_rate": 4.71057611036495e-05, "loss": 2.3716, "step": 2000500 }, { "epoch": 5.79, "learning_rate": 4.7105037456002225e-05, "loss": 2.3907, "step": 2001000 }, { "epoch": 5.79, "learning_rate": 4.710431525565025e-05, "loss": 2.3759, "step": 2001500 }, { "epoch": 5.79, "learning_rate": 4.710359160800297e-05, "loss": 2.4059, "step": 2002000 }, { "epoch": 5.8, "learning_rate": 4.710286796035569e-05, "loss": 2.3719, "step": 2002500 }, { "epoch": 5.8, "learning_rate": 4.7102144312708414e-05, "loss": 2.3871, "step": 2003000 }, { "epoch": 5.8, "learning_rate": 4.7101420665061136e-05, "loss": 2.3887, "step": 2003500 }, { "epoch": 5.8, "learning_rate": 4.710069701741386e-05, "loss": 2.399, "step": 2004000 }, { "epoch": 5.8, "learning_rate": 4.709997336976658e-05, "loss": 2.3803, "step": 2004500 }, { "epoch": 5.8, "learning_rate": 4.70992497221193e-05, "loss": 2.3503, "step": 2005000 }, { "epoch": 5.81, "learning_rate": 4.7098526074472025e-05, "loss": 2.4099, "step": 2005500 }, { "epoch": 5.81, "learning_rate": 4.709780242682475e-05, "loss": 2.3697, "step": 2006000 }, { "epoch": 5.81, "learning_rate": 4.7097078779177477e-05, "loss": 2.3846, "step": 2006500 }, { "epoch": 5.81, "learning_rate": 4.70963551315302e-05, "loss": 2.3776, "step": 2007000 }, { "epoch": 5.81, "learning_rate": 4.709563293117822e-05, "loss": 2.3733, "step": 2007500 }, { "epoch": 5.81, "learning_rate": 4.709490928353094e-05, "loss": 2.3539, "step": 2008000 }, { "epoch": 5.81, "learning_rate": 4.7094185635883666e-05, "loss": 2.3895, "step": 2008500 }, { "epoch": 5.82, "learning_rate": 4.709346343553168e-05, "loss": 2.3751, "step": 2009000 }, { "epoch": 5.82, "learning_rate": 4.70927397878844e-05, "loss": 2.3897, "step": 2009500 }, { "epoch": 5.82, "learning_rate": 4.7092016140237126e-05, "loss": 2.3627, "step": 2010000 }, { "epoch": 5.82, "learning_rate": 4.709129249258985e-05, "loss": 2.3638, "step": 2010500 }, { "epoch": 5.82, "learning_rate": 4.709057029223787e-05, "loss": 2.4012, "step": 2011000 }, { "epoch": 5.82, "learning_rate": 4.708984664459059e-05, "loss": 2.3915, "step": 2011500 }, { "epoch": 5.82, "learning_rate": 4.7089122996943315e-05, "loss": 2.3684, "step": 2012000 }, { "epoch": 5.83, "learning_rate": 4.708839934929604e-05, "loss": 2.3794, "step": 2012500 }, { "epoch": 5.83, "learning_rate": 4.708767570164876e-05, "loss": 2.3899, "step": 2013000 }, { "epoch": 5.83, "learning_rate": 4.708695205400148e-05, "loss": 2.362, "step": 2013500 }, { "epoch": 5.83, "learning_rate": 4.708622840635421e-05, "loss": 2.3655, "step": 2014000 }, { "epoch": 5.83, "learning_rate": 4.708550475870693e-05, "loss": 2.3679, "step": 2014500 }, { "epoch": 5.83, "learning_rate": 4.7084781111059655e-05, "loss": 2.384, "step": 2015000 }, { "epoch": 5.83, "learning_rate": 4.708405746341238e-05, "loss": 2.3809, "step": 2015500 }, { "epoch": 5.84, "learning_rate": 4.70833352630604e-05, "loss": 2.3962, "step": 2016000 }, { "epoch": 5.84, "learning_rate": 4.708261161541312e-05, "loss": 2.3696, "step": 2016500 }, { "epoch": 5.84, "learning_rate": 4.7081887967765844e-05, "loss": 2.3931, "step": 2017000 }, { "epoch": 5.84, "learning_rate": 4.708116576741386e-05, "loss": 2.3783, "step": 2017500 }, { "epoch": 5.84, "learning_rate": 4.708044211976658e-05, "loss": 2.3664, "step": 2018000 }, { "epoch": 5.84, "learning_rate": 4.7079718472119304e-05, "loss": 2.3734, "step": 2018500 }, { "epoch": 5.84, "learning_rate": 4.7078994824472026e-05, "loss": 2.3872, "step": 2019000 }, { "epoch": 5.85, "learning_rate": 4.707827117682475e-05, "loss": 2.3886, "step": 2019500 }, { "epoch": 5.85, "learning_rate": 4.707754752917747e-05, "loss": 2.3625, "step": 2020000 }, { "epoch": 5.85, "learning_rate": 4.707682532882549e-05, "loss": 2.3816, "step": 2020500 }, { "epoch": 5.85, "learning_rate": 4.7076101681178215e-05, "loss": 2.3705, "step": 2021000 }, { "epoch": 5.85, "learning_rate": 4.707537948082623e-05, "loss": 2.3774, "step": 2021500 }, { "epoch": 5.85, "learning_rate": 4.707465583317896e-05, "loss": 2.3927, "step": 2022000 }, { "epoch": 5.85, "learning_rate": 4.707393218553168e-05, "loss": 2.3833, "step": 2022500 }, { "epoch": 5.86, "learning_rate": 4.7073208537884404e-05, "loss": 2.3826, "step": 2023000 }, { "epoch": 5.86, "learning_rate": 4.7072484890237126e-05, "loss": 2.3857, "step": 2023500 }, { "epoch": 5.86, "learning_rate": 4.707176124258985e-05, "loss": 2.3733, "step": 2024000 }, { "epoch": 5.86, "learning_rate": 4.707103759494257e-05, "loss": 2.3873, "step": 2024500 }, { "epoch": 5.86, "learning_rate": 4.70703139472953e-05, "loss": 2.3684, "step": 2025000 }, { "epoch": 5.86, "learning_rate": 4.706959029964802e-05, "loss": 2.383, "step": 2025500 }, { "epoch": 5.86, "learning_rate": 4.706886809929604e-05, "loss": 2.3871, "step": 2026000 }, { "epoch": 5.87, "learning_rate": 4.706814445164876e-05, "loss": 2.3815, "step": 2026500 }, { "epoch": 5.87, "learning_rate": 4.7067422251296775e-05, "loss": 2.4149, "step": 2027000 }, { "epoch": 5.87, "learning_rate": 4.70666986036495e-05, "loss": 2.3954, "step": 2027500 }, { "epoch": 5.87, "learning_rate": 4.706597495600223e-05, "loss": 2.3776, "step": 2028000 }, { "epoch": 5.87, "learning_rate": 4.706525130835495e-05, "loss": 2.3585, "step": 2028500 }, { "epoch": 5.87, "learning_rate": 4.706452766070767e-05, "loss": 2.3811, "step": 2029000 }, { "epoch": 5.87, "learning_rate": 4.70638040130604e-05, "loss": 2.3892, "step": 2029500 }, { "epoch": 5.88, "learning_rate": 4.706308036541312e-05, "loss": 2.396, "step": 2030000 }, { "epoch": 5.88, "learning_rate": 4.7062356717765845e-05, "loss": 2.3786, "step": 2030500 }, { "epoch": 5.88, "learning_rate": 4.706163307011857e-05, "loss": 2.3701, "step": 2031000 }, { "epoch": 5.88, "learning_rate": 4.706090942247129e-05, "loss": 2.371, "step": 2031500 }, { "epoch": 5.88, "learning_rate": 4.7060187222119305e-05, "loss": 2.3551, "step": 2032000 }, { "epoch": 5.88, "learning_rate": 4.705946357447203e-05, "loss": 2.3897, "step": 2032500 }, { "epoch": 5.88, "learning_rate": 4.705873992682475e-05, "loss": 2.405, "step": 2033000 }, { "epoch": 5.89, "learning_rate": 4.705801627917748e-05, "loss": 2.3634, "step": 2033500 }, { "epoch": 5.89, "learning_rate": 4.70572926315302e-05, "loss": 2.4021, "step": 2034000 }, { "epoch": 5.89, "learning_rate": 4.7056570431178216e-05, "loss": 2.3885, "step": 2034500 }, { "epoch": 5.89, "learning_rate": 4.705584678353094e-05, "loss": 2.3855, "step": 2035000 }, { "epoch": 5.89, "learning_rate": 4.705512313588366e-05, "loss": 2.3706, "step": 2035500 }, { "epoch": 5.89, "learning_rate": 4.705439948823638e-05, "loss": 2.4031, "step": 2036000 }, { "epoch": 5.89, "learning_rate": 4.705367584058911e-05, "loss": 2.3733, "step": 2036500 }, { "epoch": 5.9, "learning_rate": 4.7052952192941834e-05, "loss": 2.3849, "step": 2037000 }, { "epoch": 5.9, "learning_rate": 4.7052228545294556e-05, "loss": 2.3905, "step": 2037500 }, { "epoch": 5.9, "learning_rate": 4.705150489764728e-05, "loss": 2.3929, "step": 2038000 }, { "epoch": 5.9, "learning_rate": 4.705078125e-05, "loss": 2.3736, "step": 2038500 }, { "epoch": 5.9, "learning_rate": 4.705005904964802e-05, "loss": 2.3812, "step": 2039000 }, { "epoch": 5.9, "learning_rate": 4.7049335402000745e-05, "loss": 2.3967, "step": 2039500 }, { "epoch": 5.9, "learning_rate": 4.704861320164876e-05, "loss": 2.3684, "step": 2040000 }, { "epoch": 5.91, "learning_rate": 4.7047891001296776e-05, "loss": 2.3816, "step": 2040500 }, { "epoch": 5.91, "learning_rate": 4.70471673536495e-05, "loss": 2.3795, "step": 2041000 }, { "epoch": 5.91, "learning_rate": 4.704644370600223e-05, "loss": 2.3959, "step": 2041500 }, { "epoch": 5.91, "learning_rate": 4.704572005835495e-05, "loss": 2.3928, "step": 2042000 }, { "epoch": 5.91, "learning_rate": 4.704499930529826e-05, "loss": 2.3924, "step": 2042500 }, { "epoch": 5.91, "learning_rate": 4.704427565765098e-05, "loss": 2.3793, "step": 2043000 }, { "epoch": 5.92, "learning_rate": 4.70435520100037e-05, "loss": 2.3637, "step": 2043500 }, { "epoch": 5.92, "learning_rate": 4.7042828362356425e-05, "loss": 2.3946, "step": 2044000 }, { "epoch": 5.92, "learning_rate": 4.7042104714709154e-05, "loss": 2.3805, "step": 2044500 }, { "epoch": 5.92, "learning_rate": 4.7041381067061877e-05, "loss": 2.4095, "step": 2045000 }, { "epoch": 5.92, "learning_rate": 4.70406574194146e-05, "loss": 2.3866, "step": 2045500 }, { "epoch": 5.92, "learning_rate": 4.703993377176733e-05, "loss": 2.3965, "step": 2046000 }, { "epoch": 5.92, "learning_rate": 4.703921012412005e-05, "loss": 2.3837, "step": 2046500 }, { "epoch": 5.93, "learning_rate": 4.703848647647277e-05, "loss": 2.3962, "step": 2047000 }, { "epoch": 5.93, "learning_rate": 4.7037762828825495e-05, "loss": 2.376, "step": 2047500 }, { "epoch": 5.93, "learning_rate": 4.703703918117822e-05, "loss": 2.3644, "step": 2048000 }, { "epoch": 5.93, "learning_rate": 4.703631553353094e-05, "loss": 2.3736, "step": 2048500 }, { "epoch": 5.93, "learning_rate": 4.703559188588366e-05, "loss": 2.3803, "step": 2049000 }, { "epoch": 5.93, "learning_rate": 4.7034868238236383e-05, "loss": 2.3758, "step": 2049500 }, { "epoch": 5.93, "learning_rate": 4.7034144590589106e-05, "loss": 2.3715, "step": 2050000 }, { "epoch": 5.94, "learning_rate": 4.703342239023713e-05, "loss": 2.378, "step": 2050500 }, { "epoch": 5.94, "learning_rate": 4.703269874258985e-05, "loss": 2.3933, "step": 2051000 }, { "epoch": 5.94, "learning_rate": 4.7031976542237866e-05, "loss": 2.3834, "step": 2051500 }, { "epoch": 5.94, "learning_rate": 4.7031252894590595e-05, "loss": 2.412, "step": 2052000 }, { "epoch": 5.94, "learning_rate": 4.703052924694332e-05, "loss": 2.3855, "step": 2052500 }, { "epoch": 5.94, "learning_rate": 4.702980559929604e-05, "loss": 2.3717, "step": 2053000 }, { "epoch": 5.94, "learning_rate": 4.702908195164876e-05, "loss": 2.3955, "step": 2053500 }, { "epoch": 5.95, "learning_rate": 4.7028358304001484e-05, "loss": 2.3487, "step": 2054000 }, { "epoch": 5.95, "learning_rate": 4.7027634656354206e-05, "loss": 2.3901, "step": 2054500 }, { "epoch": 5.95, "learning_rate": 4.702691100870693e-05, "loss": 2.3734, "step": 2055000 }, { "epoch": 5.95, "learning_rate": 4.702618880835495e-05, "loss": 2.3852, "step": 2055500 }, { "epoch": 5.95, "learning_rate": 4.702546516070767e-05, "loss": 2.3837, "step": 2056000 }, { "epoch": 5.95, "learning_rate": 4.7024741513060395e-05, "loss": 2.3786, "step": 2056500 }, { "epoch": 5.95, "learning_rate": 4.702401786541312e-05, "loss": 2.3686, "step": 2057000 }, { "epoch": 5.96, "learning_rate": 4.702329566506113e-05, "loss": 2.3666, "step": 2057500 }, { "epoch": 5.96, "learning_rate": 4.7022572017413855e-05, "loss": 2.4034, "step": 2058000 }, { "epoch": 5.96, "learning_rate": 4.702184836976658e-05, "loss": 2.3837, "step": 2058500 }, { "epoch": 5.96, "learning_rate": 4.7021124722119306e-05, "loss": 2.3775, "step": 2059000 }, { "epoch": 5.96, "learning_rate": 4.702040107447203e-05, "loss": 2.3759, "step": 2059500 }, { "epoch": 5.96, "learning_rate": 4.701967742682476e-05, "loss": 2.3655, "step": 2060000 }, { "epoch": 5.96, "learning_rate": 4.701895377917748e-05, "loss": 2.3825, "step": 2060500 }, { "epoch": 5.97, "learning_rate": 4.70182301315302e-05, "loss": 2.3888, "step": 2061000 }, { "epoch": 5.97, "learning_rate": 4.7017506483882924e-05, "loss": 2.3719, "step": 2061500 }, { "epoch": 5.97, "learning_rate": 4.7016782836235646e-05, "loss": 2.3644, "step": 2062000 }, { "epoch": 5.97, "learning_rate": 4.701605918858837e-05, "loss": 2.4086, "step": 2062500 }, { "epoch": 5.97, "learning_rate": 4.7015336988236384e-05, "loss": 2.3545, "step": 2063000 }, { "epoch": 5.97, "learning_rate": 4.7014613340589106e-05, "loss": 2.3857, "step": 2063500 }, { "epoch": 5.97, "learning_rate": 4.701388969294183e-05, "loss": 2.3775, "step": 2064000 }, { "epoch": 5.98, "learning_rate": 4.701316604529456e-05, "loss": 2.4024, "step": 2064500 }, { "epoch": 5.98, "learning_rate": 4.701244239764728e-05, "loss": 2.3788, "step": 2065000 }, { "epoch": 5.98, "learning_rate": 4.701171875e-05, "loss": 2.3851, "step": 2065500 }, { "epoch": 5.98, "learning_rate": 4.7010995102352724e-05, "loss": 2.3836, "step": 2066000 }, { "epoch": 5.98, "learning_rate": 4.7010271454705453e-05, "loss": 2.3854, "step": 2066500 }, { "epoch": 5.98, "learning_rate": 4.700954925435347e-05, "loss": 2.3641, "step": 2067000 }, { "epoch": 5.98, "learning_rate": 4.700882560670619e-05, "loss": 2.3776, "step": 2067500 }, { "epoch": 5.99, "learning_rate": 4.7008101959058913e-05, "loss": 2.3852, "step": 2068000 }, { "epoch": 5.99, "learning_rate": 4.7007378311411636e-05, "loss": 2.3769, "step": 2068500 }, { "epoch": 5.99, "learning_rate": 4.700665466376436e-05, "loss": 2.3651, "step": 2069000 }, { "epoch": 5.99, "learning_rate": 4.700593101611708e-05, "loss": 2.3713, "step": 2069500 }, { "epoch": 5.99, "learning_rate": 4.700520736846981e-05, "loss": 2.3794, "step": 2070000 }, { "epoch": 5.99, "learning_rate": 4.700448372082253e-05, "loss": 2.3631, "step": 2070500 }, { "epoch": 5.99, "learning_rate": 4.700376152047055e-05, "loss": 2.3789, "step": 2071000 }, { "epoch": 6.0, "learning_rate": 4.700303787282327e-05, "loss": 2.3672, "step": 2071500 }, { "epoch": 6.0, "learning_rate": 4.700231422517599e-05, "loss": 2.3673, "step": 2072000 }, { "epoch": 6.0, "learning_rate": 4.7001590577528714e-05, "loss": 2.3625, "step": 2072500 }, { "epoch": 6.0, "eval_accuracy": 0.6423148797851449, "eval_accuracy_mlm": 0.6042832537684684, "eval_accuracy_nsp": 0.8462324639195574, "eval_loss": 2.3543269634246826, "eval_runtime": 330.7221, "eval_samples_per_second": 1319.494, "eval_steps_per_second": 54.98, "step": 2072832 }, { "epoch": 6.0, "learning_rate": 4.700086837717673e-05, "loss": 2.3918, "step": 2073000 }, { "epoch": 6.0, "learning_rate": 4.700014472952946e-05, "loss": 2.3507, "step": 2073500 }, { "epoch": 6.0, "learning_rate": 4.699942108188218e-05, "loss": 2.372, "step": 2074000 }, { "epoch": 6.0, "learning_rate": 4.699869743423491e-05, "loss": 2.3506, "step": 2074500 }, { "epoch": 6.01, "learning_rate": 4.699797378658763e-05, "loss": 2.3532, "step": 2075000 }, { "epoch": 6.01, "learning_rate": 4.6997250138940354e-05, "loss": 2.3656, "step": 2075500 }, { "epoch": 6.01, "learning_rate": 4.699652793858837e-05, "loss": 2.3596, "step": 2076000 }, { "epoch": 6.01, "learning_rate": 4.699580429094109e-05, "loss": 2.374, "step": 2076500 }, { "epoch": 6.01, "learning_rate": 4.6995080643293814e-05, "loss": 2.341, "step": 2077000 }, { "epoch": 6.01, "learning_rate": 4.6994356995646536e-05, "loss": 2.3713, "step": 2077500 }, { "epoch": 6.01, "learning_rate": 4.699363334799926e-05, "loss": 2.3596, "step": 2078000 }, { "epoch": 6.02, "learning_rate": 4.699290970035198e-05, "loss": 2.3553, "step": 2078500 }, { "epoch": 6.02, "learning_rate": 4.699218605270471e-05, "loss": 2.3466, "step": 2079000 }, { "epoch": 6.02, "learning_rate": 4.699146240505743e-05, "loss": 2.3673, "step": 2079500 }, { "epoch": 6.02, "learning_rate": 4.6990738757410154e-05, "loss": 2.3483, "step": 2080000 }, { "epoch": 6.02, "learning_rate": 4.699001800435346e-05, "loss": 2.3788, "step": 2080500 }, { "epoch": 6.02, "learning_rate": 4.6989294356706185e-05, "loss": 2.3499, "step": 2081000 }, { "epoch": 6.03, "learning_rate": 4.6988570709058914e-05, "loss": 2.3337, "step": 2081500 }, { "epoch": 6.03, "learning_rate": 4.6987847061411636e-05, "loss": 2.341, "step": 2082000 }, { "epoch": 6.03, "learning_rate": 4.698712341376436e-05, "loss": 2.37, "step": 2082500 }, { "epoch": 6.03, "learning_rate": 4.698639976611708e-05, "loss": 2.3718, "step": 2083000 }, { "epoch": 6.03, "learning_rate": 4.69856775657651e-05, "loss": 2.3788, "step": 2083500 }, { "epoch": 6.03, "learning_rate": 4.6984953918117826e-05, "loss": 2.3597, "step": 2084000 }, { "epoch": 6.03, "learning_rate": 4.698423027047055e-05, "loss": 2.3582, "step": 2084500 }, { "epoch": 6.04, "learning_rate": 4.698350662282327e-05, "loss": 2.3628, "step": 2085000 }, { "epoch": 6.04, "learning_rate": 4.698278297517599e-05, "loss": 2.3534, "step": 2085500 }, { "epoch": 6.04, "learning_rate": 4.698206222211931e-05, "loss": 2.3635, "step": 2086000 }, { "epoch": 6.04, "learning_rate": 4.698133857447203e-05, "loss": 2.3822, "step": 2086500 }, { "epoch": 6.04, "learning_rate": 4.698061492682475e-05, "loss": 2.3685, "step": 2087000 }, { "epoch": 6.04, "learning_rate": 4.6979891279177475e-05, "loss": 2.3415, "step": 2087500 }, { "epoch": 6.04, "learning_rate": 4.69791676315302e-05, "loss": 2.3515, "step": 2088000 }, { "epoch": 6.05, "learning_rate": 4.697844543117821e-05, "loss": 2.3672, "step": 2088500 }, { "epoch": 6.05, "learning_rate": 4.6977721783530935e-05, "loss": 2.3742, "step": 2089000 }, { "epoch": 6.05, "learning_rate": 4.6976998135883664e-05, "loss": 2.3683, "step": 2089500 }, { "epoch": 6.05, "learning_rate": 4.6976274488236386e-05, "loss": 2.3628, "step": 2090000 }, { "epoch": 6.05, "learning_rate": 4.697555084058911e-05, "loss": 2.346, "step": 2090500 }, { "epoch": 6.05, "learning_rate": 4.697482719294184e-05, "loss": 2.3676, "step": 2091000 }, { "epoch": 6.05, "learning_rate": 4.697410354529456e-05, "loss": 2.3842, "step": 2091500 }, { "epoch": 6.06, "learning_rate": 4.697338279223787e-05, "loss": 2.3769, "step": 2092000 }, { "epoch": 6.06, "learning_rate": 4.697265914459059e-05, "loss": 2.3262, "step": 2092500 }, { "epoch": 6.06, "learning_rate": 4.697193549694331e-05, "loss": 2.372, "step": 2093000 }, { "epoch": 6.06, "learning_rate": 4.6971211849296035e-05, "loss": 2.3758, "step": 2093500 }, { "epoch": 6.06, "learning_rate": 4.697048820164876e-05, "loss": 2.3922, "step": 2094000 }, { "epoch": 6.06, "learning_rate": 4.6969764554001486e-05, "loss": 2.3735, "step": 2094500 }, { "epoch": 6.06, "learning_rate": 4.696904090635421e-05, "loss": 2.3765, "step": 2095000 }, { "epoch": 6.07, "learning_rate": 4.696831725870693e-05, "loss": 2.354, "step": 2095500 }, { "epoch": 6.07, "learning_rate": 4.696759361105965e-05, "loss": 2.3798, "step": 2096000 }, { "epoch": 6.07, "learning_rate": 4.696686996341238e-05, "loss": 2.3618, "step": 2096500 }, { "epoch": 6.07, "learning_rate": 4.6966146315765104e-05, "loss": 2.3829, "step": 2097000 }, { "epoch": 6.07, "learning_rate": 4.6965422668117826e-05, "loss": 2.3469, "step": 2097500 }, { "epoch": 6.07, "learning_rate": 4.696469902047055e-05, "loss": 2.3785, "step": 2098000 }, { "epoch": 6.07, "learning_rate": 4.6963976820118564e-05, "loss": 2.3434, "step": 2098500 }, { "epoch": 6.08, "learning_rate": 4.6963253172471286e-05, "loss": 2.3405, "step": 2099000 }, { "epoch": 6.08, "learning_rate": 4.696252952482401e-05, "loss": 2.3497, "step": 2099500 }, { "epoch": 6.08, "learning_rate": 4.696180587717674e-05, "loss": 2.3807, "step": 2100000 }, { "epoch": 6.08, "learning_rate": 4.696108222952946e-05, "loss": 2.3713, "step": 2100500 }, { "epoch": 6.08, "learning_rate": 4.696035858188218e-05, "loss": 2.375, "step": 2101000 }, { "epoch": 6.08, "learning_rate": 4.6959634934234904e-05, "loss": 2.3421, "step": 2101500 }, { "epoch": 6.08, "learning_rate": 4.6958911286587627e-05, "loss": 2.3695, "step": 2102000 }, { "epoch": 6.09, "learning_rate": 4.695818763894035e-05, "loss": 2.3596, "step": 2102500 }, { "epoch": 6.09, "learning_rate": 4.6957465438588364e-05, "loss": 2.3408, "step": 2103000 }, { "epoch": 6.09, "learning_rate": 4.6956741790941087e-05, "loss": 2.3629, "step": 2103500 }, { "epoch": 6.09, "learning_rate": 4.6956018143293816e-05, "loss": 2.376, "step": 2104000 }, { "epoch": 6.09, "learning_rate": 4.695529449564654e-05, "loss": 2.3499, "step": 2104500 }, { "epoch": 6.09, "learning_rate": 4.695457084799926e-05, "loss": 2.3687, "step": 2105000 }, { "epoch": 6.09, "learning_rate": 4.695384864764728e-05, "loss": 2.368, "step": 2105500 }, { "epoch": 6.1, "learning_rate": 4.6953125000000005e-05, "loss": 2.3579, "step": 2106000 }, { "epoch": 6.1, "learning_rate": 4.695240135235273e-05, "loss": 2.3651, "step": 2106500 }, { "epoch": 6.1, "learning_rate": 4.695167770470545e-05, "loss": 2.3676, "step": 2107000 }, { "epoch": 6.1, "learning_rate": 4.6950955504353465e-05, "loss": 2.3701, "step": 2107500 }, { "epoch": 6.1, "learning_rate": 4.695023185670619e-05, "loss": 2.3695, "step": 2108000 }, { "epoch": 6.1, "learning_rate": 4.694950820905891e-05, "loss": 2.3582, "step": 2108500 }, { "epoch": 6.1, "learning_rate": 4.694878456141164e-05, "loss": 2.3709, "step": 2109000 }, { "epoch": 6.11, "learning_rate": 4.694806091376436e-05, "loss": 2.3746, "step": 2109500 }, { "epoch": 6.11, "learning_rate": 4.694733726611708e-05, "loss": 2.3498, "step": 2110000 }, { "epoch": 6.11, "learning_rate": 4.6946613618469805e-05, "loss": 2.3483, "step": 2110500 }, { "epoch": 6.11, "learning_rate": 4.694588997082253e-05, "loss": 2.3708, "step": 2111000 }, { "epoch": 6.11, "learning_rate": 4.694516777047055e-05, "loss": 2.3852, "step": 2111500 }, { "epoch": 6.11, "learning_rate": 4.694444412282327e-05, "loss": 2.3415, "step": 2112000 }, { "epoch": 6.11, "learning_rate": 4.6943720475175994e-05, "loss": 2.356, "step": 2112500 }, { "epoch": 6.12, "learning_rate": 4.694299827482401e-05, "loss": 2.3529, "step": 2113000 }, { "epoch": 6.12, "learning_rate": 4.694227462717674e-05, "loss": 2.3696, "step": 2113500 }, { "epoch": 6.12, "learning_rate": 4.6941552426824754e-05, "loss": 2.3569, "step": 2114000 }, { "epoch": 6.12, "learning_rate": 4.6940828779177476e-05, "loss": 2.364, "step": 2114500 }, { "epoch": 6.12, "learning_rate": 4.69401051315302e-05, "loss": 2.3801, "step": 2115000 }, { "epoch": 6.12, "learning_rate": 4.693938148388292e-05, "loss": 2.3793, "step": 2115500 }, { "epoch": 6.12, "learning_rate": 4.693865783623564e-05, "loss": 2.3607, "step": 2116000 }, { "epoch": 6.13, "learning_rate": 4.6937934188588365e-05, "loss": 2.3535, "step": 2116500 }, { "epoch": 6.13, "learning_rate": 4.693721054094109e-05, "loss": 2.3726, "step": 2117000 }, { "epoch": 6.13, "learning_rate": 4.693648689329381e-05, "loss": 2.378, "step": 2117500 }, { "epoch": 6.13, "learning_rate": 4.693576469294183e-05, "loss": 2.3514, "step": 2118000 }, { "epoch": 6.13, "learning_rate": 4.693504249258985e-05, "loss": 2.3659, "step": 2118500 }, { "epoch": 6.13, "learning_rate": 4.693431884494257e-05, "loss": 2.3537, "step": 2119000 }, { "epoch": 6.14, "learning_rate": 4.69335951972953e-05, "loss": 2.3537, "step": 2119500 }, { "epoch": 6.14, "learning_rate": 4.693287154964802e-05, "loss": 2.3598, "step": 2120000 }, { "epoch": 6.14, "learning_rate": 4.693214790200074e-05, "loss": 2.3877, "step": 2120500 }, { "epoch": 6.14, "learning_rate": 4.6931424254353465e-05, "loss": 2.3866, "step": 2121000 }, { "epoch": 6.14, "learning_rate": 4.693070060670619e-05, "loss": 2.3477, "step": 2121500 }, { "epoch": 6.14, "learning_rate": 4.692997695905892e-05, "loss": 2.3544, "step": 2122000 }, { "epoch": 6.14, "learning_rate": 4.692925331141164e-05, "loss": 2.3507, "step": 2122500 }, { "epoch": 6.15, "learning_rate": 4.692852966376436e-05, "loss": 2.3723, "step": 2123000 }, { "epoch": 6.15, "learning_rate": 4.692780746341238e-05, "loss": 2.3637, "step": 2123500 }, { "epoch": 6.15, "learning_rate": 4.69270838157651e-05, "loss": 2.356, "step": 2124000 }, { "epoch": 6.15, "learning_rate": 4.692636016811782e-05, "loss": 2.3704, "step": 2124500 }, { "epoch": 6.15, "learning_rate": 4.692563652047054e-05, "loss": 2.3638, "step": 2125000 }, { "epoch": 6.15, "learning_rate": 4.6924912872823266e-05, "loss": 2.3622, "step": 2125500 }, { "epoch": 6.15, "learning_rate": 4.692418922517599e-05, "loss": 2.3803, "step": 2126000 }, { "epoch": 6.16, "learning_rate": 4.692346557752872e-05, "loss": 2.3765, "step": 2126500 }, { "epoch": 6.16, "learning_rate": 4.692274192988144e-05, "loss": 2.3619, "step": 2127000 }, { "epoch": 6.16, "learning_rate": 4.692201828223417e-05, "loss": 2.3729, "step": 2127500 }, { "epoch": 6.16, "learning_rate": 4.6921296081882184e-05, "loss": 2.3576, "step": 2128000 }, { "epoch": 6.16, "learning_rate": 4.6920572434234906e-05, "loss": 2.3688, "step": 2128500 }, { "epoch": 6.16, "learning_rate": 4.691984878658763e-05, "loss": 2.3887, "step": 2129000 }, { "epoch": 6.16, "learning_rate": 4.691912513894035e-05, "loss": 2.3907, "step": 2129500 }, { "epoch": 6.17, "learning_rate": 4.6918402938588366e-05, "loss": 2.3966, "step": 2130000 }, { "epoch": 6.17, "learning_rate": 4.691767929094109e-05, "loss": 2.3553, "step": 2130500 }, { "epoch": 6.17, "learning_rate": 4.691695564329382e-05, "loss": 2.3488, "step": 2131000 }, { "epoch": 6.17, "learning_rate": 4.691623199564654e-05, "loss": 2.3577, "step": 2131500 }, { "epoch": 6.17, "learning_rate": 4.691550834799926e-05, "loss": 2.3384, "step": 2132000 }, { "epoch": 6.17, "learning_rate": 4.691478759494257e-05, "loss": 2.3765, "step": 2132500 }, { "epoch": 6.17, "learning_rate": 4.691406394729529e-05, "loss": 2.3917, "step": 2133000 }, { "epoch": 6.18, "learning_rate": 4.6913340299648015e-05, "loss": 2.3614, "step": 2133500 }, { "epoch": 6.18, "learning_rate": 4.6912616652000744e-05, "loss": 2.357, "step": 2134000 }, { "epoch": 6.18, "learning_rate": 4.6911893004353466e-05, "loss": 2.3662, "step": 2134500 }, { "epoch": 6.18, "learning_rate": 4.691117080400149e-05, "loss": 2.378, "step": 2135000 }, { "epoch": 6.18, "learning_rate": 4.691044715635421e-05, "loss": 2.3553, "step": 2135500 }, { "epoch": 6.18, "learning_rate": 4.690972350870693e-05, "loss": 2.3592, "step": 2136000 }, { "epoch": 6.18, "learning_rate": 4.6908999861059655e-05, "loss": 2.3456, "step": 2136500 }, { "epoch": 6.19, "learning_rate": 4.690827766070767e-05, "loss": 2.3497, "step": 2137000 }, { "epoch": 6.19, "learning_rate": 4.690755401306039e-05, "loss": 2.3843, "step": 2137500 }, { "epoch": 6.19, "learning_rate": 4.6906830365413115e-05, "loss": 2.3822, "step": 2138000 }, { "epoch": 6.19, "learning_rate": 4.6906106717765844e-05, "loss": 2.3547, "step": 2138500 }, { "epoch": 6.19, "learning_rate": 4.6905383070118566e-05, "loss": 2.375, "step": 2139000 }, { "epoch": 6.19, "learning_rate": 4.690465942247129e-05, "loss": 2.3486, "step": 2139500 }, { "epoch": 6.19, "learning_rate": 4.690393577482401e-05, "loss": 2.3556, "step": 2140000 }, { "epoch": 6.2, "learning_rate": 4.690321212717673e-05, "loss": 2.3776, "step": 2140500 }, { "epoch": 6.2, "learning_rate": 4.690248992682475e-05, "loss": 2.3417, "step": 2141000 }, { "epoch": 6.2, "learning_rate": 4.690176627917748e-05, "loss": 2.3609, "step": 2141500 }, { "epoch": 6.2, "learning_rate": 4.69010426315302e-05, "loss": 2.3566, "step": 2142000 }, { "epoch": 6.2, "learning_rate": 4.690031898388292e-05, "loss": 2.3523, "step": 2142500 }, { "epoch": 6.2, "learning_rate": 4.6899595336235644e-05, "loss": 2.3796, "step": 2143000 }, { "epoch": 6.2, "learning_rate": 4.689887168858837e-05, "loss": 2.3689, "step": 2143500 }, { "epoch": 6.21, "learning_rate": 4.689814804094109e-05, "loss": 2.385, "step": 2144000 }, { "epoch": 6.21, "learning_rate": 4.689742439329382e-05, "loss": 2.3643, "step": 2144500 }, { "epoch": 6.21, "learning_rate": 4.689670074564654e-05, "loss": 2.3744, "step": 2145000 }, { "epoch": 6.21, "learning_rate": 4.689597709799926e-05, "loss": 2.3603, "step": 2145500 }, { "epoch": 6.21, "learning_rate": 4.689525489764728e-05, "loss": 2.3515, "step": 2146000 }, { "epoch": 6.21, "learning_rate": 4.689453125e-05, "loss": 2.3585, "step": 2146500 }, { "epoch": 6.21, "learning_rate": 4.689380760235272e-05, "loss": 2.3688, "step": 2147000 }, { "epoch": 6.22, "learning_rate": 4.6893083954705445e-05, "loss": 2.3699, "step": 2147500 }, { "epoch": 6.22, "learning_rate": 4.689236030705817e-05, "loss": 2.37, "step": 2148000 }, { "epoch": 6.22, "learning_rate": 4.689163810670619e-05, "loss": 2.3677, "step": 2148500 }, { "epoch": 6.22, "learning_rate": 4.689091590635421e-05, "loss": 2.3488, "step": 2149000 }, { "epoch": 6.22, "learning_rate": 4.689019370600223e-05, "loss": 2.351, "step": 2149500 }, { "epoch": 6.22, "learning_rate": 4.688947005835495e-05, "loss": 2.3574, "step": 2150000 }, { "epoch": 6.22, "learning_rate": 4.688874641070767e-05, "loss": 2.4013, "step": 2150500 }, { "epoch": 6.23, "learning_rate": 4.6888022763060394e-05, "loss": 2.3765, "step": 2151000 }, { "epoch": 6.23, "learning_rate": 4.6887299115413116e-05, "loss": 2.3605, "step": 2151500 }, { "epoch": 6.23, "learning_rate": 4.6886575467765845e-05, "loss": 2.3808, "step": 2152000 }, { "epoch": 6.23, "learning_rate": 4.688585182011857e-05, "loss": 2.3506, "step": 2152500 }, { "epoch": 6.23, "learning_rate": 4.688512961976658e-05, "loss": 2.3876, "step": 2153000 }, { "epoch": 6.23, "learning_rate": 4.6884405972119305e-05, "loss": 2.3571, "step": 2153500 }, { "epoch": 6.23, "learning_rate": 4.688368232447203e-05, "loss": 2.3668, "step": 2154000 }, { "epoch": 6.24, "learning_rate": 4.688295867682475e-05, "loss": 2.3712, "step": 2154500 }, { "epoch": 6.24, "learning_rate": 4.688223502917747e-05, "loss": 2.348, "step": 2155000 }, { "epoch": 6.24, "learning_rate": 4.6881511381530194e-05, "loss": 2.3677, "step": 2155500 }, { "epoch": 6.24, "learning_rate": 4.6880787733882916e-05, "loss": 2.3612, "step": 2156000 }, { "epoch": 6.24, "learning_rate": 4.688006553353094e-05, "loss": 2.3619, "step": 2156500 }, { "epoch": 6.24, "learning_rate": 4.687934188588367e-05, "loss": 2.381, "step": 2157000 }, { "epoch": 6.25, "learning_rate": 4.687861823823639e-05, "loss": 2.3667, "step": 2157500 }, { "epoch": 6.25, "learning_rate": 4.687789459058911e-05, "loss": 2.3327, "step": 2158000 }, { "epoch": 6.25, "learning_rate": 4.6877170942941834e-05, "loss": 2.3503, "step": 2158500 }, { "epoch": 6.25, "learning_rate": 4.6876447295294557e-05, "loss": 2.3861, "step": 2159000 }, { "epoch": 6.25, "learning_rate": 4.687572364764728e-05, "loss": 2.3485, "step": 2159500 }, { "epoch": 6.25, "learning_rate": 4.6875e-05, "loss": 2.3681, "step": 2160000 }, { "epoch": 6.25, "learning_rate": 4.687427635235272e-05, "loss": 2.3773, "step": 2160500 }, { "epoch": 6.26, "learning_rate": 4.6873552704705445e-05, "loss": 2.3604, "step": 2161000 }, { "epoch": 6.26, "learning_rate": 4.687283050435347e-05, "loss": 2.3872, "step": 2161500 }, { "epoch": 6.26, "learning_rate": 4.687210685670619e-05, "loss": 2.3777, "step": 2162000 }, { "epoch": 6.26, "learning_rate": 4.687138320905891e-05, "loss": 2.3461, "step": 2162500 }, { "epoch": 6.26, "learning_rate": 4.6870659561411634e-05, "loss": 2.3857, "step": 2163000 }, { "epoch": 6.26, "learning_rate": 4.686993591376436e-05, "loss": 2.3407, "step": 2163500 }, { "epoch": 6.26, "learning_rate": 4.6869212266117086e-05, "loss": 2.3797, "step": 2164000 }, { "epoch": 6.27, "learning_rate": 4.68684900657651e-05, "loss": 2.3387, "step": 2164500 }, { "epoch": 6.27, "learning_rate": 4.6867766418117824e-05, "loss": 2.3751, "step": 2165000 }, { "epoch": 6.27, "learning_rate": 4.6867042770470546e-05, "loss": 2.3693, "step": 2165500 }, { "epoch": 6.27, "learning_rate": 4.686631912282327e-05, "loss": 2.3725, "step": 2166000 }, { "epoch": 6.27, "learning_rate": 4.6865598369766584e-05, "loss": 2.3467, "step": 2166500 }, { "epoch": 6.27, "learning_rate": 4.6864874722119306e-05, "loss": 2.3693, "step": 2167000 }, { "epoch": 6.27, "learning_rate": 4.686415107447203e-05, "loss": 2.3826, "step": 2167500 }, { "epoch": 6.28, "learning_rate": 4.686342742682475e-05, "loss": 2.3718, "step": 2168000 }, { "epoch": 6.28, "learning_rate": 4.686270377917747e-05, "loss": 2.3606, "step": 2168500 }, { "epoch": 6.28, "learning_rate": 4.6861980131530195e-05, "loss": 2.3672, "step": 2169000 }, { "epoch": 6.28, "learning_rate": 4.6861256483882924e-05, "loss": 2.3834, "step": 2169500 }, { "epoch": 6.28, "learning_rate": 4.6860532836235646e-05, "loss": 2.3701, "step": 2170000 }, { "epoch": 6.28, "learning_rate": 4.685980918858837e-05, "loss": 2.3453, "step": 2170500 }, { "epoch": 6.28, "learning_rate": 4.6859086988236384e-05, "loss": 2.3805, "step": 2171000 }, { "epoch": 6.29, "learning_rate": 4.68583647878844e-05, "loss": 2.3817, "step": 2171500 }, { "epoch": 6.29, "learning_rate": 4.685764114023713e-05, "loss": 2.3477, "step": 2172000 }, { "epoch": 6.29, "learning_rate": 4.685691749258985e-05, "loss": 2.3609, "step": 2172500 }, { "epoch": 6.29, "learning_rate": 4.685619384494257e-05, "loss": 2.3642, "step": 2173000 }, { "epoch": 6.29, "learning_rate": 4.6855470197295295e-05, "loss": 2.3416, "step": 2173500 }, { "epoch": 6.29, "learning_rate": 4.6854746549648024e-05, "loss": 2.3709, "step": 2174000 }, { "epoch": 6.29, "learning_rate": 4.6854022902000746e-05, "loss": 2.3672, "step": 2174500 }, { "epoch": 6.3, "learning_rate": 4.685329925435347e-05, "loss": 2.3981, "step": 2175000 }, { "epoch": 6.3, "learning_rate": 4.6852577054001484e-05, "loss": 2.363, "step": 2175500 }, { "epoch": 6.3, "learning_rate": 4.6851853406354206e-05, "loss": 2.3728, "step": 2176000 }, { "epoch": 6.3, "learning_rate": 4.685112975870693e-05, "loss": 2.3595, "step": 2176500 }, { "epoch": 6.3, "learning_rate": 4.685040611105965e-05, "loss": 2.3594, "step": 2177000 }, { "epoch": 6.3, "learning_rate": 4.684968246341237e-05, "loss": 2.3706, "step": 2177500 }, { "epoch": 6.3, "learning_rate": 4.6848960263060395e-05, "loss": 2.3742, "step": 2178000 }, { "epoch": 6.31, "learning_rate": 4.684823661541312e-05, "loss": 2.3752, "step": 2178500 }, { "epoch": 6.31, "learning_rate": 4.684751296776585e-05, "loss": 2.3609, "step": 2179000 }, { "epoch": 6.31, "learning_rate": 4.684678932011857e-05, "loss": 2.3696, "step": 2179500 }, { "epoch": 6.31, "learning_rate": 4.684606567247129e-05, "loss": 2.3516, "step": 2180000 }, { "epoch": 6.31, "learning_rate": 4.684534202482401e-05, "loss": 2.3724, "step": 2180500 }, { "epoch": 6.31, "learning_rate": 4.684461982447203e-05, "loss": 2.368, "step": 2181000 }, { "epoch": 6.31, "learning_rate": 4.684389617682475e-05, "loss": 2.3759, "step": 2181500 }, { "epoch": 6.32, "learning_rate": 4.684317252917747e-05, "loss": 2.3627, "step": 2182000 }, { "epoch": 6.32, "learning_rate": 4.6842448881530196e-05, "loss": 2.3484, "step": 2182500 }, { "epoch": 6.32, "learning_rate": 4.6841725233882925e-05, "loss": 2.3777, "step": 2183000 }, { "epoch": 6.32, "learning_rate": 4.684100158623565e-05, "loss": 2.3716, "step": 2183500 }, { "epoch": 6.32, "learning_rate": 4.684027938588366e-05, "loss": 2.3728, "step": 2184000 }, { "epoch": 6.32, "learning_rate": 4.6839555738236385e-05, "loss": 2.3774, "step": 2184500 }, { "epoch": 6.32, "learning_rate": 4.683883209058911e-05, "loss": 2.3743, "step": 2185000 }, { "epoch": 6.33, "learning_rate": 4.683810844294183e-05, "loss": 2.3813, "step": 2185500 }, { "epoch": 6.33, "learning_rate": 4.6837386242589845e-05, "loss": 2.3933, "step": 2186000 }, { "epoch": 6.33, "learning_rate": 4.6836662594942574e-05, "loss": 2.3717, "step": 2186500 }, { "epoch": 6.33, "learning_rate": 4.6835938947295296e-05, "loss": 2.3362, "step": 2187000 }, { "epoch": 6.33, "learning_rate": 4.6835215299648025e-05, "loss": 2.3886, "step": 2187500 }, { "epoch": 6.33, "learning_rate": 4.683449165200075e-05, "loss": 2.3769, "step": 2188000 }, { "epoch": 6.33, "learning_rate": 4.683376800435347e-05, "loss": 2.363, "step": 2188500 }, { "epoch": 6.34, "learning_rate": 4.683304435670619e-05, "loss": 2.3578, "step": 2189000 }, { "epoch": 6.34, "learning_rate": 4.6832320709058914e-05, "loss": 2.3573, "step": 2189500 }, { "epoch": 6.34, "learning_rate": 4.6831597061411636e-05, "loss": 2.363, "step": 2190000 }, { "epoch": 6.34, "learning_rate": 4.683087341376436e-05, "loss": 2.3692, "step": 2190500 }, { "epoch": 6.34, "learning_rate": 4.683014976611708e-05, "loss": 2.3606, "step": 2191000 }, { "epoch": 6.34, "learning_rate": 4.68294261184698e-05, "loss": 2.3495, "step": 2191500 }, { "epoch": 6.34, "learning_rate": 4.6828703918117825e-05, "loss": 2.3556, "step": 2192000 }, { "epoch": 6.35, "learning_rate": 4.682798027047055e-05, "loss": 2.3772, "step": 2192500 }, { "epoch": 6.35, "learning_rate": 4.682725662282327e-05, "loss": 2.3979, "step": 2193000 }, { "epoch": 6.35, "learning_rate": 4.682653297517599e-05, "loss": 2.3626, "step": 2193500 }, { "epoch": 6.35, "learning_rate": 4.6825810774824014e-05, "loss": 2.3607, "step": 2194000 }, { "epoch": 6.35, "learning_rate": 4.6825087127176736e-05, "loss": 2.3665, "step": 2194500 }, { "epoch": 6.35, "learning_rate": 4.682436492682475e-05, "loss": 2.3769, "step": 2195000 }, { "epoch": 6.36, "learning_rate": 4.682364417376807e-05, "loss": 2.3621, "step": 2195500 }, { "epoch": 6.36, "learning_rate": 4.682292052612079e-05, "loss": 2.3919, "step": 2196000 }, { "epoch": 6.36, "learning_rate": 4.682219687847351e-05, "loss": 2.3559, "step": 2196500 }, { "epoch": 6.36, "learning_rate": 4.6821473230826234e-05, "loss": 2.3766, "step": 2197000 }, { "epoch": 6.36, "learning_rate": 4.6820749583178957e-05, "loss": 2.3619, "step": 2197500 }, { "epoch": 6.36, "learning_rate": 4.682002593553168e-05, "loss": 2.3717, "step": 2198000 }, { "epoch": 6.36, "learning_rate": 4.68193022878844e-05, "loss": 2.3744, "step": 2198500 }, { "epoch": 6.37, "learning_rate": 4.681857864023712e-05, "loss": 2.3556, "step": 2199000 }, { "epoch": 6.37, "learning_rate": 4.681785499258985e-05, "loss": 2.3724, "step": 2199500 }, { "epoch": 6.37, "learning_rate": 4.6817131344942574e-05, "loss": 2.3997, "step": 2200000 }, { "epoch": 6.37, "learning_rate": 4.68164076972953e-05, "loss": 2.3644, "step": 2200500 }, { "epoch": 6.37, "learning_rate": 4.681568404964802e-05, "loss": 2.3654, "step": 2201000 }, { "epoch": 6.37, "learning_rate": 4.681496040200075e-05, "loss": 2.3407, "step": 2201500 }, { "epoch": 6.37, "learning_rate": 4.681423675435347e-05, "loss": 2.3706, "step": 2202000 }, { "epoch": 6.38, "learning_rate": 4.6813514554001486e-05, "loss": 2.3513, "step": 2202500 }, { "epoch": 6.38, "learning_rate": 4.681279090635421e-05, "loss": 2.3764, "step": 2203000 }, { "epoch": 6.38, "learning_rate": 4.681206725870693e-05, "loss": 2.3667, "step": 2203500 }, { "epoch": 6.38, "learning_rate": 4.681134505835495e-05, "loss": 2.3659, "step": 2204000 }, { "epoch": 6.38, "learning_rate": 4.6810621410707675e-05, "loss": 2.3518, "step": 2204500 }, { "epoch": 6.38, "learning_rate": 4.68098977630604e-05, "loss": 2.361, "step": 2205000 }, { "epoch": 6.38, "learning_rate": 4.680917411541312e-05, "loss": 2.3567, "step": 2205500 }, { "epoch": 6.39, "learning_rate": 4.680845046776584e-05, "loss": 2.3524, "step": 2206000 }, { "epoch": 6.39, "learning_rate": 4.6807726820118564e-05, "loss": 2.3682, "step": 2206500 }, { "epoch": 6.39, "learning_rate": 4.6807003172471286e-05, "loss": 2.3725, "step": 2207000 }, { "epoch": 6.39, "learning_rate": 4.680627952482401e-05, "loss": 2.3803, "step": 2207500 }, { "epoch": 6.39, "learning_rate": 4.680555587717673e-05, "loss": 2.3696, "step": 2208000 }, { "epoch": 6.39, "learning_rate": 4.680483222952945e-05, "loss": 2.3454, "step": 2208500 }, { "epoch": 6.39, "learning_rate": 4.6804110029177475e-05, "loss": 2.3648, "step": 2209000 }, { "epoch": 6.4, "learning_rate": 4.6803386381530204e-05, "loss": 2.3479, "step": 2209500 }, { "epoch": 6.4, "learning_rate": 4.6802662733882926e-05, "loss": 2.3729, "step": 2210000 }, { "epoch": 6.4, "learning_rate": 4.680193908623565e-05, "loss": 2.3662, "step": 2210500 }, { "epoch": 6.4, "learning_rate": 4.680121543858837e-05, "loss": 2.3798, "step": 2211000 }, { "epoch": 6.4, "learning_rate": 4.680049179094109e-05, "loss": 2.3545, "step": 2211500 }, { "epoch": 6.4, "learning_rate": 4.6799768143293815e-05, "loss": 2.3814, "step": 2212000 }, { "epoch": 6.4, "learning_rate": 4.679904449564654e-05, "loss": 2.3774, "step": 2212500 }, { "epoch": 6.41, "learning_rate": 4.679832084799926e-05, "loss": 2.3773, "step": 2213000 }, { "epoch": 6.41, "learning_rate": 4.679759720035198e-05, "loss": 2.3539, "step": 2213500 }, { "epoch": 6.41, "learning_rate": 4.6796873552704704e-05, "loss": 2.3646, "step": 2214000 }, { "epoch": 6.41, "learning_rate": 4.6796149905057426e-05, "loss": 2.3858, "step": 2214500 }, { "epoch": 6.41, "learning_rate": 4.679542770470545e-05, "loss": 2.3749, "step": 2215000 }, { "epoch": 6.41, "learning_rate": 4.6794705504353464e-05, "loss": 2.3559, "step": 2215500 }, { "epoch": 6.41, "learning_rate": 4.6793981856706186e-05, "loss": 2.3766, "step": 2216000 }, { "epoch": 6.42, "learning_rate": 4.6793258209058915e-05, "loss": 2.3712, "step": 2216500 }, { "epoch": 6.42, "learning_rate": 4.679253600870693e-05, "loss": 2.3731, "step": 2217000 }, { "epoch": 6.42, "learning_rate": 4.679181236105965e-05, "loss": 2.3631, "step": 2217500 }, { "epoch": 6.42, "learning_rate": 4.6791088713412375e-05, "loss": 2.3669, "step": 2218000 }, { "epoch": 6.42, "learning_rate": 4.6790365065765104e-05, "loss": 2.3862, "step": 2218500 }, { "epoch": 6.42, "learning_rate": 4.678964286541312e-05, "loss": 2.3633, "step": 2219000 }, { "epoch": 6.42, "learning_rate": 4.678891921776584e-05, "loss": 2.3704, "step": 2219500 }, { "epoch": 6.43, "learning_rate": 4.6788195570118564e-05, "loss": 2.3771, "step": 2220000 }, { "epoch": 6.43, "learning_rate": 4.678747192247129e-05, "loss": 2.3643, "step": 2220500 }, { "epoch": 6.43, "learning_rate": 4.678674827482401e-05, "loss": 2.3565, "step": 2221000 }, { "epoch": 6.43, "learning_rate": 4.678602462717673e-05, "loss": 2.365, "step": 2221500 }, { "epoch": 6.43, "learning_rate": 4.6785300979529453e-05, "loss": 2.3808, "step": 2222000 }, { "epoch": 6.43, "learning_rate": 4.6784577331882176e-05, "loss": 2.3632, "step": 2222500 }, { "epoch": 6.43, "learning_rate": 4.6783853684234905e-05, "loss": 2.3745, "step": 2223000 }, { "epoch": 6.44, "learning_rate": 4.678313003658763e-05, "loss": 2.3782, "step": 2223500 }, { "epoch": 6.44, "learning_rate": 4.6782406388940356e-05, "loss": 2.3597, "step": 2224000 }, { "epoch": 6.44, "learning_rate": 4.678168274129308e-05, "loss": 2.382, "step": 2224500 }, { "epoch": 6.44, "learning_rate": 4.67809590936458e-05, "loss": 2.3748, "step": 2225000 }, { "epoch": 6.44, "learning_rate": 4.678023544599852e-05, "loss": 2.3384, "step": 2225500 }, { "epoch": 6.44, "learning_rate": 4.6779511798351245e-05, "loss": 2.361, "step": 2226000 }, { "epoch": 6.44, "learning_rate": 4.677878959799926e-05, "loss": 2.3572, "step": 2226500 }, { "epoch": 6.45, "learning_rate": 4.677806595035198e-05, "loss": 2.3402, "step": 2227000 }, { "epoch": 6.45, "learning_rate": 4.6777342302704705e-05, "loss": 2.3631, "step": 2227500 }, { "epoch": 6.45, "learning_rate": 4.677661865505743e-05, "loss": 2.3629, "step": 2228000 }, { "epoch": 6.45, "learning_rate": 4.6775895007410156e-05, "loss": 2.3656, "step": 2228500 }, { "epoch": 6.45, "learning_rate": 4.677517135976288e-05, "loss": 2.3748, "step": 2229000 }, { "epoch": 6.45, "learning_rate": 4.6774449159410894e-05, "loss": 2.3541, "step": 2229500 }, { "epoch": 6.45, "learning_rate": 4.6773725511763616e-05, "loss": 2.3596, "step": 2230000 }, { "epoch": 6.46, "learning_rate": 4.677300186411634e-05, "loss": 2.3516, "step": 2230500 }, { "epoch": 6.46, "learning_rate": 4.6772279663764354e-05, "loss": 2.3757, "step": 2231000 }, { "epoch": 6.46, "learning_rate": 4.677155601611708e-05, "loss": 2.3759, "step": 2231500 }, { "epoch": 6.46, "learning_rate": 4.6770832368469805e-05, "loss": 2.3601, "step": 2232000 }, { "epoch": 6.46, "learning_rate": 4.6770108720822534e-05, "loss": 2.394, "step": 2232500 }, { "epoch": 6.46, "learning_rate": 4.6769385073175256e-05, "loss": 2.3873, "step": 2233000 }, { "epoch": 6.47, "learning_rate": 4.676866287282327e-05, "loss": 2.3627, "step": 2233500 }, { "epoch": 6.47, "learning_rate": 4.6767939225175994e-05, "loss": 2.3596, "step": 2234000 }, { "epoch": 6.47, "learning_rate": 4.6767215577528716e-05, "loss": 2.3651, "step": 2234500 }, { "epoch": 6.47, "learning_rate": 4.676649192988144e-05, "loss": 2.3707, "step": 2235000 }, { "epoch": 6.47, "learning_rate": 4.676576828223416e-05, "loss": 2.3775, "step": 2235500 }, { "epoch": 6.47, "learning_rate": 4.676504463458688e-05, "loss": 2.3654, "step": 2236000 }, { "epoch": 6.47, "learning_rate": 4.6764320986939605e-05, "loss": 2.3503, "step": 2236500 }, { "epoch": 6.48, "learning_rate": 4.676359878658763e-05, "loss": 2.3315, "step": 2237000 }, { "epoch": 6.48, "learning_rate": 4.676287513894035e-05, "loss": 2.3735, "step": 2237500 }, { "epoch": 6.48, "learning_rate": 4.676215149129307e-05, "loss": 2.3999, "step": 2238000 }, { "epoch": 6.48, "learning_rate": 4.6761427843645794e-05, "loss": 2.3536, "step": 2238500 }, { "epoch": 6.48, "learning_rate": 4.6760704195998523e-05, "loss": 2.3719, "step": 2239000 }, { "epoch": 6.48, "learning_rate": 4.6759980548351246e-05, "loss": 2.3727, "step": 2239500 }, { "epoch": 6.48, "learning_rate": 4.675925690070397e-05, "loss": 2.3668, "step": 2240000 }, { "epoch": 6.49, "learning_rate": 4.675853325305669e-05, "loss": 2.3679, "step": 2240500 }, { "epoch": 6.49, "learning_rate": 4.6757811052704706e-05, "loss": 2.3649, "step": 2241000 }, { "epoch": 6.49, "learning_rate": 4.6757087405057435e-05, "loss": 2.3763, "step": 2241500 }, { "epoch": 6.49, "learning_rate": 4.675636520470545e-05, "loss": 2.3668, "step": 2242000 }, { "epoch": 6.49, "learning_rate": 4.675564155705817e-05, "loss": 2.3512, "step": 2242500 }, { "epoch": 6.49, "learning_rate": 4.6754917909410895e-05, "loss": 2.3881, "step": 2243000 }, { "epoch": 6.49, "learning_rate": 4.675419426176362e-05, "loss": 2.391, "step": 2243500 }, { "epoch": 6.5, "learning_rate": 4.675347061411634e-05, "loss": 2.3642, "step": 2244000 }, { "epoch": 6.5, "learning_rate": 4.6752748413764355e-05, "loss": 2.3765, "step": 2244500 }, { "epoch": 6.5, "learning_rate": 4.6752024766117084e-05, "loss": 2.3701, "step": 2245000 }, { "epoch": 6.5, "learning_rate": 4.6751301118469806e-05, "loss": 2.3619, "step": 2245500 }, { "epoch": 6.5, "learning_rate": 4.675057747082253e-05, "loss": 2.3619, "step": 2246000 }, { "epoch": 6.5, "learning_rate": 4.674985382317526e-05, "loss": 2.3714, "step": 2246500 }, { "epoch": 6.5, "learning_rate": 4.674913162282327e-05, "loss": 2.3809, "step": 2247000 }, { "epoch": 6.51, "learning_rate": 4.6748407975175995e-05, "loss": 2.3708, "step": 2247500 }, { "epoch": 6.51, "learning_rate": 4.674768432752872e-05, "loss": 2.3507, "step": 2248000 }, { "epoch": 6.51, "learning_rate": 4.674696067988144e-05, "loss": 2.3976, "step": 2248500 }, { "epoch": 6.51, "learning_rate": 4.6746238479529455e-05, "loss": 2.3717, "step": 2249000 }, { "epoch": 6.51, "learning_rate": 4.6745514831882184e-05, "loss": 2.3705, "step": 2249500 }, { "epoch": 6.51, "learning_rate": 4.6744791184234906e-05, "loss": 2.3657, "step": 2250000 }, { "epoch": 6.51, "learning_rate": 4.674406898388292e-05, "loss": 2.3953, "step": 2250500 }, { "epoch": 6.52, "learning_rate": 4.6743345336235644e-05, "loss": 2.3804, "step": 2251000 }, { "epoch": 6.52, "learning_rate": 4.6742621688588366e-05, "loss": 2.3735, "step": 2251500 }, { "epoch": 6.52, "learning_rate": 4.674189804094109e-05, "loss": 2.3932, "step": 2252000 }, { "epoch": 6.52, "learning_rate": 4.674117439329381e-05, "loss": 2.3694, "step": 2252500 }, { "epoch": 6.52, "learning_rate": 4.674045074564653e-05, "loss": 2.3733, "step": 2253000 }, { "epoch": 6.52, "learning_rate": 4.6739727097999255e-05, "loss": 2.3707, "step": 2253500 }, { "epoch": 6.52, "learning_rate": 4.6739003450351984e-05, "loss": 2.3696, "step": 2254000 }, { "epoch": 6.53, "learning_rate": 4.6738279802704706e-05, "loss": 2.3504, "step": 2254500 }, { "epoch": 6.53, "learning_rate": 4.6737556155057435e-05, "loss": 2.38, "step": 2255000 }, { "epoch": 6.53, "learning_rate": 4.673683250741016e-05, "loss": 2.3562, "step": 2255500 }, { "epoch": 6.53, "learning_rate": 4.673611030705817e-05, "loss": 2.3642, "step": 2256000 }, { "epoch": 6.53, "learning_rate": 4.6735386659410895e-05, "loss": 2.3779, "step": 2256500 }, { "epoch": 6.53, "learning_rate": 4.673466301176362e-05, "loss": 2.3865, "step": 2257000 }, { "epoch": 6.53, "learning_rate": 4.673393936411634e-05, "loss": 2.3619, "step": 2257500 }, { "epoch": 6.54, "learning_rate": 4.673321571646906e-05, "loss": 2.3782, "step": 2258000 }, { "epoch": 6.54, "learning_rate": 4.6732492068821784e-05, "loss": 2.3864, "step": 2258500 }, { "epoch": 6.54, "learning_rate": 4.673176842117451e-05, "loss": 2.3639, "step": 2259000 }, { "epoch": 6.54, "learning_rate": 4.6731044773527236e-05, "loss": 2.3859, "step": 2259500 }, { "epoch": 6.54, "learning_rate": 4.673032112587996e-05, "loss": 2.3592, "step": 2260000 }, { "epoch": 6.54, "learning_rate": 4.672959747823268e-05, "loss": 2.3578, "step": 2260500 }, { "epoch": 6.54, "learning_rate": 4.67288752778807e-05, "loss": 2.3599, "step": 2261000 }, { "epoch": 6.55, "learning_rate": 4.6728151630233425e-05, "loss": 2.3463, "step": 2261500 }, { "epoch": 6.55, "learning_rate": 4.672742798258615e-05, "loss": 2.3663, "step": 2262000 }, { "epoch": 6.55, "learning_rate": 4.672670433493887e-05, "loss": 2.3953, "step": 2262500 }, { "epoch": 6.55, "learning_rate": 4.6725982134586885e-05, "loss": 2.3726, "step": 2263000 }, { "epoch": 6.55, "learning_rate": 4.6725258486939614e-05, "loss": 2.3686, "step": 2263500 }, { "epoch": 6.55, "learning_rate": 4.6724534839292336e-05, "loss": 2.3812, "step": 2264000 }, { "epoch": 6.55, "learning_rate": 4.672381119164506e-05, "loss": 2.3718, "step": 2264500 }, { "epoch": 6.56, "learning_rate": 4.672308754399778e-05, "loss": 2.3917, "step": 2265000 }, { "epoch": 6.56, "learning_rate": 4.67223638963505e-05, "loss": 2.3881, "step": 2265500 }, { "epoch": 6.56, "learning_rate": 4.6721640248703225e-05, "loss": 2.3491, "step": 2266000 }, { "epoch": 6.56, "learning_rate": 4.672091804835124e-05, "loss": 2.3554, "step": 2266500 }, { "epoch": 6.56, "learning_rate": 4.672019584799926e-05, "loss": 2.3549, "step": 2267000 }, { "epoch": 6.56, "learning_rate": 4.6719472200351985e-05, "loss": 2.3682, "step": 2267500 }, { "epoch": 6.56, "learning_rate": 4.671874855270471e-05, "loss": 2.367, "step": 2268000 }, { "epoch": 6.57, "learning_rate": 4.6718024905057436e-05, "loss": 2.3758, "step": 2268500 }, { "epoch": 6.57, "learning_rate": 4.671730125741016e-05, "loss": 2.3955, "step": 2269000 }, { "epoch": 6.57, "learning_rate": 4.671657760976288e-05, "loss": 2.3834, "step": 2269500 }, { "epoch": 6.57, "learning_rate": 4.67158539621156e-05, "loss": 2.3969, "step": 2270000 }, { "epoch": 6.57, "learning_rate": 4.6715130314468325e-05, "loss": 2.3823, "step": 2270500 }, { "epoch": 6.57, "learning_rate": 4.671440666682105e-05, "loss": 2.3642, "step": 2271000 }, { "epoch": 6.58, "learning_rate": 4.671368301917377e-05, "loss": 2.3694, "step": 2271500 }, { "epoch": 6.58, "learning_rate": 4.671295937152649e-05, "loss": 2.3843, "step": 2272000 }, { "epoch": 6.58, "learning_rate": 4.6712237171174514e-05, "loss": 2.39, "step": 2272500 }, { "epoch": 6.58, "learning_rate": 4.6711513523527236e-05, "loss": 2.3651, "step": 2273000 }, { "epoch": 6.58, "learning_rate": 4.671079132317525e-05, "loss": 2.371, "step": 2273500 }, { "epoch": 6.58, "learning_rate": 4.6710067675527974e-05, "loss": 2.363, "step": 2274000 }, { "epoch": 6.58, "learning_rate": 4.670934547517599e-05, "loss": 2.352, "step": 2274500 }, { "epoch": 6.59, "learning_rate": 4.670862182752871e-05, "loss": 2.3782, "step": 2275000 }, { "epoch": 6.59, "learning_rate": 4.6707898179881434e-05, "loss": 2.3726, "step": 2275500 }, { "epoch": 6.59, "learning_rate": 4.670717453223416e-05, "loss": 2.3397, "step": 2276000 }, { "epoch": 6.59, "learning_rate": 4.6706450884586886e-05, "loss": 2.3566, "step": 2276500 }, { "epoch": 6.59, "learning_rate": 4.6705727236939615e-05, "loss": 2.3604, "step": 2277000 }, { "epoch": 6.59, "learning_rate": 4.670500358929234e-05, "loss": 2.4109, "step": 2277500 }, { "epoch": 6.59, "learning_rate": 4.670427994164506e-05, "loss": 2.3546, "step": 2278000 }, { "epoch": 6.6, "learning_rate": 4.670355629399778e-05, "loss": 2.3622, "step": 2278500 }, { "epoch": 6.6, "learning_rate": 4.6702832646350503e-05, "loss": 2.3509, "step": 2279000 }, { "epoch": 6.6, "learning_rate": 4.6702108998703226e-05, "loss": 2.3576, "step": 2279500 }, { "epoch": 6.6, "learning_rate": 4.670138535105595e-05, "loss": 2.3658, "step": 2280000 }, { "epoch": 6.6, "learning_rate": 4.6700663150703963e-05, "loss": 2.3563, "step": 2280500 }, { "epoch": 6.6, "learning_rate": 4.6699939503056686e-05, "loss": 2.3601, "step": 2281000 }, { "epoch": 6.6, "learning_rate": 4.6699215855409415e-05, "loss": 2.3869, "step": 2281500 }, { "epoch": 6.61, "learning_rate": 4.669849220776214e-05, "loss": 2.3667, "step": 2282000 }, { "epoch": 6.61, "learning_rate": 4.669776856011486e-05, "loss": 2.343, "step": 2282500 }, { "epoch": 6.61, "learning_rate": 4.669704491246758e-05, "loss": 2.3715, "step": 2283000 }, { "epoch": 6.61, "learning_rate": 4.669632126482031e-05, "loss": 2.3514, "step": 2283500 }, { "epoch": 6.61, "learning_rate": 4.6695599064468326e-05, "loss": 2.3638, "step": 2284000 }, { "epoch": 6.61, "learning_rate": 4.669487541682105e-05, "loss": 2.3665, "step": 2284500 }, { "epoch": 6.61, "learning_rate": 4.669415176917377e-05, "loss": 2.3683, "step": 2285000 }, { "epoch": 6.62, "learning_rate": 4.669342812152649e-05, "loss": 2.3897, "step": 2285500 }, { "epoch": 6.62, "learning_rate": 4.6692704473879215e-05, "loss": 2.359, "step": 2286000 }, { "epoch": 6.62, "learning_rate": 4.669198082623194e-05, "loss": 2.3752, "step": 2286500 }, { "epoch": 6.62, "learning_rate": 4.6691257178584666e-05, "loss": 2.3915, "step": 2287000 }, { "epoch": 6.62, "learning_rate": 4.669053353093739e-05, "loss": 2.3644, "step": 2287500 }, { "epoch": 6.62, "learning_rate": 4.668980988329011e-05, "loss": 2.3657, "step": 2288000 }, { "epoch": 6.62, "learning_rate": 4.6689087682938126e-05, "loss": 2.3596, "step": 2288500 }, { "epoch": 6.63, "learning_rate": 4.668836548258614e-05, "loss": 2.3579, "step": 2289000 }, { "epoch": 6.63, "learning_rate": 4.6687641834938864e-05, "loss": 2.3835, "step": 2289500 }, { "epoch": 6.63, "learning_rate": 4.6686919634586886e-05, "loss": 2.3672, "step": 2290000 }, { "epoch": 6.63, "learning_rate": 4.668619598693961e-05, "loss": 2.3889, "step": 2290500 }, { "epoch": 6.63, "learning_rate": 4.668547233929234e-05, "loss": 2.3416, "step": 2291000 }, { "epoch": 6.63, "learning_rate": 4.668474869164506e-05, "loss": 2.3738, "step": 2291500 }, { "epoch": 6.63, "learning_rate": 4.668402504399778e-05, "loss": 2.3787, "step": 2292000 }, { "epoch": 6.64, "learning_rate": 4.6683301396350504e-05, "loss": 2.3483, "step": 2292500 }, { "epoch": 6.64, "learning_rate": 4.6682577748703226e-05, "loss": 2.3624, "step": 2293000 }, { "epoch": 6.64, "learning_rate": 4.668185410105595e-05, "loss": 2.3656, "step": 2293500 }, { "epoch": 6.64, "learning_rate": 4.668113045340867e-05, "loss": 2.3838, "step": 2294000 }, { "epoch": 6.64, "learning_rate": 4.668040680576139e-05, "loss": 2.3809, "step": 2294500 }, { "epoch": 6.64, "learning_rate": 4.6679684605409416e-05, "loss": 2.3554, "step": 2295000 }, { "epoch": 6.64, "learning_rate": 4.667896095776214e-05, "loss": 2.3481, "step": 2295500 }, { "epoch": 6.65, "learning_rate": 4.667823731011486e-05, "loss": 2.3685, "step": 2296000 }, { "epoch": 6.65, "learning_rate": 4.667751366246758e-05, "loss": 2.3933, "step": 2296500 }, { "epoch": 6.65, "learning_rate": 4.66767914621156e-05, "loss": 2.3808, "step": 2297000 }, { "epoch": 6.65, "learning_rate": 4.667606781446832e-05, "loss": 2.3687, "step": 2297500 }, { "epoch": 6.65, "learning_rate": 4.667534416682104e-05, "loss": 2.3733, "step": 2298000 }, { "epoch": 6.65, "learning_rate": 4.667462051917377e-05, "loss": 2.4043, "step": 2298500 }, { "epoch": 6.65, "learning_rate": 4.6673896871526493e-05, "loss": 2.3438, "step": 2299000 }, { "epoch": 6.66, "learning_rate": 4.6673173223879216e-05, "loss": 2.3763, "step": 2299500 }, { "epoch": 6.66, "learning_rate": 4.6672449576231945e-05, "loss": 2.389, "step": 2300000 }, { "epoch": 6.66, "learning_rate": 4.667172592858467e-05, "loss": 2.371, "step": 2300500 }, { "epoch": 6.66, "learning_rate": 4.667100372823268e-05, "loss": 2.3942, "step": 2301000 }, { "epoch": 6.66, "learning_rate": 4.6670280080585405e-05, "loss": 2.3582, "step": 2301500 }, { "epoch": 6.66, "learning_rate": 4.666955643293813e-05, "loss": 2.3644, "step": 2302000 }, { "epoch": 6.66, "learning_rate": 4.666883278529085e-05, "loss": 2.3689, "step": 2302500 }, { "epoch": 6.67, "learning_rate": 4.666810913764357e-05, "loss": 2.3848, "step": 2303000 }, { "epoch": 6.67, "learning_rate": 4.6667386937291594e-05, "loss": 2.377, "step": 2303500 }, { "epoch": 6.67, "learning_rate": 4.6666663289644316e-05, "loss": 2.3887, "step": 2304000 }, { "epoch": 6.67, "learning_rate": 4.666593964199704e-05, "loss": 2.3615, "step": 2304500 }, { "epoch": 6.67, "learning_rate": 4.666521599434976e-05, "loss": 2.3709, "step": 2305000 }, { "epoch": 6.67, "learning_rate": 4.666449234670248e-05, "loss": 2.3548, "step": 2305500 }, { "epoch": 6.67, "learning_rate": 4.6663770146350505e-05, "loss": 2.3632, "step": 2306000 }, { "epoch": 6.68, "learning_rate": 4.666304649870323e-05, "loss": 2.3634, "step": 2306500 }, { "epoch": 6.68, "learning_rate": 4.666232285105595e-05, "loss": 2.3594, "step": 2307000 }, { "epoch": 6.68, "learning_rate": 4.666159920340867e-05, "loss": 2.3622, "step": 2307500 }, { "epoch": 6.68, "learning_rate": 4.6660875555761394e-05, "loss": 2.3867, "step": 2308000 }, { "epoch": 6.68, "learning_rate": 4.6660151908114116e-05, "loss": 2.3873, "step": 2308500 }, { "epoch": 6.68, "learning_rate": 4.6659428260466845e-05, "loss": 2.3545, "step": 2309000 }, { "epoch": 6.69, "learning_rate": 4.665870461281957e-05, "loss": 2.3582, "step": 2309500 }, { "epoch": 6.69, "learning_rate": 4.665798096517229e-05, "loss": 2.4046, "step": 2310000 }, { "epoch": 6.69, "learning_rate": 4.6657258764820305e-05, "loss": 2.3662, "step": 2310500 }, { "epoch": 6.69, "learning_rate": 4.665653656446832e-05, "loss": 2.3817, "step": 2311000 }, { "epoch": 6.69, "learning_rate": 4.665581291682104e-05, "loss": 2.3921, "step": 2311500 }, { "epoch": 6.69, "learning_rate": 4.6655090716469065e-05, "loss": 2.3638, "step": 2312000 }, { "epoch": 6.69, "learning_rate": 4.665436706882179e-05, "loss": 2.3614, "step": 2312500 }, { "epoch": 6.7, "learning_rate": 4.665364342117451e-05, "loss": 2.3463, "step": 2313000 }, { "epoch": 6.7, "learning_rate": 4.665291977352724e-05, "loss": 2.3365, "step": 2313500 }, { "epoch": 6.7, "learning_rate": 4.665219612587996e-05, "loss": 2.3892, "step": 2314000 }, { "epoch": 6.7, "learning_rate": 4.665147247823268e-05, "loss": 2.3873, "step": 2314500 }, { "epoch": 6.7, "learning_rate": 4.6650748830585406e-05, "loss": 2.3623, "step": 2315000 }, { "epoch": 6.7, "learning_rate": 4.665002518293813e-05, "loss": 2.3495, "step": 2315500 }, { "epoch": 6.7, "learning_rate": 4.664930153529085e-05, "loss": 2.3978, "step": 2316000 }, { "epoch": 6.71, "learning_rate": 4.664857788764357e-05, "loss": 2.3567, "step": 2316500 }, { "epoch": 6.71, "learning_rate": 4.6647854239996294e-05, "loss": 2.3791, "step": 2317000 }, { "epoch": 6.71, "learning_rate": 4.664713203964432e-05, "loss": 2.3733, "step": 2317500 }, { "epoch": 6.71, "learning_rate": 4.664640839199704e-05, "loss": 2.3451, "step": 2318000 }, { "epoch": 6.71, "learning_rate": 4.664568474434976e-05, "loss": 2.3656, "step": 2318500 }, { "epoch": 6.71, "learning_rate": 4.6644961096702484e-05, "loss": 2.3623, "step": 2319000 }, { "epoch": 6.71, "learning_rate": 4.66442388963505e-05, "loss": 2.3635, "step": 2319500 }, { "epoch": 6.72, "learning_rate": 4.664351524870322e-05, "loss": 2.3734, "step": 2320000 }, { "epoch": 6.72, "learning_rate": 4.6642793048351244e-05, "loss": 2.339, "step": 2320500 }, { "epoch": 6.72, "learning_rate": 4.664206940070397e-05, "loss": 2.3782, "step": 2321000 }, { "epoch": 6.72, "learning_rate": 4.6641345753056695e-05, "loss": 2.3568, "step": 2321500 }, { "epoch": 6.72, "learning_rate": 4.664062210540942e-05, "loss": 2.3476, "step": 2322000 }, { "epoch": 6.72, "learning_rate": 4.663989845776214e-05, "loss": 2.3475, "step": 2322500 }, { "epoch": 6.72, "learning_rate": 4.663917481011486e-05, "loss": 2.3831, "step": 2323000 }, { "epoch": 6.73, "learning_rate": 4.6638451162467584e-05, "loss": 2.3697, "step": 2323500 }, { "epoch": 6.73, "learning_rate": 4.6637727514820306e-05, "loss": 2.3661, "step": 2324000 }, { "epoch": 6.73, "learning_rate": 4.663700386717303e-05, "loss": 2.363, "step": 2324500 }, { "epoch": 6.73, "learning_rate": 4.663628021952575e-05, "loss": 2.3375, "step": 2325000 }, { "epoch": 6.73, "learning_rate": 4.663555657187847e-05, "loss": 2.3475, "step": 2325500 }, { "epoch": 6.73, "learning_rate": 4.6634834371526495e-05, "loss": 2.3933, "step": 2326000 }, { "epoch": 6.73, "learning_rate": 4.663411072387922e-05, "loss": 2.3695, "step": 2326500 }, { "epoch": 6.74, "learning_rate": 4.663338707623194e-05, "loss": 2.3463, "step": 2327000 }, { "epoch": 6.74, "learning_rate": 4.663266342858466e-05, "loss": 2.374, "step": 2327500 }, { "epoch": 6.74, "learning_rate": 4.6631939780937384e-05, "loss": 2.3619, "step": 2328000 }, { "epoch": 6.74, "learning_rate": 4.663121613329011e-05, "loss": 2.3693, "step": 2328500 }, { "epoch": 6.74, "learning_rate": 4.6630492485642835e-05, "loss": 2.3751, "step": 2329000 }, { "epoch": 6.74, "learning_rate": 4.662977028529085e-05, "loss": 2.3574, "step": 2329500 }, { "epoch": 6.74, "learning_rate": 4.662904663764357e-05, "loss": 2.3679, "step": 2330000 }, { "epoch": 6.75, "learning_rate": 4.6628322989996295e-05, "loss": 2.3782, "step": 2330500 }, { "epoch": 6.75, "learning_rate": 4.6627599342349024e-05, "loss": 2.3626, "step": 2331000 }, { "epoch": 6.75, "learning_rate": 4.6626875694701747e-05, "loss": 2.3977, "step": 2331500 }, { "epoch": 6.75, "learning_rate": 4.662615204705447e-05, "loss": 2.3659, "step": 2332000 }, { "epoch": 6.75, "learning_rate": 4.662542839940719e-05, "loss": 2.3607, "step": 2332500 }, { "epoch": 6.75, "learning_rate": 4.6624706199055207e-05, "loss": 2.3645, "step": 2333000 }, { "epoch": 6.75, "learning_rate": 4.662398255140793e-05, "loss": 2.3661, "step": 2333500 }, { "epoch": 6.76, "learning_rate": 4.662325890376065e-05, "loss": 2.3768, "step": 2334000 }, { "epoch": 6.76, "learning_rate": 4.662253525611337e-05, "loss": 2.3653, "step": 2334500 }, { "epoch": 6.76, "learning_rate": 4.6621811608466095e-05, "loss": 2.37, "step": 2335000 }, { "epoch": 6.76, "learning_rate": 4.6621087960818824e-05, "loss": 2.3776, "step": 2335500 }, { "epoch": 6.76, "learning_rate": 4.662036431317155e-05, "loss": 2.3806, "step": 2336000 }, { "epoch": 6.76, "learning_rate": 4.661964211281957e-05, "loss": 2.3916, "step": 2336500 }, { "epoch": 6.76, "learning_rate": 4.661891846517229e-05, "loss": 2.3743, "step": 2337000 }, { "epoch": 6.77, "learning_rate": 4.6618194817525014e-05, "loss": 2.3667, "step": 2337500 }, { "epoch": 6.77, "learning_rate": 4.661747261717303e-05, "loss": 2.3685, "step": 2338000 }, { "epoch": 6.77, "learning_rate": 4.661674896952575e-05, "loss": 2.3579, "step": 2338500 }, { "epoch": 6.77, "learning_rate": 4.6616025321878474e-05, "loss": 2.3865, "step": 2339000 }, { "epoch": 6.77, "learning_rate": 4.6615301674231196e-05, "loss": 2.3766, "step": 2339500 }, { "epoch": 6.77, "learning_rate": 4.6614578026583925e-05, "loss": 2.3569, "step": 2340000 }, { "epoch": 6.77, "learning_rate": 4.661385437893665e-05, "loss": 2.3631, "step": 2340500 }, { "epoch": 6.78, "learning_rate": 4.661313073128937e-05, "loss": 2.3821, "step": 2341000 }, { "epoch": 6.78, "learning_rate": 4.661240708364209e-05, "loss": 2.3739, "step": 2341500 }, { "epoch": 6.78, "learning_rate": 4.6611683435994814e-05, "loss": 2.3781, "step": 2342000 }, { "epoch": 6.78, "learning_rate": 4.6610959788347536e-05, "loss": 2.3672, "step": 2342500 }, { "epoch": 6.78, "learning_rate": 4.661023758799556e-05, "loss": 2.36, "step": 2343000 }, { "epoch": 6.78, "learning_rate": 4.660951394034828e-05, "loss": 2.3666, "step": 2343500 }, { "epoch": 6.78, "learning_rate": 4.6608790292701e-05, "loss": 2.3854, "step": 2344000 }, { "epoch": 6.79, "learning_rate": 4.6608066645053725e-05, "loss": 2.3722, "step": 2344500 }, { "epoch": 6.79, "learning_rate": 4.660734299740645e-05, "loss": 2.3698, "step": 2345000 }, { "epoch": 6.79, "learning_rate": 4.6606619349759176e-05, "loss": 2.3582, "step": 2345500 }, { "epoch": 6.79, "learning_rate": 4.66058957021119e-05, "loss": 2.3467, "step": 2346000 }, { "epoch": 6.79, "learning_rate": 4.660517205446462e-05, "loss": 2.3673, "step": 2346500 }, { "epoch": 6.79, "learning_rate": 4.660444840681734e-05, "loss": 2.3732, "step": 2347000 }, { "epoch": 6.8, "learning_rate": 4.6603724759170065e-05, "loss": 2.3661, "step": 2347500 }, { "epoch": 6.8, "learning_rate": 4.660300111152279e-05, "loss": 2.3865, "step": 2348000 }, { "epoch": 6.8, "learning_rate": 4.6602280358466096e-05, "loss": 2.388, "step": 2348500 }, { "epoch": 6.8, "learning_rate": 4.6601556710818825e-05, "loss": 2.3626, "step": 2349000 }, { "epoch": 6.8, "learning_rate": 4.660083306317155e-05, "loss": 2.3759, "step": 2349500 }, { "epoch": 6.8, "learning_rate": 4.660010941552427e-05, "loss": 2.3445, "step": 2350000 }, { "epoch": 6.8, "learning_rate": 4.6599385767877e-05, "loss": 2.3749, "step": 2350500 }, { "epoch": 6.81, "learning_rate": 4.659866212022972e-05, "loss": 2.3633, "step": 2351000 }, { "epoch": 6.81, "learning_rate": 4.6597939919877737e-05, "loss": 2.3735, "step": 2351500 }, { "epoch": 6.81, "learning_rate": 4.659721627223046e-05, "loss": 2.3829, "step": 2352000 }, { "epoch": 6.81, "learning_rate": 4.659649262458318e-05, "loss": 2.399, "step": 2352500 }, { "epoch": 6.81, "learning_rate": 4.65957689769359e-05, "loss": 2.3784, "step": 2353000 }, { "epoch": 6.81, "learning_rate": 4.6595045329288625e-05, "loss": 2.3593, "step": 2353500 }, { "epoch": 6.81, "learning_rate": 4.659432168164135e-05, "loss": 2.3593, "step": 2354000 }, { "epoch": 6.82, "learning_rate": 4.659359803399408e-05, "loss": 2.3739, "step": 2354500 }, { "epoch": 6.82, "learning_rate": 4.65928743863468e-05, "loss": 2.3693, "step": 2355000 }, { "epoch": 6.82, "learning_rate": 4.6592152185994815e-05, "loss": 2.3817, "step": 2355500 }, { "epoch": 6.82, "learning_rate": 4.659142853834754e-05, "loss": 2.3729, "step": 2356000 }, { "epoch": 6.82, "learning_rate": 4.659070489070026e-05, "loss": 2.3602, "step": 2356500 }, { "epoch": 6.82, "learning_rate": 4.658998124305298e-05, "loss": 2.3484, "step": 2357000 }, { "epoch": 6.82, "learning_rate": 4.6589257595405703e-05, "loss": 2.3475, "step": 2357500 }, { "epoch": 6.83, "learning_rate": 4.6588535395053726e-05, "loss": 2.3443, "step": 2358000 }, { "epoch": 6.83, "learning_rate": 4.658781174740645e-05, "loss": 2.3831, "step": 2358500 }, { "epoch": 6.83, "learning_rate": 4.658708809975918e-05, "loss": 2.3566, "step": 2359000 }, { "epoch": 6.83, "learning_rate": 4.658636589940719e-05, "loss": 2.3878, "step": 2359500 }, { "epoch": 6.83, "learning_rate": 4.6585642251759915e-05, "loss": 2.3634, "step": 2360000 }, { "epoch": 6.83, "learning_rate": 4.658491860411264e-05, "loss": 2.3503, "step": 2360500 }, { "epoch": 6.83, "learning_rate": 4.658419495646536e-05, "loss": 2.3761, "step": 2361000 }, { "epoch": 6.84, "learning_rate": 4.658347130881808e-05, "loss": 2.3566, "step": 2361500 }, { "epoch": 6.84, "learning_rate": 4.6582747661170804e-05, "loss": 2.3721, "step": 2362000 }, { "epoch": 6.84, "learning_rate": 4.6582024013523526e-05, "loss": 2.3596, "step": 2362500 }, { "epoch": 6.84, "learning_rate": 4.658130181317155e-05, "loss": 2.3811, "step": 2363000 }, { "epoch": 6.84, "learning_rate": 4.658057816552427e-05, "loss": 2.3567, "step": 2363500 }, { "epoch": 6.84, "learning_rate": 4.6579855965172286e-05, "loss": 2.3683, "step": 2364000 }, { "epoch": 6.84, "learning_rate": 4.657913231752501e-05, "loss": 2.3543, "step": 2364500 }, { "epoch": 6.85, "learning_rate": 4.657840866987773e-05, "loss": 2.3635, "step": 2365000 }, { "epoch": 6.85, "learning_rate": 4.657768502223046e-05, "loss": 2.3948, "step": 2365500 }, { "epoch": 6.85, "learning_rate": 4.657696137458318e-05, "loss": 2.3722, "step": 2366000 }, { "epoch": 6.85, "learning_rate": 4.6576237726935904e-05, "loss": 2.3509, "step": 2366500 }, { "epoch": 6.85, "learning_rate": 4.6575514079288626e-05, "loss": 2.3721, "step": 2367000 }, { "epoch": 6.85, "learning_rate": 4.6574790431641355e-05, "loss": 2.3742, "step": 2367500 }, { "epoch": 6.85, "learning_rate": 4.657406678399408e-05, "loss": 2.3646, "step": 2368000 }, { "epoch": 6.86, "learning_rate": 4.65733431363468e-05, "loss": 2.3855, "step": 2368500 }, { "epoch": 6.86, "learning_rate": 4.657261948869952e-05, "loss": 2.3762, "step": 2369000 }, { "epoch": 6.86, "learning_rate": 4.6571895841052244e-05, "loss": 2.3772, "step": 2369500 }, { "epoch": 6.86, "learning_rate": 4.6571172193404966e-05, "loss": 2.3802, "step": 2370000 }, { "epoch": 6.86, "learning_rate": 4.657044999305298e-05, "loss": 2.3768, "step": 2370500 }, { "epoch": 6.86, "learning_rate": 4.6569726345405704e-05, "loss": 2.3408, "step": 2371000 }, { "epoch": 6.86, "learning_rate": 4.6569002697758426e-05, "loss": 2.406, "step": 2371500 }, { "epoch": 6.87, "learning_rate": 4.6568279050111155e-05, "loss": 2.3444, "step": 2372000 }, { "epoch": 6.87, "learning_rate": 4.656755540246388e-05, "loss": 2.3708, "step": 2372500 }, { "epoch": 6.87, "learning_rate": 4.65668332021119e-05, "loss": 2.3894, "step": 2373000 }, { "epoch": 6.87, "learning_rate": 4.656610955446462e-05, "loss": 2.3586, "step": 2373500 }, { "epoch": 6.87, "learning_rate": 4.6565385906817345e-05, "loss": 2.3448, "step": 2374000 }, { "epoch": 6.87, "learning_rate": 4.656466225917007e-05, "loss": 2.356, "step": 2374500 }, { "epoch": 6.87, "learning_rate": 4.656393861152279e-05, "loss": 2.3706, "step": 2375000 }, { "epoch": 6.88, "learning_rate": 4.6563216411170805e-05, "loss": 2.3586, "step": 2375500 }, { "epoch": 6.88, "learning_rate": 4.656249276352353e-05, "loss": 2.3712, "step": 2376000 }, { "epoch": 6.88, "learning_rate": 4.6561769115876256e-05, "loss": 2.3665, "step": 2376500 }, { "epoch": 6.88, "learning_rate": 4.656104546822898e-05, "loss": 2.3788, "step": 2377000 }, { "epoch": 6.88, "learning_rate": 4.65603218205817e-05, "loss": 2.3624, "step": 2377500 }, { "epoch": 6.88, "learning_rate": 4.6559599620229716e-05, "loss": 2.3703, "step": 2378000 }, { "epoch": 6.88, "learning_rate": 4.655887741987773e-05, "loss": 2.3659, "step": 2378500 }, { "epoch": 6.89, "learning_rate": 4.6558153772230454e-05, "loss": 2.3495, "step": 2379000 }, { "epoch": 6.89, "learning_rate": 4.6557430124583176e-05, "loss": 2.3593, "step": 2379500 }, { "epoch": 6.89, "learning_rate": 4.6556706476935905e-05, "loss": 2.3509, "step": 2380000 }, { "epoch": 6.89, "learning_rate": 4.655598282928863e-05, "loss": 2.3664, "step": 2380500 }, { "epoch": 6.89, "learning_rate": 4.6555259181641356e-05, "loss": 2.3807, "step": 2381000 }, { "epoch": 6.89, "learning_rate": 4.655453553399408e-05, "loss": 2.3686, "step": 2381500 }, { "epoch": 6.89, "learning_rate": 4.6553813333642094e-05, "loss": 2.3645, "step": 2382000 }, { "epoch": 6.9, "learning_rate": 4.6553089685994816e-05, "loss": 2.3727, "step": 2382500 }, { "epoch": 6.9, "learning_rate": 4.655236603834754e-05, "loss": 2.3773, "step": 2383000 }, { "epoch": 6.9, "learning_rate": 4.655164239070026e-05, "loss": 2.3815, "step": 2383500 }, { "epoch": 6.9, "learning_rate": 4.655091874305298e-05, "loss": 2.3786, "step": 2384000 }, { "epoch": 6.9, "learning_rate": 4.6550195095405705e-05, "loss": 2.361, "step": 2384500 }, { "epoch": 6.9, "learning_rate": 4.654947144775843e-05, "loss": 2.3708, "step": 2385000 }, { "epoch": 6.91, "learning_rate": 4.6548747800111156e-05, "loss": 2.3851, "step": 2385500 }, { "epoch": 6.91, "learning_rate": 4.654802415246388e-05, "loss": 2.3914, "step": 2386000 }, { "epoch": 6.91, "learning_rate": 4.65473005048166e-05, "loss": 2.3632, "step": 2386500 }, { "epoch": 6.91, "learning_rate": 4.654657685716932e-05, "loss": 2.3599, "step": 2387000 }, { "epoch": 6.91, "learning_rate": 4.654585465681734e-05, "loss": 2.3746, "step": 2387500 }, { "epoch": 6.91, "learning_rate": 4.654513100917007e-05, "loss": 2.363, "step": 2388000 }, { "epoch": 6.91, "learning_rate": 4.654440880881808e-05, "loss": 2.3576, "step": 2388500 }, { "epoch": 6.92, "learning_rate": 4.6543685161170805e-05, "loss": 2.3661, "step": 2389000 }, { "epoch": 6.92, "learning_rate": 4.654296151352353e-05, "loss": 2.3627, "step": 2389500 }, { "epoch": 6.92, "learning_rate": 4.6542237865876257e-05, "loss": 2.3635, "step": 2390000 }, { "epoch": 6.92, "learning_rate": 4.654151566552427e-05, "loss": 2.3599, "step": 2390500 }, { "epoch": 6.92, "learning_rate": 4.6540792017876994e-05, "loss": 2.3704, "step": 2391000 }, { "epoch": 6.92, "learning_rate": 4.6540068370229717e-05, "loss": 2.3785, "step": 2391500 }, { "epoch": 6.92, "learning_rate": 4.653934472258244e-05, "loss": 2.3433, "step": 2392000 }, { "epoch": 6.93, "learning_rate": 4.653862107493516e-05, "loss": 2.3705, "step": 2392500 }, { "epoch": 6.93, "learning_rate": 4.653789742728788e-05, "loss": 2.3805, "step": 2393000 }, { "epoch": 6.93, "learning_rate": 4.6537173779640606e-05, "loss": 2.3674, "step": 2393500 }, { "epoch": 6.93, "learning_rate": 4.653645013199333e-05, "loss": 2.3794, "step": 2394000 }, { "epoch": 6.93, "learning_rate": 4.653572648434606e-05, "loss": 2.3576, "step": 2394500 }, { "epoch": 6.93, "learning_rate": 4.653500428399407e-05, "loss": 2.3651, "step": 2395000 }, { "epoch": 6.93, "learning_rate": 4.65342806363468e-05, "loss": 2.3979, "step": 2395500 }, { "epoch": 6.94, "learning_rate": 4.653355843599482e-05, "loss": 2.365, "step": 2396000 }, { "epoch": 6.94, "learning_rate": 4.653283478834754e-05, "loss": 2.3735, "step": 2396500 }, { "epoch": 6.94, "learning_rate": 4.653211114070026e-05, "loss": 2.3873, "step": 2397000 }, { "epoch": 6.94, "learning_rate": 4.6531387493052984e-05, "loss": 2.3867, "step": 2397500 }, { "epoch": 6.94, "learning_rate": 4.6530663845405706e-05, "loss": 2.3865, "step": 2398000 }, { "epoch": 6.94, "learning_rate": 4.6529940197758435e-05, "loss": 2.3553, "step": 2398500 }, { "epoch": 6.94, "learning_rate": 4.652921655011116e-05, "loss": 2.3491, "step": 2399000 }, { "epoch": 6.95, "learning_rate": 4.652849290246388e-05, "loss": 2.3624, "step": 2399500 }, { "epoch": 6.95, "learning_rate": 4.65277692548166e-05, "loss": 2.3769, "step": 2400000 }, { "epoch": 6.95, "learning_rate": 4.652704850175991e-05, "loss": 2.3572, "step": 2400500 }, { "epoch": 6.95, "learning_rate": 4.652632485411263e-05, "loss": 2.3813, "step": 2401000 }, { "epoch": 6.95, "learning_rate": 4.6525601206465355e-05, "loss": 2.3588, "step": 2401500 }, { "epoch": 6.95, "learning_rate": 4.6524877558818084e-05, "loss": 2.399, "step": 2402000 }, { "epoch": 6.95, "learning_rate": 4.6524153911170806e-05, "loss": 2.3505, "step": 2402500 }, { "epoch": 6.96, "learning_rate": 4.6523430263523535e-05, "loss": 2.3664, "step": 2403000 }, { "epoch": 6.96, "learning_rate": 4.652270661587626e-05, "loss": 2.3601, "step": 2403500 }, { "epoch": 6.96, "learning_rate": 4.652198296822898e-05, "loss": 2.3431, "step": 2404000 }, { "epoch": 6.96, "learning_rate": 4.65212593205817e-05, "loss": 2.3935, "step": 2404500 }, { "epoch": 6.96, "learning_rate": 4.6520535672934424e-05, "loss": 2.3774, "step": 2405000 }, { "epoch": 6.96, "learning_rate": 4.6519812025287146e-05, "loss": 2.3749, "step": 2405500 }, { "epoch": 6.96, "learning_rate": 4.651908982493516e-05, "loss": 2.3616, "step": 2406000 }, { "epoch": 6.97, "learning_rate": 4.6518366177287884e-05, "loss": 2.3737, "step": 2406500 }, { "epoch": 6.97, "learning_rate": 4.6517642529640606e-05, "loss": 2.3519, "step": 2407000 }, { "epoch": 6.97, "learning_rate": 4.6516918881993335e-05, "loss": 2.367, "step": 2407500 }, { "epoch": 6.97, "learning_rate": 4.651619668164135e-05, "loss": 2.3799, "step": 2408000 }, { "epoch": 6.97, "learning_rate": 4.651547303399407e-05, "loss": 2.3894, "step": 2408500 }, { "epoch": 6.97, "learning_rate": 4.6514749386346795e-05, "loss": 2.337, "step": 2409000 }, { "epoch": 6.97, "learning_rate": 4.651402573869952e-05, "loss": 2.3684, "step": 2409500 }, { "epoch": 6.98, "learning_rate": 4.6513302091052247e-05, "loss": 2.3523, "step": 2410000 }, { "epoch": 6.98, "learning_rate": 4.651257989070026e-05, "loss": 2.3815, "step": 2410500 }, { "epoch": 6.98, "learning_rate": 4.6511856243052984e-05, "loss": 2.3647, "step": 2411000 }, { "epoch": 6.98, "learning_rate": 4.651113259540571e-05, "loss": 2.4017, "step": 2411500 }, { "epoch": 6.98, "learning_rate": 4.6510408947758436e-05, "loss": 2.3474, "step": 2412000 }, { "epoch": 6.98, "learning_rate": 4.650968530011116e-05, "loss": 2.3772, "step": 2412500 }, { "epoch": 6.98, "learning_rate": 4.650896165246388e-05, "loss": 2.3743, "step": 2413000 }, { "epoch": 6.99, "learning_rate": 4.65082380048166e-05, "loss": 2.3843, "step": 2413500 }, { "epoch": 6.99, "learning_rate": 4.6507514357169325e-05, "loss": 2.3777, "step": 2414000 }, { "epoch": 6.99, "learning_rate": 4.650679070952205e-05, "loss": 2.3684, "step": 2414500 }, { "epoch": 6.99, "learning_rate": 4.650606850917006e-05, "loss": 2.3656, "step": 2415000 }, { "epoch": 6.99, "learning_rate": 4.6505344861522785e-05, "loss": 2.3774, "step": 2415500 }, { "epoch": 6.99, "learning_rate": 4.650462121387551e-05, "loss": 2.36, "step": 2416000 }, { "epoch": 6.99, "learning_rate": 4.650389901352353e-05, "loss": 2.3573, "step": 2416500 }, { "epoch": 7.0, "learning_rate": 4.650317536587625e-05, "loss": 2.3799, "step": 2417000 }, { "epoch": 7.0, "learning_rate": 4.6502451718228974e-05, "loss": 2.3803, "step": 2417500 }, { "epoch": 7.0, "learning_rate": 4.65017280705817e-05, "loss": 2.3814, "step": 2418000 }, { "epoch": 7.0, "eval_accuracy": 0.6437288418940945, "eval_accuracy_mlm": 0.6057274748989588, "eval_accuracy_nsp": 0.847426361065662, "eval_loss": 2.347137928009033, "eval_runtime": 330.4298, "eval_samples_per_second": 1320.662, "eval_steps_per_second": 55.028, "step": 2418304 }, { "epoch": 7.0, "learning_rate": 4.6501004422934425e-05, "loss": 2.3617, "step": 2418500 }, { "epoch": 7.0, "learning_rate": 4.650028077528715e-05, "loss": 2.3275, "step": 2419000 }, { "epoch": 7.0, "learning_rate": 4.649955712763987e-05, "loss": 2.3571, "step": 2419500 }, { "epoch": 7.0, "learning_rate": 4.649883347999259e-05, "loss": 2.3534, "step": 2420000 }, { "epoch": 7.01, "learning_rate": 4.6498109832345314e-05, "loss": 2.3498, "step": 2420500 }, { "epoch": 7.01, "learning_rate": 4.6497387631993336e-05, "loss": 2.3502, "step": 2421000 }, { "epoch": 7.01, "learning_rate": 4.649666543164135e-05, "loss": 2.3736, "step": 2421500 }, { "epoch": 7.01, "learning_rate": 4.6495941783994074e-05, "loss": 2.341, "step": 2422000 }, { "epoch": 7.01, "learning_rate": 4.649521958364209e-05, "loss": 2.3681, "step": 2422500 }, { "epoch": 7.01, "learning_rate": 4.649449593599481e-05, "loss": 2.3452, "step": 2423000 }, { "epoch": 7.02, "learning_rate": 4.6493772288347534e-05, "loss": 2.3434, "step": 2423500 }, { "epoch": 7.02, "learning_rate": 4.649304864070026e-05, "loss": 2.3234, "step": 2424000 }, { "epoch": 7.02, "learning_rate": 4.6492324993052985e-05, "loss": 2.3608, "step": 2424500 }, { "epoch": 7.02, "learning_rate": 4.649160134540571e-05, "loss": 2.3633, "step": 2425000 }, { "epoch": 7.02, "learning_rate": 4.649087914505373e-05, "loss": 2.3463, "step": 2425500 }, { "epoch": 7.02, "learning_rate": 4.6490156944701745e-05, "loss": 2.3956, "step": 2426000 }, { "epoch": 7.02, "learning_rate": 4.648943329705447e-05, "loss": 2.339, "step": 2426500 }, { "epoch": 7.03, "learning_rate": 4.648870964940719e-05, "loss": 2.3619, "step": 2427000 }, { "epoch": 7.03, "learning_rate": 4.648798600175991e-05, "loss": 2.3507, "step": 2427500 }, { "epoch": 7.03, "learning_rate": 4.6487262354112634e-05, "loss": 2.3517, "step": 2428000 }, { "epoch": 7.03, "learning_rate": 4.648653870646536e-05, "loss": 2.3389, "step": 2428500 }, { "epoch": 7.03, "learning_rate": 4.6485815058818085e-05, "loss": 2.3511, "step": 2429000 }, { "epoch": 7.03, "learning_rate": 4.648509141117081e-05, "loss": 2.3495, "step": 2429500 }, { "epoch": 7.03, "learning_rate": 4.648436776352353e-05, "loss": 2.3711, "step": 2430000 }, { "epoch": 7.04, "learning_rate": 4.6483645563171546e-05, "loss": 2.3255, "step": 2430500 }, { "epoch": 7.04, "learning_rate": 4.648292191552427e-05, "loss": 2.3315, "step": 2431000 }, { "epoch": 7.04, "learning_rate": 4.648219826787699e-05, "loss": 2.3308, "step": 2431500 }, { "epoch": 7.04, "learning_rate": 4.648147462022971e-05, "loss": 2.3458, "step": 2432000 }, { "epoch": 7.04, "learning_rate": 4.6480750972582434e-05, "loss": 2.3673, "step": 2432500 }, { "epoch": 7.04, "learning_rate": 4.6480027324935163e-05, "loss": 2.351, "step": 2433000 }, { "epoch": 7.04, "learning_rate": 4.6479303677287886e-05, "loss": 2.3423, "step": 2433500 }, { "epoch": 7.05, "learning_rate": 4.647858147693591e-05, "loss": 2.3255, "step": 2434000 }, { "epoch": 7.05, "learning_rate": 4.647785782928863e-05, "loss": 2.3455, "step": 2434500 }, { "epoch": 7.05, "learning_rate": 4.647713418164135e-05, "loss": 2.3473, "step": 2435000 }, { "epoch": 7.05, "learning_rate": 4.6476410533994075e-05, "loss": 2.354, "step": 2435500 }, { "epoch": 7.05, "learning_rate": 4.64756868863468e-05, "loss": 2.3577, "step": 2436000 }, { "epoch": 7.05, "learning_rate": 4.647496323869952e-05, "loss": 2.3581, "step": 2436500 }, { "epoch": 7.05, "learning_rate": 4.647423959105224e-05, "loss": 2.3453, "step": 2437000 }, { "epoch": 7.06, "learning_rate": 4.6473515943404964e-05, "loss": 2.3332, "step": 2437500 }, { "epoch": 7.06, "learning_rate": 4.6472792295757686e-05, "loss": 2.3679, "step": 2438000 }, { "epoch": 7.06, "learning_rate": 4.6472068648110415e-05, "loss": 2.3339, "step": 2438500 }, { "epoch": 7.06, "learning_rate": 4.647134500046314e-05, "loss": 2.3313, "step": 2439000 }, { "epoch": 7.06, "learning_rate": 4.647062135281586e-05, "loss": 2.3372, "step": 2439500 }, { "epoch": 7.06, "learning_rate": 4.646989770516859e-05, "loss": 2.3533, "step": 2440000 }, { "epoch": 7.06, "learning_rate": 4.646917405752131e-05, "loss": 2.3603, "step": 2440500 }, { "epoch": 7.07, "learning_rate": 4.6468451857169326e-05, "loss": 2.3509, "step": 2441000 }, { "epoch": 7.07, "learning_rate": 4.646772820952205e-05, "loss": 2.3333, "step": 2441500 }, { "epoch": 7.07, "learning_rate": 4.646700456187477e-05, "loss": 2.3278, "step": 2442000 }, { "epoch": 7.07, "learning_rate": 4.646628091422749e-05, "loss": 2.3414, "step": 2442500 }, { "epoch": 7.07, "learning_rate": 4.6465557266580215e-05, "loss": 2.3564, "step": 2443000 }, { "epoch": 7.07, "learning_rate": 4.646483361893294e-05, "loss": 2.332, "step": 2443500 }, { "epoch": 7.07, "learning_rate": 4.646411141858096e-05, "loss": 2.3489, "step": 2444000 }, { "epoch": 7.08, "learning_rate": 4.646338777093368e-05, "loss": 2.3592, "step": 2444500 }, { "epoch": 7.08, "learning_rate": 4.6462664123286404e-05, "loss": 2.3361, "step": 2445000 }, { "epoch": 7.08, "learning_rate": 4.6461940475639126e-05, "loss": 2.3452, "step": 2445500 }, { "epoch": 7.08, "learning_rate": 4.6461219722582435e-05, "loss": 2.3349, "step": 2446000 }, { "epoch": 7.08, "learning_rate": 4.6460496074935164e-05, "loss": 2.3356, "step": 2446500 }, { "epoch": 7.08, "learning_rate": 4.6459772427287886e-05, "loss": 2.3417, "step": 2447000 }, { "epoch": 7.08, "learning_rate": 4.645904877964061e-05, "loss": 2.3389, "step": 2447500 }, { "epoch": 7.09, "learning_rate": 4.645832513199334e-05, "loss": 2.3335, "step": 2448000 }, { "epoch": 7.09, "learning_rate": 4.645760148434606e-05, "loss": 2.3567, "step": 2448500 }, { "epoch": 7.09, "learning_rate": 4.645687783669878e-05, "loss": 2.3355, "step": 2449000 }, { "epoch": 7.09, "learning_rate": 4.6456154189051504e-05, "loss": 2.3559, "step": 2449500 }, { "epoch": 7.09, "learning_rate": 4.645543054140423e-05, "loss": 2.3431, "step": 2450000 }, { "epoch": 7.09, "learning_rate": 4.645470834105224e-05, "loss": 2.3426, "step": 2450500 }, { "epoch": 7.09, "learning_rate": 4.6453986140700265e-05, "loss": 2.3429, "step": 2451000 }, { "epoch": 7.1, "learning_rate": 4.645326249305299e-05, "loss": 2.3557, "step": 2451500 }, { "epoch": 7.1, "learning_rate": 4.645253884540571e-05, "loss": 2.3541, "step": 2452000 }, { "epoch": 7.1, "learning_rate": 4.645181519775843e-05, "loss": 2.3844, "step": 2452500 }, { "epoch": 7.1, "learning_rate": 4.6451091550111153e-05, "loss": 2.3363, "step": 2453000 }, { "epoch": 7.1, "learning_rate": 4.6450367902463876e-05, "loss": 2.3213, "step": 2453500 }, { "epoch": 7.1, "learning_rate": 4.644964570211189e-05, "loss": 2.3417, "step": 2454000 }, { "epoch": 7.1, "learning_rate": 4.6448922054464614e-05, "loss": 2.3361, "step": 2454500 }, { "epoch": 7.11, "learning_rate": 4.644819840681734e-05, "loss": 2.346, "step": 2455000 }, { "epoch": 7.11, "learning_rate": 4.6447474759170065e-05, "loss": 2.3373, "step": 2455500 }, { "epoch": 7.11, "learning_rate": 4.6446751111522794e-05, "loss": 2.3616, "step": 2456000 }, { "epoch": 7.11, "learning_rate": 4.644602891117081e-05, "loss": 2.3726, "step": 2456500 }, { "epoch": 7.11, "learning_rate": 4.644530526352353e-05, "loss": 2.3509, "step": 2457000 }, { "epoch": 7.11, "learning_rate": 4.6444581615876254e-05, "loss": 2.3539, "step": 2457500 }, { "epoch": 7.11, "learning_rate": 4.6443857968228976e-05, "loss": 2.362, "step": 2458000 }, { "epoch": 7.12, "learning_rate": 4.64431343205817e-05, "loss": 2.3787, "step": 2458500 }, { "epoch": 7.12, "learning_rate": 4.644241067293442e-05, "loss": 2.3557, "step": 2459000 }, { "epoch": 7.12, "learning_rate": 4.644168702528714e-05, "loss": 2.3194, "step": 2459500 }, { "epoch": 7.12, "learning_rate": 4.6440964824935165e-05, "loss": 2.3507, "step": 2460000 }, { "epoch": 7.12, "learning_rate": 4.644024117728789e-05, "loss": 2.3496, "step": 2460500 }, { "epoch": 7.12, "learning_rate": 4.643951752964061e-05, "loss": 2.356, "step": 2461000 }, { "epoch": 7.13, "learning_rate": 4.643879388199333e-05, "loss": 2.3433, "step": 2461500 }, { "epoch": 7.13, "learning_rate": 4.6438070234346054e-05, "loss": 2.3793, "step": 2462000 }, { "epoch": 7.13, "learning_rate": 4.643734803399407e-05, "loss": 2.3352, "step": 2462500 }, { "epoch": 7.13, "learning_rate": 4.64366243863468e-05, "loss": 2.3514, "step": 2463000 }, { "epoch": 7.13, "learning_rate": 4.643590073869952e-05, "loss": 2.3432, "step": 2463500 }, { "epoch": 7.13, "learning_rate": 4.643517709105224e-05, "loss": 2.3498, "step": 2464000 }, { "epoch": 7.13, "learning_rate": 4.6434453443404965e-05, "loss": 2.3185, "step": 2464500 }, { "epoch": 7.14, "learning_rate": 4.6433729795757694e-05, "loss": 2.3388, "step": 2465000 }, { "epoch": 7.14, "learning_rate": 4.6433006148110416e-05, "loss": 2.3357, "step": 2465500 }, { "epoch": 7.14, "learning_rate": 4.643228394775843e-05, "loss": 2.3422, "step": 2466000 }, { "epoch": 7.14, "learning_rate": 4.6431560300111154e-05, "loss": 2.3547, "step": 2466500 }, { "epoch": 7.14, "learning_rate": 4.6430836652463877e-05, "loss": 2.3599, "step": 2467000 }, { "epoch": 7.14, "learning_rate": 4.64301130048166e-05, "loss": 2.3548, "step": 2467500 }, { "epoch": 7.14, "learning_rate": 4.642938935716932e-05, "loss": 2.3399, "step": 2468000 }, { "epoch": 7.15, "learning_rate": 4.642866570952204e-05, "loss": 2.3574, "step": 2468500 }, { "epoch": 7.15, "learning_rate": 4.6427942061874765e-05, "loss": 2.3333, "step": 2469000 }, { "epoch": 7.15, "learning_rate": 4.6427218414227494e-05, "loss": 2.3678, "step": 2469500 }, { "epoch": 7.15, "learning_rate": 4.642649476658022e-05, "loss": 2.3812, "step": 2470000 }, { "epoch": 7.15, "learning_rate": 4.6425771118932946e-05, "loss": 2.3647, "step": 2470500 }, { "epoch": 7.15, "learning_rate": 4.642504747128567e-05, "loss": 2.3613, "step": 2471000 }, { "epoch": 7.15, "learning_rate": 4.642432382363839e-05, "loss": 2.348, "step": 2471500 }, { "epoch": 7.16, "learning_rate": 4.642360017599111e-05, "loss": 2.3301, "step": 2472000 }, { "epoch": 7.16, "learning_rate": 4.642287797563913e-05, "loss": 2.3526, "step": 2472500 }, { "epoch": 7.16, "learning_rate": 4.6422155775287144e-05, "loss": 2.3515, "step": 2473000 }, { "epoch": 7.16, "learning_rate": 4.6421432127639866e-05, "loss": 2.337, "step": 2473500 }, { "epoch": 7.16, "learning_rate": 4.642070992728789e-05, "loss": 2.3537, "step": 2474000 }, { "epoch": 7.16, "learning_rate": 4.641998627964061e-05, "loss": 2.3568, "step": 2474500 }, { "epoch": 7.16, "learning_rate": 4.641926263199333e-05, "loss": 2.3592, "step": 2475000 }, { "epoch": 7.17, "learning_rate": 4.6418538984346055e-05, "loss": 2.3373, "step": 2475500 }, { "epoch": 7.17, "learning_rate": 4.641781533669878e-05, "loss": 2.3604, "step": 2476000 }, { "epoch": 7.17, "learning_rate": 4.641709313634679e-05, "loss": 2.3479, "step": 2476500 }, { "epoch": 7.17, "learning_rate": 4.6416369488699515e-05, "loss": 2.3627, "step": 2477000 }, { "epoch": 7.17, "learning_rate": 4.6415645841052244e-05, "loss": 2.3427, "step": 2477500 }, { "epoch": 7.17, "learning_rate": 4.6414922193404966e-05, "loss": 2.3537, "step": 2478000 }, { "epoch": 7.17, "learning_rate": 4.6414198545757695e-05, "loss": 2.3499, "step": 2478500 }, { "epoch": 7.18, "learning_rate": 4.641347634540571e-05, "loss": 2.3668, "step": 2479000 }, { "epoch": 7.18, "learning_rate": 4.6412754145053726e-05, "loss": 2.344, "step": 2479500 }, { "epoch": 7.18, "learning_rate": 4.641203049740645e-05, "loss": 2.3437, "step": 2480000 }, { "epoch": 7.18, "learning_rate": 4.641130829705447e-05, "loss": 2.3643, "step": 2480500 }, { "epoch": 7.18, "learning_rate": 4.641058464940719e-05, "loss": 2.3746, "step": 2481000 }, { "epoch": 7.18, "learning_rate": 4.6409861001759915e-05, "loss": 2.351, "step": 2481500 }, { "epoch": 7.18, "learning_rate": 4.640913735411264e-05, "loss": 2.3674, "step": 2482000 }, { "epoch": 7.19, "learning_rate": 4.640841370646536e-05, "loss": 2.3396, "step": 2482500 }, { "epoch": 7.19, "learning_rate": 4.640769005881808e-05, "loss": 2.3609, "step": 2483000 }, { "epoch": 7.19, "learning_rate": 4.6406966411170804e-05, "loss": 2.3633, "step": 2483500 }, { "epoch": 7.19, "learning_rate": 4.6406242763523526e-05, "loss": 2.3468, "step": 2484000 }, { "epoch": 7.19, "learning_rate": 4.640551911587625e-05, "loss": 2.3437, "step": 2484500 }, { "epoch": 7.19, "learning_rate": 4.640479546822897e-05, "loss": 2.3435, "step": 2485000 }, { "epoch": 7.19, "learning_rate": 4.64040718205817e-05, "loss": 2.3693, "step": 2485500 }, { "epoch": 7.2, "learning_rate": 4.640334817293442e-05, "loss": 2.3621, "step": 2486000 }, { "epoch": 7.2, "learning_rate": 4.6402624525287144e-05, "loss": 2.3624, "step": 2486500 }, { "epoch": 7.2, "learning_rate": 4.640190087763987e-05, "loss": 2.3479, "step": 2487000 }, { "epoch": 7.2, "learning_rate": 4.6401177229992596e-05, "loss": 2.3728, "step": 2487500 }, { "epoch": 7.2, "learning_rate": 4.640045358234532e-05, "loss": 2.3557, "step": 2488000 }, { "epoch": 7.2, "learning_rate": 4.639972993469804e-05, "loss": 2.3205, "step": 2488500 }, { "epoch": 7.2, "learning_rate": 4.639900628705076e-05, "loss": 2.3266, "step": 2489000 }, { "epoch": 7.21, "learning_rate": 4.6398282639403484e-05, "loss": 2.3489, "step": 2489500 }, { "epoch": 7.21, "learning_rate": 4.639755899175621e-05, "loss": 2.3343, "step": 2490000 }, { "epoch": 7.21, "learning_rate": 4.639683534410893e-05, "loss": 2.363, "step": 2490500 }, { "epoch": 7.21, "learning_rate": 4.639611169646165e-05, "loss": 2.3585, "step": 2491000 }, { "epoch": 7.21, "learning_rate": 4.6395389496109674e-05, "loss": 2.3358, "step": 2491500 }, { "epoch": 7.21, "learning_rate": 4.6394665848462396e-05, "loss": 2.364, "step": 2492000 }, { "epoch": 7.21, "learning_rate": 4.6393942200815125e-05, "loss": 2.3396, "step": 2492500 }, { "epoch": 7.22, "learning_rate": 4.639322000046314e-05, "loss": 2.3541, "step": 2493000 }, { "epoch": 7.22, "learning_rate": 4.639249635281586e-05, "loss": 2.3636, "step": 2493500 }, { "epoch": 7.22, "learning_rate": 4.6391772705168585e-05, "loss": 2.3486, "step": 2494000 }, { "epoch": 7.22, "learning_rate": 4.639104905752131e-05, "loss": 2.3431, "step": 2494500 }, { "epoch": 7.22, "learning_rate": 4.639032540987403e-05, "loss": 2.3567, "step": 2495000 }, { "epoch": 7.22, "learning_rate": 4.638960176222675e-05, "loss": 2.3753, "step": 2495500 }, { "epoch": 7.22, "learning_rate": 4.6388878114579474e-05, "loss": 2.3596, "step": 2496000 }, { "epoch": 7.23, "learning_rate": 4.6388154466932196e-05, "loss": 2.3525, "step": 2496500 }, { "epoch": 7.23, "learning_rate": 4.638743226658022e-05, "loss": 2.3578, "step": 2497000 }, { "epoch": 7.23, "learning_rate": 4.638670861893294e-05, "loss": 2.3303, "step": 2497500 }, { "epoch": 7.23, "learning_rate": 4.638598497128566e-05, "loss": 2.3599, "step": 2498000 }, { "epoch": 7.23, "learning_rate": 4.6385261323638385e-05, "loss": 2.3431, "step": 2498500 }, { "epoch": 7.23, "learning_rate": 4.638453767599111e-05, "loss": 2.363, "step": 2499000 }, { "epoch": 7.24, "learning_rate": 4.6383814028343836e-05, "loss": 2.3494, "step": 2499500 }, { "epoch": 7.24, "learning_rate": 4.638309182799185e-05, "loss": 2.3642, "step": 2500000 }, { "epoch": 7.24, "learning_rate": 4.6382368180344574e-05, "loss": 2.3491, "step": 2500500 }, { "epoch": 7.24, "learning_rate": 4.6381644532697296e-05, "loss": 2.378, "step": 2501000 }, { "epoch": 7.24, "learning_rate": 4.6380920885050025e-05, "loss": 2.3392, "step": 2501500 }, { "epoch": 7.24, "learning_rate": 4.638019723740275e-05, "loss": 2.3645, "step": 2502000 }, { "epoch": 7.24, "learning_rate": 4.637947358975547e-05, "loss": 2.3661, "step": 2502500 }, { "epoch": 7.25, "learning_rate": 4.637874994210819e-05, "loss": 2.3244, "step": 2503000 }, { "epoch": 7.25, "learning_rate": 4.637802774175621e-05, "loss": 2.3611, "step": 2503500 }, { "epoch": 7.25, "learning_rate": 4.637730409410893e-05, "loss": 2.3544, "step": 2504000 }, { "epoch": 7.25, "learning_rate": 4.637658044646165e-05, "loss": 2.3745, "step": 2504500 }, { "epoch": 7.25, "learning_rate": 4.6375856798814374e-05, "loss": 2.3536, "step": 2505000 }, { "epoch": 7.25, "learning_rate": 4.6375133151167096e-05, "loss": 2.3692, "step": 2505500 }, { "epoch": 7.25, "learning_rate": 4.6374409503519825e-05, "loss": 2.3516, "step": 2506000 }, { "epoch": 7.26, "learning_rate": 4.637368730316784e-05, "loss": 2.3373, "step": 2506500 }, { "epoch": 7.26, "learning_rate": 4.637296365552056e-05, "loss": 2.3402, "step": 2507000 }, { "epoch": 7.26, "learning_rate": 4.637224000787329e-05, "loss": 2.3396, "step": 2507500 }, { "epoch": 7.26, "learning_rate": 4.6371516360226014e-05, "loss": 2.3797, "step": 2508000 }, { "epoch": 7.26, "learning_rate": 4.637079415987403e-05, "loss": 2.3364, "step": 2508500 }, { "epoch": 7.26, "learning_rate": 4.637007051222675e-05, "loss": 2.352, "step": 2509000 }, { "epoch": 7.26, "learning_rate": 4.6369346864579475e-05, "loss": 2.3707, "step": 2509500 }, { "epoch": 7.27, "learning_rate": 4.63686232169322e-05, "loss": 2.3662, "step": 2510000 }, { "epoch": 7.27, "learning_rate": 4.6367899569284926e-05, "loss": 2.325, "step": 2510500 }, { "epoch": 7.27, "learning_rate": 4.636717592163765e-05, "loss": 2.3457, "step": 2511000 }, { "epoch": 7.27, "learning_rate": 4.636645227399037e-05, "loss": 2.3551, "step": 2511500 }, { "epoch": 7.27, "learning_rate": 4.636572862634309e-05, "loss": 2.3441, "step": 2512000 }, { "epoch": 7.27, "learning_rate": 4.63650078732864e-05, "loss": 2.3818, "step": 2512500 }, { "epoch": 7.27, "learning_rate": 4.6364285672934424e-05, "loss": 2.369, "step": 2513000 }, { "epoch": 7.28, "learning_rate": 4.6363562025287146e-05, "loss": 2.3542, "step": 2513500 }, { "epoch": 7.28, "learning_rate": 4.636283837763987e-05, "loss": 2.3436, "step": 2514000 }, { "epoch": 7.28, "learning_rate": 4.636211472999259e-05, "loss": 2.3595, "step": 2514500 }, { "epoch": 7.28, "learning_rate": 4.6361392529640606e-05, "loss": 2.3705, "step": 2515000 }, { "epoch": 7.28, "learning_rate": 4.6360668881993335e-05, "loss": 2.3547, "step": 2515500 }, { "epoch": 7.28, "learning_rate": 4.635994523434606e-05, "loss": 2.3696, "step": 2516000 }, { "epoch": 7.28, "learning_rate": 4.635922158669878e-05, "loss": 2.3602, "step": 2516500 }, { "epoch": 7.29, "learning_rate": 4.63584979390515e-05, "loss": 2.3854, "step": 2517000 }, { "epoch": 7.29, "learning_rate": 4.6357774291404224e-05, "loss": 2.339, "step": 2517500 }, { "epoch": 7.29, "learning_rate": 4.635705064375695e-05, "loss": 2.3619, "step": 2518000 }, { "epoch": 7.29, "learning_rate": 4.635632989070026e-05, "loss": 2.3353, "step": 2518500 }, { "epoch": 7.29, "learning_rate": 4.6355606243052984e-05, "loss": 2.3343, "step": 2519000 }, { "epoch": 7.29, "learning_rate": 4.6354882595405706e-05, "loss": 2.3677, "step": 2519500 }, { "epoch": 7.29, "learning_rate": 4.635415894775843e-05, "loss": 2.3688, "step": 2520000 }, { "epoch": 7.3, "learning_rate": 4.635343674740645e-05, "loss": 2.3618, "step": 2520500 }, { "epoch": 7.3, "learning_rate": 4.635271309975917e-05, "loss": 2.3456, "step": 2521000 }, { "epoch": 7.3, "learning_rate": 4.6351989452111895e-05, "loss": 2.3543, "step": 2521500 }, { "epoch": 7.3, "learning_rate": 4.635126580446462e-05, "loss": 2.3446, "step": 2522000 }, { "epoch": 7.3, "learning_rate": 4.635054215681734e-05, "loss": 2.3505, "step": 2522500 }, { "epoch": 7.3, "learning_rate": 4.634981850917007e-05, "loss": 2.3595, "step": 2523000 }, { "epoch": 7.3, "learning_rate": 4.634909486152279e-05, "loss": 2.3568, "step": 2523500 }, { "epoch": 7.31, "learning_rate": 4.634837121387551e-05, "loss": 2.3351, "step": 2524000 }, { "epoch": 7.31, "learning_rate": 4.6347647566228235e-05, "loss": 2.3362, "step": 2524500 }, { "epoch": 7.31, "learning_rate": 4.634692391858096e-05, "loss": 2.3609, "step": 2525000 }, { "epoch": 7.31, "learning_rate": 4.634620027093368e-05, "loss": 2.3531, "step": 2525500 }, { "epoch": 7.31, "learning_rate": 4.63454766232864e-05, "loss": 2.3646, "step": 2526000 }, { "epoch": 7.31, "learning_rate": 4.6344752975639124e-05, "loss": 2.3555, "step": 2526500 }, { "epoch": 7.31, "learning_rate": 4.634402932799185e-05, "loss": 2.3648, "step": 2527000 }, { "epoch": 7.32, "learning_rate": 4.6343305680344576e-05, "loss": 2.3385, "step": 2527500 }, { "epoch": 7.32, "learning_rate": 4.634258347999259e-05, "loss": 2.3625, "step": 2528000 }, { "epoch": 7.32, "learning_rate": 4.6341859832345313e-05, "loss": 2.3579, "step": 2528500 }, { "epoch": 7.32, "learning_rate": 4.6341136184698036e-05, "loss": 2.3653, "step": 2529000 }, { "epoch": 7.32, "learning_rate": 4.634041543164135e-05, "loss": 2.3596, "step": 2529500 }, { "epoch": 7.32, "learning_rate": 4.6339691783994074e-05, "loss": 2.3424, "step": 2530000 }, { "epoch": 7.32, "learning_rate": 4.63389681363468e-05, "loss": 2.3597, "step": 2530500 }, { "epoch": 7.33, "learning_rate": 4.6338244488699525e-05, "loss": 2.3764, "step": 2531000 }, { "epoch": 7.33, "learning_rate": 4.633752084105225e-05, "loss": 2.3602, "step": 2531500 }, { "epoch": 7.33, "learning_rate": 4.633679719340497e-05, "loss": 2.3381, "step": 2532000 }, { "epoch": 7.33, "learning_rate": 4.633607354575769e-05, "loss": 2.3622, "step": 2532500 }, { "epoch": 7.33, "learning_rate": 4.6335349898110414e-05, "loss": 2.3496, "step": 2533000 }, { "epoch": 7.33, "learning_rate": 4.6334626250463136e-05, "loss": 2.3615, "step": 2533500 }, { "epoch": 7.33, "learning_rate": 4.633390405011115e-05, "loss": 2.3605, "step": 2534000 }, { "epoch": 7.34, "learning_rate": 4.633318040246388e-05, "loss": 2.3645, "step": 2534500 }, { "epoch": 7.34, "learning_rate": 4.63324567548166e-05, "loss": 2.3585, "step": 2535000 }, { "epoch": 7.34, "learning_rate": 4.6331733107169325e-05, "loss": 2.3857, "step": 2535500 }, { "epoch": 7.34, "learning_rate": 4.633100945952205e-05, "loss": 2.3719, "step": 2536000 }, { "epoch": 7.34, "learning_rate": 4.633028581187477e-05, "loss": 2.3456, "step": 2536500 }, { "epoch": 7.34, "learning_rate": 4.632956216422749e-05, "loss": 2.3526, "step": 2537000 }, { "epoch": 7.35, "learning_rate": 4.632883851658022e-05, "loss": 2.3613, "step": 2537500 }, { "epoch": 7.35, "learning_rate": 4.6328116316228236e-05, "loss": 2.3609, "step": 2538000 }, { "epoch": 7.35, "learning_rate": 4.632739266858096e-05, "loss": 2.3528, "step": 2538500 }, { "epoch": 7.35, "learning_rate": 4.632666902093368e-05, "loss": 2.3485, "step": 2539000 }, { "epoch": 7.35, "learning_rate": 4.63259453732864e-05, "loss": 2.3668, "step": 2539500 }, { "epoch": 7.35, "learning_rate": 4.632522172563913e-05, "loss": 2.3493, "step": 2540000 }, { "epoch": 7.35, "learning_rate": 4.6324498077991854e-05, "loss": 2.3602, "step": 2540500 }, { "epoch": 7.36, "learning_rate": 4.6323774430344576e-05, "loss": 2.3492, "step": 2541000 }, { "epoch": 7.36, "learning_rate": 4.63230507826973e-05, "loss": 2.3533, "step": 2541500 }, { "epoch": 7.36, "learning_rate": 4.6322328582345314e-05, "loss": 2.3659, "step": 2542000 }, { "epoch": 7.36, "learning_rate": 4.6321604934698036e-05, "loss": 2.3628, "step": 2542500 }, { "epoch": 7.36, "learning_rate": 4.632088273434605e-05, "loss": 2.3576, "step": 2543000 }, { "epoch": 7.36, "learning_rate": 4.632015908669878e-05, "loss": 2.3605, "step": 2543500 }, { "epoch": 7.36, "learning_rate": 4.63194354390515e-05, "loss": 2.3583, "step": 2544000 }, { "epoch": 7.37, "learning_rate": 4.6318711791404225e-05, "loss": 2.3646, "step": 2544500 }, { "epoch": 7.37, "learning_rate": 4.6317988143756954e-05, "loss": 2.3644, "step": 2545000 }, { "epoch": 7.37, "learning_rate": 4.631726739070026e-05, "loss": 2.3473, "step": 2545500 }, { "epoch": 7.37, "learning_rate": 4.6316543743052986e-05, "loss": 2.3522, "step": 2546000 }, { "epoch": 7.37, "learning_rate": 4.631582009540571e-05, "loss": 2.3436, "step": 2546500 }, { "epoch": 7.37, "learning_rate": 4.631509644775843e-05, "loss": 2.3565, "step": 2547000 }, { "epoch": 7.37, "learning_rate": 4.631437280011115e-05, "loss": 2.3656, "step": 2547500 }, { "epoch": 7.38, "learning_rate": 4.631364915246388e-05, "loss": 2.3649, "step": 2548000 }, { "epoch": 7.38, "learning_rate": 4.6312925504816604e-05, "loss": 2.3572, "step": 2548500 }, { "epoch": 7.38, "learning_rate": 4.6312201857169326e-05, "loss": 2.3387, "step": 2549000 }, { "epoch": 7.38, "learning_rate": 4.631147820952205e-05, "loss": 2.3277, "step": 2549500 }, { "epoch": 7.38, "learning_rate": 4.631075456187477e-05, "loss": 2.3638, "step": 2550000 }, { "epoch": 7.38, "learning_rate": 4.631003091422749e-05, "loss": 2.3626, "step": 2550500 }, { "epoch": 7.38, "learning_rate": 4.6309307266580215e-05, "loss": 2.3574, "step": 2551000 }, { "epoch": 7.39, "learning_rate": 4.630858361893294e-05, "loss": 2.3319, "step": 2551500 }, { "epoch": 7.39, "learning_rate": 4.630785997128566e-05, "loss": 2.336, "step": 2552000 }, { "epoch": 7.39, "learning_rate": 4.630713632363839e-05, "loss": 2.3317, "step": 2552500 }, { "epoch": 7.39, "learning_rate": 4.630641267599111e-05, "loss": 2.3679, "step": 2553000 }, { "epoch": 7.39, "learning_rate": 4.630568902834383e-05, "loss": 2.3666, "step": 2553500 }, { "epoch": 7.39, "learning_rate": 4.630496827528715e-05, "loss": 2.3392, "step": 2554000 }, { "epoch": 7.39, "learning_rate": 4.630424462763987e-05, "loss": 2.3525, "step": 2554500 }, { "epoch": 7.4, "learning_rate": 4.630352097999259e-05, "loss": 2.3563, "step": 2555000 }, { "epoch": 7.4, "learning_rate": 4.6302797332345315e-05, "loss": 2.3637, "step": 2555500 }, { "epoch": 7.4, "learning_rate": 4.630207368469804e-05, "loss": 2.372, "step": 2556000 }, { "epoch": 7.4, "learning_rate": 4.630135148434605e-05, "loss": 2.3515, "step": 2556500 }, { "epoch": 7.4, "learning_rate": 4.630062783669878e-05, "loss": 2.3431, "step": 2557000 }, { "epoch": 7.4, "learning_rate": 4.6299904189051504e-05, "loss": 2.3023, "step": 2557500 }, { "epoch": 7.4, "learning_rate": 4.6299180541404226e-05, "loss": 2.3363, "step": 2558000 }, { "epoch": 7.41, "learning_rate": 4.6298459788347535e-05, "loss": 2.3365, "step": 2558500 }, { "epoch": 7.41, "learning_rate": 4.629773614070026e-05, "loss": 2.3579, "step": 2559000 }, { "epoch": 7.41, "learning_rate": 4.629701249305298e-05, "loss": 2.3861, "step": 2559500 }, { "epoch": 7.41, "learning_rate": 4.6296290292701e-05, "loss": 2.3868, "step": 2560000 }, { "epoch": 7.41, "learning_rate": 4.629556664505373e-05, "loss": 2.3673, "step": 2560500 }, { "epoch": 7.41, "learning_rate": 4.629484299740645e-05, "loss": 2.3512, "step": 2561000 }, { "epoch": 7.41, "learning_rate": 4.6294119349759175e-05, "loss": 2.3552, "step": 2561500 }, { "epoch": 7.42, "learning_rate": 4.629339714940719e-05, "loss": 2.3493, "step": 2562000 }, { "epoch": 7.42, "learning_rate": 4.629267350175991e-05, "loss": 2.3507, "step": 2562500 }, { "epoch": 7.42, "learning_rate": 4.6291949854112635e-05, "loss": 2.3867, "step": 2563000 }, { "epoch": 7.42, "learning_rate": 4.629122620646536e-05, "loss": 2.3453, "step": 2563500 }, { "epoch": 7.42, "learning_rate": 4.629050400611338e-05, "loss": 2.338, "step": 2564000 }, { "epoch": 7.42, "learning_rate": 4.62897803584661e-05, "loss": 2.3344, "step": 2564500 }, { "epoch": 7.42, "learning_rate": 4.6289056710818824e-05, "loss": 2.3704, "step": 2565000 }, { "epoch": 7.43, "learning_rate": 4.628833306317155e-05, "loss": 2.3734, "step": 2565500 }, { "epoch": 7.43, "learning_rate": 4.628760941552427e-05, "loss": 2.3448, "step": 2566000 }, { "epoch": 7.43, "learning_rate": 4.628688576787699e-05, "loss": 2.3515, "step": 2566500 }, { "epoch": 7.43, "learning_rate": 4.6286162120229713e-05, "loss": 2.3849, "step": 2567000 }, { "epoch": 7.43, "learning_rate": 4.6285438472582436e-05, "loss": 2.3378, "step": 2567500 }, { "epoch": 7.43, "learning_rate": 4.6284714824935165e-05, "loss": 2.3894, "step": 2568000 }, { "epoch": 7.43, "learning_rate": 4.628399117728789e-05, "loss": 2.3624, "step": 2568500 }, { "epoch": 7.44, "learning_rate": 4.628326752964061e-05, "loss": 2.3398, "step": 2569000 }, { "epoch": 7.44, "learning_rate": 4.628254388199333e-05, "loss": 2.3612, "step": 2569500 }, { "epoch": 7.44, "learning_rate": 4.628182023434606e-05, "loss": 2.3612, "step": 2570000 }, { "epoch": 7.44, "learning_rate": 4.628109658669878e-05, "loss": 2.3718, "step": 2570500 }, { "epoch": 7.44, "learning_rate": 4.6280372939051505e-05, "loss": 2.3732, "step": 2571000 }, { "epoch": 7.44, "learning_rate": 4.627964929140423e-05, "loss": 2.3562, "step": 2571500 }, { "epoch": 7.44, "learning_rate": 4.627892564375695e-05, "loss": 2.3543, "step": 2572000 }, { "epoch": 7.45, "learning_rate": 4.627820199610967e-05, "loss": 2.3401, "step": 2572500 }, { "epoch": 7.45, "learning_rate": 4.6277478348462394e-05, "loss": 2.3581, "step": 2573000 }, { "epoch": 7.45, "learning_rate": 4.6276754700815116e-05, "loss": 2.3535, "step": 2573500 }, { "epoch": 7.45, "learning_rate": 4.627603105316784e-05, "loss": 2.3635, "step": 2574000 }, { "epoch": 7.45, "learning_rate": 4.627530740552056e-05, "loss": 2.3792, "step": 2574500 }, { "epoch": 7.45, "learning_rate": 4.627458665246388e-05, "loss": 2.311, "step": 2575000 }, { "epoch": 7.46, "learning_rate": 4.6273863004816605e-05, "loss": 2.3701, "step": 2575500 }, { "epoch": 7.46, "learning_rate": 4.627313935716933e-05, "loss": 2.3654, "step": 2576000 }, { "epoch": 7.46, "learning_rate": 4.627241570952205e-05, "loss": 2.36, "step": 2576500 }, { "epoch": 7.46, "learning_rate": 4.627169206187477e-05, "loss": 2.3579, "step": 2577000 }, { "epoch": 7.46, "learning_rate": 4.6270968414227494e-05, "loss": 2.3577, "step": 2577500 }, { "epoch": 7.46, "learning_rate": 4.627024766117081e-05, "loss": 2.3675, "step": 2578000 }, { "epoch": 7.46, "learning_rate": 4.626952401352353e-05, "loss": 2.3542, "step": 2578500 }, { "epoch": 7.47, "learning_rate": 4.6268800365876254e-05, "loss": 2.3685, "step": 2579000 }, { "epoch": 7.47, "learning_rate": 4.6268076718228976e-05, "loss": 2.371, "step": 2579500 }, { "epoch": 7.47, "learning_rate": 4.626735451787699e-05, "loss": 2.3349, "step": 2580000 }, { "epoch": 7.47, "learning_rate": 4.6266630870229714e-05, "loss": 2.3445, "step": 2580500 }, { "epoch": 7.47, "learning_rate": 4.6265907222582436e-05, "loss": 2.3505, "step": 2581000 }, { "epoch": 7.47, "learning_rate": 4.626518357493516e-05, "loss": 2.3805, "step": 2581500 }, { "epoch": 7.47, "learning_rate": 4.626445992728788e-05, "loss": 2.3327, "step": 2582000 }, { "epoch": 7.48, "learning_rate": 4.626373627964061e-05, "loss": 2.3523, "step": 2582500 }, { "epoch": 7.48, "learning_rate": 4.626301263199333e-05, "loss": 2.3691, "step": 2583000 }, { "epoch": 7.48, "learning_rate": 4.626228898434606e-05, "loss": 2.3657, "step": 2583500 }, { "epoch": 7.48, "learning_rate": 4.626156533669878e-05, "loss": 2.3459, "step": 2584000 }, { "epoch": 7.48, "learning_rate": 4.6260841689051506e-05, "loss": 2.3313, "step": 2584500 }, { "epoch": 7.48, "learning_rate": 4.626011804140423e-05, "loss": 2.3457, "step": 2585000 }, { "epoch": 7.48, "learning_rate": 4.625939439375695e-05, "loss": 2.3611, "step": 2585500 }, { "epoch": 7.49, "learning_rate": 4.625867074610967e-05, "loss": 2.3557, "step": 2586000 }, { "epoch": 7.49, "learning_rate": 4.6257947098462395e-05, "loss": 2.3558, "step": 2586500 }, { "epoch": 7.49, "learning_rate": 4.625722345081512e-05, "loss": 2.3577, "step": 2587000 }, { "epoch": 7.49, "learning_rate": 4.625649980316784e-05, "loss": 2.3583, "step": 2587500 }, { "epoch": 7.49, "learning_rate": 4.625577615552056e-05, "loss": 2.3512, "step": 2588000 }, { "epoch": 7.49, "learning_rate": 4.6255052507873283e-05, "loss": 2.3685, "step": 2588500 }, { "epoch": 7.49, "learning_rate": 4.6254330307521306e-05, "loss": 2.3606, "step": 2589000 }, { "epoch": 7.5, "learning_rate": 4.625360665987403e-05, "loss": 2.3374, "step": 2589500 }, { "epoch": 7.5, "learning_rate": 4.625288301222676e-05, "loss": 2.342, "step": 2590000 }, { "epoch": 7.5, "learning_rate": 4.625215936457948e-05, "loss": 2.3855, "step": 2590500 }, { "epoch": 7.5, "learning_rate": 4.6251437164227495e-05, "loss": 2.3399, "step": 2591000 }, { "epoch": 7.5, "learning_rate": 4.625071351658022e-05, "loss": 2.3498, "step": 2591500 }, { "epoch": 7.5, "learning_rate": 4.624999131622824e-05, "loss": 2.3479, "step": 2592000 }, { "epoch": 7.5, "learning_rate": 4.624926766858096e-05, "loss": 2.3396, "step": 2592500 }, { "epoch": 7.51, "learning_rate": 4.6248544020933684e-05, "loss": 2.3527, "step": 2593000 }, { "epoch": 7.51, "learning_rate": 4.6247820373286406e-05, "loss": 2.3679, "step": 2593500 }, { "epoch": 7.51, "learning_rate": 4.624709817293442e-05, "loss": 2.35, "step": 2594000 }, { "epoch": 7.51, "learning_rate": 4.6246374525287144e-05, "loss": 2.3675, "step": 2594500 }, { "epoch": 7.51, "learning_rate": 4.6245650877639866e-05, "loss": 2.3601, "step": 2595000 }, { "epoch": 7.51, "learning_rate": 4.624492722999259e-05, "loss": 2.3604, "step": 2595500 }, { "epoch": 7.51, "learning_rate": 4.624420358234531e-05, "loss": 2.3697, "step": 2596000 }, { "epoch": 7.52, "learning_rate": 4.624347993469804e-05, "loss": 2.3497, "step": 2596500 }, { "epoch": 7.52, "learning_rate": 4.6242757734346055e-05, "loss": 2.3393, "step": 2597000 }, { "epoch": 7.52, "learning_rate": 4.6242034086698784e-05, "loss": 2.3864, "step": 2597500 }, { "epoch": 7.52, "learning_rate": 4.6241310439051506e-05, "loss": 2.3298, "step": 2598000 }, { "epoch": 7.52, "learning_rate": 4.624058679140423e-05, "loss": 2.3432, "step": 2598500 }, { "epoch": 7.52, "learning_rate": 4.623986314375695e-05, "loss": 2.3172, "step": 2599000 }, { "epoch": 7.52, "learning_rate": 4.623913949610967e-05, "loss": 2.3599, "step": 2599500 }, { "epoch": 7.53, "learning_rate": 4.623841729575769e-05, "loss": 2.36, "step": 2600000 }, { "epoch": 7.53, "learning_rate": 4.623769364811041e-05, "loss": 2.3511, "step": 2600500 }, { "epoch": 7.53, "learning_rate": 4.623697000046314e-05, "loss": 2.3628, "step": 2601000 }, { "epoch": 7.53, "learning_rate": 4.6236247800111155e-05, "loss": 2.3583, "step": 2601500 }, { "epoch": 7.53, "learning_rate": 4.623552415246388e-05, "loss": 2.3397, "step": 2602000 }, { "epoch": 7.53, "learning_rate": 4.62348005048166e-05, "loss": 2.3579, "step": 2602500 }, { "epoch": 7.53, "learning_rate": 4.623407685716932e-05, "loss": 2.3408, "step": 2603000 }, { "epoch": 7.54, "learning_rate": 4.6233353209522044e-05, "loss": 2.3598, "step": 2603500 }, { "epoch": 7.54, "learning_rate": 4.623262956187477e-05, "loss": 2.3696, "step": 2604000 }, { "epoch": 7.54, "learning_rate": 4.623190591422749e-05, "loss": 2.3487, "step": 2604500 }, { "epoch": 7.54, "learning_rate": 4.623118226658022e-05, "loss": 2.3747, "step": 2605000 }, { "epoch": 7.54, "learning_rate": 4.623045861893294e-05, "loss": 2.3399, "step": 2605500 }, { "epoch": 7.54, "learning_rate": 4.622973497128566e-05, "loss": 2.3666, "step": 2606000 }, { "epoch": 7.54, "learning_rate": 4.6229012770933685e-05, "loss": 2.3666, "step": 2606500 }, { "epoch": 7.55, "learning_rate": 4.622828912328641e-05, "loss": 2.3533, "step": 2607000 }, { "epoch": 7.55, "learning_rate": 4.622756547563913e-05, "loss": 2.337, "step": 2607500 }, { "epoch": 7.55, "learning_rate": 4.622684182799185e-05, "loss": 2.3649, "step": 2608000 }, { "epoch": 7.55, "learning_rate": 4.6226118180344574e-05, "loss": 2.3452, "step": 2608500 }, { "epoch": 7.55, "learning_rate": 4.6225394532697296e-05, "loss": 2.3787, "step": 2609000 }, { "epoch": 7.55, "learning_rate": 4.622467088505002e-05, "loss": 2.3546, "step": 2609500 }, { "epoch": 7.55, "learning_rate": 4.622394723740274e-05, "loss": 2.3598, "step": 2610000 }, { "epoch": 7.56, "learning_rate": 4.622322358975546e-05, "loss": 2.3308, "step": 2610500 }, { "epoch": 7.56, "learning_rate": 4.6222501389403485e-05, "loss": 2.3515, "step": 2611000 }, { "epoch": 7.56, "learning_rate": 4.622177774175621e-05, "loss": 2.3634, "step": 2611500 }, { "epoch": 7.56, "learning_rate": 4.622105409410893e-05, "loss": 2.3631, "step": 2612000 }, { "epoch": 7.56, "learning_rate": 4.622033044646166e-05, "loss": 2.3534, "step": 2612500 }, { "epoch": 7.56, "learning_rate": 4.621960679881438e-05, "loss": 2.3751, "step": 2613000 }, { "epoch": 7.57, "learning_rate": 4.62188831511671e-05, "loss": 2.3645, "step": 2613500 }, { "epoch": 7.57, "learning_rate": 4.621816095081512e-05, "loss": 2.3473, "step": 2614000 }, { "epoch": 7.57, "learning_rate": 4.621743730316784e-05, "loss": 2.3684, "step": 2614500 }, { "epoch": 7.57, "learning_rate": 4.621671365552056e-05, "loss": 2.3618, "step": 2615000 }, { "epoch": 7.57, "learning_rate": 4.621599000787329e-05, "loss": 2.3846, "step": 2615500 }, { "epoch": 7.57, "learning_rate": 4.621526780752131e-05, "loss": 2.3591, "step": 2616000 }, { "epoch": 7.57, "learning_rate": 4.621454415987403e-05, "loss": 2.3563, "step": 2616500 }, { "epoch": 7.58, "learning_rate": 4.621382051222675e-05, "loss": 2.3491, "step": 2617000 }, { "epoch": 7.58, "learning_rate": 4.6213096864579474e-05, "loss": 2.3458, "step": 2617500 }, { "epoch": 7.58, "learning_rate": 4.6212373216932196e-05, "loss": 2.3561, "step": 2618000 }, { "epoch": 7.58, "learning_rate": 4.621164956928492e-05, "loss": 2.3699, "step": 2618500 }, { "epoch": 7.58, "learning_rate": 4.621092592163764e-05, "loss": 2.3664, "step": 2619000 }, { "epoch": 7.58, "learning_rate": 4.621020227399037e-05, "loss": 2.3589, "step": 2619500 }, { "epoch": 7.58, "learning_rate": 4.620948007363839e-05, "loss": 2.3745, "step": 2620000 }, { "epoch": 7.59, "learning_rate": 4.6208756425991114e-05, "loss": 2.3698, "step": 2620500 }, { "epoch": 7.59, "learning_rate": 4.6208032778343837e-05, "loss": 2.3862, "step": 2621000 }, { "epoch": 7.59, "learning_rate": 4.620730913069656e-05, "loss": 2.339, "step": 2621500 }, { "epoch": 7.59, "learning_rate": 4.6206586930344574e-05, "loss": 2.3516, "step": 2622000 }, { "epoch": 7.59, "learning_rate": 4.62058632826973e-05, "loss": 2.3625, "step": 2622500 }, { "epoch": 7.59, "learning_rate": 4.620514108234532e-05, "loss": 2.3607, "step": 2623000 }, { "epoch": 7.59, "learning_rate": 4.620441743469804e-05, "loss": 2.3586, "step": 2623500 }, { "epoch": 7.6, "learning_rate": 4.6203693787050763e-05, "loss": 2.324, "step": 2624000 }, { "epoch": 7.6, "learning_rate": 4.6202970139403486e-05, "loss": 2.3377, "step": 2624500 }, { "epoch": 7.6, "learning_rate": 4.62022479390515e-05, "loss": 2.3847, "step": 2625000 }, { "epoch": 7.6, "learning_rate": 4.6201524291404223e-05, "loss": 2.3679, "step": 2625500 }, { "epoch": 7.6, "learning_rate": 4.6200800643756946e-05, "loss": 2.3511, "step": 2626000 }, { "epoch": 7.6, "learning_rate": 4.620007699610967e-05, "loss": 2.3596, "step": 2626500 }, { "epoch": 7.6, "learning_rate": 4.619935334846239e-05, "loss": 2.3396, "step": 2627000 }, { "epoch": 7.61, "learning_rate": 4.619862970081512e-05, "loss": 2.3775, "step": 2627500 }, { "epoch": 7.61, "learning_rate": 4.619790750046314e-05, "loss": 2.3437, "step": 2628000 }, { "epoch": 7.61, "learning_rate": 4.6197183852815864e-05, "loss": 2.3413, "step": 2628500 }, { "epoch": 7.61, "learning_rate": 4.6196460205168586e-05, "loss": 2.3617, "step": 2629000 }, { "epoch": 7.61, "learning_rate": 4.61957380048166e-05, "loss": 2.3689, "step": 2629500 }, { "epoch": 7.61, "learning_rate": 4.6195014357169324e-05, "loss": 2.345, "step": 2630000 }, { "epoch": 7.61, "learning_rate": 4.6194290709522046e-05, "loss": 2.3699, "step": 2630500 }, { "epoch": 7.62, "learning_rate": 4.619356706187477e-05, "loss": 2.3584, "step": 2631000 }, { "epoch": 7.62, "learning_rate": 4.619284341422749e-05, "loss": 2.3628, "step": 2631500 }, { "epoch": 7.62, "learning_rate": 4.619211976658022e-05, "loss": 2.3551, "step": 2632000 }, { "epoch": 7.62, "learning_rate": 4.619139611893294e-05, "loss": 2.3636, "step": 2632500 }, { "epoch": 7.62, "learning_rate": 4.6190672471285664e-05, "loss": 2.3261, "step": 2633000 }, { "epoch": 7.62, "learning_rate": 4.6189948823638386e-05, "loss": 2.3729, "step": 2633500 }, { "epoch": 7.62, "learning_rate": 4.618922517599111e-05, "loss": 2.353, "step": 2634000 }, { "epoch": 7.63, "learning_rate": 4.618850152834384e-05, "loss": 2.3736, "step": 2634500 }, { "epoch": 7.63, "learning_rate": 4.618777788069656e-05, "loss": 2.3775, "step": 2635000 }, { "epoch": 7.63, "learning_rate": 4.6187055680344575e-05, "loss": 2.347, "step": 2635500 }, { "epoch": 7.63, "learning_rate": 4.61863320326973e-05, "loss": 2.333, "step": 2636000 }, { "epoch": 7.63, "learning_rate": 4.618560838505002e-05, "loss": 2.3365, "step": 2636500 }, { "epoch": 7.63, "learning_rate": 4.618488473740274e-05, "loss": 2.369, "step": 2637000 }, { "epoch": 7.63, "learning_rate": 4.618416108975547e-05, "loss": 2.3537, "step": 2637500 }, { "epoch": 7.64, "learning_rate": 4.6183438889403486e-05, "loss": 2.3695, "step": 2638000 }, { "epoch": 7.64, "learning_rate": 4.618271524175621e-05, "loss": 2.3579, "step": 2638500 }, { "epoch": 7.64, "learning_rate": 4.618199159410893e-05, "loss": 2.3611, "step": 2639000 }, { "epoch": 7.64, "learning_rate": 4.618126794646165e-05, "loss": 2.3474, "step": 2639500 }, { "epoch": 7.64, "learning_rate": 4.6180544298814375e-05, "loss": 2.3261, "step": 2640000 }, { "epoch": 7.64, "learning_rate": 4.61798206511671e-05, "loss": 2.3707, "step": 2640500 }, { "epoch": 7.64, "learning_rate": 4.617909700351982e-05, "loss": 2.3605, "step": 2641000 }, { "epoch": 7.65, "learning_rate": 4.617837335587254e-05, "loss": 2.341, "step": 2641500 }, { "epoch": 7.65, "learning_rate": 4.6177651155520564e-05, "loss": 2.3526, "step": 2642000 }, { "epoch": 7.65, "learning_rate": 4.617692895516859e-05, "loss": 2.342, "step": 2642500 }, { "epoch": 7.65, "learning_rate": 4.617620530752131e-05, "loss": 2.3708, "step": 2643000 }, { "epoch": 7.65, "learning_rate": 4.6175483107169325e-05, "loss": 2.367, "step": 2643500 }, { "epoch": 7.65, "learning_rate": 4.617475945952205e-05, "loss": 2.3547, "step": 2644000 }, { "epoch": 7.65, "learning_rate": 4.617403581187477e-05, "loss": 2.3439, "step": 2644500 }, { "epoch": 7.66, "learning_rate": 4.617331216422749e-05, "loss": 2.3368, "step": 2645000 }, { "epoch": 7.66, "learning_rate": 4.617258851658022e-05, "loss": 2.367, "step": 2645500 }, { "epoch": 7.66, "learning_rate": 4.617186486893294e-05, "loss": 2.3653, "step": 2646000 }, { "epoch": 7.66, "learning_rate": 4.6171141221285665e-05, "loss": 2.3556, "step": 2646500 }, { "epoch": 7.66, "learning_rate": 4.617041757363839e-05, "loss": 2.3271, "step": 2647000 }, { "epoch": 7.66, "learning_rate": 4.616969392599111e-05, "loss": 2.3565, "step": 2647500 }, { "epoch": 7.66, "learning_rate": 4.6168971725639125e-05, "loss": 2.3483, "step": 2648000 }, { "epoch": 7.67, "learning_rate": 4.616824807799185e-05, "loss": 2.3775, "step": 2648500 }, { "epoch": 7.67, "learning_rate": 4.616752443034457e-05, "loss": 2.3587, "step": 2649000 }, { "epoch": 7.67, "learning_rate": 4.616680222999259e-05, "loss": 2.3652, "step": 2649500 }, { "epoch": 7.67, "learning_rate": 4.616607858234532e-05, "loss": 2.346, "step": 2650000 }, { "epoch": 7.67, "learning_rate": 4.616535493469804e-05, "loss": 2.3474, "step": 2650500 }, { "epoch": 7.67, "learning_rate": 4.6164631287050765e-05, "loss": 2.3592, "step": 2651000 }, { "epoch": 7.68, "learning_rate": 4.616390763940349e-05, "loss": 2.3615, "step": 2651500 }, { "epoch": 7.68, "learning_rate": 4.616318399175621e-05, "loss": 2.3623, "step": 2652000 }, { "epoch": 7.68, "learning_rate": 4.616246034410893e-05, "loss": 2.3663, "step": 2652500 }, { "epoch": 7.68, "learning_rate": 4.6161736696461654e-05, "loss": 2.3641, "step": 2653000 }, { "epoch": 7.68, "learning_rate": 4.6161013048814376e-05, "loss": 2.3406, "step": 2653500 }, { "epoch": 7.68, "learning_rate": 4.61602908484624e-05, "loss": 2.3559, "step": 2654000 }, { "epoch": 7.68, "learning_rate": 4.6159568648110414e-05, "loss": 2.3717, "step": 2654500 }, { "epoch": 7.69, "learning_rate": 4.6158845000463136e-05, "loss": 2.3652, "step": 2655000 }, { "epoch": 7.69, "learning_rate": 4.615812135281586e-05, "loss": 2.3654, "step": 2655500 }, { "epoch": 7.69, "learning_rate": 4.615739770516858e-05, "loss": 2.3393, "step": 2656000 }, { "epoch": 7.69, "learning_rate": 4.61566740575213e-05, "loss": 2.3584, "step": 2656500 }, { "epoch": 7.69, "learning_rate": 4.6155950409874025e-05, "loss": 2.3633, "step": 2657000 }, { "epoch": 7.69, "learning_rate": 4.6155226762226754e-05, "loss": 2.3588, "step": 2657500 }, { "epoch": 7.69, "learning_rate": 4.6154503114579476e-05, "loss": 2.3493, "step": 2658000 }, { "epoch": 7.7, "learning_rate": 4.61537794669322e-05, "loss": 2.3288, "step": 2658500 }, { "epoch": 7.7, "learning_rate": 4.615305581928492e-05, "loss": 2.3494, "step": 2659000 }, { "epoch": 7.7, "learning_rate": 4.615233217163765e-05, "loss": 2.3584, "step": 2659500 }, { "epoch": 7.7, "learning_rate": 4.615160852399037e-05, "loss": 2.3461, "step": 2660000 }, { "epoch": 7.7, "learning_rate": 4.615088632363839e-05, "loss": 2.3696, "step": 2660500 }, { "epoch": 7.7, "learning_rate": 4.615016267599111e-05, "loss": 2.3635, "step": 2661000 }, { "epoch": 7.7, "learning_rate": 4.614943902834383e-05, "loss": 2.3641, "step": 2661500 }, { "epoch": 7.71, "learning_rate": 4.614871682799185e-05, "loss": 2.3615, "step": 2662000 }, { "epoch": 7.71, "learning_rate": 4.614799318034457e-05, "loss": 2.3623, "step": 2662500 }, { "epoch": 7.71, "learning_rate": 4.6147272427287886e-05, "loss": 2.377, "step": 2663000 }, { "epoch": 7.71, "learning_rate": 4.614654877964061e-05, "loss": 2.385, "step": 2663500 }, { "epoch": 7.71, "learning_rate": 4.614582513199333e-05, "loss": 2.3702, "step": 2664000 }, { "epoch": 7.71, "learning_rate": 4.614510148434605e-05, "loss": 2.363, "step": 2664500 }, { "epoch": 7.71, "learning_rate": 4.614437783669878e-05, "loss": 2.3471, "step": 2665000 }, { "epoch": 7.72, "learning_rate": 4.6143654189051504e-05, "loss": 2.3523, "step": 2665500 }, { "epoch": 7.72, "learning_rate": 4.6142930541404226e-05, "loss": 2.3534, "step": 2666000 }, { "epoch": 7.72, "learning_rate": 4.614220689375695e-05, "loss": 2.3528, "step": 2666500 }, { "epoch": 7.72, "learning_rate": 4.614148324610967e-05, "loss": 2.3619, "step": 2667000 }, { "epoch": 7.72, "learning_rate": 4.61407595984624e-05, "loss": 2.3474, "step": 2667500 }, { "epoch": 7.72, "learning_rate": 4.614003595081512e-05, "loss": 2.3698, "step": 2668000 }, { "epoch": 7.72, "learning_rate": 4.6139312303167844e-05, "loss": 2.3653, "step": 2668500 }, { "epoch": 7.73, "learning_rate": 4.613859010281586e-05, "loss": 2.339, "step": 2669000 }, { "epoch": 7.73, "learning_rate": 4.613786645516858e-05, "loss": 2.3729, "step": 2669500 }, { "epoch": 7.73, "learning_rate": 4.6137142807521304e-05, "loss": 2.3822, "step": 2670000 }, { "epoch": 7.73, "learning_rate": 4.6136420607169326e-05, "loss": 2.3594, "step": 2670500 }, { "epoch": 7.73, "learning_rate": 4.613569695952205e-05, "loss": 2.3533, "step": 2671000 }, { "epoch": 7.73, "learning_rate": 4.613497331187477e-05, "loss": 2.3666, "step": 2671500 }, { "epoch": 7.73, "learning_rate": 4.613424966422749e-05, "loss": 2.3673, "step": 2672000 }, { "epoch": 7.74, "learning_rate": 4.613352601658022e-05, "loss": 2.3489, "step": 2672500 }, { "epoch": 7.74, "learning_rate": 4.6132802368932944e-05, "loss": 2.3566, "step": 2673000 }, { "epoch": 7.74, "learning_rate": 4.6132078721285666e-05, "loss": 2.3514, "step": 2673500 }, { "epoch": 7.74, "learning_rate": 4.613135507363839e-05, "loss": 2.342, "step": 2674000 }, { "epoch": 7.74, "learning_rate": 4.613063142599111e-05, "loss": 2.377, "step": 2674500 }, { "epoch": 7.74, "learning_rate": 4.612990777834383e-05, "loss": 2.3566, "step": 2675000 }, { "epoch": 7.74, "learning_rate": 4.612918557799185e-05, "loss": 2.3468, "step": 2675500 }, { "epoch": 7.75, "learning_rate": 4.612846193034458e-05, "loss": 2.3564, "step": 2676000 }, { "epoch": 7.75, "learning_rate": 4.61277382826973e-05, "loss": 2.3468, "step": 2676500 }, { "epoch": 7.75, "learning_rate": 4.612701463505002e-05, "loss": 2.3486, "step": 2677000 }, { "epoch": 7.75, "learning_rate": 4.6126290987402744e-05, "loss": 2.3636, "step": 2677500 }, { "epoch": 7.75, "learning_rate": 4.6125567339755467e-05, "loss": 2.3648, "step": 2678000 }, { "epoch": 7.75, "learning_rate": 4.6124848033994075e-05, "loss": 2.3866, "step": 2678500 }, { "epoch": 7.75, "learning_rate": 4.61241243863468e-05, "loss": 2.3732, "step": 2679000 }, { "epoch": 7.76, "learning_rate": 4.612340073869952e-05, "loss": 2.3582, "step": 2679500 }, { "epoch": 7.76, "learning_rate": 4.6122678538347536e-05, "loss": 2.3678, "step": 2680000 }, { "epoch": 7.76, "learning_rate": 4.6121954890700265e-05, "loss": 2.3706, "step": 2680500 }, { "epoch": 7.76, "learning_rate": 4.612123124305299e-05, "loss": 2.3745, "step": 2681000 }, { "epoch": 7.76, "learning_rate": 4.612050759540571e-05, "loss": 2.3494, "step": 2681500 }, { "epoch": 7.76, "learning_rate": 4.611978394775843e-05, "loss": 2.3489, "step": 2682000 }, { "epoch": 7.76, "learning_rate": 4.6119060300111153e-05, "loss": 2.3222, "step": 2682500 }, { "epoch": 7.77, "learning_rate": 4.6118336652463876e-05, "loss": 2.3655, "step": 2683000 }, { "epoch": 7.77, "learning_rate": 4.61176130048166e-05, "loss": 2.3891, "step": 2683500 }, { "epoch": 7.77, "learning_rate": 4.611688935716933e-05, "loss": 2.3574, "step": 2684000 }, { "epoch": 7.77, "learning_rate": 4.611616570952205e-05, "loss": 2.3533, "step": 2684500 }, { "epoch": 7.77, "learning_rate": 4.611544206187477e-05, "loss": 2.3373, "step": 2685000 }, { "epoch": 7.77, "learning_rate": 4.6114718414227494e-05, "loss": 2.344, "step": 2685500 }, { "epoch": 7.77, "learning_rate": 4.6113994766580216e-05, "loss": 2.3476, "step": 2686000 }, { "epoch": 7.78, "learning_rate": 4.611327111893294e-05, "loss": 2.3288, "step": 2686500 }, { "epoch": 7.78, "learning_rate": 4.611254747128566e-05, "loss": 2.357, "step": 2687000 }, { "epoch": 7.78, "learning_rate": 4.611182382363839e-05, "loss": 2.3556, "step": 2687500 }, { "epoch": 7.78, "learning_rate": 4.611110017599111e-05, "loss": 2.3624, "step": 2688000 }, { "epoch": 7.78, "learning_rate": 4.6110376528343834e-05, "loss": 2.368, "step": 2688500 }, { "epoch": 7.78, "learning_rate": 4.6109652880696556e-05, "loss": 2.3641, "step": 2689000 }, { "epoch": 7.79, "learning_rate": 4.610893068034458e-05, "loss": 2.364, "step": 2689500 }, { "epoch": 7.79, "learning_rate": 4.6108208479992594e-05, "loss": 2.3451, "step": 2690000 }, { "epoch": 7.79, "learning_rate": 4.6107484832345316e-05, "loss": 2.3543, "step": 2690500 }, { "epoch": 7.79, "learning_rate": 4.610676118469804e-05, "loss": 2.3746, "step": 2691000 }, { "epoch": 7.79, "learning_rate": 4.610603753705076e-05, "loss": 2.3555, "step": 2691500 }, { "epoch": 7.79, "learning_rate": 4.610531388940348e-05, "loss": 2.3416, "step": 2692000 }, { "epoch": 7.79, "learning_rate": 4.6104590241756205e-05, "loss": 2.3483, "step": 2692500 }, { "epoch": 7.8, "learning_rate": 4.610386659410893e-05, "loss": 2.3344, "step": 2693000 }, { "epoch": 7.8, "learning_rate": 4.610314439375695e-05, "loss": 2.3693, "step": 2693500 }, { "epoch": 7.8, "learning_rate": 4.610242074610967e-05, "loss": 2.3745, "step": 2694000 }, { "epoch": 7.8, "learning_rate": 4.6101697098462394e-05, "loss": 2.3403, "step": 2694500 }, { "epoch": 7.8, "learning_rate": 4.610097345081512e-05, "loss": 2.3358, "step": 2695000 }, { "epoch": 7.8, "learning_rate": 4.6100249803167845e-05, "loss": 2.3645, "step": 2695500 }, { "epoch": 7.8, "learning_rate": 4.609952615552057e-05, "loss": 2.3524, "step": 2696000 }, { "epoch": 7.81, "learning_rate": 4.609880250787329e-05, "loss": 2.3531, "step": 2696500 }, { "epoch": 7.81, "learning_rate": 4.6098080307521305e-05, "loss": 2.33, "step": 2697000 }, { "epoch": 7.81, "learning_rate": 4.609735665987403e-05, "loss": 2.3612, "step": 2697500 }, { "epoch": 7.81, "learning_rate": 4.609663301222675e-05, "loss": 2.352, "step": 2698000 }, { "epoch": 7.81, "learning_rate": 4.609590936457948e-05, "loss": 2.3596, "step": 2698500 }, { "epoch": 7.81, "learning_rate": 4.60951857169322e-05, "loss": 2.3462, "step": 2699000 }, { "epoch": 7.81, "learning_rate": 4.609446206928492e-05, "loss": 2.3678, "step": 2699500 }, { "epoch": 7.82, "learning_rate": 4.6093738421637646e-05, "loss": 2.3461, "step": 2700000 }, { "epoch": 7.82, "learning_rate": 4.609301477399037e-05, "loss": 2.36, "step": 2700500 }, { "epoch": 7.82, "learning_rate": 4.609229112634309e-05, "loss": 2.3533, "step": 2701000 }, { "epoch": 7.82, "learning_rate": 4.6091570373286406e-05, "loss": 2.3584, "step": 2701500 }, { "epoch": 7.82, "learning_rate": 4.609084672563913e-05, "loss": 2.3504, "step": 2702000 }, { "epoch": 7.82, "learning_rate": 4.609012307799186e-05, "loss": 2.36, "step": 2702500 }, { "epoch": 7.82, "learning_rate": 4.608939943034458e-05, "loss": 2.3771, "step": 2703000 }, { "epoch": 7.83, "learning_rate": 4.60886757826973e-05, "loss": 2.3562, "step": 2703500 }, { "epoch": 7.83, "learning_rate": 4.6087952135050024e-05, "loss": 2.3612, "step": 2704000 }, { "epoch": 7.83, "learning_rate": 4.608723138199333e-05, "loss": 2.336, "step": 2704500 }, { "epoch": 7.83, "learning_rate": 4.6086507734346055e-05, "loss": 2.3635, "step": 2705000 }, { "epoch": 7.83, "learning_rate": 4.608578408669878e-05, "loss": 2.3652, "step": 2705500 }, { "epoch": 7.83, "learning_rate": 4.6085060439051506e-05, "loss": 2.349, "step": 2706000 }, { "epoch": 7.83, "learning_rate": 4.608433679140423e-05, "loss": 2.359, "step": 2706500 }, { "epoch": 7.84, "learning_rate": 4.608361314375695e-05, "loss": 2.3597, "step": 2707000 }, { "epoch": 7.84, "learning_rate": 4.608288949610967e-05, "loss": 2.3296, "step": 2707500 }, { "epoch": 7.84, "learning_rate": 4.6082165848462395e-05, "loss": 2.3479, "step": 2708000 }, { "epoch": 7.84, "learning_rate": 4.608144220081512e-05, "loss": 2.3327, "step": 2708500 }, { "epoch": 7.84, "learning_rate": 4.608071855316784e-05, "loss": 2.3476, "step": 2709000 }, { "epoch": 7.84, "learning_rate": 4.6079996352815855e-05, "loss": 2.3505, "step": 2709500 }, { "epoch": 7.84, "learning_rate": 4.6079272705168584e-05, "loss": 2.3586, "step": 2710000 }, { "epoch": 7.85, "learning_rate": 4.6078549057521306e-05, "loss": 2.341, "step": 2710500 }, { "epoch": 7.85, "learning_rate": 4.607782540987403e-05, "loss": 2.3368, "step": 2711000 }, { "epoch": 7.85, "learning_rate": 4.607710320952205e-05, "loss": 2.3621, "step": 2711500 }, { "epoch": 7.85, "learning_rate": 4.607637956187477e-05, "loss": 2.3524, "step": 2712000 }, { "epoch": 7.85, "learning_rate": 4.6075655914227495e-05, "loss": 2.3616, "step": 2712500 }, { "epoch": 7.85, "learning_rate": 4.607493226658022e-05, "loss": 2.3502, "step": 2713000 }, { "epoch": 7.85, "learning_rate": 4.607420861893294e-05, "loss": 2.3545, "step": 2713500 }, { "epoch": 7.86, "learning_rate": 4.607348497128566e-05, "loss": 2.3584, "step": 2714000 }, { "epoch": 7.86, "learning_rate": 4.6072761323638384e-05, "loss": 2.356, "step": 2714500 }, { "epoch": 7.86, "learning_rate": 4.6072037675991106e-05, "loss": 2.3747, "step": 2715000 }, { "epoch": 7.86, "learning_rate": 4.607131402834383e-05, "loss": 2.3583, "step": 2715500 }, { "epoch": 7.86, "learning_rate": 4.607059038069656e-05, "loss": 2.3504, "step": 2716000 }, { "epoch": 7.86, "learning_rate": 4.606986673304928e-05, "loss": 2.3698, "step": 2716500 }, { "epoch": 7.86, "learning_rate": 4.606914308540201e-05, "loss": 2.375, "step": 2717000 }, { "epoch": 7.87, "learning_rate": 4.606841943775473e-05, "loss": 2.3537, "step": 2717500 }, { "epoch": 7.87, "learning_rate": 4.606769868469804e-05, "loss": 2.3401, "step": 2718000 }, { "epoch": 7.87, "learning_rate": 4.606697503705076e-05, "loss": 2.3763, "step": 2718500 }, { "epoch": 7.87, "learning_rate": 4.6066251389403484e-05, "loss": 2.3595, "step": 2719000 }, { "epoch": 7.87, "learning_rate": 4.606552774175621e-05, "loss": 2.3392, "step": 2719500 }, { "epoch": 7.87, "learning_rate": 4.606480409410893e-05, "loss": 2.3492, "step": 2720000 }, { "epoch": 7.87, "learning_rate": 4.606408189375695e-05, "loss": 2.3559, "step": 2720500 }, { "epoch": 7.88, "learning_rate": 4.6063358246109673e-05, "loss": 2.3542, "step": 2721000 }, { "epoch": 7.88, "learning_rate": 4.6062634598462396e-05, "loss": 2.3474, "step": 2721500 }, { "epoch": 7.88, "learning_rate": 4.606191095081512e-05, "loss": 2.3456, "step": 2722000 }, { "epoch": 7.88, "learning_rate": 4.6061188750463134e-05, "loss": 2.3377, "step": 2722500 }, { "epoch": 7.88, "learning_rate": 4.6060465102815856e-05, "loss": 2.3597, "step": 2723000 }, { "epoch": 7.88, "learning_rate": 4.605974145516858e-05, "loss": 2.341, "step": 2723500 }, { "epoch": 7.88, "learning_rate": 4.605901780752131e-05, "loss": 2.3811, "step": 2724000 }, { "epoch": 7.89, "learning_rate": 4.605829415987403e-05, "loss": 2.3588, "step": 2724500 }, { "epoch": 7.89, "learning_rate": 4.605757051222676e-05, "loss": 2.354, "step": 2725000 }, { "epoch": 7.89, "learning_rate": 4.6056848311874774e-05, "loss": 2.3524, "step": 2725500 }, { "epoch": 7.89, "learning_rate": 4.6056124664227496e-05, "loss": 2.3535, "step": 2726000 }, { "epoch": 7.89, "learning_rate": 4.605540101658022e-05, "loss": 2.338, "step": 2726500 }, { "epoch": 7.89, "learning_rate": 4.605467736893294e-05, "loss": 2.363, "step": 2727000 }, { "epoch": 7.89, "learning_rate": 4.605395372128566e-05, "loss": 2.347, "step": 2727500 }, { "epoch": 7.9, "learning_rate": 4.6053230073638385e-05, "loss": 2.353, "step": 2728000 }, { "epoch": 7.9, "learning_rate": 4.605250642599111e-05, "loss": 2.3812, "step": 2728500 }, { "epoch": 7.9, "learning_rate": 4.605178277834383e-05, "loss": 2.3525, "step": 2729000 }, { "epoch": 7.9, "learning_rate": 4.605105913069656e-05, "loss": 2.3672, "step": 2729500 }, { "epoch": 7.9, "learning_rate": 4.6050336930344574e-05, "loss": 2.3479, "step": 2730000 }, { "epoch": 7.9, "learning_rate": 4.6049613282697296e-05, "loss": 2.3618, "step": 2730500 }, { "epoch": 7.91, "learning_rate": 4.604888963505002e-05, "loss": 2.3589, "step": 2731000 }, { "epoch": 7.91, "learning_rate": 4.604816598740274e-05, "loss": 2.363, "step": 2731500 }, { "epoch": 7.91, "learning_rate": 4.604744233975547e-05, "loss": 2.3213, "step": 2732000 }, { "epoch": 7.91, "learning_rate": 4.604671869210819e-05, "loss": 2.3388, "step": 2732500 }, { "epoch": 7.91, "learning_rate": 4.604599649175621e-05, "loss": 2.3743, "step": 2733000 }, { "epoch": 7.91, "learning_rate": 4.604527429140423e-05, "loss": 2.3593, "step": 2733500 }, { "epoch": 7.91, "learning_rate": 4.604455064375695e-05, "loss": 2.344, "step": 2734000 }, { "epoch": 7.92, "learning_rate": 4.6043826996109674e-05, "loss": 2.3467, "step": 2734500 }, { "epoch": 7.92, "learning_rate": 4.604310479575769e-05, "loss": 2.331, "step": 2735000 }, { "epoch": 7.92, "learning_rate": 4.604238114811041e-05, "loss": 2.3622, "step": 2735500 }, { "epoch": 7.92, "learning_rate": 4.6041657500463134e-05, "loss": 2.3549, "step": 2736000 }, { "epoch": 7.92, "learning_rate": 4.6040933852815857e-05, "loss": 2.341, "step": 2736500 }, { "epoch": 7.92, "learning_rate": 4.6040210205168586e-05, "loss": 2.3586, "step": 2737000 }, { "epoch": 7.92, "learning_rate": 4.603948655752131e-05, "loss": 2.349, "step": 2737500 }, { "epoch": 7.93, "learning_rate": 4.603876290987403e-05, "loss": 2.3413, "step": 2738000 }, { "epoch": 7.93, "learning_rate": 4.6038040709522046e-05, "loss": 2.3774, "step": 2738500 }, { "epoch": 7.93, "learning_rate": 4.603731706187477e-05, "loss": 2.3769, "step": 2739000 }, { "epoch": 7.93, "learning_rate": 4.603659341422749e-05, "loss": 2.3604, "step": 2739500 }, { "epoch": 7.93, "learning_rate": 4.603586976658022e-05, "loss": 2.3577, "step": 2740000 }, { "epoch": 7.93, "learning_rate": 4.603514611893294e-05, "loss": 2.3607, "step": 2740500 }, { "epoch": 7.93, "learning_rate": 4.6034422471285664e-05, "loss": 2.3582, "step": 2741000 }, { "epoch": 7.94, "learning_rate": 4.6033698823638386e-05, "loss": 2.3359, "step": 2741500 }, { "epoch": 7.94, "learning_rate": 4.603297517599111e-05, "loss": 2.3699, "step": 2742000 }, { "epoch": 7.94, "learning_rate": 4.603225297563913e-05, "loss": 2.3682, "step": 2742500 }, { "epoch": 7.94, "learning_rate": 4.603152932799185e-05, "loss": 2.3593, "step": 2743000 }, { "epoch": 7.94, "learning_rate": 4.6030805680344575e-05, "loss": 2.3412, "step": 2743500 }, { "epoch": 7.94, "learning_rate": 4.60300820326973e-05, "loss": 2.3606, "step": 2744000 }, { "epoch": 7.94, "learning_rate": 4.602935983234531e-05, "loss": 2.345, "step": 2744500 }, { "epoch": 7.95, "learning_rate": 4.6028637631993335e-05, "loss": 2.3484, "step": 2745000 }, { "epoch": 7.95, "learning_rate": 4.602791543164135e-05, "loss": 2.3577, "step": 2745500 }, { "epoch": 7.95, "learning_rate": 4.602719178399407e-05, "loss": 2.3519, "step": 2746000 }, { "epoch": 7.95, "learning_rate": 4.6026468136346795e-05, "loss": 2.3681, "step": 2746500 }, { "epoch": 7.95, "learning_rate": 4.602574448869952e-05, "loss": 2.3577, "step": 2747000 }, { "epoch": 7.95, "learning_rate": 4.6025020841052246e-05, "loss": 2.3423, "step": 2747500 }, { "epoch": 7.95, "learning_rate": 4.602429719340497e-05, "loss": 2.3528, "step": 2748000 }, { "epoch": 7.96, "learning_rate": 4.602357354575769e-05, "loss": 2.3647, "step": 2748500 }, { "epoch": 7.96, "learning_rate": 4.602284989811041e-05, "loss": 2.349, "step": 2749000 }, { "epoch": 7.96, "learning_rate": 4.6022126250463135e-05, "loss": 2.3612, "step": 2749500 }, { "epoch": 7.96, "learning_rate": 4.602140260281586e-05, "loss": 2.3382, "step": 2750000 }, { "epoch": 7.96, "learning_rate": 4.6020678955168586e-05, "loss": 2.342, "step": 2750500 }, { "epoch": 7.96, "learning_rate": 4.601995530752131e-05, "loss": 2.3682, "step": 2751000 }, { "epoch": 7.96, "learning_rate": 4.601923165987403e-05, "loss": 2.3658, "step": 2751500 }, { "epoch": 7.97, "learning_rate": 4.601850801222675e-05, "loss": 2.3387, "step": 2752000 }, { "epoch": 7.97, "learning_rate": 4.6017784364579475e-05, "loss": 2.3463, "step": 2752500 }, { "epoch": 7.97, "learning_rate": 4.60170607169322e-05, "loss": 2.3453, "step": 2753000 }, { "epoch": 7.97, "learning_rate": 4.601633851658021e-05, "loss": 2.3527, "step": 2753500 }, { "epoch": 7.97, "learning_rate": 4.6015614868932935e-05, "loss": 2.349, "step": 2754000 }, { "epoch": 7.97, "learning_rate": 4.601489122128566e-05, "loss": 2.3504, "step": 2754500 }, { "epoch": 7.97, "learning_rate": 4.6014167573638387e-05, "loss": 2.3543, "step": 2755000 }, { "epoch": 7.98, "learning_rate": 4.601344537328641e-05, "loss": 2.3408, "step": 2755500 }, { "epoch": 7.98, "learning_rate": 4.601272172563913e-05, "loss": 2.339, "step": 2756000 }, { "epoch": 7.98, "learning_rate": 4.601199807799185e-05, "loss": 2.347, "step": 2756500 }, { "epoch": 7.98, "learning_rate": 4.601127587763987e-05, "loss": 2.3315, "step": 2757000 }, { "epoch": 7.98, "learning_rate": 4.601055222999259e-05, "loss": 2.3646, "step": 2757500 }, { "epoch": 7.98, "learning_rate": 4.600982858234531e-05, "loss": 2.3361, "step": 2758000 }, { "epoch": 7.98, "learning_rate": 4.6009104934698036e-05, "loss": 2.3739, "step": 2758500 }, { "epoch": 7.99, "learning_rate": 4.6008381287050765e-05, "loss": 2.3534, "step": 2759000 }, { "epoch": 7.99, "learning_rate": 4.600765763940349e-05, "loss": 2.3671, "step": 2759500 }, { "epoch": 7.99, "learning_rate": 4.600693399175621e-05, "loss": 2.36, "step": 2760000 }, { "epoch": 7.99, "learning_rate": 4.6006211791404225e-05, "loss": 2.348, "step": 2760500 }, { "epoch": 7.99, "learning_rate": 4.600548814375695e-05, "loss": 2.3424, "step": 2761000 }, { "epoch": 7.99, "learning_rate": 4.600476449610967e-05, "loss": 2.3616, "step": 2761500 }, { "epoch": 7.99, "learning_rate": 4.600404084846239e-05, "loss": 2.3605, "step": 2762000 }, { "epoch": 8.0, "learning_rate": 4.6003318648110414e-05, "loss": 2.3552, "step": 2762500 }, { "epoch": 8.0, "learning_rate": 4.6002595000463136e-05, "loss": 2.365, "step": 2763000 }, { "epoch": 8.0, "learning_rate": 4.6001871352815865e-05, "loss": 2.3511, "step": 2763500 }, { "epoch": 8.0, "eval_accuracy": 0.6457251163529523, "eval_accuracy_mlm": 0.6076235167413505, "eval_accuracy_nsp": 0.8500226863373253, "eval_loss": 2.32987904548645, "eval_runtime": 330.539, "eval_samples_per_second": 1320.225, "eval_steps_per_second": 55.01, "step": 2763776 }, { "epoch": 8.0, "learning_rate": 4.600114915246388e-05, "loss": 2.3725, "step": 2764000 }, { "epoch": 8.0, "learning_rate": 4.60004255048166e-05, "loss": 2.3288, "step": 2764500 }, { "epoch": 8.0, "learning_rate": 4.5999701857169325e-05, "loss": 2.3284, "step": 2765000 }, { "epoch": 8.0, "learning_rate": 4.599897820952205e-05, "loss": 2.3444, "step": 2765500 }, { "epoch": 8.01, "learning_rate": 4.599825456187477e-05, "loss": 2.3309, "step": 2766000 }, { "epoch": 8.01, "learning_rate": 4.599753091422749e-05, "loss": 2.3115, "step": 2766500 }, { "epoch": 8.01, "learning_rate": 4.5996807266580214e-05, "loss": 2.3397, "step": 2767000 }, { "epoch": 8.01, "learning_rate": 4.5996083618932936e-05, "loss": 2.3402, "step": 2767500 }, { "epoch": 8.01, "learning_rate": 4.5995359971285665e-05, "loss": 2.346, "step": 2768000 }, { "epoch": 8.01, "learning_rate": 4.599463777093368e-05, "loss": 2.3254, "step": 2768500 }, { "epoch": 8.02, "learning_rate": 4.59939141232864e-05, "loss": 2.3336, "step": 2769000 }, { "epoch": 8.02, "learning_rate": 4.5993190475639125e-05, "loss": 2.3274, "step": 2769500 }, { "epoch": 8.02, "learning_rate": 4.5992466827991854e-05, "loss": 2.3304, "step": 2770000 }, { "epoch": 8.02, "learning_rate": 4.599174462763987e-05, "loss": 2.3226, "step": 2770500 }, { "epoch": 8.02, "learning_rate": 4.599102097999259e-05, "loss": 2.3217, "step": 2771000 }, { "epoch": 8.02, "learning_rate": 4.5990297332345314e-05, "loss": 2.3288, "step": 2771500 }, { "epoch": 8.02, "learning_rate": 4.5989573684698036e-05, "loss": 2.353, "step": 2772000 }, { "epoch": 8.03, "learning_rate": 4.5988850037050765e-05, "loss": 2.3055, "step": 2772500 }, { "epoch": 8.03, "learning_rate": 4.598812638940349e-05, "loss": 2.3257, "step": 2773000 }, { "epoch": 8.03, "learning_rate": 4.598740274175621e-05, "loss": 2.3604, "step": 2773500 }, { "epoch": 8.03, "learning_rate": 4.5986680541404225e-05, "loss": 2.3073, "step": 2774000 }, { "epoch": 8.03, "learning_rate": 4.598595689375695e-05, "loss": 2.3527, "step": 2774500 }, { "epoch": 8.03, "learning_rate": 4.598523324610967e-05, "loss": 2.3385, "step": 2775000 }, { "epoch": 8.03, "learning_rate": 4.598450959846239e-05, "loss": 2.345, "step": 2775500 }, { "epoch": 8.04, "learning_rate": 4.5983785950815114e-05, "loss": 2.335, "step": 2776000 }, { "epoch": 8.04, "learning_rate": 4.5983062303167837e-05, "loss": 2.3061, "step": 2776500 }, { "epoch": 8.04, "learning_rate": 4.5982338655520566e-05, "loss": 2.3423, "step": 2777000 }, { "epoch": 8.04, "learning_rate": 4.598161500787329e-05, "loss": 2.3404, "step": 2777500 }, { "epoch": 8.04, "learning_rate": 4.598089136022602e-05, "loss": 2.3114, "step": 2778000 }, { "epoch": 8.04, "learning_rate": 4.598016771257874e-05, "loss": 2.342, "step": 2778500 }, { "epoch": 8.04, "learning_rate": 4.597944406493146e-05, "loss": 2.3266, "step": 2779000 }, { "epoch": 8.05, "learning_rate": 4.597872186457948e-05, "loss": 2.3357, "step": 2779500 }, { "epoch": 8.05, "learning_rate": 4.59779982169322e-05, "loss": 2.3154, "step": 2780000 }, { "epoch": 8.05, "learning_rate": 4.5977276016580215e-05, "loss": 2.3237, "step": 2780500 }, { "epoch": 8.05, "learning_rate": 4.597655236893294e-05, "loss": 2.3486, "step": 2781000 }, { "epoch": 8.05, "learning_rate": 4.597583016858096e-05, "loss": 2.3192, "step": 2781500 }, { "epoch": 8.05, "learning_rate": 4.597510652093368e-05, "loss": 2.326, "step": 2782000 }, { "epoch": 8.05, "learning_rate": 4.5974382873286404e-05, "loss": 2.3288, "step": 2782500 }, { "epoch": 8.06, "learning_rate": 4.5973659225639126e-05, "loss": 2.3524, "step": 2783000 }, { "epoch": 8.06, "learning_rate": 4.597293557799185e-05, "loss": 2.3417, "step": 2783500 }, { "epoch": 8.06, "learning_rate": 4.597221193034457e-05, "loss": 2.3352, "step": 2784000 }, { "epoch": 8.06, "learning_rate": 4.597148828269729e-05, "loss": 2.3381, "step": 2784500 }, { "epoch": 8.06, "learning_rate": 4.5970766082345315e-05, "loss": 2.3327, "step": 2785000 }, { "epoch": 8.06, "learning_rate": 4.5970042434698044e-05, "loss": 2.3292, "step": 2785500 }, { "epoch": 8.06, "learning_rate": 4.5969318787050766e-05, "loss": 2.3245, "step": 2786000 }, { "epoch": 8.07, "learning_rate": 4.596859513940349e-05, "loss": 2.3397, "step": 2786500 }, { "epoch": 8.07, "learning_rate": 4.596787149175621e-05, "loss": 2.3252, "step": 2787000 }, { "epoch": 8.07, "learning_rate": 4.596714784410893e-05, "loss": 2.325, "step": 2787500 }, { "epoch": 8.07, "learning_rate": 4.5966424196461655e-05, "loss": 2.3522, "step": 2788000 }, { "epoch": 8.07, "learning_rate": 4.596570054881438e-05, "loss": 2.3346, "step": 2788500 }, { "epoch": 8.07, "learning_rate": 4.59649769011671e-05, "loss": 2.3213, "step": 2789000 }, { "epoch": 8.07, "learning_rate": 4.596425325351982e-05, "loss": 2.3389, "step": 2789500 }, { "epoch": 8.08, "learning_rate": 4.5963529605872544e-05, "loss": 2.3734, "step": 2790000 }, { "epoch": 8.08, "learning_rate": 4.5962805958225266e-05, "loss": 2.3403, "step": 2790500 }, { "epoch": 8.08, "learning_rate": 4.596208231057799e-05, "loss": 2.333, "step": 2791000 }, { "epoch": 8.08, "learning_rate": 4.596136011022601e-05, "loss": 2.3135, "step": 2791500 }, { "epoch": 8.08, "learning_rate": 4.596063646257874e-05, "loss": 2.3395, "step": 2792000 }, { "epoch": 8.08, "learning_rate": 4.595991281493146e-05, "loss": 2.3362, "step": 2792500 }, { "epoch": 8.08, "learning_rate": 4.5959189167284184e-05, "loss": 2.3352, "step": 2793000 }, { "epoch": 8.09, "learning_rate": 4.59584669669322e-05, "loss": 2.3313, "step": 2793500 }, { "epoch": 8.09, "learning_rate": 4.595774331928492e-05, "loss": 2.3297, "step": 2794000 }, { "epoch": 8.09, "learning_rate": 4.5957021118932944e-05, "loss": 2.3543, "step": 2794500 }, { "epoch": 8.09, "learning_rate": 4.595629747128567e-05, "loss": 2.3148, "step": 2795000 }, { "epoch": 8.09, "learning_rate": 4.595557382363839e-05, "loss": 2.3232, "step": 2795500 }, { "epoch": 8.09, "learning_rate": 4.595485017599111e-05, "loss": 2.3317, "step": 2796000 }, { "epoch": 8.09, "learning_rate": 4.5954126528343833e-05, "loss": 2.3556, "step": 2796500 }, { "epoch": 8.1, "learning_rate": 4.5953402880696556e-05, "loss": 2.3389, "step": 2797000 }, { "epoch": 8.1, "learning_rate": 4.595268068034457e-05, "loss": 2.3279, "step": 2797500 }, { "epoch": 8.1, "learning_rate": 4.5951958479992594e-05, "loss": 2.3357, "step": 2798000 }, { "epoch": 8.1, "learning_rate": 4.5951234832345316e-05, "loss": 2.3482, "step": 2798500 }, { "epoch": 8.1, "learning_rate": 4.595051118469804e-05, "loss": 2.3212, "step": 2799000 }, { "epoch": 8.1, "learning_rate": 4.594978753705076e-05, "loss": 2.3213, "step": 2799500 }, { "epoch": 8.1, "learning_rate": 4.594906388940349e-05, "loss": 2.3382, "step": 2800000 }, { "epoch": 8.11, "learning_rate": 4.594834024175621e-05, "loss": 2.3218, "step": 2800500 }, { "epoch": 8.11, "learning_rate": 4.5947616594108934e-05, "loss": 2.3368, "step": 2801000 }, { "epoch": 8.11, "learning_rate": 4.5946892946461656e-05, "loss": 2.3312, "step": 2801500 }, { "epoch": 8.11, "learning_rate": 4.594617074610967e-05, "loss": 2.3361, "step": 2802000 }, { "epoch": 8.11, "learning_rate": 4.5945448545757694e-05, "loss": 2.3166, "step": 2802500 }, { "epoch": 8.11, "learning_rate": 4.5944724898110416e-05, "loss": 2.339, "step": 2803000 }, { "epoch": 8.11, "learning_rate": 4.594400125046314e-05, "loss": 2.3446, "step": 2803500 }, { "epoch": 8.12, "learning_rate": 4.594327760281586e-05, "loss": 2.3636, "step": 2804000 }, { "epoch": 8.12, "learning_rate": 4.594255395516858e-05, "loss": 2.3309, "step": 2804500 }, { "epoch": 8.12, "learning_rate": 4.5941830307521305e-05, "loss": 2.3536, "step": 2805000 }, { "epoch": 8.12, "learning_rate": 4.594110665987403e-05, "loss": 2.3399, "step": 2805500 }, { "epoch": 8.12, "learning_rate": 4.594038301222675e-05, "loss": 2.3202, "step": 2806000 }, { "epoch": 8.12, "learning_rate": 4.593965936457947e-05, "loss": 2.3304, "step": 2806500 }, { "epoch": 8.13, "learning_rate": 4.5938935716932194e-05, "loss": 2.3255, "step": 2807000 }, { "epoch": 8.13, "learning_rate": 4.593821206928492e-05, "loss": 2.3555, "step": 2807500 }, { "epoch": 8.13, "learning_rate": 4.5937489868932945e-05, "loss": 2.345, "step": 2808000 }, { "epoch": 8.13, "learning_rate": 4.593676622128567e-05, "loss": 2.3275, "step": 2808500 }, { "epoch": 8.13, "learning_rate": 4.593604257363839e-05, "loss": 2.3486, "step": 2809000 }, { "epoch": 8.13, "learning_rate": 4.593531892599111e-05, "loss": 2.3345, "step": 2809500 }, { "epoch": 8.13, "learning_rate": 4.5934595278343834e-05, "loss": 2.3412, "step": 2810000 }, { "epoch": 8.14, "learning_rate": 4.5933871630696556e-05, "loss": 2.34, "step": 2810500 }, { "epoch": 8.14, "learning_rate": 4.593314798304928e-05, "loss": 2.3226, "step": 2811000 }, { "epoch": 8.14, "learning_rate": 4.5932424335402e-05, "loss": 2.3304, "step": 2811500 }, { "epoch": 8.14, "learning_rate": 4.593170068775472e-05, "loss": 2.3324, "step": 2812000 }, { "epoch": 8.14, "learning_rate": 4.5930977040107445e-05, "loss": 2.3451, "step": 2812500 }, { "epoch": 8.14, "learning_rate": 4.593025483975547e-05, "loss": 2.3313, "step": 2813000 }, { "epoch": 8.14, "learning_rate": 4.592953119210819e-05, "loss": 2.3, "step": 2813500 }, { "epoch": 8.15, "learning_rate": 4.592880754446091e-05, "loss": 2.3373, "step": 2814000 }, { "epoch": 8.15, "learning_rate": 4.592808389681364e-05, "loss": 2.3438, "step": 2814500 }, { "epoch": 8.15, "learning_rate": 4.592736169646166e-05, "loss": 2.3492, "step": 2815000 }, { "epoch": 8.15, "learning_rate": 4.592663804881438e-05, "loss": 2.3583, "step": 2815500 }, { "epoch": 8.15, "learning_rate": 4.5925915848462395e-05, "loss": 2.356, "step": 2816000 }, { "epoch": 8.15, "learning_rate": 4.5925192200815124e-05, "loss": 2.3394, "step": 2816500 }, { "epoch": 8.15, "learning_rate": 4.5924468553167846e-05, "loss": 2.3603, "step": 2817000 }, { "epoch": 8.16, "learning_rate": 4.592374490552057e-05, "loss": 2.3418, "step": 2817500 }, { "epoch": 8.16, "learning_rate": 4.592302125787329e-05, "loss": 2.349, "step": 2818000 }, { "epoch": 8.16, "learning_rate": 4.592229761022601e-05, "loss": 2.353, "step": 2818500 }, { "epoch": 8.16, "learning_rate": 4.5921573962578735e-05, "loss": 2.3387, "step": 2819000 }, { "epoch": 8.16, "learning_rate": 4.592085031493146e-05, "loss": 2.348, "step": 2819500 }, { "epoch": 8.16, "learning_rate": 4.592012811457947e-05, "loss": 2.3094, "step": 2820000 }, { "epoch": 8.16, "learning_rate": 4.5919404466932195e-05, "loss": 2.3424, "step": 2820500 }, { "epoch": 8.17, "learning_rate": 4.5918680819284924e-05, "loss": 2.3553, "step": 2821000 }, { "epoch": 8.17, "learning_rate": 4.5917957171637646e-05, "loss": 2.3297, "step": 2821500 }, { "epoch": 8.17, "learning_rate": 4.5917233523990375e-05, "loss": 2.3513, "step": 2822000 }, { "epoch": 8.17, "learning_rate": 4.59165098763431e-05, "loss": 2.2997, "step": 2822500 }, { "epoch": 8.17, "learning_rate": 4.591578622869582e-05, "loss": 2.3247, "step": 2823000 }, { "epoch": 8.17, "learning_rate": 4.591506258104854e-05, "loss": 2.3523, "step": 2823500 }, { "epoch": 8.17, "learning_rate": 4.591434038069656e-05, "loss": 2.3372, "step": 2824000 }, { "epoch": 8.18, "learning_rate": 4.591361673304928e-05, "loss": 2.3212, "step": 2824500 }, { "epoch": 8.18, "learning_rate": 4.5912893085402e-05, "loss": 2.3305, "step": 2825000 }, { "epoch": 8.18, "learning_rate": 4.5912169437754724e-05, "loss": 2.3481, "step": 2825500 }, { "epoch": 8.18, "learning_rate": 4.5911445790107446e-05, "loss": 2.3646, "step": 2826000 }, { "epoch": 8.18, "learning_rate": 4.591072358975547e-05, "loss": 2.3278, "step": 2826500 }, { "epoch": 8.18, "learning_rate": 4.590999994210819e-05, "loss": 2.3144, "step": 2827000 }, { "epoch": 8.18, "learning_rate": 4.590927629446091e-05, "loss": 2.3401, "step": 2827500 }, { "epoch": 8.19, "learning_rate": 4.5908552646813635e-05, "loss": 2.3331, "step": 2828000 }, { "epoch": 8.19, "learning_rate": 4.590782899916636e-05, "loss": 2.355, "step": 2828500 }, { "epoch": 8.19, "learning_rate": 4.590710535151908e-05, "loss": 2.3634, "step": 2829000 }, { "epoch": 8.19, "learning_rate": 4.59063831511671e-05, "loss": 2.3554, "step": 2829500 }, { "epoch": 8.19, "learning_rate": 4.5905660950815124e-05, "loss": 2.3275, "step": 2830000 }, { "epoch": 8.19, "learning_rate": 4.5904937303167847e-05, "loss": 2.3243, "step": 2830500 }, { "epoch": 8.19, "learning_rate": 4.590421365552057e-05, "loss": 2.3289, "step": 2831000 }, { "epoch": 8.2, "learning_rate": 4.590349000787329e-05, "loss": 2.3222, "step": 2831500 }, { "epoch": 8.2, "learning_rate": 4.5902767807521307e-05, "loss": 2.3311, "step": 2832000 }, { "epoch": 8.2, "learning_rate": 4.590204415987403e-05, "loss": 2.3273, "step": 2832500 }, { "epoch": 8.2, "learning_rate": 4.590132051222675e-05, "loss": 2.3402, "step": 2833000 }, { "epoch": 8.2, "learning_rate": 4.590059686457947e-05, "loss": 2.3199, "step": 2833500 }, { "epoch": 8.2, "learning_rate": 4.5899873216932196e-05, "loss": 2.3469, "step": 2834000 }, { "epoch": 8.2, "learning_rate": 4.589915101658022e-05, "loss": 2.342, "step": 2834500 }, { "epoch": 8.21, "learning_rate": 4.589842736893294e-05, "loss": 2.318, "step": 2835000 }, { "epoch": 8.21, "learning_rate": 4.5897705168580956e-05, "loss": 2.3441, "step": 2835500 }, { "epoch": 8.21, "learning_rate": 4.589698152093368e-05, "loss": 2.3181, "step": 2836000 }, { "epoch": 8.21, "learning_rate": 4.58962578732864e-05, "loss": 2.3429, "step": 2836500 }, { "epoch": 8.21, "learning_rate": 4.589553422563912e-05, "loss": 2.3235, "step": 2837000 }, { "epoch": 8.21, "learning_rate": 4.589481057799185e-05, "loss": 2.3579, "step": 2837500 }, { "epoch": 8.21, "learning_rate": 4.5894086930344574e-05, "loss": 2.3518, "step": 2838000 }, { "epoch": 8.22, "learning_rate": 4.58933632826973e-05, "loss": 2.3283, "step": 2838500 }, { "epoch": 8.22, "learning_rate": 4.5892639635050025e-05, "loss": 2.3335, "step": 2839000 }, { "epoch": 8.22, "learning_rate": 4.589191598740275e-05, "loss": 2.3258, "step": 2839500 }, { "epoch": 8.22, "learning_rate": 4.589119233975547e-05, "loss": 2.3168, "step": 2840000 }, { "epoch": 8.22, "learning_rate": 4.589046869210819e-05, "loss": 2.3415, "step": 2840500 }, { "epoch": 8.22, "learning_rate": 4.5889745044460914e-05, "loss": 2.3307, "step": 2841000 }, { "epoch": 8.22, "learning_rate": 4.588902284410893e-05, "loss": 2.3507, "step": 2841500 }, { "epoch": 8.23, "learning_rate": 4.588829919646165e-05, "loss": 2.3399, "step": 2842000 }, { "epoch": 8.23, "learning_rate": 4.5887576996109674e-05, "loss": 2.3558, "step": 2842500 }, { "epoch": 8.23, "learning_rate": 4.588685479575769e-05, "loss": 2.3229, "step": 2843000 }, { "epoch": 8.23, "learning_rate": 4.588613114811041e-05, "loss": 2.3395, "step": 2843500 }, { "epoch": 8.23, "learning_rate": 4.5885407500463134e-05, "loss": 2.317, "step": 2844000 }, { "epoch": 8.23, "learning_rate": 4.5884683852815856e-05, "loss": 2.336, "step": 2844500 }, { "epoch": 8.24, "learning_rate": 4.5883960205168585e-05, "loss": 2.3532, "step": 2845000 }, { "epoch": 8.24, "learning_rate": 4.58832380048166e-05, "loss": 2.3426, "step": 2845500 }, { "epoch": 8.24, "learning_rate": 4.588251435716932e-05, "loss": 2.3409, "step": 2846000 }, { "epoch": 8.24, "learning_rate": 4.588179070952205e-05, "loss": 2.3438, "step": 2846500 }, { "epoch": 8.24, "learning_rate": 4.5881067061874774e-05, "loss": 2.3627, "step": 2847000 }, { "epoch": 8.24, "learning_rate": 4.5880343414227496e-05, "loss": 2.3073, "step": 2847500 }, { "epoch": 8.24, "learning_rate": 4.587961976658022e-05, "loss": 2.3293, "step": 2848000 }, { "epoch": 8.25, "learning_rate": 4.587889611893294e-05, "loss": 2.3236, "step": 2848500 }, { "epoch": 8.25, "learning_rate": 4.587817247128566e-05, "loss": 2.3232, "step": 2849000 }, { "epoch": 8.25, "learning_rate": 4.5877448823638385e-05, "loss": 2.324, "step": 2849500 }, { "epoch": 8.25, "learning_rate": 4.587672517599111e-05, "loss": 2.3416, "step": 2850000 }, { "epoch": 8.25, "learning_rate": 4.587600152834383e-05, "loss": 2.3449, "step": 2850500 }, { "epoch": 8.25, "learning_rate": 4.587527788069655e-05, "loss": 2.3418, "step": 2851000 }, { "epoch": 8.25, "learning_rate": 4.5874555680344574e-05, "loss": 2.3578, "step": 2851500 }, { "epoch": 8.26, "learning_rate": 4.5873832032697297e-05, "loss": 2.3577, "step": 2852000 }, { "epoch": 8.26, "learning_rate": 4.5873108385050026e-05, "loss": 2.3221, "step": 2852500 }, { "epoch": 8.26, "learning_rate": 4.587238473740275e-05, "loss": 2.3495, "step": 2853000 }, { "epoch": 8.26, "learning_rate": 4.5871662537050763e-05, "loss": 2.3058, "step": 2853500 }, { "epoch": 8.26, "learning_rate": 4.5870938889403486e-05, "loss": 2.3633, "step": 2854000 }, { "epoch": 8.26, "learning_rate": 4.587021524175621e-05, "loss": 2.3509, "step": 2854500 }, { "epoch": 8.26, "learning_rate": 4.586949159410893e-05, "loss": 2.353, "step": 2855000 }, { "epoch": 8.27, "learning_rate": 4.586876794646165e-05, "loss": 2.3456, "step": 2855500 }, { "epoch": 8.27, "learning_rate": 4.5868044298814375e-05, "loss": 2.3384, "step": 2856000 }, { "epoch": 8.27, "learning_rate": 4.5867320651167104e-05, "loss": 2.3245, "step": 2856500 }, { "epoch": 8.27, "learning_rate": 4.5866597003519826e-05, "loss": 2.3172, "step": 2857000 }, { "epoch": 8.27, "learning_rate": 4.586587335587255e-05, "loss": 2.308, "step": 2857500 }, { "epoch": 8.27, "learning_rate": 4.5865151155520564e-05, "loss": 2.3443, "step": 2858000 }, { "epoch": 8.27, "learning_rate": 4.5864427507873286e-05, "loss": 2.319, "step": 2858500 }, { "epoch": 8.28, "learning_rate": 4.586370386022601e-05, "loss": 2.3115, "step": 2859000 }, { "epoch": 8.28, "learning_rate": 4.586298021257874e-05, "loss": 2.338, "step": 2859500 }, { "epoch": 8.28, "learning_rate": 4.586225656493146e-05, "loss": 2.335, "step": 2860000 }, { "epoch": 8.28, "learning_rate": 4.586153291728418e-05, "loss": 2.3416, "step": 2860500 }, { "epoch": 8.28, "learning_rate": 4.5860809269636904e-05, "loss": 2.3432, "step": 2861000 }, { "epoch": 8.28, "learning_rate": 4.5860087069284926e-05, "loss": 2.3697, "step": 2861500 }, { "epoch": 8.28, "learning_rate": 4.585936342163765e-05, "loss": 2.3557, "step": 2862000 }, { "epoch": 8.29, "learning_rate": 4.585863977399037e-05, "loss": 2.3421, "step": 2862500 }, { "epoch": 8.29, "learning_rate": 4.585791612634309e-05, "loss": 2.304, "step": 2863000 }, { "epoch": 8.29, "learning_rate": 4.585719392599111e-05, "loss": 2.3108, "step": 2863500 }, { "epoch": 8.29, "learning_rate": 4.585647027834383e-05, "loss": 2.342, "step": 2864000 }, { "epoch": 8.29, "learning_rate": 4.585574663069655e-05, "loss": 2.3438, "step": 2864500 }, { "epoch": 8.29, "learning_rate": 4.5855022983049275e-05, "loss": 2.3358, "step": 2865000 }, { "epoch": 8.29, "learning_rate": 4.5854299335402004e-05, "loss": 2.3274, "step": 2865500 }, { "epoch": 8.3, "learning_rate": 4.5853575687754726e-05, "loss": 2.3336, "step": 2866000 }, { "epoch": 8.3, "learning_rate": 4.585285348740274e-05, "loss": 2.349, "step": 2866500 }, { "epoch": 8.3, "learning_rate": 4.585212983975547e-05, "loss": 2.3324, "step": 2867000 }, { "epoch": 8.3, "learning_rate": 4.585140619210819e-05, "loss": 2.3537, "step": 2867500 }, { "epoch": 8.3, "learning_rate": 4.5850682544460915e-05, "loss": 2.3441, "step": 2868000 }, { "epoch": 8.3, "learning_rate": 4.584995889681364e-05, "loss": 2.3279, "step": 2868500 }, { "epoch": 8.3, "learning_rate": 4.584923669646165e-05, "loss": 2.3453, "step": 2869000 }, { "epoch": 8.31, "learning_rate": 4.584851304881438e-05, "loss": 2.3539, "step": 2869500 }, { "epoch": 8.31, "learning_rate": 4.5847789401167104e-05, "loss": 2.3414, "step": 2870000 }, { "epoch": 8.31, "learning_rate": 4.5847065753519827e-05, "loss": 2.3352, "step": 2870500 }, { "epoch": 8.31, "learning_rate": 4.584634355316784e-05, "loss": 2.3364, "step": 2871000 }, { "epoch": 8.31, "learning_rate": 4.5845619905520564e-05, "loss": 2.3511, "step": 2871500 }, { "epoch": 8.31, "learning_rate": 4.584489625787329e-05, "loss": 2.3182, "step": 2872000 }, { "epoch": 8.31, "learning_rate": 4.584417261022601e-05, "loss": 2.3301, "step": 2872500 }, { "epoch": 8.32, "learning_rate": 4.584344896257873e-05, "loss": 2.3333, "step": 2873000 }, { "epoch": 8.32, "learning_rate": 4.584272531493145e-05, "loss": 2.3325, "step": 2873500 }, { "epoch": 8.32, "learning_rate": 4.584200166728418e-05, "loss": 2.3324, "step": 2874000 }, { "epoch": 8.32, "learning_rate": 4.58412794669322e-05, "loss": 2.3334, "step": 2874500 }, { "epoch": 8.32, "learning_rate": 4.584055581928493e-05, "loss": 2.3282, "step": 2875000 }, { "epoch": 8.32, "learning_rate": 4.583983217163765e-05, "loss": 2.3326, "step": 2875500 }, { "epoch": 8.32, "learning_rate": 4.5839109971285665e-05, "loss": 2.3207, "step": 2876000 }, { "epoch": 8.33, "learning_rate": 4.583838632363839e-05, "loss": 2.3565, "step": 2876500 }, { "epoch": 8.33, "learning_rate": 4.583766267599111e-05, "loss": 2.3565, "step": 2877000 }, { "epoch": 8.33, "learning_rate": 4.583693902834383e-05, "loss": 2.3531, "step": 2877500 }, { "epoch": 8.33, "learning_rate": 4.5836215380696554e-05, "loss": 2.3334, "step": 2878000 }, { "epoch": 8.33, "learning_rate": 4.583549173304928e-05, "loss": 2.3327, "step": 2878500 }, { "epoch": 8.33, "learning_rate": 4.5834768085402005e-05, "loss": 2.3174, "step": 2879000 }, { "epoch": 8.33, "learning_rate": 4.583404443775473e-05, "loss": 2.3286, "step": 2879500 }, { "epoch": 8.34, "learning_rate": 4.583332079010745e-05, "loss": 2.365, "step": 2880000 }, { "epoch": 8.34, "learning_rate": 4.583259714246017e-05, "loss": 2.3476, "step": 2880500 }, { "epoch": 8.34, "learning_rate": 4.583187494210819e-05, "loss": 2.3487, "step": 2881000 }, { "epoch": 8.34, "learning_rate": 4.583115129446091e-05, "loss": 2.3424, "step": 2881500 }, { "epoch": 8.34, "learning_rate": 4.583042764681364e-05, "loss": 2.3415, "step": 2882000 }, { "epoch": 8.34, "learning_rate": 4.582970399916636e-05, "loss": 2.3474, "step": 2882500 }, { "epoch": 8.35, "learning_rate": 4.582898035151908e-05, "loss": 2.3645, "step": 2883000 }, { "epoch": 8.35, "learning_rate": 4.58282595984624e-05, "loss": 2.3566, "step": 2883500 }, { "epoch": 8.35, "learning_rate": 4.582753595081512e-05, "loss": 2.3499, "step": 2884000 }, { "epoch": 8.35, "learning_rate": 4.582681230316784e-05, "loss": 2.3525, "step": 2884500 }, { "epoch": 8.35, "learning_rate": 4.582609010281586e-05, "loss": 2.348, "step": 2885000 }, { "epoch": 8.35, "learning_rate": 4.582536790246388e-05, "loss": 2.336, "step": 2885500 }, { "epoch": 8.35, "learning_rate": 4.58246442548166e-05, "loss": 2.3396, "step": 2886000 }, { "epoch": 8.36, "learning_rate": 4.5823920607169325e-05, "loss": 2.358, "step": 2886500 }, { "epoch": 8.36, "learning_rate": 4.582319695952205e-05, "loss": 2.323, "step": 2887000 }, { "epoch": 8.36, "learning_rate": 4.582247331187477e-05, "loss": 2.3329, "step": 2887500 }, { "epoch": 8.36, "learning_rate": 4.5821751111522785e-05, "loss": 2.3356, "step": 2888000 }, { "epoch": 8.36, "learning_rate": 4.582102746387551e-05, "loss": 2.3471, "step": 2888500 }, { "epoch": 8.36, "learning_rate": 4.582030381622823e-05, "loss": 2.3448, "step": 2889000 }, { "epoch": 8.36, "learning_rate": 4.581958016858096e-05, "loss": 2.3613, "step": 2889500 }, { "epoch": 8.37, "learning_rate": 4.581885652093368e-05, "loss": 2.3476, "step": 2890000 }, { "epoch": 8.37, "learning_rate": 4.58181343205817e-05, "loss": 2.3384, "step": 2890500 }, { "epoch": 8.37, "learning_rate": 4.5817410672934426e-05, "loss": 2.3637, "step": 2891000 }, { "epoch": 8.37, "learning_rate": 4.581668702528715e-05, "loss": 2.3423, "step": 2891500 }, { "epoch": 8.37, "learning_rate": 4.581596337763987e-05, "loss": 2.3515, "step": 2892000 }, { "epoch": 8.37, "learning_rate": 4.581523972999259e-05, "loss": 2.3343, "step": 2892500 }, { "epoch": 8.37, "learning_rate": 4.5814516082345315e-05, "loss": 2.3208, "step": 2893000 }, { "epoch": 8.38, "learning_rate": 4.581379243469804e-05, "loss": 2.356, "step": 2893500 }, { "epoch": 8.38, "learning_rate": 4.581306878705076e-05, "loss": 2.3264, "step": 2894000 }, { "epoch": 8.38, "learning_rate": 4.581234513940348e-05, "loss": 2.3648, "step": 2894500 }, { "epoch": 8.38, "learning_rate": 4.581162149175621e-05, "loss": 2.3377, "step": 2895000 }, { "epoch": 8.38, "learning_rate": 4.581089784410893e-05, "loss": 2.3467, "step": 2895500 }, { "epoch": 8.38, "learning_rate": 4.5810174196461655e-05, "loss": 2.3456, "step": 2896000 }, { "epoch": 8.38, "learning_rate": 4.580945054881438e-05, "loss": 2.3251, "step": 2896500 }, { "epoch": 8.39, "learning_rate": 4.5808726901167106e-05, "loss": 2.3656, "step": 2897000 }, { "epoch": 8.39, "learning_rate": 4.580800325351983e-05, "loss": 2.3447, "step": 2897500 }, { "epoch": 8.39, "learning_rate": 4.5807281053167844e-05, "loss": 2.3464, "step": 2898000 }, { "epoch": 8.39, "learning_rate": 4.5806557405520566e-05, "loss": 2.3219, "step": 2898500 }, { "epoch": 8.39, "learning_rate": 4.580583375787329e-05, "loss": 2.3461, "step": 2899000 }, { "epoch": 8.39, "learning_rate": 4.580511011022601e-05, "loss": 2.3467, "step": 2899500 }, { "epoch": 8.39, "learning_rate": 4.580438790987403e-05, "loss": 2.3515, "step": 2900000 }, { "epoch": 8.4, "learning_rate": 4.5803664262226755e-05, "loss": 2.3607, "step": 2900500 }, { "epoch": 8.4, "learning_rate": 4.580294061457948e-05, "loss": 2.3344, "step": 2901000 }, { "epoch": 8.4, "learning_rate": 4.58022169669322e-05, "loss": 2.3479, "step": 2901500 }, { "epoch": 8.4, "learning_rate": 4.580149331928492e-05, "loss": 2.3287, "step": 2902000 }, { "epoch": 8.4, "learning_rate": 4.5800769671637644e-05, "loss": 2.3424, "step": 2902500 }, { "epoch": 8.4, "learning_rate": 4.5800046023990366e-05, "loss": 2.3447, "step": 2903000 }, { "epoch": 8.4, "learning_rate": 4.579932237634309e-05, "loss": 2.3357, "step": 2903500 }, { "epoch": 8.41, "learning_rate": 4.579859872869581e-05, "loss": 2.331, "step": 2904000 }, { "epoch": 8.41, "learning_rate": 4.579787652834383e-05, "loss": 2.3611, "step": 2904500 }, { "epoch": 8.41, "learning_rate": 4.579715288069656e-05, "loss": 2.3188, "step": 2905000 }, { "epoch": 8.41, "learning_rate": 4.5796429233049284e-05, "loss": 2.3564, "step": 2905500 }, { "epoch": 8.41, "learning_rate": 4.5795705585402006e-05, "loss": 2.3366, "step": 2906000 }, { "epoch": 8.41, "learning_rate": 4.579498193775473e-05, "loss": 2.3149, "step": 2906500 }, { "epoch": 8.41, "learning_rate": 4.5794259737402744e-05, "loss": 2.3446, "step": 2907000 }, { "epoch": 8.42, "learning_rate": 4.5793536089755466e-05, "loss": 2.3321, "step": 2907500 }, { "epoch": 8.42, "learning_rate": 4.579281244210819e-05, "loss": 2.3028, "step": 2908000 }, { "epoch": 8.42, "learning_rate": 4.579208879446091e-05, "loss": 2.3364, "step": 2908500 }, { "epoch": 8.42, "learning_rate": 4.579136514681363e-05, "loss": 2.3384, "step": 2909000 }, { "epoch": 8.42, "learning_rate": 4.5790642946461656e-05, "loss": 2.3425, "step": 2909500 }, { "epoch": 8.42, "learning_rate": 4.578991929881438e-05, "loss": 2.3377, "step": 2910000 }, { "epoch": 8.42, "learning_rate": 4.57891956511671e-05, "loss": 2.3518, "step": 2910500 }, { "epoch": 8.43, "learning_rate": 4.578847200351982e-05, "loss": 2.354, "step": 2911000 }, { "epoch": 8.43, "learning_rate": 4.5787748355872544e-05, "loss": 2.3488, "step": 2911500 }, { "epoch": 8.43, "learning_rate": 4.578702615552056e-05, "loss": 2.3563, "step": 2912000 }, { "epoch": 8.43, "learning_rate": 4.578630250787329e-05, "loss": 2.3692, "step": 2912500 }, { "epoch": 8.43, "learning_rate": 4.578557886022601e-05, "loss": 2.3398, "step": 2913000 }, { "epoch": 8.43, "learning_rate": 4.5784855212578733e-05, "loss": 2.3525, "step": 2913500 }, { "epoch": 8.43, "learning_rate": 4.5784133012226756e-05, "loss": 2.3504, "step": 2914000 }, { "epoch": 8.44, "learning_rate": 4.578340936457948e-05, "loss": 2.295, "step": 2914500 }, { "epoch": 8.44, "learning_rate": 4.57826857169322e-05, "loss": 2.3341, "step": 2915000 }, { "epoch": 8.44, "learning_rate": 4.578196206928492e-05, "loss": 2.3642, "step": 2915500 }, { "epoch": 8.44, "learning_rate": 4.5781238421637645e-05, "loss": 2.3089, "step": 2916000 }, { "epoch": 8.44, "learning_rate": 4.578051477399037e-05, "loss": 2.3208, "step": 2916500 }, { "epoch": 8.44, "learning_rate": 4.577979112634309e-05, "loss": 2.3486, "step": 2917000 }, { "epoch": 8.44, "learning_rate": 4.577906747869581e-05, "loss": 2.3332, "step": 2917500 }, { "epoch": 8.45, "learning_rate": 4.5778343831048534e-05, "loss": 2.3364, "step": 2918000 }, { "epoch": 8.45, "learning_rate": 4.577762018340126e-05, "loss": 2.3508, "step": 2918500 }, { "epoch": 8.45, "learning_rate": 4.5776896535753985e-05, "loss": 2.3599, "step": 2919000 }, { "epoch": 8.45, "learning_rate": 4.577617433540201e-05, "loss": 2.3508, "step": 2919500 }, { "epoch": 8.45, "learning_rate": 4.577545068775473e-05, "loss": 2.3523, "step": 2920000 }, { "epoch": 8.45, "learning_rate": 4.577472704010745e-05, "loss": 2.3496, "step": 2920500 }, { "epoch": 8.46, "learning_rate": 4.5774003392460174e-05, "loss": 2.3371, "step": 2921000 }, { "epoch": 8.46, "learning_rate": 4.5773279744812896e-05, "loss": 2.3311, "step": 2921500 }, { "epoch": 8.46, "learning_rate": 4.577255609716562e-05, "loss": 2.3434, "step": 2922000 }, { "epoch": 8.46, "learning_rate": 4.577183244951834e-05, "loss": 2.3412, "step": 2922500 }, { "epoch": 8.46, "learning_rate": 4.577110880187106e-05, "loss": 2.328, "step": 2923000 }, { "epoch": 8.46, "learning_rate": 4.5770385154223785e-05, "loss": 2.3311, "step": 2923500 }, { "epoch": 8.46, "learning_rate": 4.576966295387181e-05, "loss": 2.3301, "step": 2924000 }, { "epoch": 8.47, "learning_rate": 4.576893930622453e-05, "loss": 2.3461, "step": 2924500 }, { "epoch": 8.47, "learning_rate": 4.576821565857725e-05, "loss": 2.3246, "step": 2925000 }, { "epoch": 8.47, "learning_rate": 4.5767492010929974e-05, "loss": 2.3341, "step": 2925500 }, { "epoch": 8.47, "learning_rate": 4.5766768363282696e-05, "loss": 2.351, "step": 2926000 }, { "epoch": 8.47, "learning_rate": 4.576604471563542e-05, "loss": 2.3217, "step": 2926500 }, { "epoch": 8.47, "learning_rate": 4.576532251528344e-05, "loss": 2.3577, "step": 2927000 }, { "epoch": 8.47, "learning_rate": 4.576459886763616e-05, "loss": 2.3391, "step": 2927500 }, { "epoch": 8.48, "learning_rate": 4.5763876667284186e-05, "loss": 2.3249, "step": 2928000 }, { "epoch": 8.48, "learning_rate": 4.576315301963691e-05, "loss": 2.3475, "step": 2928500 }, { "epoch": 8.48, "learning_rate": 4.576242937198963e-05, "loss": 2.3351, "step": 2929000 }, { "epoch": 8.48, "learning_rate": 4.576170572434235e-05, "loss": 2.3342, "step": 2929500 }, { "epoch": 8.48, "learning_rate": 4.5760982076695074e-05, "loss": 2.3456, "step": 2930000 }, { "epoch": 8.48, "learning_rate": 4.57602584290478e-05, "loss": 2.3518, "step": 2930500 }, { "epoch": 8.48, "learning_rate": 4.575953478140052e-05, "loss": 2.3452, "step": 2931000 }, { "epoch": 8.49, "learning_rate": 4.575881113375324e-05, "loss": 2.3402, "step": 2931500 }, { "epoch": 8.49, "learning_rate": 4.5758087486105963e-05, "loss": 2.3211, "step": 2932000 }, { "epoch": 8.49, "learning_rate": 4.5757365285753986e-05, "loss": 2.3595, "step": 2932500 }, { "epoch": 8.49, "learning_rate": 4.575664163810671e-05, "loss": 2.3445, "step": 2933000 }, { "epoch": 8.49, "learning_rate": 4.5755919437754724e-05, "loss": 2.3538, "step": 2933500 }, { "epoch": 8.49, "learning_rate": 4.5755195790107446e-05, "loss": 2.3278, "step": 2934000 }, { "epoch": 8.49, "learning_rate": 4.5754472142460175e-05, "loss": 2.3375, "step": 2934500 }, { "epoch": 8.5, "learning_rate": 4.57537484948129e-05, "loss": 2.3459, "step": 2935000 }, { "epoch": 8.5, "learning_rate": 4.575302484716562e-05, "loss": 2.3311, "step": 2935500 }, { "epoch": 8.5, "learning_rate": 4.575230264681364e-05, "loss": 2.3487, "step": 2936000 }, { "epoch": 8.5, "learning_rate": 4.5751578999166364e-05, "loss": 2.3509, "step": 2936500 }, { "epoch": 8.5, "learning_rate": 4.5750855351519086e-05, "loss": 2.3366, "step": 2937000 }, { "epoch": 8.5, "learning_rate": 4.575013170387181e-05, "loss": 2.3392, "step": 2937500 }, { "epoch": 8.5, "learning_rate": 4.574940805622453e-05, "loss": 2.3349, "step": 2938000 }, { "epoch": 8.51, "learning_rate": 4.574868440857725e-05, "loss": 2.3271, "step": 2938500 }, { "epoch": 8.51, "learning_rate": 4.5747960760929975e-05, "loss": 2.3474, "step": 2939000 }, { "epoch": 8.51, "learning_rate": 4.57472371132827e-05, "loss": 2.3459, "step": 2939500 }, { "epoch": 8.51, "learning_rate": 4.574651491293071e-05, "loss": 2.3396, "step": 2940000 }, { "epoch": 8.51, "learning_rate": 4.574579126528344e-05, "loss": 2.3556, "step": 2940500 }, { "epoch": 8.51, "learning_rate": 4.5745067617636164e-05, "loss": 2.3154, "step": 2941000 }, { "epoch": 8.51, "learning_rate": 4.5744343969988886e-05, "loss": 2.3018, "step": 2941500 }, { "epoch": 8.52, "learning_rate": 4.5743620322341615e-05, "loss": 2.3469, "step": 2942000 }, { "epoch": 8.52, "learning_rate": 4.574289667469434e-05, "loss": 2.3574, "step": 2942500 }, { "epoch": 8.52, "learning_rate": 4.574217447434235e-05, "loss": 2.3353, "step": 2943000 }, { "epoch": 8.52, "learning_rate": 4.5741450826695075e-05, "loss": 2.3413, "step": 2943500 }, { "epoch": 8.52, "learning_rate": 4.57407271790478e-05, "loss": 2.3234, "step": 2944000 }, { "epoch": 8.52, "learning_rate": 4.574000353140052e-05, "loss": 2.3299, "step": 2944500 }, { "epoch": 8.52, "learning_rate": 4.573927988375324e-05, "loss": 2.3306, "step": 2945000 }, { "epoch": 8.53, "learning_rate": 4.5738557683401264e-05, "loss": 2.3282, "step": 2945500 }, { "epoch": 8.53, "learning_rate": 4.5737834035753987e-05, "loss": 2.3437, "step": 2946000 }, { "epoch": 8.53, "learning_rate": 4.573711038810671e-05, "loss": 2.3457, "step": 2946500 }, { "epoch": 8.53, "learning_rate": 4.573638674045943e-05, "loss": 2.3321, "step": 2947000 }, { "epoch": 8.53, "learning_rate": 4.573566309281215e-05, "loss": 2.3276, "step": 2947500 }, { "epoch": 8.53, "learning_rate": 4.5734939445164875e-05, "loss": 2.3808, "step": 2948000 }, { "epoch": 8.53, "learning_rate": 4.573421724481289e-05, "loss": 2.3262, "step": 2948500 }, { "epoch": 8.54, "learning_rate": 4.573349359716561e-05, "loss": 2.3371, "step": 2949000 }, { "epoch": 8.54, "learning_rate": 4.573276994951834e-05, "loss": 2.3306, "step": 2949500 }, { "epoch": 8.54, "learning_rate": 4.5732046301871064e-05, "loss": 2.3369, "step": 2950000 }, { "epoch": 8.54, "learning_rate": 4.5731322654223793e-05, "loss": 2.3352, "step": 2950500 }, { "epoch": 8.54, "learning_rate": 4.573060045387181e-05, "loss": 2.3214, "step": 2951000 }, { "epoch": 8.54, "learning_rate": 4.572987680622453e-05, "loss": 2.3374, "step": 2951500 }, { "epoch": 8.54, "learning_rate": 4.5729153158577254e-05, "loss": 2.3424, "step": 2952000 }, { "epoch": 8.55, "learning_rate": 4.5728429510929976e-05, "loss": 2.3416, "step": 2952500 }, { "epoch": 8.55, "learning_rate": 4.57277058632827e-05, "loss": 2.3262, "step": 2953000 }, { "epoch": 8.55, "learning_rate": 4.572698221563542e-05, "loss": 2.3348, "step": 2953500 }, { "epoch": 8.55, "learning_rate": 4.572625856798814e-05, "loss": 2.3342, "step": 2954000 }, { "epoch": 8.55, "learning_rate": 4.5725534920340865e-05, "loss": 2.3409, "step": 2954500 }, { "epoch": 8.55, "learning_rate": 4.572481271998889e-05, "loss": 2.3512, "step": 2955000 }, { "epoch": 8.55, "learning_rate": 4.572408907234161e-05, "loss": 2.34, "step": 2955500 }, { "epoch": 8.56, "learning_rate": 4.572336542469433e-05, "loss": 2.3317, "step": 2956000 }, { "epoch": 8.56, "learning_rate": 4.572264177704706e-05, "loss": 2.3258, "step": 2956500 }, { "epoch": 8.56, "learning_rate": 4.572191812939978e-05, "loss": 2.3574, "step": 2957000 }, { "epoch": 8.56, "learning_rate": 4.5721194481752505e-05, "loss": 2.3397, "step": 2957500 }, { "epoch": 8.56, "learning_rate": 4.572047228140052e-05, "loss": 2.3277, "step": 2958000 }, { "epoch": 8.56, "learning_rate": 4.571975008104854e-05, "loss": 2.347, "step": 2958500 }, { "epoch": 8.57, "learning_rate": 4.5719026433401265e-05, "loss": 2.3614, "step": 2959000 }, { "epoch": 8.57, "learning_rate": 4.571830278575399e-05, "loss": 2.3647, "step": 2959500 }, { "epoch": 8.57, "learning_rate": 4.571757913810671e-05, "loss": 2.3604, "step": 2960000 }, { "epoch": 8.57, "learning_rate": 4.571685549045943e-05, "loss": 2.3338, "step": 2960500 }, { "epoch": 8.57, "learning_rate": 4.5716131842812154e-05, "loss": 2.3338, "step": 2961000 }, { "epoch": 8.57, "learning_rate": 4.5715408195164876e-05, "loss": 2.335, "step": 2961500 }, { "epoch": 8.57, "learning_rate": 4.57146845475176e-05, "loss": 2.3363, "step": 2962000 }, { "epoch": 8.58, "learning_rate": 4.571396234716562e-05, "loss": 2.3441, "step": 2962500 }, { "epoch": 8.58, "learning_rate": 4.571323869951834e-05, "loss": 2.363, "step": 2963000 }, { "epoch": 8.58, "learning_rate": 4.5712515051871065e-05, "loss": 2.3327, "step": 2963500 }, { "epoch": 8.58, "learning_rate": 4.571179140422379e-05, "loss": 2.3503, "step": 2964000 }, { "epoch": 8.58, "learning_rate": 4.5711067756576517e-05, "loss": 2.3252, "step": 2964500 }, { "epoch": 8.58, "learning_rate": 4.571034410892924e-05, "loss": 2.3477, "step": 2965000 }, { "epoch": 8.58, "learning_rate": 4.5709621908577254e-05, "loss": 2.3178, "step": 2965500 }, { "epoch": 8.59, "learning_rate": 4.570889970822527e-05, "loss": 2.3352, "step": 2966000 }, { "epoch": 8.59, "learning_rate": 4.570817606057799e-05, "loss": 2.369, "step": 2966500 }, { "epoch": 8.59, "learning_rate": 4.5707453860226014e-05, "loss": 2.34, "step": 2967000 }, { "epoch": 8.59, "learning_rate": 4.570673021257874e-05, "loss": 2.338, "step": 2967500 }, { "epoch": 8.59, "learning_rate": 4.570600656493146e-05, "loss": 2.3399, "step": 2968000 }, { "epoch": 8.59, "learning_rate": 4.570528291728418e-05, "loss": 2.3486, "step": 2968500 }, { "epoch": 8.59, "learning_rate": 4.57045592696369e-05, "loss": 2.3466, "step": 2969000 }, { "epoch": 8.6, "learning_rate": 4.5703835621989626e-05, "loss": 2.3446, "step": 2969500 }, { "epoch": 8.6, "learning_rate": 4.570311197434235e-05, "loss": 2.3452, "step": 2970000 }, { "epoch": 8.6, "learning_rate": 4.570238832669507e-05, "loss": 2.3471, "step": 2970500 }, { "epoch": 8.6, "learning_rate": 4.570166467904779e-05, "loss": 2.3676, "step": 2971000 }, { "epoch": 8.6, "learning_rate": 4.570094103140052e-05, "loss": 2.3545, "step": 2971500 }, { "epoch": 8.6, "learning_rate": 4.5700217383753244e-05, "loss": 2.3423, "step": 2972000 }, { "epoch": 8.6, "learning_rate": 4.5699495183401266e-05, "loss": 2.3361, "step": 2972500 }, { "epoch": 8.61, "learning_rate": 4.569877153575399e-05, "loss": 2.3344, "step": 2973000 }, { "epoch": 8.61, "learning_rate": 4.569804788810671e-05, "loss": 2.3306, "step": 2973500 }, { "epoch": 8.61, "learning_rate": 4.569732424045943e-05, "loss": 2.3548, "step": 2974000 }, { "epoch": 8.61, "learning_rate": 4.5696600592812155e-05, "loss": 2.3419, "step": 2974500 }, { "epoch": 8.61, "learning_rate": 4.569587694516488e-05, "loss": 2.3102, "step": 2975000 }, { "epoch": 8.61, "learning_rate": 4.56951532975176e-05, "loss": 2.34, "step": 2975500 }, { "epoch": 8.61, "learning_rate": 4.569442964987032e-05, "loss": 2.3492, "step": 2976000 }, { "epoch": 8.62, "learning_rate": 4.5693706002223044e-05, "loss": 2.3369, "step": 2976500 }, { "epoch": 8.62, "learning_rate": 4.5692983801871066e-05, "loss": 2.3321, "step": 2977000 }, { "epoch": 8.62, "learning_rate": 4.569226160151908e-05, "loss": 2.3282, "step": 2977500 }, { "epoch": 8.62, "learning_rate": 4.5691537953871804e-05, "loss": 2.3258, "step": 2978000 }, { "epoch": 8.62, "learning_rate": 4.5690814306224526e-05, "loss": 2.3454, "step": 2978500 }, { "epoch": 8.62, "learning_rate": 4.569009065857725e-05, "loss": 2.3348, "step": 2979000 }, { "epoch": 8.62, "learning_rate": 4.568936701092998e-05, "loss": 2.3422, "step": 2979500 }, { "epoch": 8.63, "learning_rate": 4.568864481057799e-05, "loss": 2.373, "step": 2980000 }, { "epoch": 8.63, "learning_rate": 4.568792116293072e-05, "loss": 2.3475, "step": 2980500 }, { "epoch": 8.63, "learning_rate": 4.5687197515283444e-05, "loss": 2.3185, "step": 2981000 }, { "epoch": 8.63, "learning_rate": 4.5686473867636166e-05, "loss": 2.3517, "step": 2981500 }, { "epoch": 8.63, "learning_rate": 4.568575021998889e-05, "loss": 2.3324, "step": 2982000 }, { "epoch": 8.63, "learning_rate": 4.568502657234161e-05, "loss": 2.3439, "step": 2982500 }, { "epoch": 8.63, "learning_rate": 4.5684304371989626e-05, "loss": 2.3467, "step": 2983000 }, { "epoch": 8.64, "learning_rate": 4.568358072434235e-05, "loss": 2.3207, "step": 2983500 }, { "epoch": 8.64, "learning_rate": 4.568285707669507e-05, "loss": 2.3383, "step": 2984000 }, { "epoch": 8.64, "learning_rate": 4.568213342904779e-05, "loss": 2.3405, "step": 2984500 }, { "epoch": 8.64, "learning_rate": 4.5681411228695815e-05, "loss": 2.3216, "step": 2985000 }, { "epoch": 8.64, "learning_rate": 4.568068758104854e-05, "loss": 2.3484, "step": 2985500 }, { "epoch": 8.64, "learning_rate": 4.567996393340126e-05, "loss": 2.3062, "step": 2986000 }, { "epoch": 8.64, "learning_rate": 4.567924028575398e-05, "loss": 2.3421, "step": 2986500 }, { "epoch": 8.65, "learning_rate": 4.567851663810671e-05, "loss": 2.3532, "step": 2987000 }, { "epoch": 8.65, "learning_rate": 4.567779299045943e-05, "loss": 2.3285, "step": 2987500 }, { "epoch": 8.65, "learning_rate": 4.5677069342812156e-05, "loss": 2.3311, "step": 2988000 }, { "epoch": 8.65, "learning_rate": 4.567634569516488e-05, "loss": 2.3272, "step": 2988500 }, { "epoch": 8.65, "learning_rate": 4.56756220475176e-05, "loss": 2.3261, "step": 2989000 }, { "epoch": 8.65, "learning_rate": 4.567489839987032e-05, "loss": 2.3574, "step": 2989500 }, { "epoch": 8.65, "learning_rate": 4.5674176199518345e-05, "loss": 2.3527, "step": 2990000 }, { "epoch": 8.66, "learning_rate": 4.567345255187107e-05, "loss": 2.3265, "step": 2990500 }, { "epoch": 8.66, "learning_rate": 4.567272890422379e-05, "loss": 2.3338, "step": 2991000 }, { "epoch": 8.66, "learning_rate": 4.567200525657651e-05, "loss": 2.3705, "step": 2991500 }, { "epoch": 8.66, "learning_rate": 4.5671281608929234e-05, "loss": 2.3427, "step": 2992000 }, { "epoch": 8.66, "learning_rate": 4.5670557961281956e-05, "loss": 2.3713, "step": 2992500 }, { "epoch": 8.66, "learning_rate": 4.566983431363468e-05, "loss": 2.3371, "step": 2993000 }, { "epoch": 8.66, "learning_rate": 4.56691106659874e-05, "loss": 2.3455, "step": 2993500 }, { "epoch": 8.67, "learning_rate": 4.566838701834013e-05, "loss": 2.3458, "step": 2994000 }, { "epoch": 8.67, "learning_rate": 4.566766481798815e-05, "loss": 2.3319, "step": 2994500 }, { "epoch": 8.67, "learning_rate": 4.5666941170340874e-05, "loss": 2.3284, "step": 2995000 }, { "epoch": 8.67, "learning_rate": 4.566621896998889e-05, "loss": 2.3564, "step": 2995500 }, { "epoch": 8.67, "learning_rate": 4.566549532234161e-05, "loss": 2.3404, "step": 2996000 }, { "epoch": 8.67, "learning_rate": 4.5664771674694334e-05, "loss": 2.3539, "step": 2996500 }, { "epoch": 8.68, "learning_rate": 4.5664048027047056e-05, "loss": 2.327, "step": 2997000 }, { "epoch": 8.68, "learning_rate": 4.566332437939978e-05, "loss": 2.3317, "step": 2997500 }, { "epoch": 8.68, "learning_rate": 4.56626007317525e-05, "loss": 2.3549, "step": 2998000 }, { "epoch": 8.68, "learning_rate": 4.566187708410522e-05, "loss": 2.347, "step": 2998500 }, { "epoch": 8.68, "learning_rate": 4.566115343645795e-05, "loss": 2.3192, "step": 2999000 }, { "epoch": 8.68, "learning_rate": 4.5660429788810674e-05, "loss": 2.3373, "step": 2999500 }, { "epoch": 8.68, "learning_rate": 4.5659706141163396e-05, "loss": 2.3348, "step": 3000000 }, { "epoch": 8.69, "learning_rate": 4.565898394081141e-05, "loss": 2.3518, "step": 3000500 }, { "epoch": 8.69, "learning_rate": 4.5658260293164134e-05, "loss": 2.3185, "step": 3001000 }, { "epoch": 8.69, "learning_rate": 4.565753664551686e-05, "loss": 2.3406, "step": 3001500 }, { "epoch": 8.69, "learning_rate": 4.5656812997869585e-05, "loss": 2.3426, "step": 3002000 }, { "epoch": 8.69, "learning_rate": 4.565608935022231e-05, "loss": 2.3667, "step": 3002500 }, { "epoch": 8.69, "learning_rate": 4.565536570257503e-05, "loss": 2.3649, "step": 3003000 }, { "epoch": 8.69, "learning_rate": 4.565464350222305e-05, "loss": 2.3469, "step": 3003500 }, { "epoch": 8.7, "learning_rate": 4.5653919854575774e-05, "loss": 2.3449, "step": 3004000 }, { "epoch": 8.7, "learning_rate": 4.5653196206928497e-05, "loss": 2.3311, "step": 3004500 }, { "epoch": 8.7, "learning_rate": 4.565247255928122e-05, "loss": 2.3406, "step": 3005000 }, { "epoch": 8.7, "learning_rate": 4.565174891163394e-05, "loss": 2.3478, "step": 3005500 }, { "epoch": 8.7, "learning_rate": 4.5651026711281957e-05, "loss": 2.3479, "step": 3006000 }, { "epoch": 8.7, "learning_rate": 4.565030306363468e-05, "loss": 2.322, "step": 3006500 }, { "epoch": 8.7, "learning_rate": 4.56495794159874e-05, "loss": 2.3388, "step": 3007000 }, { "epoch": 8.71, "learning_rate": 4.5648857215635423e-05, "loss": 2.3523, "step": 3007500 }, { "epoch": 8.71, "learning_rate": 4.5648133567988146e-05, "loss": 2.3172, "step": 3008000 }, { "epoch": 8.71, "learning_rate": 4.564740992034087e-05, "loss": 2.3397, "step": 3008500 }, { "epoch": 8.71, "learning_rate": 4.56466862726936e-05, "loss": 2.3392, "step": 3009000 }, { "epoch": 8.71, "learning_rate": 4.564596262504632e-05, "loss": 2.3363, "step": 3009500 }, { "epoch": 8.71, "learning_rate": 4.564523897739904e-05, "loss": 2.3349, "step": 3010000 }, { "epoch": 8.71, "learning_rate": 4.5644515329751764e-05, "loss": 2.3183, "step": 3010500 }, { "epoch": 8.72, "learning_rate": 4.564379312939978e-05, "loss": 2.3577, "step": 3011000 }, { "epoch": 8.72, "learning_rate": 4.56430694817525e-05, "loss": 2.3549, "step": 3011500 }, { "epoch": 8.72, "learning_rate": 4.5642345834105224e-05, "loss": 2.3266, "step": 3012000 }, { "epoch": 8.72, "learning_rate": 4.564162218645795e-05, "loss": 2.3535, "step": 3012500 }, { "epoch": 8.72, "learning_rate": 4.5640898538810675e-05, "loss": 2.3437, "step": 3013000 }, { "epoch": 8.72, "learning_rate": 4.56401748911634e-05, "loss": 2.3423, "step": 3013500 }, { "epoch": 8.72, "learning_rate": 4.563945124351612e-05, "loss": 2.3264, "step": 3014000 }, { "epoch": 8.73, "learning_rate": 4.5638729043164135e-05, "loss": 2.3528, "step": 3014500 }, { "epoch": 8.73, "learning_rate": 4.563800539551686e-05, "loss": 2.3382, "step": 3015000 }, { "epoch": 8.73, "learning_rate": 4.563728174786958e-05, "loss": 2.3551, "step": 3015500 }, { "epoch": 8.73, "learning_rate": 4.56365581002223e-05, "loss": 2.3518, "step": 3016000 }, { "epoch": 8.73, "learning_rate": 4.563583445257503e-05, "loss": 2.3348, "step": 3016500 }, { "epoch": 8.73, "learning_rate": 4.563511080492775e-05, "loss": 2.3434, "step": 3017000 }, { "epoch": 8.73, "learning_rate": 4.5634387157280475e-05, "loss": 2.3586, "step": 3017500 }, { "epoch": 8.74, "learning_rate": 4.5633663509633204e-05, "loss": 2.3568, "step": 3018000 }, { "epoch": 8.74, "learning_rate": 4.563294130928122e-05, "loss": 2.3419, "step": 3018500 }, { "epoch": 8.74, "learning_rate": 4.563221766163394e-05, "loss": 2.3511, "step": 3019000 }, { "epoch": 8.74, "learning_rate": 4.5631494013986664e-05, "loss": 2.3184, "step": 3019500 }, { "epoch": 8.74, "learning_rate": 4.5630770366339386e-05, "loss": 2.3335, "step": 3020000 }, { "epoch": 8.74, "learning_rate": 4.563004671869211e-05, "loss": 2.3468, "step": 3020500 }, { "epoch": 8.74, "learning_rate": 4.562932307104483e-05, "loss": 2.335, "step": 3021000 }, { "epoch": 8.75, "learning_rate": 4.562860087069285e-05, "loss": 2.3166, "step": 3021500 }, { "epoch": 8.75, "learning_rate": 4.5627877223045575e-05, "loss": 2.3174, "step": 3022000 }, { "epoch": 8.75, "learning_rate": 4.56271535753983e-05, "loss": 2.3321, "step": 3022500 }, { "epoch": 8.75, "learning_rate": 4.562642992775102e-05, "loss": 2.3282, "step": 3023000 }, { "epoch": 8.75, "learning_rate": 4.562570628010374e-05, "loss": 2.3305, "step": 3023500 }, { "epoch": 8.75, "learning_rate": 4.562498263245647e-05, "loss": 2.3487, "step": 3024000 }, { "epoch": 8.75, "learning_rate": 4.562425898480919e-05, "loss": 2.3286, "step": 3024500 }, { "epoch": 8.76, "learning_rate": 4.562353678445721e-05, "loss": 2.3107, "step": 3025000 }, { "epoch": 8.76, "learning_rate": 4.562281313680993e-05, "loss": 2.3444, "step": 3025500 }, { "epoch": 8.76, "learning_rate": 4.562208948916265e-05, "loss": 2.316, "step": 3026000 }, { "epoch": 8.76, "learning_rate": 4.5621365841515376e-05, "loss": 2.3497, "step": 3026500 }, { "epoch": 8.76, "learning_rate": 4.5620642193868105e-05, "loss": 2.3321, "step": 3027000 }, { "epoch": 8.76, "learning_rate": 4.561991854622083e-05, "loss": 2.3445, "step": 3027500 }, { "epoch": 8.76, "learning_rate": 4.561919489857355e-05, "loss": 2.3382, "step": 3028000 }, { "epoch": 8.77, "learning_rate": 4.561847125092627e-05, "loss": 2.3469, "step": 3028500 }, { "epoch": 8.77, "learning_rate": 4.561775049786958e-05, "loss": 2.3304, "step": 3029000 }, { "epoch": 8.77, "learning_rate": 4.56170268502223e-05, "loss": 2.3495, "step": 3029500 }, { "epoch": 8.77, "learning_rate": 4.561630320257503e-05, "loss": 2.3233, "step": 3030000 }, { "epoch": 8.77, "learning_rate": 4.5615579554927754e-05, "loss": 2.3501, "step": 3030500 }, { "epoch": 8.77, "learning_rate": 4.5614855907280476e-05, "loss": 2.3422, "step": 3031000 }, { "epoch": 8.77, "learning_rate": 4.5614132259633205e-05, "loss": 2.3293, "step": 3031500 }, { "epoch": 8.78, "learning_rate": 4.561340861198593e-05, "loss": 2.3335, "step": 3032000 }, { "epoch": 8.78, "learning_rate": 4.561268496433865e-05, "loss": 2.336, "step": 3032500 }, { "epoch": 8.78, "learning_rate": 4.561196131669137e-05, "loss": 2.3304, "step": 3033000 }, { "epoch": 8.78, "learning_rate": 4.5611237669044094e-05, "loss": 2.3331, "step": 3033500 }, { "epoch": 8.78, "learning_rate": 4.561051546869211e-05, "loss": 2.352, "step": 3034000 }, { "epoch": 8.78, "learning_rate": 4.560979182104483e-05, "loss": 2.3391, "step": 3034500 }, { "epoch": 8.79, "learning_rate": 4.5609068173397554e-05, "loss": 2.3045, "step": 3035000 }, { "epoch": 8.79, "learning_rate": 4.560834452575028e-05, "loss": 2.333, "step": 3035500 }, { "epoch": 8.79, "learning_rate": 4.56076223253983e-05, "loss": 2.324, "step": 3036000 }, { "epoch": 8.79, "learning_rate": 4.560689867775102e-05, "loss": 2.3379, "step": 3036500 }, { "epoch": 8.79, "learning_rate": 4.560617503010374e-05, "loss": 2.3479, "step": 3037000 }, { "epoch": 8.79, "learning_rate": 4.5605451382456465e-05, "loss": 2.3426, "step": 3037500 }, { "epoch": 8.79, "learning_rate": 4.560472773480919e-05, "loss": 2.3231, "step": 3038000 }, { "epoch": 8.8, "learning_rate": 4.5604004087161916e-05, "loss": 2.3649, "step": 3038500 }, { "epoch": 8.8, "learning_rate": 4.560328188680993e-05, "loss": 2.3531, "step": 3039000 }, { "epoch": 8.8, "learning_rate": 4.5602558239162654e-05, "loss": 2.3452, "step": 3039500 }, { "epoch": 8.8, "learning_rate": 4.560183459151538e-05, "loss": 2.3391, "step": 3040000 }, { "epoch": 8.8, "learning_rate": 4.5601110943868105e-05, "loss": 2.3251, "step": 3040500 }, { "epoch": 8.8, "learning_rate": 4.560038874351612e-05, "loss": 2.3499, "step": 3041000 }, { "epoch": 8.8, "learning_rate": 4.559966509586884e-05, "loss": 2.3437, "step": 3041500 }, { "epoch": 8.81, "learning_rate": 4.559894289551686e-05, "loss": 2.3552, "step": 3042000 }, { "epoch": 8.81, "learning_rate": 4.559821924786958e-05, "loss": 2.348, "step": 3042500 }, { "epoch": 8.81, "learning_rate": 4.55974956002223e-05, "loss": 2.3375, "step": 3043000 }, { "epoch": 8.81, "learning_rate": 4.559677195257503e-05, "loss": 2.3441, "step": 3043500 }, { "epoch": 8.81, "learning_rate": 4.5596048304927754e-05, "loss": 2.3357, "step": 3044000 }, { "epoch": 8.81, "learning_rate": 4.559532465728048e-05, "loss": 2.3306, "step": 3044500 }, { "epoch": 8.81, "learning_rate": 4.559460245692849e-05, "loss": 2.3435, "step": 3045000 }, { "epoch": 8.82, "learning_rate": 4.5593878809281214e-05, "loss": 2.3273, "step": 3045500 }, { "epoch": 8.82, "learning_rate": 4.559315516163394e-05, "loss": 2.3489, "step": 3046000 }, { "epoch": 8.82, "learning_rate": 4.559243296128196e-05, "loss": 2.3352, "step": 3046500 }, { "epoch": 8.82, "learning_rate": 4.559170931363468e-05, "loss": 2.3499, "step": 3047000 }, { "epoch": 8.82, "learning_rate": 4.5590985665987403e-05, "loss": 2.3369, "step": 3047500 }, { "epoch": 8.82, "learning_rate": 4.5590263465635426e-05, "loss": 2.3442, "step": 3048000 }, { "epoch": 8.82, "learning_rate": 4.558953981798815e-05, "loss": 2.3258, "step": 3048500 }, { "epoch": 8.83, "learning_rate": 4.558881617034087e-05, "loss": 2.3411, "step": 3049000 }, { "epoch": 8.83, "learning_rate": 4.558809252269359e-05, "loss": 2.354, "step": 3049500 }, { "epoch": 8.83, "learning_rate": 4.5587368875046315e-05, "loss": 2.3305, "step": 3050000 }, { "epoch": 8.83, "learning_rate": 4.558664522739904e-05, "loss": 2.3442, "step": 3050500 }, { "epoch": 8.83, "learning_rate": 4.558592157975176e-05, "loss": 2.3281, "step": 3051000 }, { "epoch": 8.83, "learning_rate": 4.558519793210448e-05, "loss": 2.324, "step": 3051500 }, { "epoch": 8.83, "learning_rate": 4.5584474284457204e-05, "loss": 2.325, "step": 3052000 }, { "epoch": 8.84, "learning_rate": 4.558375063680993e-05, "loss": 2.3336, "step": 3052500 }, { "epoch": 8.84, "learning_rate": 4.5583026989162655e-05, "loss": 2.3677, "step": 3053000 }, { "epoch": 8.84, "learning_rate": 4.558230334151538e-05, "loss": 2.3192, "step": 3053500 }, { "epoch": 8.84, "learning_rate": 4.5581579693868106e-05, "loss": 2.3423, "step": 3054000 }, { "epoch": 8.84, "learning_rate": 4.558085604622083e-05, "loss": 2.3501, "step": 3054500 }, { "epoch": 8.84, "learning_rate": 4.558013239857355e-05, "loss": 2.325, "step": 3055000 }, { "epoch": 8.84, "learning_rate": 4.557940875092627e-05, "loss": 2.354, "step": 3055500 }, { "epoch": 8.85, "learning_rate": 4.557868799786958e-05, "loss": 2.3486, "step": 3056000 }, { "epoch": 8.85, "learning_rate": 4.557796435022231e-05, "loss": 2.3472, "step": 3056500 }, { "epoch": 8.85, "learning_rate": 4.557724070257503e-05, "loss": 2.3301, "step": 3057000 }, { "epoch": 8.85, "learning_rate": 4.5576517054927755e-05, "loss": 2.3546, "step": 3057500 }, { "epoch": 8.85, "learning_rate": 4.557579340728048e-05, "loss": 2.3476, "step": 3058000 }, { "epoch": 8.85, "learning_rate": 4.55750697596332e-05, "loss": 2.3549, "step": 3058500 }, { "epoch": 8.85, "learning_rate": 4.557434611198592e-05, "loss": 2.3248, "step": 3059000 }, { "epoch": 8.86, "learning_rate": 4.5573622464338644e-05, "loss": 2.3293, "step": 3059500 }, { "epoch": 8.86, "learning_rate": 4.5572898816691366e-05, "loss": 2.3373, "step": 3060000 }, { "epoch": 8.86, "learning_rate": 4.557217661633938e-05, "loss": 2.3444, "step": 3060500 }, { "epoch": 8.86, "learning_rate": 4.557145296869211e-05, "loss": 2.3362, "step": 3061000 }, { "epoch": 8.86, "learning_rate": 4.557072932104483e-05, "loss": 2.3682, "step": 3061500 }, { "epoch": 8.86, "learning_rate": 4.5570007120692855e-05, "loss": 2.3107, "step": 3062000 }, { "epoch": 8.86, "learning_rate": 4.556928492034087e-05, "loss": 2.3313, "step": 3062500 }, { "epoch": 8.87, "learning_rate": 4.556856127269359e-05, "loss": 2.3233, "step": 3063000 }, { "epoch": 8.87, "learning_rate": 4.5567837625046316e-05, "loss": 2.3137, "step": 3063500 }, { "epoch": 8.87, "learning_rate": 4.556711397739904e-05, "loss": 2.3336, "step": 3064000 }, { "epoch": 8.87, "learning_rate": 4.556639032975176e-05, "loss": 2.364, "step": 3064500 }, { "epoch": 8.87, "learning_rate": 4.556566668210448e-05, "loss": 2.3321, "step": 3065000 }, { "epoch": 8.87, "learning_rate": 4.556494303445721e-05, "loss": 2.3604, "step": 3065500 }, { "epoch": 8.87, "learning_rate": 4.5564219386809933e-05, "loss": 2.3225, "step": 3066000 }, { "epoch": 8.88, "learning_rate": 4.5563495739162656e-05, "loss": 2.3473, "step": 3066500 }, { "epoch": 8.88, "learning_rate": 4.556277209151538e-05, "loss": 2.3421, "step": 3067000 }, { "epoch": 8.88, "learning_rate": 4.55620484438681e-05, "loss": 2.3293, "step": 3067500 }, { "epoch": 8.88, "learning_rate": 4.556132479622082e-05, "loss": 2.3163, "step": 3068000 }, { "epoch": 8.88, "learning_rate": 4.556060114857355e-05, "loss": 2.3385, "step": 3068500 }, { "epoch": 8.88, "learning_rate": 4.555987894822157e-05, "loss": 2.353, "step": 3069000 }, { "epoch": 8.88, "learning_rate": 4.555915530057429e-05, "loss": 2.3116, "step": 3069500 }, { "epoch": 8.89, "learning_rate": 4.555843165292701e-05, "loss": 2.3098, "step": 3070000 }, { "epoch": 8.89, "learning_rate": 4.5557708005279734e-05, "loss": 2.3387, "step": 3070500 }, { "epoch": 8.89, "learning_rate": 4.555698435763246e-05, "loss": 2.3438, "step": 3071000 }, { "epoch": 8.89, "learning_rate": 4.5556260709985185e-05, "loss": 2.353, "step": 3071500 }, { "epoch": 8.89, "learning_rate": 4.5555539956928494e-05, "loss": 2.3587, "step": 3072000 }, { "epoch": 8.89, "learning_rate": 4.555481775657651e-05, "loss": 2.3371, "step": 3072500 }, { "epoch": 8.9, "learning_rate": 4.555409410892924e-05, "loss": 2.3098, "step": 3073000 }, { "epoch": 8.9, "learning_rate": 4.555337046128196e-05, "loss": 2.33, "step": 3073500 }, { "epoch": 8.9, "learning_rate": 4.555264681363468e-05, "loss": 2.3505, "step": 3074000 }, { "epoch": 8.9, "learning_rate": 4.5551923165987405e-05, "loss": 2.3379, "step": 3074500 }, { "epoch": 8.9, "learning_rate": 4.555120096563542e-05, "loss": 2.3775, "step": 3075000 }, { "epoch": 8.9, "learning_rate": 4.555047731798814e-05, "loss": 2.3421, "step": 3075500 }, { "epoch": 8.9, "learning_rate": 4.5549753670340865e-05, "loss": 2.3344, "step": 3076000 }, { "epoch": 8.91, "learning_rate": 4.5549030022693594e-05, "loss": 2.3399, "step": 3076500 }, { "epoch": 8.91, "learning_rate": 4.5548306375046316e-05, "loss": 2.3491, "step": 3077000 }, { "epoch": 8.91, "learning_rate": 4.554758272739904e-05, "loss": 2.3623, "step": 3077500 }, { "epoch": 8.91, "learning_rate": 4.554685907975176e-05, "loss": 2.3463, "step": 3078000 }, { "epoch": 8.91, "learning_rate": 4.554613543210449e-05, "loss": 2.3548, "step": 3078500 }, { "epoch": 8.91, "learning_rate": 4.554541178445721e-05, "loss": 2.3435, "step": 3079000 }, { "epoch": 8.91, "learning_rate": 4.554468958410523e-05, "loss": 2.3365, "step": 3079500 }, { "epoch": 8.92, "learning_rate": 4.554396593645795e-05, "loss": 2.3368, "step": 3080000 }, { "epoch": 8.92, "learning_rate": 4.554324228881067e-05, "loss": 2.323, "step": 3080500 }, { "epoch": 8.92, "learning_rate": 4.5542518641163394e-05, "loss": 2.3603, "step": 3081000 }, { "epoch": 8.92, "learning_rate": 4.5541794993516117e-05, "loss": 2.3366, "step": 3081500 }, { "epoch": 8.92, "learning_rate": 4.554107134586884e-05, "loss": 2.3476, "step": 3082000 }, { "epoch": 8.92, "learning_rate": 4.554034769822156e-05, "loss": 2.3432, "step": 3082500 }, { "epoch": 8.92, "learning_rate": 4.553962405057428e-05, "loss": 2.3468, "step": 3083000 }, { "epoch": 8.93, "learning_rate": 4.5538901850222306e-05, "loss": 2.3416, "step": 3083500 }, { "epoch": 8.93, "learning_rate": 4.5538178202575035e-05, "loss": 2.3346, "step": 3084000 }, { "epoch": 8.93, "learning_rate": 4.553745600222305e-05, "loss": 2.3343, "step": 3084500 }, { "epoch": 8.93, "learning_rate": 4.553673235457577e-05, "loss": 2.3469, "step": 3085000 }, { "epoch": 8.93, "learning_rate": 4.5536008706928495e-05, "loss": 2.336, "step": 3085500 }, { "epoch": 8.93, "learning_rate": 4.553528505928122e-05, "loss": 2.3553, "step": 3086000 }, { "epoch": 8.93, "learning_rate": 4.553456141163394e-05, "loss": 2.3435, "step": 3086500 }, { "epoch": 8.94, "learning_rate": 4.553383776398666e-05, "loss": 2.3799, "step": 3087000 }, { "epoch": 8.94, "learning_rate": 4.5533115563634684e-05, "loss": 2.3359, "step": 3087500 }, { "epoch": 8.94, "learning_rate": 4.55323933632827e-05, "loss": 2.3376, "step": 3088000 }, { "epoch": 8.94, "learning_rate": 4.553166971563542e-05, "loss": 2.3501, "step": 3088500 }, { "epoch": 8.94, "learning_rate": 4.5530946067988144e-05, "loss": 2.3458, "step": 3089000 }, { "epoch": 8.94, "learning_rate": 4.5530222420340866e-05, "loss": 2.3523, "step": 3089500 }, { "epoch": 8.94, "learning_rate": 4.552949877269359e-05, "loss": 2.3302, "step": 3090000 }, { "epoch": 8.95, "learning_rate": 4.552877512504631e-05, "loss": 2.3375, "step": 3090500 }, { "epoch": 8.95, "learning_rate": 4.552805147739904e-05, "loss": 2.3488, "step": 3091000 }, { "epoch": 8.95, "learning_rate": 4.552732782975176e-05, "loss": 2.344, "step": 3091500 }, { "epoch": 8.95, "learning_rate": 4.552660418210449e-05, "loss": 2.3455, "step": 3092000 }, { "epoch": 8.95, "learning_rate": 4.552588053445721e-05, "loss": 2.3389, "step": 3092500 }, { "epoch": 8.95, "learning_rate": 4.5525156886809935e-05, "loss": 2.353, "step": 3093000 }, { "epoch": 8.95, "learning_rate": 4.552443323916266e-05, "loss": 2.3455, "step": 3093500 }, { "epoch": 8.96, "learning_rate": 4.552371103881067e-05, "loss": 2.3485, "step": 3094000 }, { "epoch": 8.96, "learning_rate": 4.5522987391163395e-05, "loss": 2.3484, "step": 3094500 }, { "epoch": 8.96, "learning_rate": 4.552226374351612e-05, "loss": 2.3375, "step": 3095000 }, { "epoch": 8.96, "learning_rate": 4.552154009586884e-05, "loss": 2.3508, "step": 3095500 }, { "epoch": 8.96, "learning_rate": 4.552081789551686e-05, "loss": 2.3629, "step": 3096000 }, { "epoch": 8.96, "learning_rate": 4.5520094247869584e-05, "loss": 2.335, "step": 3096500 }, { "epoch": 8.96, "learning_rate": 4.5519370600222306e-05, "loss": 2.3455, "step": 3097000 }, { "epoch": 8.97, "learning_rate": 4.551864695257503e-05, "loss": 2.3369, "step": 3097500 }, { "epoch": 8.97, "learning_rate": 4.551792330492775e-05, "loss": 2.3592, "step": 3098000 }, { "epoch": 8.97, "learning_rate": 4.551719965728047e-05, "loss": 2.3394, "step": 3098500 }, { "epoch": 8.97, "learning_rate": 4.55164760096332e-05, "loss": 2.3569, "step": 3099000 }, { "epoch": 8.97, "learning_rate": 4.5515752361985924e-05, "loss": 2.3231, "step": 3099500 }, { "epoch": 8.97, "learning_rate": 4.5515028714338647e-05, "loss": 2.3389, "step": 3100000 }, { "epoch": 8.97, "learning_rate": 4.551430506669137e-05, "loss": 2.32, "step": 3100500 }, { "epoch": 8.98, "learning_rate": 4.551358286633939e-05, "loss": 2.341, "step": 3101000 }, { "epoch": 8.98, "learning_rate": 4.551285921869211e-05, "loss": 2.3236, "step": 3101500 }, { "epoch": 8.98, "learning_rate": 4.5512135571044836e-05, "loss": 2.3382, "step": 3102000 }, { "epoch": 8.98, "learning_rate": 4.551141337069285e-05, "loss": 2.3428, "step": 3102500 }, { "epoch": 8.98, "learning_rate": 4.551068972304557e-05, "loss": 2.3365, "step": 3103000 }, { "epoch": 8.98, "learning_rate": 4.5509966075398296e-05, "loss": 2.3366, "step": 3103500 }, { "epoch": 8.98, "learning_rate": 4.550924242775102e-05, "loss": 2.3411, "step": 3104000 }, { "epoch": 8.99, "learning_rate": 4.550851878010374e-05, "loss": 2.3268, "step": 3104500 }, { "epoch": 8.99, "learning_rate": 4.550779513245646e-05, "loss": 2.3292, "step": 3105000 }, { "epoch": 8.99, "learning_rate": 4.550707148480919e-05, "loss": 2.3428, "step": 3105500 }, { "epoch": 8.99, "learning_rate": 4.550634928445721e-05, "loss": 2.3362, "step": 3106000 }, { "epoch": 8.99, "learning_rate": 4.5505625636809936e-05, "loss": 2.3363, "step": 3106500 }, { "epoch": 8.99, "learning_rate": 4.550490198916266e-05, "loss": 2.324, "step": 3107000 }, { "epoch": 8.99, "learning_rate": 4.550417834151538e-05, "loss": 2.3453, "step": 3107500 }, { "epoch": 9.0, "learning_rate": 4.5503456141163396e-05, "loss": 2.3465, "step": 3108000 }, { "epoch": 9.0, "learning_rate": 4.550273249351612e-05, "loss": 2.3315, "step": 3108500 }, { "epoch": 9.0, "learning_rate": 4.550200884586884e-05, "loss": 2.3531, "step": 3109000 }, { "epoch": 9.0, "eval_accuracy": 0.6471148299204104, "eval_accuracy_mlm": 0.6092049587115826, "eval_accuracy_nsp": 0.8504489144931322, "eval_loss": 2.321585178375244, "eval_runtime": 330.7989, "eval_samples_per_second": 1319.188, "eval_steps_per_second": 54.967, "step": 3109248 }, { "epoch": 9.0, "learning_rate": 4.550128519822157e-05, "loss": 2.3074, "step": 3109500 }, { "epoch": 9.0, "learning_rate": 4.550056155057429e-05, "loss": 2.3264, "step": 3110000 }, { "epoch": 9.0, "learning_rate": 4.5499837902927014e-05, "loss": 2.3111, "step": 3110500 }, { "epoch": 9.01, "learning_rate": 4.5499114255279736e-05, "loss": 2.3162, "step": 3111000 }, { "epoch": 9.01, "learning_rate": 4.549839060763246e-05, "loss": 2.3384, "step": 3111500 }, { "epoch": 9.01, "learning_rate": 4.549766985457577e-05, "loss": 2.3095, "step": 3112000 }, { "epoch": 9.01, "learning_rate": 4.549694765422379e-05, "loss": 2.3044, "step": 3112500 }, { "epoch": 9.01, "learning_rate": 4.549622400657651e-05, "loss": 2.3092, "step": 3113000 }, { "epoch": 9.01, "learning_rate": 4.5495500358929234e-05, "loss": 2.3297, "step": 3113500 }, { "epoch": 9.01, "learning_rate": 4.549477671128196e-05, "loss": 2.31, "step": 3114000 }, { "epoch": 9.02, "learning_rate": 4.549405451092998e-05, "loss": 2.3126, "step": 3114500 }, { "epoch": 9.02, "learning_rate": 4.54933308632827e-05, "loss": 2.3192, "step": 3115000 }, { "epoch": 9.02, "learning_rate": 4.549260721563542e-05, "loss": 2.3338, "step": 3115500 }, { "epoch": 9.02, "learning_rate": 4.5491883567988145e-05, "loss": 2.332, "step": 3116000 }, { "epoch": 9.02, "learning_rate": 4.549115992034087e-05, "loss": 2.3382, "step": 3116500 }, { "epoch": 9.02, "learning_rate": 4.549043627269359e-05, "loss": 2.3242, "step": 3117000 }, { "epoch": 9.02, "learning_rate": 4.548971262504632e-05, "loss": 2.3122, "step": 3117500 }, { "epoch": 9.03, "learning_rate": 4.548898897739904e-05, "loss": 2.3076, "step": 3118000 }, { "epoch": 9.03, "learning_rate": 4.548826532975176e-05, "loss": 2.3187, "step": 3118500 }, { "epoch": 9.03, "learning_rate": 4.5487541682104485e-05, "loss": 2.3224, "step": 3119000 }, { "epoch": 9.03, "learning_rate": 4.548681803445721e-05, "loss": 2.3234, "step": 3119500 }, { "epoch": 9.03, "learning_rate": 4.548609438680993e-05, "loss": 2.3355, "step": 3120000 }, { "epoch": 9.03, "learning_rate": 4.548537073916265e-05, "loss": 2.3152, "step": 3120500 }, { "epoch": 9.03, "learning_rate": 4.5484647091515374e-05, "loss": 2.3239, "step": 3121000 }, { "epoch": 9.04, "learning_rate": 4.54839234438681e-05, "loss": 2.3238, "step": 3121500 }, { "epoch": 9.04, "learning_rate": 4.5483199796220826e-05, "loss": 2.3353, "step": 3122000 }, { "epoch": 9.04, "learning_rate": 4.548247614857355e-05, "loss": 2.2975, "step": 3122500 }, { "epoch": 9.04, "learning_rate": 4.548175394822157e-05, "loss": 2.3144, "step": 3123000 }, { "epoch": 9.04, "learning_rate": 4.548103030057429e-05, "loss": 2.3342, "step": 3123500 }, { "epoch": 9.04, "learning_rate": 4.5480306652927015e-05, "loss": 2.3554, "step": 3124000 }, { "epoch": 9.04, "learning_rate": 4.547958300527974e-05, "loss": 2.3197, "step": 3124500 }, { "epoch": 9.05, "learning_rate": 4.547885935763246e-05, "loss": 2.3259, "step": 3125000 }, { "epoch": 9.05, "learning_rate": 4.547813570998518e-05, "loss": 2.3229, "step": 3125500 }, { "epoch": 9.05, "learning_rate": 4.547741495692849e-05, "loss": 2.3403, "step": 3126000 }, { "epoch": 9.05, "learning_rate": 4.547669130928122e-05, "loss": 2.3158, "step": 3126500 }, { "epoch": 9.05, "learning_rate": 4.547596766163394e-05, "loss": 2.3352, "step": 3127000 }, { "epoch": 9.05, "learning_rate": 4.5475244013986664e-05, "loss": 2.3134, "step": 3127500 }, { "epoch": 9.05, "learning_rate": 4.5474520366339386e-05, "loss": 2.323, "step": 3128000 }, { "epoch": 9.06, "learning_rate": 4.547379671869211e-05, "loss": 2.3045, "step": 3128500 }, { "epoch": 9.06, "learning_rate": 4.547307307104484e-05, "loss": 2.3518, "step": 3129000 }, { "epoch": 9.06, "learning_rate": 4.547234942339756e-05, "loss": 2.34, "step": 3129500 }, { "epoch": 9.06, "learning_rate": 4.547162577575028e-05, "loss": 2.3034, "step": 3130000 }, { "epoch": 9.06, "learning_rate": 4.54709035753983e-05, "loss": 2.3032, "step": 3130500 }, { "epoch": 9.06, "learning_rate": 4.547017992775102e-05, "loss": 2.3127, "step": 3131000 }, { "epoch": 9.06, "learning_rate": 4.546945628010374e-05, "loss": 2.3171, "step": 3131500 }, { "epoch": 9.07, "learning_rate": 4.5468734079751764e-05, "loss": 2.3058, "step": 3132000 }, { "epoch": 9.07, "learning_rate": 4.5468010432104486e-05, "loss": 2.3094, "step": 3132500 }, { "epoch": 9.07, "learning_rate": 4.546728678445721e-05, "loss": 2.3274, "step": 3133000 }, { "epoch": 9.07, "learning_rate": 4.546656313680993e-05, "loss": 2.3076, "step": 3133500 }, { "epoch": 9.07, "learning_rate": 4.5465840936457946e-05, "loss": 2.3085, "step": 3134000 }, { "epoch": 9.07, "learning_rate": 4.546511728881067e-05, "loss": 2.3319, "step": 3134500 }, { "epoch": 9.07, "learning_rate": 4.54643936411634e-05, "loss": 2.3283, "step": 3135000 }, { "epoch": 9.08, "learning_rate": 4.546366999351612e-05, "loss": 2.3164, "step": 3135500 }, { "epoch": 9.08, "learning_rate": 4.546294634586884e-05, "loss": 2.3172, "step": 3136000 }, { "epoch": 9.08, "learning_rate": 4.546222269822157e-05, "loss": 2.3259, "step": 3136500 }, { "epoch": 9.08, "learning_rate": 4.546149905057429e-05, "loss": 2.3406, "step": 3137000 }, { "epoch": 9.08, "learning_rate": 4.5460775402927015e-05, "loss": 2.2911, "step": 3137500 }, { "epoch": 9.08, "learning_rate": 4.546005175527974e-05, "loss": 2.34, "step": 3138000 }, { "epoch": 9.08, "learning_rate": 4.545932955492775e-05, "loss": 2.2965, "step": 3138500 }, { "epoch": 9.09, "learning_rate": 4.5458605907280475e-05, "loss": 2.3136, "step": 3139000 }, { "epoch": 9.09, "learning_rate": 4.54578822596332e-05, "loss": 2.3031, "step": 3139500 }, { "epoch": 9.09, "learning_rate": 4.545716005928122e-05, "loss": 2.3059, "step": 3140000 }, { "epoch": 9.09, "learning_rate": 4.545643641163394e-05, "loss": 2.328, "step": 3140500 }, { "epoch": 9.09, "learning_rate": 4.5455712763986664e-05, "loss": 2.3073, "step": 3141000 }, { "epoch": 9.09, "learning_rate": 4.545498911633939e-05, "loss": 2.3333, "step": 3141500 }, { "epoch": 9.09, "learning_rate": 4.545426546869211e-05, "loss": 2.3364, "step": 3142000 }, { "epoch": 9.1, "learning_rate": 4.5453543268340124e-05, "loss": 2.3182, "step": 3142500 }, { "epoch": 9.1, "learning_rate": 4.545281962069285e-05, "loss": 2.3167, "step": 3143000 }, { "epoch": 9.1, "learning_rate": 4.545209597304557e-05, "loss": 2.3387, "step": 3143500 }, { "epoch": 9.1, "learning_rate": 4.54513723253983e-05, "loss": 2.3085, "step": 3144000 }, { "epoch": 9.1, "learning_rate": 4.545064867775102e-05, "loss": 2.3421, "step": 3144500 }, { "epoch": 9.1, "learning_rate": 4.544992503010375e-05, "loss": 2.322, "step": 3145000 }, { "epoch": 9.1, "learning_rate": 4.544920138245647e-05, "loss": 2.3241, "step": 3145500 }, { "epoch": 9.11, "learning_rate": 4.5448477734809194e-05, "loss": 2.3218, "step": 3146000 }, { "epoch": 9.11, "learning_rate": 4.5447754087161916e-05, "loss": 2.3209, "step": 3146500 }, { "epoch": 9.11, "learning_rate": 4.5447033334105225e-05, "loss": 2.3214, "step": 3147000 }, { "epoch": 9.11, "learning_rate": 4.544631113375325e-05, "loss": 2.3297, "step": 3147500 }, { "epoch": 9.11, "learning_rate": 4.544558748610597e-05, "loss": 2.3312, "step": 3148000 }, { "epoch": 9.11, "learning_rate": 4.544486383845869e-05, "loss": 2.3224, "step": 3148500 }, { "epoch": 9.12, "learning_rate": 4.5444140190811414e-05, "loss": 2.3232, "step": 3149000 }, { "epoch": 9.12, "learning_rate": 4.5443416543164136e-05, "loss": 2.3052, "step": 3149500 }, { "epoch": 9.12, "learning_rate": 4.544269289551686e-05, "loss": 2.3185, "step": 3150000 }, { "epoch": 9.12, "learning_rate": 4.544196924786958e-05, "loss": 2.315, "step": 3150500 }, { "epoch": 9.12, "learning_rate": 4.54412456002223e-05, "loss": 2.342, "step": 3151000 }, { "epoch": 9.12, "learning_rate": 4.544052484716562e-05, "loss": 2.322, "step": 3151500 }, { "epoch": 9.12, "learning_rate": 4.543980119951835e-05, "loss": 2.3044, "step": 3152000 }, { "epoch": 9.13, "learning_rate": 4.543907755187107e-05, "loss": 2.3258, "step": 3152500 }, { "epoch": 9.13, "learning_rate": 4.543835390422379e-05, "loss": 2.3254, "step": 3153000 }, { "epoch": 9.13, "learning_rate": 4.5437630256576514e-05, "loss": 2.3043, "step": 3153500 }, { "epoch": 9.13, "learning_rate": 4.5436906608929236e-05, "loss": 2.3072, "step": 3154000 }, { "epoch": 9.13, "learning_rate": 4.543618296128196e-05, "loss": 2.3171, "step": 3154500 }, { "epoch": 9.13, "learning_rate": 4.543545931363468e-05, "loss": 2.2927, "step": 3155000 }, { "epoch": 9.13, "learning_rate": 4.54347356659874e-05, "loss": 2.334, "step": 3155500 }, { "epoch": 9.14, "learning_rate": 4.5434012018340125e-05, "loss": 2.3401, "step": 3156000 }, { "epoch": 9.14, "learning_rate": 4.543328837069285e-05, "loss": 2.331, "step": 3156500 }, { "epoch": 9.14, "learning_rate": 4.543256472304557e-05, "loss": 2.3019, "step": 3157000 }, { "epoch": 9.14, "learning_rate": 4.543184252269359e-05, "loss": 2.3396, "step": 3157500 }, { "epoch": 9.14, "learning_rate": 4.5431118875046314e-05, "loss": 2.327, "step": 3158000 }, { "epoch": 9.14, "learning_rate": 4.5430395227399037e-05, "loss": 2.3271, "step": 3158500 }, { "epoch": 9.14, "learning_rate": 4.5429671579751766e-05, "loss": 2.3268, "step": 3159000 }, { "epoch": 9.15, "learning_rate": 4.542894793210449e-05, "loss": 2.3335, "step": 3159500 }, { "epoch": 9.15, "learning_rate": 4.54282257317525e-05, "loss": 2.3091, "step": 3160000 }, { "epoch": 9.15, "learning_rate": 4.5427502084105226e-05, "loss": 2.342, "step": 3160500 }, { "epoch": 9.15, "learning_rate": 4.542677843645795e-05, "loss": 2.3292, "step": 3161000 }, { "epoch": 9.15, "learning_rate": 4.542605478881068e-05, "loss": 2.3209, "step": 3161500 }, { "epoch": 9.15, "learning_rate": 4.54253311411634e-05, "loss": 2.3442, "step": 3162000 }, { "epoch": 9.15, "learning_rate": 4.542460749351612e-05, "loss": 2.3316, "step": 3162500 }, { "epoch": 9.16, "learning_rate": 4.5423883845868844e-05, "loss": 2.3228, "step": 3163000 }, { "epoch": 9.16, "learning_rate": 4.5423160198221566e-05, "loss": 2.337, "step": 3163500 }, { "epoch": 9.16, "learning_rate": 4.542243655057429e-05, "loss": 2.3141, "step": 3164000 }, { "epoch": 9.16, "learning_rate": 4.5421714350222304e-05, "loss": 2.3465, "step": 3164500 }, { "epoch": 9.16, "learning_rate": 4.5420992149870326e-05, "loss": 2.3229, "step": 3165000 }, { "epoch": 9.16, "learning_rate": 4.542026850222305e-05, "loss": 2.329, "step": 3165500 }, { "epoch": 9.16, "learning_rate": 4.541954485457577e-05, "loss": 2.3211, "step": 3166000 }, { "epoch": 9.17, "learning_rate": 4.54188212069285e-05, "loss": 2.3297, "step": 3166500 }, { "epoch": 9.17, "learning_rate": 4.541809755928122e-05, "loss": 2.3212, "step": 3167000 }, { "epoch": 9.17, "learning_rate": 4.5417373911633944e-05, "loss": 2.3189, "step": 3167500 }, { "epoch": 9.17, "learning_rate": 4.5416650263986666e-05, "loss": 2.337, "step": 3168000 }, { "epoch": 9.17, "learning_rate": 4.541592661633939e-05, "loss": 2.3106, "step": 3168500 }, { "epoch": 9.17, "learning_rate": 4.541520296869211e-05, "loss": 2.3399, "step": 3169000 }, { "epoch": 9.17, "learning_rate": 4.5414480768340126e-05, "loss": 2.3218, "step": 3169500 }, { "epoch": 9.18, "learning_rate": 4.541375712069285e-05, "loss": 2.3187, "step": 3170000 }, { "epoch": 9.18, "learning_rate": 4.541303347304558e-05, "loss": 2.3224, "step": 3170500 }, { "epoch": 9.18, "learning_rate": 4.54123098253983e-05, "loss": 2.3073, "step": 3171000 }, { "epoch": 9.18, "learning_rate": 4.5411587625046315e-05, "loss": 2.3142, "step": 3171500 }, { "epoch": 9.18, "learning_rate": 4.541086397739904e-05, "loss": 2.3431, "step": 3172000 }, { "epoch": 9.18, "learning_rate": 4.541014032975176e-05, "loss": 2.3134, "step": 3172500 }, { "epoch": 9.18, "learning_rate": 4.540941668210448e-05, "loss": 2.3265, "step": 3173000 }, { "epoch": 9.19, "learning_rate": 4.5408693034457204e-05, "loss": 2.3411, "step": 3173500 }, { "epoch": 9.19, "learning_rate": 4.540796938680993e-05, "loss": 2.3345, "step": 3174000 }, { "epoch": 9.19, "learning_rate": 4.540724718645795e-05, "loss": 2.3126, "step": 3174500 }, { "epoch": 9.19, "learning_rate": 4.540652498610597e-05, "loss": 2.3293, "step": 3175000 }, { "epoch": 9.19, "learning_rate": 4.5405802785753986e-05, "loss": 2.3364, "step": 3175500 }, { "epoch": 9.19, "learning_rate": 4.540507913810671e-05, "loss": 2.3453, "step": 3176000 }, { "epoch": 9.19, "learning_rate": 4.540435549045943e-05, "loss": 2.3358, "step": 3176500 }, { "epoch": 9.2, "learning_rate": 4.540363184281215e-05, "loss": 2.3236, "step": 3177000 }, { "epoch": 9.2, "learning_rate": 4.5402908195164875e-05, "loss": 2.3215, "step": 3177500 }, { "epoch": 9.2, "learning_rate": 4.5402184547517604e-05, "loss": 2.3047, "step": 3178000 }, { "epoch": 9.2, "learning_rate": 4.540146089987033e-05, "loss": 2.3325, "step": 3178500 }, { "epoch": 9.2, "learning_rate": 4.540073725222305e-05, "loss": 2.3281, "step": 3179000 }, { "epoch": 9.2, "learning_rate": 4.540001360457577e-05, "loss": 2.3146, "step": 3179500 }, { "epoch": 9.2, "learning_rate": 4.539928995692849e-05, "loss": 2.347, "step": 3180000 }, { "epoch": 9.21, "learning_rate": 4.539856775657651e-05, "loss": 2.327, "step": 3180500 }, { "epoch": 9.21, "learning_rate": 4.539784410892923e-05, "loss": 2.3131, "step": 3181000 }, { "epoch": 9.21, "learning_rate": 4.539712046128196e-05, "loss": 2.3303, "step": 3181500 }, { "epoch": 9.21, "learning_rate": 4.539639681363468e-05, "loss": 2.3016, "step": 3182000 }, { "epoch": 9.21, "learning_rate": 4.5395673165987405e-05, "loss": 2.3304, "step": 3182500 }, { "epoch": 9.21, "learning_rate": 4.539494951834013e-05, "loss": 2.3241, "step": 3183000 }, { "epoch": 9.21, "learning_rate": 4.539422587069285e-05, "loss": 2.3017, "step": 3183500 }, { "epoch": 9.22, "learning_rate": 4.539350222304558e-05, "loss": 2.3007, "step": 3184000 }, { "epoch": 9.22, "learning_rate": 4.53927785753983e-05, "loss": 2.3205, "step": 3184500 }, { "epoch": 9.22, "learning_rate": 4.5392056375046316e-05, "loss": 2.3351, "step": 3185000 }, { "epoch": 9.22, "learning_rate": 4.539133417469433e-05, "loss": 2.313, "step": 3185500 }, { "epoch": 9.22, "learning_rate": 4.5390610527047054e-05, "loss": 2.3406, "step": 3186000 }, { "epoch": 9.22, "learning_rate": 4.5389886879399776e-05, "loss": 2.3541, "step": 3186500 }, { "epoch": 9.23, "learning_rate": 4.5389163231752505e-05, "loss": 2.324, "step": 3187000 }, { "epoch": 9.23, "learning_rate": 4.538844103140052e-05, "loss": 2.352, "step": 3187500 }, { "epoch": 9.23, "learning_rate": 4.538771738375324e-05, "loss": 2.3345, "step": 3188000 }, { "epoch": 9.23, "learning_rate": 4.5386993736105965e-05, "loss": 2.3555, "step": 3188500 }, { "epoch": 9.23, "learning_rate": 4.5386270088458694e-05, "loss": 2.3334, "step": 3189000 }, { "epoch": 9.23, "learning_rate": 4.5385546440811416e-05, "loss": 2.3158, "step": 3189500 }, { "epoch": 9.23, "learning_rate": 4.538482424045943e-05, "loss": 2.3262, "step": 3190000 }, { "epoch": 9.24, "learning_rate": 4.5384100592812154e-05, "loss": 2.308, "step": 3190500 }, { "epoch": 9.24, "learning_rate": 4.5383376945164876e-05, "loss": 2.321, "step": 3191000 }, { "epoch": 9.24, "learning_rate": 4.5382653297517605e-05, "loss": 2.3468, "step": 3191500 }, { "epoch": 9.24, "learning_rate": 4.538192964987033e-05, "loss": 2.332, "step": 3192000 }, { "epoch": 9.24, "learning_rate": 4.538120600222305e-05, "loss": 2.3414, "step": 3192500 }, { "epoch": 9.24, "learning_rate": 4.538048235457577e-05, "loss": 2.3375, "step": 3193000 }, { "epoch": 9.24, "learning_rate": 4.5379758706928494e-05, "loss": 2.3032, "step": 3193500 }, { "epoch": 9.25, "learning_rate": 4.537903650657651e-05, "loss": 2.3318, "step": 3194000 }, { "epoch": 9.25, "learning_rate": 4.537831285892923e-05, "loss": 2.3248, "step": 3194500 }, { "epoch": 9.25, "learning_rate": 4.5377589211281954e-05, "loss": 2.3115, "step": 3195000 }, { "epoch": 9.25, "learning_rate": 4.5376865563634676e-05, "loss": 2.3098, "step": 3195500 }, { "epoch": 9.25, "learning_rate": 4.5376141915987405e-05, "loss": 2.3247, "step": 3196000 }, { "epoch": 9.25, "learning_rate": 4.537541826834013e-05, "loss": 2.3106, "step": 3196500 }, { "epoch": 9.25, "learning_rate": 4.537469462069286e-05, "loss": 2.3548, "step": 3197000 }, { "epoch": 9.26, "learning_rate": 4.537397097304558e-05, "loss": 2.3418, "step": 3197500 }, { "epoch": 9.26, "learning_rate": 4.5373248772693594e-05, "loss": 2.3158, "step": 3198000 }, { "epoch": 9.26, "learning_rate": 4.537252512504632e-05, "loss": 2.3175, "step": 3198500 }, { "epoch": 9.26, "learning_rate": 4.537180147739904e-05, "loss": 2.3271, "step": 3199000 }, { "epoch": 9.26, "learning_rate": 4.537107782975176e-05, "loss": 2.3187, "step": 3199500 }, { "epoch": 9.26, "learning_rate": 4.5370354182104483e-05, "loss": 2.3044, "step": 3200000 }, { "epoch": 9.26, "learning_rate": 4.5369630534457206e-05, "loss": 2.3259, "step": 3200500 }, { "epoch": 9.27, "learning_rate": 4.536890688680993e-05, "loss": 2.3334, "step": 3201000 }, { "epoch": 9.27, "learning_rate": 4.536818468645795e-05, "loss": 2.3338, "step": 3201500 }, { "epoch": 9.27, "learning_rate": 4.536746103881067e-05, "loss": 2.3179, "step": 3202000 }, { "epoch": 9.27, "learning_rate": 4.5366737391163395e-05, "loss": 2.3413, "step": 3202500 }, { "epoch": 9.27, "learning_rate": 4.536601374351612e-05, "loss": 2.3272, "step": 3203000 }, { "epoch": 9.27, "learning_rate": 4.536529009586884e-05, "loss": 2.3142, "step": 3203500 }, { "epoch": 9.27, "learning_rate": 4.536456644822157e-05, "loss": 2.3337, "step": 3204000 }, { "epoch": 9.28, "learning_rate": 4.536384280057429e-05, "loss": 2.3114, "step": 3204500 }, { "epoch": 9.28, "learning_rate": 4.536311915292701e-05, "loss": 2.3384, "step": 3205000 }, { "epoch": 9.28, "learning_rate": 4.5362395505279735e-05, "loss": 2.333, "step": 3205500 }, { "epoch": 9.28, "learning_rate": 4.536167185763246e-05, "loss": 2.3118, "step": 3206000 }, { "epoch": 9.28, "learning_rate": 4.536094820998518e-05, "loss": 2.3626, "step": 3206500 }, { "epoch": 9.28, "learning_rate": 4.536022456233791e-05, "loss": 2.3138, "step": 3207000 }, { "epoch": 9.28, "learning_rate": 4.535950091469063e-05, "loss": 2.3337, "step": 3207500 }, { "epoch": 9.29, "learning_rate": 4.535877726704335e-05, "loss": 2.3526, "step": 3208000 }, { "epoch": 9.29, "learning_rate": 4.5358053619396075e-05, "loss": 2.3561, "step": 3208500 }, { "epoch": 9.29, "learning_rate": 4.53573299717488e-05, "loss": 2.337, "step": 3209000 }, { "epoch": 9.29, "learning_rate": 4.535660777139681e-05, "loss": 2.3327, "step": 3209500 }, { "epoch": 9.29, "learning_rate": 4.5355884123749535e-05, "loss": 2.3276, "step": 3210000 }, { "epoch": 9.29, "learning_rate": 4.535516047610226e-05, "loss": 2.3263, "step": 3210500 }, { "epoch": 9.29, "learning_rate": 4.535443827575028e-05, "loss": 2.323, "step": 3211000 }, { "epoch": 9.3, "learning_rate": 4.535371462810301e-05, "loss": 2.321, "step": 3211500 }, { "epoch": 9.3, "learning_rate": 4.5352992427751024e-05, "loss": 2.3055, "step": 3212000 }, { "epoch": 9.3, "learning_rate": 4.5352268780103746e-05, "loss": 2.3194, "step": 3212500 }, { "epoch": 9.3, "learning_rate": 4.535154513245647e-05, "loss": 2.3113, "step": 3213000 }, { "epoch": 9.3, "learning_rate": 4.535082148480919e-05, "loss": 2.3079, "step": 3213500 }, { "epoch": 9.3, "learning_rate": 4.535009783716191e-05, "loss": 2.3454, "step": 3214000 }, { "epoch": 9.3, "learning_rate": 4.5349374189514635e-05, "loss": 2.3223, "step": 3214500 }, { "epoch": 9.31, "learning_rate": 4.534865054186736e-05, "loss": 2.3148, "step": 3215000 }, { "epoch": 9.31, "learning_rate": 4.534792689422008e-05, "loss": 2.3045, "step": 3215500 }, { "epoch": 9.31, "learning_rate": 4.53472046938681e-05, "loss": 2.3331, "step": 3216000 }, { "epoch": 9.31, "learning_rate": 4.5346481046220824e-05, "loss": 2.3267, "step": 3216500 }, { "epoch": 9.31, "learning_rate": 4.5345757398573547e-05, "loss": 2.3109, "step": 3217000 }, { "epoch": 9.31, "learning_rate": 4.534503519822156e-05, "loss": 2.3417, "step": 3217500 }, { "epoch": 9.31, "learning_rate": 4.5344311550574284e-05, "loss": 2.29, "step": 3218000 }, { "epoch": 9.32, "learning_rate": 4.534358790292701e-05, "loss": 2.3321, "step": 3218500 }, { "epoch": 9.32, "learning_rate": 4.5342864255279736e-05, "loss": 2.3146, "step": 3219000 }, { "epoch": 9.32, "learning_rate": 4.534214060763246e-05, "loss": 2.3137, "step": 3219500 }, { "epoch": 9.32, "learning_rate": 4.534141695998518e-05, "loss": 2.3171, "step": 3220000 }, { "epoch": 9.32, "learning_rate": 4.534069331233791e-05, "loss": 2.3326, "step": 3220500 }, { "epoch": 9.32, "learning_rate": 4.533996966469063e-05, "loss": 2.3355, "step": 3221000 }, { "epoch": 9.32, "learning_rate": 4.5339246017043354e-05, "loss": 2.3328, "step": 3221500 }, { "epoch": 9.33, "learning_rate": 4.533852381669137e-05, "loss": 2.3281, "step": 3222000 }, { "epoch": 9.33, "learning_rate": 4.533780016904409e-05, "loss": 2.3171, "step": 3222500 }, { "epoch": 9.33, "learning_rate": 4.5337076521396814e-05, "loss": 2.3352, "step": 3223000 }, { "epoch": 9.33, "learning_rate": 4.5336354321044836e-05, "loss": 2.3305, "step": 3223500 }, { "epoch": 9.33, "learning_rate": 4.533563067339756e-05, "loss": 2.3223, "step": 3224000 }, { "epoch": 9.33, "learning_rate": 4.533490702575028e-05, "loss": 2.3145, "step": 3224500 }, { "epoch": 9.34, "learning_rate": 4.5334183378103e-05, "loss": 2.3265, "step": 3225000 }, { "epoch": 9.34, "learning_rate": 4.5333459730455725e-05, "loss": 2.3075, "step": 3225500 }, { "epoch": 9.34, "learning_rate": 4.5332736082808454e-05, "loss": 2.322, "step": 3226000 }, { "epoch": 9.34, "learning_rate": 4.5332012435161176e-05, "loss": 2.32, "step": 3226500 }, { "epoch": 9.34, "learning_rate": 4.53312887875139e-05, "loss": 2.3182, "step": 3227000 }, { "epoch": 9.34, "learning_rate": 4.5330566587161914e-05, "loss": 2.344, "step": 3227500 }, { "epoch": 9.34, "learning_rate": 4.5329844386809936e-05, "loss": 2.3179, "step": 3228000 }, { "epoch": 9.35, "learning_rate": 4.532912073916266e-05, "loss": 2.3076, "step": 3228500 }, { "epoch": 9.35, "learning_rate": 4.532839709151538e-05, "loss": 2.3214, "step": 3229000 }, { "epoch": 9.35, "learning_rate": 4.53276734438681e-05, "loss": 2.2845, "step": 3229500 }, { "epoch": 9.35, "learning_rate": 4.532695124351612e-05, "loss": 2.3316, "step": 3230000 }, { "epoch": 9.35, "learning_rate": 4.532622759586884e-05, "loss": 2.3242, "step": 3230500 }, { "epoch": 9.35, "learning_rate": 4.532550394822156e-05, "loss": 2.3387, "step": 3231000 }, { "epoch": 9.35, "learning_rate": 4.5324780300574285e-05, "loss": 2.322, "step": 3231500 }, { "epoch": 9.36, "learning_rate": 4.532405665292701e-05, "loss": 2.3275, "step": 3232000 }, { "epoch": 9.36, "learning_rate": 4.5323333005279736e-05, "loss": 2.3034, "step": 3232500 }, { "epoch": 9.36, "learning_rate": 4.532260935763246e-05, "loss": 2.3294, "step": 3233000 }, { "epoch": 9.36, "learning_rate": 4.5321887157280474e-05, "loss": 2.343, "step": 3233500 }, { "epoch": 9.36, "learning_rate": 4.53211635096332e-05, "loss": 2.3193, "step": 3234000 }, { "epoch": 9.36, "learning_rate": 4.5320439861985925e-05, "loss": 2.3292, "step": 3234500 }, { "epoch": 9.36, "learning_rate": 4.531971621433865e-05, "loss": 2.3256, "step": 3235000 }, { "epoch": 9.37, "learning_rate": 4.531899256669137e-05, "loss": 2.3216, "step": 3235500 }, { "epoch": 9.37, "learning_rate": 4.531826891904409e-05, "loss": 2.3208, "step": 3236000 }, { "epoch": 9.37, "learning_rate": 4.5317545271396814e-05, "loss": 2.3328, "step": 3236500 }, { "epoch": 9.37, "learning_rate": 4.531682162374954e-05, "loss": 2.3013, "step": 3237000 }, { "epoch": 9.37, "learning_rate": 4.531609797610226e-05, "loss": 2.33, "step": 3237500 }, { "epoch": 9.37, "learning_rate": 4.531537577575028e-05, "loss": 2.3283, "step": 3238000 }, { "epoch": 9.37, "learning_rate": 4.5314652128103003e-05, "loss": 2.33, "step": 3238500 }, { "epoch": 9.38, "learning_rate": 4.5313928480455726e-05, "loss": 2.3104, "step": 3239000 }, { "epoch": 9.38, "learning_rate": 4.531320483280845e-05, "loss": 2.3094, "step": 3239500 }, { "epoch": 9.38, "learning_rate": 4.531248118516117e-05, "loss": 2.3434, "step": 3240000 }, { "epoch": 9.38, "learning_rate": 4.5311760432104486e-05, "loss": 2.3084, "step": 3240500 }, { "epoch": 9.38, "learning_rate": 4.531103678445721e-05, "loss": 2.3363, "step": 3241000 }, { "epoch": 9.38, "learning_rate": 4.531031458410523e-05, "loss": 2.345, "step": 3241500 }, { "epoch": 9.38, "learning_rate": 4.530959093645795e-05, "loss": 2.3257, "step": 3242000 }, { "epoch": 9.39, "learning_rate": 4.5308867288810675e-05, "loss": 2.3451, "step": 3242500 }, { "epoch": 9.39, "learning_rate": 4.53081436411634e-05, "loss": 2.3438, "step": 3243000 }, { "epoch": 9.39, "learning_rate": 4.530741999351612e-05, "loss": 2.3377, "step": 3243500 }, { "epoch": 9.39, "learning_rate": 4.530669634586884e-05, "loss": 2.3179, "step": 3244000 }, { "epoch": 9.39, "learning_rate": 4.5305972698221564e-05, "loss": 2.3072, "step": 3244500 }, { "epoch": 9.39, "learning_rate": 4.5305249050574286e-05, "loss": 2.361, "step": 3245000 }, { "epoch": 9.39, "learning_rate": 4.5304525402927015e-05, "loss": 2.3049, "step": 3245500 }, { "epoch": 9.4, "learning_rate": 4.530380320257503e-05, "loss": 2.3121, "step": 3246000 }, { "epoch": 9.4, "learning_rate": 4.530307955492775e-05, "loss": 2.3673, "step": 3246500 }, { "epoch": 9.4, "learning_rate": 4.5302355907280475e-05, "loss": 2.3155, "step": 3247000 }, { "epoch": 9.4, "learning_rate": 4.53016322596332e-05, "loss": 2.3505, "step": 3247500 }, { "epoch": 9.4, "learning_rate": 4.530090861198592e-05, "loss": 2.3316, "step": 3248000 }, { "epoch": 9.4, "learning_rate": 4.530018496433864e-05, "loss": 2.3153, "step": 3248500 }, { "epoch": 9.4, "learning_rate": 4.529946131669137e-05, "loss": 2.3206, "step": 3249000 }, { "epoch": 9.41, "learning_rate": 4.529873766904409e-05, "loss": 2.3183, "step": 3249500 }, { "epoch": 9.41, "learning_rate": 4.5298014021396815e-05, "loss": 2.3329, "step": 3250000 }, { "epoch": 9.41, "learning_rate": 4.529729182104484e-05, "loss": 2.3066, "step": 3250500 }, { "epoch": 9.41, "learning_rate": 4.529656817339756e-05, "loss": 2.3056, "step": 3251000 }, { "epoch": 9.41, "learning_rate": 4.529584452575028e-05, "loss": 2.3278, "step": 3251500 }, { "epoch": 9.41, "learning_rate": 4.52951223253983e-05, "loss": 2.3572, "step": 3252000 }, { "epoch": 9.41, "learning_rate": 4.529439867775102e-05, "loss": 2.33, "step": 3252500 }, { "epoch": 9.42, "learning_rate": 4.529367503010374e-05, "loss": 2.324, "step": 3253000 }, { "epoch": 9.42, "learning_rate": 4.5292951382456464e-05, "loss": 2.3274, "step": 3253500 }, { "epoch": 9.42, "learning_rate": 4.5292227734809186e-05, "loss": 2.3336, "step": 3254000 }, { "epoch": 9.42, "learning_rate": 4.5291504087161915e-05, "loss": 2.3297, "step": 3254500 }, { "epoch": 9.42, "learning_rate": 4.5290783334105224e-05, "loss": 2.3493, "step": 3255000 }, { "epoch": 9.42, "learning_rate": 4.5290059686457947e-05, "loss": 2.3423, "step": 3255500 }, { "epoch": 9.42, "learning_rate": 4.528933603881067e-05, "loss": 2.3262, "step": 3256000 }, { "epoch": 9.43, "learning_rate": 4.52886123911634e-05, "loss": 2.2964, "step": 3256500 }, { "epoch": 9.43, "learning_rate": 4.5287890190811413e-05, "loss": 2.325, "step": 3257000 }, { "epoch": 9.43, "learning_rate": 4.5287166543164136e-05, "loss": 2.3365, "step": 3257500 }, { "epoch": 9.43, "learning_rate": 4.5286442895516865e-05, "loss": 2.3078, "step": 3258000 }, { "epoch": 9.43, "learning_rate": 4.528572069516488e-05, "loss": 2.3288, "step": 3258500 }, { "epoch": 9.43, "learning_rate": 4.52849970475176e-05, "loss": 2.3211, "step": 3259000 }, { "epoch": 9.43, "learning_rate": 4.5284273399870325e-05, "loss": 2.3236, "step": 3259500 }, { "epoch": 9.44, "learning_rate": 4.528354975222305e-05, "loss": 2.3195, "step": 3260000 }, { "epoch": 9.44, "learning_rate": 4.528282610457577e-05, "loss": 2.3328, "step": 3260500 }, { "epoch": 9.44, "learning_rate": 4.528210245692849e-05, "loss": 2.3262, "step": 3261000 }, { "epoch": 9.44, "learning_rate": 4.5281378809281214e-05, "loss": 2.3056, "step": 3261500 }, { "epoch": 9.44, "learning_rate": 4.5280655161633936e-05, "loss": 2.3202, "step": 3262000 }, { "epoch": 9.44, "learning_rate": 4.5279931513986665e-05, "loss": 2.3051, "step": 3262500 }, { "epoch": 9.45, "learning_rate": 4.527920786633939e-05, "loss": 2.3171, "step": 3263000 }, { "epoch": 9.45, "learning_rate": 4.527848421869211e-05, "loss": 2.3066, "step": 3263500 }, { "epoch": 9.45, "learning_rate": 4.527776057104484e-05, "loss": 2.3275, "step": 3264000 }, { "epoch": 9.45, "learning_rate": 4.527703981798815e-05, "loss": 2.3175, "step": 3264500 }, { "epoch": 9.45, "learning_rate": 4.527631617034087e-05, "loss": 2.3364, "step": 3265000 }, { "epoch": 9.45, "learning_rate": 4.527559252269359e-05, "loss": 2.3244, "step": 3265500 }, { "epoch": 9.45, "learning_rate": 4.5274868875046314e-05, "loss": 2.3169, "step": 3266000 }, { "epoch": 9.46, "learning_rate": 4.5274146674694336e-05, "loss": 2.3267, "step": 3266500 }, { "epoch": 9.46, "learning_rate": 4.527342302704706e-05, "loss": 2.3287, "step": 3267000 }, { "epoch": 9.46, "learning_rate": 4.5272700826695074e-05, "loss": 2.32, "step": 3267500 }, { "epoch": 9.46, "learning_rate": 4.5271977179047796e-05, "loss": 2.3333, "step": 3268000 }, { "epoch": 9.46, "learning_rate": 4.527125353140052e-05, "loss": 2.337, "step": 3268500 }, { "epoch": 9.46, "learning_rate": 4.527052988375324e-05, "loss": 2.305, "step": 3269000 }, { "epoch": 9.46, "learning_rate": 4.526980768340126e-05, "loss": 2.303, "step": 3269500 }, { "epoch": 9.47, "learning_rate": 4.5269084035753985e-05, "loss": 2.3417, "step": 3270000 }, { "epoch": 9.47, "learning_rate": 4.526836038810671e-05, "loss": 2.3242, "step": 3270500 }, { "epoch": 9.47, "learning_rate": 4.526763674045943e-05, "loss": 2.3238, "step": 3271000 }, { "epoch": 9.47, "learning_rate": 4.526691309281215e-05, "loss": 2.3244, "step": 3271500 }, { "epoch": 9.47, "learning_rate": 4.526618944516488e-05, "loss": 2.3388, "step": 3272000 }, { "epoch": 9.47, "learning_rate": 4.52654657975176e-05, "loss": 2.3336, "step": 3272500 }, { "epoch": 9.47, "learning_rate": 4.5264742149870325e-05, "loss": 2.3406, "step": 3273000 }, { "epoch": 9.48, "learning_rate": 4.526401850222305e-05, "loss": 2.3361, "step": 3273500 }, { "epoch": 9.48, "learning_rate": 4.526329630187106e-05, "loss": 2.3436, "step": 3274000 }, { "epoch": 9.48, "learning_rate": 4.526257265422379e-05, "loss": 2.3156, "step": 3274500 }, { "epoch": 9.48, "learning_rate": 4.5261849006576515e-05, "loss": 2.3347, "step": 3275000 }, { "epoch": 9.48, "learning_rate": 4.526112535892924e-05, "loss": 2.3286, "step": 3275500 }, { "epoch": 9.48, "learning_rate": 4.526040315857725e-05, "loss": 2.3274, "step": 3276000 }, { "epoch": 9.48, "learning_rate": 4.5259679510929975e-05, "loss": 2.3146, "step": 3276500 }, { "epoch": 9.49, "learning_rate": 4.52589558632827e-05, "loss": 2.3257, "step": 3277000 }, { "epoch": 9.49, "learning_rate": 4.525823221563542e-05, "loss": 2.3325, "step": 3277500 }, { "epoch": 9.49, "learning_rate": 4.525751001528344e-05, "loss": 2.3457, "step": 3278000 }, { "epoch": 9.49, "learning_rate": 4.5256786367636164e-05, "loss": 2.3301, "step": 3278500 }, { "epoch": 9.49, "learning_rate": 4.5256062719988886e-05, "loss": 2.3449, "step": 3279000 }, { "epoch": 9.49, "learning_rate": 4.5255339072341615e-05, "loss": 2.3509, "step": 3279500 }, { "epoch": 9.49, "learning_rate": 4.525461687198963e-05, "loss": 2.3568, "step": 3280000 }, { "epoch": 9.5, "learning_rate": 4.525389322434235e-05, "loss": 2.3308, "step": 3280500 }, { "epoch": 9.5, "learning_rate": 4.5253169576695075e-05, "loss": 2.3243, "step": 3281000 }, { "epoch": 9.5, "learning_rate": 4.52524459290478e-05, "loss": 2.3296, "step": 3281500 }, { "epoch": 9.5, "learning_rate": 4.525172228140052e-05, "loss": 2.3359, "step": 3282000 }, { "epoch": 9.5, "learning_rate": 4.525099863375324e-05, "loss": 2.3435, "step": 3282500 }, { "epoch": 9.5, "learning_rate": 4.5250276433401264e-05, "loss": 2.3019, "step": 3283000 }, { "epoch": 9.5, "learning_rate": 4.5249552785753986e-05, "loss": 2.3073, "step": 3283500 }, { "epoch": 9.51, "learning_rate": 4.524882913810671e-05, "loss": 2.3046, "step": 3284000 }, { "epoch": 9.51, "learning_rate": 4.524810549045943e-05, "loss": 2.319, "step": 3284500 }, { "epoch": 9.51, "learning_rate": 4.524738184281215e-05, "loss": 2.306, "step": 3285000 }, { "epoch": 9.51, "learning_rate": 4.5246658195164875e-05, "loss": 2.3068, "step": 3285500 }, { "epoch": 9.51, "learning_rate": 4.52459345475176e-05, "loss": 2.3244, "step": 3286000 }, { "epoch": 9.51, "learning_rate": 4.5245210899870326e-05, "loss": 2.3225, "step": 3286500 }, { "epoch": 9.51, "learning_rate": 4.524448869951834e-05, "loss": 2.357, "step": 3287000 }, { "epoch": 9.52, "learning_rate": 4.524376505187107e-05, "loss": 2.3434, "step": 3287500 }, { "epoch": 9.52, "learning_rate": 4.5243042851519086e-05, "loss": 2.3044, "step": 3288000 }, { "epoch": 9.52, "learning_rate": 4.524231920387181e-05, "loss": 2.3242, "step": 3288500 }, { "epoch": 9.52, "learning_rate": 4.524159555622453e-05, "loss": 2.3448, "step": 3289000 }, { "epoch": 9.52, "learning_rate": 4.524087190857725e-05, "loss": 2.3218, "step": 3289500 }, { "epoch": 9.52, "learning_rate": 4.5240148260929975e-05, "loss": 2.3305, "step": 3290000 }, { "epoch": 9.52, "learning_rate": 4.52394246132827e-05, "loss": 2.3088, "step": 3290500 }, { "epoch": 9.53, "learning_rate": 4.523870096563542e-05, "loss": 2.323, "step": 3291000 }, { "epoch": 9.53, "learning_rate": 4.523797731798814e-05, "loss": 2.3202, "step": 3291500 }, { "epoch": 9.53, "learning_rate": 4.523725367034087e-05, "loss": 2.3296, "step": 3292000 }, { "epoch": 9.53, "learning_rate": 4.523653002269359e-05, "loss": 2.3139, "step": 3292500 }, { "epoch": 9.53, "learning_rate": 4.5235806375046315e-05, "loss": 2.313, "step": 3293000 }, { "epoch": 9.53, "learning_rate": 4.523508272739904e-05, "loss": 2.3171, "step": 3293500 }, { "epoch": 9.53, "learning_rate": 4.523435907975177e-05, "loss": 2.3063, "step": 3294000 }, { "epoch": 9.54, "learning_rate": 4.523363543210449e-05, "loss": 2.3359, "step": 3294500 }, { "epoch": 9.54, "learning_rate": 4.5232913231752505e-05, "loss": 2.3273, "step": 3295000 }, { "epoch": 9.54, "learning_rate": 4.523218958410523e-05, "loss": 2.3557, "step": 3295500 }, { "epoch": 9.54, "learning_rate": 4.523146593645795e-05, "loss": 2.3253, "step": 3296000 }, { "epoch": 9.54, "learning_rate": 4.523074228881067e-05, "loss": 2.3122, "step": 3296500 }, { "epoch": 9.54, "learning_rate": 4.5230018641163393e-05, "loss": 2.345, "step": 3297000 }, { "epoch": 9.54, "learning_rate": 4.522929499351612e-05, "loss": 2.3052, "step": 3297500 }, { "epoch": 9.55, "learning_rate": 4.5228571345868845e-05, "loss": 2.3489, "step": 3298000 }, { "epoch": 9.55, "learning_rate": 4.522784769822157e-05, "loss": 2.3297, "step": 3298500 }, { "epoch": 9.55, "learning_rate": 4.522712405057429e-05, "loss": 2.3227, "step": 3299000 }, { "epoch": 9.55, "learning_rate": 4.52264032975176e-05, "loss": 2.3217, "step": 3299500 }, { "epoch": 9.55, "learning_rate": 4.522567964987032e-05, "loss": 2.3325, "step": 3300000 }, { "epoch": 9.55, "learning_rate": 4.522495600222304e-05, "loss": 2.3343, "step": 3300500 }, { "epoch": 9.56, "learning_rate": 4.5224233801871065e-05, "loss": 2.3324, "step": 3301000 }, { "epoch": 9.56, "learning_rate": 4.522351015422379e-05, "loss": 2.3181, "step": 3301500 }, { "epoch": 9.56, "learning_rate": 4.5222786506576516e-05, "loss": 2.3371, "step": 3302000 }, { "epoch": 9.56, "learning_rate": 4.522206285892924e-05, "loss": 2.329, "step": 3302500 }, { "epoch": 9.56, "learning_rate": 4.522133921128196e-05, "loss": 2.3172, "step": 3303000 }, { "epoch": 9.56, "learning_rate": 4.522061556363468e-05, "loss": 2.3166, "step": 3303500 }, { "epoch": 9.56, "learning_rate": 4.5219891915987405e-05, "loss": 2.3025, "step": 3304000 }, { "epoch": 9.57, "learning_rate": 4.521916826834013e-05, "loss": 2.3114, "step": 3304500 }, { "epoch": 9.57, "learning_rate": 4.521844462069285e-05, "loss": 2.3314, "step": 3305000 }, { "epoch": 9.57, "learning_rate": 4.521772242034087e-05, "loss": 2.3291, "step": 3305500 }, { "epoch": 9.57, "learning_rate": 4.5216998772693594e-05, "loss": 2.3302, "step": 3306000 }, { "epoch": 9.57, "learning_rate": 4.521627657234161e-05, "loss": 2.3155, "step": 3306500 }, { "epoch": 9.57, "learning_rate": 4.521555292469433e-05, "loss": 2.3349, "step": 3307000 }, { "epoch": 9.57, "learning_rate": 4.5214829277047054e-05, "loss": 2.343, "step": 3307500 }, { "epoch": 9.58, "learning_rate": 4.5214105629399776e-05, "loss": 2.3252, "step": 3308000 }, { "epoch": 9.58, "learning_rate": 4.52133819817525e-05, "loss": 2.3499, "step": 3308500 }, { "epoch": 9.58, "learning_rate": 4.521265833410523e-05, "loss": 2.3294, "step": 3309000 }, { "epoch": 9.58, "learning_rate": 4.521193468645795e-05, "loss": 2.3329, "step": 3309500 }, { "epoch": 9.58, "learning_rate": 4.521121103881067e-05, "loss": 2.3128, "step": 3310000 }, { "epoch": 9.58, "learning_rate": 4.5210487391163394e-05, "loss": 2.3145, "step": 3310500 }, { "epoch": 9.58, "learning_rate": 4.520976374351612e-05, "loss": 2.3523, "step": 3311000 }, { "epoch": 9.59, "learning_rate": 4.5209040095868846e-05, "loss": 2.3164, "step": 3311500 }, { "epoch": 9.59, "learning_rate": 4.520831644822157e-05, "loss": 2.3291, "step": 3312000 }, { "epoch": 9.59, "learning_rate": 4.520759280057429e-05, "loss": 2.3081, "step": 3312500 }, { "epoch": 9.59, "learning_rate": 4.5206870600222306e-05, "loss": 2.3196, "step": 3313000 }, { "epoch": 9.59, "learning_rate": 4.520614695257503e-05, "loss": 2.3298, "step": 3313500 }, { "epoch": 9.59, "learning_rate": 4.520542330492775e-05, "loss": 2.3426, "step": 3314000 }, { "epoch": 9.59, "learning_rate": 4.520469965728047e-05, "loss": 2.3267, "step": 3314500 }, { "epoch": 9.6, "learning_rate": 4.5203977456928495e-05, "loss": 2.3417, "step": 3315000 }, { "epoch": 9.6, "learning_rate": 4.520325380928122e-05, "loss": 2.3384, "step": 3315500 }, { "epoch": 9.6, "learning_rate": 4.520253016163394e-05, "loss": 2.3124, "step": 3316000 }, { "epoch": 9.6, "learning_rate": 4.520180651398667e-05, "loss": 2.3277, "step": 3316500 }, { "epoch": 9.6, "learning_rate": 4.520108286633939e-05, "loss": 2.3372, "step": 3317000 }, { "epoch": 9.6, "learning_rate": 4.520035921869211e-05, "loss": 2.3427, "step": 3317500 }, { "epoch": 9.6, "learning_rate": 4.5199635571044835e-05, "loss": 2.3072, "step": 3318000 }, { "epoch": 9.61, "learning_rate": 4.519891337069285e-05, "loss": 2.3382, "step": 3318500 }, { "epoch": 9.61, "learning_rate": 4.519818972304557e-05, "loss": 2.3304, "step": 3319000 }, { "epoch": 9.61, "learning_rate": 4.5197466075398295e-05, "loss": 2.3263, "step": 3319500 }, { "epoch": 9.61, "learning_rate": 4.5196742427751024e-05, "loss": 2.331, "step": 3320000 }, { "epoch": 9.61, "learning_rate": 4.5196018780103746e-05, "loss": 2.3429, "step": 3320500 }, { "epoch": 9.61, "learning_rate": 4.519529513245647e-05, "loss": 2.3338, "step": 3321000 }, { "epoch": 9.61, "learning_rate": 4.519457148480919e-05, "loss": 2.3169, "step": 3321500 }, { "epoch": 9.62, "learning_rate": 4.519384783716191e-05, "loss": 2.3148, "step": 3322000 }, { "epoch": 9.62, "learning_rate": 4.5193124189514635e-05, "loss": 2.3314, "step": 3322500 }, { "epoch": 9.62, "learning_rate": 4.519240198916265e-05, "loss": 2.3317, "step": 3323000 }, { "epoch": 9.62, "learning_rate": 4.519167834151537e-05, "loss": 2.3173, "step": 3323500 }, { "epoch": 9.62, "learning_rate": 4.51909546938681e-05, "loss": 2.2966, "step": 3324000 }, { "epoch": 9.62, "learning_rate": 4.5190232493516124e-05, "loss": 2.3208, "step": 3324500 }, { "epoch": 9.62, "learning_rate": 4.5189508845868846e-05, "loss": 2.3194, "step": 3325000 }, { "epoch": 9.63, "learning_rate": 4.518878664551686e-05, "loss": 2.3159, "step": 3325500 }, { "epoch": 9.63, "learning_rate": 4.5188062997869584e-05, "loss": 2.3347, "step": 3326000 }, { "epoch": 9.63, "learning_rate": 4.5187339350222306e-05, "loss": 2.3323, "step": 3326500 }, { "epoch": 9.63, "learning_rate": 4.518661570257503e-05, "loss": 2.3094, "step": 3327000 }, { "epoch": 9.63, "learning_rate": 4.518589205492775e-05, "loss": 2.3204, "step": 3327500 }, { "epoch": 9.63, "learning_rate": 4.518516840728047e-05, "loss": 2.3254, "step": 3328000 }, { "epoch": 9.63, "learning_rate": 4.51844447596332e-05, "loss": 2.3304, "step": 3328500 }, { "epoch": 9.64, "learning_rate": 4.5183721111985924e-05, "loss": 2.3622, "step": 3329000 }, { "epoch": 9.64, "learning_rate": 4.5182997464338646e-05, "loss": 2.3453, "step": 3329500 }, { "epoch": 9.64, "learning_rate": 4.518227381669137e-05, "loss": 2.3293, "step": 3330000 }, { "epoch": 9.64, "learning_rate": 4.518155016904409e-05, "loss": 2.3169, "step": 3330500 }, { "epoch": 9.64, "learning_rate": 4.518082652139682e-05, "loss": 2.3153, "step": 3331000 }, { "epoch": 9.64, "learning_rate": 4.518010287374954e-05, "loss": 2.3315, "step": 3331500 }, { "epoch": 9.64, "learning_rate": 4.517938067339756e-05, "loss": 2.3276, "step": 3332000 }, { "epoch": 9.65, "learning_rate": 4.517865702575028e-05, "loss": 2.3142, "step": 3332500 }, { "epoch": 9.65, "learning_rate": 4.51779348253983e-05, "loss": 2.3411, "step": 3333000 }, { "epoch": 9.65, "learning_rate": 4.5177211177751025e-05, "loss": 2.3359, "step": 3333500 }, { "epoch": 9.65, "learning_rate": 4.517648753010375e-05, "loss": 2.3318, "step": 3334000 }, { "epoch": 9.65, "learning_rate": 4.517576388245647e-05, "loss": 2.3248, "step": 3334500 }, { "epoch": 9.65, "learning_rate": 4.517504023480919e-05, "loss": 2.3366, "step": 3335000 }, { "epoch": 9.65, "learning_rate": 4.5174316587161913e-05, "loss": 2.3261, "step": 3335500 }, { "epoch": 9.66, "learning_rate": 4.5173592939514636e-05, "loss": 2.3095, "step": 3336000 }, { "epoch": 9.66, "learning_rate": 4.517286929186736e-05, "loss": 2.3091, "step": 3336500 }, { "epoch": 9.66, "learning_rate": 4.5172147091515374e-05, "loss": 2.3292, "step": 3337000 }, { "epoch": 9.66, "learning_rate": 4.5171424891163396e-05, "loss": 2.3246, "step": 3337500 }, { "epoch": 9.66, "learning_rate": 4.517070269081141e-05, "loss": 2.3135, "step": 3338000 }, { "epoch": 9.66, "learning_rate": 4.5169979043164134e-05, "loss": 2.3196, "step": 3338500 }, { "epoch": 9.67, "learning_rate": 4.516925539551686e-05, "loss": 2.3226, "step": 3339000 }, { "epoch": 9.67, "learning_rate": 4.5168531747869585e-05, "loss": 2.3303, "step": 3339500 }, { "epoch": 9.67, "learning_rate": 4.516780810022231e-05, "loss": 2.311, "step": 3340000 }, { "epoch": 9.67, "learning_rate": 4.516708445257503e-05, "loss": 2.3345, "step": 3340500 }, { "epoch": 9.67, "learning_rate": 4.516636080492775e-05, "loss": 2.3274, "step": 3341000 }, { "epoch": 9.67, "learning_rate": 4.5165637157280474e-05, "loss": 2.351, "step": 3341500 }, { "epoch": 9.67, "learning_rate": 4.5164914956928496e-05, "loss": 2.3156, "step": 3342000 }, { "epoch": 9.68, "learning_rate": 4.516419130928122e-05, "loss": 2.3387, "step": 3342500 }, { "epoch": 9.68, "learning_rate": 4.516346766163394e-05, "loss": 2.3133, "step": 3343000 }, { "epoch": 9.68, "learning_rate": 4.516274401398666e-05, "loss": 2.3157, "step": 3343500 }, { "epoch": 9.68, "learning_rate": 4.5162020366339385e-05, "loss": 2.3209, "step": 3344000 }, { "epoch": 9.68, "learning_rate": 4.51612981659874e-05, "loss": 2.3304, "step": 3344500 }, { "epoch": 9.68, "learning_rate": 4.516057451834013e-05, "loss": 2.3231, "step": 3345000 }, { "epoch": 9.68, "learning_rate": 4.515985087069285e-05, "loss": 2.3273, "step": 3345500 }, { "epoch": 9.69, "learning_rate": 4.5159127223045574e-05, "loss": 2.3445, "step": 3346000 }, { "epoch": 9.69, "learning_rate": 4.51584035753983e-05, "loss": 2.3307, "step": 3346500 }, { "epoch": 9.69, "learning_rate": 4.5157679927751025e-05, "loss": 2.3225, "step": 3347000 }, { "epoch": 9.69, "learning_rate": 4.515695772739904e-05, "loss": 2.3356, "step": 3347500 }, { "epoch": 9.69, "learning_rate": 4.515623407975176e-05, "loss": 2.3382, "step": 3348000 }, { "epoch": 9.69, "learning_rate": 4.515551187939978e-05, "loss": 2.3314, "step": 3348500 }, { "epoch": 9.69, "learning_rate": 4.51547882317525e-05, "loss": 2.3243, "step": 3349000 }, { "epoch": 9.7, "learning_rate": 4.515406458410523e-05, "loss": 2.3396, "step": 3349500 }, { "epoch": 9.7, "learning_rate": 4.515334093645795e-05, "loss": 2.3253, "step": 3350000 }, { "epoch": 9.7, "learning_rate": 4.5152617288810674e-05, "loss": 2.3373, "step": 3350500 }, { "epoch": 9.7, "learning_rate": 4.51518936411634e-05, "loss": 2.3197, "step": 3351000 }, { "epoch": 9.7, "learning_rate": 4.515116999351612e-05, "loss": 2.3021, "step": 3351500 }, { "epoch": 9.7, "learning_rate": 4.5150447793164134e-05, "loss": 2.3321, "step": 3352000 }, { "epoch": 9.7, "learning_rate": 4.514972414551686e-05, "loss": 2.3308, "step": 3352500 }, { "epoch": 9.71, "learning_rate": 4.514900049786958e-05, "loss": 2.3281, "step": 3353000 }, { "epoch": 9.71, "learning_rate": 4.51482768502223e-05, "loss": 2.3417, "step": 3353500 }, { "epoch": 9.71, "learning_rate": 4.514755320257503e-05, "loss": 2.3097, "step": 3354000 }, { "epoch": 9.71, "learning_rate": 4.514682955492775e-05, "loss": 2.3454, "step": 3354500 }, { "epoch": 9.71, "learning_rate": 4.514610590728048e-05, "loss": 2.3336, "step": 3355000 }, { "epoch": 9.71, "learning_rate": 4.5145382259633204e-05, "loss": 2.3515, "step": 3355500 }, { "epoch": 9.71, "learning_rate": 4.5144658611985926e-05, "loss": 2.3051, "step": 3356000 }, { "epoch": 9.72, "learning_rate": 4.514393496433865e-05, "loss": 2.3247, "step": 3356500 }, { "epoch": 9.72, "learning_rate": 4.514321131669137e-05, "loss": 2.3467, "step": 3357000 }, { "epoch": 9.72, "learning_rate": 4.5142489116339386e-05, "loss": 2.3219, "step": 3357500 }, { "epoch": 9.72, "learning_rate": 4.514176546869211e-05, "loss": 2.3177, "step": 3358000 }, { "epoch": 9.72, "learning_rate": 4.514104182104483e-05, "loss": 2.3135, "step": 3358500 }, { "epoch": 9.72, "learning_rate": 4.514031817339755e-05, "loss": 2.2972, "step": 3359000 }, { "epoch": 9.72, "learning_rate": 4.513959452575028e-05, "loss": 2.3412, "step": 3359500 }, { "epoch": 9.73, "learning_rate": 4.51388723253983e-05, "loss": 2.3365, "step": 3360000 }, { "epoch": 9.73, "learning_rate": 4.513814867775102e-05, "loss": 2.3119, "step": 3360500 }, { "epoch": 9.73, "learning_rate": 4.513742503010374e-05, "loss": 2.3206, "step": 3361000 }, { "epoch": 9.73, "learning_rate": 4.513670138245647e-05, "loss": 2.3444, "step": 3361500 }, { "epoch": 9.73, "learning_rate": 4.513597773480919e-05, "loss": 2.3335, "step": 3362000 }, { "epoch": 9.73, "learning_rate": 4.5135254087161915e-05, "loss": 2.3046, "step": 3362500 }, { "epoch": 9.73, "learning_rate": 4.513453043951464e-05, "loss": 2.2967, "step": 3363000 }, { "epoch": 9.74, "learning_rate": 4.513380679186736e-05, "loss": 2.3432, "step": 3363500 }, { "epoch": 9.74, "learning_rate": 4.513308314422008e-05, "loss": 2.3322, "step": 3364000 }, { "epoch": 9.74, "learning_rate": 4.5132360943868104e-05, "loss": 2.3513, "step": 3364500 }, { "epoch": 9.74, "learning_rate": 4.5131637296220826e-05, "loss": 2.335, "step": 3365000 }, { "epoch": 9.74, "learning_rate": 4.513091364857355e-05, "loss": 2.35, "step": 3365500 }, { "epoch": 9.74, "learning_rate": 4.513019000092627e-05, "loss": 2.3336, "step": 3366000 }, { "epoch": 9.74, "learning_rate": 4.5129467800574286e-05, "loss": 2.325, "step": 3366500 }, { "epoch": 9.75, "learning_rate": 4.512874415292701e-05, "loss": 2.3189, "step": 3367000 }, { "epoch": 9.75, "learning_rate": 4.512802050527973e-05, "loss": 2.3193, "step": 3367500 }, { "epoch": 9.75, "learning_rate": 4.512729830492775e-05, "loss": 2.3087, "step": 3368000 }, { "epoch": 9.75, "learning_rate": 4.5126574657280475e-05, "loss": 2.32, "step": 3368500 }, { "epoch": 9.75, "learning_rate": 4.5125851009633204e-05, "loss": 2.3051, "step": 3369000 }, { "epoch": 9.75, "learning_rate": 4.512512736198593e-05, "loss": 2.3297, "step": 3369500 }, { "epoch": 9.75, "learning_rate": 4.512440371433865e-05, "loss": 2.347, "step": 3370000 }, { "epoch": 9.76, "learning_rate": 4.512368006669137e-05, "loss": 2.3077, "step": 3370500 }, { "epoch": 9.76, "learning_rate": 4.512295786633939e-05, "loss": 2.3406, "step": 3371000 }, { "epoch": 9.76, "learning_rate": 4.512223421869211e-05, "loss": 2.3536, "step": 3371500 }, { "epoch": 9.76, "learning_rate": 4.512151057104483e-05, "loss": 2.3295, "step": 3372000 }, { "epoch": 9.76, "learning_rate": 4.512078692339755e-05, "loss": 2.3521, "step": 3372500 }, { "epoch": 9.76, "learning_rate": 4.512006327575028e-05, "loss": 2.3469, "step": 3373000 }, { "epoch": 9.76, "learning_rate": 4.5119339628103005e-05, "loss": 2.3348, "step": 3373500 }, { "epoch": 9.77, "learning_rate": 4.511861742775102e-05, "loss": 2.3283, "step": 3374000 }, { "epoch": 9.77, "learning_rate": 4.511789378010374e-05, "loss": 2.3374, "step": 3374500 }, { "epoch": 9.77, "learning_rate": 4.5117170132456465e-05, "loss": 2.3595, "step": 3375000 }, { "epoch": 9.77, "learning_rate": 4.511644648480919e-05, "loss": 2.3254, "step": 3375500 }, { "epoch": 9.77, "learning_rate": 4.511572428445721e-05, "loss": 2.3029, "step": 3376000 }, { "epoch": 9.77, "learning_rate": 4.511500063680993e-05, "loss": 2.3188, "step": 3376500 }, { "epoch": 9.78, "learning_rate": 4.511427698916266e-05, "loss": 2.3274, "step": 3377000 }, { "epoch": 9.78, "learning_rate": 4.511355334151538e-05, "loss": 2.3129, "step": 3377500 }, { "epoch": 9.78, "learning_rate": 4.5112829693868105e-05, "loss": 2.3382, "step": 3378000 }, { "epoch": 9.78, "learning_rate": 4.511210604622083e-05, "loss": 2.3261, "step": 3378500 }, { "epoch": 9.78, "learning_rate": 4.511138239857355e-05, "loss": 2.3301, "step": 3379000 }, { "epoch": 9.78, "learning_rate": 4.511065875092627e-05, "loss": 2.3149, "step": 3379500 }, { "epoch": 9.78, "learning_rate": 4.5109935103278994e-05, "loss": 2.313, "step": 3380000 }, { "epoch": 9.79, "learning_rate": 4.5109211455631716e-05, "loss": 2.3396, "step": 3380500 }, { "epoch": 9.79, "learning_rate": 4.510848925527973e-05, "loss": 2.3575, "step": 3381000 }, { "epoch": 9.79, "learning_rate": 4.5107767054927754e-05, "loss": 2.3482, "step": 3381500 }, { "epoch": 9.79, "learning_rate": 4.5107043407280476e-05, "loss": 2.3407, "step": 3382000 }, { "epoch": 9.79, "learning_rate": 4.51063197596332e-05, "loss": 2.31, "step": 3382500 }, { "epoch": 9.79, "learning_rate": 4.510559611198592e-05, "loss": 2.3117, "step": 3383000 }, { "epoch": 9.79, "learning_rate": 4.510487246433864e-05, "loss": 2.3428, "step": 3383500 }, { "epoch": 9.8, "learning_rate": 4.510414881669137e-05, "loss": 2.3114, "step": 3384000 }, { "epoch": 9.8, "learning_rate": 4.5103425169044094e-05, "loss": 2.3283, "step": 3384500 }, { "epoch": 9.8, "learning_rate": 4.5102701521396816e-05, "loss": 2.3051, "step": 3385000 }, { "epoch": 9.8, "learning_rate": 4.510197787374954e-05, "loss": 2.3164, "step": 3385500 }, { "epoch": 9.8, "learning_rate": 4.510125422610226e-05, "loss": 2.3335, "step": 3386000 }, { "epoch": 9.8, "learning_rate": 4.510053057845498e-05, "loss": 2.2876, "step": 3386500 }, { "epoch": 9.8, "learning_rate": 4.5099806930807705e-05, "loss": 2.3192, "step": 3387000 }, { "epoch": 9.81, "learning_rate": 4.509908617775102e-05, "loss": 2.3278, "step": 3387500 }, { "epoch": 9.81, "learning_rate": 4.509836253010374e-05, "loss": 2.3359, "step": 3388000 }, { "epoch": 9.81, "learning_rate": 4.5097638882456465e-05, "loss": 2.3152, "step": 3388500 }, { "epoch": 9.81, "learning_rate": 4.509691523480919e-05, "loss": 2.3165, "step": 3389000 }, { "epoch": 9.81, "learning_rate": 4.509619158716191e-05, "loss": 2.3534, "step": 3389500 }, { "epoch": 9.81, "learning_rate": 4.509546938680993e-05, "loss": 2.3431, "step": 3390000 }, { "epoch": 9.81, "learning_rate": 4.5094745739162654e-05, "loss": 2.322, "step": 3390500 }, { "epoch": 9.82, "learning_rate": 4.509402209151538e-05, "loss": 2.2985, "step": 3391000 }, { "epoch": 9.82, "learning_rate": 4.5093298443868106e-05, "loss": 2.3345, "step": 3391500 }, { "epoch": 9.82, "learning_rate": 4.509257479622083e-05, "loss": 2.3151, "step": 3392000 }, { "epoch": 9.82, "learning_rate": 4.509185114857355e-05, "loss": 2.3381, "step": 3392500 }, { "epoch": 9.82, "learning_rate": 4.509112750092627e-05, "loss": 2.3311, "step": 3393000 }, { "epoch": 9.82, "learning_rate": 4.509040530057429e-05, "loss": 2.3288, "step": 3393500 }, { "epoch": 9.82, "learning_rate": 4.5089684547517604e-05, "loss": 2.3163, "step": 3394000 }, { "epoch": 9.83, "learning_rate": 4.5088960899870326e-05, "loss": 2.3489, "step": 3394500 }, { "epoch": 9.83, "learning_rate": 4.508823725222305e-05, "loss": 2.3378, "step": 3395000 }, { "epoch": 9.83, "learning_rate": 4.508751360457577e-05, "loss": 2.3618, "step": 3395500 }, { "epoch": 9.83, "learning_rate": 4.508678995692849e-05, "loss": 2.3199, "step": 3396000 }, { "epoch": 9.83, "learning_rate": 4.5086066309281215e-05, "loss": 2.3324, "step": 3396500 }, { "epoch": 9.83, "learning_rate": 4.508534266163394e-05, "loss": 2.3193, "step": 3397000 }, { "epoch": 9.83, "learning_rate": 4.508461901398666e-05, "loss": 2.3233, "step": 3397500 }, { "epoch": 9.84, "learning_rate": 4.508389536633938e-05, "loss": 2.3182, "step": 3398000 }, { "epoch": 9.84, "learning_rate": 4.508317171869211e-05, "loss": 2.3386, "step": 3398500 }, { "epoch": 9.84, "learning_rate": 4.508244807104483e-05, "loss": 2.3117, "step": 3399000 }, { "epoch": 9.84, "learning_rate": 4.508172442339756e-05, "loss": 2.3334, "step": 3399500 }, { "epoch": 9.84, "learning_rate": 4.5081000775750284e-05, "loss": 2.3251, "step": 3400000 }, { "epoch": 9.84, "learning_rate": 4.5080277128103006e-05, "loss": 2.3366, "step": 3400500 }, { "epoch": 9.84, "learning_rate": 4.507955348045573e-05, "loss": 2.3381, "step": 3401000 }, { "epoch": 9.85, "learning_rate": 4.507882983280845e-05, "loss": 2.3131, "step": 3401500 }, { "epoch": 9.85, "learning_rate": 4.5078107632456466e-05, "loss": 2.3466, "step": 3402000 }, { "epoch": 9.85, "learning_rate": 4.507738543210449e-05, "loss": 2.3172, "step": 3402500 }, { "epoch": 9.85, "learning_rate": 4.507666178445721e-05, "loss": 2.3446, "step": 3403000 }, { "epoch": 9.85, "learning_rate": 4.5075939584105226e-05, "loss": 2.329, "step": 3403500 }, { "epoch": 9.85, "learning_rate": 4.507521593645795e-05, "loss": 2.3248, "step": 3404000 }, { "epoch": 9.85, "learning_rate": 4.507449228881067e-05, "loss": 2.3084, "step": 3404500 }, { "epoch": 9.86, "learning_rate": 4.507376864116339e-05, "loss": 2.2927, "step": 3405000 }, { "epoch": 9.86, "learning_rate": 4.5073044993516115e-05, "loss": 2.3205, "step": 3405500 }, { "epoch": 9.86, "learning_rate": 4.507232134586884e-05, "loss": 2.3195, "step": 3406000 }, { "epoch": 9.86, "learning_rate": 4.5071597698221567e-05, "loss": 2.3458, "step": 3406500 }, { "epoch": 9.86, "learning_rate": 4.507087405057429e-05, "loss": 2.311, "step": 3407000 }, { "epoch": 9.86, "learning_rate": 4.507015040292701e-05, "loss": 2.3347, "step": 3407500 }, { "epoch": 9.86, "learning_rate": 4.506942675527974e-05, "loss": 2.3418, "step": 3408000 }, { "epoch": 9.87, "learning_rate": 4.5068704554927756e-05, "loss": 2.3261, "step": 3408500 }, { "epoch": 9.87, "learning_rate": 4.506798090728048e-05, "loss": 2.3226, "step": 3409000 }, { "epoch": 9.87, "learning_rate": 4.50672572596332e-05, "loss": 2.3253, "step": 3409500 }, { "epoch": 9.87, "learning_rate": 4.506653361198592e-05, "loss": 2.3061, "step": 3410000 }, { "epoch": 9.87, "learning_rate": 4.5065809964338644e-05, "loss": 2.3364, "step": 3410500 }, { "epoch": 9.87, "learning_rate": 4.506508631669137e-05, "loss": 2.3197, "step": 3411000 }, { "epoch": 9.87, "learning_rate": 4.506436411633939e-05, "loss": 2.3179, "step": 3411500 }, { "epoch": 9.88, "learning_rate": 4.506364046869211e-05, "loss": 2.3081, "step": 3412000 }, { "epoch": 9.88, "learning_rate": 4.5062916821044834e-05, "loss": 2.3061, "step": 3412500 }, { "epoch": 9.88, "learning_rate": 4.5062193173397556e-05, "loss": 2.3285, "step": 3413000 }, { "epoch": 9.88, "learning_rate": 4.5061469525750285e-05, "loss": 2.3329, "step": 3413500 }, { "epoch": 9.88, "learning_rate": 4.506074587810301e-05, "loss": 2.3266, "step": 3414000 }, { "epoch": 9.88, "learning_rate": 4.506002223045573e-05, "loss": 2.319, "step": 3414500 }, { "epoch": 9.89, "learning_rate": 4.505929858280845e-05, "loss": 2.3325, "step": 3415000 }, { "epoch": 9.89, "learning_rate": 4.5058574935161174e-05, "loss": 2.3383, "step": 3415500 }, { "epoch": 9.89, "learning_rate": 4.505785273480919e-05, "loss": 2.3449, "step": 3416000 }, { "epoch": 9.89, "learning_rate": 4.505713053445721e-05, "loss": 2.3295, "step": 3416500 }, { "epoch": 9.89, "learning_rate": 4.5056406886809934e-05, "loss": 2.3312, "step": 3417000 }, { "epoch": 9.89, "learning_rate": 4.5055683239162656e-05, "loss": 2.3315, "step": 3417500 }, { "epoch": 9.89, "learning_rate": 4.505495959151538e-05, "loss": 2.3177, "step": 3418000 }, { "epoch": 9.9, "learning_rate": 4.50542359438681e-05, "loss": 2.3135, "step": 3418500 }, { "epoch": 9.9, "learning_rate": 4.5053513743516116e-05, "loss": 2.3116, "step": 3419000 }, { "epoch": 9.9, "learning_rate": 4.505279009586884e-05, "loss": 2.3395, "step": 3419500 }, { "epoch": 9.9, "learning_rate": 4.505206644822156e-05, "loss": 2.3342, "step": 3420000 }, { "epoch": 9.9, "learning_rate": 4.505134280057429e-05, "loss": 2.3226, "step": 3420500 }, { "epoch": 9.9, "learning_rate": 4.505061915292701e-05, "loss": 2.3268, "step": 3421000 }, { "epoch": 9.9, "learning_rate": 4.504989550527974e-05, "loss": 2.3373, "step": 3421500 }, { "epoch": 9.91, "learning_rate": 4.504917185763246e-05, "loss": 2.3346, "step": 3422000 }, { "epoch": 9.91, "learning_rate": 4.5048448209985185e-05, "loss": 2.3306, "step": 3422500 }, { "epoch": 9.91, "learning_rate": 4.504772456233791e-05, "loss": 2.3415, "step": 3423000 }, { "epoch": 9.91, "learning_rate": 4.504700236198592e-05, "loss": 2.3437, "step": 3423500 }, { "epoch": 9.91, "learning_rate": 4.504628016163394e-05, "loss": 2.3268, "step": 3424000 }, { "epoch": 9.91, "learning_rate": 4.504555651398666e-05, "loss": 2.3542, "step": 3424500 }, { "epoch": 9.91, "learning_rate": 4.504483286633939e-05, "loss": 2.317, "step": 3425000 }, { "epoch": 9.92, "learning_rate": 4.504410921869211e-05, "loss": 2.3436, "step": 3425500 }, { "epoch": 9.92, "learning_rate": 4.5043385571044834e-05, "loss": 2.3258, "step": 3426000 }, { "epoch": 9.92, "learning_rate": 4.5042661923397557e-05, "loss": 2.3314, "step": 3426500 }, { "epoch": 9.92, "learning_rate": 4.504193827575028e-05, "loss": 2.3301, "step": 3427000 }, { "epoch": 9.92, "learning_rate": 4.5041214628103e-05, "loss": 2.2861, "step": 3427500 }, { "epoch": 9.92, "learning_rate": 4.504049098045572e-05, "loss": 2.323, "step": 3428000 }, { "epoch": 9.92, "learning_rate": 4.503976878010374e-05, "loss": 2.3564, "step": 3428500 }, { "epoch": 9.93, "learning_rate": 4.503904513245647e-05, "loss": 2.319, "step": 3429000 }, { "epoch": 9.93, "learning_rate": 4.503832148480919e-05, "loss": 2.3351, "step": 3429500 }, { "epoch": 9.93, "learning_rate": 4.503759783716191e-05, "loss": 2.3253, "step": 3430000 }, { "epoch": 9.93, "learning_rate": 4.503687418951464e-05, "loss": 2.3281, "step": 3430500 }, { "epoch": 9.93, "learning_rate": 4.5036150541867364e-05, "loss": 2.3177, "step": 3431000 }, { "epoch": 9.93, "learning_rate": 4.503542834151538e-05, "loss": 2.3294, "step": 3431500 }, { "epoch": 9.93, "learning_rate": 4.50347046938681e-05, "loss": 2.3607, "step": 3432000 }, { "epoch": 9.94, "learning_rate": 4.503398249351612e-05, "loss": 2.3445, "step": 3432500 }, { "epoch": 9.94, "learning_rate": 4.503325884586884e-05, "loss": 2.2968, "step": 3433000 }, { "epoch": 9.94, "learning_rate": 4.503253519822157e-05, "loss": 2.331, "step": 3433500 }, { "epoch": 9.94, "learning_rate": 4.503181155057429e-05, "loss": 2.3187, "step": 3434000 }, { "epoch": 9.94, "learning_rate": 4.503108790292701e-05, "loss": 2.3184, "step": 3434500 }, { "epoch": 9.94, "learning_rate": 4.503036570257503e-05, "loss": 2.3246, "step": 3435000 }, { "epoch": 9.94, "learning_rate": 4.502964205492775e-05, "loss": 2.335, "step": 3435500 }, { "epoch": 9.95, "learning_rate": 4.502891840728047e-05, "loss": 2.3015, "step": 3436000 }, { "epoch": 9.95, "learning_rate": 4.50281947596332e-05, "loss": 2.3212, "step": 3436500 }, { "epoch": 9.95, "learning_rate": 4.5027471111985924e-05, "loss": 2.3051, "step": 3437000 }, { "epoch": 9.95, "learning_rate": 4.5026747464338646e-05, "loss": 2.3372, "step": 3437500 }, { "epoch": 9.95, "learning_rate": 4.502602381669137e-05, "loss": 2.3309, "step": 3438000 }, { "epoch": 9.95, "learning_rate": 4.502530016904409e-05, "loss": 2.3318, "step": 3438500 }, { "epoch": 9.95, "learning_rate": 4.502457796869211e-05, "loss": 2.3449, "step": 3439000 }, { "epoch": 9.96, "learning_rate": 4.5023854321044835e-05, "loss": 2.3279, "step": 3439500 }, { "epoch": 9.96, "learning_rate": 4.502313067339756e-05, "loss": 2.3422, "step": 3440000 }, { "epoch": 9.96, "learning_rate": 4.502240702575028e-05, "loss": 2.3204, "step": 3440500 }, { "epoch": 9.96, "learning_rate": 4.5021683378103e-05, "loss": 2.3294, "step": 3441000 }, { "epoch": 9.96, "learning_rate": 4.502096117775102e-05, "loss": 2.3231, "step": 3441500 }, { "epoch": 9.96, "learning_rate": 4.502023753010374e-05, "loss": 2.3156, "step": 3442000 }, { "epoch": 9.96, "learning_rate": 4.501951388245647e-05, "loss": 2.3331, "step": 3442500 }, { "epoch": 9.97, "learning_rate": 4.5018791682104484e-05, "loss": 2.3379, "step": 3443000 }, { "epoch": 9.97, "learning_rate": 4.5018068034457206e-05, "loss": 2.34, "step": 3443500 }, { "epoch": 9.97, "learning_rate": 4.5017344386809935e-05, "loss": 2.3163, "step": 3444000 }, { "epoch": 9.97, "learning_rate": 4.501662073916266e-05, "loss": 2.3472, "step": 3444500 }, { "epoch": 9.97, "learning_rate": 4.501589709151538e-05, "loss": 2.346, "step": 3445000 }, { "epoch": 9.97, "learning_rate": 4.50151734438681e-05, "loss": 2.3298, "step": 3445500 }, { "epoch": 9.97, "learning_rate": 4.5014449796220824e-05, "loss": 2.3314, "step": 3446000 }, { "epoch": 9.98, "learning_rate": 4.5013726148573547e-05, "loss": 2.311, "step": 3446500 }, { "epoch": 9.98, "learning_rate": 4.501300250092627e-05, "loss": 2.3492, "step": 3447000 }, { "epoch": 9.98, "learning_rate": 4.501227885327899e-05, "loss": 2.3293, "step": 3447500 }, { "epoch": 9.98, "learning_rate": 4.501155520563172e-05, "loss": 2.3351, "step": 3448000 }, { "epoch": 9.98, "learning_rate": 4.5010833005279736e-05, "loss": 2.3085, "step": 3448500 }, { "epoch": 9.98, "learning_rate": 4.501010935763246e-05, "loss": 2.3398, "step": 3449000 }, { "epoch": 9.98, "learning_rate": 4.500938570998518e-05, "loss": 2.3143, "step": 3449500 }, { "epoch": 9.99, "learning_rate": 4.5008663509633196e-05, "loss": 2.3132, "step": 3450000 }, { "epoch": 9.99, "learning_rate": 4.500793986198592e-05, "loss": 2.3422, "step": 3450500 }, { "epoch": 9.99, "learning_rate": 4.500721621433864e-05, "loss": 2.3604, "step": 3451000 }, { "epoch": 9.99, "learning_rate": 4.500649256669137e-05, "loss": 2.3286, "step": 3451500 }, { "epoch": 9.99, "learning_rate": 4.500576891904409e-05, "loss": 2.3214, "step": 3452000 }, { "epoch": 9.99, "learning_rate": 4.500504527139682e-05, "loss": 2.359, "step": 3452500 }, { "epoch": 10.0, "learning_rate": 4.500432162374954e-05, "loss": 2.3204, "step": 3453000 }, { "epoch": 10.0, "learning_rate": 4.5003597976102265e-05, "loss": 2.338, "step": 3453500 }, { "epoch": 10.0, "learning_rate": 4.500287432845499e-05, "loss": 2.3387, "step": 3454000 }, { "epoch": 10.0, "learning_rate": 4.5002152128103e-05, "loss": 2.3167, "step": 3454500 }, { "epoch": 10.0, "eval_accuracy": 0.6491371245160269, "eval_accuracy_mlm": 0.6116889359002926, "eval_accuracy_nsp": 0.8501739285861599, "eval_loss": 2.3059029579162598, "eval_runtime": 330.448, "eval_samples_per_second": 1320.589, "eval_steps_per_second": 55.025, "step": 3454720 }, { "epoch": 10.0, "learning_rate": 4.5001428480455725e-05, "loss": 2.318, "step": 3455000 }, { "epoch": 10.0, "learning_rate": 4.500070628010374e-05, "loss": 2.3119, "step": 3455500 }, { "epoch": 10.0, "learning_rate": 4.499998263245647e-05, "loss": 2.2885, "step": 3456000 }, { "epoch": 10.01, "learning_rate": 4.499925898480919e-05, "loss": 2.3048, "step": 3456500 }, { "epoch": 10.01, "learning_rate": 4.4998535337161914e-05, "loss": 2.303, "step": 3457000 }, { "epoch": 10.01, "learning_rate": 4.4997811689514636e-05, "loss": 2.2934, "step": 3457500 }, { "epoch": 10.01, "learning_rate": 4.499708804186736e-05, "loss": 2.3209, "step": 3458000 }, { "epoch": 10.01, "learning_rate": 4.499636439422009e-05, "loss": 2.2897, "step": 3458500 }, { "epoch": 10.01, "learning_rate": 4.49956421938681e-05, "loss": 2.3233, "step": 3459000 }, { "epoch": 10.01, "learning_rate": 4.4994918546220825e-05, "loss": 2.2969, "step": 3459500 }, { "epoch": 10.02, "learning_rate": 4.499419489857355e-05, "loss": 2.3026, "step": 3460000 }, { "epoch": 10.02, "learning_rate": 4.499347125092627e-05, "loss": 2.3057, "step": 3460500 }, { "epoch": 10.02, "learning_rate": 4.499274760327899e-05, "loss": 2.3203, "step": 3461000 }, { "epoch": 10.02, "learning_rate": 4.4992025402927014e-05, "loss": 2.3155, "step": 3461500 }, { "epoch": 10.02, "learning_rate": 4.499130320257503e-05, "loss": 2.3069, "step": 3462000 }, { "epoch": 10.02, "learning_rate": 4.499057955492775e-05, "loss": 2.304, "step": 3462500 }, { "epoch": 10.02, "learning_rate": 4.4989855907280474e-05, "loss": 2.2898, "step": 3463000 }, { "epoch": 10.03, "learning_rate": 4.4989132259633196e-05, "loss": 2.3275, "step": 3463500 }, { "epoch": 10.03, "learning_rate": 4.498840861198592e-05, "loss": 2.3204, "step": 3464000 }, { "epoch": 10.03, "learning_rate": 4.498768496433865e-05, "loss": 2.3107, "step": 3464500 }, { "epoch": 10.03, "learning_rate": 4.498696131669137e-05, "loss": 2.3287, "step": 3465000 }, { "epoch": 10.03, "learning_rate": 4.498623766904409e-05, "loss": 2.304, "step": 3465500 }, { "epoch": 10.03, "learning_rate": 4.498551402139682e-05, "loss": 2.2975, "step": 3466000 }, { "epoch": 10.03, "learning_rate": 4.4984790373749543e-05, "loss": 2.3121, "step": 3466500 }, { "epoch": 10.04, "learning_rate": 4.498406817339756e-05, "loss": 2.2987, "step": 3467000 }, { "epoch": 10.04, "learning_rate": 4.498334452575028e-05, "loss": 2.3093, "step": 3467500 }, { "epoch": 10.04, "learning_rate": 4.4982620878103003e-05, "loss": 2.2933, "step": 3468000 }, { "epoch": 10.04, "learning_rate": 4.4981897230455726e-05, "loss": 2.3028, "step": 3468500 }, { "epoch": 10.04, "learning_rate": 4.498117358280845e-05, "loss": 2.2951, "step": 3469000 }, { "epoch": 10.04, "learning_rate": 4.498044993516117e-05, "loss": 2.3064, "step": 3469500 }, { "epoch": 10.04, "learning_rate": 4.49797262875139e-05, "loss": 2.309, "step": 3470000 }, { "epoch": 10.05, "learning_rate": 4.4979004087161915e-05, "loss": 2.3176, "step": 3470500 }, { "epoch": 10.05, "learning_rate": 4.497828188680993e-05, "loss": 2.288, "step": 3471000 }, { "epoch": 10.05, "learning_rate": 4.497755823916265e-05, "loss": 2.2905, "step": 3471500 }, { "epoch": 10.05, "learning_rate": 4.4976834591515375e-05, "loss": 2.3069, "step": 3472000 }, { "epoch": 10.05, "learning_rate": 4.49761109438681e-05, "loss": 2.3048, "step": 3472500 }, { "epoch": 10.05, "learning_rate": 4.497538729622082e-05, "loss": 2.3146, "step": 3473000 }, { "epoch": 10.05, "learning_rate": 4.497466364857355e-05, "loss": 2.3175, "step": 3473500 }, { "epoch": 10.06, "learning_rate": 4.497394000092627e-05, "loss": 2.3173, "step": 3474000 }, { "epoch": 10.06, "learning_rate": 4.4973216353279e-05, "loss": 2.313, "step": 3474500 }, { "epoch": 10.06, "learning_rate": 4.497249270563172e-05, "loss": 2.291, "step": 3475000 }, { "epoch": 10.06, "learning_rate": 4.497177050527974e-05, "loss": 2.3106, "step": 3475500 }, { "epoch": 10.06, "learning_rate": 4.497104685763246e-05, "loss": 2.3018, "step": 3476000 }, { "epoch": 10.06, "learning_rate": 4.497032320998518e-05, "loss": 2.3165, "step": 3476500 }, { "epoch": 10.06, "learning_rate": 4.4969599562337904e-05, "loss": 2.3238, "step": 3477000 }, { "epoch": 10.07, "learning_rate": 4.4968875914690626e-05, "loss": 2.3212, "step": 3477500 }, { "epoch": 10.07, "learning_rate": 4.496815226704335e-05, "loss": 2.3242, "step": 3478000 }, { "epoch": 10.07, "learning_rate": 4.496742861939607e-05, "loss": 2.3106, "step": 3478500 }, { "epoch": 10.07, "learning_rate": 4.49667049717488e-05, "loss": 2.2926, "step": 3479000 }, { "epoch": 10.07, "learning_rate": 4.496598132410152e-05, "loss": 2.2834, "step": 3479500 }, { "epoch": 10.07, "learning_rate": 4.496525912374954e-05, "loss": 2.3286, "step": 3480000 }, { "epoch": 10.07, "learning_rate": 4.496453692339755e-05, "loss": 2.3134, "step": 3480500 }, { "epoch": 10.08, "learning_rate": 4.496381327575028e-05, "loss": 2.3021, "step": 3481000 }, { "epoch": 10.08, "learning_rate": 4.4963089628103004e-05, "loss": 2.2945, "step": 3481500 }, { "epoch": 10.08, "learning_rate": 4.4962365980455726e-05, "loss": 2.3017, "step": 3482000 }, { "epoch": 10.08, "learning_rate": 4.496164233280845e-05, "loss": 2.2884, "step": 3482500 }, { "epoch": 10.08, "learning_rate": 4.496092013245647e-05, "loss": 2.3153, "step": 3483000 }, { "epoch": 10.08, "learning_rate": 4.496019648480919e-05, "loss": 2.3066, "step": 3483500 }, { "epoch": 10.08, "learning_rate": 4.4959472837161915e-05, "loss": 2.3235, "step": 3484000 }, { "epoch": 10.09, "learning_rate": 4.495874918951464e-05, "loss": 2.2898, "step": 3484500 }, { "epoch": 10.09, "learning_rate": 4.495802554186736e-05, "loss": 2.2885, "step": 3485000 }, { "epoch": 10.09, "learning_rate": 4.495730189422008e-05, "loss": 2.3289, "step": 3485500 }, { "epoch": 10.09, "learning_rate": 4.49565796938681e-05, "loss": 2.3213, "step": 3486000 }, { "epoch": 10.09, "learning_rate": 4.495585604622083e-05, "loss": 2.3121, "step": 3486500 }, { "epoch": 10.09, "learning_rate": 4.495513239857355e-05, "loss": 2.2947, "step": 3487000 }, { "epoch": 10.09, "learning_rate": 4.495440875092627e-05, "loss": 2.3304, "step": 3487500 }, { "epoch": 10.1, "learning_rate": 4.4953685103278993e-05, "loss": 2.3129, "step": 3488000 }, { "epoch": 10.1, "learning_rate": 4.495296145563172e-05, "loss": 2.3016, "step": 3488500 }, { "epoch": 10.1, "learning_rate": 4.4952237807984445e-05, "loss": 2.3186, "step": 3489000 }, { "epoch": 10.1, "learning_rate": 4.495151416033717e-05, "loss": 2.3172, "step": 3489500 }, { "epoch": 10.1, "learning_rate": 4.495079051268989e-05, "loss": 2.3307, "step": 3490000 }, { "epoch": 10.1, "learning_rate": 4.4950068312337905e-05, "loss": 2.3105, "step": 3490500 }, { "epoch": 10.11, "learning_rate": 4.494934611198593e-05, "loss": 2.2984, "step": 3491000 }, { "epoch": 10.11, "learning_rate": 4.494862246433865e-05, "loss": 2.2892, "step": 3491500 }, { "epoch": 10.11, "learning_rate": 4.494789881669137e-05, "loss": 2.2977, "step": 3492000 }, { "epoch": 10.11, "learning_rate": 4.4947175169044094e-05, "loss": 2.3457, "step": 3492500 }, { "epoch": 10.11, "learning_rate": 4.4946451521396816e-05, "loss": 2.3209, "step": 3493000 }, { "epoch": 10.11, "learning_rate": 4.494572787374954e-05, "loss": 2.3048, "step": 3493500 }, { "epoch": 10.11, "learning_rate": 4.494500422610226e-05, "loss": 2.32, "step": 3494000 }, { "epoch": 10.12, "learning_rate": 4.494428057845498e-05, "loss": 2.3068, "step": 3494500 }, { "epoch": 10.12, "learning_rate": 4.4943556930807705e-05, "loss": 2.2979, "step": 3495000 }, { "epoch": 10.12, "learning_rate": 4.494283617775102e-05, "loss": 2.3157, "step": 3495500 }, { "epoch": 10.12, "learning_rate": 4.494211253010374e-05, "loss": 2.3117, "step": 3496000 }, { "epoch": 10.12, "learning_rate": 4.494138888245647e-05, "loss": 2.3028, "step": 3496500 }, { "epoch": 10.12, "learning_rate": 4.4940665234809194e-05, "loss": 2.2735, "step": 3497000 }, { "epoch": 10.12, "learning_rate": 4.4939941587161916e-05, "loss": 2.3104, "step": 3497500 }, { "epoch": 10.13, "learning_rate": 4.493921793951464e-05, "loss": 2.3049, "step": 3498000 }, { "epoch": 10.13, "learning_rate": 4.493849429186736e-05, "loss": 2.3192, "step": 3498500 }, { "epoch": 10.13, "learning_rate": 4.493777064422008e-05, "loss": 2.2936, "step": 3499000 }, { "epoch": 10.13, "learning_rate": 4.4937046996572805e-05, "loss": 2.3174, "step": 3499500 }, { "epoch": 10.13, "learning_rate": 4.493632479622083e-05, "loss": 2.3222, "step": 3500000 }, { "epoch": 10.13, "learning_rate": 4.493560114857355e-05, "loss": 2.3427, "step": 3500500 }, { "epoch": 10.13, "learning_rate": 4.4934878948221565e-05, "loss": 2.2852, "step": 3501000 }, { "epoch": 10.14, "learning_rate": 4.493415530057429e-05, "loss": 2.3139, "step": 3501500 }, { "epoch": 10.14, "learning_rate": 4.493343165292701e-05, "loss": 2.3273, "step": 3502000 }, { "epoch": 10.14, "learning_rate": 4.493270800527973e-05, "loss": 2.3065, "step": 3502500 }, { "epoch": 10.14, "learning_rate": 4.493198580492775e-05, "loss": 2.3259, "step": 3503000 }, { "epoch": 10.14, "learning_rate": 4.4931262157280477e-05, "loss": 2.2944, "step": 3503500 }, { "epoch": 10.14, "learning_rate": 4.49305385096332e-05, "loss": 2.2858, "step": 3504000 }, { "epoch": 10.14, "learning_rate": 4.492981486198593e-05, "loss": 2.3192, "step": 3504500 }, { "epoch": 10.15, "learning_rate": 4.492909121433865e-05, "loss": 2.2994, "step": 3505000 }, { "epoch": 10.15, "learning_rate": 4.492836756669137e-05, "loss": 2.3184, "step": 3505500 }, { "epoch": 10.15, "learning_rate": 4.492764536633939e-05, "loss": 2.3111, "step": 3506000 }, { "epoch": 10.15, "learning_rate": 4.4926923165987403e-05, "loss": 2.3085, "step": 3506500 }, { "epoch": 10.15, "learning_rate": 4.4926199518340126e-05, "loss": 2.2825, "step": 3507000 }, { "epoch": 10.15, "learning_rate": 4.4925475870692855e-05, "loss": 2.3119, "step": 3507500 }, { "epoch": 10.15, "learning_rate": 4.492475222304558e-05, "loss": 2.3239, "step": 3508000 }, { "epoch": 10.16, "learning_rate": 4.49240285753983e-05, "loss": 2.3107, "step": 3508500 }, { "epoch": 10.16, "learning_rate": 4.492330492775102e-05, "loss": 2.3135, "step": 3509000 }, { "epoch": 10.16, "learning_rate": 4.4922581280103744e-05, "loss": 2.3089, "step": 3509500 }, { "epoch": 10.16, "learning_rate": 4.4921857632456466e-05, "loss": 2.3052, "step": 3510000 }, { "epoch": 10.16, "learning_rate": 4.492113398480919e-05, "loss": 2.3301, "step": 3510500 }, { "epoch": 10.16, "learning_rate": 4.4920411784457204e-05, "loss": 2.3085, "step": 3511000 }, { "epoch": 10.16, "learning_rate": 4.491968813680993e-05, "loss": 2.3181, "step": 3511500 }, { "epoch": 10.17, "learning_rate": 4.4918964489162655e-05, "loss": 2.3085, "step": 3512000 }, { "epoch": 10.17, "learning_rate": 4.491824084151538e-05, "loss": 2.2796, "step": 3512500 }, { "epoch": 10.17, "learning_rate": 4.49175186411634e-05, "loss": 2.2842, "step": 3513000 }, { "epoch": 10.17, "learning_rate": 4.491679499351612e-05, "loss": 2.3144, "step": 3513500 }, { "epoch": 10.17, "learning_rate": 4.4916071345868844e-05, "loss": 2.3372, "step": 3514000 }, { "epoch": 10.17, "learning_rate": 4.4915347698221566e-05, "loss": 2.3084, "step": 3514500 }, { "epoch": 10.17, "learning_rate": 4.491462405057429e-05, "loss": 2.3374, "step": 3515000 }, { "epoch": 10.18, "learning_rate": 4.491390040292701e-05, "loss": 2.2806, "step": 3515500 }, { "epoch": 10.18, "learning_rate": 4.491317675527973e-05, "loss": 2.3277, "step": 3516000 }, { "epoch": 10.18, "learning_rate": 4.4912453107632455e-05, "loss": 2.3288, "step": 3516500 }, { "epoch": 10.18, "learning_rate": 4.491173090728048e-05, "loss": 2.3314, "step": 3517000 }, { "epoch": 10.18, "learning_rate": 4.49110072596332e-05, "loss": 2.3179, "step": 3517500 }, { "epoch": 10.18, "learning_rate": 4.491028361198592e-05, "loss": 2.2908, "step": 3518000 }, { "epoch": 10.18, "learning_rate": 4.4909559964338644e-05, "loss": 2.2958, "step": 3518500 }, { "epoch": 10.19, "learning_rate": 4.490883631669137e-05, "loss": 2.2872, "step": 3519000 }, { "epoch": 10.19, "learning_rate": 4.490811411633939e-05, "loss": 2.3406, "step": 3519500 }, { "epoch": 10.19, "learning_rate": 4.490739046869211e-05, "loss": 2.2884, "step": 3520000 }, { "epoch": 10.19, "learning_rate": 4.490666682104483e-05, "loss": 2.3085, "step": 3520500 }, { "epoch": 10.19, "learning_rate": 4.4905944620692855e-05, "loss": 2.3172, "step": 3521000 }, { "epoch": 10.19, "learning_rate": 4.490522097304558e-05, "loss": 2.3198, "step": 3521500 }, { "epoch": 10.19, "learning_rate": 4.49044973253983e-05, "loss": 2.2868, "step": 3522000 }, { "epoch": 10.2, "learning_rate": 4.490377367775102e-05, "loss": 2.3186, "step": 3522500 }, { "epoch": 10.2, "learning_rate": 4.4903050030103744e-05, "loss": 2.3346, "step": 3523000 }, { "epoch": 10.2, "learning_rate": 4.490232638245647e-05, "loss": 2.3063, "step": 3523500 }, { "epoch": 10.2, "learning_rate": 4.490160418210448e-05, "loss": 2.3233, "step": 3524000 }, { "epoch": 10.2, "learning_rate": 4.4900880534457204e-05, "loss": 2.305, "step": 3524500 }, { "epoch": 10.2, "learning_rate": 4.490015688680993e-05, "loss": 2.2812, "step": 3525000 }, { "epoch": 10.2, "learning_rate": 4.4899433239162656e-05, "loss": 2.3107, "step": 3525500 }, { "epoch": 10.21, "learning_rate": 4.489870959151538e-05, "loss": 2.3506, "step": 3526000 }, { "epoch": 10.21, "learning_rate": 4.489798594386811e-05, "loss": 2.3249, "step": 3526500 }, { "epoch": 10.21, "learning_rate": 4.489726374351612e-05, "loss": 2.3267, "step": 3527000 }, { "epoch": 10.21, "learning_rate": 4.4896540095868845e-05, "loss": 2.279, "step": 3527500 }, { "epoch": 10.21, "learning_rate": 4.489581644822157e-05, "loss": 2.312, "step": 3528000 }, { "epoch": 10.21, "learning_rate": 4.489509280057429e-05, "loss": 2.2989, "step": 3528500 }, { "epoch": 10.22, "learning_rate": 4.4894370600222305e-05, "loss": 2.3034, "step": 3529000 }, { "epoch": 10.22, "learning_rate": 4.489364839987033e-05, "loss": 2.3053, "step": 3529500 }, { "epoch": 10.22, "learning_rate": 4.489292475222305e-05, "loss": 2.3234, "step": 3530000 }, { "epoch": 10.22, "learning_rate": 4.4892202551871065e-05, "loss": 2.3098, "step": 3530500 }, { "epoch": 10.22, "learning_rate": 4.489147890422379e-05, "loss": 2.2984, "step": 3531000 }, { "epoch": 10.22, "learning_rate": 4.489075525657651e-05, "loss": 2.3142, "step": 3531500 }, { "epoch": 10.22, "learning_rate": 4.489003160892923e-05, "loss": 2.3072, "step": 3532000 }, { "epoch": 10.23, "learning_rate": 4.4889307961281954e-05, "loss": 2.3606, "step": 3532500 }, { "epoch": 10.23, "learning_rate": 4.488858431363468e-05, "loss": 2.3388, "step": 3533000 }, { "epoch": 10.23, "learning_rate": 4.4887860665987405e-05, "loss": 2.3159, "step": 3533500 }, { "epoch": 10.23, "learning_rate": 4.4887137018340134e-05, "loss": 2.3085, "step": 3534000 }, { "epoch": 10.23, "learning_rate": 4.4886413370692856e-05, "loss": 2.3192, "step": 3534500 }, { "epoch": 10.23, "learning_rate": 4.488568972304558e-05, "loss": 2.3216, "step": 3535000 }, { "epoch": 10.23, "learning_rate": 4.48849660753983e-05, "loss": 2.3031, "step": 3535500 }, { "epoch": 10.24, "learning_rate": 4.488424242775102e-05, "loss": 2.2882, "step": 3536000 }, { "epoch": 10.24, "learning_rate": 4.4883518780103745e-05, "loss": 2.3313, "step": 3536500 }, { "epoch": 10.24, "learning_rate": 4.488279513245647e-05, "loss": 2.3026, "step": 3537000 }, { "epoch": 10.24, "learning_rate": 4.488207293210448e-05, "loss": 2.2925, "step": 3537500 }, { "epoch": 10.24, "learning_rate": 4.4881349284457205e-05, "loss": 2.3132, "step": 3538000 }, { "epoch": 10.24, "learning_rate": 4.4880625636809934e-05, "loss": 2.3145, "step": 3538500 }, { "epoch": 10.24, "learning_rate": 4.4879901989162656e-05, "loss": 2.327, "step": 3539000 }, { "epoch": 10.25, "learning_rate": 4.487917834151538e-05, "loss": 2.3145, "step": 3539500 }, { "epoch": 10.25, "learning_rate": 4.48784546938681e-05, "loss": 2.3109, "step": 3540000 }, { "epoch": 10.25, "learning_rate": 4.487773104622082e-05, "loss": 2.3047, "step": 3540500 }, { "epoch": 10.25, "learning_rate": 4.487700739857355e-05, "loss": 2.2971, "step": 3541000 }, { "epoch": 10.25, "learning_rate": 4.4876283750926274e-05, "loss": 2.3169, "step": 3541500 }, { "epoch": 10.25, "learning_rate": 4.487556155057429e-05, "loss": 2.3292, "step": 3542000 }, { "epoch": 10.25, "learning_rate": 4.487483790292701e-05, "loss": 2.3254, "step": 3542500 }, { "epoch": 10.26, "learning_rate": 4.4874115702575035e-05, "loss": 2.3049, "step": 3543000 }, { "epoch": 10.26, "learning_rate": 4.487339205492776e-05, "loss": 2.3144, "step": 3543500 }, { "epoch": 10.26, "learning_rate": 4.487266985457577e-05, "loss": 2.3365, "step": 3544000 }, { "epoch": 10.26, "learning_rate": 4.4871946206928495e-05, "loss": 2.3088, "step": 3544500 }, { "epoch": 10.26, "learning_rate": 4.487122255928122e-05, "loss": 2.2921, "step": 3545000 }, { "epoch": 10.26, "learning_rate": 4.487049891163394e-05, "loss": 2.3074, "step": 3545500 }, { "epoch": 10.26, "learning_rate": 4.486977526398666e-05, "loss": 2.3114, "step": 3546000 }, { "epoch": 10.27, "learning_rate": 4.4869051616339383e-05, "loss": 2.3314, "step": 3546500 }, { "epoch": 10.27, "learning_rate": 4.4868327968692106e-05, "loss": 2.3172, "step": 3547000 }, { "epoch": 10.27, "learning_rate": 4.4867604321044835e-05, "loss": 2.3221, "step": 3547500 }, { "epoch": 10.27, "learning_rate": 4.486688067339756e-05, "loss": 2.305, "step": 3548000 }, { "epoch": 10.27, "learning_rate": 4.4866157025750286e-05, "loss": 2.2769, "step": 3548500 }, { "epoch": 10.27, "learning_rate": 4.486543337810301e-05, "loss": 2.2977, "step": 3549000 }, { "epoch": 10.27, "learning_rate": 4.486470973045573e-05, "loss": 2.3094, "step": 3549500 }, { "epoch": 10.28, "learning_rate": 4.4863987530103746e-05, "loss": 2.2984, "step": 3550000 }, { "epoch": 10.28, "learning_rate": 4.486326388245647e-05, "loss": 2.308, "step": 3550500 }, { "epoch": 10.28, "learning_rate": 4.4862541682104484e-05, "loss": 2.3465, "step": 3551000 }, { "epoch": 10.28, "learning_rate": 4.4861818034457206e-05, "loss": 2.2982, "step": 3551500 }, { "epoch": 10.28, "learning_rate": 4.4861094386809935e-05, "loss": 2.3099, "step": 3552000 }, { "epoch": 10.28, "learning_rate": 4.486037073916266e-05, "loss": 2.3185, "step": 3552500 }, { "epoch": 10.28, "learning_rate": 4.485964709151538e-05, "loss": 2.2814, "step": 3553000 }, { "epoch": 10.29, "learning_rate": 4.48589234438681e-05, "loss": 2.3306, "step": 3553500 }, { "epoch": 10.29, "learning_rate": 4.4858199796220824e-05, "loss": 2.3116, "step": 3554000 }, { "epoch": 10.29, "learning_rate": 4.4857476148573546e-05, "loss": 2.2916, "step": 3554500 }, { "epoch": 10.29, "learning_rate": 4.485675250092627e-05, "loss": 2.2922, "step": 3555000 }, { "epoch": 10.29, "learning_rate": 4.4856030300574284e-05, "loss": 2.3064, "step": 3555500 }, { "epoch": 10.29, "learning_rate": 4.4855308100222306e-05, "loss": 2.3001, "step": 3556000 }, { "epoch": 10.29, "learning_rate": 4.4854584452575035e-05, "loss": 2.2938, "step": 3556500 }, { "epoch": 10.3, "learning_rate": 4.485386080492776e-05, "loss": 2.3301, "step": 3557000 }, { "epoch": 10.3, "learning_rate": 4.485313715728048e-05, "loss": 2.3039, "step": 3557500 }, { "epoch": 10.3, "learning_rate": 4.48524135096332e-05, "loss": 2.3008, "step": 3558000 }, { "epoch": 10.3, "learning_rate": 4.4851689861985924e-05, "loss": 2.3105, "step": 3558500 }, { "epoch": 10.3, "learning_rate": 4.4850966214338646e-05, "loss": 2.3178, "step": 3559000 }, { "epoch": 10.3, "learning_rate": 4.485024256669137e-05, "loss": 2.3126, "step": 3559500 }, { "epoch": 10.3, "learning_rate": 4.4849520366339384e-05, "loss": 2.3066, "step": 3560000 }, { "epoch": 10.31, "learning_rate": 4.4848796718692106e-05, "loss": 2.3288, "step": 3560500 }, { "epoch": 10.31, "learning_rate": 4.4848073071044836e-05, "loss": 2.3383, "step": 3561000 }, { "epoch": 10.31, "learning_rate": 4.484734942339756e-05, "loss": 2.3026, "step": 3561500 }, { "epoch": 10.31, "learning_rate": 4.484662722304557e-05, "loss": 2.3257, "step": 3562000 }, { "epoch": 10.31, "learning_rate": 4.4845903575398296e-05, "loss": 2.3144, "step": 3562500 }, { "epoch": 10.31, "learning_rate": 4.484518137504631e-05, "loss": 2.312, "step": 3563000 }, { "epoch": 10.31, "learning_rate": 4.4844459174694333e-05, "loss": 2.2965, "step": 3563500 }, { "epoch": 10.32, "learning_rate": 4.484373552704706e-05, "loss": 2.3335, "step": 3564000 }, { "epoch": 10.32, "learning_rate": 4.4843011879399785e-05, "loss": 2.3282, "step": 3564500 }, { "epoch": 10.32, "learning_rate": 4.484228823175251e-05, "loss": 2.3205, "step": 3565000 }, { "epoch": 10.32, "learning_rate": 4.484156458410523e-05, "loss": 2.3193, "step": 3565500 }, { "epoch": 10.32, "learning_rate": 4.484084093645795e-05, "loss": 2.3274, "step": 3566000 }, { "epoch": 10.32, "learning_rate": 4.4840117288810674e-05, "loss": 2.3086, "step": 3566500 }, { "epoch": 10.33, "learning_rate": 4.4839393641163396e-05, "loss": 2.3213, "step": 3567000 }, { "epoch": 10.33, "learning_rate": 4.483866999351612e-05, "loss": 2.3215, "step": 3567500 }, { "epoch": 10.33, "learning_rate": 4.483794634586884e-05, "loss": 2.3023, "step": 3568000 }, { "epoch": 10.33, "learning_rate": 4.483722269822156e-05, "loss": 2.3117, "step": 3568500 }, { "epoch": 10.33, "learning_rate": 4.4836499050574285e-05, "loss": 2.3112, "step": 3569000 }, { "epoch": 10.33, "learning_rate": 4.4835775402927014e-05, "loss": 2.2898, "step": 3569500 }, { "epoch": 10.33, "learning_rate": 4.4835051755279736e-05, "loss": 2.2993, "step": 3570000 }, { "epoch": 10.34, "learning_rate": 4.483432810763246e-05, "loss": 2.3147, "step": 3570500 }, { "epoch": 10.34, "learning_rate": 4.483360445998519e-05, "loss": 2.3069, "step": 3571000 }, { "epoch": 10.34, "learning_rate": 4.483288081233791e-05, "loss": 2.3149, "step": 3571500 }, { "epoch": 10.34, "learning_rate": 4.483215716469063e-05, "loss": 2.3284, "step": 3572000 }, { "epoch": 10.34, "learning_rate": 4.4831433517043354e-05, "loss": 2.3167, "step": 3572500 }, { "epoch": 10.34, "learning_rate": 4.483071131669137e-05, "loss": 2.3092, "step": 3573000 }, { "epoch": 10.34, "learning_rate": 4.482998766904409e-05, "loss": 2.3269, "step": 3573500 }, { "epoch": 10.35, "learning_rate": 4.4829264021396814e-05, "loss": 2.2891, "step": 3574000 }, { "epoch": 10.35, "learning_rate": 4.4828540373749536e-05, "loss": 2.3211, "step": 3574500 }, { "epoch": 10.35, "learning_rate": 4.482781817339756e-05, "loss": 2.2922, "step": 3575000 }, { "epoch": 10.35, "learning_rate": 4.482709452575028e-05, "loss": 2.3227, "step": 3575500 }, { "epoch": 10.35, "learning_rate": 4.4826370878103e-05, "loss": 2.3061, "step": 3576000 }, { "epoch": 10.35, "learning_rate": 4.4825647230455725e-05, "loss": 2.3221, "step": 3576500 }, { "epoch": 10.35, "learning_rate": 4.482492503010374e-05, "loss": 2.301, "step": 3577000 }, { "epoch": 10.36, "learning_rate": 4.482420138245646e-05, "loss": 2.3101, "step": 3577500 }, { "epoch": 10.36, "learning_rate": 4.4823477734809185e-05, "loss": 2.325, "step": 3578000 }, { "epoch": 10.36, "learning_rate": 4.482275553445721e-05, "loss": 2.3206, "step": 3578500 }, { "epoch": 10.36, "learning_rate": 4.4822031886809937e-05, "loss": 2.3196, "step": 3579000 }, { "epoch": 10.36, "learning_rate": 4.482130823916266e-05, "loss": 2.3545, "step": 3579500 }, { "epoch": 10.36, "learning_rate": 4.482058459151538e-05, "loss": 2.3138, "step": 3580000 }, { "epoch": 10.36, "learning_rate": 4.48198609438681e-05, "loss": 2.3355, "step": 3580500 }, { "epoch": 10.37, "learning_rate": 4.4819137296220826e-05, "loss": 2.3213, "step": 3581000 }, { "epoch": 10.37, "learning_rate": 4.481841509586884e-05, "loss": 2.3275, "step": 3581500 }, { "epoch": 10.37, "learning_rate": 4.4817692895516863e-05, "loss": 2.3366, "step": 3582000 }, { "epoch": 10.37, "learning_rate": 4.4816969247869586e-05, "loss": 2.3315, "step": 3582500 }, { "epoch": 10.37, "learning_rate": 4.481624560022231e-05, "loss": 2.3038, "step": 3583000 }, { "epoch": 10.37, "learning_rate": 4.481552195257503e-05, "loss": 2.3137, "step": 3583500 }, { "epoch": 10.37, "learning_rate": 4.481479830492775e-05, "loss": 2.3014, "step": 3584000 }, { "epoch": 10.38, "learning_rate": 4.4814074657280475e-05, "loss": 2.3165, "step": 3584500 }, { "epoch": 10.38, "learning_rate": 4.48133510096332e-05, "loss": 2.3193, "step": 3585000 }, { "epoch": 10.38, "learning_rate": 4.481262736198592e-05, "loss": 2.2968, "step": 3585500 }, { "epoch": 10.38, "learning_rate": 4.481190371433864e-05, "loss": 2.3057, "step": 3586000 }, { "epoch": 10.38, "learning_rate": 4.481118006669137e-05, "loss": 2.3089, "step": 3586500 }, { "epoch": 10.38, "learning_rate": 4.481045786633939e-05, "loss": 2.3229, "step": 3587000 }, { "epoch": 10.38, "learning_rate": 4.4809734218692115e-05, "loss": 2.3143, "step": 3587500 }, { "epoch": 10.39, "learning_rate": 4.480901057104484e-05, "loss": 2.3122, "step": 3588000 }, { "epoch": 10.39, "learning_rate": 4.480828692339756e-05, "loss": 2.3206, "step": 3588500 }, { "epoch": 10.39, "learning_rate": 4.480756327575028e-05, "loss": 2.3227, "step": 3589000 }, { "epoch": 10.39, "learning_rate": 4.4806839628103004e-05, "loss": 2.3301, "step": 3589500 }, { "epoch": 10.39, "learning_rate": 4.4806115980455726e-05, "loss": 2.3171, "step": 3590000 }, { "epoch": 10.39, "learning_rate": 4.480539233280845e-05, "loss": 2.3204, "step": 3590500 }, { "epoch": 10.39, "learning_rate": 4.4804670132456464e-05, "loss": 2.319, "step": 3591000 }, { "epoch": 10.4, "learning_rate": 4.4803946484809186e-05, "loss": 2.3049, "step": 3591500 }, { "epoch": 10.4, "learning_rate": 4.4803222837161915e-05, "loss": 2.323, "step": 3592000 }, { "epoch": 10.4, "learning_rate": 4.480249918951464e-05, "loss": 2.3086, "step": 3592500 }, { "epoch": 10.4, "learning_rate": 4.480177554186736e-05, "loss": 2.2907, "step": 3593000 }, { "epoch": 10.4, "learning_rate": 4.480105189422009e-05, "loss": 2.3337, "step": 3593500 }, { "epoch": 10.4, "learning_rate": 4.480032824657281e-05, "loss": 2.3256, "step": 3594000 }, { "epoch": 10.4, "learning_rate": 4.4799606046220826e-05, "loss": 2.3201, "step": 3594500 }, { "epoch": 10.41, "learning_rate": 4.479888384586884e-05, "loss": 2.3337, "step": 3595000 }, { "epoch": 10.41, "learning_rate": 4.4798160198221564e-05, "loss": 2.2999, "step": 3595500 }, { "epoch": 10.41, "learning_rate": 4.479743655057429e-05, "loss": 2.3098, "step": 3596000 }, { "epoch": 10.41, "learning_rate": 4.4796712902927015e-05, "loss": 2.3238, "step": 3596500 }, { "epoch": 10.41, "learning_rate": 4.479598925527974e-05, "loss": 2.3148, "step": 3597000 }, { "epoch": 10.41, "learning_rate": 4.479526560763246e-05, "loss": 2.3168, "step": 3597500 }, { "epoch": 10.41, "learning_rate": 4.479454195998518e-05, "loss": 2.2975, "step": 3598000 }, { "epoch": 10.42, "learning_rate": 4.4793818312337904e-05, "loss": 2.3296, "step": 3598500 }, { "epoch": 10.42, "learning_rate": 4.4793094664690627e-05, "loss": 2.3159, "step": 3599000 }, { "epoch": 10.42, "learning_rate": 4.479237101704335e-05, "loss": 2.3025, "step": 3599500 }, { "epoch": 10.42, "learning_rate": 4.4791648816691364e-05, "loss": 2.2736, "step": 3600000 }, { "epoch": 10.42, "learning_rate": 4.479092516904409e-05, "loss": 2.3003, "step": 3600500 }, { "epoch": 10.42, "learning_rate": 4.479020296869211e-05, "loss": 2.2924, "step": 3601000 }, { "epoch": 10.42, "learning_rate": 4.478947932104484e-05, "loss": 2.3206, "step": 3601500 }, { "epoch": 10.43, "learning_rate": 4.478875567339756e-05, "loss": 2.2963, "step": 3602000 }, { "epoch": 10.43, "learning_rate": 4.478803202575028e-05, "loss": 2.3218, "step": 3602500 }, { "epoch": 10.43, "learning_rate": 4.47873098253983e-05, "loss": 2.3097, "step": 3603000 }, { "epoch": 10.43, "learning_rate": 4.4786587625046313e-05, "loss": 2.2898, "step": 3603500 }, { "epoch": 10.43, "learning_rate": 4.478586397739904e-05, "loss": 2.3246, "step": 3604000 }, { "epoch": 10.43, "learning_rate": 4.4785140329751765e-05, "loss": 2.3324, "step": 3604500 }, { "epoch": 10.43, "learning_rate": 4.478441668210449e-05, "loss": 2.3072, "step": 3605000 }, { "epoch": 10.44, "learning_rate": 4.478369303445721e-05, "loss": 2.3204, "step": 3605500 }, { "epoch": 10.44, "learning_rate": 4.478296938680993e-05, "loss": 2.3258, "step": 3606000 }, { "epoch": 10.44, "learning_rate": 4.4782245739162654e-05, "loss": 2.2923, "step": 3606500 }, { "epoch": 10.44, "learning_rate": 4.4781522091515376e-05, "loss": 2.2975, "step": 3607000 }, { "epoch": 10.44, "learning_rate": 4.478079989116339e-05, "loss": 2.2948, "step": 3607500 }, { "epoch": 10.44, "learning_rate": 4.4780076243516114e-05, "loss": 2.3203, "step": 3608000 }, { "epoch": 10.45, "learning_rate": 4.477935259586884e-05, "loss": 2.3295, "step": 3608500 }, { "epoch": 10.45, "learning_rate": 4.4778628948221565e-05, "loss": 2.3071, "step": 3609000 }, { "epoch": 10.45, "learning_rate": 4.4777905300574294e-05, "loss": 2.3205, "step": 3609500 }, { "epoch": 10.45, "learning_rate": 4.4777181652927016e-05, "loss": 2.3056, "step": 3610000 }, { "epoch": 10.45, "learning_rate": 4.477645945257503e-05, "loss": 2.3205, "step": 3610500 }, { "epoch": 10.45, "learning_rate": 4.4775735804927754e-05, "loss": 2.2996, "step": 3611000 }, { "epoch": 10.45, "learning_rate": 4.4775012157280476e-05, "loss": 2.307, "step": 3611500 }, { "epoch": 10.46, "learning_rate": 4.47742885096332e-05, "loss": 2.3369, "step": 3612000 }, { "epoch": 10.46, "learning_rate": 4.477356486198592e-05, "loss": 2.2987, "step": 3612500 }, { "epoch": 10.46, "learning_rate": 4.477284121433864e-05, "loss": 2.2771, "step": 3613000 }, { "epoch": 10.46, "learning_rate": 4.4772117566691365e-05, "loss": 2.3447, "step": 3613500 }, { "epoch": 10.46, "learning_rate": 4.477139536633939e-05, "loss": 2.3368, "step": 3614000 }, { "epoch": 10.46, "learning_rate": 4.477067171869211e-05, "loss": 2.3306, "step": 3614500 }, { "epoch": 10.46, "learning_rate": 4.476994807104483e-05, "loss": 2.3036, "step": 3615000 }, { "epoch": 10.47, "learning_rate": 4.4769224423397554e-05, "loss": 2.3354, "step": 3615500 }, { "epoch": 10.47, "learning_rate": 4.476850077575028e-05, "loss": 2.3324, "step": 3616000 }, { "epoch": 10.47, "learning_rate": 4.4767777128103005e-05, "loss": 2.2986, "step": 3616500 }, { "epoch": 10.47, "learning_rate": 4.476705492775102e-05, "loss": 2.3304, "step": 3617000 }, { "epoch": 10.47, "learning_rate": 4.476633128010374e-05, "loss": 2.3198, "step": 3617500 }, { "epoch": 10.47, "learning_rate": 4.476560763245647e-05, "loss": 2.3054, "step": 3618000 }, { "epoch": 10.47, "learning_rate": 4.4764883984809194e-05, "loss": 2.3441, "step": 3618500 }, { "epoch": 10.48, "learning_rate": 4.476416033716192e-05, "loss": 2.2986, "step": 3619000 }, { "epoch": 10.48, "learning_rate": 4.476343668951464e-05, "loss": 2.3157, "step": 3619500 }, { "epoch": 10.48, "learning_rate": 4.476271304186736e-05, "loss": 2.3005, "step": 3620000 }, { "epoch": 10.48, "learning_rate": 4.476198939422008e-05, "loss": 2.3066, "step": 3620500 }, { "epoch": 10.48, "learning_rate": 4.4761265746572806e-05, "loss": 2.3124, "step": 3621000 }, { "epoch": 10.48, "learning_rate": 4.476054209892553e-05, "loss": 2.31, "step": 3621500 }, { "epoch": 10.48, "learning_rate": 4.475981989857354e-05, "loss": 2.3299, "step": 3622000 }, { "epoch": 10.49, "learning_rate": 4.475909625092627e-05, "loss": 2.3074, "step": 3622500 }, { "epoch": 10.49, "learning_rate": 4.4758372603278995e-05, "loss": 2.3118, "step": 3623000 }, { "epoch": 10.49, "learning_rate": 4.475764895563172e-05, "loss": 2.3074, "step": 3623500 }, { "epoch": 10.49, "learning_rate": 4.475692675527974e-05, "loss": 2.3237, "step": 3624000 }, { "epoch": 10.49, "learning_rate": 4.475620310763246e-05, "loss": 2.3325, "step": 3624500 }, { "epoch": 10.49, "learning_rate": 4.4755479459985184e-05, "loss": 2.3181, "step": 3625000 }, { "epoch": 10.49, "learning_rate": 4.4754755812337906e-05, "loss": 2.3079, "step": 3625500 }, { "epoch": 10.5, "learning_rate": 4.475403216469063e-05, "loss": 2.3159, "step": 3626000 }, { "epoch": 10.5, "learning_rate": 4.475330851704335e-05, "loss": 2.3264, "step": 3626500 }, { "epoch": 10.5, "learning_rate": 4.475258486939607e-05, "loss": 2.3158, "step": 3627000 }, { "epoch": 10.5, "learning_rate": 4.4751862669044095e-05, "loss": 2.2862, "step": 3627500 }, { "epoch": 10.5, "learning_rate": 4.475114046869211e-05, "loss": 2.3071, "step": 3628000 }, { "epoch": 10.5, "learning_rate": 4.475041682104483e-05, "loss": 2.3131, "step": 3628500 }, { "epoch": 10.5, "learning_rate": 4.474969462069285e-05, "loss": 2.2918, "step": 3629000 }, { "epoch": 10.51, "learning_rate": 4.474897097304557e-05, "loss": 2.3153, "step": 3629500 }, { "epoch": 10.51, "learning_rate": 4.474824732539829e-05, "loss": 2.2893, "step": 3630000 }, { "epoch": 10.51, "learning_rate": 4.474752367775102e-05, "loss": 2.3049, "step": 3630500 }, { "epoch": 10.51, "learning_rate": 4.4746800030103744e-05, "loss": 2.297, "step": 3631000 }, { "epoch": 10.51, "learning_rate": 4.474607638245647e-05, "loss": 2.3007, "step": 3631500 }, { "epoch": 10.51, "learning_rate": 4.4745352734809195e-05, "loss": 2.3007, "step": 3632000 }, { "epoch": 10.51, "learning_rate": 4.474462908716192e-05, "loss": 2.315, "step": 3632500 }, { "epoch": 10.52, "learning_rate": 4.474390543951464e-05, "loss": 2.3077, "step": 3633000 }, { "epoch": 10.52, "learning_rate": 4.474318179186736e-05, "loss": 2.321, "step": 3633500 }, { "epoch": 10.52, "learning_rate": 4.4742458144220084e-05, "loss": 2.3097, "step": 3634000 }, { "epoch": 10.52, "learning_rate": 4.4741734496572806e-05, "loss": 2.3118, "step": 3634500 }, { "epoch": 10.52, "learning_rate": 4.474101229622082e-05, "loss": 2.3146, "step": 3635000 }, { "epoch": 10.52, "learning_rate": 4.4740290095868844e-05, "loss": 2.3096, "step": 3635500 }, { "epoch": 10.52, "learning_rate": 4.4739566448221567e-05, "loss": 2.3122, "step": 3636000 }, { "epoch": 10.53, "learning_rate": 4.473884280057429e-05, "loss": 2.3357, "step": 3636500 }, { "epoch": 10.53, "learning_rate": 4.4738120600222304e-05, "loss": 2.3068, "step": 3637000 }, { "epoch": 10.53, "learning_rate": 4.4737396952575027e-05, "loss": 2.3483, "step": 3637500 }, { "epoch": 10.53, "learning_rate": 4.473667330492775e-05, "loss": 2.3151, "step": 3638000 }, { "epoch": 10.53, "learning_rate": 4.473594965728047e-05, "loss": 2.3061, "step": 3638500 }, { "epoch": 10.53, "learning_rate": 4.47352260096332e-05, "loss": 2.321, "step": 3639000 }, { "epoch": 10.53, "learning_rate": 4.473450236198592e-05, "loss": 2.3246, "step": 3639500 }, { "epoch": 10.54, "learning_rate": 4.4733778714338644e-05, "loss": 2.3119, "step": 3640000 }, { "epoch": 10.54, "learning_rate": 4.473305651398667e-05, "loss": 2.2968, "step": 3640500 }, { "epoch": 10.54, "learning_rate": 4.473233286633939e-05, "loss": 2.3374, "step": 3641000 }, { "epoch": 10.54, "learning_rate": 4.473160921869211e-05, "loss": 2.3078, "step": 3641500 }, { "epoch": 10.54, "learning_rate": 4.4730885571044834e-05, "loss": 2.3153, "step": 3642000 }, { "epoch": 10.54, "learning_rate": 4.473016337069285e-05, "loss": 2.3131, "step": 3642500 }, { "epoch": 10.54, "learning_rate": 4.472943972304557e-05, "loss": 2.307, "step": 3643000 }, { "epoch": 10.55, "learning_rate": 4.47287160753983e-05, "loss": 2.3166, "step": 3643500 }, { "epoch": 10.55, "learning_rate": 4.472799242775102e-05, "loss": 2.3036, "step": 3644000 }, { "epoch": 10.55, "learning_rate": 4.4727268780103745e-05, "loss": 2.3033, "step": 3644500 }, { "epoch": 10.55, "learning_rate": 4.472654513245647e-05, "loss": 2.3061, "step": 3645000 }, { "epoch": 10.55, "learning_rate": 4.472582148480919e-05, "loss": 2.314, "step": 3645500 }, { "epoch": 10.55, "learning_rate": 4.472509783716192e-05, "loss": 2.3211, "step": 3646000 }, { "epoch": 10.56, "learning_rate": 4.472437418951464e-05, "loss": 2.3232, "step": 3646500 }, { "epoch": 10.56, "learning_rate": 4.472365054186736e-05, "loss": 2.2904, "step": 3647000 }, { "epoch": 10.56, "learning_rate": 4.4722926894220085e-05, "loss": 2.3177, "step": 3647500 }, { "epoch": 10.56, "learning_rate": 4.472220324657281e-05, "loss": 2.3221, "step": 3648000 }, { "epoch": 10.56, "learning_rate": 4.472147959892553e-05, "loss": 2.3296, "step": 3648500 }, { "epoch": 10.56, "learning_rate": 4.472075595127825e-05, "loss": 2.315, "step": 3649000 }, { "epoch": 10.56, "learning_rate": 4.4720032303630974e-05, "loss": 2.3182, "step": 3649500 }, { "epoch": 10.57, "learning_rate": 4.4719308655983696e-05, "loss": 2.3072, "step": 3650000 }, { "epoch": 10.57, "learning_rate": 4.471858645563172e-05, "loss": 2.298, "step": 3650500 }, { "epoch": 10.57, "learning_rate": 4.4717864255279734e-05, "loss": 2.3486, "step": 3651000 }, { "epoch": 10.57, "learning_rate": 4.4717140607632456e-05, "loss": 2.3215, "step": 3651500 }, { "epoch": 10.57, "learning_rate": 4.471641695998518e-05, "loss": 2.3109, "step": 3652000 }, { "epoch": 10.57, "learning_rate": 4.47156947596332e-05, "loss": 2.2995, "step": 3652500 }, { "epoch": 10.57, "learning_rate": 4.471497111198592e-05, "loss": 2.3399, "step": 3653000 }, { "epoch": 10.58, "learning_rate": 4.4714247464338645e-05, "loss": 2.3253, "step": 3653500 }, { "epoch": 10.58, "learning_rate": 4.4713523816691374e-05, "loss": 2.2985, "step": 3654000 }, { "epoch": 10.58, "learning_rate": 4.4712800169044097e-05, "loss": 2.303, "step": 3654500 }, { "epoch": 10.58, "learning_rate": 4.471207652139682e-05, "loss": 2.3236, "step": 3655000 }, { "epoch": 10.58, "learning_rate": 4.471135287374954e-05, "loss": 2.2951, "step": 3655500 }, { "epoch": 10.58, "learning_rate": 4.471062922610226e-05, "loss": 2.322, "step": 3656000 }, { "epoch": 10.58, "learning_rate": 4.4709905578454985e-05, "loss": 2.3172, "step": 3656500 }, { "epoch": 10.59, "learning_rate": 4.470918193080771e-05, "loss": 2.313, "step": 3657000 }, { "epoch": 10.59, "learning_rate": 4.470845828316043e-05, "loss": 2.2927, "step": 3657500 }, { "epoch": 10.59, "learning_rate": 4.470773463551315e-05, "loss": 2.2953, "step": 3658000 }, { "epoch": 10.59, "learning_rate": 4.4707012435161174e-05, "loss": 2.3205, "step": 3658500 }, { "epoch": 10.59, "learning_rate": 4.47062887875139e-05, "loss": 2.3263, "step": 3659000 }, { "epoch": 10.59, "learning_rate": 4.470556513986662e-05, "loss": 2.343, "step": 3659500 }, { "epoch": 10.59, "learning_rate": 4.470484149221934e-05, "loss": 2.2819, "step": 3660000 }, { "epoch": 10.6, "learning_rate": 4.4704117844572063e-05, "loss": 2.3002, "step": 3660500 }, { "epoch": 10.6, "learning_rate": 4.4703395644220086e-05, "loss": 2.3189, "step": 3661000 }, { "epoch": 10.6, "learning_rate": 4.470267199657281e-05, "loss": 2.3005, "step": 3661500 }, { "epoch": 10.6, "learning_rate": 4.470194834892553e-05, "loss": 2.2919, "step": 3662000 }, { "epoch": 10.6, "learning_rate": 4.470122470127825e-05, "loss": 2.3284, "step": 3662500 }, { "epoch": 10.6, "learning_rate": 4.4700501053630975e-05, "loss": 2.311, "step": 3663000 }, { "epoch": 10.6, "learning_rate": 4.4699777405983704e-05, "loss": 2.3006, "step": 3663500 }, { "epoch": 10.61, "learning_rate": 4.469905520563172e-05, "loss": 2.3178, "step": 3664000 }, { "epoch": 10.61, "learning_rate": 4.469833155798444e-05, "loss": 2.3153, "step": 3664500 }, { "epoch": 10.61, "learning_rate": 4.4697607910337164e-05, "loss": 2.3001, "step": 3665000 }, { "epoch": 10.61, "learning_rate": 4.4696884262689886e-05, "loss": 2.3369, "step": 3665500 }, { "epoch": 10.61, "learning_rate": 4.46961620623379e-05, "loss": 2.3378, "step": 3666000 }, { "epoch": 10.61, "learning_rate": 4.4695438414690624e-05, "loss": 2.3457, "step": 3666500 }, { "epoch": 10.61, "learning_rate": 4.469471476704335e-05, "loss": 2.3198, "step": 3667000 }, { "epoch": 10.62, "learning_rate": 4.4693991119396075e-05, "loss": 2.3316, "step": 3667500 }, { "epoch": 10.62, "learning_rate": 4.469326891904409e-05, "loss": 2.3104, "step": 3668000 }, { "epoch": 10.62, "learning_rate": 4.469254527139682e-05, "loss": 2.2941, "step": 3668500 }, { "epoch": 10.62, "learning_rate": 4.469182162374954e-05, "loss": 2.3213, "step": 3669000 }, { "epoch": 10.62, "learning_rate": 4.4691097976102264e-05, "loss": 2.2985, "step": 3669500 }, { "epoch": 10.62, "learning_rate": 4.469037577575028e-05, "loss": 2.3033, "step": 3670000 }, { "epoch": 10.62, "learning_rate": 4.4689652128103e-05, "loss": 2.3113, "step": 3670500 }, { "epoch": 10.63, "learning_rate": 4.4688928480455724e-05, "loss": 2.3247, "step": 3671000 }, { "epoch": 10.63, "learning_rate": 4.468820483280845e-05, "loss": 2.3117, "step": 3671500 }, { "epoch": 10.63, "learning_rate": 4.4687481185161175e-05, "loss": 2.3388, "step": 3672000 }, { "epoch": 10.63, "learning_rate": 4.46867575375139e-05, "loss": 2.2947, "step": 3672500 }, { "epoch": 10.63, "learning_rate": 4.468603388986662e-05, "loss": 2.3024, "step": 3673000 }, { "epoch": 10.63, "learning_rate": 4.468531024221934e-05, "loss": 2.3195, "step": 3673500 }, { "epoch": 10.63, "learning_rate": 4.4684586594572064e-05, "loss": 2.2959, "step": 3674000 }, { "epoch": 10.64, "learning_rate": 4.468386439422008e-05, "loss": 2.324, "step": 3674500 }, { "epoch": 10.64, "learning_rate": 4.46831407465728e-05, "loss": 2.2992, "step": 3675000 }, { "epoch": 10.64, "learning_rate": 4.4682417098925524e-05, "loss": 2.3196, "step": 3675500 }, { "epoch": 10.64, "learning_rate": 4.468169345127825e-05, "loss": 2.3218, "step": 3676000 }, { "epoch": 10.64, "learning_rate": 4.468097269822157e-05, "loss": 2.3117, "step": 3676500 }, { "epoch": 10.64, "learning_rate": 4.468024905057429e-05, "loss": 2.3335, "step": 3677000 }, { "epoch": 10.64, "learning_rate": 4.467952540292701e-05, "loss": 2.3094, "step": 3677500 }, { "epoch": 10.65, "learning_rate": 4.4678801755279736e-05, "loss": 2.2977, "step": 3678000 }, { "epoch": 10.65, "learning_rate": 4.467807810763246e-05, "loss": 2.3174, "step": 3678500 }, { "epoch": 10.65, "learning_rate": 4.467735445998518e-05, "loss": 2.305, "step": 3679000 }, { "epoch": 10.65, "learning_rate": 4.46766308123379e-05, "loss": 2.3139, "step": 3679500 }, { "epoch": 10.65, "learning_rate": 4.467590716469063e-05, "loss": 2.3491, "step": 3680000 }, { "epoch": 10.65, "learning_rate": 4.4675183517043354e-05, "loss": 2.2985, "step": 3680500 }, { "epoch": 10.65, "learning_rate": 4.4674459869396076e-05, "loss": 2.3137, "step": 3681000 }, { "epoch": 10.66, "learning_rate": 4.467373766904409e-05, "loss": 2.3176, "step": 3681500 }, { "epoch": 10.66, "learning_rate": 4.467301546869211e-05, "loss": 2.3213, "step": 3682000 }, { "epoch": 10.66, "learning_rate": 4.467229182104483e-05, "loss": 2.3039, "step": 3682500 }, { "epoch": 10.66, "learning_rate": 4.467156817339755e-05, "loss": 2.3138, "step": 3683000 }, { "epoch": 10.66, "learning_rate": 4.467084452575028e-05, "loss": 2.3051, "step": 3683500 }, { "epoch": 10.66, "learning_rate": 4.46701223253983e-05, "loss": 2.286, "step": 3684000 }, { "epoch": 10.67, "learning_rate": 4.4669398677751025e-05, "loss": 2.3236, "step": 3684500 }, { "epoch": 10.67, "learning_rate": 4.466867503010375e-05, "loss": 2.3233, "step": 3685000 }, { "epoch": 10.67, "learning_rate": 4.466795138245647e-05, "loss": 2.3075, "step": 3685500 }, { "epoch": 10.67, "learning_rate": 4.466722773480919e-05, "loss": 2.3164, "step": 3686000 }, { "epoch": 10.67, "learning_rate": 4.4666504087161914e-05, "loss": 2.3396, "step": 3686500 }, { "epoch": 10.67, "learning_rate": 4.4665780439514636e-05, "loss": 2.313, "step": 3687000 }, { "epoch": 10.67, "learning_rate": 4.466505679186736e-05, "loss": 2.2806, "step": 3687500 }, { "epoch": 10.68, "learning_rate": 4.466433314422008e-05, "loss": 2.2998, "step": 3688000 }, { "epoch": 10.68, "learning_rate": 4.46636109438681e-05, "loss": 2.3111, "step": 3688500 }, { "epoch": 10.68, "learning_rate": 4.466288874351612e-05, "loss": 2.3123, "step": 3689000 }, { "epoch": 10.68, "learning_rate": 4.466216509586884e-05, "loss": 2.3107, "step": 3689500 }, { "epoch": 10.68, "learning_rate": 4.466144144822156e-05, "loss": 2.309, "step": 3690000 }, { "epoch": 10.68, "learning_rate": 4.4660717800574285e-05, "loss": 2.3302, "step": 3690500 }, { "epoch": 10.68, "learning_rate": 4.465999415292701e-05, "loss": 2.3073, "step": 3691000 }, { "epoch": 10.69, "learning_rate": 4.4659270505279736e-05, "loss": 2.3142, "step": 3691500 }, { "epoch": 10.69, "learning_rate": 4.465854685763246e-05, "loss": 2.3107, "step": 3692000 }, { "epoch": 10.69, "learning_rate": 4.465782320998518e-05, "loss": 2.3118, "step": 3692500 }, { "epoch": 10.69, "learning_rate": 4.46571010096332e-05, "loss": 2.3175, "step": 3693000 }, { "epoch": 10.69, "learning_rate": 4.465637880928122e-05, "loss": 2.3135, "step": 3693500 }, { "epoch": 10.69, "learning_rate": 4.465565516163394e-05, "loss": 2.3214, "step": 3694000 }, { "epoch": 10.69, "learning_rate": 4.465493151398666e-05, "loss": 2.3046, "step": 3694500 }, { "epoch": 10.7, "learning_rate": 4.4654207866339385e-05, "loss": 2.2735, "step": 3695000 }, { "epoch": 10.7, "learning_rate": 4.465348421869211e-05, "loss": 2.312, "step": 3695500 }, { "epoch": 10.7, "learning_rate": 4.465276201834013e-05, "loss": 2.3198, "step": 3696000 }, { "epoch": 10.7, "learning_rate": 4.465203837069285e-05, "loss": 2.3151, "step": 3696500 }, { "epoch": 10.7, "learning_rate": 4.4651314723045574e-05, "loss": 2.3109, "step": 3697000 }, { "epoch": 10.7, "learning_rate": 4.46505910753983e-05, "loss": 2.3303, "step": 3697500 }, { "epoch": 10.7, "learning_rate": 4.464986742775102e-05, "loss": 2.2977, "step": 3698000 }, { "epoch": 10.71, "learning_rate": 4.464914378010374e-05, "loss": 2.2965, "step": 3698500 }, { "epoch": 10.71, "learning_rate": 4.464842013245647e-05, "loss": 2.3177, "step": 3699000 }, { "epoch": 10.71, "learning_rate": 4.4647697932104486e-05, "loss": 2.3321, "step": 3699500 }, { "epoch": 10.71, "learning_rate": 4.464697573175251e-05, "loss": 2.2906, "step": 3700000 }, { "epoch": 10.71, "learning_rate": 4.464625208410523e-05, "loss": 2.3131, "step": 3700500 }, { "epoch": 10.71, "learning_rate": 4.464552843645795e-05, "loss": 2.3201, "step": 3701000 }, { "epoch": 10.71, "learning_rate": 4.4644804788810675e-05, "loss": 2.2943, "step": 3701500 }, { "epoch": 10.72, "learning_rate": 4.46440811411634e-05, "loss": 2.3072, "step": 3702000 }, { "epoch": 10.72, "learning_rate": 4.464335749351612e-05, "loss": 2.3114, "step": 3702500 }, { "epoch": 10.72, "learning_rate": 4.464263384586884e-05, "loss": 2.3206, "step": 3703000 }, { "epoch": 10.72, "learning_rate": 4.4641910198221564e-05, "loss": 2.3167, "step": 3703500 }, { "epoch": 10.72, "learning_rate": 4.4641186550574286e-05, "loss": 2.3103, "step": 3704000 }, { "epoch": 10.72, "learning_rate": 4.464046435022231e-05, "loss": 2.3133, "step": 3704500 }, { "epoch": 10.72, "learning_rate": 4.463974070257503e-05, "loss": 2.323, "step": 3705000 }, { "epoch": 10.73, "learning_rate": 4.463901705492775e-05, "loss": 2.3088, "step": 3705500 }, { "epoch": 10.73, "learning_rate": 4.463829485457577e-05, "loss": 2.3061, "step": 3706000 }, { "epoch": 10.73, "learning_rate": 4.46375712069285e-05, "loss": 2.3085, "step": 3706500 }, { "epoch": 10.73, "learning_rate": 4.463684755928122e-05, "loss": 2.3119, "step": 3707000 }, { "epoch": 10.73, "learning_rate": 4.463612391163394e-05, "loss": 2.2953, "step": 3707500 }, { "epoch": 10.73, "learning_rate": 4.4635400263986664e-05, "loss": 2.3352, "step": 3708000 }, { "epoch": 10.73, "learning_rate": 4.4634676616339386e-05, "loss": 2.3264, "step": 3708500 }, { "epoch": 10.74, "learning_rate": 4.463395296869211e-05, "loss": 2.3102, "step": 3709000 }, { "epoch": 10.74, "learning_rate": 4.463322932104483e-05, "loss": 2.2989, "step": 3709500 }, { "epoch": 10.74, "learning_rate": 4.463250567339756e-05, "loss": 2.3089, "step": 3710000 }, { "epoch": 10.74, "learning_rate": 4.463178202575028e-05, "loss": 2.3277, "step": 3710500 }, { "epoch": 10.74, "learning_rate": 4.4631058378103004e-05, "loss": 2.3192, "step": 3711000 }, { "epoch": 10.74, "learning_rate": 4.4630334730455726e-05, "loss": 2.3136, "step": 3711500 }, { "epoch": 10.74, "learning_rate": 4.462961108280845e-05, "loss": 2.2977, "step": 3712000 }, { "epoch": 10.75, "learning_rate": 4.4628888882456464e-05, "loss": 2.3193, "step": 3712500 }, { "epoch": 10.75, "learning_rate": 4.4628165234809186e-05, "loss": 2.3006, "step": 3713000 }, { "epoch": 10.75, "learning_rate": 4.4627441587161915e-05, "loss": 2.311, "step": 3713500 }, { "epoch": 10.75, "learning_rate": 4.462671793951464e-05, "loss": 2.3234, "step": 3714000 }, { "epoch": 10.75, "learning_rate": 4.462599573916266e-05, "loss": 2.3066, "step": 3714500 }, { "epoch": 10.75, "learning_rate": 4.4625273538810676e-05, "loss": 2.348, "step": 3715000 }, { "epoch": 10.75, "learning_rate": 4.46245498911634e-05, "loss": 2.2944, "step": 3715500 }, { "epoch": 10.76, "learning_rate": 4.462382624351612e-05, "loss": 2.3281, "step": 3716000 }, { "epoch": 10.76, "learning_rate": 4.462310259586884e-05, "loss": 2.3089, "step": 3716500 }, { "epoch": 10.76, "learning_rate": 4.4622378948221565e-05, "loss": 2.2989, "step": 3717000 }, { "epoch": 10.76, "learning_rate": 4.462165530057429e-05, "loss": 2.3214, "step": 3717500 }, { "epoch": 10.76, "learning_rate": 4.462093165292701e-05, "loss": 2.3225, "step": 3718000 }, { "epoch": 10.76, "learning_rate": 4.462020800527973e-05, "loss": 2.3363, "step": 3718500 }, { "epoch": 10.76, "learning_rate": 4.4619485804927754e-05, "loss": 2.2883, "step": 3719000 }, { "epoch": 10.77, "learning_rate": 4.4618762157280476e-05, "loss": 2.3253, "step": 3719500 }, { "epoch": 10.77, "learning_rate": 4.46180385096332e-05, "loss": 2.3091, "step": 3720000 }, { "epoch": 10.77, "learning_rate": 4.461731486198592e-05, "loss": 2.312, "step": 3720500 }, { "epoch": 10.77, "learning_rate": 4.461659121433864e-05, "loss": 2.3333, "step": 3721000 }, { "epoch": 10.77, "learning_rate": 4.461587046128196e-05, "loss": 2.3184, "step": 3721500 }, { "epoch": 10.77, "learning_rate": 4.461514681363469e-05, "loss": 2.2891, "step": 3722000 }, { "epoch": 10.78, "learning_rate": 4.461442316598741e-05, "loss": 2.2843, "step": 3722500 }, { "epoch": 10.78, "learning_rate": 4.461369951834013e-05, "loss": 2.2941, "step": 3723000 }, { "epoch": 10.78, "learning_rate": 4.4612975870692854e-05, "loss": 2.3364, "step": 3723500 }, { "epoch": 10.78, "learning_rate": 4.461225367034087e-05, "loss": 2.3271, "step": 3724000 }, { "epoch": 10.78, "learning_rate": 4.461153002269359e-05, "loss": 2.3317, "step": 3724500 }, { "epoch": 10.78, "learning_rate": 4.4610806375046314e-05, "loss": 2.3112, "step": 3725000 }, { "epoch": 10.78, "learning_rate": 4.4610082727399036e-05, "loss": 2.3373, "step": 3725500 }, { "epoch": 10.79, "learning_rate": 4.460935907975176e-05, "loss": 2.3123, "step": 3726000 }, { "epoch": 10.79, "learning_rate": 4.460863543210449e-05, "loss": 2.3247, "step": 3726500 }, { "epoch": 10.79, "learning_rate": 4.460791178445721e-05, "loss": 2.3101, "step": 3727000 }, { "epoch": 10.79, "learning_rate": 4.460718813680993e-05, "loss": 2.3188, "step": 3727500 }, { "epoch": 10.79, "learning_rate": 4.460646593645795e-05, "loss": 2.2933, "step": 3728000 }, { "epoch": 10.79, "learning_rate": 4.460574228881067e-05, "loss": 2.3381, "step": 3728500 }, { "epoch": 10.79, "learning_rate": 4.46050186411634e-05, "loss": 2.3135, "step": 3729000 }, { "epoch": 10.8, "learning_rate": 4.460429499351612e-05, "loss": 2.3071, "step": 3729500 }, { "epoch": 10.8, "learning_rate": 4.460357134586884e-05, "loss": 2.316, "step": 3730000 }, { "epoch": 10.8, "learning_rate": 4.4602847698221565e-05, "loss": 2.2933, "step": 3730500 }, { "epoch": 10.8, "learning_rate": 4.460212549786959e-05, "loss": 2.3147, "step": 3731000 }, { "epoch": 10.8, "learning_rate": 4.460140185022231e-05, "loss": 2.3235, "step": 3731500 }, { "epoch": 10.8, "learning_rate": 4.460067820257503e-05, "loss": 2.323, "step": 3732000 }, { "epoch": 10.8, "learning_rate": 4.4599954554927754e-05, "loss": 2.3248, "step": 3732500 }, { "epoch": 10.81, "learning_rate": 4.4599230907280477e-05, "loss": 2.2862, "step": 3733000 }, { "epoch": 10.81, "learning_rate": 4.45985072596332e-05, "loss": 2.3261, "step": 3733500 }, { "epoch": 10.81, "learning_rate": 4.459778361198592e-05, "loss": 2.3116, "step": 3734000 }, { "epoch": 10.81, "learning_rate": 4.459705996433864e-05, "loss": 2.3179, "step": 3734500 }, { "epoch": 10.81, "learning_rate": 4.459633776398666e-05, "loss": 2.287, "step": 3735000 }, { "epoch": 10.81, "learning_rate": 4.459561556363468e-05, "loss": 2.3365, "step": 3735500 }, { "epoch": 10.81, "learning_rate": 4.45948919159874e-05, "loss": 2.3006, "step": 3736000 }, { "epoch": 10.82, "learning_rate": 4.459416826834013e-05, "loss": 2.3095, "step": 3736500 }, { "epoch": 10.82, "learning_rate": 4.4593444620692855e-05, "loss": 2.3028, "step": 3737000 }, { "epoch": 10.82, "learning_rate": 4.459272242034087e-05, "loss": 2.3221, "step": 3737500 }, { "epoch": 10.82, "learning_rate": 4.459199877269359e-05, "loss": 2.3221, "step": 3738000 }, { "epoch": 10.82, "learning_rate": 4.4591275125046315e-05, "loss": 2.3182, "step": 3738500 }, { "epoch": 10.82, "learning_rate": 4.459055147739904e-05, "loss": 2.3084, "step": 3739000 }, { "epoch": 10.82, "learning_rate": 4.458982927704706e-05, "loss": 2.3347, "step": 3739500 }, { "epoch": 10.83, "learning_rate": 4.458910562939978e-05, "loss": 2.3243, "step": 3740000 }, { "epoch": 10.83, "learning_rate": 4.4588381981752504e-05, "loss": 2.3109, "step": 3740500 }, { "epoch": 10.83, "learning_rate": 4.4587658334105226e-05, "loss": 2.3257, "step": 3741000 }, { "epoch": 10.83, "learning_rate": 4.458693468645795e-05, "loss": 2.3264, "step": 3741500 }, { "epoch": 10.83, "learning_rate": 4.4586212486105964e-05, "loss": 2.3214, "step": 3742000 }, { "epoch": 10.83, "learning_rate": 4.4585488838458686e-05, "loss": 2.2818, "step": 3742500 }, { "epoch": 10.83, "learning_rate": 4.4584765190811415e-05, "loss": 2.3009, "step": 3743000 }, { "epoch": 10.84, "learning_rate": 4.458404299045943e-05, "loss": 2.3259, "step": 3743500 }, { "epoch": 10.84, "learning_rate": 4.458331934281215e-05, "loss": 2.3041, "step": 3744000 }, { "epoch": 10.84, "learning_rate": 4.458259569516488e-05, "loss": 2.2969, "step": 3744500 }, { "epoch": 10.84, "learning_rate": 4.4581872047517604e-05, "loss": 2.307, "step": 3745000 }, { "epoch": 10.84, "learning_rate": 4.4581148399870326e-05, "loss": 2.3028, "step": 3745500 }, { "epoch": 10.84, "learning_rate": 4.458042475222305e-05, "loss": 2.2941, "step": 3746000 }, { "epoch": 10.84, "learning_rate": 4.457970110457577e-05, "loss": 2.2926, "step": 3746500 }, { "epoch": 10.85, "learning_rate": 4.457897745692849e-05, "loss": 2.3077, "step": 3747000 }, { "epoch": 10.85, "learning_rate": 4.4578253809281215e-05, "loss": 2.321, "step": 3747500 }, { "epoch": 10.85, "learning_rate": 4.457753016163394e-05, "loss": 2.3167, "step": 3748000 }, { "epoch": 10.85, "learning_rate": 4.4576806513986666e-05, "loss": 2.3043, "step": 3748500 }, { "epoch": 10.85, "learning_rate": 4.457608286633939e-05, "loss": 2.3258, "step": 3749000 }, { "epoch": 10.85, "learning_rate": 4.457535921869211e-05, "loss": 2.3045, "step": 3749500 }, { "epoch": 10.85, "learning_rate": 4.457463846563542e-05, "loss": 2.3111, "step": 3750000 }, { "epoch": 10.86, "learning_rate": 4.457391481798814e-05, "loss": 2.3204, "step": 3750500 }, { "epoch": 10.86, "learning_rate": 4.4573191170340864e-05, "loss": 2.3106, "step": 3751000 }, { "epoch": 10.86, "learning_rate": 4.457246752269359e-05, "loss": 2.2958, "step": 3751500 }, { "epoch": 10.86, "learning_rate": 4.4571743875046315e-05, "loss": 2.318, "step": 3752000 }, { "epoch": 10.86, "learning_rate": 4.457102167469434e-05, "loss": 2.3144, "step": 3752500 }, { "epoch": 10.86, "learning_rate": 4.457029802704706e-05, "loss": 2.33, "step": 3753000 }, { "epoch": 10.86, "learning_rate": 4.456957437939978e-05, "loss": 2.3127, "step": 3753500 }, { "epoch": 10.87, "learning_rate": 4.4568850731752504e-05, "loss": 2.3216, "step": 3754000 }, { "epoch": 10.87, "learning_rate": 4.456812708410523e-05, "loss": 2.3337, "step": 3754500 }, { "epoch": 10.87, "learning_rate": 4.456740488375324e-05, "loss": 2.3216, "step": 3755000 }, { "epoch": 10.87, "learning_rate": 4.4566681236105965e-05, "loss": 2.2996, "step": 3755500 }, { "epoch": 10.87, "learning_rate": 4.456595758845869e-05, "loss": 2.3207, "step": 3756000 }, { "epoch": 10.87, "learning_rate": 4.4565233940811416e-05, "loss": 2.319, "step": 3756500 }, { "epoch": 10.87, "learning_rate": 4.456451029316414e-05, "loss": 2.3064, "step": 3757000 }, { "epoch": 10.88, "learning_rate": 4.456378664551686e-05, "loss": 2.2996, "step": 3757500 }, { "epoch": 10.88, "learning_rate": 4.456306299786958e-05, "loss": 2.3426, "step": 3758000 }, { "epoch": 10.88, "learning_rate": 4.4562339350222305e-05, "loss": 2.2958, "step": 3758500 }, { "epoch": 10.88, "learning_rate": 4.4561615702575034e-05, "loss": 2.2931, "step": 3759000 }, { "epoch": 10.88, "learning_rate": 4.456089494951834e-05, "loss": 2.2935, "step": 3759500 }, { "epoch": 10.88, "learning_rate": 4.4560171301871065e-05, "loss": 2.3385, "step": 3760000 }, { "epoch": 10.89, "learning_rate": 4.455944765422379e-05, "loss": 2.3039, "step": 3760500 }, { "epoch": 10.89, "learning_rate": 4.4558724006576516e-05, "loss": 2.3058, "step": 3761000 }, { "epoch": 10.89, "learning_rate": 4.455800035892924e-05, "loss": 2.291, "step": 3761500 }, { "epoch": 10.89, "learning_rate": 4.455727671128196e-05, "loss": 2.324, "step": 3762000 }, { "epoch": 10.89, "learning_rate": 4.4556554510929976e-05, "loss": 2.3053, "step": 3762500 }, { "epoch": 10.89, "learning_rate": 4.45558308632827e-05, "loss": 2.3333, "step": 3763000 }, { "epoch": 10.89, "learning_rate": 4.4555108662930714e-05, "loss": 2.3071, "step": 3763500 }, { "epoch": 10.9, "learning_rate": 4.455438501528344e-05, "loss": 2.3267, "step": 3764000 }, { "epoch": 10.9, "learning_rate": 4.4553661367636165e-05, "loss": 2.3389, "step": 3764500 }, { "epoch": 10.9, "learning_rate": 4.455293771998889e-05, "loss": 2.2908, "step": 3765000 }, { "epoch": 10.9, "learning_rate": 4.455221407234161e-05, "loss": 2.3056, "step": 3765500 }, { "epoch": 10.9, "learning_rate": 4.455149042469433e-05, "loss": 2.288, "step": 3766000 }, { "epoch": 10.9, "learning_rate": 4.455076677704706e-05, "loss": 2.3325, "step": 3766500 }, { "epoch": 10.9, "learning_rate": 4.455004312939978e-05, "loss": 2.3234, "step": 3767000 }, { "epoch": 10.91, "learning_rate": 4.4549319481752505e-05, "loss": 2.3317, "step": 3767500 }, { "epoch": 10.91, "learning_rate": 4.454859583410523e-05, "loss": 2.2849, "step": 3768000 }, { "epoch": 10.91, "learning_rate": 4.454787218645795e-05, "loss": 2.3031, "step": 3768500 }, { "epoch": 10.91, "learning_rate": 4.4547149986105965e-05, "loss": 2.2983, "step": 3769000 }, { "epoch": 10.91, "learning_rate": 4.4546426338458694e-05, "loss": 2.3219, "step": 3769500 }, { "epoch": 10.91, "learning_rate": 4.4545702690811417e-05, "loss": 2.3367, "step": 3770000 }, { "epoch": 10.91, "learning_rate": 4.454497904316414e-05, "loss": 2.3163, "step": 3770500 }, { "epoch": 10.92, "learning_rate": 4.454425539551686e-05, "loss": 2.3088, "step": 3771000 }, { "epoch": 10.92, "learning_rate": 4.454353174786958e-05, "loss": 2.3245, "step": 3771500 }, { "epoch": 10.92, "learning_rate": 4.4542808100222305e-05, "loss": 2.3252, "step": 3772000 }, { "epoch": 10.92, "learning_rate": 4.454208445257503e-05, "loss": 2.3045, "step": 3772500 }, { "epoch": 10.92, "learning_rate": 4.454136225222304e-05, "loss": 2.3251, "step": 3773000 }, { "epoch": 10.92, "learning_rate": 4.4540638604575765e-05, "loss": 2.3266, "step": 3773500 }, { "epoch": 10.92, "learning_rate": 4.453991640422379e-05, "loss": 2.3016, "step": 3774000 }, { "epoch": 10.93, "learning_rate": 4.453919275657652e-05, "loss": 2.3329, "step": 3774500 }, { "epoch": 10.93, "learning_rate": 4.453846910892924e-05, "loss": 2.3048, "step": 3775000 }, { "epoch": 10.93, "learning_rate": 4.453774546128196e-05, "loss": 2.3281, "step": 3775500 }, { "epoch": 10.93, "learning_rate": 4.4537021813634684e-05, "loss": 2.3184, "step": 3776000 }, { "epoch": 10.93, "learning_rate": 4.4536298165987406e-05, "loss": 2.3028, "step": 3776500 }, { "epoch": 10.93, "learning_rate": 4.453557451834013e-05, "loss": 2.2906, "step": 3777000 }, { "epoch": 10.93, "learning_rate": 4.453485087069285e-05, "loss": 2.3051, "step": 3777500 }, { "epoch": 10.94, "learning_rate": 4.453412722304557e-05, "loss": 2.2924, "step": 3778000 }, { "epoch": 10.94, "learning_rate": 4.4533403575398295e-05, "loss": 2.2934, "step": 3778500 }, { "epoch": 10.94, "learning_rate": 4.453267992775102e-05, "loss": 2.3213, "step": 3779000 }, { "epoch": 10.94, "learning_rate": 4.453195772739904e-05, "loss": 2.3231, "step": 3779500 }, { "epoch": 10.94, "learning_rate": 4.453123407975176e-05, "loss": 2.2997, "step": 3780000 }, { "epoch": 10.94, "learning_rate": 4.4530510432104484e-05, "loss": 2.2987, "step": 3780500 }, { "epoch": 10.94, "learning_rate": 4.4529786784457206e-05, "loss": 2.3432, "step": 3781000 }, { "epoch": 10.95, "learning_rate": 4.4529063136809935e-05, "loss": 2.309, "step": 3781500 }, { "epoch": 10.95, "learning_rate": 4.452833948916266e-05, "loss": 2.3424, "step": 3782000 }, { "epoch": 10.95, "learning_rate": 4.452761584151538e-05, "loss": 2.2964, "step": 3782500 }, { "epoch": 10.95, "learning_rate": 4.4526893641163395e-05, "loss": 2.3144, "step": 3783000 }, { "epoch": 10.95, "learning_rate": 4.452616999351612e-05, "loss": 2.3006, "step": 3783500 }, { "epoch": 10.95, "learning_rate": 4.4525446345868846e-05, "loss": 2.3378, "step": 3784000 }, { "epoch": 10.95, "learning_rate": 4.452472414551686e-05, "loss": 2.2885, "step": 3784500 }, { "epoch": 10.96, "learning_rate": 4.4524000497869584e-05, "loss": 2.3178, "step": 3785000 }, { "epoch": 10.96, "learning_rate": 4.4523276850222306e-05, "loss": 2.3245, "step": 3785500 }, { "epoch": 10.96, "learning_rate": 4.452255320257503e-05, "loss": 2.2928, "step": 3786000 }, { "epoch": 10.96, "learning_rate": 4.452182955492775e-05, "loss": 2.3215, "step": 3786500 }, { "epoch": 10.96, "learning_rate": 4.452110590728047e-05, "loss": 2.3133, "step": 3787000 }, { "epoch": 10.96, "learning_rate": 4.4520382259633195e-05, "loss": 2.3027, "step": 3787500 }, { "epoch": 10.96, "learning_rate": 4.451966005928122e-05, "loss": 2.3274, "step": 3788000 }, { "epoch": 10.97, "learning_rate": 4.451893641163394e-05, "loss": 2.3183, "step": 3788500 }, { "epoch": 10.97, "learning_rate": 4.451821276398667e-05, "loss": 2.298, "step": 3789000 }, { "epoch": 10.97, "learning_rate": 4.4517490563634684e-05, "loss": 2.3294, "step": 3789500 }, { "epoch": 10.97, "learning_rate": 4.4516766915987407e-05, "loss": 2.3153, "step": 3790000 }, { "epoch": 10.97, "learning_rate": 4.451604326834013e-05, "loss": 2.2983, "step": 3790500 }, { "epoch": 10.97, "learning_rate": 4.451531962069285e-05, "loss": 2.3059, "step": 3791000 }, { "epoch": 10.97, "learning_rate": 4.451459597304557e-05, "loss": 2.3022, "step": 3791500 }, { "epoch": 10.98, "learning_rate": 4.4513872325398296e-05, "loss": 2.3026, "step": 3792000 }, { "epoch": 10.98, "learning_rate": 4.451314867775102e-05, "loss": 2.2935, "step": 3792500 }, { "epoch": 10.98, "learning_rate": 4.451242503010375e-05, "loss": 2.3257, "step": 3793000 }, { "epoch": 10.98, "learning_rate": 4.451170138245647e-05, "loss": 2.2688, "step": 3793500 }, { "epoch": 10.98, "learning_rate": 4.451097773480919e-05, "loss": 2.3323, "step": 3794000 }, { "epoch": 10.98, "learning_rate": 4.4510254087161913e-05, "loss": 2.3091, "step": 3794500 }, { "epoch": 10.98, "learning_rate": 4.4509530439514636e-05, "loss": 2.3176, "step": 3795000 }, { "epoch": 10.99, "learning_rate": 4.450880679186736e-05, "loss": 2.3324, "step": 3795500 }, { "epoch": 10.99, "learning_rate": 4.450808314422009e-05, "loss": 2.3419, "step": 3796000 }, { "epoch": 10.99, "learning_rate": 4.45073609438681e-05, "loss": 2.3075, "step": 3796500 }, { "epoch": 10.99, "learning_rate": 4.4506637296220825e-05, "loss": 2.3348, "step": 3797000 }, { "epoch": 10.99, "learning_rate": 4.450591364857355e-05, "loss": 2.2989, "step": 3797500 }, { "epoch": 10.99, "learning_rate": 4.450519000092627e-05, "loss": 2.3362, "step": 3798000 }, { "epoch": 11.0, "learning_rate": 4.4504466353279e-05, "loss": 2.3173, "step": 3798500 }, { "epoch": 11.0, "learning_rate": 4.450374270563172e-05, "loss": 2.3239, "step": 3799000 }, { "epoch": 11.0, "learning_rate": 4.4503020505279736e-05, "loss": 2.3114, "step": 3799500 }, { "epoch": 11.0, "learning_rate": 4.450229685763246e-05, "loss": 2.321, "step": 3800000 }, { "epoch": 11.0, "eval_accuracy": 0.649606812559522, "eval_accuracy_mlm": 0.6118292335819766, "eval_accuracy_nsp": 0.8521698679609336, "eval_loss": 2.303694486618042, "eval_runtime": 330.6766, "eval_samples_per_second": 1319.676, "eval_steps_per_second": 54.987, "step": 3800192 }, { "epoch": 11.0, "learning_rate": 4.450157320998518e-05, "loss": 2.2686, "step": 3800500 }, { "epoch": 11.0, "learning_rate": 4.45008495623379e-05, "loss": 2.2723, "step": 3801000 }, { "epoch": 11.0, "learning_rate": 4.4500125914690625e-05, "loss": 2.2728, "step": 3801500 }, { "epoch": 11.01, "learning_rate": 4.449940226704335e-05, "loss": 2.2589, "step": 3802000 }, { "epoch": 11.01, "learning_rate": 4.449867861939607e-05, "loss": 2.2992, "step": 3802500 }, { "epoch": 11.01, "learning_rate": 4.44979549717488e-05, "loss": 2.2963, "step": 3803000 }, { "epoch": 11.01, "learning_rate": 4.449723421869211e-05, "loss": 2.2922, "step": 3803500 }, { "epoch": 11.01, "learning_rate": 4.4496510571044836e-05, "loss": 2.298, "step": 3804000 }, { "epoch": 11.01, "learning_rate": 4.449578692339756e-05, "loss": 2.2803, "step": 3804500 }, { "epoch": 11.01, "learning_rate": 4.449506327575028e-05, "loss": 2.3067, "step": 3805000 }, { "epoch": 11.02, "learning_rate": 4.4494339628103e-05, "loss": 2.2855, "step": 3805500 }, { "epoch": 11.02, "learning_rate": 4.4493615980455725e-05, "loss": 2.2817, "step": 3806000 }, { "epoch": 11.02, "learning_rate": 4.449289233280845e-05, "loss": 2.2773, "step": 3806500 }, { "epoch": 11.02, "learning_rate": 4.449216868516117e-05, "loss": 2.2936, "step": 3807000 }, { "epoch": 11.02, "learning_rate": 4.44914450375139e-05, "loss": 2.2836, "step": 3807500 }, { "epoch": 11.02, "learning_rate": 4.449072138986662e-05, "loss": 2.2877, "step": 3808000 }, { "epoch": 11.02, "learning_rate": 4.448999774221934e-05, "loss": 2.2951, "step": 3808500 }, { "epoch": 11.03, "learning_rate": 4.4489274094572065e-05, "loss": 2.2851, "step": 3809000 }, { "epoch": 11.03, "learning_rate": 4.448855189422008e-05, "loss": 2.271, "step": 3809500 }, { "epoch": 11.03, "learning_rate": 4.4487829693868096e-05, "loss": 2.2802, "step": 3810000 }, { "epoch": 11.03, "learning_rate": 4.4487106046220826e-05, "loss": 2.3079, "step": 3810500 }, { "epoch": 11.03, "learning_rate": 4.448638239857355e-05, "loss": 2.3086, "step": 3811000 }, { "epoch": 11.03, "learning_rate": 4.448565875092628e-05, "loss": 2.3141, "step": 3811500 }, { "epoch": 11.03, "learning_rate": 4.4484935103279e-05, "loss": 2.2847, "step": 3812000 }, { "epoch": 11.04, "learning_rate": 4.448421145563172e-05, "loss": 2.2871, "step": 3812500 }, { "epoch": 11.04, "learning_rate": 4.448348925527974e-05, "loss": 2.268, "step": 3813000 }, { "epoch": 11.04, "learning_rate": 4.448276560763246e-05, "loss": 2.2887, "step": 3813500 }, { "epoch": 11.04, "learning_rate": 4.448204195998518e-05, "loss": 2.3128, "step": 3814000 }, { "epoch": 11.04, "learning_rate": 4.44813197596332e-05, "loss": 2.2732, "step": 3814500 }, { "epoch": 11.04, "learning_rate": 4.4480596111985926e-05, "loss": 2.3099, "step": 3815000 }, { "epoch": 11.04, "learning_rate": 4.447987246433865e-05, "loss": 2.3075, "step": 3815500 }, { "epoch": 11.05, "learning_rate": 4.4479150263986664e-05, "loss": 2.3168, "step": 3816000 }, { "epoch": 11.05, "learning_rate": 4.4478426616339386e-05, "loss": 2.292, "step": 3816500 }, { "epoch": 11.05, "learning_rate": 4.447770296869211e-05, "loss": 2.2826, "step": 3817000 }, { "epoch": 11.05, "learning_rate": 4.447697932104483e-05, "loss": 2.2949, "step": 3817500 }, { "epoch": 11.05, "learning_rate": 4.447625567339755e-05, "loss": 2.2725, "step": 3818000 }, { "epoch": 11.05, "learning_rate": 4.447553202575028e-05, "loss": 2.3116, "step": 3818500 }, { "epoch": 11.05, "learning_rate": 4.4474808378103004e-05, "loss": 2.2743, "step": 3819000 }, { "epoch": 11.06, "learning_rate": 4.4474084730455726e-05, "loss": 2.3117, "step": 3819500 }, { "epoch": 11.06, "learning_rate": 4.447336253010375e-05, "loss": 2.2807, "step": 3820000 }, { "epoch": 11.06, "learning_rate": 4.447263888245647e-05, "loss": 2.2859, "step": 3820500 }, { "epoch": 11.06, "learning_rate": 4.447191523480919e-05, "loss": 2.2899, "step": 3821000 }, { "epoch": 11.06, "learning_rate": 4.4471191587161915e-05, "loss": 2.3095, "step": 3821500 }, { "epoch": 11.06, "learning_rate": 4.447046793951464e-05, "loss": 2.2984, "step": 3822000 }, { "epoch": 11.06, "learning_rate": 4.446974429186736e-05, "loss": 2.2877, "step": 3822500 }, { "epoch": 11.07, "learning_rate": 4.446902064422008e-05, "loss": 2.2967, "step": 3823000 }, { "epoch": 11.07, "learning_rate": 4.4468296996572804e-05, "loss": 2.2945, "step": 3823500 }, { "epoch": 11.07, "learning_rate": 4.4467573348925526e-05, "loss": 2.3127, "step": 3824000 }, { "epoch": 11.07, "learning_rate": 4.446685114857355e-05, "loss": 2.3369, "step": 3824500 }, { "epoch": 11.07, "learning_rate": 4.4466128948221564e-05, "loss": 2.31, "step": 3825000 }, { "epoch": 11.07, "learning_rate": 4.4465405300574286e-05, "loss": 2.2873, "step": 3825500 }, { "epoch": 11.07, "learning_rate": 4.446468165292701e-05, "loss": 2.3013, "step": 3826000 }, { "epoch": 11.08, "learning_rate": 4.446395945257503e-05, "loss": 2.2751, "step": 3826500 }, { "epoch": 11.08, "learning_rate": 4.446323580492775e-05, "loss": 2.273, "step": 3827000 }, { "epoch": 11.08, "learning_rate": 4.4462512157280475e-05, "loss": 2.3104, "step": 3827500 }, { "epoch": 11.08, "learning_rate": 4.44617885096332e-05, "loss": 2.3169, "step": 3828000 }, { "epoch": 11.08, "learning_rate": 4.4461064861985927e-05, "loss": 2.3134, "step": 3828500 }, { "epoch": 11.08, "learning_rate": 4.446034121433865e-05, "loss": 2.2931, "step": 3829000 }, { "epoch": 11.08, "learning_rate": 4.445961756669137e-05, "loss": 2.2568, "step": 3829500 }, { "epoch": 11.09, "learning_rate": 4.445889391904409e-05, "loss": 2.2833, "step": 3830000 }, { "epoch": 11.09, "learning_rate": 4.4458170271396816e-05, "loss": 2.3046, "step": 3830500 }, { "epoch": 11.09, "learning_rate": 4.445744662374954e-05, "loss": 2.2981, "step": 3831000 }, { "epoch": 11.09, "learning_rate": 4.445672297610226e-05, "loss": 2.2907, "step": 3831500 }, { "epoch": 11.09, "learning_rate": 4.445599932845498e-05, "loss": 2.2931, "step": 3832000 }, { "epoch": 11.09, "learning_rate": 4.4455277128103e-05, "loss": 2.2904, "step": 3832500 }, { "epoch": 11.09, "learning_rate": 4.445455348045573e-05, "loss": 2.2897, "step": 3833000 }, { "epoch": 11.1, "learning_rate": 4.445382983280845e-05, "loss": 2.2976, "step": 3833500 }, { "epoch": 11.1, "learning_rate": 4.445310618516118e-05, "loss": 2.3147, "step": 3834000 }, { "epoch": 11.1, "learning_rate": 4.44523825375139e-05, "loss": 2.3212, "step": 3834500 }, { "epoch": 11.1, "learning_rate": 4.445165888986662e-05, "loss": 2.289, "step": 3835000 }, { "epoch": 11.1, "learning_rate": 4.4450935242219345e-05, "loss": 2.2984, "step": 3835500 }, { "epoch": 11.1, "learning_rate": 4.445021159457207e-05, "loss": 2.298, "step": 3836000 }, { "epoch": 11.11, "learning_rate": 4.444948794692479e-05, "loss": 2.3146, "step": 3836500 }, { "epoch": 11.11, "learning_rate": 4.444876429927751e-05, "loss": 2.288, "step": 3837000 }, { "epoch": 11.11, "learning_rate": 4.4448040651630234e-05, "loss": 2.3075, "step": 3837500 }, { "epoch": 11.11, "learning_rate": 4.4447317003982956e-05, "loss": 2.3034, "step": 3838000 }, { "epoch": 11.11, "learning_rate": 4.444659480363098e-05, "loss": 2.2941, "step": 3838500 }, { "epoch": 11.11, "learning_rate": 4.44458711559837e-05, "loss": 2.3028, "step": 3839000 }, { "epoch": 11.11, "learning_rate": 4.444514750833642e-05, "loss": 2.2933, "step": 3839500 }, { "epoch": 11.12, "learning_rate": 4.4444423860689145e-05, "loss": 2.2984, "step": 3840000 }, { "epoch": 11.12, "learning_rate": 4.4443700213041874e-05, "loss": 2.2852, "step": 3840500 }, { "epoch": 11.12, "learning_rate": 4.444297801268989e-05, "loss": 2.2859, "step": 3841000 }, { "epoch": 11.12, "learning_rate": 4.444225436504261e-05, "loss": 2.2934, "step": 3841500 }, { "epoch": 11.12, "learning_rate": 4.4441530717395334e-05, "loss": 2.2947, "step": 3842000 }, { "epoch": 11.12, "learning_rate": 4.4440807069748056e-05, "loss": 2.2921, "step": 3842500 }, { "epoch": 11.12, "learning_rate": 4.444008342210078e-05, "loss": 2.294, "step": 3843000 }, { "epoch": 11.13, "learning_rate": 4.44393612217488e-05, "loss": 2.3045, "step": 3843500 }, { "epoch": 11.13, "learning_rate": 4.443863757410152e-05, "loss": 2.2957, "step": 3844000 }, { "epoch": 11.13, "learning_rate": 4.4437913926454245e-05, "loss": 2.2838, "step": 3844500 }, { "epoch": 11.13, "learning_rate": 4.443719027880697e-05, "loss": 2.2981, "step": 3845000 }, { "epoch": 11.13, "learning_rate": 4.443646807845498e-05, "loss": 2.2961, "step": 3845500 }, { "epoch": 11.13, "learning_rate": 4.4435744430807705e-05, "loss": 2.3033, "step": 3846000 }, { "epoch": 11.13, "learning_rate": 4.443502078316043e-05, "loss": 2.2924, "step": 3846500 }, { "epoch": 11.14, "learning_rate": 4.4434297135513157e-05, "loss": 2.2644, "step": 3847000 }, { "epoch": 11.14, "learning_rate": 4.443357348786588e-05, "loss": 2.3018, "step": 3847500 }, { "epoch": 11.14, "learning_rate": 4.44328498402186e-05, "loss": 2.2764, "step": 3848000 }, { "epoch": 11.14, "learning_rate": 4.443212763986662e-05, "loss": 2.2915, "step": 3848500 }, { "epoch": 11.14, "learning_rate": 4.443140543951464e-05, "loss": 2.3004, "step": 3849000 }, { "epoch": 11.14, "learning_rate": 4.443068179186736e-05, "loss": 2.2697, "step": 3849500 }, { "epoch": 11.14, "learning_rate": 4.442995814422008e-05, "loss": 2.2959, "step": 3850000 }, { "epoch": 11.15, "learning_rate": 4.4429234496572806e-05, "loss": 2.2923, "step": 3850500 }, { "epoch": 11.15, "learning_rate": 4.442851084892553e-05, "loss": 2.3147, "step": 3851000 }, { "epoch": 11.15, "learning_rate": 4.442778720127826e-05, "loss": 2.2821, "step": 3851500 }, { "epoch": 11.15, "learning_rate": 4.442706355363098e-05, "loss": 2.3035, "step": 3852000 }, { "epoch": 11.15, "learning_rate": 4.44263399059837e-05, "loss": 2.2767, "step": 3852500 }, { "epoch": 11.15, "learning_rate": 4.442561770563172e-05, "loss": 2.3207, "step": 3853000 }, { "epoch": 11.15, "learning_rate": 4.442489405798444e-05, "loss": 2.3131, "step": 3853500 }, { "epoch": 11.16, "learning_rate": 4.442417041033716e-05, "loss": 2.3055, "step": 3854000 }, { "epoch": 11.16, "learning_rate": 4.4423446762689884e-05, "loss": 2.3018, "step": 3854500 }, { "epoch": 11.16, "learning_rate": 4.4422723115042606e-05, "loss": 2.3021, "step": 3855000 }, { "epoch": 11.16, "learning_rate": 4.442199946739533e-05, "loss": 2.3047, "step": 3855500 }, { "epoch": 11.16, "learning_rate": 4.442127581974806e-05, "loss": 2.3027, "step": 3856000 }, { "epoch": 11.16, "learning_rate": 4.442055217210078e-05, "loss": 2.2977, "step": 3856500 }, { "epoch": 11.16, "learning_rate": 4.44198299717488e-05, "loss": 2.275, "step": 3857000 }, { "epoch": 11.17, "learning_rate": 4.441910777139682e-05, "loss": 2.2803, "step": 3857500 }, { "epoch": 11.17, "learning_rate": 4.441838412374954e-05, "loss": 2.2964, "step": 3858000 }, { "epoch": 11.17, "learning_rate": 4.4417661923397555e-05, "loss": 2.3217, "step": 3858500 }, { "epoch": 11.17, "learning_rate": 4.4416938275750284e-05, "loss": 2.2941, "step": 3859000 }, { "epoch": 11.17, "learning_rate": 4.4416214628103006e-05, "loss": 2.3128, "step": 3859500 }, { "epoch": 11.17, "learning_rate": 4.441549098045573e-05, "loss": 2.2891, "step": 3860000 }, { "epoch": 11.17, "learning_rate": 4.441476733280845e-05, "loss": 2.3115, "step": 3860500 }, { "epoch": 11.18, "learning_rate": 4.441404368516117e-05, "loss": 2.3146, "step": 3861000 }, { "epoch": 11.18, "learning_rate": 4.4413320037513895e-05, "loss": 2.2819, "step": 3861500 }, { "epoch": 11.18, "learning_rate": 4.441259638986662e-05, "loss": 2.2711, "step": 3862000 }, { "epoch": 11.18, "learning_rate": 4.441187274221934e-05, "loss": 2.298, "step": 3862500 }, { "epoch": 11.18, "learning_rate": 4.441114909457206e-05, "loss": 2.2763, "step": 3863000 }, { "epoch": 11.18, "learning_rate": 4.4410426894220084e-05, "loss": 2.302, "step": 3863500 }, { "epoch": 11.18, "learning_rate": 4.4409704693868106e-05, "loss": 2.29, "step": 3864000 }, { "epoch": 11.19, "learning_rate": 4.440898104622083e-05, "loss": 2.2867, "step": 3864500 }, { "epoch": 11.19, "learning_rate": 4.440825739857355e-05, "loss": 2.3127, "step": 3865000 }, { "epoch": 11.19, "learning_rate": 4.440753375092627e-05, "loss": 2.3017, "step": 3865500 }, { "epoch": 11.19, "learning_rate": 4.4406810103278995e-05, "loss": 2.2837, "step": 3866000 }, { "epoch": 11.19, "learning_rate": 4.440608645563172e-05, "loss": 2.325, "step": 3866500 }, { "epoch": 11.19, "learning_rate": 4.440536280798444e-05, "loss": 2.294, "step": 3867000 }, { "epoch": 11.19, "learning_rate": 4.440463916033716e-05, "loss": 2.2788, "step": 3867500 }, { "epoch": 11.2, "learning_rate": 4.4403915512689884e-05, "loss": 2.3067, "step": 3868000 }, { "epoch": 11.2, "learning_rate": 4.4403191865042607e-05, "loss": 2.3004, "step": 3868500 }, { "epoch": 11.2, "learning_rate": 4.440246966469063e-05, "loss": 2.2889, "step": 3869000 }, { "epoch": 11.2, "learning_rate": 4.440174601704335e-05, "loss": 2.3176, "step": 3869500 }, { "epoch": 11.2, "learning_rate": 4.440102236939607e-05, "loss": 2.27, "step": 3870000 }, { "epoch": 11.2, "learning_rate": 4.440030016904409e-05, "loss": 2.2946, "step": 3870500 }, { "epoch": 11.2, "learning_rate": 4.439957652139682e-05, "loss": 2.3177, "step": 3871000 }, { "epoch": 11.21, "learning_rate": 4.439885287374954e-05, "loss": 2.2968, "step": 3871500 }, { "epoch": 11.21, "learning_rate": 4.439812922610226e-05, "loss": 2.2872, "step": 3872000 }, { "epoch": 11.21, "learning_rate": 4.4397405578454985e-05, "loss": 2.312, "step": 3872500 }, { "epoch": 11.21, "learning_rate": 4.439668193080771e-05, "loss": 2.2964, "step": 3873000 }, { "epoch": 11.21, "learning_rate": 4.4395958283160436e-05, "loss": 2.2821, "step": 3873500 }, { "epoch": 11.21, "learning_rate": 4.439523608280845e-05, "loss": 2.2738, "step": 3874000 }, { "epoch": 11.22, "learning_rate": 4.4394512435161174e-05, "loss": 2.2641, "step": 3874500 }, { "epoch": 11.22, "learning_rate": 4.4393788787513896e-05, "loss": 2.2871, "step": 3875000 }, { "epoch": 11.22, "learning_rate": 4.439306513986662e-05, "loss": 2.2881, "step": 3875500 }, { "epoch": 11.22, "learning_rate": 4.439234149221934e-05, "loss": 2.3196, "step": 3876000 }, { "epoch": 11.22, "learning_rate": 4.439161784457206e-05, "loss": 2.3096, "step": 3876500 }, { "epoch": 11.22, "learning_rate": 4.4390894196924785e-05, "loss": 2.2789, "step": 3877000 }, { "epoch": 11.22, "learning_rate": 4.439017199657281e-05, "loss": 2.2884, "step": 3877500 }, { "epoch": 11.23, "learning_rate": 4.438944979622082e-05, "loss": 2.2814, "step": 3878000 }, { "epoch": 11.23, "learning_rate": 4.438872614857355e-05, "loss": 2.2818, "step": 3878500 }, { "epoch": 11.23, "learning_rate": 4.4388002500926274e-05, "loss": 2.2913, "step": 3879000 }, { "epoch": 11.23, "learning_rate": 4.4387278853278996e-05, "loss": 2.2857, "step": 3879500 }, { "epoch": 11.23, "learning_rate": 4.438655520563172e-05, "loss": 2.295, "step": 3880000 }, { "epoch": 11.23, "learning_rate": 4.438583155798444e-05, "loss": 2.2999, "step": 3880500 }, { "epoch": 11.23, "learning_rate": 4.438510791033716e-05, "loss": 2.3024, "step": 3881000 }, { "epoch": 11.24, "learning_rate": 4.4384384262689885e-05, "loss": 2.2967, "step": 3881500 }, { "epoch": 11.24, "learning_rate": 4.438366061504261e-05, "loss": 2.2856, "step": 3882000 }, { "epoch": 11.24, "learning_rate": 4.4382936967395336e-05, "loss": 2.3042, "step": 3882500 }, { "epoch": 11.24, "learning_rate": 4.438221331974806e-05, "loss": 2.3017, "step": 3883000 }, { "epoch": 11.24, "learning_rate": 4.438148967210078e-05, "loss": 2.3141, "step": 3883500 }, { "epoch": 11.24, "learning_rate": 4.43807660244535e-05, "loss": 2.2983, "step": 3884000 }, { "epoch": 11.24, "learning_rate": 4.4380042376806225e-05, "loss": 2.2994, "step": 3884500 }, { "epoch": 11.25, "learning_rate": 4.437932017645424e-05, "loss": 2.2948, "step": 3885000 }, { "epoch": 11.25, "learning_rate": 4.437859652880696e-05, "loss": 2.2896, "step": 3885500 }, { "epoch": 11.25, "learning_rate": 4.437787288115969e-05, "loss": 2.2839, "step": 3886000 }, { "epoch": 11.25, "learning_rate": 4.4377149233512414e-05, "loss": 2.2984, "step": 3886500 }, { "epoch": 11.25, "learning_rate": 4.4376425585865137e-05, "loss": 2.318, "step": 3887000 }, { "epoch": 11.25, "learning_rate": 4.437570193821786e-05, "loss": 2.3111, "step": 3887500 }, { "epoch": 11.25, "learning_rate": 4.437497973786588e-05, "loss": 2.3157, "step": 3888000 }, { "epoch": 11.26, "learning_rate": 4.43742560902186e-05, "loss": 2.2971, "step": 3888500 }, { "epoch": 11.26, "learning_rate": 4.437353388986662e-05, "loss": 2.3091, "step": 3889000 }, { "epoch": 11.26, "learning_rate": 4.437281024221934e-05, "loss": 2.2843, "step": 3889500 }, { "epoch": 11.26, "learning_rate": 4.437208659457206e-05, "loss": 2.3, "step": 3890000 }, { "epoch": 11.26, "learning_rate": 4.4371362946924786e-05, "loss": 2.3057, "step": 3890500 }, { "epoch": 11.26, "learning_rate": 4.437063929927751e-05, "loss": 2.285, "step": 3891000 }, { "epoch": 11.26, "learning_rate": 4.436991565163024e-05, "loss": 2.298, "step": 3891500 }, { "epoch": 11.27, "learning_rate": 4.436919200398296e-05, "loss": 2.3333, "step": 3892000 }, { "epoch": 11.27, "learning_rate": 4.436846835633568e-05, "loss": 2.2982, "step": 3892500 }, { "epoch": 11.27, "learning_rate": 4.436774470868841e-05, "loss": 2.2807, "step": 3893000 }, { "epoch": 11.27, "learning_rate": 4.436702106104113e-05, "loss": 2.2976, "step": 3893500 }, { "epoch": 11.27, "learning_rate": 4.4366297413393855e-05, "loss": 2.2927, "step": 3894000 }, { "epoch": 11.27, "learning_rate": 4.436557376574658e-05, "loss": 2.2966, "step": 3894500 }, { "epoch": 11.27, "learning_rate": 4.43648501180993e-05, "loss": 2.3097, "step": 3895000 }, { "epoch": 11.28, "learning_rate": 4.436412647045202e-05, "loss": 2.2963, "step": 3895500 }, { "epoch": 11.28, "learning_rate": 4.4363402822804744e-05, "loss": 2.2833, "step": 3896000 }, { "epoch": 11.28, "learning_rate": 4.4362679175157466e-05, "loss": 2.3086, "step": 3896500 }, { "epoch": 11.28, "learning_rate": 4.436195697480549e-05, "loss": 2.289, "step": 3897000 }, { "epoch": 11.28, "learning_rate": 4.436123332715821e-05, "loss": 2.2789, "step": 3897500 }, { "epoch": 11.28, "learning_rate": 4.4360511126806226e-05, "loss": 2.2868, "step": 3898000 }, { "epoch": 11.28, "learning_rate": 4.435978747915895e-05, "loss": 2.3086, "step": 3898500 }, { "epoch": 11.29, "learning_rate": 4.4359065278806964e-05, "loss": 2.3146, "step": 3899000 }, { "epoch": 11.29, "learning_rate": 4.4358341631159686e-05, "loss": 2.2974, "step": 3899500 }, { "epoch": 11.29, "learning_rate": 4.435761798351241e-05, "loss": 2.3185, "step": 3900000 }, { "epoch": 11.29, "learning_rate": 4.435689433586514e-05, "loss": 2.2874, "step": 3900500 }, { "epoch": 11.29, "learning_rate": 4.435617068821786e-05, "loss": 2.2886, "step": 3901000 }, { "epoch": 11.29, "learning_rate": 4.435544704057059e-05, "loss": 2.3028, "step": 3901500 }, { "epoch": 11.29, "learning_rate": 4.4354724840218604e-05, "loss": 2.3163, "step": 3902000 }, { "epoch": 11.3, "learning_rate": 4.4354001192571326e-05, "loss": 2.2947, "step": 3902500 }, { "epoch": 11.3, "learning_rate": 4.435327754492405e-05, "loss": 2.2892, "step": 3903000 }, { "epoch": 11.3, "learning_rate": 4.435255389727677e-05, "loss": 2.2943, "step": 3903500 }, { "epoch": 11.3, "learning_rate": 4.435183024962949e-05, "loss": 2.312, "step": 3904000 }, { "epoch": 11.3, "learning_rate": 4.4351106601982215e-05, "loss": 2.2868, "step": 3904500 }, { "epoch": 11.3, "learning_rate": 4.435038295433494e-05, "loss": 2.2951, "step": 3905000 }, { "epoch": 11.3, "learning_rate": 4.434965930668766e-05, "loss": 2.299, "step": 3905500 }, { "epoch": 11.31, "learning_rate": 4.434893565904039e-05, "loss": 2.32, "step": 3906000 }, { "epoch": 11.31, "learning_rate": 4.434821201139311e-05, "loss": 2.286, "step": 3906500 }, { "epoch": 11.31, "learning_rate": 4.434748836374583e-05, "loss": 2.3081, "step": 3907000 }, { "epoch": 11.31, "learning_rate": 4.4346764716098556e-05, "loss": 2.2949, "step": 3907500 }, { "epoch": 11.31, "learning_rate": 4.434604251574658e-05, "loss": 2.2969, "step": 3908000 }, { "epoch": 11.31, "learning_rate": 4.43453188680993e-05, "loss": 2.2952, "step": 3908500 }, { "epoch": 11.31, "learning_rate": 4.4344596667747316e-05, "loss": 2.2953, "step": 3909000 }, { "epoch": 11.32, "learning_rate": 4.434387302010004e-05, "loss": 2.3102, "step": 3909500 }, { "epoch": 11.32, "learning_rate": 4.434314937245277e-05, "loss": 2.2922, "step": 3910000 }, { "epoch": 11.32, "learning_rate": 4.434242572480549e-05, "loss": 2.263, "step": 3910500 }, { "epoch": 11.32, "learning_rate": 4.4341703524453505e-05, "loss": 2.2974, "step": 3911000 }, { "epoch": 11.32, "learning_rate": 4.434098132410152e-05, "loss": 2.3087, "step": 3911500 }, { "epoch": 11.32, "learning_rate": 4.434025767645424e-05, "loss": 2.304, "step": 3912000 }, { "epoch": 11.33, "learning_rate": 4.4339534028806965e-05, "loss": 2.3138, "step": 3912500 }, { "epoch": 11.33, "learning_rate": 4.433881038115969e-05, "loss": 2.3041, "step": 3913000 }, { "epoch": 11.33, "learning_rate": 4.4338086733512416e-05, "loss": 2.2963, "step": 3913500 }, { "epoch": 11.33, "learning_rate": 4.433736308586514e-05, "loss": 2.2973, "step": 3914000 }, { "epoch": 11.33, "learning_rate": 4.433663943821786e-05, "loss": 2.296, "step": 3914500 }, { "epoch": 11.33, "learning_rate": 4.433591579057058e-05, "loss": 2.3445, "step": 3915000 }, { "epoch": 11.33, "learning_rate": 4.433519214292331e-05, "loss": 2.2963, "step": 3915500 }, { "epoch": 11.34, "learning_rate": 4.4334468495276034e-05, "loss": 2.3101, "step": 3916000 }, { "epoch": 11.34, "learning_rate": 4.4333744847628756e-05, "loss": 2.2876, "step": 3916500 }, { "epoch": 11.34, "learning_rate": 4.433302119998148e-05, "loss": 2.3078, "step": 3917000 }, { "epoch": 11.34, "learning_rate": 4.4332298999629494e-05, "loss": 2.2878, "step": 3917500 }, { "epoch": 11.34, "learning_rate": 4.4331575351982216e-05, "loss": 2.2668, "step": 3918000 }, { "epoch": 11.34, "learning_rate": 4.433085170433494e-05, "loss": 2.2991, "step": 3918500 }, { "epoch": 11.34, "learning_rate": 4.433012805668767e-05, "loss": 2.306, "step": 3919000 }, { "epoch": 11.35, "learning_rate": 4.432940585633568e-05, "loss": 2.3221, "step": 3919500 }, { "epoch": 11.35, "learning_rate": 4.4328682208688405e-05, "loss": 2.3057, "step": 3920000 }, { "epoch": 11.35, "learning_rate": 4.432795856104113e-05, "loss": 2.2965, "step": 3920500 }, { "epoch": 11.35, "learning_rate": 4.432723491339385e-05, "loss": 2.3049, "step": 3921000 }, { "epoch": 11.35, "learning_rate": 4.4326512713041865e-05, "loss": 2.3, "step": 3921500 }, { "epoch": 11.35, "learning_rate": 4.432578906539459e-05, "loss": 2.2848, "step": 3922000 }, { "epoch": 11.35, "learning_rate": 4.4325065417747316e-05, "loss": 2.297, "step": 3922500 }, { "epoch": 11.36, "learning_rate": 4.432434177010004e-05, "loss": 2.3222, "step": 3923000 }, { "epoch": 11.36, "learning_rate": 4.432361812245277e-05, "loss": 2.2986, "step": 3923500 }, { "epoch": 11.36, "learning_rate": 4.432289447480549e-05, "loss": 2.323, "step": 3924000 }, { "epoch": 11.36, "learning_rate": 4.432217082715821e-05, "loss": 2.295, "step": 3924500 }, { "epoch": 11.36, "learning_rate": 4.4321447179510934e-05, "loss": 2.311, "step": 3925000 }, { "epoch": 11.36, "learning_rate": 4.432072497915895e-05, "loss": 2.3085, "step": 3925500 }, { "epoch": 11.36, "learning_rate": 4.432000133151167e-05, "loss": 2.2964, "step": 3926000 }, { "epoch": 11.37, "learning_rate": 4.4319277683864394e-05, "loss": 2.3263, "step": 3926500 }, { "epoch": 11.37, "learning_rate": 4.431855548351242e-05, "loss": 2.3014, "step": 3927000 }, { "epoch": 11.37, "learning_rate": 4.431783183586514e-05, "loss": 2.2935, "step": 3927500 }, { "epoch": 11.37, "learning_rate": 4.431710818821786e-05, "loss": 2.3236, "step": 3928000 }, { "epoch": 11.37, "learning_rate": 4.4316384540570583e-05, "loss": 2.3076, "step": 3928500 }, { "epoch": 11.37, "learning_rate": 4.4315660892923306e-05, "loss": 2.3143, "step": 3929000 }, { "epoch": 11.37, "learning_rate": 4.431493724527603e-05, "loss": 2.2974, "step": 3929500 }, { "epoch": 11.38, "learning_rate": 4.431421359762875e-05, "loss": 2.3073, "step": 3930000 }, { "epoch": 11.38, "learning_rate": 4.431348994998148e-05, "loss": 2.315, "step": 3930500 }, { "epoch": 11.38, "learning_rate": 4.43127663023342e-05, "loss": 2.2901, "step": 3931000 }, { "epoch": 11.38, "learning_rate": 4.4312042654686924e-05, "loss": 2.3244, "step": 3931500 }, { "epoch": 11.38, "learning_rate": 4.4311319007039646e-05, "loss": 2.3177, "step": 3932000 }, { "epoch": 11.38, "learning_rate": 4.431059535939237e-05, "loss": 2.2916, "step": 3932500 }, { "epoch": 11.38, "learning_rate": 4.430987315904039e-05, "loss": 2.3209, "step": 3933000 }, { "epoch": 11.39, "learning_rate": 4.430914951139311e-05, "loss": 2.298, "step": 3933500 }, { "epoch": 11.39, "learning_rate": 4.4308425863745835e-05, "loss": 2.2733, "step": 3934000 }, { "epoch": 11.39, "learning_rate": 4.430770221609856e-05, "loss": 2.2753, "step": 3934500 }, { "epoch": 11.39, "learning_rate": 4.430697856845128e-05, "loss": 2.2932, "step": 3935000 }, { "epoch": 11.39, "learning_rate": 4.4306254920804e-05, "loss": 2.3022, "step": 3935500 }, { "epoch": 11.39, "learning_rate": 4.4305531273156724e-05, "loss": 2.3055, "step": 3936000 }, { "epoch": 11.39, "learning_rate": 4.4304807625509446e-05, "loss": 2.306, "step": 3936500 }, { "epoch": 11.4, "learning_rate": 4.430408542515747e-05, "loss": 2.2852, "step": 3937000 }, { "epoch": 11.4, "learning_rate": 4.430336177751019e-05, "loss": 2.3028, "step": 3937500 }, { "epoch": 11.4, "learning_rate": 4.430263812986292e-05, "loss": 2.3001, "step": 3938000 }, { "epoch": 11.4, "learning_rate": 4.430191448221564e-05, "loss": 2.3039, "step": 3938500 }, { "epoch": 11.4, "learning_rate": 4.4301190834568364e-05, "loss": 2.3164, "step": 3939000 }, { "epoch": 11.4, "learning_rate": 4.4300467186921086e-05, "loss": 2.3127, "step": 3939500 }, { "epoch": 11.4, "learning_rate": 4.429974353927381e-05, "loss": 2.3078, "step": 3940000 }, { "epoch": 11.41, "learning_rate": 4.4299021338921824e-05, "loss": 2.2852, "step": 3940500 }, { "epoch": 11.41, "learning_rate": 4.4298299138569846e-05, "loss": 2.3351, "step": 3941000 }, { "epoch": 11.41, "learning_rate": 4.429757549092257e-05, "loss": 2.2779, "step": 3941500 }, { "epoch": 11.41, "learning_rate": 4.429685184327529e-05, "loss": 2.3009, "step": 3942000 }, { "epoch": 11.41, "learning_rate": 4.429612819562801e-05, "loss": 2.3163, "step": 3942500 }, { "epoch": 11.41, "learning_rate": 4.4295404547980735e-05, "loss": 2.3233, "step": 3943000 }, { "epoch": 11.41, "learning_rate": 4.429468090033346e-05, "loss": 2.3284, "step": 3943500 }, { "epoch": 11.42, "learning_rate": 4.429395725268618e-05, "loss": 2.3109, "step": 3944000 }, { "epoch": 11.42, "learning_rate": 4.42932336050389e-05, "loss": 2.313, "step": 3944500 }, { "epoch": 11.42, "learning_rate": 4.429251140468692e-05, "loss": 2.3092, "step": 3945000 }, { "epoch": 11.42, "learning_rate": 4.429178775703965e-05, "loss": 2.3059, "step": 3945500 }, { "epoch": 11.42, "learning_rate": 4.429106410939237e-05, "loss": 2.3068, "step": 3946000 }, { "epoch": 11.42, "learning_rate": 4.42903404617451e-05, "loss": 2.2908, "step": 3946500 }, { "epoch": 11.42, "learning_rate": 4.428961681409782e-05, "loss": 2.335, "step": 3947000 }, { "epoch": 11.43, "learning_rate": 4.4288894613745836e-05, "loss": 2.3129, "step": 3947500 }, { "epoch": 11.43, "learning_rate": 4.428817096609856e-05, "loss": 2.3018, "step": 3948000 }, { "epoch": 11.43, "learning_rate": 4.428744731845128e-05, "loss": 2.3048, "step": 3948500 }, { "epoch": 11.43, "learning_rate": 4.4286723670804e-05, "loss": 2.3053, "step": 3949000 }, { "epoch": 11.43, "learning_rate": 4.4286000023156725e-05, "loss": 2.2953, "step": 3949500 }, { "epoch": 11.43, "learning_rate": 4.428527927010004e-05, "loss": 2.2934, "step": 3950000 }, { "epoch": 11.44, "learning_rate": 4.428455562245276e-05, "loss": 2.2809, "step": 3950500 }, { "epoch": 11.44, "learning_rate": 4.4283831974805485e-05, "loss": 2.3156, "step": 3951000 }, { "epoch": 11.44, "learning_rate": 4.428310832715821e-05, "loss": 2.3016, "step": 3951500 }, { "epoch": 11.44, "learning_rate": 4.428238467951093e-05, "loss": 2.2839, "step": 3952000 }, { "epoch": 11.44, "learning_rate": 4.428166103186365e-05, "loss": 2.3052, "step": 3952500 }, { "epoch": 11.44, "learning_rate": 4.428093738421638e-05, "loss": 2.2954, "step": 3953000 }, { "epoch": 11.44, "learning_rate": 4.42802137365691e-05, "loss": 2.2658, "step": 3953500 }, { "epoch": 11.45, "learning_rate": 4.4279490088921825e-05, "loss": 2.2998, "step": 3954000 }, { "epoch": 11.45, "learning_rate": 4.427876644127455e-05, "loss": 2.3206, "step": 3954500 }, { "epoch": 11.45, "learning_rate": 4.427804424092257e-05, "loss": 2.3139, "step": 3955000 }, { "epoch": 11.45, "learning_rate": 4.427732059327529e-05, "loss": 2.2897, "step": 3955500 }, { "epoch": 11.45, "learning_rate": 4.4276596945628014e-05, "loss": 2.2835, "step": 3956000 }, { "epoch": 11.45, "learning_rate": 4.4275873297980736e-05, "loss": 2.3086, "step": 3956500 }, { "epoch": 11.45, "learning_rate": 4.427515109762875e-05, "loss": 2.3157, "step": 3957000 }, { "epoch": 11.46, "learning_rate": 4.4274427449981474e-05, "loss": 2.2893, "step": 3957500 }, { "epoch": 11.46, "learning_rate": 4.4273703802334196e-05, "loss": 2.2715, "step": 3958000 }, { "epoch": 11.46, "learning_rate": 4.427298160198222e-05, "loss": 2.3184, "step": 3958500 }, { "epoch": 11.46, "learning_rate": 4.427225795433494e-05, "loss": 2.3111, "step": 3959000 }, { "epoch": 11.46, "learning_rate": 4.427153430668766e-05, "loss": 2.3096, "step": 3959500 }, { "epoch": 11.46, "learning_rate": 4.4270810659040385e-05, "loss": 2.2955, "step": 3960000 }, { "epoch": 11.46, "learning_rate": 4.4270087011393114e-05, "loss": 2.3065, "step": 3960500 }, { "epoch": 11.47, "learning_rate": 4.4269363363745836e-05, "loss": 2.3341, "step": 3961000 }, { "epoch": 11.47, "learning_rate": 4.426863971609856e-05, "loss": 2.3112, "step": 3961500 }, { "epoch": 11.47, "learning_rate": 4.426791606845128e-05, "loss": 2.2741, "step": 3962000 }, { "epoch": 11.47, "learning_rate": 4.4267192420804e-05, "loss": 2.3238, "step": 3962500 }, { "epoch": 11.47, "learning_rate": 4.426647022045202e-05, "loss": 2.2932, "step": 3963000 }, { "epoch": 11.47, "learning_rate": 4.426574657280475e-05, "loss": 2.3027, "step": 3963500 }, { "epoch": 11.47, "learning_rate": 4.426502292515747e-05, "loss": 2.2739, "step": 3964000 }, { "epoch": 11.48, "learning_rate": 4.426429927751019e-05, "loss": 2.2876, "step": 3964500 }, { "epoch": 11.48, "learning_rate": 4.4263575629862914e-05, "loss": 2.2803, "step": 3965000 }, { "epoch": 11.48, "learning_rate": 4.426285342951093e-05, "loss": 2.3114, "step": 3965500 }, { "epoch": 11.48, "learning_rate": 4.426212978186365e-05, "loss": 2.2772, "step": 3966000 }, { "epoch": 11.48, "learning_rate": 4.4261406134216374e-05, "loss": 2.3118, "step": 3966500 }, { "epoch": 11.48, "learning_rate": 4.42606824865691e-05, "loss": 2.3169, "step": 3967000 }, { "epoch": 11.48, "learning_rate": 4.425996028621712e-05, "loss": 2.3221, "step": 3967500 }, { "epoch": 11.49, "learning_rate": 4.425923663856985e-05, "loss": 2.3125, "step": 3968000 }, { "epoch": 11.49, "learning_rate": 4.4258514438217864e-05, "loss": 2.2877, "step": 3968500 }, { "epoch": 11.49, "learning_rate": 4.4257790790570586e-05, "loss": 2.3047, "step": 3969000 }, { "epoch": 11.49, "learning_rate": 4.425706714292331e-05, "loss": 2.298, "step": 3969500 }, { "epoch": 11.49, "learning_rate": 4.425634349527603e-05, "loss": 2.3017, "step": 3970000 }, { "epoch": 11.49, "learning_rate": 4.425561984762875e-05, "loss": 2.3199, "step": 3970500 }, { "epoch": 11.49, "learning_rate": 4.4254896199981475e-05, "loss": 2.2959, "step": 3971000 }, { "epoch": 11.5, "learning_rate": 4.42541725523342e-05, "loss": 2.326, "step": 3971500 }, { "epoch": 11.5, "learning_rate": 4.4253448904686926e-05, "loss": 2.3069, "step": 3972000 }, { "epoch": 11.5, "learning_rate": 4.425272525703965e-05, "loss": 2.316, "step": 3972500 }, { "epoch": 11.5, "learning_rate": 4.425200160939237e-05, "loss": 2.3168, "step": 3973000 }, { "epoch": 11.5, "learning_rate": 4.4251279409040386e-05, "loss": 2.3295, "step": 3973500 }, { "epoch": 11.5, "learning_rate": 4.425055576139311e-05, "loss": 2.3328, "step": 3974000 }, { "epoch": 11.5, "learning_rate": 4.424983211374583e-05, "loss": 2.2868, "step": 3974500 }, { "epoch": 11.51, "learning_rate": 4.424910846609855e-05, "loss": 2.3096, "step": 3975000 }, { "epoch": 11.51, "learning_rate": 4.424838481845128e-05, "loss": 2.3272, "step": 3975500 }, { "epoch": 11.51, "learning_rate": 4.42476626180993e-05, "loss": 2.3217, "step": 3976000 }, { "epoch": 11.51, "learning_rate": 4.4246938970452026e-05, "loss": 2.3046, "step": 3976500 }, { "epoch": 11.51, "learning_rate": 4.424621677010004e-05, "loss": 2.2842, "step": 3977000 }, { "epoch": 11.51, "learning_rate": 4.4245493122452764e-05, "loss": 2.347, "step": 3977500 }, { "epoch": 11.51, "learning_rate": 4.4244769474805486e-05, "loss": 2.301, "step": 3978000 }, { "epoch": 11.52, "learning_rate": 4.424404582715821e-05, "loss": 2.3263, "step": 3978500 }, { "epoch": 11.52, "learning_rate": 4.424332217951093e-05, "loss": 2.3088, "step": 3979000 }, { "epoch": 11.52, "learning_rate": 4.424259853186365e-05, "loss": 2.3154, "step": 3979500 }, { "epoch": 11.52, "learning_rate": 4.4241874884216375e-05, "loss": 2.3006, "step": 3980000 }, { "epoch": 11.52, "learning_rate": 4.42411512365691e-05, "loss": 2.3171, "step": 3980500 }, { "epoch": 11.52, "learning_rate": 4.424042903621712e-05, "loss": 2.2837, "step": 3981000 }, { "epoch": 11.52, "learning_rate": 4.423970538856984e-05, "loss": 2.3081, "step": 3981500 }, { "epoch": 11.53, "learning_rate": 4.4238981740922564e-05, "loss": 2.3006, "step": 3982000 }, { "epoch": 11.53, "learning_rate": 4.4238258093275287e-05, "loss": 2.3255, "step": 3982500 }, { "epoch": 11.53, "learning_rate": 4.423753589292331e-05, "loss": 2.2871, "step": 3983000 }, { "epoch": 11.53, "learning_rate": 4.423681224527603e-05, "loss": 2.2935, "step": 3983500 }, { "epoch": 11.53, "learning_rate": 4.423608859762875e-05, "loss": 2.286, "step": 3984000 }, { "epoch": 11.53, "learning_rate": 4.4235364949981476e-05, "loss": 2.2812, "step": 3984500 }, { "epoch": 11.53, "learning_rate": 4.42346413023342e-05, "loss": 2.315, "step": 3985000 }, { "epoch": 11.54, "learning_rate": 4.423391765468693e-05, "loss": 2.2924, "step": 3985500 }, { "epoch": 11.54, "learning_rate": 4.423319545433494e-05, "loss": 2.2924, "step": 3986000 }, { "epoch": 11.54, "learning_rate": 4.4232471806687665e-05, "loss": 2.2976, "step": 3986500 }, { "epoch": 11.54, "learning_rate": 4.423174815904039e-05, "loss": 2.2832, "step": 3987000 }, { "epoch": 11.54, "learning_rate": 4.423102451139311e-05, "loss": 2.2738, "step": 3987500 }, { "epoch": 11.54, "learning_rate": 4.423030086374583e-05, "loss": 2.287, "step": 3988000 }, { "epoch": 11.55, "learning_rate": 4.4229578663393854e-05, "loss": 2.3203, "step": 3988500 }, { "epoch": 11.55, "learning_rate": 4.4228855015746576e-05, "loss": 2.3089, "step": 3989000 }, { "epoch": 11.55, "learning_rate": 4.422813281539459e-05, "loss": 2.319, "step": 3989500 }, { "epoch": 11.55, "learning_rate": 4.4227409167747314e-05, "loss": 2.2963, "step": 3990000 }, { "epoch": 11.55, "learning_rate": 4.422668552010004e-05, "loss": 2.2939, "step": 3990500 }, { "epoch": 11.55, "learning_rate": 4.422596331974806e-05, "loss": 2.2874, "step": 3991000 }, { "epoch": 11.55, "learning_rate": 4.422523967210078e-05, "loss": 2.2937, "step": 3991500 }, { "epoch": 11.56, "learning_rate": 4.42245160244535e-05, "loss": 2.2808, "step": 3992000 }, { "epoch": 11.56, "learning_rate": 4.4223792376806225e-05, "loss": 2.3007, "step": 3992500 }, { "epoch": 11.56, "learning_rate": 4.4223068729158954e-05, "loss": 2.3146, "step": 3993000 }, { "epoch": 11.56, "learning_rate": 4.4222345081511676e-05, "loss": 2.285, "step": 3993500 }, { "epoch": 11.56, "learning_rate": 4.42216214338644e-05, "loss": 2.309, "step": 3994000 }, { "epoch": 11.56, "learning_rate": 4.422089778621712e-05, "loss": 2.3153, "step": 3994500 }, { "epoch": 11.56, "learning_rate": 4.422017413856984e-05, "loss": 2.3166, "step": 3995000 }, { "epoch": 11.57, "learning_rate": 4.4219450490922565e-05, "loss": 2.3058, "step": 3995500 }, { "epoch": 11.57, "learning_rate": 4.421872684327529e-05, "loss": 2.3028, "step": 3996000 }, { "epoch": 11.57, "learning_rate": 4.421800319562801e-05, "loss": 2.3112, "step": 3996500 }, { "epoch": 11.57, "learning_rate": 4.421727954798073e-05, "loss": 2.2844, "step": 3997000 }, { "epoch": 11.57, "learning_rate": 4.421655879492405e-05, "loss": 2.3182, "step": 3997500 }, { "epoch": 11.57, "learning_rate": 4.4215835147276776e-05, "loss": 2.3341, "step": 3998000 }, { "epoch": 11.57, "learning_rate": 4.42151114996295e-05, "loss": 2.3018, "step": 3998500 }, { "epoch": 11.58, "learning_rate": 4.421438785198222e-05, "loss": 2.2824, "step": 3999000 }, { "epoch": 11.58, "learning_rate": 4.421366420433494e-05, "loss": 2.2931, "step": 3999500 }, { "epoch": 11.58, "learning_rate": 4.4212940556687665e-05, "loss": 2.3088, "step": 4000000 }, { "epoch": 11.58, "learning_rate": 4.421221690904039e-05, "loss": 2.334, "step": 4000500 }, { "epoch": 11.58, "learning_rate": 4.421149326139311e-05, "loss": 2.3087, "step": 4001000 }, { "epoch": 11.58, "learning_rate": 4.421076961374583e-05, "loss": 2.311, "step": 4001500 }, { "epoch": 11.58, "learning_rate": 4.4210045966098554e-05, "loss": 2.3158, "step": 4002000 }, { "epoch": 11.59, "learning_rate": 4.4209322318451277e-05, "loss": 2.3135, "step": 4002500 }, { "epoch": 11.59, "learning_rate": 4.4208598670804006e-05, "loss": 2.2959, "step": 4003000 }, { "epoch": 11.59, "learning_rate": 4.420787647045202e-05, "loss": 2.2845, "step": 4003500 }, { "epoch": 11.59, "learning_rate": 4.420715282280474e-05, "loss": 2.2957, "step": 4004000 }, { "epoch": 11.59, "learning_rate": 4.420643062245276e-05, "loss": 2.2841, "step": 4004500 }, { "epoch": 11.59, "learning_rate": 4.420570697480548e-05, "loss": 2.3077, "step": 4005000 }, { "epoch": 11.59, "learning_rate": 4.420498332715821e-05, "loss": 2.3048, "step": 4005500 }, { "epoch": 11.6, "learning_rate": 4.420425967951093e-05, "loss": 2.3119, "step": 4006000 }, { "epoch": 11.6, "learning_rate": 4.4203537479158955e-05, "loss": 2.3023, "step": 4006500 }, { "epoch": 11.6, "learning_rate": 4.420281383151168e-05, "loss": 2.2974, "step": 4007000 }, { "epoch": 11.6, "learning_rate": 4.42020901838644e-05, "loss": 2.2779, "step": 4007500 }, { "epoch": 11.6, "learning_rate": 4.420136653621712e-05, "loss": 2.2794, "step": 4008000 }, { "epoch": 11.6, "learning_rate": 4.4200642888569844e-05, "loss": 2.3066, "step": 4008500 }, { "epoch": 11.6, "learning_rate": 4.4199919240922566e-05, "loss": 2.2977, "step": 4009000 }, { "epoch": 11.61, "learning_rate": 4.419919559327529e-05, "loss": 2.3176, "step": 4009500 }, { "epoch": 11.61, "learning_rate": 4.419847194562801e-05, "loss": 2.2809, "step": 4010000 }, { "epoch": 11.61, "learning_rate": 4.419774829798073e-05, "loss": 2.3118, "step": 4010500 }, { "epoch": 11.61, "learning_rate": 4.4197026097628755e-05, "loss": 2.3104, "step": 4011000 }, { "epoch": 11.61, "learning_rate": 4.419630244998148e-05, "loss": 2.3073, "step": 4011500 }, { "epoch": 11.61, "learning_rate": 4.41955788023342e-05, "loss": 2.3047, "step": 4012000 }, { "epoch": 11.61, "learning_rate": 4.4194856601982215e-05, "loss": 2.2994, "step": 4012500 }, { "epoch": 11.62, "learning_rate": 4.4194132954334944e-05, "loss": 2.2983, "step": 4013000 }, { "epoch": 11.62, "learning_rate": 4.4193409306687666e-05, "loss": 2.3036, "step": 4013500 }, { "epoch": 11.62, "learning_rate": 4.419268565904039e-05, "loss": 2.3088, "step": 4014000 }, { "epoch": 11.62, "learning_rate": 4.419196201139311e-05, "loss": 2.2879, "step": 4014500 }, { "epoch": 11.62, "learning_rate": 4.419123836374583e-05, "loss": 2.2929, "step": 4015000 }, { "epoch": 11.62, "learning_rate": 4.4190514716098555e-05, "loss": 2.2861, "step": 4015500 }, { "epoch": 11.62, "learning_rate": 4.418979106845128e-05, "loss": 2.3032, "step": 4016000 }, { "epoch": 11.63, "learning_rate": 4.4189067420804006e-05, "loss": 2.3023, "step": 4016500 }, { "epoch": 11.63, "learning_rate": 4.418834377315673e-05, "loss": 2.297, "step": 4017000 }, { "epoch": 11.63, "learning_rate": 4.418762012550945e-05, "loss": 2.3063, "step": 4017500 }, { "epoch": 11.63, "learning_rate": 4.418689647786217e-05, "loss": 2.3267, "step": 4018000 }, { "epoch": 11.63, "learning_rate": 4.4186172830214895e-05, "loss": 2.2868, "step": 4018500 }, { "epoch": 11.63, "learning_rate": 4.418544918256762e-05, "loss": 2.3032, "step": 4019000 }, { "epoch": 11.63, "learning_rate": 4.418472698221563e-05, "loss": 2.3136, "step": 4019500 }, { "epoch": 11.64, "learning_rate": 4.418400333456836e-05, "loss": 2.2942, "step": 4020000 }, { "epoch": 11.64, "learning_rate": 4.4183279686921084e-05, "loss": 2.273, "step": 4020500 }, { "epoch": 11.64, "learning_rate": 4.418255748656911e-05, "loss": 2.2957, "step": 4021000 }, { "epoch": 11.64, "learning_rate": 4.418183383892183e-05, "loss": 2.2944, "step": 4021500 }, { "epoch": 11.64, "learning_rate": 4.418111019127455e-05, "loss": 2.3061, "step": 4022000 }, { "epoch": 11.64, "learning_rate": 4.418038654362727e-05, "loss": 2.3101, "step": 4022500 }, { "epoch": 11.64, "learning_rate": 4.4179662895979996e-05, "loss": 2.3026, "step": 4023000 }, { "epoch": 11.65, "learning_rate": 4.417893924833272e-05, "loss": 2.3102, "step": 4023500 }, { "epoch": 11.65, "learning_rate": 4.417821704798073e-05, "loss": 2.3056, "step": 4024000 }, { "epoch": 11.65, "learning_rate": 4.4177494847628756e-05, "loss": 2.3116, "step": 4024500 }, { "epoch": 11.65, "learning_rate": 4.417677119998148e-05, "loss": 2.2919, "step": 4025000 }, { "epoch": 11.65, "learning_rate": 4.41760475523342e-05, "loss": 2.2696, "step": 4025500 }, { "epoch": 11.65, "learning_rate": 4.417532390468692e-05, "loss": 2.2923, "step": 4026000 }, { "epoch": 11.66, "learning_rate": 4.4174600257039645e-05, "loss": 2.3166, "step": 4026500 }, { "epoch": 11.66, "learning_rate": 4.417387660939237e-05, "loss": 2.3212, "step": 4027000 }, { "epoch": 11.66, "learning_rate": 4.4173152961745096e-05, "loss": 2.3009, "step": 4027500 }, { "epoch": 11.66, "learning_rate": 4.417242931409782e-05, "loss": 2.3027, "step": 4028000 }, { "epoch": 11.66, "learning_rate": 4.417170566645054e-05, "loss": 2.2992, "step": 4028500 }, { "epoch": 11.66, "learning_rate": 4.417098201880326e-05, "loss": 2.2865, "step": 4029000 }, { "epoch": 11.66, "learning_rate": 4.4170258371155985e-05, "loss": 2.294, "step": 4029500 }, { "epoch": 11.67, "learning_rate": 4.416953472350871e-05, "loss": 2.2931, "step": 4030000 }, { "epoch": 11.67, "learning_rate": 4.416881252315673e-05, "loss": 2.2949, "step": 4030500 }, { "epoch": 11.67, "learning_rate": 4.416808887550945e-05, "loss": 2.2832, "step": 4031000 }, { "epoch": 11.67, "learning_rate": 4.4167365227862174e-05, "loss": 2.3111, "step": 4031500 }, { "epoch": 11.67, "learning_rate": 4.4166641580214896e-05, "loss": 2.3021, "step": 4032000 }, { "epoch": 11.67, "learning_rate": 4.416591937986291e-05, "loss": 2.2856, "step": 4032500 }, { "epoch": 11.67, "learning_rate": 4.4165195732215634e-05, "loss": 2.2924, "step": 4033000 }, { "epoch": 11.68, "learning_rate": 4.4164472084568356e-05, "loss": 2.3113, "step": 4033500 }, { "epoch": 11.68, "learning_rate": 4.4163748436921085e-05, "loss": 2.2964, "step": 4034000 }, { "epoch": 11.68, "learning_rate": 4.416302478927381e-05, "loss": 2.2989, "step": 4034500 }, { "epoch": 11.68, "learning_rate": 4.4162301141626536e-05, "loss": 2.3168, "step": 4035000 }, { "epoch": 11.68, "learning_rate": 4.416157749397926e-05, "loss": 2.2998, "step": 4035500 }, { "epoch": 11.68, "learning_rate": 4.416085384633198e-05, "loss": 2.2884, "step": 4036000 }, { "epoch": 11.68, "learning_rate": 4.416013309327529e-05, "loss": 2.2962, "step": 4036500 }, { "epoch": 11.69, "learning_rate": 4.415940944562801e-05, "loss": 2.318, "step": 4037000 }, { "epoch": 11.69, "learning_rate": 4.4158685797980734e-05, "loss": 2.2858, "step": 4037500 }, { "epoch": 11.69, "learning_rate": 4.4157962150333456e-05, "loss": 2.3189, "step": 4038000 }, { "epoch": 11.69, "learning_rate": 4.4157238502686185e-05, "loss": 2.3121, "step": 4038500 }, { "epoch": 11.69, "learning_rate": 4.415651485503891e-05, "loss": 2.3022, "step": 4039000 }, { "epoch": 11.69, "learning_rate": 4.415579120739163e-05, "loss": 2.289, "step": 4039500 }, { "epoch": 11.69, "learning_rate": 4.415506755974435e-05, "loss": 2.2943, "step": 4040000 }, { "epoch": 11.7, "learning_rate": 4.415434535939237e-05, "loss": 2.2873, "step": 4040500 }, { "epoch": 11.7, "learning_rate": 4.415362171174509e-05, "loss": 2.3376, "step": 4041000 }, { "epoch": 11.7, "learning_rate": 4.415289806409781e-05, "loss": 2.3106, "step": 4041500 }, { "epoch": 11.7, "learning_rate": 4.4152174416450534e-05, "loss": 2.2988, "step": 4042000 }, { "epoch": 11.7, "learning_rate": 4.415145076880326e-05, "loss": 2.3081, "step": 4042500 }, { "epoch": 11.7, "learning_rate": 4.4150727121155986e-05, "loss": 2.3062, "step": 4043000 }, { "epoch": 11.7, "learning_rate": 4.415000492080401e-05, "loss": 2.3117, "step": 4043500 }, { "epoch": 11.71, "learning_rate": 4.414928127315673e-05, "loss": 2.3184, "step": 4044000 }, { "epoch": 11.71, "learning_rate": 4.4148559072804746e-05, "loss": 2.2977, "step": 4044500 }, { "epoch": 11.71, "learning_rate": 4.414783542515747e-05, "loss": 2.3117, "step": 4045000 }, { "epoch": 11.71, "learning_rate": 4.414711177751019e-05, "loss": 2.3191, "step": 4045500 }, { "epoch": 11.71, "learning_rate": 4.414638812986291e-05, "loss": 2.3122, "step": 4046000 }, { "epoch": 11.71, "learning_rate": 4.4145664482215635e-05, "loss": 2.3071, "step": 4046500 }, { "epoch": 11.71, "learning_rate": 4.414494083456836e-05, "loss": 2.2947, "step": 4047000 }, { "epoch": 11.72, "learning_rate": 4.4144217186921086e-05, "loss": 2.313, "step": 4047500 }, { "epoch": 11.72, "learning_rate": 4.414349353927381e-05, "loss": 2.3196, "step": 4048000 }, { "epoch": 11.72, "learning_rate": 4.414276989162653e-05, "loss": 2.291, "step": 4048500 }, { "epoch": 11.72, "learning_rate": 4.414204624397925e-05, "loss": 2.2913, "step": 4049000 }, { "epoch": 11.72, "learning_rate": 4.414132404362727e-05, "loss": 2.3165, "step": 4049500 }, { "epoch": 11.72, "learning_rate": 4.414060039598e-05, "loss": 2.294, "step": 4050000 }, { "epoch": 11.72, "learning_rate": 4.413987674833272e-05, "loss": 2.31, "step": 4050500 }, { "epoch": 11.73, "learning_rate": 4.413915310068544e-05, "loss": 2.2875, "step": 4051000 }, { "epoch": 11.73, "learning_rate": 4.4138429453038164e-05, "loss": 2.3076, "step": 4051500 }, { "epoch": 11.73, "learning_rate": 4.4137707252686186e-05, "loss": 2.3173, "step": 4052000 }, { "epoch": 11.73, "learning_rate": 4.413698360503891e-05, "loss": 2.3119, "step": 4052500 }, { "epoch": 11.73, "learning_rate": 4.413625995739163e-05, "loss": 2.2926, "step": 4053000 }, { "epoch": 11.73, "learning_rate": 4.413553630974435e-05, "loss": 2.3058, "step": 4053500 }, { "epoch": 11.73, "learning_rate": 4.4134812662097075e-05, "loss": 2.2948, "step": 4054000 }, { "epoch": 11.74, "learning_rate": 4.413409046174509e-05, "loss": 2.3115, "step": 4054500 }, { "epoch": 11.74, "learning_rate": 4.413336681409781e-05, "loss": 2.3101, "step": 4055000 }, { "epoch": 11.74, "learning_rate": 4.4132643166450535e-05, "loss": 2.3207, "step": 4055500 }, { "epoch": 11.74, "learning_rate": 4.4131919518803264e-05, "loss": 2.2986, "step": 4056000 }, { "epoch": 11.74, "learning_rate": 4.4131195871155986e-05, "loss": 2.3221, "step": 4056500 }, { "epoch": 11.74, "learning_rate": 4.413047222350871e-05, "loss": 2.2736, "step": 4057000 }, { "epoch": 11.74, "learning_rate": 4.412975002315673e-05, "loss": 2.2975, "step": 4057500 }, { "epoch": 11.75, "learning_rate": 4.412902637550945e-05, "loss": 2.2917, "step": 4058000 }, { "epoch": 11.75, "learning_rate": 4.4128302727862175e-05, "loss": 2.3064, "step": 4058500 }, { "epoch": 11.75, "learning_rate": 4.41275790802149e-05, "loss": 2.296, "step": 4059000 }, { "epoch": 11.75, "learning_rate": 4.412685543256762e-05, "loss": 2.2987, "step": 4059500 }, { "epoch": 11.75, "learning_rate": 4.412613178492034e-05, "loss": 2.2945, "step": 4060000 }, { "epoch": 11.75, "learning_rate": 4.4125408137273064e-05, "loss": 2.3104, "step": 4060500 }, { "epoch": 11.75, "learning_rate": 4.412468593692109e-05, "loss": 2.2864, "step": 4061000 }, { "epoch": 11.76, "learning_rate": 4.412396228927381e-05, "loss": 2.301, "step": 4061500 }, { "epoch": 11.76, "learning_rate": 4.412323864162653e-05, "loss": 2.2895, "step": 4062000 }, { "epoch": 11.76, "learning_rate": 4.4122514993979253e-05, "loss": 2.2987, "step": 4062500 }, { "epoch": 11.76, "learning_rate": 4.4121791346331976e-05, "loss": 2.2841, "step": 4063000 }, { "epoch": 11.76, "learning_rate": 4.41210676986847e-05, "loss": 2.2942, "step": 4063500 }, { "epoch": 11.76, "learning_rate": 4.412034405103742e-05, "loss": 2.3106, "step": 4064000 }, { "epoch": 11.77, "learning_rate": 4.411962040339014e-05, "loss": 2.3025, "step": 4064500 }, { "epoch": 11.77, "learning_rate": 4.411889675574287e-05, "loss": 2.3014, "step": 4065000 }, { "epoch": 11.77, "learning_rate": 4.411817455539089e-05, "loss": 2.3265, "step": 4065500 }, { "epoch": 11.77, "learning_rate": 4.4117450907743616e-05, "loss": 2.3057, "step": 4066000 }, { "epoch": 11.77, "learning_rate": 4.411672726009634e-05, "loss": 2.3206, "step": 4066500 }, { "epoch": 11.77, "learning_rate": 4.411600361244906e-05, "loss": 2.2914, "step": 4067000 }, { "epoch": 11.77, "learning_rate": 4.411527996480178e-05, "loss": 2.2837, "step": 4067500 }, { "epoch": 11.78, "learning_rate": 4.41145577644498e-05, "loss": 2.3036, "step": 4068000 }, { "epoch": 11.78, "learning_rate": 4.411383411680252e-05, "loss": 2.3062, "step": 4068500 }, { "epoch": 11.78, "learning_rate": 4.4113111916450536e-05, "loss": 2.2849, "step": 4069000 }, { "epoch": 11.78, "learning_rate": 4.4112388268803265e-05, "loss": 2.2961, "step": 4069500 }, { "epoch": 11.78, "learning_rate": 4.411166462115599e-05, "loss": 2.3164, "step": 4070000 }, { "epoch": 11.78, "learning_rate": 4.411094097350871e-05, "loss": 2.3031, "step": 4070500 }, { "epoch": 11.78, "learning_rate": 4.411021732586143e-05, "loss": 2.2988, "step": 4071000 }, { "epoch": 11.79, "learning_rate": 4.4109493678214154e-05, "loss": 2.2962, "step": 4071500 }, { "epoch": 11.79, "learning_rate": 4.4108770030566876e-05, "loss": 2.2982, "step": 4072000 }, { "epoch": 11.79, "learning_rate": 4.4108046382919605e-05, "loss": 2.3011, "step": 4072500 }, { "epoch": 11.79, "learning_rate": 4.410732418256762e-05, "loss": 2.3009, "step": 4073000 }, { "epoch": 11.79, "learning_rate": 4.410660053492034e-05, "loss": 2.2899, "step": 4073500 }, { "epoch": 11.79, "learning_rate": 4.4105878334568365e-05, "loss": 2.2802, "step": 4074000 }, { "epoch": 11.79, "learning_rate": 4.410515468692109e-05, "loss": 2.3145, "step": 4074500 }, { "epoch": 11.8, "learning_rate": 4.410443103927381e-05, "loss": 2.2888, "step": 4075000 }, { "epoch": 11.8, "learning_rate": 4.410370739162653e-05, "loss": 2.2885, "step": 4075500 }, { "epoch": 11.8, "learning_rate": 4.4102983743979254e-05, "loss": 2.3152, "step": 4076000 }, { "epoch": 11.8, "learning_rate": 4.4102260096331976e-05, "loss": 2.2972, "step": 4076500 }, { "epoch": 11.8, "learning_rate": 4.41015364486847e-05, "loss": 2.2907, "step": 4077000 }, { "epoch": 11.8, "learning_rate": 4.410081280103742e-05, "loss": 2.2915, "step": 4077500 }, { "epoch": 11.8, "learning_rate": 4.410008915339014e-05, "loss": 2.2878, "step": 4078000 }, { "epoch": 11.81, "learning_rate": 4.4099366953038165e-05, "loss": 2.2933, "step": 4078500 }, { "epoch": 11.81, "learning_rate": 4.409864330539089e-05, "loss": 2.3163, "step": 4079000 }, { "epoch": 11.81, "learning_rate": 4.409791965774361e-05, "loss": 2.3006, "step": 4079500 }, { "epoch": 11.81, "learning_rate": 4.409719601009634e-05, "loss": 2.2837, "step": 4080000 }, { "epoch": 11.81, "learning_rate": 4.4096473809744354e-05, "loss": 2.3107, "step": 4080500 }, { "epoch": 11.81, "learning_rate": 4.409575016209708e-05, "loss": 2.3069, "step": 4081000 }, { "epoch": 11.81, "learning_rate": 4.409502796174509e-05, "loss": 2.2875, "step": 4081500 }, { "epoch": 11.82, "learning_rate": 4.4094304314097815e-05, "loss": 2.3154, "step": 4082000 }, { "epoch": 11.82, "learning_rate": 4.4093580666450544e-05, "loss": 2.3142, "step": 4082500 }, { "epoch": 11.82, "learning_rate": 4.4092857018803266e-05, "loss": 2.3172, "step": 4083000 }, { "epoch": 11.82, "learning_rate": 4.409213481845128e-05, "loss": 2.3143, "step": 4083500 }, { "epoch": 11.82, "learning_rate": 4.4091411170804004e-05, "loss": 2.3086, "step": 4084000 }, { "epoch": 11.82, "learning_rate": 4.4090687523156726e-05, "loss": 2.3097, "step": 4084500 }, { "epoch": 11.82, "learning_rate": 4.408996387550945e-05, "loss": 2.2923, "step": 4085000 }, { "epoch": 11.83, "learning_rate": 4.4089241675157464e-05, "loss": 2.3094, "step": 4085500 }, { "epoch": 11.83, "learning_rate": 4.408851802751019e-05, "loss": 2.2949, "step": 4086000 }, { "epoch": 11.83, "learning_rate": 4.4087794379862915e-05, "loss": 2.3103, "step": 4086500 }, { "epoch": 11.83, "learning_rate": 4.408707073221564e-05, "loss": 2.3002, "step": 4087000 }, { "epoch": 11.83, "learning_rate": 4.4086347084568366e-05, "loss": 2.285, "step": 4087500 }, { "epoch": 11.83, "learning_rate": 4.408562488421638e-05, "loss": 2.2682, "step": 4088000 }, { "epoch": 11.83, "learning_rate": 4.4084901236569104e-05, "loss": 2.3297, "step": 4088500 }, { "epoch": 11.84, "learning_rate": 4.4084177588921826e-05, "loss": 2.2834, "step": 4089000 }, { "epoch": 11.84, "learning_rate": 4.408345394127455e-05, "loss": 2.3078, "step": 4089500 }, { "epoch": 11.84, "learning_rate": 4.408273029362727e-05, "loss": 2.3086, "step": 4090000 }, { "epoch": 11.84, "learning_rate": 4.408200664597999e-05, "loss": 2.323, "step": 4090500 }, { "epoch": 11.84, "learning_rate": 4.4081282998332715e-05, "loss": 2.2837, "step": 4091000 }, { "epoch": 11.84, "learning_rate": 4.4080559350685444e-05, "loss": 2.328, "step": 4091500 }, { "epoch": 11.84, "learning_rate": 4.4079835703038166e-05, "loss": 2.3061, "step": 4092000 }, { "epoch": 11.85, "learning_rate": 4.407911350268618e-05, "loss": 2.3229, "step": 4092500 }, { "epoch": 11.85, "learning_rate": 4.4078389855038904e-05, "loss": 2.3036, "step": 4093000 }, { "epoch": 11.85, "learning_rate": 4.4077666207391626e-05, "loss": 2.304, "step": 4093500 }, { "epoch": 11.85, "learning_rate": 4.407694255974435e-05, "loss": 2.303, "step": 4094000 }, { "epoch": 11.85, "learning_rate": 4.407621891209707e-05, "loss": 2.3261, "step": 4094500 }, { "epoch": 11.85, "learning_rate": 4.40754952644498e-05, "loss": 2.2864, "step": 4095000 }, { "epoch": 11.85, "learning_rate": 4.4074773064097815e-05, "loss": 2.3366, "step": 4095500 }, { "epoch": 11.86, "learning_rate": 4.4074049416450544e-05, "loss": 2.2698, "step": 4096000 }, { "epoch": 11.86, "learning_rate": 4.4073325768803267e-05, "loss": 2.2931, "step": 4096500 }, { "epoch": 11.86, "learning_rate": 4.407260212115599e-05, "loss": 2.308, "step": 4097000 }, { "epoch": 11.86, "learning_rate": 4.407187847350871e-05, "loss": 2.2926, "step": 4097500 }, { "epoch": 11.86, "learning_rate": 4.407115482586143e-05, "loss": 2.3188, "step": 4098000 }, { "epoch": 11.86, "learning_rate": 4.407043407280474e-05, "loss": 2.3002, "step": 4098500 }, { "epoch": 11.86, "learning_rate": 4.4069710425157464e-05, "loss": 2.3053, "step": 4099000 }, { "epoch": 11.87, "learning_rate": 4.406898677751019e-05, "loss": 2.2839, "step": 4099500 }, { "epoch": 11.87, "learning_rate": 4.4068263129862916e-05, "loss": 2.3119, "step": 4100000 }, { "epoch": 11.87, "learning_rate": 4.406753948221564e-05, "loss": 2.2932, "step": 4100500 }, { "epoch": 11.87, "learning_rate": 4.406681872915895e-05, "loss": 2.2897, "step": 4101000 }, { "epoch": 11.87, "learning_rate": 4.406609508151167e-05, "loss": 2.3241, "step": 4101500 }, { "epoch": 11.87, "learning_rate": 4.406537143386439e-05, "loss": 2.3068, "step": 4102000 }, { "epoch": 11.88, "learning_rate": 4.406464778621712e-05, "loss": 2.3062, "step": 4102500 }, { "epoch": 11.88, "learning_rate": 4.406392413856984e-05, "loss": 2.3035, "step": 4103000 }, { "epoch": 11.88, "learning_rate": 4.406320049092257e-05, "loss": 2.3151, "step": 4103500 }, { "epoch": 11.88, "learning_rate": 4.4062476843275294e-05, "loss": 2.2993, "step": 4104000 }, { "epoch": 11.88, "learning_rate": 4.4061753195628016e-05, "loss": 2.297, "step": 4104500 }, { "epoch": 11.88, "learning_rate": 4.406102954798074e-05, "loss": 2.312, "step": 4105000 }, { "epoch": 11.88, "learning_rate": 4.406030590033346e-05, "loss": 2.2969, "step": 4105500 }, { "epoch": 11.89, "learning_rate": 4.405958225268618e-05, "loss": 2.3049, "step": 4106000 }, { "epoch": 11.89, "learning_rate": 4.4058858605038905e-05, "loss": 2.3005, "step": 4106500 }, { "epoch": 11.89, "learning_rate": 4.405813495739163e-05, "loss": 2.2733, "step": 4107000 }, { "epoch": 11.89, "learning_rate": 4.405741130974435e-05, "loss": 2.3173, "step": 4107500 }, { "epoch": 11.89, "learning_rate": 4.405668910939237e-05, "loss": 2.2702, "step": 4108000 }, { "epoch": 11.89, "learning_rate": 4.4055965461745094e-05, "loss": 2.2972, "step": 4108500 }, { "epoch": 11.89, "learning_rate": 4.4055241814097816e-05, "loss": 2.2941, "step": 4109000 }, { "epoch": 11.9, "learning_rate": 4.405451816645054e-05, "loss": 2.2935, "step": 4109500 }, { "epoch": 11.9, "learning_rate": 4.4053795966098554e-05, "loss": 2.2781, "step": 4110000 }, { "epoch": 11.9, "learning_rate": 4.405307231845128e-05, "loss": 2.2801, "step": 4110500 }, { "epoch": 11.9, "learning_rate": 4.4052348670804005e-05, "loss": 2.2994, "step": 4111000 }, { "epoch": 11.9, "learning_rate": 4.405162502315673e-05, "loss": 2.2714, "step": 4111500 }, { "epoch": 11.9, "learning_rate": 4.405090137550945e-05, "loss": 2.3237, "step": 4112000 }, { "epoch": 11.9, "learning_rate": 4.405017772786217e-05, "loss": 2.3093, "step": 4112500 }, { "epoch": 11.91, "learning_rate": 4.4049454080214894e-05, "loss": 2.3054, "step": 4113000 }, { "epoch": 11.91, "learning_rate": 4.404873043256762e-05, "loss": 2.2994, "step": 4113500 }, { "epoch": 11.91, "learning_rate": 4.4048006784920345e-05, "loss": 2.3025, "step": 4114000 }, { "epoch": 11.91, "learning_rate": 4.404728458456836e-05, "loss": 2.278, "step": 4114500 }, { "epoch": 11.91, "learning_rate": 4.404656093692108e-05, "loss": 2.3121, "step": 4115000 }, { "epoch": 11.91, "learning_rate": 4.4045837289273805e-05, "loss": 2.2963, "step": 4115500 }, { "epoch": 11.91, "learning_rate": 4.404511364162653e-05, "loss": 2.295, "step": 4116000 }, { "epoch": 11.92, "learning_rate": 4.404438999397925e-05, "loss": 2.3031, "step": 4116500 }, { "epoch": 11.92, "learning_rate": 4.404366634633197e-05, "loss": 2.2905, "step": 4117000 }, { "epoch": 11.92, "learning_rate": 4.40429426986847e-05, "loss": 2.2766, "step": 4117500 }, { "epoch": 11.92, "learning_rate": 4.404222049833272e-05, "loss": 2.3105, "step": 4118000 }, { "epoch": 11.92, "learning_rate": 4.4041496850685446e-05, "loss": 2.2809, "step": 4118500 }, { "epoch": 11.92, "learning_rate": 4.404077320303817e-05, "loss": 2.3152, "step": 4119000 }, { "epoch": 11.92, "learning_rate": 4.404004955539089e-05, "loss": 2.2916, "step": 4119500 }, { "epoch": 11.93, "learning_rate": 4.403932590774361e-05, "loss": 2.2978, "step": 4120000 }, { "epoch": 11.93, "learning_rate": 4.403860515468692e-05, "loss": 2.3157, "step": 4120500 }, { "epoch": 11.93, "learning_rate": 4.4037881507039643e-05, "loss": 2.2839, "step": 4121000 }, { "epoch": 11.93, "learning_rate": 4.403715785939237e-05, "loss": 2.3198, "step": 4121500 }, { "epoch": 11.93, "learning_rate": 4.4036434211745095e-05, "loss": 2.2724, "step": 4122000 }, { "epoch": 11.93, "learning_rate": 4.403571056409782e-05, "loss": 2.3063, "step": 4122500 }, { "epoch": 11.93, "learning_rate": 4.403498691645054e-05, "loss": 2.3071, "step": 4123000 }, { "epoch": 11.94, "learning_rate": 4.403426326880326e-05, "loss": 2.2885, "step": 4123500 }, { "epoch": 11.94, "learning_rate": 4.4033539621155984e-05, "loss": 2.3243, "step": 4124000 }, { "epoch": 11.94, "learning_rate": 4.4032815973508706e-05, "loss": 2.298, "step": 4124500 }, { "epoch": 11.94, "learning_rate": 4.403209377315673e-05, "loss": 2.2972, "step": 4125000 }, { "epoch": 11.94, "learning_rate": 4.403137157280475e-05, "loss": 2.3127, "step": 4125500 }, { "epoch": 11.94, "learning_rate": 4.403064792515747e-05, "loss": 2.2933, "step": 4126000 }, { "epoch": 11.94, "learning_rate": 4.4029924277510195e-05, "loss": 2.2892, "step": 4126500 }, { "epoch": 11.95, "learning_rate": 4.402920062986292e-05, "loss": 2.2909, "step": 4127000 }, { "epoch": 11.95, "learning_rate": 4.402847842951093e-05, "loss": 2.3121, "step": 4127500 }, { "epoch": 11.95, "learning_rate": 4.4027754781863655e-05, "loss": 2.3134, "step": 4128000 }, { "epoch": 11.95, "learning_rate": 4.402703113421638e-05, "loss": 2.2711, "step": 4128500 }, { "epoch": 11.95, "learning_rate": 4.40263074865691e-05, "loss": 2.3203, "step": 4129000 }, { "epoch": 11.95, "learning_rate": 4.402558528621712e-05, "loss": 2.3034, "step": 4129500 }, { "epoch": 11.95, "learning_rate": 4.4024861638569844e-05, "loss": 2.3063, "step": 4130000 }, { "epoch": 11.96, "learning_rate": 4.402413943821786e-05, "loss": 2.3168, "step": 4130500 }, { "epoch": 11.96, "learning_rate": 4.402341579057058e-05, "loss": 2.3214, "step": 4131000 }, { "epoch": 11.96, "learning_rate": 4.4022692142923304e-05, "loss": 2.3061, "step": 4131500 }, { "epoch": 11.96, "learning_rate": 4.4021968495276026e-05, "loss": 2.3007, "step": 4132000 }, { "epoch": 11.96, "learning_rate": 4.402124484762875e-05, "loss": 2.3079, "step": 4132500 }, { "epoch": 11.96, "learning_rate": 4.402052119998148e-05, "loss": 2.2661, "step": 4133000 }, { "epoch": 11.96, "learning_rate": 4.40197975523342e-05, "loss": 2.3062, "step": 4133500 }, { "epoch": 11.97, "learning_rate": 4.401907390468692e-05, "loss": 2.2874, "step": 4134000 }, { "epoch": 11.97, "learning_rate": 4.4018351704334944e-05, "loss": 2.2984, "step": 4134500 }, { "epoch": 11.97, "learning_rate": 4.4017628056687667e-05, "loss": 2.3181, "step": 4135000 }, { "epoch": 11.97, "learning_rate": 4.401690440904039e-05, "loss": 2.3004, "step": 4135500 }, { "epoch": 11.97, "learning_rate": 4.401618076139311e-05, "loss": 2.3158, "step": 4136000 }, { "epoch": 11.97, "learning_rate": 4.401545711374583e-05, "loss": 2.3053, "step": 4136500 }, { "epoch": 11.97, "learning_rate": 4.4014733466098555e-05, "loss": 2.3112, "step": 4137000 }, { "epoch": 11.98, "learning_rate": 4.401401126574657e-05, "loss": 2.3036, "step": 4137500 }, { "epoch": 11.98, "learning_rate": 4.40132876180993e-05, "loss": 2.2959, "step": 4138000 }, { "epoch": 11.98, "learning_rate": 4.401256397045202e-05, "loss": 2.272, "step": 4138500 }, { "epoch": 11.98, "learning_rate": 4.4011840322804745e-05, "loss": 2.2947, "step": 4139000 }, { "epoch": 11.98, "learning_rate": 4.401111667515747e-05, "loss": 2.3185, "step": 4139500 }, { "epoch": 11.98, "learning_rate": 4.401039302751019e-05, "loss": 2.3121, "step": 4140000 }, { "epoch": 11.99, "learning_rate": 4.400966937986292e-05, "loss": 2.3207, "step": 4140500 }, { "epoch": 11.99, "learning_rate": 4.400894573221564e-05, "loss": 2.3163, "step": 4141000 }, { "epoch": 11.99, "learning_rate": 4.4008223531863656e-05, "loss": 2.3147, "step": 4141500 }, { "epoch": 11.99, "learning_rate": 4.400749988421638e-05, "loss": 2.304, "step": 4142000 }, { "epoch": 11.99, "learning_rate": 4.40067762365691e-05, "loss": 2.2826, "step": 4142500 }, { "epoch": 11.99, "learning_rate": 4.400605403621712e-05, "loss": 2.3074, "step": 4143000 }, { "epoch": 11.99, "learning_rate": 4.4005330388569845e-05, "loss": 2.3019, "step": 4143500 }, { "epoch": 12.0, "learning_rate": 4.400460674092257e-05, "loss": 2.3096, "step": 4144000 }, { "epoch": 12.0, "learning_rate": 4.400388309327529e-05, "loss": 2.2964, "step": 4144500 }, { "epoch": 12.0, "learning_rate": 4.400315944562801e-05, "loss": 2.3088, "step": 4145000 }, { "epoch": 12.0, "learning_rate": 4.4002435797980734e-05, "loss": 2.2955, "step": 4145500 }, { "epoch": 12.0, "eval_accuracy": 0.6509588873451346, "eval_accuracy_mlm": 0.6136933702388812, "eval_accuracy_nsp": 0.8509415975764575, "eval_loss": 2.2997477054595947, "eval_runtime": 330.67, "eval_samples_per_second": 1319.702, "eval_steps_per_second": 54.988, "step": 4145664 }, { "epoch": 12.0, "learning_rate": 4.400171504492405e-05, "loss": 2.2811, "step": 4146000 }, { "epoch": 12.0, "learning_rate": 4.400099139727677e-05, "loss": 2.2889, "step": 4146500 }, { "epoch": 12.0, "learning_rate": 4.4000267749629494e-05, "loss": 2.2654, "step": 4147000 }, { "epoch": 12.01, "learning_rate": 4.3999544101982216e-05, "loss": 2.2694, "step": 4147500 }, { "epoch": 12.01, "learning_rate": 4.3998820454334945e-05, "loss": 2.3201, "step": 4148000 }, { "epoch": 12.01, "learning_rate": 4.399809680668767e-05, "loss": 2.3081, "step": 4148500 }, { "epoch": 12.01, "learning_rate": 4.399737315904039e-05, "loss": 2.2992, "step": 4149000 }, { "epoch": 12.01, "learning_rate": 4.399664951139311e-05, "loss": 2.2677, "step": 4149500 }, { "epoch": 12.01, "learning_rate": 4.3995925863745834e-05, "loss": 2.296, "step": 4150000 }, { "epoch": 12.01, "learning_rate": 4.3995202216098556e-05, "loss": 2.2578, "step": 4150500 }, { "epoch": 12.02, "learning_rate": 4.399447856845128e-05, "loss": 2.2632, "step": 4151000 }, { "epoch": 12.02, "learning_rate": 4.3993754920804e-05, "loss": 2.2901, "step": 4151500 }, { "epoch": 12.02, "learning_rate": 4.399303272045202e-05, "loss": 2.2657, "step": 4152000 }, { "epoch": 12.02, "learning_rate": 4.3992309072804745e-05, "loss": 2.2606, "step": 4152500 }, { "epoch": 12.02, "learning_rate": 4.399158542515747e-05, "loss": 2.2653, "step": 4153000 }, { "epoch": 12.02, "learning_rate": 4.399086177751019e-05, "loss": 2.2952, "step": 4153500 }, { "epoch": 12.02, "learning_rate": 4.399013812986291e-05, "loss": 2.2594, "step": 4154000 }, { "epoch": 12.03, "learning_rate": 4.3989414482215634e-05, "loss": 2.2807, "step": 4154500 }, { "epoch": 12.03, "learning_rate": 4.398869083456836e-05, "loss": 2.2612, "step": 4155000 }, { "epoch": 12.03, "learning_rate": 4.398796863421638e-05, "loss": 2.2717, "step": 4155500 }, { "epoch": 12.03, "learning_rate": 4.39872449865691e-05, "loss": 2.291, "step": 4156000 }, { "epoch": 12.03, "learning_rate": 4.398652133892183e-05, "loss": 2.2707, "step": 4156500 }, { "epoch": 12.03, "learning_rate": 4.398579769127455e-05, "loss": 2.2761, "step": 4157000 }, { "epoch": 12.03, "learning_rate": 4.398507549092257e-05, "loss": 2.2908, "step": 4157500 }, { "epoch": 12.04, "learning_rate": 4.398435184327529e-05, "loss": 2.3035, "step": 4158000 }, { "epoch": 12.04, "learning_rate": 4.3983629642923306e-05, "loss": 2.2696, "step": 4158500 }, { "epoch": 12.04, "learning_rate": 4.398290599527603e-05, "loss": 2.2731, "step": 4159000 }, { "epoch": 12.04, "learning_rate": 4.398218234762875e-05, "loss": 2.3038, "step": 4159500 }, { "epoch": 12.04, "learning_rate": 4.398145869998148e-05, "loss": 2.2794, "step": 4160000 }, { "epoch": 12.04, "learning_rate": 4.39807350523342e-05, "loss": 2.2725, "step": 4160500 }, { "epoch": 12.04, "learning_rate": 4.398001285198222e-05, "loss": 2.2751, "step": 4161000 }, { "epoch": 12.05, "learning_rate": 4.397929065163023e-05, "loss": 2.2884, "step": 4161500 }, { "epoch": 12.05, "learning_rate": 4.3978568451278255e-05, "loss": 2.2832, "step": 4162000 }, { "epoch": 12.05, "learning_rate": 4.397784480363098e-05, "loss": 2.3056, "step": 4162500 }, { "epoch": 12.05, "learning_rate": 4.39771211559837e-05, "loss": 2.2926, "step": 4163000 }, { "epoch": 12.05, "learning_rate": 4.397639750833643e-05, "loss": 2.2928, "step": 4163500 }, { "epoch": 12.05, "learning_rate": 4.397567386068915e-05, "loss": 2.2595, "step": 4164000 }, { "epoch": 12.05, "learning_rate": 4.397495021304187e-05, "loss": 2.3109, "step": 4164500 }, { "epoch": 12.06, "learning_rate": 4.3974226565394595e-05, "loss": 2.2821, "step": 4165000 }, { "epoch": 12.06, "learning_rate": 4.397350291774732e-05, "loss": 2.2694, "step": 4165500 }, { "epoch": 12.06, "learning_rate": 4.397277927010004e-05, "loss": 2.2936, "step": 4166000 }, { "epoch": 12.06, "learning_rate": 4.3972057069748055e-05, "loss": 2.2966, "step": 4166500 }, { "epoch": 12.06, "learning_rate": 4.397133342210078e-05, "loss": 2.3021, "step": 4167000 }, { "epoch": 12.06, "learning_rate": 4.39706097744535e-05, "loss": 2.3038, "step": 4167500 }, { "epoch": 12.06, "learning_rate": 4.396988612680623e-05, "loss": 2.2854, "step": 4168000 }, { "epoch": 12.07, "learning_rate": 4.396916247915895e-05, "loss": 2.2965, "step": 4168500 }, { "epoch": 12.07, "learning_rate": 4.396843883151167e-05, "loss": 2.2742, "step": 4169000 }, { "epoch": 12.07, "learning_rate": 4.3967715183864395e-05, "loss": 2.2975, "step": 4169500 }, { "epoch": 12.07, "learning_rate": 4.396699153621712e-05, "loss": 2.2876, "step": 4170000 }, { "epoch": 12.07, "learning_rate": 4.3966267888569846e-05, "loss": 2.2969, "step": 4170500 }, { "epoch": 12.07, "learning_rate": 4.396554568821786e-05, "loss": 2.3158, "step": 4171000 }, { "epoch": 12.07, "learning_rate": 4.3964822040570584e-05, "loss": 2.2862, "step": 4171500 }, { "epoch": 12.08, "learning_rate": 4.3964098392923306e-05, "loss": 2.292, "step": 4172000 }, { "epoch": 12.08, "learning_rate": 4.396337474527603e-05, "loss": 2.3026, "step": 4172500 }, { "epoch": 12.08, "learning_rate": 4.396265109762875e-05, "loss": 2.2692, "step": 4173000 }, { "epoch": 12.08, "learning_rate": 4.396192744998148e-05, "loss": 2.2834, "step": 4173500 }, { "epoch": 12.08, "learning_rate": 4.39612038023342e-05, "loss": 2.2802, "step": 4174000 }, { "epoch": 12.08, "learning_rate": 4.3960480154686924e-05, "loss": 2.2563, "step": 4174500 }, { "epoch": 12.08, "learning_rate": 4.3959756507039647e-05, "loss": 2.2814, "step": 4175000 }, { "epoch": 12.09, "learning_rate": 4.395903430668766e-05, "loss": 2.2651, "step": 4175500 }, { "epoch": 12.09, "learning_rate": 4.3958310659040384e-05, "loss": 2.273, "step": 4176000 }, { "epoch": 12.09, "learning_rate": 4.395758845868841e-05, "loss": 2.2903, "step": 4176500 }, { "epoch": 12.09, "learning_rate": 4.395686481104113e-05, "loss": 2.2825, "step": 4177000 }, { "epoch": 12.09, "learning_rate": 4.395614116339385e-05, "loss": 2.2812, "step": 4177500 }, { "epoch": 12.09, "learning_rate": 4.395541751574658e-05, "loss": 2.2721, "step": 4178000 }, { "epoch": 12.1, "learning_rate": 4.39546938680993e-05, "loss": 2.2757, "step": 4178500 }, { "epoch": 12.1, "learning_rate": 4.3953970220452025e-05, "loss": 2.2831, "step": 4179000 }, { "epoch": 12.1, "learning_rate": 4.395324657280475e-05, "loss": 2.2892, "step": 4179500 }, { "epoch": 12.1, "learning_rate": 4.395252292515747e-05, "loss": 2.3047, "step": 4180000 }, { "epoch": 12.1, "learning_rate": 4.3951800724805485e-05, "loss": 2.3052, "step": 4180500 }, { "epoch": 12.1, "learning_rate": 4.395107707715821e-05, "loss": 2.2793, "step": 4181000 }, { "epoch": 12.1, "learning_rate": 4.395035487680623e-05, "loss": 2.2668, "step": 4181500 }, { "epoch": 12.11, "learning_rate": 4.394963122915895e-05, "loss": 2.283, "step": 4182000 }, { "epoch": 12.11, "learning_rate": 4.3948907581511674e-05, "loss": 2.3011, "step": 4182500 }, { "epoch": 12.11, "learning_rate": 4.394818682845498e-05, "loss": 2.2881, "step": 4183000 }, { "epoch": 12.11, "learning_rate": 4.3947463180807705e-05, "loss": 2.2778, "step": 4183500 }, { "epoch": 12.11, "learning_rate": 4.394673953316043e-05, "loss": 2.2771, "step": 4184000 }, { "epoch": 12.11, "learning_rate": 4.3946015885513156e-05, "loss": 2.2777, "step": 4184500 }, { "epoch": 12.11, "learning_rate": 4.394529223786588e-05, "loss": 2.2881, "step": 4185000 }, { "epoch": 12.12, "learning_rate": 4.39445685902186e-05, "loss": 2.2826, "step": 4185500 }, { "epoch": 12.12, "learning_rate": 4.394384494257133e-05, "loss": 2.2715, "step": 4186000 }, { "epoch": 12.12, "learning_rate": 4.394312129492405e-05, "loss": 2.2986, "step": 4186500 }, { "epoch": 12.12, "learning_rate": 4.394239909457207e-05, "loss": 2.3083, "step": 4187000 }, { "epoch": 12.12, "learning_rate": 4.394167544692479e-05, "loss": 2.2807, "step": 4187500 }, { "epoch": 12.12, "learning_rate": 4.394095179927751e-05, "loss": 2.2901, "step": 4188000 }, { "epoch": 12.12, "learning_rate": 4.3940228151630234e-05, "loss": 2.2753, "step": 4188500 }, { "epoch": 12.13, "learning_rate": 4.3939504503982956e-05, "loss": 2.3118, "step": 4189000 }, { "epoch": 12.13, "learning_rate": 4.393878085633568e-05, "loss": 2.2994, "step": 4189500 }, { "epoch": 12.13, "learning_rate": 4.393805720868841e-05, "loss": 2.2756, "step": 4190000 }, { "epoch": 12.13, "learning_rate": 4.393733356104113e-05, "loss": 2.2828, "step": 4190500 }, { "epoch": 12.13, "learning_rate": 4.393660991339385e-05, "loss": 2.296, "step": 4191000 }, { "epoch": 12.13, "learning_rate": 4.3935886265746574e-05, "loss": 2.2889, "step": 4191500 }, { "epoch": 12.13, "learning_rate": 4.3935162618099296e-05, "loss": 2.2638, "step": 4192000 }, { "epoch": 12.14, "learning_rate": 4.393443897045202e-05, "loss": 2.2708, "step": 4192500 }, { "epoch": 12.14, "learning_rate": 4.393371677010004e-05, "loss": 2.2748, "step": 4193000 }, { "epoch": 12.14, "learning_rate": 4.3932994569748057e-05, "loss": 2.2795, "step": 4193500 }, { "epoch": 12.14, "learning_rate": 4.3932270922100786e-05, "loss": 2.2762, "step": 4194000 }, { "epoch": 12.14, "learning_rate": 4.393154727445351e-05, "loss": 2.2827, "step": 4194500 }, { "epoch": 12.14, "learning_rate": 4.393082362680623e-05, "loss": 2.2756, "step": 4195000 }, { "epoch": 12.14, "learning_rate": 4.3930101426454246e-05, "loss": 2.262, "step": 4195500 }, { "epoch": 12.15, "learning_rate": 4.392937777880697e-05, "loss": 2.2675, "step": 4196000 }, { "epoch": 12.15, "learning_rate": 4.392865413115969e-05, "loss": 2.2915, "step": 4196500 }, { "epoch": 12.15, "learning_rate": 4.392793048351241e-05, "loss": 2.2717, "step": 4197000 }, { "epoch": 12.15, "learning_rate": 4.3927206835865135e-05, "loss": 2.2906, "step": 4197500 }, { "epoch": 12.15, "learning_rate": 4.392648318821786e-05, "loss": 2.2978, "step": 4198000 }, { "epoch": 12.15, "learning_rate": 4.3925759540570586e-05, "loss": 2.2921, "step": 4198500 }, { "epoch": 12.15, "learning_rate": 4.392503589292331e-05, "loss": 2.2976, "step": 4199000 }, { "epoch": 12.16, "learning_rate": 4.3924313692571324e-05, "loss": 2.3069, "step": 4199500 }, { "epoch": 12.16, "learning_rate": 4.3923590044924046e-05, "loss": 2.27, "step": 4200000 }, { "epoch": 12.16, "learning_rate": 4.3922866397276775e-05, "loss": 2.2636, "step": 4200500 }, { "epoch": 12.16, "learning_rate": 4.39221427496295e-05, "loss": 2.2525, "step": 4201000 }, { "epoch": 12.16, "learning_rate": 4.392141910198222e-05, "loss": 2.2893, "step": 4201500 }, { "epoch": 12.16, "learning_rate": 4.3920696901630235e-05, "loss": 2.2807, "step": 4202000 }, { "epoch": 12.16, "learning_rate": 4.391997325398296e-05, "loss": 2.2702, "step": 4202500 }, { "epoch": 12.17, "learning_rate": 4.3919249606335686e-05, "loss": 2.2788, "step": 4203000 }, { "epoch": 12.17, "learning_rate": 4.391852595868841e-05, "loss": 2.2887, "step": 4203500 }, { "epoch": 12.17, "learning_rate": 4.3917803758336424e-05, "loss": 2.2828, "step": 4204000 }, { "epoch": 12.17, "learning_rate": 4.3917080110689146e-05, "loss": 2.2918, "step": 4204500 }, { "epoch": 12.17, "learning_rate": 4.391635646304187e-05, "loss": 2.2719, "step": 4205000 }, { "epoch": 12.17, "learning_rate": 4.391563281539459e-05, "loss": 2.2862, "step": 4205500 }, { "epoch": 12.17, "learning_rate": 4.391490916774731e-05, "loss": 2.2908, "step": 4206000 }, { "epoch": 12.18, "learning_rate": 4.3914185520100035e-05, "loss": 2.295, "step": 4206500 }, { "epoch": 12.18, "learning_rate": 4.391346187245276e-05, "loss": 2.2904, "step": 4207000 }, { "epoch": 12.18, "learning_rate": 4.3912738224805486e-05, "loss": 2.286, "step": 4207500 }, { "epoch": 12.18, "learning_rate": 4.391201602445351e-05, "loss": 2.282, "step": 4208000 }, { "epoch": 12.18, "learning_rate": 4.391129237680623e-05, "loss": 2.2839, "step": 4208500 }, { "epoch": 12.18, "learning_rate": 4.391056872915895e-05, "loss": 2.2997, "step": 4209000 }, { "epoch": 12.18, "learning_rate": 4.3909845081511675e-05, "loss": 2.3117, "step": 4209500 }, { "epoch": 12.19, "learning_rate": 4.39091214338644e-05, "loss": 2.3097, "step": 4210000 }, { "epoch": 12.19, "learning_rate": 4.390839923351241e-05, "loss": 2.275, "step": 4210500 }, { "epoch": 12.19, "learning_rate": 4.3907675585865135e-05, "loss": 2.2807, "step": 4211000 }, { "epoch": 12.19, "learning_rate": 4.390695338551316e-05, "loss": 2.3015, "step": 4211500 }, { "epoch": 12.19, "learning_rate": 4.390622973786588e-05, "loss": 2.299, "step": 4212000 }, { "epoch": 12.19, "learning_rate": 4.39055060902186e-05, "loss": 2.2846, "step": 4212500 }, { "epoch": 12.19, "learning_rate": 4.3904782442571324e-05, "loss": 2.2668, "step": 4213000 }, { "epoch": 12.2, "learning_rate": 4.390406024221934e-05, "loss": 2.2799, "step": 4213500 }, { "epoch": 12.2, "learning_rate": 4.390333659457206e-05, "loss": 2.2658, "step": 4214000 }, { "epoch": 12.2, "learning_rate": 4.3902614394220084e-05, "loss": 2.3017, "step": 4214500 }, { "epoch": 12.2, "learning_rate": 4.390189074657281e-05, "loss": 2.2915, "step": 4215000 }, { "epoch": 12.2, "learning_rate": 4.390116709892553e-05, "loss": 2.282, "step": 4215500 }, { "epoch": 12.2, "learning_rate": 4.390044345127826e-05, "loss": 2.2945, "step": 4216000 }, { "epoch": 12.21, "learning_rate": 4.389971980363098e-05, "loss": 2.2872, "step": 4216500 }, { "epoch": 12.21, "learning_rate": 4.38989961559837e-05, "loss": 2.2833, "step": 4217000 }, { "epoch": 12.21, "learning_rate": 4.3898272508336425e-05, "loss": 2.2631, "step": 4217500 }, { "epoch": 12.21, "learning_rate": 4.389754886068915e-05, "loss": 2.2917, "step": 4218000 }, { "epoch": 12.21, "learning_rate": 4.389682521304187e-05, "loss": 2.2932, "step": 4218500 }, { "epoch": 12.21, "learning_rate": 4.389610156539459e-05, "loss": 2.3136, "step": 4219000 }, { "epoch": 12.21, "learning_rate": 4.3895377917747314e-05, "loss": 2.2859, "step": 4219500 }, { "epoch": 12.22, "learning_rate": 4.3894654270100036e-05, "loss": 2.2875, "step": 4220000 }, { "epoch": 12.22, "learning_rate": 4.389393062245276e-05, "loss": 2.2845, "step": 4220500 }, { "epoch": 12.22, "learning_rate": 4.389320697480549e-05, "loss": 2.2803, "step": 4221000 }, { "epoch": 12.22, "learning_rate": 4.389248332715821e-05, "loss": 2.2844, "step": 4221500 }, { "epoch": 12.22, "learning_rate": 4.389175967951093e-05, "loss": 2.2717, "step": 4222000 }, { "epoch": 12.22, "learning_rate": 4.3891036031863654e-05, "loss": 2.259, "step": 4222500 }, { "epoch": 12.22, "learning_rate": 4.3890313831511676e-05, "loss": 2.2916, "step": 4223000 }, { "epoch": 12.23, "learning_rate": 4.38895901838644e-05, "loss": 2.2865, "step": 4223500 }, { "epoch": 12.23, "learning_rate": 4.3888867983512414e-05, "loss": 2.2801, "step": 4224000 }, { "epoch": 12.23, "learning_rate": 4.3888144335865136e-05, "loss": 2.2912, "step": 4224500 }, { "epoch": 12.23, "learning_rate": 4.3887420688217865e-05, "loss": 2.286, "step": 4225000 }, { "epoch": 12.23, "learning_rate": 4.388669704057059e-05, "loss": 2.3212, "step": 4225500 }, { "epoch": 12.23, "learning_rate": 4.388597339292331e-05, "loss": 2.2883, "step": 4226000 }, { "epoch": 12.23, "learning_rate": 4.3885251192571325e-05, "loss": 2.2859, "step": 4226500 }, { "epoch": 12.24, "learning_rate": 4.388452754492405e-05, "loss": 2.2918, "step": 4227000 }, { "epoch": 12.24, "learning_rate": 4.388380389727677e-05, "loss": 2.2721, "step": 4227500 }, { "epoch": 12.24, "learning_rate": 4.388308024962949e-05, "loss": 2.2519, "step": 4228000 }, { "epoch": 12.24, "learning_rate": 4.3882358049277514e-05, "loss": 2.2771, "step": 4228500 }, { "epoch": 12.24, "learning_rate": 4.3881634401630236e-05, "loss": 2.2967, "step": 4229000 }, { "epoch": 12.24, "learning_rate": 4.388091075398296e-05, "loss": 2.2798, "step": 4229500 }, { "epoch": 12.24, "learning_rate": 4.388018710633568e-05, "loss": 2.2789, "step": 4230000 }, { "epoch": 12.25, "learning_rate": 4.387946345868841e-05, "loss": 2.2813, "step": 4230500 }, { "epoch": 12.25, "learning_rate": 4.387873981104113e-05, "loss": 2.2632, "step": 4231000 }, { "epoch": 12.25, "learning_rate": 4.3878016163393854e-05, "loss": 2.3081, "step": 4231500 }, { "epoch": 12.25, "learning_rate": 4.3877292515746577e-05, "loss": 2.2689, "step": 4232000 }, { "epoch": 12.25, "learning_rate": 4.38765688680993e-05, "loss": 2.3032, "step": 4232500 }, { "epoch": 12.25, "learning_rate": 4.387584522045202e-05, "loss": 2.2927, "step": 4233000 }, { "epoch": 12.25, "learning_rate": 4.387512157280474e-05, "loss": 2.3115, "step": 4233500 }, { "epoch": 12.26, "learning_rate": 4.3874399372452766e-05, "loss": 2.3016, "step": 4234000 }, { "epoch": 12.26, "learning_rate": 4.387367572480549e-05, "loss": 2.267, "step": 4234500 }, { "epoch": 12.26, "learning_rate": 4.3872953524453503e-05, "loss": 2.2764, "step": 4235000 }, { "epoch": 12.26, "learning_rate": 4.387223132410152e-05, "loss": 2.2815, "step": 4235500 }, { "epoch": 12.26, "learning_rate": 4.387150767645424e-05, "loss": 2.2796, "step": 4236000 }, { "epoch": 12.26, "learning_rate": 4.3870784028806963e-05, "loss": 2.2598, "step": 4236500 }, { "epoch": 12.26, "learning_rate": 4.3870060381159686e-05, "loss": 2.2593, "step": 4237000 }, { "epoch": 12.27, "learning_rate": 4.3869336733512415e-05, "loss": 2.2852, "step": 4237500 }, { "epoch": 12.27, "learning_rate": 4.386861453316043e-05, "loss": 2.3163, "step": 4238000 }, { "epoch": 12.27, "learning_rate": 4.386789088551316e-05, "loss": 2.2652, "step": 4238500 }, { "epoch": 12.27, "learning_rate": 4.386716723786588e-05, "loss": 2.2645, "step": 4239000 }, { "epoch": 12.27, "learning_rate": 4.3866443590218604e-05, "loss": 2.3058, "step": 4239500 }, { "epoch": 12.27, "learning_rate": 4.3865719942571326e-05, "loss": 2.2842, "step": 4240000 }, { "epoch": 12.27, "learning_rate": 4.386499774221934e-05, "loss": 2.2893, "step": 4240500 }, { "epoch": 12.28, "learning_rate": 4.3864274094572064e-05, "loss": 2.288, "step": 4241000 }, { "epoch": 12.28, "learning_rate": 4.3863550446924786e-05, "loss": 2.3095, "step": 4241500 }, { "epoch": 12.28, "learning_rate": 4.3862826799277515e-05, "loss": 2.3336, "step": 4242000 }, { "epoch": 12.28, "learning_rate": 4.386210315163024e-05, "loss": 2.2868, "step": 4242500 }, { "epoch": 12.28, "learning_rate": 4.386137950398296e-05, "loss": 2.2732, "step": 4243000 }, { "epoch": 12.28, "learning_rate": 4.386065585633568e-05, "loss": 2.3304, "step": 4243500 }, { "epoch": 12.28, "learning_rate": 4.3859932208688404e-05, "loss": 2.3031, "step": 4244000 }, { "epoch": 12.29, "learning_rate": 4.3859208561041126e-05, "loss": 2.2946, "step": 4244500 }, { "epoch": 12.29, "learning_rate": 4.385848491339385e-05, "loss": 2.2834, "step": 4245000 }, { "epoch": 12.29, "learning_rate": 4.385776271304187e-05, "loss": 2.2767, "step": 4245500 }, { "epoch": 12.29, "learning_rate": 4.385703906539459e-05, "loss": 2.2762, "step": 4246000 }, { "epoch": 12.29, "learning_rate": 4.3856315417747315e-05, "loss": 2.2852, "step": 4246500 }, { "epoch": 12.29, "learning_rate": 4.385559177010004e-05, "loss": 2.2909, "step": 4247000 }, { "epoch": 12.29, "learning_rate": 4.3854868122452766e-05, "loss": 2.3005, "step": 4247500 }, { "epoch": 12.3, "learning_rate": 4.385414447480549e-05, "loss": 2.2792, "step": 4248000 }, { "epoch": 12.3, "learning_rate": 4.385342082715821e-05, "loss": 2.2748, "step": 4248500 }, { "epoch": 12.3, "learning_rate": 4.385269717951093e-05, "loss": 2.2851, "step": 4249000 }, { "epoch": 12.3, "learning_rate": 4.3851973531863655e-05, "loss": 2.2754, "step": 4249500 }, { "epoch": 12.3, "learning_rate": 4.385124988421638e-05, "loss": 2.3025, "step": 4250000 }, { "epoch": 12.3, "learning_rate": 4.38505262365691e-05, "loss": 2.2921, "step": 4250500 }, { "epoch": 12.3, "learning_rate": 4.3849804036217115e-05, "loss": 2.305, "step": 4251000 }, { "epoch": 12.31, "learning_rate": 4.384908038856984e-05, "loss": 2.2814, "step": 4251500 }, { "epoch": 12.31, "learning_rate": 4.384835818821786e-05, "loss": 2.2801, "step": 4252000 }, { "epoch": 12.31, "learning_rate": 4.3847635987865876e-05, "loss": 2.2854, "step": 4252500 }, { "epoch": 12.31, "learning_rate": 4.38469123402186e-05, "loss": 2.288, "step": 4253000 }, { "epoch": 12.31, "learning_rate": 4.384618869257133e-05, "loss": 2.2916, "step": 4253500 }, { "epoch": 12.31, "learning_rate": 4.384546504492405e-05, "loss": 2.2919, "step": 4254000 }, { "epoch": 12.32, "learning_rate": 4.384474139727677e-05, "loss": 2.293, "step": 4254500 }, { "epoch": 12.32, "learning_rate": 4.3844017749629493e-05, "loss": 2.2739, "step": 4255000 }, { "epoch": 12.32, "learning_rate": 4.3843294101982216e-05, "loss": 2.2952, "step": 4255500 }, { "epoch": 12.32, "learning_rate": 4.3842570454334945e-05, "loss": 2.2788, "step": 4256000 }, { "epoch": 12.32, "learning_rate": 4.384184680668767e-05, "loss": 2.2953, "step": 4256500 }, { "epoch": 12.32, "learning_rate": 4.384112315904039e-05, "loss": 2.3094, "step": 4257000 }, { "epoch": 12.32, "learning_rate": 4.3840400958688405e-05, "loss": 2.2959, "step": 4257500 }, { "epoch": 12.33, "learning_rate": 4.383967731104113e-05, "loss": 2.2894, "step": 4258000 }, { "epoch": 12.33, "learning_rate": 4.383895366339385e-05, "loss": 2.2802, "step": 4258500 }, { "epoch": 12.33, "learning_rate": 4.383823001574657e-05, "loss": 2.3114, "step": 4259000 }, { "epoch": 12.33, "learning_rate": 4.3837506368099294e-05, "loss": 2.2905, "step": 4259500 }, { "epoch": 12.33, "learning_rate": 4.3836782720452016e-05, "loss": 2.2886, "step": 4260000 }, { "epoch": 12.33, "learning_rate": 4.3836059072804745e-05, "loss": 2.2724, "step": 4260500 }, { "epoch": 12.33, "learning_rate": 4.383533542515747e-05, "loss": 2.2904, "step": 4261000 }, { "epoch": 12.34, "learning_rate": 4.3834611777510196e-05, "loss": 2.2893, "step": 4261500 }, { "epoch": 12.34, "learning_rate": 4.383388957715821e-05, "loss": 2.2689, "step": 4262000 }, { "epoch": 12.34, "learning_rate": 4.383316737680623e-05, "loss": 2.297, "step": 4262500 }, { "epoch": 12.34, "learning_rate": 4.383244372915895e-05, "loss": 2.2869, "step": 4263000 }, { "epoch": 12.34, "learning_rate": 4.383172008151167e-05, "loss": 2.2952, "step": 4263500 }, { "epoch": 12.34, "learning_rate": 4.3830996433864394e-05, "loss": 2.2802, "step": 4264000 }, { "epoch": 12.34, "learning_rate": 4.3830272786217116e-05, "loss": 2.2968, "step": 4264500 }, { "epoch": 12.35, "learning_rate": 4.3829549138569845e-05, "loss": 2.2739, "step": 4265000 }, { "epoch": 12.35, "learning_rate": 4.382882549092257e-05, "loss": 2.2589, "step": 4265500 }, { "epoch": 12.35, "learning_rate": 4.382810184327529e-05, "loss": 2.2814, "step": 4266000 }, { "epoch": 12.35, "learning_rate": 4.3827379642923305e-05, "loss": 2.3004, "step": 4266500 }, { "epoch": 12.35, "learning_rate": 4.382665599527603e-05, "loss": 2.3045, "step": 4267000 }, { "epoch": 12.35, "learning_rate": 4.382593234762875e-05, "loss": 2.3114, "step": 4267500 }, { "epoch": 12.35, "learning_rate": 4.382520869998148e-05, "loss": 2.2714, "step": 4268000 }, { "epoch": 12.36, "learning_rate": 4.38244850523342e-05, "loss": 2.2894, "step": 4268500 }, { "epoch": 12.36, "learning_rate": 4.3823762851982216e-05, "loss": 2.2775, "step": 4269000 }, { "epoch": 12.36, "learning_rate": 4.3823039204334945e-05, "loss": 2.2878, "step": 4269500 }, { "epoch": 12.36, "learning_rate": 4.382231555668767e-05, "loss": 2.29, "step": 4270000 }, { "epoch": 12.36, "learning_rate": 4.382159190904039e-05, "loss": 2.2835, "step": 4270500 }, { "epoch": 12.36, "learning_rate": 4.382086826139311e-05, "loss": 2.2797, "step": 4271000 }, { "epoch": 12.36, "learning_rate": 4.382014750833642e-05, "loss": 2.2941, "step": 4271500 }, { "epoch": 12.37, "learning_rate": 4.3819425307984443e-05, "loss": 2.2883, "step": 4272000 }, { "epoch": 12.37, "learning_rate": 4.3818701660337166e-05, "loss": 2.2769, "step": 4272500 }, { "epoch": 12.37, "learning_rate": 4.381797801268989e-05, "loss": 2.3019, "step": 4273000 }, { "epoch": 12.37, "learning_rate": 4.381725436504261e-05, "loss": 2.2941, "step": 4273500 }, { "epoch": 12.37, "learning_rate": 4.381653071739533e-05, "loss": 2.2862, "step": 4274000 }, { "epoch": 12.37, "learning_rate": 4.3815807069748055e-05, "loss": 2.2841, "step": 4274500 }, { "epoch": 12.37, "learning_rate": 4.381508342210078e-05, "loss": 2.2958, "step": 4275000 }, { "epoch": 12.38, "learning_rate": 4.3814359774453506e-05, "loss": 2.2824, "step": 4275500 }, { "epoch": 12.38, "learning_rate": 4.381363612680623e-05, "loss": 2.2795, "step": 4276000 }, { "epoch": 12.38, "learning_rate": 4.3812913926454244e-05, "loss": 2.2895, "step": 4276500 }, { "epoch": 12.38, "learning_rate": 4.381219027880697e-05, "loss": 2.2804, "step": 4277000 }, { "epoch": 12.38, "learning_rate": 4.3811466631159695e-05, "loss": 2.2839, "step": 4277500 }, { "epoch": 12.38, "learning_rate": 4.381074298351242e-05, "loss": 2.2997, "step": 4278000 }, { "epoch": 12.38, "learning_rate": 4.381001933586514e-05, "loss": 2.2956, "step": 4278500 }, { "epoch": 12.39, "learning_rate": 4.380929568821786e-05, "loss": 2.318, "step": 4279000 }, { "epoch": 12.39, "learning_rate": 4.3808572040570584e-05, "loss": 2.2882, "step": 4279500 }, { "epoch": 12.39, "learning_rate": 4.3807848392923306e-05, "loss": 2.2891, "step": 4280000 }, { "epoch": 12.39, "learning_rate": 4.380712619257132e-05, "loss": 2.3054, "step": 4280500 }, { "epoch": 12.39, "learning_rate": 4.3806402544924044e-05, "loss": 2.3001, "step": 4281000 }, { "epoch": 12.39, "learning_rate": 4.380567889727677e-05, "loss": 2.2936, "step": 4281500 }, { "epoch": 12.39, "learning_rate": 4.3804955249629495e-05, "loss": 2.2858, "step": 4282000 }, { "epoch": 12.4, "learning_rate": 4.380423160198222e-05, "loss": 2.3144, "step": 4282500 }, { "epoch": 12.4, "learning_rate": 4.3803507954334946e-05, "loss": 2.3094, "step": 4283000 }, { "epoch": 12.4, "learning_rate": 4.380278575398296e-05, "loss": 2.2685, "step": 4283500 }, { "epoch": 12.4, "learning_rate": 4.380206355363098e-05, "loss": 2.3107, "step": 4284000 }, { "epoch": 12.4, "learning_rate": 4.38013399059837e-05, "loss": 2.3217, "step": 4284500 }, { "epoch": 12.4, "learning_rate": 4.380061625833642e-05, "loss": 2.2973, "step": 4285000 }, { "epoch": 12.4, "learning_rate": 4.3799892610689144e-05, "loss": 2.2988, "step": 4285500 }, { "epoch": 12.41, "learning_rate": 4.379916896304187e-05, "loss": 2.2992, "step": 4286000 }, { "epoch": 12.41, "learning_rate": 4.3798445315394595e-05, "loss": 2.2805, "step": 4286500 }, { "epoch": 12.41, "learning_rate": 4.379772166774732e-05, "loss": 2.2965, "step": 4287000 }, { "epoch": 12.41, "learning_rate": 4.379699802010004e-05, "loss": 2.2998, "step": 4287500 }, { "epoch": 12.41, "learning_rate": 4.379627437245276e-05, "loss": 2.2934, "step": 4288000 }, { "epoch": 12.41, "learning_rate": 4.379555217210078e-05, "loss": 2.2849, "step": 4288500 }, { "epoch": 12.41, "learning_rate": 4.37948285244535e-05, "loss": 2.2734, "step": 4289000 }, { "epoch": 12.42, "learning_rate": 4.379410487680622e-05, "loss": 2.3122, "step": 4289500 }, { "epoch": 12.42, "learning_rate": 4.3793381229158944e-05, "loss": 2.2944, "step": 4290000 }, { "epoch": 12.42, "learning_rate": 4.379265758151167e-05, "loss": 2.2777, "step": 4290500 }, { "epoch": 12.42, "learning_rate": 4.3791933933864396e-05, "loss": 2.2829, "step": 4291000 }, { "epoch": 12.42, "learning_rate": 4.3791210286217125e-05, "loss": 2.2629, "step": 4291500 }, { "epoch": 12.42, "learning_rate": 4.379048808586514e-05, "loss": 2.2875, "step": 4292000 }, { "epoch": 12.43, "learning_rate": 4.3789765885513156e-05, "loss": 2.3145, "step": 4292500 }, { "epoch": 12.43, "learning_rate": 4.378904368516117e-05, "loss": 2.2983, "step": 4293000 }, { "epoch": 12.43, "learning_rate": 4.37883200375139e-05, "loss": 2.2925, "step": 4293500 }, { "epoch": 12.43, "learning_rate": 4.378759638986662e-05, "loss": 2.2872, "step": 4294000 }, { "epoch": 12.43, "learning_rate": 4.3786872742219345e-05, "loss": 2.2608, "step": 4294500 }, { "epoch": 12.43, "learning_rate": 4.378614909457207e-05, "loss": 2.3221, "step": 4295000 }, { "epoch": 12.43, "learning_rate": 4.378542544692479e-05, "loss": 2.2951, "step": 4295500 }, { "epoch": 12.44, "learning_rate": 4.378470179927751e-05, "loss": 2.2951, "step": 4296000 }, { "epoch": 12.44, "learning_rate": 4.3783978151630234e-05, "loss": 2.2808, "step": 4296500 }, { "epoch": 12.44, "learning_rate": 4.3783254503982956e-05, "loss": 2.2651, "step": 4297000 }, { "epoch": 12.44, "learning_rate": 4.378253085633568e-05, "loss": 2.2701, "step": 4297500 }, { "epoch": 12.44, "learning_rate": 4.378180720868841e-05, "loss": 2.2589, "step": 4298000 }, { "epoch": 12.44, "learning_rate": 4.378108356104113e-05, "loss": 2.3159, "step": 4298500 }, { "epoch": 12.44, "learning_rate": 4.378035991339385e-05, "loss": 2.2782, "step": 4299000 }, { "epoch": 12.45, "learning_rate": 4.3779636265746574e-05, "loss": 2.3126, "step": 4299500 }, { "epoch": 12.45, "learning_rate": 4.3778912618099296e-05, "loss": 2.2858, "step": 4300000 }, { "epoch": 12.45, "learning_rate": 4.377819041774732e-05, "loss": 2.2502, "step": 4300500 }, { "epoch": 12.45, "learning_rate": 4.377746677010004e-05, "loss": 2.272, "step": 4301000 }, { "epoch": 12.45, "learning_rate": 4.377674312245276e-05, "loss": 2.2904, "step": 4301500 }, { "epoch": 12.45, "learning_rate": 4.3776019474805485e-05, "loss": 2.2798, "step": 4302000 }, { "epoch": 12.45, "learning_rate": 4.37752972744535e-05, "loss": 2.2648, "step": 4302500 }, { "epoch": 12.46, "learning_rate": 4.377457362680622e-05, "loss": 2.3037, "step": 4303000 }, { "epoch": 12.46, "learning_rate": 4.3773849979158945e-05, "loss": 2.2826, "step": 4303500 }, { "epoch": 12.46, "learning_rate": 4.3773126331511674e-05, "loss": 2.2888, "step": 4304000 }, { "epoch": 12.46, "learning_rate": 4.377240413115969e-05, "loss": 2.291, "step": 4304500 }, { "epoch": 12.46, "learning_rate": 4.377168048351241e-05, "loss": 2.2989, "step": 4305000 }, { "epoch": 12.46, "learning_rate": 4.377095683586514e-05, "loss": 2.3125, "step": 4305500 }, { "epoch": 12.46, "learning_rate": 4.377023318821786e-05, "loss": 2.2893, "step": 4306000 }, { "epoch": 12.47, "learning_rate": 4.376951098786588e-05, "loss": 2.2844, "step": 4306500 }, { "epoch": 12.47, "learning_rate": 4.37687873402186e-05, "loss": 2.281, "step": 4307000 }, { "epoch": 12.47, "learning_rate": 4.376806369257132e-05, "loss": 2.2933, "step": 4307500 }, { "epoch": 12.47, "learning_rate": 4.376734004492405e-05, "loss": 2.2864, "step": 4308000 }, { "epoch": 12.47, "learning_rate": 4.3766616397276774e-05, "loss": 2.2789, "step": 4308500 }, { "epoch": 12.47, "learning_rate": 4.37658927496295e-05, "loss": 2.3079, "step": 4309000 }, { "epoch": 12.47, "learning_rate": 4.376516910198222e-05, "loss": 2.2919, "step": 4309500 }, { "epoch": 12.48, "learning_rate": 4.376444545433494e-05, "loss": 2.2878, "step": 4310000 }, { "epoch": 12.48, "learning_rate": 4.376372180668766e-05, "loss": 2.2617, "step": 4310500 }, { "epoch": 12.48, "learning_rate": 4.376299960633568e-05, "loss": 2.2949, "step": 4311000 }, { "epoch": 12.48, "learning_rate": 4.37622759586884e-05, "loss": 2.2948, "step": 4311500 }, { "epoch": 12.48, "learning_rate": 4.376155231104112e-05, "loss": 2.3232, "step": 4312000 }, { "epoch": 12.48, "learning_rate": 4.376082866339385e-05, "loss": 2.2472, "step": 4312500 }, { "epoch": 12.48, "learning_rate": 4.3760106463041875e-05, "loss": 2.3035, "step": 4313000 }, { "epoch": 12.49, "learning_rate": 4.375938426268989e-05, "loss": 2.2912, "step": 4313500 }, { "epoch": 12.49, "learning_rate": 4.375866061504261e-05, "loss": 2.2843, "step": 4314000 }, { "epoch": 12.49, "learning_rate": 4.3757936967395335e-05, "loss": 2.2973, "step": 4314500 }, { "epoch": 12.49, "learning_rate": 4.375721331974806e-05, "loss": 2.2993, "step": 4315000 }, { "epoch": 12.49, "learning_rate": 4.375648967210078e-05, "loss": 2.2901, "step": 4315500 }, { "epoch": 12.49, "learning_rate": 4.37557660244535e-05, "loss": 2.2945, "step": 4316000 }, { "epoch": 12.49, "learning_rate": 4.3755043824101524e-05, "loss": 2.3334, "step": 4316500 }, { "epoch": 12.5, "learning_rate": 4.375432162374954e-05, "loss": 2.2781, "step": 4317000 }, { "epoch": 12.5, "learning_rate": 4.375359797610226e-05, "loss": 2.2797, "step": 4317500 }, { "epoch": 12.5, "learning_rate": 4.3752874328454984e-05, "loss": 2.3117, "step": 4318000 }, { "epoch": 12.5, "learning_rate": 4.3752150680807706e-05, "loss": 2.3002, "step": 4318500 }, { "epoch": 12.5, "learning_rate": 4.375142703316043e-05, "loss": 2.3118, "step": 4319000 }, { "epoch": 12.5, "learning_rate": 4.375070338551315e-05, "loss": 2.2872, "step": 4319500 }, { "epoch": 12.5, "learning_rate": 4.374997973786587e-05, "loss": 2.2947, "step": 4320000 }, { "epoch": 12.51, "learning_rate": 4.37492560902186e-05, "loss": 2.3056, "step": 4320500 }, { "epoch": 12.51, "learning_rate": 4.3748532442571324e-05, "loss": 2.2945, "step": 4321000 }, { "epoch": 12.51, "learning_rate": 4.374780879492405e-05, "loss": 2.3089, "step": 4321500 }, { "epoch": 12.51, "learning_rate": 4.3747085147276775e-05, "loss": 2.2785, "step": 4322000 }, { "epoch": 12.51, "learning_rate": 4.37463614996295e-05, "loss": 2.293, "step": 4322500 }, { "epoch": 12.51, "learning_rate": 4.374563929927751e-05, "loss": 2.2935, "step": 4323000 }, { "epoch": 12.51, "learning_rate": 4.3744915651630235e-05, "loss": 2.309, "step": 4323500 }, { "epoch": 12.52, "learning_rate": 4.374419200398296e-05, "loss": 2.2773, "step": 4324000 }, { "epoch": 12.52, "learning_rate": 4.374346835633568e-05, "loss": 2.2956, "step": 4324500 }, { "epoch": 12.52, "learning_rate": 4.37427447086884e-05, "loss": 2.2785, "step": 4325000 }, { "epoch": 12.52, "learning_rate": 4.3742022508336424e-05, "loss": 2.2899, "step": 4325500 }, { "epoch": 12.52, "learning_rate": 4.3741298860689146e-05, "loss": 2.3016, "step": 4326000 }, { "epoch": 12.52, "learning_rate": 4.374057521304187e-05, "loss": 2.2642, "step": 4326500 }, { "epoch": 12.52, "learning_rate": 4.373985156539459e-05, "loss": 2.2864, "step": 4327000 }, { "epoch": 12.53, "learning_rate": 4.373912791774731e-05, "loss": 2.2883, "step": 4327500 }, { "epoch": 12.53, "learning_rate": 4.373840571739533e-05, "loss": 2.2712, "step": 4328000 }, { "epoch": 12.53, "learning_rate": 4.373768206974806e-05, "loss": 2.2953, "step": 4328500 }, { "epoch": 12.53, "learning_rate": 4.373695842210078e-05, "loss": 2.2799, "step": 4329000 }, { "epoch": 12.53, "learning_rate": 4.37362362217488e-05, "loss": 2.275, "step": 4329500 }, { "epoch": 12.53, "learning_rate": 4.3735512574101525e-05, "loss": 2.2959, "step": 4330000 }, { "epoch": 12.54, "learning_rate": 4.373478892645425e-05, "loss": 2.2691, "step": 4330500 }, { "epoch": 12.54, "learning_rate": 4.373406672610226e-05, "loss": 2.2796, "step": 4331000 }, { "epoch": 12.54, "learning_rate": 4.3733343078454985e-05, "loss": 2.3167, "step": 4331500 }, { "epoch": 12.54, "learning_rate": 4.373261943080771e-05, "loss": 2.3108, "step": 4332000 }, { "epoch": 12.54, "learning_rate": 4.373189578316043e-05, "loss": 2.2748, "step": 4332500 }, { "epoch": 12.54, "learning_rate": 4.373117213551315e-05, "loss": 2.305, "step": 4333000 }, { "epoch": 12.54, "learning_rate": 4.373044848786588e-05, "loss": 2.2748, "step": 4333500 }, { "epoch": 12.55, "learning_rate": 4.37297248402186e-05, "loss": 2.2756, "step": 4334000 }, { "epoch": 12.55, "learning_rate": 4.3729001192571325e-05, "loss": 2.3068, "step": 4334500 }, { "epoch": 12.55, "learning_rate": 4.372827754492405e-05, "loss": 2.2863, "step": 4335000 }, { "epoch": 12.55, "learning_rate": 4.372755534457206e-05, "loss": 2.278, "step": 4335500 }, { "epoch": 12.55, "learning_rate": 4.372683169692479e-05, "loss": 2.2861, "step": 4336000 }, { "epoch": 12.55, "learning_rate": 4.3726108049277514e-05, "loss": 2.3018, "step": 4336500 }, { "epoch": 12.55, "learning_rate": 4.3725384401630236e-05, "loss": 2.2777, "step": 4337000 }, { "epoch": 12.56, "learning_rate": 4.372466075398296e-05, "loss": 2.2864, "step": 4337500 }, { "epoch": 12.56, "learning_rate": 4.372393710633568e-05, "loss": 2.2723, "step": 4338000 }, { "epoch": 12.56, "learning_rate": 4.37232134586884e-05, "loss": 2.2809, "step": 4338500 }, { "epoch": 12.56, "learning_rate": 4.3722491258336425e-05, "loss": 2.301, "step": 4339000 }, { "epoch": 12.56, "learning_rate": 4.372176761068915e-05, "loss": 2.2871, "step": 4339500 }, { "epoch": 12.56, "learning_rate": 4.372104396304187e-05, "loss": 2.2922, "step": 4340000 }, { "epoch": 12.56, "learning_rate": 4.372032031539459e-05, "loss": 2.2903, "step": 4340500 }, { "epoch": 12.57, "learning_rate": 4.3719596667747314e-05, "loss": 2.2703, "step": 4341000 }, { "epoch": 12.57, "learning_rate": 4.3718873020100036e-05, "loss": 2.3063, "step": 4341500 }, { "epoch": 12.57, "learning_rate": 4.371814937245276e-05, "loss": 2.2859, "step": 4342000 }, { "epoch": 12.57, "learning_rate": 4.371742717210078e-05, "loss": 2.2946, "step": 4342500 }, { "epoch": 12.57, "learning_rate": 4.37167035244535e-05, "loss": 2.294, "step": 4343000 }, { "epoch": 12.57, "learning_rate": 4.371597987680623e-05, "loss": 2.288, "step": 4343500 }, { "epoch": 12.57, "learning_rate": 4.3715256229158954e-05, "loss": 2.2939, "step": 4344000 }, { "epoch": 12.58, "learning_rate": 4.3714532581511676e-05, "loss": 2.3053, "step": 4344500 }, { "epoch": 12.58, "learning_rate": 4.37138089338644e-05, "loss": 2.2817, "step": 4345000 }, { "epoch": 12.58, "learning_rate": 4.3713086733512414e-05, "loss": 2.2955, "step": 4345500 }, { "epoch": 12.58, "learning_rate": 4.3712363085865137e-05, "loss": 2.2824, "step": 4346000 }, { "epoch": 12.58, "learning_rate": 4.371163943821786e-05, "loss": 2.3114, "step": 4346500 }, { "epoch": 12.58, "learning_rate": 4.371091579057058e-05, "loss": 2.3125, "step": 4347000 }, { "epoch": 12.58, "learning_rate": 4.37101921429233e-05, "loss": 2.2592, "step": 4347500 }, { "epoch": 12.59, "learning_rate": 4.370946849527603e-05, "loss": 2.2719, "step": 4348000 }, { "epoch": 12.59, "learning_rate": 4.3708744847628754e-05, "loss": 2.3107, "step": 4348500 }, { "epoch": 12.59, "learning_rate": 4.370802119998148e-05, "loss": 2.3019, "step": 4349000 }, { "epoch": 12.59, "learning_rate": 4.37072975523342e-05, "loss": 2.2855, "step": 4349500 }, { "epoch": 12.59, "learning_rate": 4.370657390468692e-05, "loss": 2.2934, "step": 4350000 }, { "epoch": 12.59, "learning_rate": 4.3705851704334943e-05, "loss": 2.2843, "step": 4350500 }, { "epoch": 12.59, "learning_rate": 4.3705128056687666e-05, "loss": 2.2804, "step": 4351000 }, { "epoch": 12.6, "learning_rate": 4.370440440904039e-05, "loss": 2.2944, "step": 4351500 }, { "epoch": 12.6, "learning_rate": 4.370368076139311e-05, "loss": 2.2821, "step": 4352000 }, { "epoch": 12.6, "learning_rate": 4.370295711374583e-05, "loss": 2.2736, "step": 4352500 }, { "epoch": 12.6, "learning_rate": 4.3702234913393855e-05, "loss": 2.3085, "step": 4353000 }, { "epoch": 12.6, "learning_rate": 4.370151126574658e-05, "loss": 2.2868, "step": 4353500 }, { "epoch": 12.6, "learning_rate": 4.37007876180993e-05, "loss": 2.2909, "step": 4354000 }, { "epoch": 12.6, "learning_rate": 4.370006397045202e-05, "loss": 2.276, "step": 4354500 }, { "epoch": 12.61, "learning_rate": 4.3699340322804744e-05, "loss": 2.277, "step": 4355000 }, { "epoch": 12.61, "learning_rate": 4.3698616675157466e-05, "loss": 2.2799, "step": 4355500 }, { "epoch": 12.61, "learning_rate": 4.369789302751019e-05, "loss": 2.2776, "step": 4356000 }, { "epoch": 12.61, "learning_rate": 4.369716937986291e-05, "loss": 2.2956, "step": 4356500 }, { "epoch": 12.61, "learning_rate": 4.369644717951093e-05, "loss": 2.3011, "step": 4357000 }, { "epoch": 12.61, "learning_rate": 4.3695723531863655e-05, "loss": 2.286, "step": 4357500 }, { "epoch": 12.61, "learning_rate": 4.3694999884216384e-05, "loss": 2.2812, "step": 4358000 }, { "epoch": 12.62, "learning_rate": 4.36942776838644e-05, "loss": 2.2878, "step": 4358500 }, { "epoch": 12.62, "learning_rate": 4.369355403621712e-05, "loss": 2.283, "step": 4359000 }, { "epoch": 12.62, "learning_rate": 4.3692830388569844e-05, "loss": 2.2832, "step": 4359500 }, { "epoch": 12.62, "learning_rate": 4.3692106740922566e-05, "loss": 2.3103, "step": 4360000 }, { "epoch": 12.62, "learning_rate": 4.369138309327529e-05, "loss": 2.2841, "step": 4360500 }, { "epoch": 12.62, "learning_rate": 4.369066089292331e-05, "loss": 2.282, "step": 4361000 }, { "epoch": 12.62, "learning_rate": 4.368993724527603e-05, "loss": 2.2902, "step": 4361500 }, { "epoch": 12.63, "learning_rate": 4.3689213597628755e-05, "loss": 2.2707, "step": 4362000 }, { "epoch": 12.63, "learning_rate": 4.368848994998148e-05, "loss": 2.2999, "step": 4362500 }, { "epoch": 12.63, "learning_rate": 4.36877663023342e-05, "loss": 2.2977, "step": 4363000 }, { "epoch": 12.63, "learning_rate": 4.368704265468692e-05, "loss": 2.296, "step": 4363500 }, { "epoch": 12.63, "learning_rate": 4.3686319007039644e-05, "loss": 2.2952, "step": 4364000 }, { "epoch": 12.63, "learning_rate": 4.3685595359392366e-05, "loss": 2.3198, "step": 4364500 }, { "epoch": 12.63, "learning_rate": 4.3684871711745095e-05, "loss": 2.304, "step": 4365000 }, { "epoch": 12.64, "learning_rate": 4.368414806409782e-05, "loss": 2.2931, "step": 4365500 }, { "epoch": 12.64, "learning_rate": 4.368342586374583e-05, "loss": 2.2641, "step": 4366000 }, { "epoch": 12.64, "learning_rate": 4.3682702216098555e-05, "loss": 2.2951, "step": 4366500 }, { "epoch": 12.64, "learning_rate": 4.3681978568451284e-05, "loss": 2.2963, "step": 4367000 }, { "epoch": 12.64, "learning_rate": 4.36812563680993e-05, "loss": 2.3162, "step": 4367500 }, { "epoch": 12.64, "learning_rate": 4.3680534167747316e-05, "loss": 2.2933, "step": 4368000 }, { "epoch": 12.65, "learning_rate": 4.367981052010004e-05, "loss": 2.2692, "step": 4368500 }, { "epoch": 12.65, "learning_rate": 4.367908687245276e-05, "loss": 2.2864, "step": 4369000 }, { "epoch": 12.65, "learning_rate": 4.367836322480548e-05, "loss": 2.3025, "step": 4369500 }, { "epoch": 12.65, "learning_rate": 4.367763957715821e-05, "loss": 2.2734, "step": 4370000 }, { "epoch": 12.65, "learning_rate": 4.3676915929510934e-05, "loss": 2.2734, "step": 4370500 }, { "epoch": 12.65, "learning_rate": 4.3676192281863656e-05, "loss": 2.2531, "step": 4371000 }, { "epoch": 12.65, "learning_rate": 4.367546863421638e-05, "loss": 2.2737, "step": 4371500 }, { "epoch": 12.66, "learning_rate": 4.36747449865691e-05, "loss": 2.2671, "step": 4372000 }, { "epoch": 12.66, "learning_rate": 4.367402133892182e-05, "loss": 2.3104, "step": 4372500 }, { "epoch": 12.66, "learning_rate": 4.3673299138569845e-05, "loss": 2.2917, "step": 4373000 }, { "epoch": 12.66, "learning_rate": 4.367257549092257e-05, "loss": 2.2974, "step": 4373500 }, { "epoch": 12.66, "learning_rate": 4.367185184327529e-05, "loss": 2.3046, "step": 4374000 }, { "epoch": 12.66, "learning_rate": 4.367112819562801e-05, "loss": 2.282, "step": 4374500 }, { "epoch": 12.66, "learning_rate": 4.3670404547980734e-05, "loss": 2.3, "step": 4375000 }, { "epoch": 12.67, "learning_rate": 4.366968090033346e-05, "loss": 2.3216, "step": 4375500 }, { "epoch": 12.67, "learning_rate": 4.3668957252686185e-05, "loss": 2.265, "step": 4376000 }, { "epoch": 12.67, "learning_rate": 4.36682350523342e-05, "loss": 2.2926, "step": 4376500 }, { "epoch": 12.67, "learning_rate": 4.366751140468692e-05, "loss": 2.2926, "step": 4377000 }, { "epoch": 12.67, "learning_rate": 4.3666787757039645e-05, "loss": 2.2822, "step": 4377500 }, { "epoch": 12.67, "learning_rate": 4.366606410939237e-05, "loss": 2.2709, "step": 4378000 }, { "epoch": 12.67, "learning_rate": 4.366534046174509e-05, "loss": 2.2895, "step": 4378500 }, { "epoch": 12.68, "learning_rate": 4.366461681409781e-05, "loss": 2.2882, "step": 4379000 }, { "epoch": 12.68, "learning_rate": 4.3663894613745834e-05, "loss": 2.3158, "step": 4379500 }, { "epoch": 12.68, "learning_rate": 4.3663170966098556e-05, "loss": 2.2652, "step": 4380000 }, { "epoch": 12.68, "learning_rate": 4.3662447318451285e-05, "loss": 2.2897, "step": 4380500 }, { "epoch": 12.68, "learning_rate": 4.366172367080401e-05, "loss": 2.2783, "step": 4381000 }, { "epoch": 12.68, "learning_rate": 4.366100002315673e-05, "loss": 2.2994, "step": 4381500 }, { "epoch": 12.68, "learning_rate": 4.366027637550945e-05, "loss": 2.3178, "step": 4382000 }, { "epoch": 12.69, "learning_rate": 4.3659552727862174e-05, "loss": 2.3046, "step": 4382500 }, { "epoch": 12.69, "learning_rate": 4.3658829080214896e-05, "loss": 2.2897, "step": 4383000 }, { "epoch": 12.69, "learning_rate": 4.365810543256762e-05, "loss": 2.2789, "step": 4383500 }, { "epoch": 12.69, "learning_rate": 4.365738178492034e-05, "loss": 2.2895, "step": 4384000 }, { "epoch": 12.69, "learning_rate": 4.365665813727306e-05, "loss": 2.2846, "step": 4384500 }, { "epoch": 12.69, "learning_rate": 4.3655935936921085e-05, "loss": 2.2861, "step": 4385000 }, { "epoch": 12.69, "learning_rate": 4.365521228927381e-05, "loss": 2.2909, "step": 4385500 }, { "epoch": 12.7, "learning_rate": 4.365449008892182e-05, "loss": 2.2812, "step": 4386000 }, { "epoch": 12.7, "learning_rate": 4.3653766441274545e-05, "loss": 2.2979, "step": 4386500 }, { "epoch": 12.7, "learning_rate": 4.365304279362727e-05, "loss": 2.3065, "step": 4387000 }, { "epoch": 12.7, "learning_rate": 4.365231914598e-05, "loss": 2.303, "step": 4387500 }, { "epoch": 12.7, "learning_rate": 4.365159549833272e-05, "loss": 2.2735, "step": 4388000 }, { "epoch": 12.7, "learning_rate": 4.365087185068544e-05, "loss": 2.276, "step": 4388500 }, { "epoch": 12.7, "learning_rate": 4.3650148203038163e-05, "loss": 2.3068, "step": 4389000 }, { "epoch": 12.71, "learning_rate": 4.3649426002686186e-05, "loss": 2.3006, "step": 4389500 }, { "epoch": 12.71, "learning_rate": 4.364870235503891e-05, "loss": 2.2823, "step": 4390000 }, { "epoch": 12.71, "learning_rate": 4.364797870739163e-05, "loss": 2.2985, "step": 4390500 }, { "epoch": 12.71, "learning_rate": 4.364725505974435e-05, "loss": 2.2829, "step": 4391000 }, { "epoch": 12.71, "learning_rate": 4.364653285939237e-05, "loss": 2.2631, "step": 4391500 }, { "epoch": 12.71, "learning_rate": 4.364580921174509e-05, "loss": 2.2863, "step": 4392000 }, { "epoch": 12.71, "learning_rate": 4.364508556409781e-05, "loss": 2.3034, "step": 4392500 }, { "epoch": 12.72, "learning_rate": 4.3644361916450535e-05, "loss": 2.2801, "step": 4393000 }, { "epoch": 12.72, "learning_rate": 4.3643638268803264e-05, "loss": 2.3062, "step": 4393500 }, { "epoch": 12.72, "learning_rate": 4.3642914621155986e-05, "loss": 2.3062, "step": 4394000 }, { "epoch": 12.72, "learning_rate": 4.364219097350871e-05, "loss": 2.2737, "step": 4394500 }, { "epoch": 12.72, "learning_rate": 4.364146732586144e-05, "loss": 2.3031, "step": 4395000 }, { "epoch": 12.72, "learning_rate": 4.364074367821416e-05, "loss": 2.3045, "step": 4395500 }, { "epoch": 12.72, "learning_rate": 4.364002292515747e-05, "loss": 2.2611, "step": 4396000 }, { "epoch": 12.73, "learning_rate": 4.363929927751019e-05, "loss": 2.3004, "step": 4396500 }, { "epoch": 12.73, "learning_rate": 4.363857562986291e-05, "loss": 2.2865, "step": 4397000 }, { "epoch": 12.73, "learning_rate": 4.363785198221564e-05, "loss": 2.309, "step": 4397500 }, { "epoch": 12.73, "learning_rate": 4.3637128334568364e-05, "loss": 2.2951, "step": 4398000 }, { "epoch": 12.73, "learning_rate": 4.3636404686921086e-05, "loss": 2.2856, "step": 4398500 }, { "epoch": 12.73, "learning_rate": 4.363568103927381e-05, "loss": 2.2962, "step": 4399000 }, { "epoch": 12.73, "learning_rate": 4.363495739162653e-05, "loss": 2.2809, "step": 4399500 }, { "epoch": 12.74, "learning_rate": 4.3634235191274546e-05, "loss": 2.2879, "step": 4400000 }, { "epoch": 12.74, "learning_rate": 4.363351154362727e-05, "loss": 2.2759, "step": 4400500 }, { "epoch": 12.74, "learning_rate": 4.363278789597999e-05, "loss": 2.2959, "step": 4401000 }, { "epoch": 12.74, "learning_rate": 4.363206569562801e-05, "loss": 2.3067, "step": 4401500 }, { "epoch": 12.74, "learning_rate": 4.3631342047980735e-05, "loss": 2.2855, "step": 4402000 }, { "epoch": 12.74, "learning_rate": 4.363061840033346e-05, "loss": 2.2979, "step": 4402500 }, { "epoch": 12.74, "learning_rate": 4.3629894752686187e-05, "loss": 2.2814, "step": 4403000 }, { "epoch": 12.75, "learning_rate": 4.362917110503891e-05, "loss": 2.2698, "step": 4403500 }, { "epoch": 12.75, "learning_rate": 4.362844745739163e-05, "loss": 2.2975, "step": 4404000 }, { "epoch": 12.75, "learning_rate": 4.3627725257039647e-05, "loss": 2.3056, "step": 4404500 }, { "epoch": 12.75, "learning_rate": 4.362700160939237e-05, "loss": 2.2831, "step": 4405000 }, { "epoch": 12.75, "learning_rate": 4.362627796174509e-05, "loss": 2.3016, "step": 4405500 }, { "epoch": 12.75, "learning_rate": 4.362555431409781e-05, "loss": 2.2749, "step": 4406000 }, { "epoch": 12.76, "learning_rate": 4.362483066645054e-05, "loss": 2.2735, "step": 4406500 }, { "epoch": 12.76, "learning_rate": 4.3624107018803265e-05, "loss": 2.2926, "step": 4407000 }, { "epoch": 12.76, "learning_rate": 4.362338481845128e-05, "loss": 2.3031, "step": 4407500 }, { "epoch": 12.76, "learning_rate": 4.3622661170804e-05, "loss": 2.2725, "step": 4408000 }, { "epoch": 12.76, "learning_rate": 4.3621937523156725e-05, "loss": 2.3019, "step": 4408500 }, { "epoch": 12.76, "learning_rate": 4.362121387550945e-05, "loss": 2.3157, "step": 4409000 }, { "epoch": 12.76, "learning_rate": 4.362049022786217e-05, "loss": 2.2883, "step": 4409500 }, { "epoch": 12.77, "learning_rate": 4.36197665802149e-05, "loss": 2.3094, "step": 4410000 }, { "epoch": 12.77, "learning_rate": 4.361904293256762e-05, "loss": 2.2848, "step": 4410500 }, { "epoch": 12.77, "learning_rate": 4.361831928492034e-05, "loss": 2.2813, "step": 4411000 }, { "epoch": 12.77, "learning_rate": 4.3617597084568365e-05, "loss": 2.2721, "step": 4411500 }, { "epoch": 12.77, "learning_rate": 4.361687343692109e-05, "loss": 2.2934, "step": 4412000 }, { "epoch": 12.77, "learning_rate": 4.36161512365691e-05, "loss": 2.3088, "step": 4412500 }, { "epoch": 12.77, "learning_rate": 4.3615427588921825e-05, "loss": 2.2838, "step": 4413000 }, { "epoch": 12.78, "learning_rate": 4.361470394127455e-05, "loss": 2.2762, "step": 4413500 }, { "epoch": 12.78, "learning_rate": 4.361398174092256e-05, "loss": 2.2809, "step": 4414000 }, { "epoch": 12.78, "learning_rate": 4.361325809327529e-05, "loss": 2.2775, "step": 4414500 }, { "epoch": 12.78, "learning_rate": 4.3612534445628014e-05, "loss": 2.2707, "step": 4415000 }, { "epoch": 12.78, "learning_rate": 4.3611810797980736e-05, "loss": 2.3035, "step": 4415500 }, { "epoch": 12.78, "learning_rate": 4.361108715033346e-05, "loss": 2.2763, "step": 4416000 }, { "epoch": 12.78, "learning_rate": 4.361036350268618e-05, "loss": 2.3049, "step": 4416500 }, { "epoch": 12.79, "learning_rate": 4.36096398550389e-05, "loss": 2.3143, "step": 4417000 }, { "epoch": 12.79, "learning_rate": 4.360891620739163e-05, "loss": 2.2845, "step": 4417500 }, { "epoch": 12.79, "learning_rate": 4.3608192559744354e-05, "loss": 2.3283, "step": 4418000 }, { "epoch": 12.79, "learning_rate": 4.3607468912097076e-05, "loss": 2.2652, "step": 4418500 }, { "epoch": 12.79, "learning_rate": 4.360674815904039e-05, "loss": 2.2945, "step": 4419000 }, { "epoch": 12.79, "learning_rate": 4.3606024511393114e-05, "loss": 2.2833, "step": 4419500 }, { "epoch": 12.79, "learning_rate": 4.3605300863745836e-05, "loss": 2.302, "step": 4420000 }, { "epoch": 12.8, "learning_rate": 4.360457721609856e-05, "loss": 2.2676, "step": 4420500 }, { "epoch": 12.8, "learning_rate": 4.360385356845128e-05, "loss": 2.2876, "step": 4421000 }, { "epoch": 12.8, "learning_rate": 4.3603129920804e-05, "loss": 2.2745, "step": 4421500 }, { "epoch": 12.8, "learning_rate": 4.3602406273156725e-05, "loss": 2.3025, "step": 4422000 }, { "epoch": 12.8, "learning_rate": 4.360168262550945e-05, "loss": 2.2954, "step": 4422500 }, { "epoch": 12.8, "learning_rate": 4.360095897786217e-05, "loss": 2.284, "step": 4423000 }, { "epoch": 12.8, "learning_rate": 4.360023677751019e-05, "loss": 2.2739, "step": 4423500 }, { "epoch": 12.81, "learning_rate": 4.3599513129862914e-05, "loss": 2.2861, "step": 4424000 }, { "epoch": 12.81, "learning_rate": 4.3598789482215637e-05, "loss": 2.2708, "step": 4424500 }, { "epoch": 12.81, "learning_rate": 4.3598065834568366e-05, "loss": 2.2727, "step": 4425000 }, { "epoch": 12.81, "learning_rate": 4.359734363421638e-05, "loss": 2.2873, "step": 4425500 }, { "epoch": 12.81, "learning_rate": 4.359662288115969e-05, "loss": 2.3176, "step": 4426000 }, { "epoch": 12.81, "learning_rate": 4.359589923351242e-05, "loss": 2.286, "step": 4426500 }, { "epoch": 12.81, "learning_rate": 4.359517558586514e-05, "loss": 2.3087, "step": 4427000 }, { "epoch": 12.82, "learning_rate": 4.3594451938217864e-05, "loss": 2.2774, "step": 4427500 }, { "epoch": 12.82, "learning_rate": 4.3593728290570586e-05, "loss": 2.2544, "step": 4428000 }, { "epoch": 12.82, "learning_rate": 4.359300464292331e-05, "loss": 2.291, "step": 4428500 }, { "epoch": 12.82, "learning_rate": 4.359228099527603e-05, "loss": 2.2947, "step": 4429000 }, { "epoch": 12.82, "learning_rate": 4.359155734762875e-05, "loss": 2.3145, "step": 4429500 }, { "epoch": 12.82, "learning_rate": 4.3590833699981475e-05, "loss": 2.3209, "step": 4430000 }, { "epoch": 12.82, "learning_rate": 4.35901100523342e-05, "loss": 2.2688, "step": 4430500 }, { "epoch": 12.83, "learning_rate": 4.358938640468692e-05, "loss": 2.2786, "step": 4431000 }, { "epoch": 12.83, "learning_rate": 4.358866275703964e-05, "loss": 2.2736, "step": 4431500 }, { "epoch": 12.83, "learning_rate": 4.358793910939237e-05, "loss": 2.2968, "step": 4432000 }, { "epoch": 12.83, "learning_rate": 4.358721546174509e-05, "loss": 2.282, "step": 4432500 }, { "epoch": 12.83, "learning_rate": 4.358649181409782e-05, "loss": 2.2922, "step": 4433000 }, { "epoch": 12.83, "learning_rate": 4.3585768166450544e-05, "loss": 2.3045, "step": 4433500 }, { "epoch": 12.83, "learning_rate": 4.3585044518803266e-05, "loss": 2.2791, "step": 4434000 }, { "epoch": 12.84, "learning_rate": 4.358432231845128e-05, "loss": 2.307, "step": 4434500 }, { "epoch": 12.84, "learning_rate": 4.3583598670804004e-05, "loss": 2.2716, "step": 4435000 }, { "epoch": 12.84, "learning_rate": 4.3582875023156726e-05, "loss": 2.2893, "step": 4435500 }, { "epoch": 12.84, "learning_rate": 4.358215427010004e-05, "loss": 2.3072, "step": 4436000 }, { "epoch": 12.84, "learning_rate": 4.3581430622452764e-05, "loss": 2.2726, "step": 4436500 }, { "epoch": 12.84, "learning_rate": 4.3580706974805486e-05, "loss": 2.2981, "step": 4437000 }, { "epoch": 12.84, "learning_rate": 4.357998332715821e-05, "loss": 2.2811, "step": 4437500 }, { "epoch": 12.85, "learning_rate": 4.3579261126806224e-05, "loss": 2.2717, "step": 4438000 }, { "epoch": 12.85, "learning_rate": 4.3578537479158946e-05, "loss": 2.2845, "step": 4438500 }, { "epoch": 12.85, "learning_rate": 4.357781383151167e-05, "loss": 2.2851, "step": 4439000 }, { "epoch": 12.85, "learning_rate": 4.357709018386439e-05, "loss": 2.2978, "step": 4439500 }, { "epoch": 12.85, "learning_rate": 4.357636653621712e-05, "loss": 2.2972, "step": 4440000 }, { "epoch": 12.85, "learning_rate": 4.357564288856984e-05, "loss": 2.2827, "step": 4440500 }, { "epoch": 12.85, "learning_rate": 4.357491924092257e-05, "loss": 2.287, "step": 4441000 }, { "epoch": 12.86, "learning_rate": 4.3574197040570587e-05, "loss": 2.2737, "step": 4441500 }, { "epoch": 12.86, "learning_rate": 4.357347339292331e-05, "loss": 2.3074, "step": 4442000 }, { "epoch": 12.86, "learning_rate": 4.357274974527603e-05, "loss": 2.2981, "step": 4442500 }, { "epoch": 12.86, "learning_rate": 4.357202609762875e-05, "loss": 2.3185, "step": 4443000 }, { "epoch": 12.86, "learning_rate": 4.3571302449981475e-05, "loss": 2.2951, "step": 4443500 }, { "epoch": 12.86, "learning_rate": 4.35705788023342e-05, "loss": 2.2845, "step": 4444000 }, { "epoch": 12.87, "learning_rate": 4.356985515468692e-05, "loss": 2.3012, "step": 4444500 }, { "epoch": 12.87, "learning_rate": 4.356913150703964e-05, "loss": 2.2746, "step": 4445000 }, { "epoch": 12.87, "learning_rate": 4.356840785939237e-05, "loss": 2.2774, "step": 4445500 }, { "epoch": 12.87, "learning_rate": 4.3567684211745093e-05, "loss": 2.2789, "step": 4446000 }, { "epoch": 12.87, "learning_rate": 4.3566960564097816e-05, "loss": 2.2787, "step": 4446500 }, { "epoch": 12.87, "learning_rate": 4.356623836374583e-05, "loss": 2.2976, "step": 4447000 }, { "epoch": 12.87, "learning_rate": 4.3565514716098553e-05, "loss": 2.3033, "step": 4447500 }, { "epoch": 12.88, "learning_rate": 4.3564792515746576e-05, "loss": 2.2752, "step": 4448000 }, { "epoch": 12.88, "learning_rate": 4.35640688680993e-05, "loss": 2.2885, "step": 4448500 }, { "epoch": 12.88, "learning_rate": 4.356334522045202e-05, "loss": 2.3148, "step": 4449000 }, { "epoch": 12.88, "learning_rate": 4.356262157280475e-05, "loss": 2.3076, "step": 4449500 }, { "epoch": 12.88, "learning_rate": 4.356189792515747e-05, "loss": 2.2955, "step": 4450000 }, { "epoch": 12.88, "learning_rate": 4.3561174277510194e-05, "loss": 2.278, "step": 4450500 }, { "epoch": 12.88, "learning_rate": 4.3560450629862916e-05, "loss": 2.2934, "step": 4451000 }, { "epoch": 12.89, "learning_rate": 4.355972842951093e-05, "loss": 2.295, "step": 4451500 }, { "epoch": 12.89, "learning_rate": 4.3559004781863654e-05, "loss": 2.2765, "step": 4452000 }, { "epoch": 12.89, "learning_rate": 4.3558281134216376e-05, "loss": 2.3007, "step": 4452500 }, { "epoch": 12.89, "learning_rate": 4.35575574865691e-05, "loss": 2.3, "step": 4453000 }, { "epoch": 12.89, "learning_rate": 4.355683383892182e-05, "loss": 2.2803, "step": 4453500 }, { "epoch": 12.89, "learning_rate": 4.355611019127455e-05, "loss": 2.293, "step": 4454000 }, { "epoch": 12.89, "learning_rate": 4.355538654362727e-05, "loss": 2.3089, "step": 4454500 }, { "epoch": 12.9, "learning_rate": 4.355466289598e-05, "loss": 2.2824, "step": 4455000 }, { "epoch": 12.9, "learning_rate": 4.355393924833272e-05, "loss": 2.28, "step": 4455500 }, { "epoch": 12.9, "learning_rate": 4.3553215600685445e-05, "loss": 2.3017, "step": 4456000 }, { "epoch": 12.9, "learning_rate": 4.355249195303817e-05, "loss": 2.2649, "step": 4456500 }, { "epoch": 12.9, "learning_rate": 4.355176830539089e-05, "loss": 2.2809, "step": 4457000 }, { "epoch": 12.9, "learning_rate": 4.355104465774361e-05, "loss": 2.2821, "step": 4457500 }, { "epoch": 12.9, "learning_rate": 4.3550321010096334e-05, "loss": 2.2712, "step": 4458000 }, { "epoch": 12.91, "learning_rate": 4.3549597362449056e-05, "loss": 2.2864, "step": 4458500 }, { "epoch": 12.91, "learning_rate": 4.354887371480178e-05, "loss": 2.277, "step": 4459000 }, { "epoch": 12.91, "learning_rate": 4.35481500671545e-05, "loss": 2.298, "step": 4459500 }, { "epoch": 12.91, "learning_rate": 4.354742641950722e-05, "loss": 2.3088, "step": 4460000 }, { "epoch": 12.91, "learning_rate": 4.3546702771859945e-05, "loss": 2.2784, "step": 4460500 }, { "epoch": 12.91, "learning_rate": 4.354598057150797e-05, "loss": 2.298, "step": 4461000 }, { "epoch": 12.91, "learning_rate": 4.354525692386069e-05, "loss": 2.3199, "step": 4461500 }, { "epoch": 12.92, "learning_rate": 4.354453327621341e-05, "loss": 2.3146, "step": 4462000 }, { "epoch": 12.92, "learning_rate": 4.354380962856614e-05, "loss": 2.2779, "step": 4462500 }, { "epoch": 12.92, "learning_rate": 4.354308598091886e-05, "loss": 2.2835, "step": 4463000 }, { "epoch": 12.92, "learning_rate": 4.354236378056688e-05, "loss": 2.2597, "step": 4463500 }, { "epoch": 12.92, "learning_rate": 4.35416415802149e-05, "loss": 2.2979, "step": 4464000 }, { "epoch": 12.92, "learning_rate": 4.3540917932567623e-05, "loss": 2.3171, "step": 4464500 }, { "epoch": 12.92, "learning_rate": 4.3540194284920346e-05, "loss": 2.2654, "step": 4465000 }, { "epoch": 12.93, "learning_rate": 4.353947063727307e-05, "loss": 2.268, "step": 4465500 }, { "epoch": 12.93, "learning_rate": 4.3538748436921083e-05, "loss": 2.2823, "step": 4466000 }, { "epoch": 12.93, "learning_rate": 4.3538024789273806e-05, "loss": 2.2805, "step": 4466500 }, { "epoch": 12.93, "learning_rate": 4.353730114162653e-05, "loss": 2.287, "step": 4467000 }, { "epoch": 12.93, "learning_rate": 4.353657749397925e-05, "loss": 2.309, "step": 4467500 }, { "epoch": 12.93, "learning_rate": 4.353585384633197e-05, "loss": 2.2904, "step": 4468000 }, { "epoch": 12.93, "learning_rate": 4.3535131645979995e-05, "loss": 2.3005, "step": 4468500 }, { "epoch": 12.94, "learning_rate": 4.353440944562801e-05, "loss": 2.2781, "step": 4469000 }, { "epoch": 12.94, "learning_rate": 4.353368579798073e-05, "loss": 2.2991, "step": 4469500 }, { "epoch": 12.94, "learning_rate": 4.3532962150333455e-05, "loss": 2.3136, "step": 4470000 }, { "epoch": 12.94, "learning_rate": 4.3532238502686184e-05, "loss": 2.2814, "step": 4470500 }, { "epoch": 12.94, "learning_rate": 4.3531514855038906e-05, "loss": 2.3119, "step": 4471000 }, { "epoch": 12.94, "learning_rate": 4.353079120739163e-05, "loss": 2.2725, "step": 4471500 }, { "epoch": 12.94, "learning_rate": 4.353006755974435e-05, "loss": 2.3128, "step": 4472000 }, { "epoch": 12.95, "learning_rate": 4.352934391209707e-05, "loss": 2.282, "step": 4472500 }, { "epoch": 12.95, "learning_rate": 4.35286202644498e-05, "loss": 2.2856, "step": 4473000 }, { "epoch": 12.95, "learning_rate": 4.3527896616802524e-05, "loss": 2.2928, "step": 4473500 }, { "epoch": 12.95, "learning_rate": 4.3527172969155246e-05, "loss": 2.3122, "step": 4474000 }, { "epoch": 12.95, "learning_rate": 4.352645076880326e-05, "loss": 2.3056, "step": 4474500 }, { "epoch": 12.95, "learning_rate": 4.3525727121155984e-05, "loss": 2.2997, "step": 4475000 }, { "epoch": 12.95, "learning_rate": 4.3525003473508706e-05, "loss": 2.279, "step": 4475500 }, { "epoch": 12.96, "learning_rate": 4.352427982586143e-05, "loss": 2.2859, "step": 4476000 }, { "epoch": 12.96, "learning_rate": 4.352355617821415e-05, "loss": 2.3091, "step": 4476500 }, { "epoch": 12.96, "learning_rate": 4.352283253056687e-05, "loss": 2.2498, "step": 4477000 }, { "epoch": 12.96, "learning_rate": 4.35221088829196e-05, "loss": 2.286, "step": 4477500 }, { "epoch": 12.96, "learning_rate": 4.3521385235272324e-05, "loss": 2.3032, "step": 4478000 }, { "epoch": 12.96, "learning_rate": 4.352066158762505e-05, "loss": 2.2769, "step": 4478500 }, { "epoch": 12.96, "learning_rate": 4.351994083456836e-05, "loss": 2.2975, "step": 4479000 }, { "epoch": 12.97, "learning_rate": 4.3519217186921084e-05, "loss": 2.2931, "step": 4479500 }, { "epoch": 12.97, "learning_rate": 4.35184949865691e-05, "loss": 2.2915, "step": 4480000 }, { "epoch": 12.97, "learning_rate": 4.351777133892183e-05, "loss": 2.2989, "step": 4480500 }, { "epoch": 12.97, "learning_rate": 4.351704769127455e-05, "loss": 2.2736, "step": 4481000 }, { "epoch": 12.97, "learning_rate": 4.351632404362727e-05, "loss": 2.3193, "step": 4481500 }, { "epoch": 12.97, "learning_rate": 4.3515600395979996e-05, "loss": 2.2949, "step": 4482000 }, { "epoch": 12.98, "learning_rate": 4.351487674833272e-05, "loss": 2.2814, "step": 4482500 }, { "epoch": 12.98, "learning_rate": 4.351415310068544e-05, "loss": 2.294, "step": 4483000 }, { "epoch": 12.98, "learning_rate": 4.351342945303816e-05, "loss": 2.2778, "step": 4483500 }, { "epoch": 12.98, "learning_rate": 4.3512705805390884e-05, "loss": 2.2795, "step": 4484000 }, { "epoch": 12.98, "learning_rate": 4.351198215774361e-05, "loss": 2.2953, "step": 4484500 }, { "epoch": 12.98, "learning_rate": 4.3511258510096336e-05, "loss": 2.2685, "step": 4485000 }, { "epoch": 12.98, "learning_rate": 4.351053486244906e-05, "loss": 2.3144, "step": 4485500 }, { "epoch": 12.99, "learning_rate": 4.350981121480178e-05, "loss": 2.2776, "step": 4486000 }, { "epoch": 12.99, "learning_rate": 4.35090890144498e-05, "loss": 2.2719, "step": 4486500 }, { "epoch": 12.99, "learning_rate": 4.3508365366802525e-05, "loss": 2.2891, "step": 4487000 }, { "epoch": 12.99, "learning_rate": 4.350764171915525e-05, "loss": 2.2724, "step": 4487500 }, { "epoch": 12.99, "learning_rate": 4.350691807150797e-05, "loss": 2.2887, "step": 4488000 }, { "epoch": 12.99, "learning_rate": 4.3506195871155985e-05, "loss": 2.2841, "step": 4488500 }, { "epoch": 12.99, "learning_rate": 4.350547222350871e-05, "loss": 2.288, "step": 4489000 }, { "epoch": 13.0, "learning_rate": 4.350474857586143e-05, "loss": 2.3102, "step": 4489500 }, { "epoch": 13.0, "learning_rate": 4.350402492821415e-05, "loss": 2.2786, "step": 4490000 }, { "epoch": 13.0, "learning_rate": 4.3503302727862174e-05, "loss": 2.3065, "step": 4490500 }, { "epoch": 13.0, "learning_rate": 4.3502579080214896e-05, "loss": 2.2941, "step": 4491000 }, { "epoch": 13.0, "eval_accuracy": 0.6519667819102094, "eval_accuracy_mlm": 0.6147493240537715, "eval_accuracy_nsp": 0.8515396919241223, "eval_loss": 2.285916566848755, "eval_runtime": 330.4294, "eval_samples_per_second": 1320.663, "eval_steps_per_second": 55.028, "step": 4491136 }, { "epoch": 13.0, "learning_rate": 4.350185543256762e-05, "loss": 2.2703, "step": 4491500 }, { "epoch": 13.0, "learning_rate": 4.350113178492034e-05, "loss": 2.263, "step": 4492000 }, { "epoch": 13.0, "learning_rate": 4.350040813727307e-05, "loss": 2.2633, "step": 4492500 }, { "epoch": 13.01, "learning_rate": 4.3499685936921085e-05, "loss": 2.2604, "step": 4493000 }, { "epoch": 13.01, "learning_rate": 4.349896228927381e-05, "loss": 2.2509, "step": 4493500 }, { "epoch": 13.01, "learning_rate": 4.349823864162653e-05, "loss": 2.2627, "step": 4494000 }, { "epoch": 13.01, "learning_rate": 4.349751499397925e-05, "loss": 2.266, "step": 4494500 }, { "epoch": 13.01, "learning_rate": 4.3496792793627274e-05, "loss": 2.293, "step": 4495000 }, { "epoch": 13.01, "learning_rate": 4.3496069145979996e-05, "loss": 2.2931, "step": 4495500 }, { "epoch": 13.01, "learning_rate": 4.349534549833272e-05, "loss": 2.287, "step": 4496000 }, { "epoch": 13.02, "learning_rate": 4.349462185068544e-05, "loss": 2.2158, "step": 4496500 }, { "epoch": 13.02, "learning_rate": 4.349389820303816e-05, "loss": 2.2506, "step": 4497000 }, { "epoch": 13.02, "learning_rate": 4.3493174555390885e-05, "loss": 2.2813, "step": 4497500 }, { "epoch": 13.02, "learning_rate": 4.34924523550389e-05, "loss": 2.2819, "step": 4498000 }, { "epoch": 13.02, "learning_rate": 4.349172870739163e-05, "loss": 2.2681, "step": 4498500 }, { "epoch": 13.02, "learning_rate": 4.349100505974435e-05, "loss": 2.287, "step": 4499000 }, { "epoch": 13.02, "learning_rate": 4.3490281412097074e-05, "loss": 2.2663, "step": 4499500 }, { "epoch": 13.03, "learning_rate": 4.34895577644498e-05, "loss": 2.2696, "step": 4500000 }, { "epoch": 13.03, "learning_rate": 4.3488834116802526e-05, "loss": 2.2868, "step": 4500500 }, { "epoch": 13.03, "learning_rate": 4.348811046915525e-05, "loss": 2.2862, "step": 4501000 }, { "epoch": 13.03, "learning_rate": 4.348738682150797e-05, "loss": 2.2863, "step": 4501500 }, { "epoch": 13.03, "learning_rate": 4.3486664621155986e-05, "loss": 2.237, "step": 4502000 }, { "epoch": 13.03, "learning_rate": 4.3485942420804e-05, "loss": 2.2913, "step": 4502500 }, { "epoch": 13.03, "learning_rate": 4.348521877315673e-05, "loss": 2.2999, "step": 4503000 }, { "epoch": 13.04, "learning_rate": 4.348449512550945e-05, "loss": 2.2576, "step": 4503500 }, { "epoch": 13.04, "learning_rate": 4.3483771477862175e-05, "loss": 2.2668, "step": 4504000 }, { "epoch": 13.04, "learning_rate": 4.34830478302149e-05, "loss": 2.268, "step": 4504500 }, { "epoch": 13.04, "learning_rate": 4.348232418256762e-05, "loss": 2.2798, "step": 4505000 }, { "epoch": 13.04, "learning_rate": 4.348160053492034e-05, "loss": 2.2499, "step": 4505500 }, { "epoch": 13.04, "learning_rate": 4.3480876887273064e-05, "loss": 2.2528, "step": 4506000 }, { "epoch": 13.04, "learning_rate": 4.348015468692108e-05, "loss": 2.2651, "step": 4506500 }, { "epoch": 13.05, "learning_rate": 4.34794310392738e-05, "loss": 2.249, "step": 4507000 }, { "epoch": 13.05, "learning_rate": 4.347870739162653e-05, "loss": 2.2622, "step": 4507500 }, { "epoch": 13.05, "learning_rate": 4.347798374397925e-05, "loss": 2.2708, "step": 4508000 }, { "epoch": 13.05, "learning_rate": 4.347726009633198e-05, "loss": 2.2856, "step": 4508500 }, { "epoch": 13.05, "learning_rate": 4.3476536448684704e-05, "loss": 2.2897, "step": 4509000 }, { "epoch": 13.05, "learning_rate": 4.3475812801037426e-05, "loss": 2.2628, "step": 4509500 }, { "epoch": 13.05, "learning_rate": 4.347508915339015e-05, "loss": 2.2829, "step": 4510000 }, { "epoch": 13.06, "learning_rate": 4.347436550574287e-05, "loss": 2.2595, "step": 4510500 }, { "epoch": 13.06, "learning_rate": 4.347364185809559e-05, "loss": 2.2737, "step": 4511000 }, { "epoch": 13.06, "learning_rate": 4.3472918210448315e-05, "loss": 2.2561, "step": 4511500 }, { "epoch": 13.06, "learning_rate": 4.347219456280104e-05, "loss": 2.2928, "step": 4512000 }, { "epoch": 13.06, "learning_rate": 4.347147236244905e-05, "loss": 2.2584, "step": 4512500 }, { "epoch": 13.06, "learning_rate": 4.347074871480178e-05, "loss": 2.2889, "step": 4513000 }, { "epoch": 13.06, "learning_rate": 4.3470025067154504e-05, "loss": 2.2659, "step": 4513500 }, { "epoch": 13.07, "learning_rate": 4.3469301419507226e-05, "loss": 2.2651, "step": 4514000 }, { "epoch": 13.07, "learning_rate": 4.3468577771859955e-05, "loss": 2.2736, "step": 4514500 }, { "epoch": 13.07, "learning_rate": 4.346785557150797e-05, "loss": 2.2656, "step": 4515000 }, { "epoch": 13.07, "learning_rate": 4.346713192386069e-05, "loss": 2.2545, "step": 4515500 }, { "epoch": 13.07, "learning_rate": 4.3466408276213415e-05, "loss": 2.2689, "step": 4516000 }, { "epoch": 13.07, "learning_rate": 4.346568462856614e-05, "loss": 2.2784, "step": 4516500 }, { "epoch": 13.07, "learning_rate": 4.346496242821416e-05, "loss": 2.2917, "step": 4517000 }, { "epoch": 13.08, "learning_rate": 4.346423878056688e-05, "loss": 2.2672, "step": 4517500 }, { "epoch": 13.08, "learning_rate": 4.3463515132919604e-05, "loss": 2.2658, "step": 4518000 }, { "epoch": 13.08, "learning_rate": 4.3462791485272327e-05, "loss": 2.2881, "step": 4518500 }, { "epoch": 13.08, "learning_rate": 4.346206783762505e-05, "loss": 2.2553, "step": 4519000 }, { "epoch": 13.08, "learning_rate": 4.346134418997777e-05, "loss": 2.2551, "step": 4519500 }, { "epoch": 13.08, "learning_rate": 4.346062054233049e-05, "loss": 2.2726, "step": 4520000 }, { "epoch": 13.08, "learning_rate": 4.3459896894683215e-05, "loss": 2.2752, "step": 4520500 }, { "epoch": 13.09, "learning_rate": 4.345917469433123e-05, "loss": 2.2974, "step": 4521000 }, { "epoch": 13.09, "learning_rate": 4.345845104668396e-05, "loss": 2.2845, "step": 4521500 }, { "epoch": 13.09, "learning_rate": 4.345772739903668e-05, "loss": 2.2659, "step": 4522000 }, { "epoch": 13.09, "learning_rate": 4.345700375138941e-05, "loss": 2.2657, "step": 4522500 }, { "epoch": 13.09, "learning_rate": 4.3456280103742133e-05, "loss": 2.2786, "step": 4523000 }, { "epoch": 13.09, "learning_rate": 4.3455556456094856e-05, "loss": 2.2818, "step": 4523500 }, { "epoch": 13.1, "learning_rate": 4.345483280844758e-05, "loss": 2.2562, "step": 4524000 }, { "epoch": 13.1, "learning_rate": 4.34541091608003e-05, "loss": 2.2787, "step": 4524500 }, { "epoch": 13.1, "learning_rate": 4.345338551315302e-05, "loss": 2.2755, "step": 4525000 }, { "epoch": 13.1, "learning_rate": 4.3452661865505745e-05, "loss": 2.28, "step": 4525500 }, { "epoch": 13.1, "learning_rate": 4.345193966515376e-05, "loss": 2.2793, "step": 4526000 }, { "epoch": 13.1, "learning_rate": 4.345121601750648e-05, "loss": 2.3029, "step": 4526500 }, { "epoch": 13.1, "learning_rate": 4.345049236985921e-05, "loss": 2.2791, "step": 4527000 }, { "epoch": 13.11, "learning_rate": 4.3449768722211934e-05, "loss": 2.2947, "step": 4527500 }, { "epoch": 13.11, "learning_rate": 4.344904652185995e-05, "loss": 2.2985, "step": 4528000 }, { "epoch": 13.11, "learning_rate": 4.344832287421267e-05, "loss": 2.2667, "step": 4528500 }, { "epoch": 13.11, "learning_rate": 4.3447599226565394e-05, "loss": 2.2754, "step": 4529000 }, { "epoch": 13.11, "learning_rate": 4.344687557891812e-05, "loss": 2.2619, "step": 4529500 }, { "epoch": 13.11, "learning_rate": 4.3446151931270845e-05, "loss": 2.2596, "step": 4530000 }, { "epoch": 13.11, "learning_rate": 4.344542828362357e-05, "loss": 2.2909, "step": 4530500 }, { "epoch": 13.12, "learning_rate": 4.344470463597629e-05, "loss": 2.2688, "step": 4531000 }, { "epoch": 13.12, "learning_rate": 4.344398098832901e-05, "loss": 2.2723, "step": 4531500 }, { "epoch": 13.12, "learning_rate": 4.3443258787977034e-05, "loss": 2.2539, "step": 4532000 }, { "epoch": 13.12, "learning_rate": 4.3442535140329756e-05, "loss": 2.2503, "step": 4532500 }, { "epoch": 13.12, "learning_rate": 4.344181149268248e-05, "loss": 2.2997, "step": 4533000 }, { "epoch": 13.12, "learning_rate": 4.34410878450352e-05, "loss": 2.2763, "step": 4533500 }, { "epoch": 13.12, "learning_rate": 4.344036419738792e-05, "loss": 2.2678, "step": 4534000 }, { "epoch": 13.13, "learning_rate": 4.3439640549740645e-05, "loss": 2.2672, "step": 4534500 }, { "epoch": 13.13, "learning_rate": 4.343891834938866e-05, "loss": 2.2705, "step": 4535000 }, { "epoch": 13.13, "learning_rate": 4.343819470174138e-05, "loss": 2.2989, "step": 4535500 }, { "epoch": 13.13, "learning_rate": 4.343747105409411e-05, "loss": 2.2836, "step": 4536000 }, { "epoch": 13.13, "learning_rate": 4.3436747406446834e-05, "loss": 2.2731, "step": 4536500 }, { "epoch": 13.13, "learning_rate": 4.343602375879956e-05, "loss": 2.2403, "step": 4537000 }, { "epoch": 13.13, "learning_rate": 4.343530155844758e-05, "loss": 2.2537, "step": 4537500 }, { "epoch": 13.14, "learning_rate": 4.34345779108003e-05, "loss": 2.2727, "step": 4538000 }, { "epoch": 13.14, "learning_rate": 4.343385426315302e-05, "loss": 2.2876, "step": 4538500 }, { "epoch": 13.14, "learning_rate": 4.343313206280104e-05, "loss": 2.2937, "step": 4539000 }, { "epoch": 13.14, "learning_rate": 4.343240841515376e-05, "loss": 2.2638, "step": 4539500 }, { "epoch": 13.14, "learning_rate": 4.343168476750648e-05, "loss": 2.2566, "step": 4540000 }, { "epoch": 13.14, "learning_rate": 4.343096111985921e-05, "loss": 2.2976, "step": 4540500 }, { "epoch": 13.14, "learning_rate": 4.3430237472211934e-05, "loss": 2.2783, "step": 4541000 }, { "epoch": 13.15, "learning_rate": 4.342951382456466e-05, "loss": 2.2938, "step": 4541500 }, { "epoch": 13.15, "learning_rate": 4.342879017691738e-05, "loss": 2.2527, "step": 4542000 }, { "epoch": 13.15, "learning_rate": 4.34280665292701e-05, "loss": 2.2751, "step": 4542500 }, { "epoch": 13.15, "learning_rate": 4.3427342881622823e-05, "loss": 2.2912, "step": 4543000 }, { "epoch": 13.15, "learning_rate": 4.3426619233975546e-05, "loss": 2.2502, "step": 4543500 }, { "epoch": 13.15, "learning_rate": 4.342589558632827e-05, "loss": 2.2896, "step": 4544000 }, { "epoch": 13.15, "learning_rate": 4.3425171938681e-05, "loss": 2.2322, "step": 4544500 }, { "epoch": 13.16, "learning_rate": 4.342444829103372e-05, "loss": 2.2805, "step": 4545000 }, { "epoch": 13.16, "learning_rate": 4.342372464338644e-05, "loss": 2.2878, "step": 4545500 }, { "epoch": 13.16, "learning_rate": 4.342300389032976e-05, "loss": 2.2907, "step": 4546000 }, { "epoch": 13.16, "learning_rate": 4.342228024268248e-05, "loss": 2.25, "step": 4546500 }, { "epoch": 13.16, "learning_rate": 4.34215565950352e-05, "loss": 2.2689, "step": 4547000 }, { "epoch": 13.16, "learning_rate": 4.3420832947387924e-05, "loss": 2.2489, "step": 4547500 }, { "epoch": 13.16, "learning_rate": 4.3420109299740646e-05, "loss": 2.2577, "step": 4548000 }, { "epoch": 13.17, "learning_rate": 4.341938565209337e-05, "loss": 2.2831, "step": 4548500 }, { "epoch": 13.17, "learning_rate": 4.341866200444609e-05, "loss": 2.2789, "step": 4549000 }, { "epoch": 13.17, "learning_rate": 4.341793835679881e-05, "loss": 2.2791, "step": 4549500 }, { "epoch": 13.17, "learning_rate": 4.3417214709151535e-05, "loss": 2.2837, "step": 4550000 }, { "epoch": 13.17, "learning_rate": 4.341649250879956e-05, "loss": 2.2579, "step": 4550500 }, { "epoch": 13.17, "learning_rate": 4.341576886115228e-05, "loss": 2.2812, "step": 4551000 }, { "epoch": 13.17, "learning_rate": 4.3415045213505e-05, "loss": 2.2556, "step": 4551500 }, { "epoch": 13.18, "learning_rate": 4.341432156585773e-05, "loss": 2.2962, "step": 4552000 }, { "epoch": 13.18, "learning_rate": 4.3413599365505746e-05, "loss": 2.2688, "step": 4552500 }, { "epoch": 13.18, "learning_rate": 4.341287571785847e-05, "loss": 2.2834, "step": 4553000 }, { "epoch": 13.18, "learning_rate": 4.341215207021119e-05, "loss": 2.2661, "step": 4553500 }, { "epoch": 13.18, "learning_rate": 4.341142842256391e-05, "loss": 2.2849, "step": 4554000 }, { "epoch": 13.18, "learning_rate": 4.3410704774916635e-05, "loss": 2.2482, "step": 4554500 }, { "epoch": 13.18, "learning_rate": 4.340998257456466e-05, "loss": 2.2939, "step": 4555000 }, { "epoch": 13.19, "learning_rate": 4.340925892691738e-05, "loss": 2.2696, "step": 4555500 }, { "epoch": 13.19, "learning_rate": 4.34085352792701e-05, "loss": 2.2936, "step": 4556000 }, { "epoch": 13.19, "learning_rate": 4.3407811631622824e-05, "loss": 2.2909, "step": 4556500 }, { "epoch": 13.19, "learning_rate": 4.3407087983975546e-05, "loss": 2.2414, "step": 4557000 }, { "epoch": 13.19, "learning_rate": 4.340636433632827e-05, "loss": 2.2643, "step": 4557500 }, { "epoch": 13.19, "learning_rate": 4.340564213597629e-05, "loss": 2.2715, "step": 4558000 }, { "epoch": 13.19, "learning_rate": 4.340491848832901e-05, "loss": 2.2916, "step": 4558500 }, { "epoch": 13.2, "learning_rate": 4.3404194840681735e-05, "loss": 2.2797, "step": 4559000 }, { "epoch": 13.2, "learning_rate": 4.3403471193034464e-05, "loss": 2.2723, "step": 4559500 }, { "epoch": 13.2, "learning_rate": 4.340274754538719e-05, "loss": 2.2605, "step": 4560000 }, { "epoch": 13.2, "learning_rate": 4.34020253450352e-05, "loss": 2.2933, "step": 4560500 }, { "epoch": 13.2, "learning_rate": 4.3401301697387925e-05, "loss": 2.2655, "step": 4561000 }, { "epoch": 13.2, "learning_rate": 4.340057804974065e-05, "loss": 2.2577, "step": 4561500 }, { "epoch": 13.21, "learning_rate": 4.339985440209337e-05, "loss": 2.2941, "step": 4562000 }, { "epoch": 13.21, "learning_rate": 4.339913220174139e-05, "loss": 2.2348, "step": 4562500 }, { "epoch": 13.21, "learning_rate": 4.339841000138941e-05, "loss": 2.306, "step": 4563000 }, { "epoch": 13.21, "learning_rate": 4.339768635374213e-05, "loss": 2.2946, "step": 4563500 }, { "epoch": 13.21, "learning_rate": 4.339696270609485e-05, "loss": 2.2677, "step": 4564000 }, { "epoch": 13.21, "learning_rate": 4.3396239058447574e-05, "loss": 2.274, "step": 4564500 }, { "epoch": 13.21, "learning_rate": 4.3395515410800296e-05, "loss": 2.2817, "step": 4565000 }, { "epoch": 13.22, "learning_rate": 4.339479176315302e-05, "loss": 2.2921, "step": 4565500 }, { "epoch": 13.22, "learning_rate": 4.339406956280104e-05, "loss": 2.2645, "step": 4566000 }, { "epoch": 13.22, "learning_rate": 4.339334591515376e-05, "loss": 2.2427, "step": 4566500 }, { "epoch": 13.22, "learning_rate": 4.339262371480178e-05, "loss": 2.2691, "step": 4567000 }, { "epoch": 13.22, "learning_rate": 4.339190006715451e-05, "loss": 2.2624, "step": 4567500 }, { "epoch": 13.22, "learning_rate": 4.339117641950723e-05, "loss": 2.2782, "step": 4568000 }, { "epoch": 13.22, "learning_rate": 4.339045277185995e-05, "loss": 2.2871, "step": 4568500 }, { "epoch": 13.23, "learning_rate": 4.3389729124212674e-05, "loss": 2.2703, "step": 4569000 }, { "epoch": 13.23, "learning_rate": 4.3389005476565396e-05, "loss": 2.2788, "step": 4569500 }, { "epoch": 13.23, "learning_rate": 4.338828182891812e-05, "loss": 2.2841, "step": 4570000 }, { "epoch": 13.23, "learning_rate": 4.338755818127084e-05, "loss": 2.2704, "step": 4570500 }, { "epoch": 13.23, "learning_rate": 4.338683453362356e-05, "loss": 2.2578, "step": 4571000 }, { "epoch": 13.23, "learning_rate": 4.338611088597629e-05, "loss": 2.2785, "step": 4571500 }, { "epoch": 13.23, "learning_rate": 4.3385387238329014e-05, "loss": 2.2835, "step": 4572000 }, { "epoch": 13.24, "learning_rate": 4.3384663590681736e-05, "loss": 2.287, "step": 4572500 }, { "epoch": 13.24, "learning_rate": 4.338393994303446e-05, "loss": 2.2938, "step": 4573000 }, { "epoch": 13.24, "learning_rate": 4.3383217742682474e-05, "loss": 2.2571, "step": 4573500 }, { "epoch": 13.24, "learning_rate": 4.3382494095035196e-05, "loss": 2.2608, "step": 4574000 }, { "epoch": 13.24, "learning_rate": 4.3381770447387925e-05, "loss": 2.281, "step": 4574500 }, { "epoch": 13.24, "learning_rate": 4.338104679974065e-05, "loss": 2.2456, "step": 4575000 }, { "epoch": 13.24, "learning_rate": 4.338032315209337e-05, "loss": 2.269, "step": 4575500 }, { "epoch": 13.25, "learning_rate": 4.337960095174139e-05, "loss": 2.2778, "step": 4576000 }, { "epoch": 13.25, "learning_rate": 4.3378877304094114e-05, "loss": 2.2712, "step": 4576500 }, { "epoch": 13.25, "learning_rate": 4.3378153656446837e-05, "loss": 2.2903, "step": 4577000 }, { "epoch": 13.25, "learning_rate": 4.337743000879956e-05, "loss": 2.2551, "step": 4577500 }, { "epoch": 13.25, "learning_rate": 4.337670636115228e-05, "loss": 2.2682, "step": 4578000 }, { "epoch": 13.25, "learning_rate": 4.3375984160800297e-05, "loss": 2.2646, "step": 4578500 }, { "epoch": 13.25, "learning_rate": 4.337526051315302e-05, "loss": 2.2796, "step": 4579000 }, { "epoch": 13.26, "learning_rate": 4.337453686550574e-05, "loss": 2.254, "step": 4579500 }, { "epoch": 13.26, "learning_rate": 4.337381321785846e-05, "loss": 2.2834, "step": 4580000 }, { "epoch": 13.26, "learning_rate": 4.3373091017506486e-05, "loss": 2.274, "step": 4580500 }, { "epoch": 13.26, "learning_rate": 4.337236736985921e-05, "loss": 2.2657, "step": 4581000 }, { "epoch": 13.26, "learning_rate": 4.337164372221193e-05, "loss": 2.2861, "step": 4581500 }, { "epoch": 13.26, "learning_rate": 4.337092007456466e-05, "loss": 2.2807, "step": 4582000 }, { "epoch": 13.26, "learning_rate": 4.337019642691738e-05, "loss": 2.2688, "step": 4582500 }, { "epoch": 13.27, "learning_rate": 4.3369472779270104e-05, "loss": 2.2642, "step": 4583000 }, { "epoch": 13.27, "learning_rate": 4.3368749131622826e-05, "loss": 2.2595, "step": 4583500 }, { "epoch": 13.27, "learning_rate": 4.336802693127084e-05, "loss": 2.27, "step": 4584000 }, { "epoch": 13.27, "learning_rate": 4.336730328362357e-05, "loss": 2.2522, "step": 4584500 }, { "epoch": 13.27, "learning_rate": 4.336657963597629e-05, "loss": 2.2806, "step": 4585000 }, { "epoch": 13.27, "learning_rate": 4.3365855988329015e-05, "loss": 2.2826, "step": 4585500 }, { "epoch": 13.27, "learning_rate": 4.336513378797703e-05, "loss": 2.2898, "step": 4586000 }, { "epoch": 13.28, "learning_rate": 4.336441014032975e-05, "loss": 2.2753, "step": 4586500 }, { "epoch": 13.28, "learning_rate": 4.3363686492682475e-05, "loss": 2.2992, "step": 4587000 }, { "epoch": 13.28, "learning_rate": 4.33629628450352e-05, "loss": 2.2638, "step": 4587500 }, { "epoch": 13.28, "learning_rate": 4.336224064468322e-05, "loss": 2.2855, "step": 4588000 }, { "epoch": 13.28, "learning_rate": 4.336151699703594e-05, "loss": 2.2826, "step": 4588500 }, { "epoch": 13.28, "learning_rate": 4.3360793349388664e-05, "loss": 2.2696, "step": 4589000 }, { "epoch": 13.28, "learning_rate": 4.336006970174139e-05, "loss": 2.2835, "step": 4589500 }, { "epoch": 13.29, "learning_rate": 4.3359346054094115e-05, "loss": 2.2742, "step": 4590000 }, { "epoch": 13.29, "learning_rate": 4.335862240644684e-05, "loss": 2.2755, "step": 4590500 }, { "epoch": 13.29, "learning_rate": 4.335789875879956e-05, "loss": 2.2838, "step": 4591000 }, { "epoch": 13.29, "learning_rate": 4.3357176558447575e-05, "loss": 2.2584, "step": 4591500 }, { "epoch": 13.29, "learning_rate": 4.33564529108003e-05, "loss": 2.2966, "step": 4592000 }, { "epoch": 13.29, "learning_rate": 4.335572926315302e-05, "loss": 2.2626, "step": 4592500 }, { "epoch": 13.29, "learning_rate": 4.335500561550574e-05, "loss": 2.2788, "step": 4593000 }, { "epoch": 13.3, "learning_rate": 4.335428196785847e-05, "loss": 2.284, "step": 4593500 }, { "epoch": 13.3, "learning_rate": 4.335355832021119e-05, "loss": 2.2681, "step": 4594000 }, { "epoch": 13.3, "learning_rate": 4.3352834672563915e-05, "loss": 2.259, "step": 4594500 }, { "epoch": 13.3, "learning_rate": 4.335211102491664e-05, "loss": 2.2804, "step": 4595000 }, { "epoch": 13.3, "learning_rate": 4.335138737726936e-05, "loss": 2.2778, "step": 4595500 }, { "epoch": 13.3, "learning_rate": 4.3350665176917375e-05, "loss": 2.2566, "step": 4596000 }, { "epoch": 13.3, "learning_rate": 4.334994297656539e-05, "loss": 2.3003, "step": 4596500 }, { "epoch": 13.31, "learning_rate": 4.334921932891812e-05, "loss": 2.2807, "step": 4597000 }, { "epoch": 13.31, "learning_rate": 4.334849568127084e-05, "loss": 2.2911, "step": 4597500 }, { "epoch": 13.31, "learning_rate": 4.334777203362357e-05, "loss": 2.2634, "step": 4598000 }, { "epoch": 13.31, "learning_rate": 4.3347048385976293e-05, "loss": 2.2701, "step": 4598500 }, { "epoch": 13.31, "learning_rate": 4.3346324738329016e-05, "loss": 2.295, "step": 4599000 }, { "epoch": 13.31, "learning_rate": 4.334560109068174e-05, "loss": 2.3116, "step": 4599500 }, { "epoch": 13.32, "learning_rate": 4.334487744303446e-05, "loss": 2.2575, "step": 4600000 }, { "epoch": 13.32, "learning_rate": 4.334415379538718e-05, "loss": 2.2502, "step": 4600500 }, { "epoch": 13.32, "learning_rate": 4.3343430147739905e-05, "loss": 2.2878, "step": 4601000 }, { "epoch": 13.32, "learning_rate": 4.334270939468322e-05, "loss": 2.2917, "step": 4601500 }, { "epoch": 13.32, "learning_rate": 4.334198574703594e-05, "loss": 2.2726, "step": 4602000 }, { "epoch": 13.32, "learning_rate": 4.3341262099388665e-05, "loss": 2.2933, "step": 4602500 }, { "epoch": 13.32, "learning_rate": 4.334053845174139e-05, "loss": 2.2767, "step": 4603000 }, { "epoch": 13.33, "learning_rate": 4.333981480409411e-05, "loss": 2.2825, "step": 4603500 }, { "epoch": 13.33, "learning_rate": 4.3339092603742125e-05, "loss": 2.269, "step": 4604000 }, { "epoch": 13.33, "learning_rate": 4.3338368956094854e-05, "loss": 2.2835, "step": 4604500 }, { "epoch": 13.33, "learning_rate": 4.3337645308447576e-05, "loss": 2.2575, "step": 4605000 }, { "epoch": 13.33, "learning_rate": 4.33369216608003e-05, "loss": 2.277, "step": 4605500 }, { "epoch": 13.33, "learning_rate": 4.333619801315302e-05, "loss": 2.271, "step": 4606000 }, { "epoch": 13.33, "learning_rate": 4.333547436550574e-05, "loss": 2.273, "step": 4606500 }, { "epoch": 13.34, "learning_rate": 4.333475071785847e-05, "loss": 2.2633, "step": 4607000 }, { "epoch": 13.34, "learning_rate": 4.3334027070211194e-05, "loss": 2.2773, "step": 4607500 }, { "epoch": 13.34, "learning_rate": 4.3333303422563916e-05, "loss": 2.2775, "step": 4608000 }, { "epoch": 13.34, "learning_rate": 4.333257977491664e-05, "loss": 2.2681, "step": 4608500 }, { "epoch": 13.34, "learning_rate": 4.333185612726936e-05, "loss": 2.2637, "step": 4609000 }, { "epoch": 13.34, "learning_rate": 4.3331133926917376e-05, "loss": 2.2711, "step": 4609500 }, { "epoch": 13.34, "learning_rate": 4.33304102792701e-05, "loss": 2.2847, "step": 4610000 }, { "epoch": 13.35, "learning_rate": 4.332968663162282e-05, "loss": 2.2866, "step": 4610500 }, { "epoch": 13.35, "learning_rate": 4.332896298397554e-05, "loss": 2.2776, "step": 4611000 }, { "epoch": 13.35, "learning_rate": 4.3328240783623565e-05, "loss": 2.2634, "step": 4611500 }, { "epoch": 13.35, "learning_rate": 4.3327520030566874e-05, "loss": 2.2674, "step": 4612000 }, { "epoch": 13.35, "learning_rate": 4.33267963829196e-05, "loss": 2.3074, "step": 4612500 }, { "epoch": 13.35, "learning_rate": 4.3326072735272325e-05, "loss": 2.2791, "step": 4613000 }, { "epoch": 13.35, "learning_rate": 4.332534908762505e-05, "loss": 2.2686, "step": 4613500 }, { "epoch": 13.36, "learning_rate": 4.332462543997777e-05, "loss": 2.2572, "step": 4614000 }, { "epoch": 13.36, "learning_rate": 4.33239017923305e-05, "loss": 2.2699, "step": 4614500 }, { "epoch": 13.36, "learning_rate": 4.332317814468322e-05, "loss": 2.2701, "step": 4615000 }, { "epoch": 13.36, "learning_rate": 4.332245449703594e-05, "loss": 2.2639, "step": 4615500 }, { "epoch": 13.36, "learning_rate": 4.3321730849388665e-05, "loss": 2.2664, "step": 4616000 }, { "epoch": 13.36, "learning_rate": 4.332100720174139e-05, "loss": 2.2793, "step": 4616500 }, { "epoch": 13.36, "learning_rate": 4.33202850013894e-05, "loss": 2.3015, "step": 4617000 }, { "epoch": 13.37, "learning_rate": 4.3319561353742126e-05, "loss": 2.2776, "step": 4617500 }, { "epoch": 13.37, "learning_rate": 4.331883770609485e-05, "loss": 2.2877, "step": 4618000 }, { "epoch": 13.37, "learning_rate": 4.331811405844757e-05, "loss": 2.2533, "step": 4618500 }, { "epoch": 13.37, "learning_rate": 4.33173904108003e-05, "loss": 2.2993, "step": 4619000 }, { "epoch": 13.37, "learning_rate": 4.331666676315302e-05, "loss": 2.2775, "step": 4619500 }, { "epoch": 13.37, "learning_rate": 4.331594311550575e-05, "loss": 2.2823, "step": 4620000 }, { "epoch": 13.37, "learning_rate": 4.331521946785847e-05, "loss": 2.2511, "step": 4620500 }, { "epoch": 13.38, "learning_rate": 4.331449726750649e-05, "loss": 2.2891, "step": 4621000 }, { "epoch": 13.38, "learning_rate": 4.331377361985921e-05, "loss": 2.2892, "step": 4621500 }, { "epoch": 13.38, "learning_rate": 4.331304997221193e-05, "loss": 2.2824, "step": 4622000 }, { "epoch": 13.38, "learning_rate": 4.3312326324564655e-05, "loss": 2.2809, "step": 4622500 }, { "epoch": 13.38, "learning_rate": 4.331160267691738e-05, "loss": 2.2678, "step": 4623000 }, { "epoch": 13.38, "learning_rate": 4.33108790292701e-05, "loss": 2.284, "step": 4623500 }, { "epoch": 13.38, "learning_rate": 4.331015538162282e-05, "loss": 2.2843, "step": 4624000 }, { "epoch": 13.39, "learning_rate": 4.330943173397555e-05, "loss": 2.2681, "step": 4624500 }, { "epoch": 13.39, "learning_rate": 4.330870808632827e-05, "loss": 2.2842, "step": 4625000 }, { "epoch": 13.39, "learning_rate": 4.330798588597629e-05, "loss": 2.2702, "step": 4625500 }, { "epoch": 13.39, "learning_rate": 4.330726223832901e-05, "loss": 2.2745, "step": 4626000 }, { "epoch": 13.39, "learning_rate": 4.330653859068173e-05, "loss": 2.2643, "step": 4626500 }, { "epoch": 13.39, "learning_rate": 4.330581494303446e-05, "loss": 2.2602, "step": 4627000 }, { "epoch": 13.39, "learning_rate": 4.3305091295387184e-05, "loss": 2.244, "step": 4627500 }, { "epoch": 13.4, "learning_rate": 4.33043705423305e-05, "loss": 2.2813, "step": 4628000 }, { "epoch": 13.4, "learning_rate": 4.330364689468322e-05, "loss": 2.2818, "step": 4628500 }, { "epoch": 13.4, "learning_rate": 4.3302923247035944e-05, "loss": 2.2523, "step": 4629000 }, { "epoch": 13.4, "learning_rate": 4.3302199599388666e-05, "loss": 2.2802, "step": 4629500 }, { "epoch": 13.4, "learning_rate": 4.330147595174139e-05, "loss": 2.2787, "step": 4630000 }, { "epoch": 13.4, "learning_rate": 4.330075230409411e-05, "loss": 2.2859, "step": 4630500 }, { "epoch": 13.4, "learning_rate": 4.330002865644683e-05, "loss": 2.2864, "step": 4631000 }, { "epoch": 13.41, "learning_rate": 4.3299305008799555e-05, "loss": 2.2689, "step": 4631500 }, { "epoch": 13.41, "learning_rate": 4.329858280844758e-05, "loss": 2.2799, "step": 4632000 }, { "epoch": 13.41, "learning_rate": 4.32978591608003e-05, "loss": 2.2995, "step": 4632500 }, { "epoch": 13.41, "learning_rate": 4.329713551315302e-05, "loss": 2.2673, "step": 4633000 }, { "epoch": 13.41, "learning_rate": 4.3296411865505744e-05, "loss": 2.2572, "step": 4633500 }, { "epoch": 13.41, "learning_rate": 4.329568966515376e-05, "loss": 2.2615, "step": 4634000 }, { "epoch": 13.41, "learning_rate": 4.329496601750649e-05, "loss": 2.2884, "step": 4634500 }, { "epoch": 13.42, "learning_rate": 4.329424236985921e-05, "loss": 2.291, "step": 4635000 }, { "epoch": 13.42, "learning_rate": 4.329351872221193e-05, "loss": 2.2786, "step": 4635500 }, { "epoch": 13.42, "learning_rate": 4.3292795074564656e-05, "loss": 2.2754, "step": 4636000 }, { "epoch": 13.42, "learning_rate": 4.329207287421268e-05, "loss": 2.2802, "step": 4636500 }, { "epoch": 13.42, "learning_rate": 4.32913492265654e-05, "loss": 2.2752, "step": 4637000 }, { "epoch": 13.42, "learning_rate": 4.329062557891812e-05, "loss": 2.2747, "step": 4637500 }, { "epoch": 13.43, "learning_rate": 4.328990337856614e-05, "loss": 2.2767, "step": 4638000 }, { "epoch": 13.43, "learning_rate": 4.328917973091886e-05, "loss": 2.2606, "step": 4638500 }, { "epoch": 13.43, "learning_rate": 4.3288457530566876e-05, "loss": 2.2664, "step": 4639000 }, { "epoch": 13.43, "learning_rate": 4.32877338829196e-05, "loss": 2.2764, "step": 4639500 }, { "epoch": 13.43, "learning_rate": 4.328701023527233e-05, "loss": 2.3038, "step": 4640000 }, { "epoch": 13.43, "learning_rate": 4.328628658762505e-05, "loss": 2.2665, "step": 4640500 }, { "epoch": 13.43, "learning_rate": 4.328556293997777e-05, "loss": 2.2888, "step": 4641000 }, { "epoch": 13.44, "learning_rate": 4.3284839292330494e-05, "loss": 2.2792, "step": 4641500 }, { "epoch": 13.44, "learning_rate": 4.328411564468322e-05, "loss": 2.292, "step": 4642000 }, { "epoch": 13.44, "learning_rate": 4.3283391997035945e-05, "loss": 2.2936, "step": 4642500 }, { "epoch": 13.44, "learning_rate": 4.328266834938867e-05, "loss": 2.2587, "step": 4643000 }, { "epoch": 13.44, "learning_rate": 4.328194470174139e-05, "loss": 2.2742, "step": 4643500 }, { "epoch": 13.44, "learning_rate": 4.3281222501389405e-05, "loss": 2.2928, "step": 4644000 }, { "epoch": 13.44, "learning_rate": 4.328050030103743e-05, "loss": 2.2801, "step": 4644500 }, { "epoch": 13.45, "learning_rate": 4.327977665339015e-05, "loss": 2.2708, "step": 4645000 }, { "epoch": 13.45, "learning_rate": 4.327905300574287e-05, "loss": 2.262, "step": 4645500 }, { "epoch": 13.45, "learning_rate": 4.3278329358095594e-05, "loss": 2.2808, "step": 4646000 }, { "epoch": 13.45, "learning_rate": 4.3277605710448316e-05, "loss": 2.2567, "step": 4646500 }, { "epoch": 13.45, "learning_rate": 4.327688206280104e-05, "loss": 2.2677, "step": 4647000 }, { "epoch": 13.45, "learning_rate": 4.327615841515376e-05, "loss": 2.2793, "step": 4647500 }, { "epoch": 13.45, "learning_rate": 4.327543476750648e-05, "loss": 2.2734, "step": 4648000 }, { "epoch": 13.46, "learning_rate": 4.3274711119859205e-05, "loss": 2.2574, "step": 4648500 }, { "epoch": 13.46, "learning_rate": 4.327398747221193e-05, "loss": 2.2687, "step": 4649000 }, { "epoch": 13.46, "learning_rate": 4.3273263824564656e-05, "loss": 2.2937, "step": 4649500 }, { "epoch": 13.46, "learning_rate": 4.327254017691738e-05, "loss": 2.2753, "step": 4650000 }, { "epoch": 13.46, "learning_rate": 4.32718165292701e-05, "loss": 2.2887, "step": 4650500 }, { "epoch": 13.46, "learning_rate": 4.3271095776213416e-05, "loss": 2.2666, "step": 4651000 }, { "epoch": 13.46, "learning_rate": 4.327037212856614e-05, "loss": 2.2708, "step": 4651500 }, { "epoch": 13.47, "learning_rate": 4.326964848091886e-05, "loss": 2.2556, "step": 4652000 }, { "epoch": 13.47, "learning_rate": 4.326892483327158e-05, "loss": 2.2683, "step": 4652500 }, { "epoch": 13.47, "learning_rate": 4.3268202632919605e-05, "loss": 2.2678, "step": 4653000 }, { "epoch": 13.47, "learning_rate": 4.326747898527233e-05, "loss": 2.2883, "step": 4653500 }, { "epoch": 13.47, "learning_rate": 4.326675533762505e-05, "loss": 2.2853, "step": 4654000 }, { "epoch": 13.47, "learning_rate": 4.326603168997777e-05, "loss": 2.2697, "step": 4654500 }, { "epoch": 13.47, "learning_rate": 4.3265308042330494e-05, "loss": 2.2753, "step": 4655000 }, { "epoch": 13.48, "learning_rate": 4.326458584197851e-05, "loss": 2.306, "step": 4655500 }, { "epoch": 13.48, "learning_rate": 4.326386219433123e-05, "loss": 2.2812, "step": 4656000 }, { "epoch": 13.48, "learning_rate": 4.3263138546683954e-05, "loss": 2.2582, "step": 4656500 }, { "epoch": 13.48, "learning_rate": 4.3262414899036683e-05, "loss": 2.2708, "step": 4657000 }, { "epoch": 13.48, "learning_rate": 4.3261691251389406e-05, "loss": 2.2593, "step": 4657500 }, { "epoch": 13.48, "learning_rate": 4.326096760374213e-05, "loss": 2.2621, "step": 4658000 }, { "epoch": 13.48, "learning_rate": 4.326024395609486e-05, "loss": 2.2803, "step": 4658500 }, { "epoch": 13.49, "learning_rate": 4.325952030844758e-05, "loss": 2.2712, "step": 4659000 }, { "epoch": 13.49, "learning_rate": 4.32587966608003e-05, "loss": 2.254, "step": 4659500 }, { "epoch": 13.49, "learning_rate": 4.325807446044832e-05, "loss": 2.2886, "step": 4660000 }, { "epoch": 13.49, "learning_rate": 4.325735081280104e-05, "loss": 2.2969, "step": 4660500 }, { "epoch": 13.49, "learning_rate": 4.3256628612449055e-05, "loss": 2.2671, "step": 4661000 }, { "epoch": 13.49, "learning_rate": 4.325590785939237e-05, "loss": 2.2843, "step": 4661500 }, { "epoch": 13.49, "learning_rate": 4.325518421174509e-05, "loss": 2.2603, "step": 4662000 }, { "epoch": 13.5, "learning_rate": 4.3254460564097815e-05, "loss": 2.2749, "step": 4662500 }, { "epoch": 13.5, "learning_rate": 4.325373691645054e-05, "loss": 2.2913, "step": 4663000 }, { "epoch": 13.5, "learning_rate": 4.325301326880326e-05, "loss": 2.305, "step": 4663500 }, { "epoch": 13.5, "learning_rate": 4.325228962115598e-05, "loss": 2.2885, "step": 4664000 }, { "epoch": 13.5, "learning_rate": 4.3251565973508704e-05, "loss": 2.2705, "step": 4664500 }, { "epoch": 13.5, "learning_rate": 4.325084232586143e-05, "loss": 2.2807, "step": 4665000 }, { "epoch": 13.5, "learning_rate": 4.3250118678214155e-05, "loss": 2.2739, "step": 4665500 }, { "epoch": 13.51, "learning_rate": 4.324939503056688e-05, "loss": 2.2864, "step": 4666000 }, { "epoch": 13.51, "learning_rate": 4.3248671382919606e-05, "loss": 2.2802, "step": 4666500 }, { "epoch": 13.51, "learning_rate": 4.324794773527233e-05, "loss": 2.3008, "step": 4667000 }, { "epoch": 13.51, "learning_rate": 4.324722408762505e-05, "loss": 2.2836, "step": 4667500 }, { "epoch": 13.51, "learning_rate": 4.324650043997777e-05, "loss": 2.2615, "step": 4668000 }, { "epoch": 13.51, "learning_rate": 4.3245776792330495e-05, "loss": 2.2556, "step": 4668500 }, { "epoch": 13.51, "learning_rate": 4.324505314468322e-05, "loss": 2.2717, "step": 4669000 }, { "epoch": 13.52, "learning_rate": 4.324432949703594e-05, "loss": 2.2972, "step": 4669500 }, { "epoch": 13.52, "learning_rate": 4.324360584938866e-05, "loss": 2.2556, "step": 4670000 }, { "epoch": 13.52, "learning_rate": 4.324288364903668e-05, "loss": 2.2743, "step": 4670500 }, { "epoch": 13.52, "learning_rate": 4.3242160001389406e-05, "loss": 2.2751, "step": 4671000 }, { "epoch": 13.52, "learning_rate": 4.324143635374213e-05, "loss": 2.2558, "step": 4671500 }, { "epoch": 13.52, "learning_rate": 4.324071270609486e-05, "loss": 2.3004, "step": 4672000 }, { "epoch": 13.52, "learning_rate": 4.323999050574287e-05, "loss": 2.2806, "step": 4672500 }, { "epoch": 13.53, "learning_rate": 4.3239266858095595e-05, "loss": 2.2659, "step": 4673000 }, { "epoch": 13.53, "learning_rate": 4.323854321044832e-05, "loss": 2.2913, "step": 4673500 }, { "epoch": 13.53, "learning_rate": 4.323781956280104e-05, "loss": 2.2594, "step": 4674000 }, { "epoch": 13.53, "learning_rate": 4.3237097362449056e-05, "loss": 2.2856, "step": 4674500 }, { "epoch": 13.53, "learning_rate": 4.323637371480178e-05, "loss": 2.296, "step": 4675000 }, { "epoch": 13.53, "learning_rate": 4.323565006715451e-05, "loss": 2.2752, "step": 4675500 }, { "epoch": 13.54, "learning_rate": 4.323492641950723e-05, "loss": 2.2737, "step": 4676000 }, { "epoch": 13.54, "learning_rate": 4.323420277185995e-05, "loss": 2.2684, "step": 4676500 }, { "epoch": 13.54, "learning_rate": 4.323348057150797e-05, "loss": 2.2952, "step": 4677000 }, { "epoch": 13.54, "learning_rate": 4.323275692386069e-05, "loss": 2.2957, "step": 4677500 }, { "epoch": 13.54, "learning_rate": 4.323203327621341e-05, "loss": 2.3005, "step": 4678000 }, { "epoch": 13.54, "learning_rate": 4.3231309628566133e-05, "loss": 2.2851, "step": 4678500 }, { "epoch": 13.54, "learning_rate": 4.3230585980918856e-05, "loss": 2.2959, "step": 4679000 }, { "epoch": 13.55, "learning_rate": 4.322986378056688e-05, "loss": 2.2649, "step": 4679500 }, { "epoch": 13.55, "learning_rate": 4.322914013291961e-05, "loss": 2.2679, "step": 4680000 }, { "epoch": 13.55, "learning_rate": 4.322841793256762e-05, "loss": 2.2667, "step": 4680500 }, { "epoch": 13.55, "learning_rate": 4.3227694284920345e-05, "loss": 2.2653, "step": 4681000 }, { "epoch": 13.55, "learning_rate": 4.322697063727307e-05, "loss": 2.2831, "step": 4681500 }, { "epoch": 13.55, "learning_rate": 4.322624698962579e-05, "loss": 2.2727, "step": 4682000 }, { "epoch": 13.55, "learning_rate": 4.322552334197851e-05, "loss": 2.2813, "step": 4682500 }, { "epoch": 13.56, "learning_rate": 4.3224799694331234e-05, "loss": 2.2732, "step": 4683000 }, { "epoch": 13.56, "learning_rate": 4.3224076046683956e-05, "loss": 2.2745, "step": 4683500 }, { "epoch": 13.56, "learning_rate": 4.3223352399036685e-05, "loss": 2.2828, "step": 4684000 }, { "epoch": 13.56, "learning_rate": 4.322262875138941e-05, "loss": 2.2879, "step": 4684500 }, { "epoch": 13.56, "learning_rate": 4.322190510374213e-05, "loss": 2.2777, "step": 4685000 }, { "epoch": 13.56, "learning_rate": 4.322118145609485e-05, "loss": 2.2788, "step": 4685500 }, { "epoch": 13.56, "learning_rate": 4.3220457808447574e-05, "loss": 2.2542, "step": 4686000 }, { "epoch": 13.57, "learning_rate": 4.321973560809559e-05, "loss": 2.2742, "step": 4686500 }, { "epoch": 13.57, "learning_rate": 4.3219013407743605e-05, "loss": 2.2825, "step": 4687000 }, { "epoch": 13.57, "learning_rate": 4.3218289760096334e-05, "loss": 2.2854, "step": 4687500 }, { "epoch": 13.57, "learning_rate": 4.3217566112449056e-05, "loss": 2.2592, "step": 4688000 }, { "epoch": 13.57, "learning_rate": 4.321684535939237e-05, "loss": 2.2693, "step": 4688500 }, { "epoch": 13.57, "learning_rate": 4.3216121711745094e-05, "loss": 2.2548, "step": 4689000 }, { "epoch": 13.57, "learning_rate": 4.3215398064097816e-05, "loss": 2.2984, "step": 4689500 }, { "epoch": 13.58, "learning_rate": 4.321467441645054e-05, "loss": 2.2737, "step": 4690000 }, { "epoch": 13.58, "learning_rate": 4.321395076880326e-05, "loss": 2.2777, "step": 4690500 }, { "epoch": 13.58, "learning_rate": 4.321322712115598e-05, "loss": 2.2836, "step": 4691000 }, { "epoch": 13.58, "learning_rate": 4.3212503473508705e-05, "loss": 2.267, "step": 4691500 }, { "epoch": 13.58, "learning_rate": 4.3211779825861434e-05, "loss": 2.2669, "step": 4692000 }, { "epoch": 13.58, "learning_rate": 4.321105617821416e-05, "loss": 2.2677, "step": 4692500 }, { "epoch": 13.58, "learning_rate": 4.321033253056688e-05, "loss": 2.2796, "step": 4693000 }, { "epoch": 13.59, "learning_rate": 4.32096088829196e-05, "loss": 2.2598, "step": 4693500 }, { "epoch": 13.59, "learning_rate": 4.320888523527232e-05, "loss": 2.2677, "step": 4694000 }, { "epoch": 13.59, "learning_rate": 4.3208161587625046e-05, "loss": 2.2859, "step": 4694500 }, { "epoch": 13.59, "learning_rate": 4.320743938727307e-05, "loss": 2.3002, "step": 4695000 }, { "epoch": 13.59, "learning_rate": 4.320671573962579e-05, "loss": 2.2878, "step": 4695500 }, { "epoch": 13.59, "learning_rate": 4.320599209197851e-05, "loss": 2.2695, "step": 4696000 }, { "epoch": 13.59, "learning_rate": 4.3205268444331235e-05, "loss": 2.2798, "step": 4696500 }, { "epoch": 13.6, "learning_rate": 4.320454479668396e-05, "loss": 2.2777, "step": 4697000 }, { "epoch": 13.6, "learning_rate": 4.320382259633198e-05, "loss": 2.2578, "step": 4697500 }, { "epoch": 13.6, "learning_rate": 4.32030989486847e-05, "loss": 2.2841, "step": 4698000 }, { "epoch": 13.6, "learning_rate": 4.3202375301037424e-05, "loss": 2.2715, "step": 4698500 }, { "epoch": 13.6, "learning_rate": 4.320165454798073e-05, "loss": 2.2848, "step": 4699000 }, { "epoch": 13.6, "learning_rate": 4.320093090033346e-05, "loss": 2.2769, "step": 4699500 }, { "epoch": 13.6, "learning_rate": 4.3200207252686184e-05, "loss": 2.3036, "step": 4700000 }, { "epoch": 13.61, "learning_rate": 4.3199483605038906e-05, "loss": 2.267, "step": 4700500 }, { "epoch": 13.61, "learning_rate": 4.319875995739163e-05, "loss": 2.2784, "step": 4701000 }, { "epoch": 13.61, "learning_rate": 4.319803630974435e-05, "loss": 2.2651, "step": 4701500 }, { "epoch": 13.61, "learning_rate": 4.319731266209707e-05, "loss": 2.2828, "step": 4702000 }, { "epoch": 13.61, "learning_rate": 4.31965890144498e-05, "loss": 2.2905, "step": 4702500 }, { "epoch": 13.61, "learning_rate": 4.3195865366802524e-05, "loss": 2.2735, "step": 4703000 }, { "epoch": 13.61, "learning_rate": 4.3195141719155246e-05, "loss": 2.2728, "step": 4703500 }, { "epoch": 13.62, "learning_rate": 4.319441807150797e-05, "loss": 2.312, "step": 4704000 }, { "epoch": 13.62, "learning_rate": 4.319369442386069e-05, "loss": 2.2738, "step": 4704500 }, { "epoch": 13.62, "learning_rate": 4.319297077621341e-05, "loss": 2.2748, "step": 4705000 }, { "epoch": 13.62, "learning_rate": 4.3192247128566135e-05, "loss": 2.2808, "step": 4705500 }, { "epoch": 13.62, "learning_rate": 4.319152348091886e-05, "loss": 2.2737, "step": 4706000 }, { "epoch": 13.62, "learning_rate": 4.3190799833271586e-05, "loss": 2.2638, "step": 4706500 }, { "epoch": 13.62, "learning_rate": 4.319007618562431e-05, "loss": 2.2562, "step": 4707000 }, { "epoch": 13.63, "learning_rate": 4.318935253797703e-05, "loss": 2.2784, "step": 4707500 }, { "epoch": 13.63, "learning_rate": 4.3188630337625046e-05, "loss": 2.251, "step": 4708000 }, { "epoch": 13.63, "learning_rate": 4.318790668997777e-05, "loss": 2.2604, "step": 4708500 }, { "epoch": 13.63, "learning_rate": 4.3187184489625784e-05, "loss": 2.27, "step": 4709000 }, { "epoch": 13.63, "learning_rate": 4.318646084197851e-05, "loss": 2.2695, "step": 4709500 }, { "epoch": 13.63, "learning_rate": 4.3185737194331235e-05, "loss": 2.2973, "step": 4710000 }, { "epoch": 13.63, "learning_rate": 4.3185013546683964e-05, "loss": 2.2703, "step": 4710500 }, { "epoch": 13.64, "learning_rate": 4.318428989903669e-05, "loss": 2.2578, "step": 4711000 }, { "epoch": 13.64, "learning_rate": 4.318356625138941e-05, "loss": 2.2741, "step": 4711500 }, { "epoch": 13.64, "learning_rate": 4.3182844051037424e-05, "loss": 2.2635, "step": 4712000 }, { "epoch": 13.64, "learning_rate": 4.318212040339015e-05, "loss": 2.2814, "step": 4712500 }, { "epoch": 13.64, "learning_rate": 4.318139820303816e-05, "loss": 2.2967, "step": 4713000 }, { "epoch": 13.64, "learning_rate": 4.3180674555390884e-05, "loss": 2.2729, "step": 4713500 }, { "epoch": 13.65, "learning_rate": 4.3179950907743613e-05, "loss": 2.271, "step": 4714000 }, { "epoch": 13.65, "learning_rate": 4.3179227260096336e-05, "loss": 2.264, "step": 4714500 }, { "epoch": 13.65, "learning_rate": 4.317850361244906e-05, "loss": 2.2874, "step": 4715000 }, { "epoch": 13.65, "learning_rate": 4.317777996480178e-05, "loss": 2.2577, "step": 4715500 }, { "epoch": 13.65, "learning_rate": 4.31770563171545e-05, "loss": 2.2779, "step": 4716000 }, { "epoch": 13.65, "learning_rate": 4.3176332669507225e-05, "loss": 2.258, "step": 4716500 }, { "epoch": 13.65, "learning_rate": 4.3175609021859954e-05, "loss": 2.2954, "step": 4717000 }, { "epoch": 13.66, "learning_rate": 4.3174885374212676e-05, "loss": 2.266, "step": 4717500 }, { "epoch": 13.66, "learning_rate": 4.31741617265654e-05, "loss": 2.2925, "step": 4718000 }, { "epoch": 13.66, "learning_rate": 4.317343807891812e-05, "loss": 2.2776, "step": 4718500 }, { "epoch": 13.66, "learning_rate": 4.3172715878566136e-05, "loss": 2.2517, "step": 4719000 }, { "epoch": 13.66, "learning_rate": 4.3171992230918865e-05, "loss": 2.2825, "step": 4719500 }, { "epoch": 13.66, "learning_rate": 4.317126858327159e-05, "loss": 2.2834, "step": 4720000 }, { "epoch": 13.66, "learning_rate": 4.31705463829196e-05, "loss": 2.2792, "step": 4720500 }, { "epoch": 13.67, "learning_rate": 4.3169822735272325e-05, "loss": 2.254, "step": 4721000 }, { "epoch": 13.67, "learning_rate": 4.316909908762505e-05, "loss": 2.2799, "step": 4721500 }, { "epoch": 13.67, "learning_rate": 4.316837543997777e-05, "loss": 2.2808, "step": 4722000 }, { "epoch": 13.67, "learning_rate": 4.316765179233049e-05, "loss": 2.2533, "step": 4722500 }, { "epoch": 13.67, "learning_rate": 4.3166928144683214e-05, "loss": 2.2775, "step": 4723000 }, { "epoch": 13.67, "learning_rate": 4.3166204497035936e-05, "loss": 2.2521, "step": 4723500 }, { "epoch": 13.67, "learning_rate": 4.3165480849388665e-05, "loss": 2.2698, "step": 4724000 }, { "epoch": 13.68, "learning_rate": 4.316475720174139e-05, "loss": 2.2551, "step": 4724500 }, { "epoch": 13.68, "learning_rate": 4.3164033554094116e-05, "loss": 2.2805, "step": 4725000 }, { "epoch": 13.68, "learning_rate": 4.316331135374213e-05, "loss": 2.2842, "step": 4725500 }, { "epoch": 13.68, "learning_rate": 4.3162587706094854e-05, "loss": 2.2861, "step": 4726000 }, { "epoch": 13.68, "learning_rate": 4.3161864058447576e-05, "loss": 2.2651, "step": 4726500 }, { "epoch": 13.68, "learning_rate": 4.31611404108003e-05, "loss": 2.3007, "step": 4727000 }, { "epoch": 13.68, "learning_rate": 4.3160418210448314e-05, "loss": 2.2813, "step": 4727500 }, { "epoch": 13.69, "learning_rate": 4.3159694562801036e-05, "loss": 2.2896, "step": 4728000 }, { "epoch": 13.69, "learning_rate": 4.3158970915153765e-05, "loss": 2.2784, "step": 4728500 }, { "epoch": 13.69, "learning_rate": 4.315824726750649e-05, "loss": 2.2884, "step": 4729000 }, { "epoch": 13.69, "learning_rate": 4.315752361985921e-05, "loss": 2.2673, "step": 4729500 }, { "epoch": 13.69, "learning_rate": 4.315679997221193e-05, "loss": 2.2774, "step": 4730000 }, { "epoch": 13.69, "learning_rate": 4.3156076324564654e-05, "loss": 2.2924, "step": 4730500 }, { "epoch": 13.69, "learning_rate": 4.315535412421267e-05, "loss": 2.2719, "step": 4731000 }, { "epoch": 13.7, "learning_rate": 4.315463047656539e-05, "loss": 2.2875, "step": 4731500 }, { "epoch": 13.7, "learning_rate": 4.315390682891812e-05, "loss": 2.2835, "step": 4732000 }, { "epoch": 13.7, "learning_rate": 4.315318318127084e-05, "loss": 2.2948, "step": 4732500 }, { "epoch": 13.7, "learning_rate": 4.3152460980918866e-05, "loss": 2.2831, "step": 4733000 }, { "epoch": 13.7, "learning_rate": 4.315173733327159e-05, "loss": 2.2815, "step": 4733500 }, { "epoch": 13.7, "learning_rate": 4.315101368562431e-05, "loss": 2.3022, "step": 4734000 }, { "epoch": 13.7, "learning_rate": 4.315029003797703e-05, "loss": 2.2708, "step": 4734500 }, { "epoch": 13.71, "learning_rate": 4.3149566390329755e-05, "loss": 2.282, "step": 4735000 }, { "epoch": 13.71, "learning_rate": 4.314884418997777e-05, "loss": 2.2828, "step": 4735500 }, { "epoch": 13.71, "learning_rate": 4.314812198962579e-05, "loss": 2.2654, "step": 4736000 }, { "epoch": 13.71, "learning_rate": 4.3147398341978515e-05, "loss": 2.2996, "step": 4736500 }, { "epoch": 13.71, "learning_rate": 4.314667614162653e-05, "loss": 2.2943, "step": 4737000 }, { "epoch": 13.71, "learning_rate": 4.3145953941274546e-05, "loss": 2.282, "step": 4737500 }, { "epoch": 13.71, "learning_rate": 4.314523029362727e-05, "loss": 2.3333, "step": 4738000 }, { "epoch": 13.72, "learning_rate": 4.314450664597999e-05, "loss": 2.2764, "step": 4738500 }, { "epoch": 13.72, "learning_rate": 4.314378299833271e-05, "loss": 2.2702, "step": 4739000 }, { "epoch": 13.72, "learning_rate": 4.314305935068544e-05, "loss": 2.2642, "step": 4739500 }, { "epoch": 13.72, "learning_rate": 4.3142335703038164e-05, "loss": 2.2801, "step": 4740000 }, { "epoch": 13.72, "learning_rate": 4.314161205539089e-05, "loss": 2.2725, "step": 4740500 }, { "epoch": 13.72, "learning_rate": 4.3140888407743615e-05, "loss": 2.289, "step": 4741000 }, { "epoch": 13.72, "learning_rate": 4.314016620739163e-05, "loss": 2.2723, "step": 4741500 }, { "epoch": 13.73, "learning_rate": 4.313944255974435e-05, "loss": 2.2683, "step": 4742000 }, { "epoch": 13.73, "learning_rate": 4.3138718912097075e-05, "loss": 2.2767, "step": 4742500 }, { "epoch": 13.73, "learning_rate": 4.31379952644498e-05, "loss": 2.2597, "step": 4743000 }, { "epoch": 13.73, "learning_rate": 4.313727161680252e-05, "loss": 2.2816, "step": 4743500 }, { "epoch": 13.73, "learning_rate": 4.313654796915524e-05, "loss": 2.2807, "step": 4744000 }, { "epoch": 13.73, "learning_rate": 4.3135824321507964e-05, "loss": 2.2817, "step": 4744500 }, { "epoch": 13.73, "learning_rate": 4.313510067386069e-05, "loss": 2.2639, "step": 4745000 }, { "epoch": 13.74, "learning_rate": 4.3134377026213415e-05, "loss": 2.2685, "step": 4745500 }, { "epoch": 13.74, "learning_rate": 4.313365337856614e-05, "loss": 2.2768, "step": 4746000 }, { "epoch": 13.74, "learning_rate": 4.313292973091886e-05, "loss": 2.2818, "step": 4746500 }, { "epoch": 13.74, "learning_rate": 4.313220608327159e-05, "loss": 2.2817, "step": 4747000 }, { "epoch": 13.74, "learning_rate": 4.313148243562431e-05, "loss": 2.2635, "step": 4747500 }, { "epoch": 13.74, "learning_rate": 4.313075878797703e-05, "loss": 2.293, "step": 4748000 }, { "epoch": 13.74, "learning_rate": 4.3130035140329755e-05, "loss": 2.2811, "step": 4748500 }, { "epoch": 13.75, "learning_rate": 4.312931149268248e-05, "loss": 2.278, "step": 4749000 }, { "epoch": 13.75, "learning_rate": 4.312858929233049e-05, "loss": 2.2819, "step": 4749500 }, { "epoch": 13.75, "learning_rate": 4.3127865644683215e-05, "loss": 2.2828, "step": 4750000 }, { "epoch": 13.75, "learning_rate": 4.312714344433124e-05, "loss": 2.274, "step": 4750500 }, { "epoch": 13.75, "learning_rate": 4.312641979668396e-05, "loss": 2.294, "step": 4751000 }, { "epoch": 13.75, "learning_rate": 4.312569614903668e-05, "loss": 2.273, "step": 4751500 }, { "epoch": 13.76, "learning_rate": 4.31249739486847e-05, "loss": 2.2733, "step": 4752000 }, { "epoch": 13.76, "learning_rate": 4.312425030103742e-05, "loss": 2.2773, "step": 4752500 }, { "epoch": 13.76, "learning_rate": 4.312352665339014e-05, "loss": 2.2769, "step": 4753000 }, { "epoch": 13.76, "learning_rate": 4.3122803005742864e-05, "loss": 2.2711, "step": 4753500 }, { "epoch": 13.76, "learning_rate": 4.312208080539089e-05, "loss": 2.2863, "step": 4754000 }, { "epoch": 13.76, "learning_rate": 4.312135715774361e-05, "loss": 2.2749, "step": 4754500 }, { "epoch": 13.76, "learning_rate": 4.312063351009634e-05, "loss": 2.2697, "step": 4755000 }, { "epoch": 13.77, "learning_rate": 4.311990986244906e-05, "loss": 2.2753, "step": 4755500 }, { "epoch": 13.77, "learning_rate": 4.3119187662097076e-05, "loss": 2.3028, "step": 4756000 }, { "epoch": 13.77, "learning_rate": 4.31184640144498e-05, "loss": 2.2789, "step": 4756500 }, { "epoch": 13.77, "learning_rate": 4.311774036680252e-05, "loss": 2.2895, "step": 4757000 }, { "epoch": 13.77, "learning_rate": 4.311701671915524e-05, "loss": 2.2687, "step": 4757500 }, { "epoch": 13.77, "learning_rate": 4.311629307150797e-05, "loss": 2.2722, "step": 4758000 }, { "epoch": 13.77, "learning_rate": 4.3115569423860694e-05, "loss": 2.2705, "step": 4758500 }, { "epoch": 13.78, "learning_rate": 4.3114845776213416e-05, "loss": 2.2789, "step": 4759000 }, { "epoch": 13.78, "learning_rate": 4.311412212856614e-05, "loss": 2.2529, "step": 4759500 }, { "epoch": 13.78, "learning_rate": 4.311339848091886e-05, "loss": 2.269, "step": 4760000 }, { "epoch": 13.78, "learning_rate": 4.311267483327158e-05, "loss": 2.2947, "step": 4760500 }, { "epoch": 13.78, "learning_rate": 4.3111951185624305e-05, "loss": 2.2744, "step": 4761000 }, { "epoch": 13.78, "learning_rate": 4.311122753797703e-05, "loss": 2.2682, "step": 4761500 }, { "epoch": 13.78, "learning_rate": 4.3110503890329756e-05, "loss": 2.2844, "step": 4762000 }, { "epoch": 13.79, "learning_rate": 4.310978024268248e-05, "loss": 2.2718, "step": 4762500 }, { "epoch": 13.79, "learning_rate": 4.31090565950352e-05, "loss": 2.2467, "step": 4763000 }, { "epoch": 13.79, "learning_rate": 4.310833439468322e-05, "loss": 2.2638, "step": 4763500 }, { "epoch": 13.79, "learning_rate": 4.3107610747035945e-05, "loss": 2.2879, "step": 4764000 }, { "epoch": 13.79, "learning_rate": 4.310688709938867e-05, "loss": 2.2993, "step": 4764500 }, { "epoch": 13.79, "learning_rate": 4.310616345174139e-05, "loss": 2.2701, "step": 4765000 }, { "epoch": 13.79, "learning_rate": 4.310543980409411e-05, "loss": 2.2782, "step": 4765500 }, { "epoch": 13.8, "learning_rate": 4.3104716156446834e-05, "loss": 2.2787, "step": 4766000 }, { "epoch": 13.8, "learning_rate": 4.3103992508799556e-05, "loss": 2.2668, "step": 4766500 }, { "epoch": 13.8, "learning_rate": 4.310326886115228e-05, "loss": 2.2668, "step": 4767000 }, { "epoch": 13.8, "learning_rate": 4.3102545213505e-05, "loss": 2.3055, "step": 4767500 }, { "epoch": 13.8, "learning_rate": 4.310182156585772e-05, "loss": 2.2658, "step": 4768000 }, { "epoch": 13.8, "learning_rate": 4.3101097918210445e-05, "loss": 2.2666, "step": 4768500 }, { "epoch": 13.8, "learning_rate": 4.3100374270563174e-05, "loss": 2.2869, "step": 4769000 }, { "epoch": 13.81, "learning_rate": 4.3099650622915897e-05, "loss": 2.2865, "step": 4769500 }, { "epoch": 13.81, "learning_rate": 4.309892697526862e-05, "loss": 2.2773, "step": 4770000 }, { "epoch": 13.81, "learning_rate": 4.309820332762135e-05, "loss": 2.2548, "step": 4770500 }, { "epoch": 13.81, "learning_rate": 4.309748112726936e-05, "loss": 2.281, "step": 4771000 }, { "epoch": 13.81, "learning_rate": 4.3096757479622086e-05, "loss": 2.2673, "step": 4771500 }, { "epoch": 13.81, "learning_rate": 4.30960352792701e-05, "loss": 2.277, "step": 4772000 }, { "epoch": 13.81, "learning_rate": 4.3095311631622823e-05, "loss": 2.2713, "step": 4772500 }, { "epoch": 13.82, "learning_rate": 4.3094587983975546e-05, "loss": 2.2749, "step": 4773000 }, { "epoch": 13.82, "learning_rate": 4.309386433632827e-05, "loss": 2.2809, "step": 4773500 }, { "epoch": 13.82, "learning_rate": 4.3093140688681e-05, "loss": 2.2914, "step": 4774000 }, { "epoch": 13.82, "learning_rate": 4.309241704103372e-05, "loss": 2.2798, "step": 4774500 }, { "epoch": 13.82, "learning_rate": 4.309169339338644e-05, "loss": 2.278, "step": 4775000 }, { "epoch": 13.82, "learning_rate": 4.3090969745739164e-05, "loss": 2.2696, "step": 4775500 }, { "epoch": 13.82, "learning_rate": 4.3090246098091886e-05, "loss": 2.2989, "step": 4776000 }, { "epoch": 13.83, "learning_rate": 4.3089522450444615e-05, "loss": 2.2729, "step": 4776500 }, { "epoch": 13.83, "learning_rate": 4.308880025009263e-05, "loss": 2.2848, "step": 4777000 }, { "epoch": 13.83, "learning_rate": 4.3088078049740646e-05, "loss": 2.2644, "step": 4777500 }, { "epoch": 13.83, "learning_rate": 4.3087354402093375e-05, "loss": 2.2915, "step": 4778000 }, { "epoch": 13.83, "learning_rate": 4.30866307544461e-05, "loss": 2.2862, "step": 4778500 }, { "epoch": 13.83, "learning_rate": 4.308590710679882e-05, "loss": 2.2745, "step": 4779000 }, { "epoch": 13.83, "learning_rate": 4.308518345915154e-05, "loss": 2.2697, "step": 4779500 }, { "epoch": 13.84, "learning_rate": 4.3084459811504264e-05, "loss": 2.2838, "step": 4780000 }, { "epoch": 13.84, "learning_rate": 4.3083736163856986e-05, "loss": 2.2866, "step": 4780500 }, { "epoch": 13.84, "learning_rate": 4.308301251620971e-05, "loss": 2.2737, "step": 4781000 }, { "epoch": 13.84, "learning_rate": 4.308228886856243e-05, "loss": 2.2625, "step": 4781500 }, { "epoch": 13.84, "learning_rate": 4.3081566668210446e-05, "loss": 2.2764, "step": 4782000 }, { "epoch": 13.84, "learning_rate": 4.3080843020563175e-05, "loss": 2.2582, "step": 4782500 }, { "epoch": 13.84, "learning_rate": 4.308012082021119e-05, "loss": 2.2824, "step": 4783000 }, { "epoch": 13.85, "learning_rate": 4.307939717256391e-05, "loss": 2.284, "step": 4783500 }, { "epoch": 13.85, "learning_rate": 4.3078673524916635e-05, "loss": 2.2747, "step": 4784000 }, { "epoch": 13.85, "learning_rate": 4.3077949877269364e-05, "loss": 2.2632, "step": 4784500 }, { "epoch": 13.85, "learning_rate": 4.3077226229622086e-05, "loss": 2.2926, "step": 4785000 }, { "epoch": 13.85, "learning_rate": 4.30765040292701e-05, "loss": 2.2694, "step": 4785500 }, { "epoch": 13.85, "learning_rate": 4.3075780381622824e-05, "loss": 2.2513, "step": 4786000 }, { "epoch": 13.85, "learning_rate": 4.3075056733975546e-05, "loss": 2.2738, "step": 4786500 }, { "epoch": 13.86, "learning_rate": 4.3074333086328275e-05, "loss": 2.2731, "step": 4787000 }, { "epoch": 13.86, "learning_rate": 4.3073609438681e-05, "loss": 2.2747, "step": 4787500 }, { "epoch": 13.86, "learning_rate": 4.307288579103372e-05, "loss": 2.2591, "step": 4788000 }, { "epoch": 13.86, "learning_rate": 4.307216214338644e-05, "loss": 2.2876, "step": 4788500 }, { "epoch": 13.86, "learning_rate": 4.307143994303446e-05, "loss": 2.2841, "step": 4789000 }, { "epoch": 13.86, "learning_rate": 4.307071629538718e-05, "loss": 2.2569, "step": 4789500 }, { "epoch": 13.87, "learning_rate": 4.30699926477399e-05, "loss": 2.2644, "step": 4790000 }, { "epoch": 13.87, "learning_rate": 4.3069269000092624e-05, "loss": 2.2704, "step": 4790500 }, { "epoch": 13.87, "learning_rate": 4.306854535244535e-05, "loss": 2.2846, "step": 4791000 }, { "epoch": 13.87, "learning_rate": 4.306782315209337e-05, "loss": 2.2811, "step": 4791500 }, { "epoch": 13.87, "learning_rate": 4.30670995044461e-05, "loss": 2.2646, "step": 4792000 }, { "epoch": 13.87, "learning_rate": 4.306637585679882e-05, "loss": 2.2795, "step": 4792500 }, { "epoch": 13.87, "learning_rate": 4.306565220915154e-05, "loss": 2.2669, "step": 4793000 }, { "epoch": 13.88, "learning_rate": 4.3064928561504265e-05, "loss": 2.2824, "step": 4793500 }, { "epoch": 13.88, "learning_rate": 4.306420636115228e-05, "loss": 2.2805, "step": 4794000 }, { "epoch": 13.88, "learning_rate": 4.3063482713505e-05, "loss": 2.2904, "step": 4794500 }, { "epoch": 13.88, "learning_rate": 4.3062759065857725e-05, "loss": 2.2574, "step": 4795000 }, { "epoch": 13.88, "learning_rate": 4.306203541821045e-05, "loss": 2.284, "step": 4795500 }, { "epoch": 13.88, "learning_rate": 4.3061311770563176e-05, "loss": 2.2997, "step": 4796000 }, { "epoch": 13.88, "learning_rate": 4.30605881229159e-05, "loss": 2.2746, "step": 4796500 }, { "epoch": 13.89, "learning_rate": 4.305986447526862e-05, "loss": 2.2854, "step": 4797000 }, { "epoch": 13.89, "learning_rate": 4.3059142274916636e-05, "loss": 2.2553, "step": 4797500 }, { "epoch": 13.89, "learning_rate": 4.305841862726936e-05, "loss": 2.2775, "step": 4798000 }, { "epoch": 13.89, "learning_rate": 4.305769497962208e-05, "loss": 2.2602, "step": 4798500 }, { "epoch": 13.89, "learning_rate": 4.305697133197481e-05, "loss": 2.2821, "step": 4799000 }, { "epoch": 13.89, "learning_rate": 4.305624768432753e-05, "loss": 2.2815, "step": 4799500 }, { "epoch": 13.89, "learning_rate": 4.3055524036680254e-05, "loss": 2.2674, "step": 4800000 }, { "epoch": 13.9, "learning_rate": 4.3054800389032976e-05, "loss": 2.2836, "step": 4800500 }, { "epoch": 13.9, "learning_rate": 4.30540767413857e-05, "loss": 2.2613, "step": 4801000 }, { "epoch": 13.9, "learning_rate": 4.305335454103372e-05, "loss": 2.2871, "step": 4801500 }, { "epoch": 13.9, "learning_rate": 4.3052632340681736e-05, "loss": 2.2545, "step": 4802000 }, { "epoch": 13.9, "learning_rate": 4.305190869303446e-05, "loss": 2.2903, "step": 4802500 }, { "epoch": 13.9, "learning_rate": 4.305118504538718e-05, "loss": 2.2926, "step": 4803000 }, { "epoch": 13.9, "learning_rate": 4.30504613977399e-05, "loss": 2.2936, "step": 4803500 }, { "epoch": 13.91, "learning_rate": 4.3049737750092625e-05, "loss": 2.2763, "step": 4804000 }, { "epoch": 13.91, "learning_rate": 4.3049014102445354e-05, "loss": 2.2746, "step": 4804500 }, { "epoch": 13.91, "learning_rate": 4.3048290454798076e-05, "loss": 2.285, "step": 4805000 }, { "epoch": 13.91, "learning_rate": 4.30475668071508e-05, "loss": 2.2706, "step": 4805500 }, { "epoch": 13.91, "learning_rate": 4.3046844606798814e-05, "loss": 2.2733, "step": 4806000 }, { "epoch": 13.91, "learning_rate": 4.304612240644683e-05, "loss": 2.2703, "step": 4806500 }, { "epoch": 13.91, "learning_rate": 4.304539875879956e-05, "loss": 2.2733, "step": 4807000 }, { "epoch": 13.92, "learning_rate": 4.304467511115228e-05, "loss": 2.2758, "step": 4807500 }, { "epoch": 13.92, "learning_rate": 4.3043951463505e-05, "loss": 2.2726, "step": 4808000 }, { "epoch": 13.92, "learning_rate": 4.3043227815857725e-05, "loss": 2.2802, "step": 4808500 }, { "epoch": 13.92, "learning_rate": 4.3042504168210455e-05, "loss": 2.2676, "step": 4809000 }, { "epoch": 13.92, "learning_rate": 4.304178196785847e-05, "loss": 2.2781, "step": 4809500 }, { "epoch": 13.92, "learning_rate": 4.304105832021119e-05, "loss": 2.2561, "step": 4810000 }, { "epoch": 13.92, "learning_rate": 4.3040334672563915e-05, "loss": 2.2709, "step": 4810500 }, { "epoch": 13.93, "learning_rate": 4.303961102491664e-05, "loss": 2.2696, "step": 4811000 }, { "epoch": 13.93, "learning_rate": 4.303888737726936e-05, "loss": 2.2925, "step": 4811500 }, { "epoch": 13.93, "learning_rate": 4.3038165176917375e-05, "loss": 2.2701, "step": 4812000 }, { "epoch": 13.93, "learning_rate": 4.3037441529270104e-05, "loss": 2.2899, "step": 4812500 }, { "epoch": 13.93, "learning_rate": 4.3036717881622826e-05, "loss": 2.3022, "step": 4813000 }, { "epoch": 13.93, "learning_rate": 4.303599423397555e-05, "loss": 2.291, "step": 4813500 }, { "epoch": 13.93, "learning_rate": 4.303527058632827e-05, "loss": 2.2791, "step": 4814000 }, { "epoch": 13.94, "learning_rate": 4.3034546938681e-05, "loss": 2.2668, "step": 4814500 }, { "epoch": 13.94, "learning_rate": 4.303382329103372e-05, "loss": 2.2997, "step": 4815000 }, { "epoch": 13.94, "learning_rate": 4.3033099643386444e-05, "loss": 2.275, "step": 4815500 }, { "epoch": 13.94, "learning_rate": 4.3032375995739166e-05, "loss": 2.2564, "step": 4816000 }, { "epoch": 13.94, "learning_rate": 4.303165234809189e-05, "loss": 2.2745, "step": 4816500 }, { "epoch": 13.94, "learning_rate": 4.303092870044461e-05, "loss": 2.2821, "step": 4817000 }, { "epoch": 13.94, "learning_rate": 4.3030206500092626e-05, "loss": 2.2778, "step": 4817500 }, { "epoch": 13.95, "learning_rate": 4.3029482852445355e-05, "loss": 2.2659, "step": 4818000 }, { "epoch": 13.95, "learning_rate": 4.302875920479808e-05, "loss": 2.2587, "step": 4818500 }, { "epoch": 13.95, "learning_rate": 4.30280355571508e-05, "loss": 2.2622, "step": 4819000 }, { "epoch": 13.95, "learning_rate": 4.3027313356798815e-05, "loss": 2.2939, "step": 4819500 }, { "epoch": 13.95, "learning_rate": 4.302658970915154e-05, "loss": 2.281, "step": 4820000 }, { "epoch": 13.95, "learning_rate": 4.302586895609485e-05, "loss": 2.27, "step": 4820500 }, { "epoch": 13.95, "learning_rate": 4.3025145308447575e-05, "loss": 2.2504, "step": 4821000 }, { "epoch": 13.96, "learning_rate": 4.30244216608003e-05, "loss": 2.2966, "step": 4821500 }, { "epoch": 13.96, "learning_rate": 4.3023698013153026e-05, "loss": 2.2679, "step": 4822000 }, { "epoch": 13.96, "learning_rate": 4.302297436550575e-05, "loss": 2.2972, "step": 4822500 }, { "epoch": 13.96, "learning_rate": 4.302225071785847e-05, "loss": 2.2832, "step": 4823000 }, { "epoch": 13.96, "learning_rate": 4.302152707021119e-05, "loss": 2.2686, "step": 4823500 }, { "epoch": 13.96, "learning_rate": 4.3020803422563915e-05, "loss": 2.2594, "step": 4824000 }, { "epoch": 13.96, "learning_rate": 4.302007977491664e-05, "loss": 2.272, "step": 4824500 }, { "epoch": 13.97, "learning_rate": 4.301935612726936e-05, "loss": 2.2712, "step": 4825000 }, { "epoch": 13.97, "learning_rate": 4.301863247962208e-05, "loss": 2.2631, "step": 4825500 }, { "epoch": 13.97, "learning_rate": 4.3017908831974804e-05, "loss": 2.2726, "step": 4826000 }, { "epoch": 13.97, "learning_rate": 4.3017186631622827e-05, "loss": 2.2735, "step": 4826500 }, { "epoch": 13.97, "learning_rate": 4.301646298397555e-05, "loss": 2.2888, "step": 4827000 }, { "epoch": 13.97, "learning_rate": 4.301573933632827e-05, "loss": 2.2658, "step": 4827500 }, { "epoch": 13.98, "learning_rate": 4.301501568868099e-05, "loss": 2.2752, "step": 4828000 }, { "epoch": 13.98, "learning_rate": 4.301429348832901e-05, "loss": 2.2508, "step": 4828500 }, { "epoch": 13.98, "learning_rate": 4.301356984068173e-05, "loss": 2.2607, "step": 4829000 }, { "epoch": 13.98, "learning_rate": 4.301284619303446e-05, "loss": 2.2677, "step": 4829500 }, { "epoch": 13.98, "learning_rate": 4.301212254538718e-05, "loss": 2.278, "step": 4830000 }, { "epoch": 13.98, "learning_rate": 4.3011398897739905e-05, "loss": 2.2704, "step": 4830500 }, { "epoch": 13.98, "learning_rate": 4.301067669738793e-05, "loss": 2.2717, "step": 4831000 }, { "epoch": 13.99, "learning_rate": 4.300995304974065e-05, "loss": 2.2851, "step": 4831500 }, { "epoch": 13.99, "learning_rate": 4.300922940209337e-05, "loss": 2.2581, "step": 4832000 }, { "epoch": 13.99, "learning_rate": 4.3008505754446094e-05, "loss": 2.2865, "step": 4832500 }, { "epoch": 13.99, "learning_rate": 4.3007782106798816e-05, "loss": 2.2853, "step": 4833000 }, { "epoch": 13.99, "learning_rate": 4.300705845915154e-05, "loss": 2.2573, "step": 4833500 }, { "epoch": 13.99, "learning_rate": 4.300633481150426e-05, "loss": 2.2787, "step": 4834000 }, { "epoch": 13.99, "learning_rate": 4.300561116385698e-05, "loss": 2.276, "step": 4834500 }, { "epoch": 14.0, "learning_rate": 4.3004888963505005e-05, "loss": 2.2563, "step": 4835000 }, { "epoch": 14.0, "learning_rate": 4.300416531585773e-05, "loss": 2.2761, "step": 4835500 }, { "epoch": 14.0, "learning_rate": 4.300344166821045e-05, "loss": 2.2709, "step": 4836000 }, { "epoch": 14.0, "learning_rate": 4.300271802056318e-05, "loss": 2.2776, "step": 4836500 }, { "epoch": 14.0, "eval_accuracy": 0.653478279659735, "eval_accuracy_mlm": 0.6164064549706573, "eval_accuracy_nsp": 0.8522890285206216, "eval_loss": 2.277855396270752, "eval_runtime": 330.582, "eval_samples_per_second": 1320.054, "eval_steps_per_second": 55.003, "step": 4836608 }, { "epoch": 14.0, "learning_rate": 4.30019943729159e-05, "loss": 2.2775, "step": 4837000 }, { "epoch": 14.0, "learning_rate": 4.300127072526862e-05, "loss": 2.2426, "step": 4837500 }, { "epoch": 14.0, "learning_rate": 4.300054997221193e-05, "loss": 2.2506, "step": 4838000 }, { "epoch": 14.01, "learning_rate": 4.2999826324564654e-05, "loss": 2.2419, "step": 4838500 }, { "epoch": 14.01, "learning_rate": 4.2999104124212676e-05, "loss": 2.2666, "step": 4839000 }, { "epoch": 14.01, "learning_rate": 4.29983804765654e-05, "loss": 2.2439, "step": 4839500 }, { "epoch": 14.01, "learning_rate": 4.2997658276213414e-05, "loss": 2.2649, "step": 4840000 }, { "epoch": 14.01, "learning_rate": 4.2996934628566136e-05, "loss": 2.2709, "step": 4840500 }, { "epoch": 14.01, "learning_rate": 4.299621098091886e-05, "loss": 2.2522, "step": 4841000 }, { "epoch": 14.01, "learning_rate": 4.299548733327158e-05, "loss": 2.2533, "step": 4841500 }, { "epoch": 14.02, "learning_rate": 4.29947636856243e-05, "loss": 2.2649, "step": 4842000 }, { "epoch": 14.02, "learning_rate": 4.299404003797703e-05, "loss": 2.2809, "step": 4842500 }, { "epoch": 14.02, "learning_rate": 4.2993316390329754e-05, "loss": 2.2559, "step": 4843000 }, { "epoch": 14.02, "learning_rate": 4.2992592742682476e-05, "loss": 2.2673, "step": 4843500 }, { "epoch": 14.02, "learning_rate": 4.29918690950352e-05, "loss": 2.2553, "step": 4844000 }, { "epoch": 14.02, "learning_rate": 4.299114544738793e-05, "loss": 2.2555, "step": 4844500 }, { "epoch": 14.02, "learning_rate": 4.299042179974065e-05, "loss": 2.2878, "step": 4845000 }, { "epoch": 14.03, "learning_rate": 4.298969815209337e-05, "loss": 2.2563, "step": 4845500 }, { "epoch": 14.03, "learning_rate": 4.2988974504446094e-05, "loss": 2.2662, "step": 4846000 }, { "epoch": 14.03, "learning_rate": 4.298825085679882e-05, "loss": 2.2632, "step": 4846500 }, { "epoch": 14.03, "learning_rate": 4.298752865644683e-05, "loss": 2.2494, "step": 4847000 }, { "epoch": 14.03, "learning_rate": 4.2986805008799554e-05, "loss": 2.2687, "step": 4847500 }, { "epoch": 14.03, "learning_rate": 4.2986081361152283e-05, "loss": 2.2364, "step": 4848000 }, { "epoch": 14.03, "learning_rate": 4.2985357713505006e-05, "loss": 2.2455, "step": 4848500 }, { "epoch": 14.04, "learning_rate": 4.298463406585773e-05, "loss": 2.2464, "step": 4849000 }, { "epoch": 14.04, "learning_rate": 4.2983911865505743e-05, "loss": 2.2709, "step": 4849500 }, { "epoch": 14.04, "learning_rate": 4.2983188217858466e-05, "loss": 2.2592, "step": 4850000 }, { "epoch": 14.04, "learning_rate": 4.298246457021119e-05, "loss": 2.2477, "step": 4850500 }, { "epoch": 14.04, "learning_rate": 4.2981743817154504e-05, "loss": 2.2437, "step": 4851000 }, { "epoch": 14.04, "learning_rate": 4.2981020169507226e-05, "loss": 2.2457, "step": 4851500 }, { "epoch": 14.04, "learning_rate": 4.2980296521859955e-05, "loss": 2.2576, "step": 4852000 }, { "epoch": 14.05, "learning_rate": 4.297957287421268e-05, "loss": 2.2443, "step": 4852500 }, { "epoch": 14.05, "learning_rate": 4.29788492265654e-05, "loss": 2.2281, "step": 4853000 }, { "epoch": 14.05, "learning_rate": 4.297812557891812e-05, "loss": 2.2646, "step": 4853500 }, { "epoch": 14.05, "learning_rate": 4.2977401931270844e-05, "loss": 2.2407, "step": 4854000 }, { "epoch": 14.05, "learning_rate": 4.2976678283623566e-05, "loss": 2.2483, "step": 4854500 }, { "epoch": 14.05, "learning_rate": 4.297595463597629e-05, "loss": 2.2245, "step": 4855000 }, { "epoch": 14.05, "learning_rate": 4.297523098832901e-05, "loss": 2.2487, "step": 4855500 }, { "epoch": 14.06, "learning_rate": 4.297450734068173e-05, "loss": 2.2558, "step": 4856000 }, { "epoch": 14.06, "learning_rate": 4.297378369303446e-05, "loss": 2.237, "step": 4856500 }, { "epoch": 14.06, "learning_rate": 4.2973060045387184e-05, "loss": 2.2709, "step": 4857000 }, { "epoch": 14.06, "learning_rate": 4.29723378450352e-05, "loss": 2.2585, "step": 4857500 }, { "epoch": 14.06, "learning_rate": 4.2971615644683215e-05, "loss": 2.2751, "step": 4858000 }, { "epoch": 14.06, "learning_rate": 4.297089199703594e-05, "loss": 2.2553, "step": 4858500 }, { "epoch": 14.06, "learning_rate": 4.297016834938866e-05, "loss": 2.2256, "step": 4859000 }, { "epoch": 14.07, "learning_rate": 4.296944470174139e-05, "loss": 2.2747, "step": 4859500 }, { "epoch": 14.07, "learning_rate": 4.296872105409411e-05, "loss": 2.2526, "step": 4860000 }, { "epoch": 14.07, "learning_rate": 4.296799740644683e-05, "loss": 2.2707, "step": 4860500 }, { "epoch": 14.07, "learning_rate": 4.2967275206094855e-05, "loss": 2.26, "step": 4861000 }, { "epoch": 14.07, "learning_rate": 4.296655155844758e-05, "loss": 2.2739, "step": 4861500 }, { "epoch": 14.07, "learning_rate": 4.29658279108003e-05, "loss": 2.2471, "step": 4862000 }, { "epoch": 14.07, "learning_rate": 4.296510426315302e-05, "loss": 2.2587, "step": 4862500 }, { "epoch": 14.08, "learning_rate": 4.2964380615505744e-05, "loss": 2.2462, "step": 4863000 }, { "epoch": 14.08, "learning_rate": 4.2963656967858466e-05, "loss": 2.2616, "step": 4863500 }, { "epoch": 14.08, "learning_rate": 4.296293332021119e-05, "loss": 2.2446, "step": 4864000 }, { "epoch": 14.08, "learning_rate": 4.296220967256391e-05, "loss": 2.2519, "step": 4864500 }, { "epoch": 14.08, "learning_rate": 4.296148602491663e-05, "loss": 2.2551, "step": 4865000 }, { "epoch": 14.08, "learning_rate": 4.2960763824564655e-05, "loss": 2.2667, "step": 4865500 }, { "epoch": 14.09, "learning_rate": 4.296004017691738e-05, "loss": 2.274, "step": 4866000 }, { "epoch": 14.09, "learning_rate": 4.29593165292701e-05, "loss": 2.2634, "step": 4866500 }, { "epoch": 14.09, "learning_rate": 4.295859288162283e-05, "loss": 2.2542, "step": 4867000 }, { "epoch": 14.09, "learning_rate": 4.295786923397555e-05, "loss": 2.2569, "step": 4867500 }, { "epoch": 14.09, "learning_rate": 4.2957145586328273e-05, "loss": 2.2549, "step": 4868000 }, { "epoch": 14.09, "learning_rate": 4.295642338597629e-05, "loss": 2.2673, "step": 4868500 }, { "epoch": 14.09, "learning_rate": 4.295569973832901e-05, "loss": 2.271, "step": 4869000 }, { "epoch": 14.1, "learning_rate": 4.2954976090681733e-05, "loss": 2.2398, "step": 4869500 }, { "epoch": 14.1, "learning_rate": 4.295425244303446e-05, "loss": 2.2488, "step": 4870000 }, { "epoch": 14.1, "learning_rate": 4.2953528795387185e-05, "loss": 2.2727, "step": 4870500 }, { "epoch": 14.1, "learning_rate": 4.295280514773991e-05, "loss": 2.2654, "step": 4871000 }, { "epoch": 14.1, "learning_rate": 4.295208728927381e-05, "loss": 2.2834, "step": 4871500 }, { "epoch": 14.1, "learning_rate": 4.2951365088921825e-05, "loss": 2.2653, "step": 4872000 }, { "epoch": 14.1, "learning_rate": 4.295064144127455e-05, "loss": 2.2705, "step": 4872500 }, { "epoch": 14.11, "learning_rate": 4.294991779362727e-05, "loss": 2.2383, "step": 4873000 }, { "epoch": 14.11, "learning_rate": 4.294919414597999e-05, "loss": 2.2782, "step": 4873500 }, { "epoch": 14.11, "learning_rate": 4.2948470498332714e-05, "loss": 2.2523, "step": 4874000 }, { "epoch": 14.11, "learning_rate": 4.2947746850685436e-05, "loss": 2.2494, "step": 4874500 }, { "epoch": 14.11, "learning_rate": 4.2947023203038165e-05, "loss": 2.2346, "step": 4875000 }, { "epoch": 14.11, "learning_rate": 4.294629955539089e-05, "loss": 2.2436, "step": 4875500 }, { "epoch": 14.11, "learning_rate": 4.294557590774361e-05, "loss": 2.2569, "step": 4876000 }, { "epoch": 14.12, "learning_rate": 4.294485226009634e-05, "loss": 2.2736, "step": 4876500 }, { "epoch": 14.12, "learning_rate": 4.294412861244906e-05, "loss": 2.2727, "step": 4877000 }, { "epoch": 14.12, "learning_rate": 4.294340496480178e-05, "loss": 2.2461, "step": 4877500 }, { "epoch": 14.12, "learning_rate": 4.2942681317154505e-05, "loss": 2.2486, "step": 4878000 }, { "epoch": 14.12, "learning_rate": 4.294195766950723e-05, "loss": 2.2639, "step": 4878500 }, { "epoch": 14.12, "learning_rate": 4.294123402185995e-05, "loss": 2.2661, "step": 4879000 }, { "epoch": 14.12, "learning_rate": 4.294051037421267e-05, "loss": 2.2598, "step": 4879500 }, { "epoch": 14.13, "learning_rate": 4.2939786726565394e-05, "loss": 2.2551, "step": 4880000 }, { "epoch": 14.13, "learning_rate": 4.2939063078918116e-05, "loss": 2.2608, "step": 4880500 }, { "epoch": 14.13, "learning_rate": 4.293833943127084e-05, "loss": 2.2422, "step": 4881000 }, { "epoch": 14.13, "learning_rate": 4.293761723091886e-05, "loss": 2.257, "step": 4881500 }, { "epoch": 14.13, "learning_rate": 4.293689358327159e-05, "loss": 2.2635, "step": 4882000 }, { "epoch": 14.13, "learning_rate": 4.293616993562431e-05, "loss": 2.2609, "step": 4882500 }, { "epoch": 14.13, "learning_rate": 4.2935446287977034e-05, "loss": 2.2586, "step": 4883000 }, { "epoch": 14.14, "learning_rate": 4.2934722640329757e-05, "loss": 2.2747, "step": 4883500 }, { "epoch": 14.14, "learning_rate": 4.293399899268248e-05, "loss": 2.2482, "step": 4884000 }, { "epoch": 14.14, "learning_rate": 4.29332753450352e-05, "loss": 2.2633, "step": 4884500 }, { "epoch": 14.14, "learning_rate": 4.293255169738792e-05, "loss": 2.2682, "step": 4885000 }, { "epoch": 14.14, "learning_rate": 4.2931828049740646e-05, "loss": 2.2665, "step": 4885500 }, { "epoch": 14.14, "learning_rate": 4.293110440209337e-05, "loss": 2.2612, "step": 4886000 }, { "epoch": 14.14, "learning_rate": 4.293038075444609e-05, "loss": 2.2512, "step": 4886500 }, { "epoch": 14.15, "learning_rate": 4.292965710679881e-05, "loss": 2.2718, "step": 4887000 }, { "epoch": 14.15, "learning_rate": 4.292893345915154e-05, "loss": 2.2785, "step": 4887500 }, { "epoch": 14.15, "learning_rate": 4.292821125879956e-05, "loss": 2.2659, "step": 4888000 }, { "epoch": 14.15, "learning_rate": 4.292748761115228e-05, "loss": 2.2646, "step": 4888500 }, { "epoch": 14.15, "learning_rate": 4.2926763963505e-05, "loss": 2.2759, "step": 4889000 }, { "epoch": 14.15, "learning_rate": 4.292604031585773e-05, "loss": 2.252, "step": 4889500 }, { "epoch": 14.15, "learning_rate": 4.292531666821045e-05, "loss": 2.2559, "step": 4890000 }, { "epoch": 14.16, "learning_rate": 4.292459446785847e-05, "loss": 2.2335, "step": 4890500 }, { "epoch": 14.16, "learning_rate": 4.292387082021119e-05, "loss": 2.2609, "step": 4891000 }, { "epoch": 14.16, "learning_rate": 4.292314717256391e-05, "loss": 2.2525, "step": 4891500 }, { "epoch": 14.16, "learning_rate": 4.292242352491664e-05, "loss": 2.2755, "step": 4892000 }, { "epoch": 14.16, "learning_rate": 4.2921699877269364e-05, "loss": 2.2321, "step": 4892500 }, { "epoch": 14.16, "learning_rate": 4.2920976229622086e-05, "loss": 2.2729, "step": 4893000 }, { "epoch": 14.16, "learning_rate": 4.292025258197481e-05, "loss": 2.2363, "step": 4893500 }, { "epoch": 14.17, "learning_rate": 4.2919530381622824e-05, "loss": 2.2611, "step": 4894000 }, { "epoch": 14.17, "learning_rate": 4.2918806733975546e-05, "loss": 2.2463, "step": 4894500 }, { "epoch": 14.17, "learning_rate": 4.291808308632827e-05, "loss": 2.2788, "step": 4895000 }, { "epoch": 14.17, "learning_rate": 4.291735943868099e-05, "loss": 2.2536, "step": 4895500 }, { "epoch": 14.17, "learning_rate": 4.291663579103371e-05, "loss": 2.2386, "step": 4896000 }, { "epoch": 14.17, "learning_rate": 4.2915913590681735e-05, "loss": 2.2631, "step": 4896500 }, { "epoch": 14.17, "learning_rate": 4.291519139032976e-05, "loss": 2.2716, "step": 4897000 }, { "epoch": 14.18, "learning_rate": 4.291446774268248e-05, "loss": 2.2568, "step": 4897500 }, { "epoch": 14.18, "learning_rate": 4.29137440950352e-05, "loss": 2.2518, "step": 4898000 }, { "epoch": 14.18, "learning_rate": 4.2913020447387924e-05, "loss": 2.2475, "step": 4898500 }, { "epoch": 14.18, "learning_rate": 4.291229824703594e-05, "loss": 2.2482, "step": 4899000 }, { "epoch": 14.18, "learning_rate": 4.291157459938867e-05, "loss": 2.2677, "step": 4899500 }, { "epoch": 14.18, "learning_rate": 4.291085095174139e-05, "loss": 2.267, "step": 4900000 }, { "epoch": 14.18, "learning_rate": 4.2910128751389406e-05, "loss": 2.242, "step": 4900500 }, { "epoch": 14.19, "learning_rate": 4.290940510374213e-05, "loss": 2.2446, "step": 4901000 }, { "epoch": 14.19, "learning_rate": 4.290868145609485e-05, "loss": 2.2559, "step": 4901500 }, { "epoch": 14.19, "learning_rate": 4.290795780844757e-05, "loss": 2.2736, "step": 4902000 }, { "epoch": 14.19, "learning_rate": 4.2907234160800295e-05, "loss": 2.2744, "step": 4902500 }, { "epoch": 14.19, "learning_rate": 4.290651051315302e-05, "loss": 2.2706, "step": 4903000 }, { "epoch": 14.19, "learning_rate": 4.290578686550574e-05, "loss": 2.2342, "step": 4903500 }, { "epoch": 14.2, "learning_rate": 4.290506321785847e-05, "loss": 2.2524, "step": 4904000 }, { "epoch": 14.2, "learning_rate": 4.290433957021119e-05, "loss": 2.2515, "step": 4904500 }, { "epoch": 14.2, "learning_rate": 4.290361592256392e-05, "loss": 2.2523, "step": 4905000 }, { "epoch": 14.2, "learning_rate": 4.290289227491664e-05, "loss": 2.2821, "step": 4905500 }, { "epoch": 14.2, "learning_rate": 4.2902168627269365e-05, "loss": 2.2562, "step": 4906000 }, { "epoch": 14.2, "learning_rate": 4.290144497962209e-05, "loss": 2.2786, "step": 4906500 }, { "epoch": 14.2, "learning_rate": 4.29007227792701e-05, "loss": 2.2454, "step": 4907000 }, { "epoch": 14.21, "learning_rate": 4.2899999131622825e-05, "loss": 2.2554, "step": 4907500 }, { "epoch": 14.21, "learning_rate": 4.289927548397555e-05, "loss": 2.2519, "step": 4908000 }, { "epoch": 14.21, "learning_rate": 4.289855328362357e-05, "loss": 2.2788, "step": 4908500 }, { "epoch": 14.21, "learning_rate": 4.2897831083271585e-05, "loss": 2.2629, "step": 4909000 }, { "epoch": 14.21, "learning_rate": 4.289710743562431e-05, "loss": 2.2675, "step": 4909500 }, { "epoch": 14.21, "learning_rate": 4.289638378797703e-05, "loss": 2.2591, "step": 4910000 }, { "epoch": 14.21, "learning_rate": 4.289566014032975e-05, "loss": 2.2465, "step": 4910500 }, { "epoch": 14.22, "learning_rate": 4.2894936492682474e-05, "loss": 2.2797, "step": 4911000 }, { "epoch": 14.22, "learning_rate": 4.2894212845035196e-05, "loss": 2.2561, "step": 4911500 }, { "epoch": 14.22, "learning_rate": 4.2893489197387925e-05, "loss": 2.2525, "step": 4912000 }, { "epoch": 14.22, "learning_rate": 4.289276554974065e-05, "loss": 2.2678, "step": 4912500 }, { "epoch": 14.22, "learning_rate": 4.289204334938867e-05, "loss": 2.2369, "step": 4913000 }, { "epoch": 14.22, "learning_rate": 4.289131970174139e-05, "loss": 2.2724, "step": 4913500 }, { "epoch": 14.22, "learning_rate": 4.289059750138941e-05, "loss": 2.2833, "step": 4914000 }, { "epoch": 14.23, "learning_rate": 4.288987385374213e-05, "loss": 2.2549, "step": 4914500 }, { "epoch": 14.23, "learning_rate": 4.288915020609485e-05, "loss": 2.2486, "step": 4915000 }, { "epoch": 14.23, "learning_rate": 4.2888426558447574e-05, "loss": 2.2322, "step": 4915500 }, { "epoch": 14.23, "learning_rate": 4.2887702910800296e-05, "loss": 2.2576, "step": 4916000 }, { "epoch": 14.23, "learning_rate": 4.288697926315302e-05, "loss": 2.2663, "step": 4916500 }, { "epoch": 14.23, "learning_rate": 4.288625561550574e-05, "loss": 2.2626, "step": 4917000 }, { "epoch": 14.23, "learning_rate": 4.288553196785847e-05, "loss": 2.2496, "step": 4917500 }, { "epoch": 14.24, "learning_rate": 4.288480832021119e-05, "loss": 2.2683, "step": 4918000 }, { "epoch": 14.24, "learning_rate": 4.288408611985921e-05, "loss": 2.2781, "step": 4918500 }, { "epoch": 14.24, "learning_rate": 4.288336247221193e-05, "loss": 2.2616, "step": 4919000 }, { "epoch": 14.24, "learning_rate": 4.288263882456466e-05, "loss": 2.2706, "step": 4919500 }, { "epoch": 14.24, "learning_rate": 4.288191517691738e-05, "loss": 2.2512, "step": 4920000 }, { "epoch": 14.24, "learning_rate": 4.2881192976565396e-05, "loss": 2.2431, "step": 4920500 }, { "epoch": 14.24, "learning_rate": 4.288046932891812e-05, "loss": 2.2734, "step": 4921000 }, { "epoch": 14.25, "learning_rate": 4.287974712856614e-05, "loss": 2.2729, "step": 4921500 }, { "epoch": 14.25, "learning_rate": 4.287902348091886e-05, "loss": 2.2623, "step": 4922000 }, { "epoch": 14.25, "learning_rate": 4.2878299833271586e-05, "loss": 2.2641, "step": 4922500 }, { "epoch": 14.25, "learning_rate": 4.287757618562431e-05, "loss": 2.2766, "step": 4923000 }, { "epoch": 14.25, "learning_rate": 4.287685253797703e-05, "loss": 2.2703, "step": 4923500 }, { "epoch": 14.25, "learning_rate": 4.287612889032975e-05, "loss": 2.2897, "step": 4924000 }, { "epoch": 14.25, "learning_rate": 4.2875405242682474e-05, "loss": 2.2586, "step": 4924500 }, { "epoch": 14.26, "learning_rate": 4.28746815950352e-05, "loss": 2.265, "step": 4925000 }, { "epoch": 14.26, "learning_rate": 4.287395794738792e-05, "loss": 2.2629, "step": 4925500 }, { "epoch": 14.26, "learning_rate": 4.287323429974064e-05, "loss": 2.2591, "step": 4926000 }, { "epoch": 14.26, "learning_rate": 4.287251065209337e-05, "loss": 2.3009, "step": 4926500 }, { "epoch": 14.26, "learning_rate": 4.287178700444609e-05, "loss": 2.2629, "step": 4927000 }, { "epoch": 14.26, "learning_rate": 4.287106335679882e-05, "loss": 2.2664, "step": 4927500 }, { "epoch": 14.26, "learning_rate": 4.2870339709151544e-05, "loss": 2.263, "step": 4928000 }, { "epoch": 14.27, "learning_rate": 4.2869616061504266e-05, "loss": 2.2464, "step": 4928500 }, { "epoch": 14.27, "learning_rate": 4.286889241385699e-05, "loss": 2.2611, "step": 4929000 }, { "epoch": 14.27, "learning_rate": 4.2868170213505004e-05, "loss": 2.2501, "step": 4929500 }, { "epoch": 14.27, "learning_rate": 4.2867446565857726e-05, "loss": 2.2341, "step": 4930000 }, { "epoch": 14.27, "learning_rate": 4.286672291821045e-05, "loss": 2.2756, "step": 4930500 }, { "epoch": 14.27, "learning_rate": 4.286599927056317e-05, "loss": 2.2666, "step": 4931000 }, { "epoch": 14.27, "learning_rate": 4.286527562291589e-05, "loss": 2.2614, "step": 4931500 }, { "epoch": 14.28, "learning_rate": 4.2864553422563915e-05, "loss": 2.2504, "step": 4932000 }, { "epoch": 14.28, "learning_rate": 4.286382977491664e-05, "loss": 2.261, "step": 4932500 }, { "epoch": 14.28, "learning_rate": 4.286310612726936e-05, "loss": 2.2912, "step": 4933000 }, { "epoch": 14.28, "learning_rate": 4.286238247962208e-05, "loss": 2.2786, "step": 4933500 }, { "epoch": 14.28, "learning_rate": 4.28616602792701e-05, "loss": 2.2671, "step": 4934000 }, { "epoch": 14.28, "learning_rate": 4.2860936631622826e-05, "loss": 2.2422, "step": 4934500 }, { "epoch": 14.28, "learning_rate": 4.286021298397555e-05, "loss": 2.2527, "step": 4935000 }, { "epoch": 14.29, "learning_rate": 4.285948933632827e-05, "loss": 2.2785, "step": 4935500 }, { "epoch": 14.29, "learning_rate": 4.2858765688681e-05, "loss": 2.2773, "step": 4936000 }, { "epoch": 14.29, "learning_rate": 4.285804204103372e-05, "loss": 2.2619, "step": 4936500 }, { "epoch": 14.29, "learning_rate": 4.285731984068174e-05, "loss": 2.2637, "step": 4937000 }, { "epoch": 14.29, "learning_rate": 4.285659764032975e-05, "loss": 2.2476, "step": 4937500 }, { "epoch": 14.29, "learning_rate": 4.285587543997777e-05, "loss": 2.2484, "step": 4938000 }, { "epoch": 14.29, "learning_rate": 4.28551517923305e-05, "loss": 2.2544, "step": 4938500 }, { "epoch": 14.3, "learning_rate": 4.285442814468322e-05, "loss": 2.2723, "step": 4939000 }, { "epoch": 14.3, "learning_rate": 4.285370449703594e-05, "loss": 2.2459, "step": 4939500 }, { "epoch": 14.3, "learning_rate": 4.2852980849388664e-05, "loss": 2.2866, "step": 4940000 }, { "epoch": 14.3, "learning_rate": 4.2852257201741386e-05, "loss": 2.2489, "step": 4940500 }, { "epoch": 14.3, "learning_rate": 4.285153355409411e-05, "loss": 2.245, "step": 4941000 }, { "epoch": 14.3, "learning_rate": 4.285080990644683e-05, "loss": 2.2825, "step": 4941500 }, { "epoch": 14.31, "learning_rate": 4.285008770609485e-05, "loss": 2.2652, "step": 4942000 }, { "epoch": 14.31, "learning_rate": 4.2849364058447576e-05, "loss": 2.2722, "step": 4942500 }, { "epoch": 14.31, "learning_rate": 4.28486404108003e-05, "loss": 2.2555, "step": 4943000 }, { "epoch": 14.31, "learning_rate": 4.284791676315302e-05, "loss": 2.2733, "step": 4943500 }, { "epoch": 14.31, "learning_rate": 4.284719311550575e-05, "loss": 2.2719, "step": 4944000 }, { "epoch": 14.31, "learning_rate": 4.284646946785847e-05, "loss": 2.2711, "step": 4944500 }, { "epoch": 14.31, "learning_rate": 4.2845745820211193e-05, "loss": 2.2535, "step": 4945000 }, { "epoch": 14.32, "learning_rate": 4.2845022172563916e-05, "loss": 2.2521, "step": 4945500 }, { "epoch": 14.32, "learning_rate": 4.284429852491664e-05, "loss": 2.2566, "step": 4946000 }, { "epoch": 14.32, "learning_rate": 4.2843576324564653e-05, "loss": 2.2484, "step": 4946500 }, { "epoch": 14.32, "learning_rate": 4.2842852676917376e-05, "loss": 2.264, "step": 4947000 }, { "epoch": 14.32, "learning_rate": 4.28421290292701e-05, "loss": 2.2547, "step": 4947500 }, { "epoch": 14.32, "learning_rate": 4.284140538162282e-05, "loss": 2.2833, "step": 4948000 }, { "epoch": 14.32, "learning_rate": 4.284068173397555e-05, "loss": 2.2675, "step": 4948500 }, { "epoch": 14.33, "learning_rate": 4.283996098091886e-05, "loss": 2.2549, "step": 4949000 }, { "epoch": 14.33, "learning_rate": 4.283923733327159e-05, "loss": 2.2659, "step": 4949500 }, { "epoch": 14.33, "learning_rate": 4.283851368562431e-05, "loss": 2.2447, "step": 4950000 }, { "epoch": 14.33, "learning_rate": 4.283779003797703e-05, "loss": 2.2558, "step": 4950500 }, { "epoch": 14.33, "learning_rate": 4.2837066390329754e-05, "loss": 2.2728, "step": 4951000 }, { "epoch": 14.33, "learning_rate": 4.2836342742682476e-05, "loss": 2.2641, "step": 4951500 }, { "epoch": 14.33, "learning_rate": 4.28356190950352e-05, "loss": 2.2536, "step": 4952000 }, { "epoch": 14.34, "learning_rate": 4.283489689468322e-05, "loss": 2.2596, "step": 4952500 }, { "epoch": 14.34, "learning_rate": 4.283417324703594e-05, "loss": 2.2655, "step": 4953000 }, { "epoch": 14.34, "learning_rate": 4.2833449599388665e-05, "loss": 2.257, "step": 4953500 }, { "epoch": 14.34, "learning_rate": 4.283272595174139e-05, "loss": 2.2724, "step": 4954000 }, { "epoch": 14.34, "learning_rate": 4.283200230409411e-05, "loss": 2.2617, "step": 4954500 }, { "epoch": 14.34, "learning_rate": 4.283127865644683e-05, "loss": 2.2337, "step": 4955000 }, { "epoch": 14.34, "learning_rate": 4.2830555008799554e-05, "loss": 2.2571, "step": 4955500 }, { "epoch": 14.35, "learning_rate": 4.2829831361152276e-05, "loss": 2.2746, "step": 4956000 }, { "epoch": 14.35, "learning_rate": 4.28291091608003e-05, "loss": 2.2712, "step": 4956500 }, { "epoch": 14.35, "learning_rate": 4.282838551315303e-05, "loss": 2.2425, "step": 4957000 }, { "epoch": 14.35, "learning_rate": 4.282766186550575e-05, "loss": 2.2694, "step": 4957500 }, { "epoch": 14.35, "learning_rate": 4.282693821785847e-05, "loss": 2.2408, "step": 4958000 }, { "epoch": 14.35, "learning_rate": 4.2826214570211194e-05, "loss": 2.257, "step": 4958500 }, { "epoch": 14.35, "learning_rate": 4.2825490922563917e-05, "loss": 2.257, "step": 4959000 }, { "epoch": 14.36, "learning_rate": 4.282476727491664e-05, "loss": 2.2935, "step": 4959500 }, { "epoch": 14.36, "learning_rate": 4.282404362726936e-05, "loss": 2.2777, "step": 4960000 }, { "epoch": 14.36, "learning_rate": 4.282331997962208e-05, "loss": 2.2742, "step": 4960500 }, { "epoch": 14.36, "learning_rate": 4.28225977792701e-05, "loss": 2.2781, "step": 4961000 }, { "epoch": 14.36, "learning_rate": 4.282187413162283e-05, "loss": 2.2684, "step": 4961500 }, { "epoch": 14.36, "learning_rate": 4.282115048397555e-05, "loss": 2.2671, "step": 4962000 }, { "epoch": 14.36, "learning_rate": 4.282042683632827e-05, "loss": 2.2783, "step": 4962500 }, { "epoch": 14.37, "learning_rate": 4.2819703188680994e-05, "loss": 2.2531, "step": 4963000 }, { "epoch": 14.37, "learning_rate": 4.281897954103372e-05, "loss": 2.2707, "step": 4963500 }, { "epoch": 14.37, "learning_rate": 4.281825734068173e-05, "loss": 2.2549, "step": 4964000 }, { "epoch": 14.37, "learning_rate": 4.281753369303446e-05, "loss": 2.2475, "step": 4964500 }, { "epoch": 14.37, "learning_rate": 4.2816810045387184e-05, "loss": 2.2705, "step": 4965000 }, { "epoch": 14.37, "learning_rate": 4.28160878450352e-05, "loss": 2.2639, "step": 4965500 }, { "epoch": 14.37, "learning_rate": 4.281536419738793e-05, "loss": 2.2727, "step": 4966000 }, { "epoch": 14.38, "learning_rate": 4.281464054974065e-05, "loss": 2.2459, "step": 4966500 }, { "epoch": 14.38, "learning_rate": 4.281391690209337e-05, "loss": 2.2606, "step": 4967000 }, { "epoch": 14.38, "learning_rate": 4.2813193254446095e-05, "loss": 2.2473, "step": 4967500 }, { "epoch": 14.38, "learning_rate": 4.281246960679882e-05, "loss": 2.2509, "step": 4968000 }, { "epoch": 14.38, "learning_rate": 4.281174595915154e-05, "loss": 2.2198, "step": 4968500 }, { "epoch": 14.38, "learning_rate": 4.281102231150426e-05, "loss": 2.2572, "step": 4969000 }, { "epoch": 14.38, "learning_rate": 4.2810298663856984e-05, "loss": 2.2576, "step": 4969500 }, { "epoch": 14.39, "learning_rate": 4.2809575016209706e-05, "loss": 2.2672, "step": 4970000 }, { "epoch": 14.39, "learning_rate": 4.280885136856243e-05, "loss": 2.2527, "step": 4970500 }, { "epoch": 14.39, "learning_rate": 4.280812772091515e-05, "loss": 2.2496, "step": 4971000 }, { "epoch": 14.39, "learning_rate": 4.280740407326788e-05, "loss": 2.2624, "step": 4971500 }, { "epoch": 14.39, "learning_rate": 4.28066804256206e-05, "loss": 2.287, "step": 4972000 }, { "epoch": 14.39, "learning_rate": 4.2805956777973324e-05, "loss": 2.2787, "step": 4972500 }, { "epoch": 14.39, "learning_rate": 4.280523313032605e-05, "loss": 2.2601, "step": 4973000 }, { "epoch": 14.4, "learning_rate": 4.2804509482678775e-05, "loss": 2.2694, "step": 4973500 }, { "epoch": 14.4, "learning_rate": 4.280378728232679e-05, "loss": 2.2786, "step": 4974000 }, { "epoch": 14.4, "learning_rate": 4.280306363467951e-05, "loss": 2.2378, "step": 4974500 }, { "epoch": 14.4, "learning_rate": 4.280234143432753e-05, "loss": 2.2451, "step": 4975000 }, { "epoch": 14.4, "learning_rate": 4.280161778668025e-05, "loss": 2.2614, "step": 4975500 }, { "epoch": 14.4, "learning_rate": 4.280089558632827e-05, "loss": 2.2539, "step": 4976000 }, { "epoch": 14.4, "learning_rate": 4.2800171938680995e-05, "loss": 2.2728, "step": 4976500 }, { "epoch": 14.41, "learning_rate": 4.279944829103372e-05, "loss": 2.2882, "step": 4977000 }, { "epoch": 14.41, "learning_rate": 4.279872464338644e-05, "loss": 2.2469, "step": 4977500 }, { "epoch": 14.41, "learning_rate": 4.2798002443034455e-05, "loss": 2.2695, "step": 4978000 }, { "epoch": 14.41, "learning_rate": 4.279727879538718e-05, "loss": 2.2302, "step": 4978500 }, { "epoch": 14.41, "learning_rate": 4.2796555147739907e-05, "loss": 2.2731, "step": 4979000 }, { "epoch": 14.41, "learning_rate": 4.279583150009263e-05, "loss": 2.2492, "step": 4979500 }, { "epoch": 14.42, "learning_rate": 4.279510785244535e-05, "loss": 2.2774, "step": 4980000 }, { "epoch": 14.42, "learning_rate": 4.279438420479808e-05, "loss": 2.2572, "step": 4980500 }, { "epoch": 14.42, "learning_rate": 4.27936605571508e-05, "loss": 2.245, "step": 4981000 }, { "epoch": 14.42, "learning_rate": 4.2792936909503524e-05, "loss": 2.2688, "step": 4981500 }, { "epoch": 14.42, "learning_rate": 4.279221326185625e-05, "loss": 2.2925, "step": 4982000 }, { "epoch": 14.42, "learning_rate": 4.279148961420897e-05, "loss": 2.2866, "step": 4982500 }, { "epoch": 14.42, "learning_rate": 4.2790767413856984e-05, "loss": 2.2438, "step": 4983000 }, { "epoch": 14.43, "learning_rate": 4.279004376620971e-05, "loss": 2.2746, "step": 4983500 }, { "epoch": 14.43, "learning_rate": 4.278932156585773e-05, "loss": 2.2704, "step": 4984000 }, { "epoch": 14.43, "learning_rate": 4.278859791821045e-05, "loss": 2.2895, "step": 4984500 }, { "epoch": 14.43, "learning_rate": 4.2787874270563174e-05, "loss": 2.2683, "step": 4985000 }, { "epoch": 14.43, "learning_rate": 4.278715207021119e-05, "loss": 2.2632, "step": 4985500 }, { "epoch": 14.43, "learning_rate": 4.278642842256391e-05, "loss": 2.2472, "step": 4986000 }, { "epoch": 14.43, "learning_rate": 4.2785704774916634e-05, "loss": 2.2406, "step": 4986500 }, { "epoch": 14.44, "learning_rate": 4.278498112726936e-05, "loss": 2.2775, "step": 4987000 }, { "epoch": 14.44, "learning_rate": 4.2784257479622085e-05, "loss": 2.277, "step": 4987500 }, { "epoch": 14.44, "learning_rate": 4.278353383197481e-05, "loss": 2.2476, "step": 4988000 }, { "epoch": 14.44, "learning_rate": 4.278281018432753e-05, "loss": 2.2656, "step": 4988500 }, { "epoch": 14.44, "learning_rate": 4.278208798397555e-05, "loss": 2.26, "step": 4989000 }, { "epoch": 14.44, "learning_rate": 4.278136578362357e-05, "loss": 2.2771, "step": 4989500 }, { "epoch": 14.44, "learning_rate": 4.278064213597629e-05, "loss": 2.2691, "step": 4990000 }, { "epoch": 14.45, "learning_rate": 4.277991848832901e-05, "loss": 2.2631, "step": 4990500 }, { "epoch": 14.45, "learning_rate": 4.2779194840681734e-05, "loss": 2.2664, "step": 4991000 }, { "epoch": 14.45, "learning_rate": 4.2778471193034456e-05, "loss": 2.2421, "step": 4991500 }, { "epoch": 14.45, "learning_rate": 4.277774754538718e-05, "loss": 2.2816, "step": 4992000 }, { "epoch": 14.45, "learning_rate": 4.277702389773991e-05, "loss": 2.2738, "step": 4992500 }, { "epoch": 14.45, "learning_rate": 4.277630169738792e-05, "loss": 2.2364, "step": 4993000 }, { "epoch": 14.45, "learning_rate": 4.2775578049740645e-05, "loss": 2.2808, "step": 4993500 }, { "epoch": 14.46, "learning_rate": 4.277485440209337e-05, "loss": 2.2776, "step": 4994000 }, { "epoch": 14.46, "learning_rate": 4.2774130754446096e-05, "loss": 2.2705, "step": 4994500 }, { "epoch": 14.46, "learning_rate": 4.277340710679882e-05, "loss": 2.2608, "step": 4995000 }, { "epoch": 14.46, "learning_rate": 4.277268345915154e-05, "loss": 2.2819, "step": 4995500 }, { "epoch": 14.46, "learning_rate": 4.277195981150426e-05, "loss": 2.2703, "step": 4996000 }, { "epoch": 14.46, "learning_rate": 4.2771236163856985e-05, "loss": 2.2625, "step": 4996500 }, { "epoch": 14.46, "learning_rate": 4.277051251620971e-05, "loss": 2.2532, "step": 4997000 }, { "epoch": 14.47, "learning_rate": 4.276978886856243e-05, "loss": 2.2425, "step": 4997500 }, { "epoch": 14.47, "learning_rate": 4.276906522091516e-05, "loss": 2.3025, "step": 4998000 }, { "epoch": 14.47, "learning_rate": 4.276834157326788e-05, "loss": 2.2713, "step": 4998500 }, { "epoch": 14.47, "learning_rate": 4.27676179256206e-05, "loss": 2.2791, "step": 4999000 }, { "epoch": 14.47, "learning_rate": 4.276689572526862e-05, "loss": 2.2601, "step": 4999500 }, { "epoch": 14.47, "learning_rate": 4.2766173524916634e-05, "loss": 2.2563, "step": 5000000 }, { "epoch": 14.47, "learning_rate": 4.2765449877269357e-05, "loss": 2.2839, "step": 5000500 }, { "epoch": 14.48, "learning_rate": 4.276472622962208e-05, "loss": 2.2437, "step": 5001000 }, { "epoch": 14.48, "learning_rate": 4.276400258197481e-05, "loss": 2.2377, "step": 5001500 }, { "epoch": 14.48, "learning_rate": 4.276327893432753e-05, "loss": 2.2757, "step": 5002000 }, { "epoch": 14.48, "learning_rate": 4.276255528668026e-05, "loss": 2.2633, "step": 5002500 }, { "epoch": 14.48, "learning_rate": 4.276183163903298e-05, "loss": 2.2706, "step": 5003000 }, { "epoch": 14.48, "learning_rate": 4.2761107991385704e-05, "loss": 2.2684, "step": 5003500 }, { "epoch": 14.48, "learning_rate": 4.2760384343738426e-05, "loss": 2.2525, "step": 5004000 }, { "epoch": 14.49, "learning_rate": 4.275966069609115e-05, "loss": 2.2616, "step": 5004500 }, { "epoch": 14.49, "learning_rate": 4.275893704844387e-05, "loss": 2.2695, "step": 5005000 }, { "epoch": 14.49, "learning_rate": 4.2758214848091886e-05, "loss": 2.2734, "step": 5005500 }, { "epoch": 14.49, "learning_rate": 4.275749120044461e-05, "loss": 2.2492, "step": 5006000 }, { "epoch": 14.49, "learning_rate": 4.275676755279733e-05, "loss": 2.2521, "step": 5006500 }, { "epoch": 14.49, "learning_rate": 4.275604390515006e-05, "loss": 2.2645, "step": 5007000 }, { "epoch": 14.49, "learning_rate": 4.275532315209337e-05, "loss": 2.2505, "step": 5007500 }, { "epoch": 14.5, "learning_rate": 4.2754600951741384e-05, "loss": 2.2546, "step": 5008000 }, { "epoch": 14.5, "learning_rate": 4.27538801986847e-05, "loss": 2.2508, "step": 5008500 }, { "epoch": 14.5, "learning_rate": 4.275315655103742e-05, "loss": 2.2476, "step": 5009000 }, { "epoch": 14.5, "learning_rate": 4.2752432903390144e-05, "loss": 2.2683, "step": 5009500 }, { "epoch": 14.5, "learning_rate": 4.275170925574287e-05, "loss": 2.2751, "step": 5010000 }, { "epoch": 14.5, "learning_rate": 4.2750985608095595e-05, "loss": 2.2739, "step": 5010500 }, { "epoch": 14.5, "learning_rate": 4.275026196044832e-05, "loss": 2.274, "step": 5011000 }, { "epoch": 14.51, "learning_rate": 4.274953831280104e-05, "loss": 2.2808, "step": 5011500 }, { "epoch": 14.51, "learning_rate": 4.274881466515376e-05, "loss": 2.2541, "step": 5012000 }, { "epoch": 14.51, "learning_rate": 4.2748091017506484e-05, "loss": 2.255, "step": 5012500 }, { "epoch": 14.51, "learning_rate": 4.2747367369859206e-05, "loss": 2.2907, "step": 5013000 }, { "epoch": 14.51, "learning_rate": 4.2746643722211935e-05, "loss": 2.269, "step": 5013500 }, { "epoch": 14.51, "learning_rate": 4.274592007456466e-05, "loss": 2.2697, "step": 5014000 }, { "epoch": 14.51, "learning_rate": 4.274519642691738e-05, "loss": 2.2528, "step": 5014500 }, { "epoch": 14.52, "learning_rate": 4.27444727792701e-05, "loss": 2.2682, "step": 5015000 }, { "epoch": 14.52, "learning_rate": 4.2743749131622824e-05, "loss": 2.2597, "step": 5015500 }, { "epoch": 14.52, "learning_rate": 4.2743025483975546e-05, "loss": 2.2694, "step": 5016000 }, { "epoch": 14.52, "learning_rate": 4.274230328362356e-05, "loss": 2.2538, "step": 5016500 }, { "epoch": 14.52, "learning_rate": 4.274157963597629e-05, "loss": 2.2547, "step": 5017000 }, { "epoch": 14.52, "learning_rate": 4.274085598832901e-05, "loss": 2.2635, "step": 5017500 }, { "epoch": 14.53, "learning_rate": 4.2740132340681735e-05, "loss": 2.2839, "step": 5018000 }, { "epoch": 14.53, "learning_rate": 4.273940869303446e-05, "loss": 2.276, "step": 5018500 }, { "epoch": 14.53, "learning_rate": 4.273868649268248e-05, "loss": 2.2488, "step": 5019000 }, { "epoch": 14.53, "learning_rate": 4.27379628450352e-05, "loss": 2.2731, "step": 5019500 }, { "epoch": 14.53, "learning_rate": 4.2737239197387924e-05, "loss": 2.2651, "step": 5020000 }, { "epoch": 14.53, "learning_rate": 4.273651554974065e-05, "loss": 2.2626, "step": 5020500 }, { "epoch": 14.53, "learning_rate": 4.273579190209337e-05, "loss": 2.2678, "step": 5021000 }, { "epoch": 14.54, "learning_rate": 4.273506825444609e-05, "loss": 2.279, "step": 5021500 }, { "epoch": 14.54, "learning_rate": 4.2734344606798813e-05, "loss": 2.2589, "step": 5022000 }, { "epoch": 14.54, "learning_rate": 4.2733620959151536e-05, "loss": 2.2784, "step": 5022500 }, { "epoch": 14.54, "learning_rate": 4.273289731150426e-05, "loss": 2.2533, "step": 5023000 }, { "epoch": 14.54, "learning_rate": 4.273217366385699e-05, "loss": 2.2489, "step": 5023500 }, { "epoch": 14.54, "learning_rate": 4.273145001620971e-05, "loss": 2.2578, "step": 5024000 }, { "epoch": 14.54, "learning_rate": 4.273072781585773e-05, "loss": 2.3026, "step": 5024500 }, { "epoch": 14.55, "learning_rate": 4.2730004168210454e-05, "loss": 2.2564, "step": 5025000 }, { "epoch": 14.55, "learning_rate": 4.2729280520563176e-05, "loss": 2.2527, "step": 5025500 }, { "epoch": 14.55, "learning_rate": 4.27285568729159e-05, "loss": 2.2573, "step": 5026000 }, { "epoch": 14.55, "learning_rate": 4.272783322526862e-05, "loss": 2.271, "step": 5026500 }, { "epoch": 14.55, "learning_rate": 4.272710957762134e-05, "loss": 2.2425, "step": 5027000 }, { "epoch": 14.55, "learning_rate": 4.272638737726936e-05, "loss": 2.259, "step": 5027500 }, { "epoch": 14.55, "learning_rate": 4.272566372962209e-05, "loss": 2.2686, "step": 5028000 }, { "epoch": 14.56, "learning_rate": 4.272494008197481e-05, "loss": 2.2592, "step": 5028500 }, { "epoch": 14.56, "learning_rate": 4.272421643432753e-05, "loss": 2.2575, "step": 5029000 }, { "epoch": 14.56, "learning_rate": 4.272349423397555e-05, "loss": 2.2856, "step": 5029500 }, { "epoch": 14.56, "learning_rate": 4.272277203362356e-05, "loss": 2.2561, "step": 5030000 }, { "epoch": 14.56, "learning_rate": 4.2722048385976285e-05, "loss": 2.2627, "step": 5030500 }, { "epoch": 14.56, "learning_rate": 4.272132473832901e-05, "loss": 2.2811, "step": 5031000 }, { "epoch": 14.56, "learning_rate": 4.2720601090681736e-05, "loss": 2.2557, "step": 5031500 }, { "epoch": 14.57, "learning_rate": 4.271987744303446e-05, "loss": 2.2722, "step": 5032000 }, { "epoch": 14.57, "learning_rate": 4.271915379538719e-05, "loss": 2.2477, "step": 5032500 }, { "epoch": 14.57, "learning_rate": 4.27184315950352e-05, "loss": 2.2637, "step": 5033000 }, { "epoch": 14.57, "learning_rate": 4.2717707947387925e-05, "loss": 2.2376, "step": 5033500 }, { "epoch": 14.57, "learning_rate": 4.271698429974065e-05, "loss": 2.2571, "step": 5034000 }, { "epoch": 14.57, "learning_rate": 4.271626065209337e-05, "loss": 2.2419, "step": 5034500 }, { "epoch": 14.57, "learning_rate": 4.271553700444609e-05, "loss": 2.2338, "step": 5035000 }, { "epoch": 14.58, "learning_rate": 4.2714813356798814e-05, "loss": 2.2478, "step": 5035500 }, { "epoch": 14.58, "learning_rate": 4.2714089709151536e-05, "loss": 2.2742, "step": 5036000 }, { "epoch": 14.58, "learning_rate": 4.271336606150426e-05, "loss": 2.2755, "step": 5036500 }, { "epoch": 14.58, "learning_rate": 4.271264241385699e-05, "loss": 2.2541, "step": 5037000 }, { "epoch": 14.58, "learning_rate": 4.271191876620971e-05, "loss": 2.2717, "step": 5037500 }, { "epoch": 14.58, "learning_rate": 4.2711196565857725e-05, "loss": 2.2558, "step": 5038000 }, { "epoch": 14.58, "learning_rate": 4.271047291821045e-05, "loss": 2.2427, "step": 5038500 }, { "epoch": 14.59, "learning_rate": 4.270974927056318e-05, "loss": 2.273, "step": 5039000 }, { "epoch": 14.59, "learning_rate": 4.27090256229159e-05, "loss": 2.2897, "step": 5039500 }, { "epoch": 14.59, "learning_rate": 4.270830197526862e-05, "loss": 2.2426, "step": 5040000 }, { "epoch": 14.59, "learning_rate": 4.2707578327621343e-05, "loss": 2.2643, "step": 5040500 }, { "epoch": 14.59, "learning_rate": 4.2706854679974066e-05, "loss": 2.2773, "step": 5041000 }, { "epoch": 14.59, "learning_rate": 4.270613103232679e-05, "loss": 2.2392, "step": 5041500 }, { "epoch": 14.59, "learning_rate": 4.270540883197481e-05, "loss": 2.2834, "step": 5042000 }, { "epoch": 14.6, "learning_rate": 4.270468518432753e-05, "loss": 2.2807, "step": 5042500 }, { "epoch": 14.6, "learning_rate": 4.2703961536680255e-05, "loss": 2.2641, "step": 5043000 }, { "epoch": 14.6, "learning_rate": 4.270323788903298e-05, "loss": 2.2379, "step": 5043500 }, { "epoch": 14.6, "learning_rate": 4.27025142413857e-05, "loss": 2.2685, "step": 5044000 }, { "epoch": 14.6, "learning_rate": 4.2701792041033715e-05, "loss": 2.2851, "step": 5044500 }, { "epoch": 14.6, "learning_rate": 4.270106839338644e-05, "loss": 2.2523, "step": 5045000 }, { "epoch": 14.6, "learning_rate": 4.270034474573916e-05, "loss": 2.234, "step": 5045500 }, { "epoch": 14.61, "learning_rate": 4.269962109809189e-05, "loss": 2.2641, "step": 5046000 }, { "epoch": 14.61, "learning_rate": 4.269889745044461e-05, "loss": 2.2471, "step": 5046500 }, { "epoch": 14.61, "learning_rate": 4.269817380279734e-05, "loss": 2.2852, "step": 5047000 }, { "epoch": 14.61, "learning_rate": 4.2697451602445355e-05, "loss": 2.2789, "step": 5047500 }, { "epoch": 14.61, "learning_rate": 4.269672795479808e-05, "loss": 2.2612, "step": 5048000 }, { "epoch": 14.61, "learning_rate": 4.26960043071508e-05, "loss": 2.2683, "step": 5048500 }, { "epoch": 14.61, "learning_rate": 4.269528065950352e-05, "loss": 2.2546, "step": 5049000 }, { "epoch": 14.62, "learning_rate": 4.2694557011856244e-05, "loss": 2.2828, "step": 5049500 }, { "epoch": 14.62, "learning_rate": 4.2693834811504266e-05, "loss": 2.2591, "step": 5050000 }, { "epoch": 14.62, "learning_rate": 4.269311116385699e-05, "loss": 2.2912, "step": 5050500 }, { "epoch": 14.62, "learning_rate": 4.2692388963505004e-05, "loss": 2.2748, "step": 5051000 }, { "epoch": 14.62, "learning_rate": 4.2691665315857726e-05, "loss": 2.2566, "step": 5051500 }, { "epoch": 14.62, "learning_rate": 4.269094166821045e-05, "loss": 2.2521, "step": 5052000 }, { "epoch": 14.62, "learning_rate": 4.2690219467858464e-05, "loss": 2.2661, "step": 5052500 }, { "epoch": 14.63, "learning_rate": 4.2689495820211186e-05, "loss": 2.2664, "step": 5053000 }, { "epoch": 14.63, "learning_rate": 4.2688772172563915e-05, "loss": 2.2805, "step": 5053500 }, { "epoch": 14.63, "learning_rate": 4.268804852491664e-05, "loss": 2.2769, "step": 5054000 }, { "epoch": 14.63, "learning_rate": 4.2687324877269367e-05, "loss": 2.2681, "step": 5054500 }, { "epoch": 14.63, "learning_rate": 4.268660122962209e-05, "loss": 2.2437, "step": 5055000 }, { "epoch": 14.63, "learning_rate": 4.268587758197481e-05, "loss": 2.2504, "step": 5055500 }, { "epoch": 14.64, "learning_rate": 4.268515393432753e-05, "loss": 2.2603, "step": 5056000 }, { "epoch": 14.64, "learning_rate": 4.2684430286680255e-05, "loss": 2.259, "step": 5056500 }, { "epoch": 14.64, "learning_rate": 4.268370663903298e-05, "loss": 2.2464, "step": 5057000 }, { "epoch": 14.64, "learning_rate": 4.26829829913857e-05, "loss": 2.2718, "step": 5057500 }, { "epoch": 14.64, "learning_rate": 4.268225934373842e-05, "loss": 2.267, "step": 5058000 }, { "epoch": 14.64, "learning_rate": 4.2681535696091144e-05, "loss": 2.2833, "step": 5058500 }, { "epoch": 14.64, "learning_rate": 4.268081204844387e-05, "loss": 2.2661, "step": 5059000 }, { "epoch": 14.65, "learning_rate": 4.268008840079659e-05, "loss": 2.2771, "step": 5059500 }, { "epoch": 14.65, "learning_rate": 4.267936475314932e-05, "loss": 2.247, "step": 5060000 }, { "epoch": 14.65, "learning_rate": 4.267864400009263e-05, "loss": 2.2544, "step": 5060500 }, { "epoch": 14.65, "learning_rate": 4.267792035244535e-05, "loss": 2.2406, "step": 5061000 }, { "epoch": 14.65, "learning_rate": 4.267719670479808e-05, "loss": 2.2702, "step": 5061500 }, { "epoch": 14.65, "learning_rate": 4.26764730571508e-05, "loss": 2.2795, "step": 5062000 }, { "epoch": 14.65, "learning_rate": 4.267574940950352e-05, "loss": 2.2614, "step": 5062500 }, { "epoch": 14.66, "learning_rate": 4.2675025761856245e-05, "loss": 2.2662, "step": 5063000 }, { "epoch": 14.66, "learning_rate": 4.267430211420897e-05, "loss": 2.2829, "step": 5063500 }, { "epoch": 14.66, "learning_rate": 4.267357991385699e-05, "loss": 2.2756, "step": 5064000 }, { "epoch": 14.66, "learning_rate": 4.267285626620971e-05, "loss": 2.255, "step": 5064500 }, { "epoch": 14.66, "learning_rate": 4.2672132618562434e-05, "loss": 2.2644, "step": 5065000 }, { "epoch": 14.66, "learning_rate": 4.2671408970915156e-05, "loss": 2.2677, "step": 5065500 }, { "epoch": 14.66, "learning_rate": 4.267068532326788e-05, "loss": 2.2556, "step": 5066000 }, { "epoch": 14.67, "learning_rate": 4.26699616756206e-05, "loss": 2.2772, "step": 5066500 }, { "epoch": 14.67, "learning_rate": 4.2669239475268616e-05, "loss": 2.2784, "step": 5067000 }, { "epoch": 14.67, "learning_rate": 4.266851582762134e-05, "loss": 2.2518, "step": 5067500 }, { "epoch": 14.67, "learning_rate": 4.266779217997407e-05, "loss": 2.2794, "step": 5068000 }, { "epoch": 14.67, "learning_rate": 4.266706853232679e-05, "loss": 2.2576, "step": 5068500 }, { "epoch": 14.67, "learning_rate": 4.266634488467952e-05, "loss": 2.2702, "step": 5069000 }, { "epoch": 14.67, "learning_rate": 4.266562123703224e-05, "loss": 2.256, "step": 5069500 }, { "epoch": 14.68, "learning_rate": 4.266489758938496e-05, "loss": 2.2874, "step": 5070000 }, { "epoch": 14.68, "learning_rate": 4.2664173941737685e-05, "loss": 2.2396, "step": 5070500 }, { "epoch": 14.68, "learning_rate": 4.266345029409041e-05, "loss": 2.2536, "step": 5071000 }, { "epoch": 14.68, "learning_rate": 4.266272809373842e-05, "loss": 2.2763, "step": 5071500 }, { "epoch": 14.68, "learning_rate": 4.2662005893386445e-05, "loss": 2.2916, "step": 5072000 }, { "epoch": 14.68, "learning_rate": 4.266128224573917e-05, "loss": 2.2529, "step": 5072500 }, { "epoch": 14.68, "learning_rate": 4.266055859809189e-05, "loss": 2.2643, "step": 5073000 }, { "epoch": 14.69, "learning_rate": 4.265983495044461e-05, "loss": 2.2567, "step": 5073500 }, { "epoch": 14.69, "learning_rate": 4.2659111302797334e-05, "loss": 2.2664, "step": 5074000 }, { "epoch": 14.69, "learning_rate": 4.265839054974064e-05, "loss": 2.2675, "step": 5074500 }, { "epoch": 14.69, "learning_rate": 4.2657666902093365e-05, "loss": 2.2797, "step": 5075000 }, { "epoch": 14.69, "learning_rate": 4.2656943254446094e-05, "loss": 2.2461, "step": 5075500 }, { "epoch": 14.69, "learning_rate": 4.2656219606798817e-05, "loss": 2.2577, "step": 5076000 }, { "epoch": 14.69, "learning_rate": 4.265549740644683e-05, "loss": 2.2702, "step": 5076500 }, { "epoch": 14.7, "learning_rate": 4.265477375879956e-05, "loss": 2.2562, "step": 5077000 }, { "epoch": 14.7, "learning_rate": 4.2654050111152283e-05, "loss": 2.2568, "step": 5077500 }, { "epoch": 14.7, "learning_rate": 4.2653326463505006e-05, "loss": 2.2799, "step": 5078000 }, { "epoch": 14.7, "learning_rate": 4.265260281585773e-05, "loss": 2.265, "step": 5078500 }, { "epoch": 14.7, "learning_rate": 4.265187916821045e-05, "loss": 2.2837, "step": 5079000 }, { "epoch": 14.7, "learning_rate": 4.265115552056317e-05, "loss": 2.2636, "step": 5079500 }, { "epoch": 14.7, "learning_rate": 4.2650431872915895e-05, "loss": 2.2382, "step": 5080000 }, { "epoch": 14.71, "learning_rate": 4.264970822526862e-05, "loss": 2.2441, "step": 5080500 }, { "epoch": 14.71, "learning_rate": 4.2648984577621346e-05, "loss": 2.2519, "step": 5081000 }, { "epoch": 14.71, "learning_rate": 4.264826237726936e-05, "loss": 2.2543, "step": 5081500 }, { "epoch": 14.71, "learning_rate": 4.2647538729622084e-05, "loss": 2.2707, "step": 5082000 }, { "epoch": 14.71, "learning_rate": 4.2646815081974806e-05, "loss": 2.2567, "step": 5082500 }, { "epoch": 14.71, "learning_rate": 4.264609143432753e-05, "loss": 2.2735, "step": 5083000 }, { "epoch": 14.71, "learning_rate": 4.2645369233975544e-05, "loss": 2.2537, "step": 5083500 }, { "epoch": 14.72, "learning_rate": 4.2644645586328266e-05, "loss": 2.2508, "step": 5084000 }, { "epoch": 14.72, "learning_rate": 4.2643921938680995e-05, "loss": 2.2643, "step": 5084500 }, { "epoch": 14.72, "learning_rate": 4.264319829103372e-05, "loss": 2.2482, "step": 5085000 }, { "epoch": 14.72, "learning_rate": 4.2642474643386446e-05, "loss": 2.2786, "step": 5085500 }, { "epoch": 14.72, "learning_rate": 4.264175099573917e-05, "loss": 2.2523, "step": 5086000 }, { "epoch": 14.72, "learning_rate": 4.264102734809189e-05, "loss": 2.2452, "step": 5086500 }, { "epoch": 14.72, "learning_rate": 4.264030370044461e-05, "loss": 2.2664, "step": 5087000 }, { "epoch": 14.73, "learning_rate": 4.263958150009263e-05, "loss": 2.2546, "step": 5087500 }, { "epoch": 14.73, "learning_rate": 4.263885785244535e-05, "loss": 2.279, "step": 5088000 }, { "epoch": 14.73, "learning_rate": 4.263813420479807e-05, "loss": 2.2916, "step": 5088500 }, { "epoch": 14.73, "learning_rate": 4.2637410557150795e-05, "loss": 2.2632, "step": 5089000 }, { "epoch": 14.73, "learning_rate": 4.263668690950352e-05, "loss": 2.2829, "step": 5089500 }, { "epoch": 14.73, "learning_rate": 4.2635963261856246e-05, "loss": 2.2527, "step": 5090000 }, { "epoch": 14.73, "learning_rate": 4.263523961420897e-05, "loss": 2.2334, "step": 5090500 }, { "epoch": 14.74, "learning_rate": 4.263451596656169e-05, "loss": 2.2837, "step": 5091000 }, { "epoch": 14.74, "learning_rate": 4.263379376620971e-05, "loss": 2.2579, "step": 5091500 }, { "epoch": 14.74, "learning_rate": 4.2633070118562435e-05, "loss": 2.2582, "step": 5092000 }, { "epoch": 14.74, "learning_rate": 4.263234647091516e-05, "loss": 2.226, "step": 5092500 }, { "epoch": 14.74, "learning_rate": 4.263162282326788e-05, "loss": 2.2571, "step": 5093000 }, { "epoch": 14.74, "learning_rate": 4.26308991756206e-05, "loss": 2.2457, "step": 5093500 }, { "epoch": 14.75, "learning_rate": 4.2630175527973324e-05, "loss": 2.2832, "step": 5094000 }, { "epoch": 14.75, "learning_rate": 4.2629451880326046e-05, "loss": 2.2343, "step": 5094500 }, { "epoch": 14.75, "learning_rate": 4.262872823267877e-05, "loss": 2.2638, "step": 5095000 }, { "epoch": 14.75, "learning_rate": 4.26280045850315e-05, "loss": 2.2516, "step": 5095500 }, { "epoch": 14.75, "learning_rate": 4.262728238467951e-05, "loss": 2.2649, "step": 5096000 }, { "epoch": 14.75, "learning_rate": 4.2626558737032236e-05, "loss": 2.2628, "step": 5096500 }, { "epoch": 14.75, "learning_rate": 4.262583508938496e-05, "loss": 2.2607, "step": 5097000 }, { "epoch": 14.76, "learning_rate": 4.262511144173768e-05, "loss": 2.2666, "step": 5097500 }, { "epoch": 14.76, "learning_rate": 4.2624389241385696e-05, "loss": 2.2861, "step": 5098000 }, { "epoch": 14.76, "learning_rate": 4.262366559373842e-05, "loss": 2.2659, "step": 5098500 }, { "epoch": 14.76, "learning_rate": 4.262294194609115e-05, "loss": 2.2389, "step": 5099000 }, { "epoch": 14.76, "learning_rate": 4.262221829844387e-05, "loss": 2.236, "step": 5099500 }, { "epoch": 14.76, "learning_rate": 4.26214946507966e-05, "loss": 2.2735, "step": 5100000 }, { "epoch": 14.76, "learning_rate": 4.262077100314932e-05, "loss": 2.2446, "step": 5100500 }, { "epoch": 14.77, "learning_rate": 4.2620048802797336e-05, "loss": 2.25, "step": 5101000 }, { "epoch": 14.77, "learning_rate": 4.261932515515006e-05, "loss": 2.2492, "step": 5101500 }, { "epoch": 14.77, "learning_rate": 4.261860150750278e-05, "loss": 2.2639, "step": 5102000 }, { "epoch": 14.77, "learning_rate": 4.26178778598555e-05, "loss": 2.2573, "step": 5102500 }, { "epoch": 14.77, "learning_rate": 4.2617154212208225e-05, "loss": 2.2543, "step": 5103000 }, { "epoch": 14.77, "learning_rate": 4.261643056456095e-05, "loss": 2.2686, "step": 5103500 }, { "epoch": 14.77, "learning_rate": 4.261570691691367e-05, "loss": 2.2652, "step": 5104000 }, { "epoch": 14.78, "learning_rate": 4.26149832692664e-05, "loss": 2.2638, "step": 5104500 }, { "epoch": 14.78, "learning_rate": 4.2614261068914414e-05, "loss": 2.2506, "step": 5105000 }, { "epoch": 14.78, "learning_rate": 4.2613537421267136e-05, "loss": 2.268, "step": 5105500 }, { "epoch": 14.78, "learning_rate": 4.261281377361986e-05, "loss": 2.2545, "step": 5106000 }, { "epoch": 14.78, "learning_rate": 4.261209012597259e-05, "loss": 2.2834, "step": 5106500 }, { "epoch": 14.78, "learning_rate": 4.261136647832531e-05, "loss": 2.2715, "step": 5107000 }, { "epoch": 14.78, "learning_rate": 4.261064283067803e-05, "loss": 2.2489, "step": 5107500 }, { "epoch": 14.79, "learning_rate": 4.260992063032605e-05, "loss": 2.2593, "step": 5108000 }, { "epoch": 14.79, "learning_rate": 4.260919698267877e-05, "loss": 2.2679, "step": 5108500 }, { "epoch": 14.79, "learning_rate": 4.26084733350315e-05, "loss": 2.2666, "step": 5109000 }, { "epoch": 14.79, "learning_rate": 4.260774968738422e-05, "loss": 2.2747, "step": 5109500 }, { "epoch": 14.79, "learning_rate": 4.260702603973694e-05, "loss": 2.2757, "step": 5110000 }, { "epoch": 14.79, "learning_rate": 4.2606302392089665e-05, "loss": 2.2584, "step": 5110500 }, { "epoch": 14.79, "learning_rate": 4.260557874444239e-05, "loss": 2.2557, "step": 5111000 }, { "epoch": 14.8, "learning_rate": 4.260485509679511e-05, "loss": 2.2852, "step": 5111500 }, { "epoch": 14.8, "learning_rate": 4.2604132896443125e-05, "loss": 2.2511, "step": 5112000 }, { "epoch": 14.8, "learning_rate": 4.260340924879585e-05, "loss": 2.2449, "step": 5112500 }, { "epoch": 14.8, "learning_rate": 4.260268560114857e-05, "loss": 2.2715, "step": 5113000 }, { "epoch": 14.8, "learning_rate": 4.26019619535013e-05, "loss": 2.2405, "step": 5113500 }, { "epoch": 14.8, "learning_rate": 4.260123975314932e-05, "loss": 2.2657, "step": 5114000 }, { "epoch": 14.8, "learning_rate": 4.260051610550204e-05, "loss": 2.2585, "step": 5114500 }, { "epoch": 14.81, "learning_rate": 4.2599792457854766e-05, "loss": 2.2686, "step": 5115000 }, { "epoch": 14.81, "learning_rate": 4.259906881020749e-05, "loss": 2.2813, "step": 5115500 }, { "epoch": 14.81, "learning_rate": 4.259834516256021e-05, "loss": 2.2823, "step": 5116000 }, { "epoch": 14.81, "learning_rate": 4.2597622962208226e-05, "loss": 2.2806, "step": 5116500 }, { "epoch": 14.81, "learning_rate": 4.259689931456095e-05, "loss": 2.2544, "step": 5117000 }, { "epoch": 14.81, "learning_rate": 4.259617711420897e-05, "loss": 2.2838, "step": 5117500 }, { "epoch": 14.81, "learning_rate": 4.259545346656169e-05, "loss": 2.2719, "step": 5118000 }, { "epoch": 14.82, "learning_rate": 4.2594729818914415e-05, "loss": 2.2779, "step": 5118500 }, { "epoch": 14.82, "learning_rate": 4.259400617126714e-05, "loss": 2.2589, "step": 5119000 }, { "epoch": 14.82, "learning_rate": 4.259328252361986e-05, "loss": 2.2533, "step": 5119500 }, { "epoch": 14.82, "learning_rate": 4.259255887597258e-05, "loss": 2.2878, "step": 5120000 }, { "epoch": 14.82, "learning_rate": 4.2591835228325304e-05, "loss": 2.2841, "step": 5120500 }, { "epoch": 14.82, "learning_rate": 4.259111158067803e-05, "loss": 2.2521, "step": 5121000 }, { "epoch": 14.82, "learning_rate": 4.259038938032605e-05, "loss": 2.2668, "step": 5121500 }, { "epoch": 14.83, "learning_rate": 4.258966717997407e-05, "loss": 2.249, "step": 5122000 }, { "epoch": 14.83, "learning_rate": 4.258894353232679e-05, "loss": 2.2576, "step": 5122500 }, { "epoch": 14.83, "learning_rate": 4.2588219884679515e-05, "loss": 2.2828, "step": 5123000 }, { "epoch": 14.83, "learning_rate": 4.258749623703224e-05, "loss": 2.2737, "step": 5123500 }, { "epoch": 14.83, "learning_rate": 4.258677258938496e-05, "loss": 2.2839, "step": 5124000 }, { "epoch": 14.83, "learning_rate": 4.258604894173768e-05, "loss": 2.2663, "step": 5124500 }, { "epoch": 14.83, "learning_rate": 4.2585325294090404e-05, "loss": 2.255, "step": 5125000 }, { "epoch": 14.84, "learning_rate": 4.2584601646443126e-05, "loss": 2.2762, "step": 5125500 }, { "epoch": 14.84, "learning_rate": 4.258387799879585e-05, "loss": 2.2659, "step": 5126000 }, { "epoch": 14.84, "learning_rate": 4.258315435114858e-05, "loss": 2.2631, "step": 5126500 }, { "epoch": 14.84, "learning_rate": 4.25824307035013e-05, "loss": 2.2782, "step": 5127000 }, { "epoch": 14.84, "learning_rate": 4.258170705585402e-05, "loss": 2.2754, "step": 5127500 }, { "epoch": 14.84, "learning_rate": 4.2580983408206744e-05, "loss": 2.2746, "step": 5128000 }, { "epoch": 14.84, "learning_rate": 4.258025976055947e-05, "loss": 2.2646, "step": 5128500 }, { "epoch": 14.85, "learning_rate": 4.2579536112912195e-05, "loss": 2.2604, "step": 5129000 }, { "epoch": 14.85, "learning_rate": 4.257881391256021e-05, "loss": 2.276, "step": 5129500 }, { "epoch": 14.85, "learning_rate": 4.257809026491293e-05, "loss": 2.2626, "step": 5130000 }, { "epoch": 14.85, "learning_rate": 4.2577366617265655e-05, "loss": 2.2633, "step": 5130500 }, { "epoch": 14.85, "learning_rate": 4.257664441691368e-05, "loss": 2.2634, "step": 5131000 }, { "epoch": 14.85, "learning_rate": 4.25759207692664e-05, "loss": 2.2259, "step": 5131500 }, { "epoch": 14.86, "learning_rate": 4.257519712161912e-05, "loss": 2.2693, "step": 5132000 }, { "epoch": 14.86, "learning_rate": 4.2574473473971844e-05, "loss": 2.2537, "step": 5132500 }, { "epoch": 14.86, "learning_rate": 4.2573749826324567e-05, "loss": 2.2531, "step": 5133000 }, { "epoch": 14.86, "learning_rate": 4.257302617867729e-05, "loss": 2.2665, "step": 5133500 }, { "epoch": 14.86, "learning_rate": 4.2572303978325304e-05, "loss": 2.2561, "step": 5134000 }, { "epoch": 14.86, "learning_rate": 4.2571580330678027e-05, "loss": 2.2536, "step": 5134500 }, { "epoch": 14.86, "learning_rate": 4.257085668303075e-05, "loss": 2.2628, "step": 5135000 }, { "epoch": 14.87, "learning_rate": 4.257013303538348e-05, "loss": 2.2668, "step": 5135500 }, { "epoch": 14.87, "learning_rate": 4.25694108350315e-05, "loss": 2.2697, "step": 5136000 }, { "epoch": 14.87, "learning_rate": 4.256868718738422e-05, "loss": 2.2687, "step": 5136500 }, { "epoch": 14.87, "learning_rate": 4.2567963539736945e-05, "loss": 2.2622, "step": 5137000 }, { "epoch": 14.87, "learning_rate": 4.256723989208967e-05, "loss": 2.2873, "step": 5137500 }, { "epoch": 14.87, "learning_rate": 4.256651624444239e-05, "loss": 2.2694, "step": 5138000 }, { "epoch": 14.87, "learning_rate": 4.256579259679511e-05, "loss": 2.2477, "step": 5138500 }, { "epoch": 14.88, "learning_rate": 4.256507039644313e-05, "loss": 2.2874, "step": 5139000 }, { "epoch": 14.88, "learning_rate": 4.2564346748795856e-05, "loss": 2.2553, "step": 5139500 }, { "epoch": 14.88, "learning_rate": 4.256362310114858e-05, "loss": 2.2728, "step": 5140000 }, { "epoch": 14.88, "learning_rate": 4.2562900900796594e-05, "loss": 2.2717, "step": 5140500 }, { "epoch": 14.88, "learning_rate": 4.2562177253149316e-05, "loss": 2.2831, "step": 5141000 }, { "epoch": 14.88, "learning_rate": 4.256145360550204e-05, "loss": 2.2447, "step": 5141500 }, { "epoch": 14.88, "learning_rate": 4.256072995785476e-05, "loss": 2.2477, "step": 5142000 }, { "epoch": 14.89, "learning_rate": 4.256000631020748e-05, "loss": 2.2594, "step": 5142500 }, { "epoch": 14.89, "learning_rate": 4.2559282662560205e-05, "loss": 2.2683, "step": 5143000 }, { "epoch": 14.89, "learning_rate": 4.2558559014912934e-05, "loss": 2.2583, "step": 5143500 }, { "epoch": 14.89, "learning_rate": 4.2557835367265656e-05, "loss": 2.2596, "step": 5144000 }, { "epoch": 14.89, "learning_rate": 4.255711171961838e-05, "loss": 2.2695, "step": 5144500 }, { "epoch": 14.89, "learning_rate": 4.25563880719711e-05, "loss": 2.2741, "step": 5145000 }, { "epoch": 14.89, "learning_rate": 4.255566442432383e-05, "loss": 2.2538, "step": 5145500 }, { "epoch": 14.9, "learning_rate": 4.2554942223971845e-05, "loss": 2.2868, "step": 5146000 }, { "epoch": 14.9, "learning_rate": 4.255421857632457e-05, "loss": 2.2555, "step": 5146500 }, { "epoch": 14.9, "learning_rate": 4.255349492867729e-05, "loss": 2.2599, "step": 5147000 }, { "epoch": 14.9, "learning_rate": 4.255277128103001e-05, "loss": 2.2828, "step": 5147500 }, { "epoch": 14.9, "learning_rate": 4.2552047633382734e-05, "loss": 2.2432, "step": 5148000 }, { "epoch": 14.9, "learning_rate": 4.2551323985735456e-05, "loss": 2.2525, "step": 5148500 }, { "epoch": 14.9, "learning_rate": 4.255060033808818e-05, "loss": 2.2572, "step": 5149000 }, { "epoch": 14.91, "learning_rate": 4.25498766904409e-05, "loss": 2.2798, "step": 5149500 }, { "epoch": 14.91, "learning_rate": 4.254915449008892e-05, "loss": 2.2526, "step": 5150000 }, { "epoch": 14.91, "learning_rate": 4.254843228973694e-05, "loss": 2.2441, "step": 5150500 }, { "epoch": 14.91, "learning_rate": 4.254770864208967e-05, "loss": 2.2716, "step": 5151000 }, { "epoch": 14.91, "learning_rate": 4.254698499444239e-05, "loss": 2.2748, "step": 5151500 }, { "epoch": 14.91, "learning_rate": 4.254626134679511e-05, "loss": 2.2673, "step": 5152000 }, { "epoch": 14.91, "learning_rate": 4.2545537699147834e-05, "loss": 2.2424, "step": 5152500 }, { "epoch": 14.92, "learning_rate": 4.2544814051500557e-05, "loss": 2.2664, "step": 5153000 }, { "epoch": 14.92, "learning_rate": 4.254409040385328e-05, "loss": 2.2822, "step": 5153500 }, { "epoch": 14.92, "learning_rate": 4.25433682035013e-05, "loss": 2.2427, "step": 5154000 }, { "epoch": 14.92, "learning_rate": 4.254264455585402e-05, "loss": 2.2437, "step": 5154500 }, { "epoch": 14.92, "learning_rate": 4.2541920908206746e-05, "loss": 2.2522, "step": 5155000 }, { "epoch": 14.92, "learning_rate": 4.254119726055947e-05, "loss": 2.2555, "step": 5155500 }, { "epoch": 14.92, "learning_rate": 4.254047361291219e-05, "loss": 2.2889, "step": 5156000 }, { "epoch": 14.93, "learning_rate": 4.253974996526491e-05, "loss": 2.2503, "step": 5156500 }, { "epoch": 14.93, "learning_rate": 4.2539026317617635e-05, "loss": 2.2523, "step": 5157000 }, { "epoch": 14.93, "learning_rate": 4.253830266997036e-05, "loss": 2.2942, "step": 5157500 }, { "epoch": 14.93, "learning_rate": 4.253757902232308e-05, "loss": 2.2685, "step": 5158000 }, { "epoch": 14.93, "learning_rate": 4.253685537467581e-05, "loss": 2.277, "step": 5158500 }, { "epoch": 14.93, "learning_rate": 4.253613172702853e-05, "loss": 2.2796, "step": 5159000 }, { "epoch": 14.93, "learning_rate": 4.253540807938126e-05, "loss": 2.2481, "step": 5159500 }, { "epoch": 14.94, "learning_rate": 4.2534685879029275e-05, "loss": 2.2489, "step": 5160000 }, { "epoch": 14.94, "learning_rate": 4.2533962231382e-05, "loss": 2.2552, "step": 5160500 }, { "epoch": 14.94, "learning_rate": 4.253323858373472e-05, "loss": 2.2595, "step": 5161000 }, { "epoch": 14.94, "learning_rate": 4.2532516383382735e-05, "loss": 2.2405, "step": 5161500 }, { "epoch": 14.94, "learning_rate": 4.253179273573546e-05, "loss": 2.2633, "step": 5162000 }, { "epoch": 14.94, "learning_rate": 4.253107053538348e-05, "loss": 2.271, "step": 5162500 }, { "epoch": 14.94, "learning_rate": 4.25303468877362e-05, "loss": 2.2693, "step": 5163000 }, { "epoch": 14.95, "learning_rate": 4.2529623240088924e-05, "loss": 2.2723, "step": 5163500 }, { "epoch": 14.95, "learning_rate": 4.2528899592441646e-05, "loss": 2.2551, "step": 5164000 }, { "epoch": 14.95, "learning_rate": 4.252817594479437e-05, "loss": 2.2609, "step": 5164500 }, { "epoch": 14.95, "learning_rate": 4.2527453744442384e-05, "loss": 2.2705, "step": 5165000 }, { "epoch": 14.95, "learning_rate": 4.2526730096795106e-05, "loss": 2.2517, "step": 5165500 }, { "epoch": 14.95, "learning_rate": 4.2526006449147835e-05, "loss": 2.264, "step": 5166000 }, { "epoch": 14.95, "learning_rate": 4.252528280150056e-05, "loss": 2.275, "step": 5166500 }, { "epoch": 14.96, "learning_rate": 4.252455915385328e-05, "loss": 2.238, "step": 5167000 }, { "epoch": 14.96, "learning_rate": 4.252383550620601e-05, "loss": 2.2962, "step": 5167500 }, { "epoch": 14.96, "learning_rate": 4.252311185855873e-05, "loss": 2.2613, "step": 5168000 }, { "epoch": 14.96, "learning_rate": 4.252238821091145e-05, "loss": 2.2385, "step": 5168500 }, { "epoch": 14.96, "learning_rate": 4.2521664563264175e-05, "loss": 2.2603, "step": 5169000 }, { "epoch": 14.96, "learning_rate": 4.25209409156169e-05, "loss": 2.2604, "step": 5169500 }, { "epoch": 14.97, "learning_rate": 4.252021726796962e-05, "loss": 2.2713, "step": 5170000 }, { "epoch": 14.97, "learning_rate": 4.251949362032234e-05, "loss": 2.2727, "step": 5170500 }, { "epoch": 14.97, "learning_rate": 4.2518769972675064e-05, "loss": 2.2532, "step": 5171000 }, { "epoch": 14.97, "learning_rate": 4.251804777232308e-05, "loss": 2.2815, "step": 5171500 }, { "epoch": 14.97, "learning_rate": 4.251732412467581e-05, "loss": 2.2604, "step": 5172000 }, { "epoch": 14.97, "learning_rate": 4.251660047702853e-05, "loss": 2.2655, "step": 5172500 }, { "epoch": 14.97, "learning_rate": 4.2515878276676547e-05, "loss": 2.2664, "step": 5173000 }, { "epoch": 14.98, "learning_rate": 4.2515154629029276e-05, "loss": 2.2749, "step": 5173500 }, { "epoch": 14.98, "learning_rate": 4.2514430981382e-05, "loss": 2.2793, "step": 5174000 }, { "epoch": 14.98, "learning_rate": 4.251370733373472e-05, "loss": 2.2507, "step": 5174500 }, { "epoch": 14.98, "learning_rate": 4.251298368608744e-05, "loss": 2.2838, "step": 5175000 }, { "epoch": 14.98, "learning_rate": 4.2512260038440165e-05, "loss": 2.2499, "step": 5175500 }, { "epoch": 14.98, "learning_rate": 4.251153639079289e-05, "loss": 2.2614, "step": 5176000 }, { "epoch": 14.98, "learning_rate": 4.251081419044091e-05, "loss": 2.2452, "step": 5176500 }, { "epoch": 14.99, "learning_rate": 4.251009054279363e-05, "loss": 2.2675, "step": 5177000 }, { "epoch": 14.99, "learning_rate": 4.250936834244165e-05, "loss": 2.2728, "step": 5177500 }, { "epoch": 14.99, "learning_rate": 4.250864469479437e-05, "loss": 2.2712, "step": 5178000 }, { "epoch": 14.99, "learning_rate": 4.250792104714709e-05, "loss": 2.2484, "step": 5178500 }, { "epoch": 14.99, "learning_rate": 4.2507197399499814e-05, "loss": 2.2513, "step": 5179000 }, { "epoch": 14.99, "learning_rate": 4.2506473751852536e-05, "loss": 2.2834, "step": 5179500 }, { "epoch": 14.99, "learning_rate": 4.250575010420526e-05, "loss": 2.2745, "step": 5180000 }, { "epoch": 15.0, "learning_rate": 4.250502790385328e-05, "loss": 2.2781, "step": 5180500 }, { "epoch": 15.0, "learning_rate": 4.250430425620601e-05, "loss": 2.2418, "step": 5181000 }, { "epoch": 15.0, "learning_rate": 4.250358060855873e-05, "loss": 2.2825, "step": 5181500 }, { "epoch": 15.0, "learning_rate": 4.2502856960911454e-05, "loss": 2.2728, "step": 5182000 }, { "epoch": 15.0, "eval_accuracy": 0.653979711395374, "eval_accuracy_mlm": 0.6167969977868301, "eval_accuracy_nsp": 0.8532148144074283, "eval_loss": 2.2684831619262695, "eval_runtime": 330.5599, "eval_samples_per_second": 1320.142, "eval_steps_per_second": 55.007, "step": 5182080 }, { "epoch": 15.0, "learning_rate": 4.2502133313264176e-05, "loss": 2.2451, "step": 5182500 }, { "epoch": 15.0, "learning_rate": 4.25014096656169e-05, "loss": 2.2661, "step": 5183000 }, { "epoch": 15.0, "learning_rate": 4.2500687465264914e-05, "loss": 2.2486, "step": 5183500 }, { "epoch": 15.01, "learning_rate": 4.2499963817617636e-05, "loss": 2.2817, "step": 5184000 }, { "epoch": 15.01, "learning_rate": 4.249924016997036e-05, "loss": 2.2419, "step": 5184500 }, { "epoch": 15.01, "learning_rate": 4.249851652232309e-05, "loss": 2.2364, "step": 5185000 }, { "epoch": 15.01, "learning_rate": 4.249779287467581e-05, "loss": 2.2221, "step": 5185500 }, { "epoch": 15.01, "learning_rate": 4.249706922702853e-05, "loss": 2.2512, "step": 5186000 }, { "epoch": 15.01, "learning_rate": 4.2496345579381254e-05, "loss": 2.2404, "step": 5186500 }, { "epoch": 15.01, "learning_rate": 4.249562337902927e-05, "loss": 2.2106, "step": 5187000 }, { "epoch": 15.02, "learning_rate": 4.249489973138199e-05, "loss": 2.2569, "step": 5187500 }, { "epoch": 15.02, "learning_rate": 4.249417608373472e-05, "loss": 2.212, "step": 5188000 }, { "epoch": 15.02, "learning_rate": 4.249345243608744e-05, "loss": 2.2423, "step": 5188500 }, { "epoch": 15.02, "learning_rate": 4.2492728788440165e-05, "loss": 2.2324, "step": 5189000 }, { "epoch": 15.02, "learning_rate": 4.249200514079289e-05, "loss": 2.2615, "step": 5189500 }, { "epoch": 15.02, "learning_rate": 4.249128149314561e-05, "loss": 2.2488, "step": 5190000 }, { "epoch": 15.02, "learning_rate": 4.249055784549834e-05, "loss": 2.2393, "step": 5190500 }, { "epoch": 15.03, "learning_rate": 4.248983419785106e-05, "loss": 2.2715, "step": 5191000 }, { "epoch": 15.03, "learning_rate": 4.248911055020378e-05, "loss": 2.2405, "step": 5191500 }, { "epoch": 15.03, "learning_rate": 4.24883883498518e-05, "loss": 2.2337, "step": 5192000 }, { "epoch": 15.03, "learning_rate": 4.248766470220452e-05, "loss": 2.2352, "step": 5192500 }, { "epoch": 15.03, "learning_rate": 4.248694105455724e-05, "loss": 2.2486, "step": 5193000 }, { "epoch": 15.03, "learning_rate": 4.2486217406909966e-05, "loss": 2.2445, "step": 5193500 }, { "epoch": 15.03, "learning_rate": 4.248549375926269e-05, "loss": 2.2549, "step": 5194000 }, { "epoch": 15.04, "learning_rate": 4.248477155891071e-05, "loss": 2.2393, "step": 5194500 }, { "epoch": 15.04, "learning_rate": 4.248404791126343e-05, "loss": 2.243, "step": 5195000 }, { "epoch": 15.04, "learning_rate": 4.248332426361616e-05, "loss": 2.2466, "step": 5195500 }, { "epoch": 15.04, "learning_rate": 4.2482600615968884e-05, "loss": 2.2635, "step": 5196000 }, { "epoch": 15.04, "learning_rate": 4.2481876968321606e-05, "loss": 2.2294, "step": 5196500 }, { "epoch": 15.04, "learning_rate": 4.248115332067433e-05, "loss": 2.2419, "step": 5197000 }, { "epoch": 15.04, "learning_rate": 4.248042967302705e-05, "loss": 2.2634, "step": 5197500 }, { "epoch": 15.05, "learning_rate": 4.247970602537977e-05, "loss": 2.2423, "step": 5198000 }, { "epoch": 15.05, "learning_rate": 4.2478982377732495e-05, "loss": 2.2336, "step": 5198500 }, { "epoch": 15.05, "learning_rate": 4.247826162467581e-05, "loss": 2.2585, "step": 5199000 }, { "epoch": 15.05, "learning_rate": 4.247753797702853e-05, "loss": 2.245, "step": 5199500 }, { "epoch": 15.05, "learning_rate": 4.2476814329381255e-05, "loss": 2.2229, "step": 5200000 }, { "epoch": 15.05, "learning_rate": 4.247609212902927e-05, "loss": 2.2458, "step": 5200500 }, { "epoch": 15.05, "learning_rate": 4.247536848138199e-05, "loss": 2.2081, "step": 5201000 }, { "epoch": 15.06, "learning_rate": 4.2474644833734715e-05, "loss": 2.2697, "step": 5201500 }, { "epoch": 15.06, "learning_rate": 4.247392118608744e-05, "loss": 2.2212, "step": 5202000 }, { "epoch": 15.06, "learning_rate": 4.247319898573546e-05, "loss": 2.2116, "step": 5202500 }, { "epoch": 15.06, "learning_rate": 4.247247533808818e-05, "loss": 2.2438, "step": 5203000 }, { "epoch": 15.06, "learning_rate": 4.247175169044091e-05, "loss": 2.2646, "step": 5203500 }, { "epoch": 15.06, "learning_rate": 4.2471029490088926e-05, "loss": 2.253, "step": 5204000 }, { "epoch": 15.06, "learning_rate": 4.247030584244165e-05, "loss": 2.243, "step": 5204500 }, { "epoch": 15.07, "learning_rate": 4.246958219479437e-05, "loss": 2.2642, "step": 5205000 }, { "epoch": 15.07, "learning_rate": 4.246885854714709e-05, "loss": 2.2645, "step": 5205500 }, { "epoch": 15.07, "learning_rate": 4.2468134899499815e-05, "loss": 2.2772, "step": 5206000 }, { "epoch": 15.07, "learning_rate": 4.246741125185254e-05, "loss": 2.2442, "step": 5206500 }, { "epoch": 15.07, "learning_rate": 4.2466687604205266e-05, "loss": 2.2641, "step": 5207000 }, { "epoch": 15.07, "learning_rate": 4.246596395655799e-05, "loss": 2.2622, "step": 5207500 }, { "epoch": 15.08, "learning_rate": 4.246524030891071e-05, "loss": 2.2527, "step": 5208000 }, { "epoch": 15.08, "learning_rate": 4.246451666126343e-05, "loss": 2.2321, "step": 5208500 }, { "epoch": 15.08, "learning_rate": 4.2463793013616155e-05, "loss": 2.2684, "step": 5209000 }, { "epoch": 15.08, "learning_rate": 4.246306936596888e-05, "loss": 2.2449, "step": 5209500 }, { "epoch": 15.08, "learning_rate": 4.24623457183216e-05, "loss": 2.2503, "step": 5210000 }, { "epoch": 15.08, "learning_rate": 4.246162207067433e-05, "loss": 2.2475, "step": 5210500 }, { "epoch": 15.08, "learning_rate": 4.246089842302705e-05, "loss": 2.2432, "step": 5211000 }, { "epoch": 15.09, "learning_rate": 4.246017622267507e-05, "loss": 2.2615, "step": 5211500 }, { "epoch": 15.09, "learning_rate": 4.245945257502779e-05, "loss": 2.2604, "step": 5212000 }, { "epoch": 15.09, "learning_rate": 4.245872892738051e-05, "loss": 2.2432, "step": 5212500 }, { "epoch": 15.09, "learning_rate": 4.245800527973324e-05, "loss": 2.2356, "step": 5213000 }, { "epoch": 15.09, "learning_rate": 4.245728163208596e-05, "loss": 2.257, "step": 5213500 }, { "epoch": 15.09, "learning_rate": 4.2456557984438685e-05, "loss": 2.2352, "step": 5214000 }, { "epoch": 15.09, "learning_rate": 4.2455837231381993e-05, "loss": 2.2433, "step": 5214500 }, { "epoch": 15.1, "learning_rate": 4.2455113583734716e-05, "loss": 2.2647, "step": 5215000 }, { "epoch": 15.1, "learning_rate": 4.245438993608744e-05, "loss": 2.2462, "step": 5215500 }, { "epoch": 15.1, "learning_rate": 4.245366773573546e-05, "loss": 2.237, "step": 5216000 }, { "epoch": 15.1, "learning_rate": 4.245294408808818e-05, "loss": 2.2542, "step": 5216500 }, { "epoch": 15.1, "learning_rate": 4.2452220440440905e-05, "loss": 2.2473, "step": 5217000 }, { "epoch": 15.1, "learning_rate": 4.245149679279363e-05, "loss": 2.234, "step": 5217500 }, { "epoch": 15.1, "learning_rate": 4.2450773145146356e-05, "loss": 2.2381, "step": 5218000 }, { "epoch": 15.11, "learning_rate": 4.245005094479437e-05, "loss": 2.2359, "step": 5218500 }, { "epoch": 15.11, "learning_rate": 4.2449327297147094e-05, "loss": 2.2433, "step": 5219000 }, { "epoch": 15.11, "learning_rate": 4.2448603649499816e-05, "loss": 2.2408, "step": 5219500 }, { "epoch": 15.11, "learning_rate": 4.244788000185254e-05, "loss": 2.2545, "step": 5220000 }, { "epoch": 15.11, "learning_rate": 4.244715635420527e-05, "loss": 2.2546, "step": 5220500 }, { "epoch": 15.11, "learning_rate": 4.244643270655799e-05, "loss": 2.2652, "step": 5221000 }, { "epoch": 15.11, "learning_rate": 4.244570905891071e-05, "loss": 2.2641, "step": 5221500 }, { "epoch": 15.12, "learning_rate": 4.2444985411263434e-05, "loss": 2.2347, "step": 5222000 }, { "epoch": 15.12, "learning_rate": 4.2444261763616156e-05, "loss": 2.2568, "step": 5222500 }, { "epoch": 15.12, "learning_rate": 4.244353811596888e-05, "loss": 2.2513, "step": 5223000 }, { "epoch": 15.12, "learning_rate": 4.24428144683216e-05, "loss": 2.2505, "step": 5223500 }, { "epoch": 15.12, "learning_rate": 4.244209082067432e-05, "loss": 2.2154, "step": 5224000 }, { "epoch": 15.12, "learning_rate": 4.244136862032234e-05, "loss": 2.2549, "step": 5224500 }, { "epoch": 15.12, "learning_rate": 4.244064497267507e-05, "loss": 2.2349, "step": 5225000 }, { "epoch": 15.13, "learning_rate": 4.243992277232308e-05, "loss": 2.2335, "step": 5225500 }, { "epoch": 15.13, "learning_rate": 4.243919912467581e-05, "loss": 2.263, "step": 5226000 }, { "epoch": 15.13, "learning_rate": 4.2438475477028534e-05, "loss": 2.2534, "step": 5226500 }, { "epoch": 15.13, "learning_rate": 4.243775327667655e-05, "loss": 2.2354, "step": 5227000 }, { "epoch": 15.13, "learning_rate": 4.243702962902927e-05, "loss": 2.2767, "step": 5227500 }, { "epoch": 15.13, "learning_rate": 4.2436305981381994e-05, "loss": 2.2258, "step": 5228000 }, { "epoch": 15.13, "learning_rate": 4.2435582333734716e-05, "loss": 2.2679, "step": 5228500 }, { "epoch": 15.14, "learning_rate": 4.243485868608744e-05, "loss": 2.2372, "step": 5229000 }, { "epoch": 15.14, "learning_rate": 4.243413648573546e-05, "loss": 2.2262, "step": 5229500 }, { "epoch": 15.14, "learning_rate": 4.243341283808818e-05, "loss": 2.2508, "step": 5230000 }, { "epoch": 15.14, "learning_rate": 4.2432689190440906e-05, "loss": 2.2302, "step": 5230500 }, { "epoch": 15.14, "learning_rate": 4.243196554279363e-05, "loss": 2.2364, "step": 5231000 }, { "epoch": 15.14, "learning_rate": 4.243124189514635e-05, "loss": 2.2743, "step": 5231500 }, { "epoch": 15.14, "learning_rate": 4.243051824749907e-05, "loss": 2.2375, "step": 5232000 }, { "epoch": 15.15, "learning_rate": 4.2429794599851794e-05, "loss": 2.2523, "step": 5232500 }, { "epoch": 15.15, "learning_rate": 4.2429070952204523e-05, "loss": 2.2572, "step": 5233000 }, { "epoch": 15.15, "learning_rate": 4.2428347304557246e-05, "loss": 2.2359, "step": 5233500 }, { "epoch": 15.15, "learning_rate": 4.242762365690997e-05, "loss": 2.2484, "step": 5234000 }, { "epoch": 15.15, "learning_rate": 4.242690000926269e-05, "loss": 2.22, "step": 5234500 }, { "epoch": 15.15, "learning_rate": 4.242617636161542e-05, "loss": 2.247, "step": 5235000 }, { "epoch": 15.15, "learning_rate": 4.242545271396814e-05, "loss": 2.2531, "step": 5235500 }, { "epoch": 15.16, "learning_rate": 4.2424729066320864e-05, "loss": 2.2298, "step": 5236000 }, { "epoch": 15.16, "learning_rate": 4.242400686596888e-05, "loss": 2.255, "step": 5236500 }, { "epoch": 15.16, "learning_rate": 4.24232832183216e-05, "loss": 2.2576, "step": 5237000 }, { "epoch": 15.16, "learning_rate": 4.2422559570674324e-05, "loss": 2.2298, "step": 5237500 }, { "epoch": 15.16, "learning_rate": 4.2421835923027046e-05, "loss": 2.2419, "step": 5238000 }, { "epoch": 15.16, "learning_rate": 4.242111372267507e-05, "loss": 2.2582, "step": 5238500 }, { "epoch": 15.16, "learning_rate": 4.242039007502779e-05, "loss": 2.2341, "step": 5239000 }, { "epoch": 15.17, "learning_rate": 4.241966642738051e-05, "loss": 2.2644, "step": 5239500 }, { "epoch": 15.17, "learning_rate": 4.2418942779733235e-05, "loss": 2.2411, "step": 5240000 }, { "epoch": 15.17, "learning_rate": 4.2418219132085964e-05, "loss": 2.2435, "step": 5240500 }, { "epoch": 15.17, "learning_rate": 4.2417495484438686e-05, "loss": 2.2421, "step": 5241000 }, { "epoch": 15.17, "learning_rate": 4.241677183679141e-05, "loss": 2.245, "step": 5241500 }, { "epoch": 15.17, "learning_rate": 4.241604818914413e-05, "loss": 2.2433, "step": 5242000 }, { "epoch": 15.17, "learning_rate": 4.241532454149685e-05, "loss": 2.2405, "step": 5242500 }, { "epoch": 15.18, "learning_rate": 4.241460234114487e-05, "loss": 2.2499, "step": 5243000 }, { "epoch": 15.18, "learning_rate": 4.241387869349759e-05, "loss": 2.2392, "step": 5243500 }, { "epoch": 15.18, "learning_rate": 4.241315504585032e-05, "loss": 2.2357, "step": 5244000 }, { "epoch": 15.18, "learning_rate": 4.241243139820304e-05, "loss": 2.2412, "step": 5244500 }, { "epoch": 15.18, "learning_rate": 4.2411707750555764e-05, "loss": 2.2562, "step": 5245000 }, { "epoch": 15.18, "learning_rate": 4.2410984102908486e-05, "loss": 2.2423, "step": 5245500 }, { "epoch": 15.19, "learning_rate": 4.241026045526121e-05, "loss": 2.2487, "step": 5246000 }, { "epoch": 15.19, "learning_rate": 4.2409538254909224e-05, "loss": 2.2425, "step": 5246500 }, { "epoch": 15.19, "learning_rate": 4.2408814607261946e-05, "loss": 2.242, "step": 5247000 }, { "epoch": 15.19, "learning_rate": 4.240809095961467e-05, "loss": 2.2579, "step": 5247500 }, { "epoch": 15.19, "learning_rate": 4.24073673119674e-05, "loss": 2.24, "step": 5248000 }, { "epoch": 15.19, "learning_rate": 4.240664366432012e-05, "loss": 2.2468, "step": 5248500 }, { "epoch": 15.19, "learning_rate": 4.240592001667284e-05, "loss": 2.256, "step": 5249000 }, { "epoch": 15.2, "learning_rate": 4.240519636902557e-05, "loss": 2.261, "step": 5249500 }, { "epoch": 15.2, "learning_rate": 4.240447416867359e-05, "loss": 2.2376, "step": 5250000 }, { "epoch": 15.2, "learning_rate": 4.240375052102631e-05, "loss": 2.2406, "step": 5250500 }, { "epoch": 15.2, "learning_rate": 4.240302687337903e-05, "loss": 2.2467, "step": 5251000 }, { "epoch": 15.2, "learning_rate": 4.240230322573175e-05, "loss": 2.2485, "step": 5251500 }, { "epoch": 15.2, "learning_rate": 4.240158102537977e-05, "loss": 2.2412, "step": 5252000 }, { "epoch": 15.2, "learning_rate": 4.24008573777325e-05, "loss": 2.2461, "step": 5252500 }, { "epoch": 15.21, "learning_rate": 4.2400135177380513e-05, "loss": 2.2577, "step": 5253000 }, { "epoch": 15.21, "learning_rate": 4.2399411529733236e-05, "loss": 2.245, "step": 5253500 }, { "epoch": 15.21, "learning_rate": 4.239868788208596e-05, "loss": 2.2311, "step": 5254000 }, { "epoch": 15.21, "learning_rate": 4.239796423443868e-05, "loss": 2.2482, "step": 5254500 }, { "epoch": 15.21, "learning_rate": 4.23972405867914e-05, "loss": 2.2439, "step": 5255000 }, { "epoch": 15.21, "learning_rate": 4.239651693914413e-05, "loss": 2.2316, "step": 5255500 }, { "epoch": 15.21, "learning_rate": 4.2395793291496854e-05, "loss": 2.2464, "step": 5256000 }, { "epoch": 15.22, "learning_rate": 4.2395069643849576e-05, "loss": 2.2333, "step": 5256500 }, { "epoch": 15.22, "learning_rate": 4.23943459962023e-05, "loss": 2.2392, "step": 5257000 }, { "epoch": 15.22, "learning_rate": 4.239362234855502e-05, "loss": 2.2452, "step": 5257500 }, { "epoch": 15.22, "learning_rate": 4.239290014820304e-05, "loss": 2.2747, "step": 5258000 }, { "epoch": 15.22, "learning_rate": 4.2392176500555765e-05, "loss": 2.2409, "step": 5258500 }, { "epoch": 15.22, "learning_rate": 4.239145430020378e-05, "loss": 2.238, "step": 5259000 }, { "epoch": 15.22, "learning_rate": 4.23907306525565e-05, "loss": 2.2414, "step": 5259500 }, { "epoch": 15.23, "learning_rate": 4.2390007004909225e-05, "loss": 2.2284, "step": 5260000 }, { "epoch": 15.23, "learning_rate": 4.238928335726195e-05, "loss": 2.2442, "step": 5260500 }, { "epoch": 15.23, "learning_rate": 4.238855970961467e-05, "loss": 2.238, "step": 5261000 }, { "epoch": 15.23, "learning_rate": 4.23878360619674e-05, "loss": 2.2574, "step": 5261500 }, { "epoch": 15.23, "learning_rate": 4.238711241432012e-05, "loss": 2.269, "step": 5262000 }, { "epoch": 15.23, "learning_rate": 4.238638876667285e-05, "loss": 2.2538, "step": 5262500 }, { "epoch": 15.23, "learning_rate": 4.238566511902557e-05, "loss": 2.2392, "step": 5263000 }, { "epoch": 15.24, "learning_rate": 4.2384941471378294e-05, "loss": 2.2349, "step": 5263500 }, { "epoch": 15.24, "learning_rate": 4.2384217823731016e-05, "loss": 2.2327, "step": 5264000 }, { "epoch": 15.24, "learning_rate": 4.238349417608374e-05, "loss": 2.2831, "step": 5264500 }, { "epoch": 15.24, "learning_rate": 4.2382771975731754e-05, "loss": 2.2411, "step": 5265000 }, { "epoch": 15.24, "learning_rate": 4.238204977537977e-05, "loss": 2.2468, "step": 5265500 }, { "epoch": 15.24, "learning_rate": 4.23813261277325e-05, "loss": 2.237, "step": 5266000 }, { "epoch": 15.24, "learning_rate": 4.238060248008522e-05, "loss": 2.2449, "step": 5266500 }, { "epoch": 15.25, "learning_rate": 4.237987883243794e-05, "loss": 2.2708, "step": 5267000 }, { "epoch": 15.25, "learning_rate": 4.2379155184790665e-05, "loss": 2.2305, "step": 5267500 }, { "epoch": 15.25, "learning_rate": 4.237843298443868e-05, "loss": 2.2444, "step": 5268000 }, { "epoch": 15.25, "learning_rate": 4.23777093367914e-05, "loss": 2.2536, "step": 5268500 }, { "epoch": 15.25, "learning_rate": 4.2376985689144125e-05, "loss": 2.2726, "step": 5269000 }, { "epoch": 15.25, "learning_rate": 4.237626204149685e-05, "loss": 2.2582, "step": 5269500 }, { "epoch": 15.25, "learning_rate": 4.237553839384958e-05, "loss": 2.2246, "step": 5270000 }, { "epoch": 15.26, "learning_rate": 4.23748147462023e-05, "loss": 2.2385, "step": 5270500 }, { "epoch": 15.26, "learning_rate": 4.237409109855502e-05, "loss": 2.2432, "step": 5271000 }, { "epoch": 15.26, "learning_rate": 4.237336745090775e-05, "loss": 2.241, "step": 5271500 }, { "epoch": 15.26, "learning_rate": 4.237264380326047e-05, "loss": 2.2567, "step": 5272000 }, { "epoch": 15.26, "learning_rate": 4.2371920155613195e-05, "loss": 2.2215, "step": 5272500 }, { "epoch": 15.26, "learning_rate": 4.237119795526121e-05, "loss": 2.2565, "step": 5273000 }, { "epoch": 15.26, "learning_rate": 4.237047430761393e-05, "loss": 2.2318, "step": 5273500 }, { "epoch": 15.27, "learning_rate": 4.2369750659966655e-05, "loss": 2.2457, "step": 5274000 }, { "epoch": 15.27, "learning_rate": 4.236902701231938e-05, "loss": 2.2659, "step": 5274500 }, { "epoch": 15.27, "learning_rate": 4.23683033646721e-05, "loss": 2.2519, "step": 5275000 }, { "epoch": 15.27, "learning_rate": 4.236757971702482e-05, "loss": 2.2213, "step": 5275500 }, { "epoch": 15.27, "learning_rate": 4.2366857516672844e-05, "loss": 2.2583, "step": 5276000 }, { "epoch": 15.27, "learning_rate": 4.2366133869025566e-05, "loss": 2.275, "step": 5276500 }, { "epoch": 15.27, "learning_rate": 4.236541166867358e-05, "loss": 2.239, "step": 5277000 }, { "epoch": 15.28, "learning_rate": 4.2364688021026304e-05, "loss": 2.2408, "step": 5277500 }, { "epoch": 15.28, "learning_rate": 4.2363965820674326e-05, "loss": 2.2455, "step": 5278000 }, { "epoch": 15.28, "learning_rate": 4.236324217302705e-05, "loss": 2.2344, "step": 5278500 }, { "epoch": 15.28, "learning_rate": 4.236251852537978e-05, "loss": 2.2417, "step": 5279000 }, { "epoch": 15.28, "learning_rate": 4.23617948777325e-05, "loss": 2.2383, "step": 5279500 }, { "epoch": 15.28, "learning_rate": 4.236107123008522e-05, "loss": 2.2295, "step": 5280000 }, { "epoch": 15.28, "learning_rate": 4.2360347582437944e-05, "loss": 2.2743, "step": 5280500 }, { "epoch": 15.29, "learning_rate": 4.2359623934790666e-05, "loss": 2.2668, "step": 5281000 }, { "epoch": 15.29, "learning_rate": 4.235890028714339e-05, "loss": 2.2451, "step": 5281500 }, { "epoch": 15.29, "learning_rate": 4.235817663949611e-05, "loss": 2.2618, "step": 5282000 }, { "epoch": 15.29, "learning_rate": 4.235745299184883e-05, "loss": 2.262, "step": 5282500 }, { "epoch": 15.29, "learning_rate": 4.2356729344201555e-05, "loss": 2.2582, "step": 5283000 }, { "epoch": 15.29, "learning_rate": 4.235600569655428e-05, "loss": 2.2583, "step": 5283500 }, { "epoch": 15.3, "learning_rate": 4.2355282048907e-05, "loss": 2.2503, "step": 5284000 }, { "epoch": 15.3, "learning_rate": 4.235455984855502e-05, "loss": 2.2624, "step": 5284500 }, { "epoch": 15.3, "learning_rate": 4.235383620090775e-05, "loss": 2.259, "step": 5285000 }, { "epoch": 15.3, "learning_rate": 4.2353114000555767e-05, "loss": 2.2334, "step": 5285500 }, { "epoch": 15.3, "learning_rate": 4.235239035290849e-05, "loss": 2.273, "step": 5286000 }, { "epoch": 15.3, "learning_rate": 4.235166670526121e-05, "loss": 2.2426, "step": 5286500 }, { "epoch": 15.3, "learning_rate": 4.235094305761393e-05, "loss": 2.2268, "step": 5287000 }, { "epoch": 15.31, "learning_rate": 4.2350219409966655e-05, "loss": 2.2487, "step": 5287500 }, { "epoch": 15.31, "learning_rate": 4.234949576231938e-05, "loss": 2.268, "step": 5288000 }, { "epoch": 15.31, "learning_rate": 4.23487735619674e-05, "loss": 2.246, "step": 5288500 }, { "epoch": 15.31, "learning_rate": 4.234804991432012e-05, "loss": 2.2287, "step": 5289000 }, { "epoch": 15.31, "learning_rate": 4.2347326266672844e-05, "loss": 2.2634, "step": 5289500 }, { "epoch": 15.31, "learning_rate": 4.234660406632086e-05, "loss": 2.2473, "step": 5290000 }, { "epoch": 15.31, "learning_rate": 4.234588041867358e-05, "loss": 2.2662, "step": 5290500 }, { "epoch": 15.32, "learning_rate": 4.2345156771026304e-05, "loss": 2.2439, "step": 5291000 }, { "epoch": 15.32, "learning_rate": 4.234443312337903e-05, "loss": 2.2732, "step": 5291500 }, { "epoch": 15.32, "learning_rate": 4.234370947573175e-05, "loss": 2.2466, "step": 5292000 }, { "epoch": 15.32, "learning_rate": 4.234298582808448e-05, "loss": 2.2463, "step": 5292500 }, { "epoch": 15.32, "learning_rate": 4.23422621804372e-05, "loss": 2.2581, "step": 5293000 }, { "epoch": 15.32, "learning_rate": 4.234153853278993e-05, "loss": 2.2403, "step": 5293500 }, { "epoch": 15.32, "learning_rate": 4.234081488514265e-05, "loss": 2.2396, "step": 5294000 }, { "epoch": 15.33, "learning_rate": 4.2340091237495374e-05, "loss": 2.2808, "step": 5294500 }, { "epoch": 15.33, "learning_rate": 4.2339367589848096e-05, "loss": 2.2342, "step": 5295000 }, { "epoch": 15.33, "learning_rate": 4.233864394220082e-05, "loss": 2.2421, "step": 5295500 }, { "epoch": 15.33, "learning_rate": 4.233792029455354e-05, "loss": 2.2612, "step": 5296000 }, { "epoch": 15.33, "learning_rate": 4.233719664690626e-05, "loss": 2.2524, "step": 5296500 }, { "epoch": 15.33, "learning_rate": 4.2336472999258985e-05, "loss": 2.2769, "step": 5297000 }, { "epoch": 15.33, "learning_rate": 4.233574935161171e-05, "loss": 2.2372, "step": 5297500 }, { "epoch": 15.34, "learning_rate": 4.233502715125973e-05, "loss": 2.229, "step": 5298000 }, { "epoch": 15.34, "learning_rate": 4.233430350361245e-05, "loss": 2.245, "step": 5298500 }, { "epoch": 15.34, "learning_rate": 4.233358130326047e-05, "loss": 2.2535, "step": 5299000 }, { "epoch": 15.34, "learning_rate": 4.233285765561319e-05, "loss": 2.26, "step": 5299500 }, { "epoch": 15.34, "learning_rate": 4.233213400796592e-05, "loss": 2.2449, "step": 5300000 }, { "epoch": 15.34, "learning_rate": 4.233141036031864e-05, "loss": 2.2502, "step": 5300500 }, { "epoch": 15.34, "learning_rate": 4.233068671267136e-05, "loss": 2.2396, "step": 5301000 }, { "epoch": 15.35, "learning_rate": 4.232996451231938e-05, "loss": 2.259, "step": 5301500 }, { "epoch": 15.35, "learning_rate": 4.2329243759262694e-05, "loss": 2.2258, "step": 5302000 }, { "epoch": 15.35, "learning_rate": 4.2328520111615416e-05, "loss": 2.2715, "step": 5302500 }, { "epoch": 15.35, "learning_rate": 4.232779646396814e-05, "loss": 2.2792, "step": 5303000 }, { "epoch": 15.35, "learning_rate": 4.232707281632086e-05, "loss": 2.2551, "step": 5303500 }, { "epoch": 15.35, "learning_rate": 4.232634916867358e-05, "loss": 2.2474, "step": 5304000 }, { "epoch": 15.35, "learning_rate": 4.2325625521026305e-05, "loss": 2.2346, "step": 5304500 }, { "epoch": 15.36, "learning_rate": 4.232490187337903e-05, "loss": 2.2489, "step": 5305000 }, { "epoch": 15.36, "learning_rate": 4.2324178225731757e-05, "loss": 2.2519, "step": 5305500 }, { "epoch": 15.36, "learning_rate": 4.232345602537977e-05, "loss": 2.2441, "step": 5306000 }, { "epoch": 15.36, "learning_rate": 4.2322732377732494e-05, "loss": 2.2552, "step": 5306500 }, { "epoch": 15.36, "learning_rate": 4.2322008730085217e-05, "loss": 2.255, "step": 5307000 }, { "epoch": 15.36, "learning_rate": 4.2321285082437946e-05, "loss": 2.2426, "step": 5307500 }, { "epoch": 15.36, "learning_rate": 4.232056143479067e-05, "loss": 2.2294, "step": 5308000 }, { "epoch": 15.37, "learning_rate": 4.231983778714339e-05, "loss": 2.2568, "step": 5308500 }, { "epoch": 15.37, "learning_rate": 4.231911413949611e-05, "loss": 2.2668, "step": 5309000 }, { "epoch": 15.37, "learning_rate": 4.2318390491848835e-05, "loss": 2.2505, "step": 5309500 }, { "epoch": 15.37, "learning_rate": 4.231766684420156e-05, "loss": 2.2565, "step": 5310000 }, { "epoch": 15.37, "learning_rate": 4.231694319655428e-05, "loss": 2.257, "step": 5310500 }, { "epoch": 15.37, "learning_rate": 4.2316222443497595e-05, "loss": 2.2565, "step": 5311000 }, { "epoch": 15.37, "learning_rate": 4.231549879585032e-05, "loss": 2.2345, "step": 5311500 }, { "epoch": 15.38, "learning_rate": 4.231477514820304e-05, "loss": 2.249, "step": 5312000 }, { "epoch": 15.38, "learning_rate": 4.231405150055576e-05, "loss": 2.2582, "step": 5312500 }, { "epoch": 15.38, "learning_rate": 4.2313327852908484e-05, "loss": 2.2337, "step": 5313000 }, { "epoch": 15.38, "learning_rate": 4.2312604205261206e-05, "loss": 2.2661, "step": 5313500 }, { "epoch": 15.38, "learning_rate": 4.231188055761393e-05, "loss": 2.2479, "step": 5314000 }, { "epoch": 15.38, "learning_rate": 4.231115690996666e-05, "loss": 2.2362, "step": 5314500 }, { "epoch": 15.38, "learning_rate": 4.231043326231938e-05, "loss": 2.2477, "step": 5315000 }, { "epoch": 15.39, "learning_rate": 4.230970961467211e-05, "loss": 2.2558, "step": 5315500 }, { "epoch": 15.39, "learning_rate": 4.230898596702483e-05, "loss": 2.2726, "step": 5316000 }, { "epoch": 15.39, "learning_rate": 4.230826231937755e-05, "loss": 2.2693, "step": 5316500 }, { "epoch": 15.39, "learning_rate": 4.2307538671730275e-05, "loss": 2.2341, "step": 5317000 }, { "epoch": 15.39, "learning_rate": 4.2306815024083e-05, "loss": 2.2596, "step": 5317500 }, { "epoch": 15.39, "learning_rate": 4.230609137643572e-05, "loss": 2.2372, "step": 5318000 }, { "epoch": 15.39, "learning_rate": 4.2305369176083735e-05, "loss": 2.252, "step": 5318500 }, { "epoch": 15.4, "learning_rate": 4.230464552843646e-05, "loss": 2.2614, "step": 5319000 }, { "epoch": 15.4, "learning_rate": 4.230392188078918e-05, "loss": 2.2307, "step": 5319500 }, { "epoch": 15.4, "learning_rate": 4.23031996804372e-05, "loss": 2.2545, "step": 5320000 }, { "epoch": 15.4, "learning_rate": 4.2302476032789924e-05, "loss": 2.2363, "step": 5320500 }, { "epoch": 15.4, "learning_rate": 4.2301752385142646e-05, "loss": 2.2588, "step": 5321000 }, { "epoch": 15.4, "learning_rate": 4.230102873749537e-05, "loss": 2.2272, "step": 5321500 }, { "epoch": 15.41, "learning_rate": 4.230030508984809e-05, "loss": 2.2562, "step": 5322000 }, { "epoch": 15.41, "learning_rate": 4.229958288949611e-05, "loss": 2.2701, "step": 5322500 }, { "epoch": 15.41, "learning_rate": 4.2298859241848835e-05, "loss": 2.2487, "step": 5323000 }, { "epoch": 15.41, "learning_rate": 4.229813559420156e-05, "loss": 2.2429, "step": 5323500 }, { "epoch": 15.41, "learning_rate": 4.229741194655428e-05, "loss": 2.2574, "step": 5324000 }, { "epoch": 15.41, "learning_rate": 4.229668829890701e-05, "loss": 2.2623, "step": 5324500 }, { "epoch": 15.41, "learning_rate": 4.2295966098555024e-05, "loss": 2.258, "step": 5325000 }, { "epoch": 15.42, "learning_rate": 4.229524389820304e-05, "loss": 2.2469, "step": 5325500 }, { "epoch": 15.42, "learning_rate": 4.229452025055576e-05, "loss": 2.2252, "step": 5326000 }, { "epoch": 15.42, "learning_rate": 4.2293796602908484e-05, "loss": 2.2517, "step": 5326500 }, { "epoch": 15.42, "learning_rate": 4.2293072955261207e-05, "loss": 2.2405, "step": 5327000 }, { "epoch": 15.42, "learning_rate": 4.229234930761393e-05, "loss": 2.2643, "step": 5327500 }, { "epoch": 15.42, "learning_rate": 4.229162565996666e-05, "loss": 2.2424, "step": 5328000 }, { "epoch": 15.42, "learning_rate": 4.229090201231938e-05, "loss": 2.2445, "step": 5328500 }, { "epoch": 15.43, "learning_rate": 4.22901783646721e-05, "loss": 2.2418, "step": 5329000 }, { "epoch": 15.43, "learning_rate": 4.2289454717024825e-05, "loss": 2.26, "step": 5329500 }, { "epoch": 15.43, "learning_rate": 4.2288731069377554e-05, "loss": 2.24, "step": 5330000 }, { "epoch": 15.43, "learning_rate": 4.2288007421730276e-05, "loss": 2.2747, "step": 5330500 }, { "epoch": 15.43, "learning_rate": 4.2287283774083e-05, "loss": 2.2521, "step": 5331000 }, { "epoch": 15.43, "learning_rate": 4.228656012643572e-05, "loss": 2.2394, "step": 5331500 }, { "epoch": 15.43, "learning_rate": 4.228583647878844e-05, "loss": 2.2512, "step": 5332000 }, { "epoch": 15.44, "learning_rate": 4.2285112831141165e-05, "loss": 2.2473, "step": 5332500 }, { "epoch": 15.44, "learning_rate": 4.228439063078918e-05, "loss": 2.2541, "step": 5333000 }, { "epoch": 15.44, "learning_rate": 4.22836684304372e-05, "loss": 2.2421, "step": 5333500 }, { "epoch": 15.44, "learning_rate": 4.2282944782789925e-05, "loss": 2.2436, "step": 5334000 }, { "epoch": 15.44, "learning_rate": 4.228222113514265e-05, "loss": 2.258, "step": 5334500 }, { "epoch": 15.44, "learning_rate": 4.228149748749537e-05, "loss": 2.242, "step": 5335000 }, { "epoch": 15.44, "learning_rate": 4.228077383984809e-05, "loss": 2.2521, "step": 5335500 }, { "epoch": 15.45, "learning_rate": 4.2280050192200814e-05, "loss": 2.2702, "step": 5336000 }, { "epoch": 15.45, "learning_rate": 4.2279326544553536e-05, "loss": 2.2671, "step": 5336500 }, { "epoch": 15.45, "learning_rate": 4.227860434420156e-05, "loss": 2.2383, "step": 5337000 }, { "epoch": 15.45, "learning_rate": 4.227788069655429e-05, "loss": 2.2497, "step": 5337500 }, { "epoch": 15.45, "learning_rate": 4.227715704890701e-05, "loss": 2.2527, "step": 5338000 }, { "epoch": 15.45, "learning_rate": 4.227643340125973e-05, "loss": 2.2476, "step": 5338500 }, { "epoch": 15.45, "learning_rate": 4.2275709753612454e-05, "loss": 2.2196, "step": 5339000 }, { "epoch": 15.46, "learning_rate": 4.227498755326047e-05, "loss": 2.2522, "step": 5339500 }, { "epoch": 15.46, "learning_rate": 4.227426390561319e-05, "loss": 2.232, "step": 5340000 }, { "epoch": 15.46, "learning_rate": 4.2273540257965914e-05, "loss": 2.2325, "step": 5340500 }, { "epoch": 15.46, "learning_rate": 4.2272816610318636e-05, "loss": 2.2376, "step": 5341000 }, { "epoch": 15.46, "learning_rate": 4.227209296267136e-05, "loss": 2.2323, "step": 5341500 }, { "epoch": 15.46, "learning_rate": 4.227136931502409e-05, "loss": 2.2522, "step": 5342000 }, { "epoch": 15.46, "learning_rate": 4.227064566737681e-05, "loss": 2.2296, "step": 5342500 }, { "epoch": 15.47, "learning_rate": 4.226992201972953e-05, "loss": 2.2738, "step": 5343000 }, { "epoch": 15.47, "learning_rate": 4.2269198372082254e-05, "loss": 2.251, "step": 5343500 }, { "epoch": 15.47, "learning_rate": 4.226847617173027e-05, "loss": 2.2303, "step": 5344000 }, { "epoch": 15.47, "learning_rate": 4.226775252408299e-05, "loss": 2.2446, "step": 5344500 }, { "epoch": 15.47, "learning_rate": 4.226702887643572e-05, "loss": 2.2338, "step": 5345000 }, { "epoch": 15.47, "learning_rate": 4.2266306676083737e-05, "loss": 2.2511, "step": 5345500 }, { "epoch": 15.47, "learning_rate": 4.226558302843646e-05, "loss": 2.2677, "step": 5346000 }, { "epoch": 15.48, "learning_rate": 4.226485938078919e-05, "loss": 2.2563, "step": 5346500 }, { "epoch": 15.48, "learning_rate": 4.226413573314191e-05, "loss": 2.2605, "step": 5347000 }, { "epoch": 15.48, "learning_rate": 4.226341208549463e-05, "loss": 2.2601, "step": 5347500 }, { "epoch": 15.48, "learning_rate": 4.2262688437847355e-05, "loss": 2.2587, "step": 5348000 }, { "epoch": 15.48, "learning_rate": 4.226196479020008e-05, "loss": 2.2328, "step": 5348500 }, { "epoch": 15.48, "learning_rate": 4.22612411425528e-05, "loss": 2.217, "step": 5349000 }, { "epoch": 15.48, "learning_rate": 4.226051749490552e-05, "loss": 2.247, "step": 5349500 }, { "epoch": 15.49, "learning_rate": 4.2259793847258243e-05, "loss": 2.2569, "step": 5350000 }, { "epoch": 15.49, "learning_rate": 4.2259070199610966e-05, "loss": 2.2494, "step": 5350500 }, { "epoch": 15.49, "learning_rate": 4.225834655196369e-05, "loss": 2.2721, "step": 5351000 }, { "epoch": 15.49, "learning_rate": 4.225762290431641e-05, "loss": 2.2419, "step": 5351500 }, { "epoch": 15.49, "learning_rate": 4.225689925666914e-05, "loss": 2.2511, "step": 5352000 }, { "epoch": 15.49, "learning_rate": 4.225617705631716e-05, "loss": 2.2318, "step": 5352500 }, { "epoch": 15.49, "learning_rate": 4.2255453408669884e-05, "loss": 2.2685, "step": 5353000 }, { "epoch": 15.5, "learning_rate": 4.2254729761022606e-05, "loss": 2.2541, "step": 5353500 }, { "epoch": 15.5, "learning_rate": 4.225400611337533e-05, "loss": 2.237, "step": 5354000 }, { "epoch": 15.5, "learning_rate": 4.225328246572805e-05, "loss": 2.2667, "step": 5354500 }, { "epoch": 15.5, "learning_rate": 4.2252560265376066e-05, "loss": 2.2604, "step": 5355000 }, { "epoch": 15.5, "learning_rate": 4.225183806502409e-05, "loss": 2.2515, "step": 5355500 }, { "epoch": 15.5, "learning_rate": 4.225111441737681e-05, "loss": 2.2752, "step": 5356000 }, { "epoch": 15.5, "learning_rate": 4.225039076972953e-05, "loss": 2.2594, "step": 5356500 }, { "epoch": 15.51, "learning_rate": 4.2249667122082255e-05, "loss": 2.2435, "step": 5357000 }, { "epoch": 15.51, "learning_rate": 4.224894347443498e-05, "loss": 2.248, "step": 5357500 }, { "epoch": 15.51, "learning_rate": 4.22482198267877e-05, "loss": 2.2572, "step": 5358000 }, { "epoch": 15.51, "learning_rate": 4.224749617914042e-05, "loss": 2.2536, "step": 5358500 }, { "epoch": 15.51, "learning_rate": 4.2246772531493144e-05, "loss": 2.2593, "step": 5359000 }, { "epoch": 15.51, "learning_rate": 4.2246050331141166e-05, "loss": 2.2396, "step": 5359500 }, { "epoch": 15.52, "learning_rate": 4.224532668349389e-05, "loss": 2.2482, "step": 5360000 }, { "epoch": 15.52, "learning_rate": 4.224460303584661e-05, "loss": 2.2562, "step": 5360500 }, { "epoch": 15.52, "learning_rate": 4.224387938819934e-05, "loss": 2.2699, "step": 5361000 }, { "epoch": 15.52, "learning_rate": 4.224315574055206e-05, "loss": 2.2521, "step": 5361500 }, { "epoch": 15.52, "learning_rate": 4.224243354020008e-05, "loss": 2.239, "step": 5362000 }, { "epoch": 15.52, "learning_rate": 4.224171133984809e-05, "loss": 2.2869, "step": 5362500 }, { "epoch": 15.52, "learning_rate": 4.2240987692200815e-05, "loss": 2.2331, "step": 5363000 }, { "epoch": 15.53, "learning_rate": 4.224026549184884e-05, "loss": 2.2905, "step": 5363500 }, { "epoch": 15.53, "learning_rate": 4.223954184420156e-05, "loss": 2.2075, "step": 5364000 }, { "epoch": 15.53, "learning_rate": 4.223881819655428e-05, "loss": 2.2436, "step": 5364500 }, { "epoch": 15.53, "learning_rate": 4.2238094548907004e-05, "loss": 2.2415, "step": 5365000 }, { "epoch": 15.53, "learning_rate": 4.223737090125973e-05, "loss": 2.2532, "step": 5365500 }, { "epoch": 15.53, "learning_rate": 4.223664870090774e-05, "loss": 2.2571, "step": 5366000 }, { "epoch": 15.53, "learning_rate": 4.2235925053260464e-05, "loss": 2.2475, "step": 5366500 }, { "epoch": 15.54, "learning_rate": 4.223520285290849e-05, "loss": 2.227, "step": 5367000 }, { "epoch": 15.54, "learning_rate": 4.2234479205261216e-05, "loss": 2.253, "step": 5367500 }, { "epoch": 15.54, "learning_rate": 4.223375555761394e-05, "loss": 2.2317, "step": 5368000 }, { "epoch": 15.54, "learning_rate": 4.223303190996666e-05, "loss": 2.2348, "step": 5368500 }, { "epoch": 15.54, "learning_rate": 4.223230826231938e-05, "loss": 2.2391, "step": 5369000 }, { "epoch": 15.54, "learning_rate": 4.2231584614672105e-05, "loss": 2.2466, "step": 5369500 }, { "epoch": 15.54, "learning_rate": 4.223086096702483e-05, "loss": 2.2507, "step": 5370000 }, { "epoch": 15.55, "learning_rate": 4.223013731937755e-05, "loss": 2.2471, "step": 5370500 }, { "epoch": 15.55, "learning_rate": 4.222941367173027e-05, "loss": 2.2456, "step": 5371000 }, { "epoch": 15.55, "learning_rate": 4.2228690024082994e-05, "loss": 2.2199, "step": 5371500 }, { "epoch": 15.55, "learning_rate": 4.2227966376435716e-05, "loss": 2.2558, "step": 5372000 }, { "epoch": 15.55, "learning_rate": 4.222724272878844e-05, "loss": 2.2595, "step": 5372500 }, { "epoch": 15.55, "learning_rate": 4.222652052843646e-05, "loss": 2.2365, "step": 5373000 }, { "epoch": 15.55, "learning_rate": 4.222579688078918e-05, "loss": 2.2386, "step": 5373500 }, { "epoch": 15.56, "learning_rate": 4.2225073233141905e-05, "loss": 2.2218, "step": 5374000 }, { "epoch": 15.56, "learning_rate": 4.222434958549463e-05, "loss": 2.2374, "step": 5374500 }, { "epoch": 15.56, "learning_rate": 4.2223625937847356e-05, "loss": 2.2601, "step": 5375000 }, { "epoch": 15.56, "learning_rate": 4.222290229020008e-05, "loss": 2.264, "step": 5375500 }, { "epoch": 15.56, "learning_rate": 4.22221786425528e-05, "loss": 2.2533, "step": 5376000 }, { "epoch": 15.56, "learning_rate": 4.2221456442200816e-05, "loss": 2.2674, "step": 5376500 }, { "epoch": 15.56, "learning_rate": 4.222073279455354e-05, "loss": 2.2481, "step": 5377000 }, { "epoch": 15.57, "learning_rate": 4.222000914690627e-05, "loss": 2.2468, "step": 5377500 }, { "epoch": 15.57, "learning_rate": 4.221928549925899e-05, "loss": 2.2629, "step": 5378000 }, { "epoch": 15.57, "learning_rate": 4.221856185161171e-05, "loss": 2.2364, "step": 5378500 }, { "epoch": 15.57, "learning_rate": 4.221783965125973e-05, "loss": 2.2197, "step": 5379000 }, { "epoch": 15.57, "learning_rate": 4.221711600361245e-05, "loss": 2.2484, "step": 5379500 }, { "epoch": 15.57, "learning_rate": 4.221639235596517e-05, "loss": 2.2426, "step": 5380000 }, { "epoch": 15.57, "learning_rate": 4.221567160290849e-05, "loss": 2.2489, "step": 5380500 }, { "epoch": 15.58, "learning_rate": 4.221494795526121e-05, "loss": 2.237, "step": 5381000 }, { "epoch": 15.58, "learning_rate": 4.221422430761393e-05, "loss": 2.2481, "step": 5381500 }, { "epoch": 15.58, "learning_rate": 4.2213500659966654e-05, "loss": 2.2443, "step": 5382000 }, { "epoch": 15.58, "learning_rate": 4.221277701231938e-05, "loss": 2.2464, "step": 5382500 }, { "epoch": 15.58, "learning_rate": 4.2212053364672105e-05, "loss": 2.2645, "step": 5383000 }, { "epoch": 15.58, "learning_rate": 4.221132971702483e-05, "loss": 2.2656, "step": 5383500 }, { "epoch": 15.58, "learning_rate": 4.221060606937755e-05, "loss": 2.2458, "step": 5384000 }, { "epoch": 15.59, "learning_rate": 4.220988242173027e-05, "loss": 2.2606, "step": 5384500 }, { "epoch": 15.59, "learning_rate": 4.2209158774082994e-05, "loss": 2.2457, "step": 5385000 }, { "epoch": 15.59, "learning_rate": 4.220843512643572e-05, "loss": 2.2669, "step": 5385500 }, { "epoch": 15.59, "learning_rate": 4.220771147878844e-05, "loss": 2.247, "step": 5386000 }, { "epoch": 15.59, "learning_rate": 4.220698783114117e-05, "loss": 2.2629, "step": 5386500 }, { "epoch": 15.59, "learning_rate": 4.220626418349389e-05, "loss": 2.2547, "step": 5387000 }, { "epoch": 15.59, "learning_rate": 4.220554053584661e-05, "loss": 2.2528, "step": 5387500 }, { "epoch": 15.6, "learning_rate": 4.2204816888199335e-05, "loss": 2.2608, "step": 5388000 }, { "epoch": 15.6, "learning_rate": 4.220409468784735e-05, "loss": 2.242, "step": 5388500 }, { "epoch": 15.6, "learning_rate": 4.220337104020007e-05, "loss": 2.2433, "step": 5389000 }, { "epoch": 15.6, "learning_rate": 4.22026473925528e-05, "loss": 2.2439, "step": 5389500 }, { "epoch": 15.6, "learning_rate": 4.2201923744905524e-05, "loss": 2.2524, "step": 5390000 }, { "epoch": 15.6, "learning_rate": 4.2201200097258246e-05, "loss": 2.234, "step": 5390500 }, { "epoch": 15.6, "learning_rate": 4.220047644961097e-05, "loss": 2.2486, "step": 5391000 }, { "epoch": 15.61, "learning_rate": 4.219975280196369e-05, "loss": 2.2673, "step": 5391500 }, { "epoch": 15.61, "learning_rate": 4.219903060161171e-05, "loss": 2.2397, "step": 5392000 }, { "epoch": 15.61, "learning_rate": 4.2198306953964435e-05, "loss": 2.2631, "step": 5392500 }, { "epoch": 15.61, "learning_rate": 4.219758330631716e-05, "loss": 2.2348, "step": 5393000 }, { "epoch": 15.61, "learning_rate": 4.219685965866988e-05, "loss": 2.252, "step": 5393500 }, { "epoch": 15.61, "learning_rate": 4.2196137458317895e-05, "loss": 2.2342, "step": 5394000 }, { "epoch": 15.61, "learning_rate": 4.219541381067062e-05, "loss": 2.249, "step": 5394500 }, { "epoch": 15.62, "learning_rate": 4.219469016302334e-05, "loss": 2.2708, "step": 5395000 }, { "epoch": 15.62, "learning_rate": 4.219396651537607e-05, "loss": 2.2346, "step": 5395500 }, { "epoch": 15.62, "learning_rate": 4.219324286772879e-05, "loss": 2.2624, "step": 5396000 }, { "epoch": 15.62, "learning_rate": 4.219251922008151e-05, "loss": 2.2627, "step": 5396500 }, { "epoch": 15.62, "learning_rate": 4.219179557243424e-05, "loss": 2.2308, "step": 5397000 }, { "epoch": 15.62, "learning_rate": 4.2191071924786964e-05, "loss": 2.2394, "step": 5397500 }, { "epoch": 15.62, "learning_rate": 4.219034972443498e-05, "loss": 2.2566, "step": 5398000 }, { "epoch": 15.63, "learning_rate": 4.21896260767877e-05, "loss": 2.241, "step": 5398500 }, { "epoch": 15.63, "learning_rate": 4.2188902429140424e-05, "loss": 2.2679, "step": 5399000 }, { "epoch": 15.63, "learning_rate": 4.2188180228788446e-05, "loss": 2.2343, "step": 5399500 }, { "epoch": 15.63, "learning_rate": 4.218745658114117e-05, "loss": 2.2455, "step": 5400000 }, { "epoch": 15.63, "learning_rate": 4.218673293349389e-05, "loss": 2.2644, "step": 5400500 }, { "epoch": 15.63, "learning_rate": 4.218600928584661e-05, "loss": 2.272, "step": 5401000 }, { "epoch": 15.64, "learning_rate": 4.2185285638199335e-05, "loss": 2.2578, "step": 5401500 }, { "epoch": 15.64, "learning_rate": 4.218456199055206e-05, "loss": 2.241, "step": 5402000 }, { "epoch": 15.64, "learning_rate": 4.218383834290478e-05, "loss": 2.2559, "step": 5402500 }, { "epoch": 15.64, "learning_rate": 4.21831146952575e-05, "loss": 2.2622, "step": 5403000 }, { "epoch": 15.64, "learning_rate": 4.2182391047610224e-05, "loss": 2.2191, "step": 5403500 }, { "epoch": 15.64, "learning_rate": 4.218166884725825e-05, "loss": 2.2437, "step": 5404000 }, { "epoch": 15.64, "learning_rate": 4.218094519961097e-05, "loss": 2.2523, "step": 5404500 }, { "epoch": 15.65, "learning_rate": 4.21802215519637e-05, "loss": 2.2406, "step": 5405000 }, { "epoch": 15.65, "learning_rate": 4.217949790431642e-05, "loss": 2.245, "step": 5405500 }, { "epoch": 15.65, "learning_rate": 4.217877425666914e-05, "loss": 2.2699, "step": 5406000 }, { "epoch": 15.65, "learning_rate": 4.2178050609021865e-05, "loss": 2.2443, "step": 5406500 }, { "epoch": 15.65, "learning_rate": 4.217732840866988e-05, "loss": 2.2684, "step": 5407000 }, { "epoch": 15.65, "learning_rate": 4.21766047610226e-05, "loss": 2.2355, "step": 5407500 }, { "epoch": 15.65, "learning_rate": 4.2175881113375325e-05, "loss": 2.2347, "step": 5408000 }, { "epoch": 15.66, "learning_rate": 4.217515746572805e-05, "loss": 2.2545, "step": 5408500 }, { "epoch": 15.66, "learning_rate": 4.217443381808077e-05, "loss": 2.2344, "step": 5409000 }, { "epoch": 15.66, "learning_rate": 4.217371017043349e-05, "loss": 2.2524, "step": 5409500 }, { "epoch": 15.66, "learning_rate": 4.217298652278622e-05, "loss": 2.2674, "step": 5410000 }, { "epoch": 15.66, "learning_rate": 4.217226287513894e-05, "loss": 2.2322, "step": 5410500 }, { "epoch": 15.66, "learning_rate": 4.217154067478696e-05, "loss": 2.2353, "step": 5411000 }, { "epoch": 15.66, "learning_rate": 4.217081702713968e-05, "loss": 2.2487, "step": 5411500 }, { "epoch": 15.67, "learning_rate": 4.217009337949241e-05, "loss": 2.2482, "step": 5412000 }, { "epoch": 15.67, "learning_rate": 4.216936973184513e-05, "loss": 2.2496, "step": 5412500 }, { "epoch": 15.67, "learning_rate": 4.2168646084197854e-05, "loss": 2.2632, "step": 5413000 }, { "epoch": 15.67, "learning_rate": 4.2167922436550576e-05, "loss": 2.2422, "step": 5413500 }, { "epoch": 15.67, "learning_rate": 4.21671987889033e-05, "loss": 2.2535, "step": 5414000 }, { "epoch": 15.67, "learning_rate": 4.216647658855132e-05, "loss": 2.253, "step": 5414500 }, { "epoch": 15.67, "learning_rate": 4.216575294090404e-05, "loss": 2.236, "step": 5415000 }, { "epoch": 15.68, "learning_rate": 4.2165029293256765e-05, "loss": 2.2425, "step": 5415500 }, { "epoch": 15.68, "learning_rate": 4.216430564560949e-05, "loss": 2.2561, "step": 5416000 }, { "epoch": 15.68, "learning_rate": 4.21635834452575e-05, "loss": 2.2406, "step": 5416500 }, { "epoch": 15.68, "learning_rate": 4.216286124490552e-05, "loss": 2.2683, "step": 5417000 }, { "epoch": 15.68, "learning_rate": 4.216213759725825e-05, "loss": 2.2411, "step": 5417500 }, { "epoch": 15.68, "learning_rate": 4.216141394961097e-05, "loss": 2.2494, "step": 5418000 }, { "epoch": 15.68, "learning_rate": 4.216069030196369e-05, "loss": 2.2643, "step": 5418500 }, { "epoch": 15.69, "learning_rate": 4.2159966654316414e-05, "loss": 2.2432, "step": 5419000 }, { "epoch": 15.69, "learning_rate": 4.215924300666914e-05, "loss": 2.2565, "step": 5419500 }, { "epoch": 15.69, "learning_rate": 4.2158519359021865e-05, "loss": 2.247, "step": 5420000 }, { "epoch": 15.69, "learning_rate": 4.215779571137459e-05, "loss": 2.2362, "step": 5420500 }, { "epoch": 15.69, "learning_rate": 4.215707206372731e-05, "loss": 2.2629, "step": 5421000 }, { "epoch": 15.69, "learning_rate": 4.215634841608003e-05, "loss": 2.2438, "step": 5421500 }, { "epoch": 15.69, "learning_rate": 4.2155624768432754e-05, "loss": 2.2747, "step": 5422000 }, { "epoch": 15.7, "learning_rate": 4.2154901120785477e-05, "loss": 2.2758, "step": 5422500 }, { "epoch": 15.7, "learning_rate": 4.21541774731382e-05, "loss": 2.265, "step": 5423000 }, { "epoch": 15.7, "learning_rate": 4.215345527278622e-05, "loss": 2.2696, "step": 5423500 }, { "epoch": 15.7, "learning_rate": 4.215273162513894e-05, "loss": 2.2501, "step": 5424000 }, { "epoch": 15.7, "learning_rate": 4.2152007977491666e-05, "loss": 2.2673, "step": 5424500 }, { "epoch": 15.7, "learning_rate": 4.215128432984439e-05, "loss": 2.2452, "step": 5425000 }, { "epoch": 15.7, "learning_rate": 4.215056068219711e-05, "loss": 2.2325, "step": 5425500 }, { "epoch": 15.71, "learning_rate": 4.214983703454983e-05, "loss": 2.2294, "step": 5426000 }, { "epoch": 15.71, "learning_rate": 4.214911483419785e-05, "loss": 2.2626, "step": 5426500 }, { "epoch": 15.71, "learning_rate": 4.214839118655058e-05, "loss": 2.2511, "step": 5427000 }, { "epoch": 15.71, "learning_rate": 4.21476675389033e-05, "loss": 2.2521, "step": 5427500 }, { "epoch": 15.71, "learning_rate": 4.214694389125602e-05, "loss": 2.2495, "step": 5428000 }, { "epoch": 15.71, "learning_rate": 4.214622313819934e-05, "loss": 2.2151, "step": 5428500 }, { "epoch": 15.71, "learning_rate": 4.214549949055206e-05, "loss": 2.2449, "step": 5429000 }, { "epoch": 15.72, "learning_rate": 4.214477584290478e-05, "loss": 2.2595, "step": 5429500 }, { "epoch": 15.72, "learning_rate": 4.2144052195257504e-05, "loss": 2.2451, "step": 5430000 }, { "epoch": 15.72, "learning_rate": 4.2143328547610226e-05, "loss": 2.2594, "step": 5430500 }, { "epoch": 15.72, "learning_rate": 4.214260489996295e-05, "loss": 2.2562, "step": 5431000 }, { "epoch": 15.72, "learning_rate": 4.214188125231567e-05, "loss": 2.227, "step": 5431500 }, { "epoch": 15.72, "learning_rate": 4.21411576046684e-05, "loss": 2.2284, "step": 5432000 }, { "epoch": 15.72, "learning_rate": 4.214043395702112e-05, "loss": 2.2523, "step": 5432500 }, { "epoch": 15.73, "learning_rate": 4.2139710309373844e-05, "loss": 2.2436, "step": 5433000 }, { "epoch": 15.73, "learning_rate": 4.2138986661726566e-05, "loss": 2.2418, "step": 5433500 }, { "epoch": 15.73, "learning_rate": 4.2138263014079295e-05, "loss": 2.2504, "step": 5434000 }, { "epoch": 15.73, "learning_rate": 4.213753936643202e-05, "loss": 2.2412, "step": 5434500 }, { "epoch": 15.73, "learning_rate": 4.213681716608003e-05, "loss": 2.2949, "step": 5435000 }, { "epoch": 15.73, "learning_rate": 4.213609641302335e-05, "loss": 2.2543, "step": 5435500 }, { "epoch": 15.73, "learning_rate": 4.213537276537607e-05, "loss": 2.2417, "step": 5436000 }, { "epoch": 15.74, "learning_rate": 4.213464911772879e-05, "loss": 2.235, "step": 5436500 }, { "epoch": 15.74, "learning_rate": 4.2133925470081515e-05, "loss": 2.2489, "step": 5437000 }, { "epoch": 15.74, "learning_rate": 4.213320182243424e-05, "loss": 2.2498, "step": 5437500 }, { "epoch": 15.74, "learning_rate": 4.213247817478696e-05, "loss": 2.2597, "step": 5438000 }, { "epoch": 15.74, "learning_rate": 4.213175452713968e-05, "loss": 2.2515, "step": 5438500 }, { "epoch": 15.74, "learning_rate": 4.2131030879492404e-05, "loss": 2.2186, "step": 5439000 }, { "epoch": 15.75, "learning_rate": 4.2130307231845126e-05, "loss": 2.2589, "step": 5439500 }, { "epoch": 15.75, "learning_rate": 4.212958503149315e-05, "loss": 2.2736, "step": 5440000 }, { "epoch": 15.75, "learning_rate": 4.212886138384587e-05, "loss": 2.2567, "step": 5440500 }, { "epoch": 15.75, "learning_rate": 4.212813773619859e-05, "loss": 2.2397, "step": 5441000 }, { "epoch": 15.75, "learning_rate": 4.2127414088551315e-05, "loss": 2.2305, "step": 5441500 }, { "epoch": 15.75, "learning_rate": 4.212669188819934e-05, "loss": 2.2441, "step": 5442000 }, { "epoch": 15.75, "learning_rate": 4.212596824055206e-05, "loss": 2.2472, "step": 5442500 }, { "epoch": 15.76, "learning_rate": 4.212524459290478e-05, "loss": 2.2555, "step": 5443000 }, { "epoch": 15.76, "learning_rate": 4.2124520945257504e-05, "loss": 2.2638, "step": 5443500 }, { "epoch": 15.76, "learning_rate": 4.212379729761023e-05, "loss": 2.2231, "step": 5444000 }, { "epoch": 15.76, "learning_rate": 4.212307509725825e-05, "loss": 2.2716, "step": 5444500 }, { "epoch": 15.76, "learning_rate": 4.212235144961097e-05, "loss": 2.2513, "step": 5445000 }, { "epoch": 15.76, "learning_rate": 4.2121627801963694e-05, "loss": 2.2559, "step": 5445500 }, { "epoch": 15.76, "learning_rate": 4.2120904154316416e-05, "loss": 2.2573, "step": 5446000 }, { "epoch": 15.77, "learning_rate": 4.212018050666914e-05, "loss": 2.2642, "step": 5446500 }, { "epoch": 15.77, "learning_rate": 4.211945685902186e-05, "loss": 2.2487, "step": 5447000 }, { "epoch": 15.77, "learning_rate": 4.2118734658669876e-05, "loss": 2.2218, "step": 5447500 }, { "epoch": 15.77, "learning_rate": 4.21180110110226e-05, "loss": 2.2658, "step": 5448000 }, { "epoch": 15.77, "learning_rate": 4.211728881067062e-05, "loss": 2.2465, "step": 5448500 }, { "epoch": 15.77, "learning_rate": 4.211656516302334e-05, "loss": 2.2617, "step": 5449000 }, { "epoch": 15.77, "learning_rate": 4.211584151537607e-05, "loss": 2.2541, "step": 5449500 }, { "epoch": 15.78, "learning_rate": 4.2115117867728794e-05, "loss": 2.2453, "step": 5450000 }, { "epoch": 15.78, "learning_rate": 4.2114394220081516e-05, "loss": 2.2445, "step": 5450500 }, { "epoch": 15.78, "learning_rate": 4.211367201972953e-05, "loss": 2.2454, "step": 5451000 }, { "epoch": 15.78, "learning_rate": 4.2112948372082254e-05, "loss": 2.2674, "step": 5451500 }, { "epoch": 15.78, "learning_rate": 4.2112224724434976e-05, "loss": 2.238, "step": 5452000 }, { "epoch": 15.78, "learning_rate": 4.21115010767877e-05, "loss": 2.2449, "step": 5452500 }, { "epoch": 15.78, "learning_rate": 4.211077742914043e-05, "loss": 2.2425, "step": 5453000 }, { "epoch": 15.79, "learning_rate": 4.211005378149315e-05, "loss": 2.2528, "step": 5453500 }, { "epoch": 15.79, "learning_rate": 4.210933013384587e-05, "loss": 2.2561, "step": 5454000 }, { "epoch": 15.79, "learning_rate": 4.2108606486198594e-05, "loss": 2.2358, "step": 5454500 }, { "epoch": 15.79, "learning_rate": 4.210788428584661e-05, "loss": 2.2392, "step": 5455000 }, { "epoch": 15.79, "learning_rate": 4.210716063819933e-05, "loss": 2.2384, "step": 5455500 }, { "epoch": 15.79, "learning_rate": 4.2106436990552054e-05, "loss": 2.234, "step": 5456000 }, { "epoch": 15.79, "learning_rate": 4.2105713342904776e-05, "loss": 2.2703, "step": 5456500 }, { "epoch": 15.8, "learning_rate": 4.2104989695257505e-05, "loss": 2.2379, "step": 5457000 }, { "epoch": 15.8, "learning_rate": 4.210426604761023e-05, "loss": 2.2692, "step": 5457500 }, { "epoch": 15.8, "learning_rate": 4.210354239996295e-05, "loss": 2.2468, "step": 5458000 }, { "epoch": 15.8, "learning_rate": 4.210282019961097e-05, "loss": 2.2491, "step": 5458500 }, { "epoch": 15.8, "learning_rate": 4.2102096551963694e-05, "loss": 2.2371, "step": 5459000 }, { "epoch": 15.8, "learning_rate": 4.2101372904316417e-05, "loss": 2.2607, "step": 5459500 }, { "epoch": 15.8, "learning_rate": 4.210064925666914e-05, "loss": 2.2503, "step": 5460000 }, { "epoch": 15.81, "learning_rate": 4.209992560902186e-05, "loss": 2.2643, "step": 5460500 }, { "epoch": 15.81, "learning_rate": 4.209920196137458e-05, "loss": 2.2504, "step": 5461000 }, { "epoch": 15.81, "learning_rate": 4.2098478313727305e-05, "loss": 2.2492, "step": 5461500 }, { "epoch": 15.81, "learning_rate": 4.209775466608003e-05, "loss": 2.2122, "step": 5462000 }, { "epoch": 15.81, "learning_rate": 4.209703101843275e-05, "loss": 2.2601, "step": 5462500 }, { "epoch": 15.81, "learning_rate": 4.209630737078548e-05, "loss": 2.2287, "step": 5463000 }, { "epoch": 15.81, "learning_rate": 4.20955837231382e-05, "loss": 2.2225, "step": 5463500 }, { "epoch": 15.82, "learning_rate": 4.209486152278622e-05, "loss": 2.2486, "step": 5464000 }, { "epoch": 15.82, "learning_rate": 4.2094137875138946e-05, "loss": 2.2434, "step": 5464500 }, { "epoch": 15.82, "learning_rate": 4.209341422749167e-05, "loss": 2.2488, "step": 5465000 }, { "epoch": 15.82, "learning_rate": 4.209269057984439e-05, "loss": 2.2572, "step": 5465500 }, { "epoch": 15.82, "learning_rate": 4.2091969826787706e-05, "loss": 2.2645, "step": 5466000 }, { "epoch": 15.82, "learning_rate": 4.209124617914043e-05, "loss": 2.2163, "step": 5466500 }, { "epoch": 15.82, "learning_rate": 4.209052253149315e-05, "loss": 2.2337, "step": 5467000 }, { "epoch": 15.83, "learning_rate": 4.208979888384587e-05, "loss": 2.2605, "step": 5467500 }, { "epoch": 15.83, "learning_rate": 4.2089075236198595e-05, "loss": 2.2427, "step": 5468000 }, { "epoch": 15.83, "learning_rate": 4.208835158855132e-05, "loss": 2.2834, "step": 5468500 }, { "epoch": 15.83, "learning_rate": 4.208762794090404e-05, "loss": 2.2569, "step": 5469000 }, { "epoch": 15.83, "learning_rate": 4.208690429325676e-05, "loss": 2.2458, "step": 5469500 }, { "epoch": 15.83, "learning_rate": 4.2086180645609484e-05, "loss": 2.2688, "step": 5470000 }, { "epoch": 15.83, "learning_rate": 4.2085456997962206e-05, "loss": 2.2413, "step": 5470500 }, { "epoch": 15.84, "learning_rate": 4.208473479761023e-05, "loss": 2.218, "step": 5471000 }, { "epoch": 15.84, "learning_rate": 4.208401114996295e-05, "loss": 2.2557, "step": 5471500 }, { "epoch": 15.84, "learning_rate": 4.208328750231568e-05, "loss": 2.247, "step": 5472000 }, { "epoch": 15.84, "learning_rate": 4.20825638546684e-05, "loss": 2.2346, "step": 5472500 }, { "epoch": 15.84, "learning_rate": 4.208184165431642e-05, "loss": 2.2417, "step": 5473000 }, { "epoch": 15.84, "learning_rate": 4.208111800666914e-05, "loss": 2.2592, "step": 5473500 }, { "epoch": 15.84, "learning_rate": 4.208039435902186e-05, "loss": 2.2451, "step": 5474000 }, { "epoch": 15.85, "learning_rate": 4.2079670711374584e-05, "loss": 2.2407, "step": 5474500 }, { "epoch": 15.85, "learning_rate": 4.2078947063727306e-05, "loss": 2.2243, "step": 5475000 }, { "epoch": 15.85, "learning_rate": 4.207822341608003e-05, "loss": 2.23, "step": 5475500 }, { "epoch": 15.85, "learning_rate": 4.207749976843276e-05, "loss": 2.2402, "step": 5476000 }, { "epoch": 15.85, "learning_rate": 4.207677612078548e-05, "loss": 2.2472, "step": 5476500 }, { "epoch": 15.85, "learning_rate": 4.20760524731382e-05, "loss": 2.2678, "step": 5477000 }, { "epoch": 15.86, "learning_rate": 4.2075328825490924e-05, "loss": 2.248, "step": 5477500 }, { "epoch": 15.86, "learning_rate": 4.2074605177843646e-05, "loss": 2.2401, "step": 5478000 }, { "epoch": 15.86, "learning_rate": 4.207388153019637e-05, "loss": 2.2261, "step": 5478500 }, { "epoch": 15.86, "learning_rate": 4.207315932984439e-05, "loss": 2.2438, "step": 5479000 }, { "epoch": 15.86, "learning_rate": 4.207243568219711e-05, "loss": 2.2614, "step": 5479500 }, { "epoch": 15.86, "learning_rate": 4.2071712034549835e-05, "loss": 2.2648, "step": 5480000 }, { "epoch": 15.86, "learning_rate": 4.207098838690256e-05, "loss": 2.254, "step": 5480500 }, { "epoch": 15.87, "learning_rate": 4.207026473925528e-05, "loss": 2.2682, "step": 5481000 }, { "epoch": 15.87, "learning_rate": 4.206954109160801e-05, "loss": 2.2476, "step": 5481500 }, { "epoch": 15.87, "learning_rate": 4.206881744396073e-05, "loss": 2.2565, "step": 5482000 }, { "epoch": 15.87, "learning_rate": 4.2068093796313453e-05, "loss": 2.254, "step": 5482500 }, { "epoch": 15.87, "learning_rate": 4.206737159596147e-05, "loss": 2.2478, "step": 5483000 }, { "epoch": 15.87, "learning_rate": 4.206664794831419e-05, "loss": 2.2479, "step": 5483500 }, { "epoch": 15.87, "learning_rate": 4.2065924300666913e-05, "loss": 2.2543, "step": 5484000 }, { "epoch": 15.88, "learning_rate": 4.2065200653019636e-05, "loss": 2.2552, "step": 5484500 }, { "epoch": 15.88, "learning_rate": 4.206447700537236e-05, "loss": 2.2372, "step": 5485000 }, { "epoch": 15.88, "learning_rate": 4.206375480502038e-05, "loss": 2.2696, "step": 5485500 }, { "epoch": 15.88, "learning_rate": 4.20630311573731e-05, "loss": 2.2638, "step": 5486000 }, { "epoch": 15.88, "learning_rate": 4.206230750972583e-05, "loss": 2.2537, "step": 5486500 }, { "epoch": 15.88, "learning_rate": 4.2061583862078554e-05, "loss": 2.2389, "step": 5487000 }, { "epoch": 15.88, "learning_rate": 4.206086166172657e-05, "loss": 2.2535, "step": 5487500 }, { "epoch": 15.89, "learning_rate": 4.2060139461374585e-05, "loss": 2.2582, "step": 5488000 }, { "epoch": 15.89, "learning_rate": 4.205941581372731e-05, "loss": 2.2388, "step": 5488500 }, { "epoch": 15.89, "learning_rate": 4.205869216608003e-05, "loss": 2.2442, "step": 5489000 }, { "epoch": 15.89, "learning_rate": 4.205796851843276e-05, "loss": 2.2426, "step": 5489500 }, { "epoch": 15.89, "learning_rate": 4.2057246318080774e-05, "loss": 2.2585, "step": 5490000 }, { "epoch": 15.89, "learning_rate": 4.2056522670433496e-05, "loss": 2.2389, "step": 5490500 }, { "epoch": 15.89, "learning_rate": 4.205579902278622e-05, "loss": 2.2418, "step": 5491000 }, { "epoch": 15.9, "learning_rate": 4.205507537513894e-05, "loss": 2.2343, "step": 5491500 }, { "epoch": 15.9, "learning_rate": 4.205435172749166e-05, "loss": 2.2492, "step": 5492000 }, { "epoch": 15.9, "learning_rate": 4.2053628079844385e-05, "loss": 2.2435, "step": 5492500 }, { "epoch": 15.9, "learning_rate": 4.205290443219711e-05, "loss": 2.2508, "step": 5493000 }, { "epoch": 15.9, "learning_rate": 4.205218078454983e-05, "loss": 2.2423, "step": 5493500 }, { "epoch": 15.9, "learning_rate": 4.205145858419785e-05, "loss": 2.2551, "step": 5494000 }, { "epoch": 15.9, "learning_rate": 4.205073493655058e-05, "loss": 2.2537, "step": 5494500 }, { "epoch": 15.91, "learning_rate": 4.20500112889033e-05, "loss": 2.2314, "step": 5495000 }, { "epoch": 15.91, "learning_rate": 4.2049287641256025e-05, "loss": 2.2372, "step": 5495500 }, { "epoch": 15.91, "learning_rate": 4.2048566888199334e-05, "loss": 2.2523, "step": 5496000 }, { "epoch": 15.91, "learning_rate": 4.2047843240552056e-05, "loss": 2.2422, "step": 5496500 }, { "epoch": 15.91, "learning_rate": 4.2047119592904785e-05, "loss": 2.239, "step": 5497000 }, { "epoch": 15.91, "learning_rate": 4.204639594525751e-05, "loss": 2.2649, "step": 5497500 }, { "epoch": 15.91, "learning_rate": 4.204567229761023e-05, "loss": 2.2652, "step": 5498000 }, { "epoch": 15.92, "learning_rate": 4.204494864996295e-05, "loss": 2.2713, "step": 5498500 }, { "epoch": 15.92, "learning_rate": 4.2044225002315674e-05, "loss": 2.2363, "step": 5499000 }, { "epoch": 15.92, "learning_rate": 4.2043501354668397e-05, "loss": 2.235, "step": 5499500 }, { "epoch": 15.92, "learning_rate": 4.204277770702112e-05, "loss": 2.2549, "step": 5500000 }, { "epoch": 15.92, "learning_rate": 4.2042055506669134e-05, "loss": 2.2437, "step": 5500500 }, { "epoch": 15.92, "learning_rate": 4.204133185902186e-05, "loss": 2.2637, "step": 5501000 }, { "epoch": 15.92, "learning_rate": 4.2040608211374586e-05, "loss": 2.2569, "step": 5501500 }, { "epoch": 15.93, "learning_rate": 4.203988456372731e-05, "loss": 2.2363, "step": 5502000 }, { "epoch": 15.93, "learning_rate": 4.203916091608004e-05, "loss": 2.2667, "step": 5502500 }, { "epoch": 15.93, "learning_rate": 4.203843726843276e-05, "loss": 2.2735, "step": 5503000 }, { "epoch": 15.93, "learning_rate": 4.2037715068080775e-05, "loss": 2.2428, "step": 5503500 }, { "epoch": 15.93, "learning_rate": 4.20369914204335e-05, "loss": 2.2639, "step": 5504000 }, { "epoch": 15.93, "learning_rate": 4.203626777278622e-05, "loss": 2.2634, "step": 5504500 }, { "epoch": 15.93, "learning_rate": 4.203554412513894e-05, "loss": 2.2413, "step": 5505000 }, { "epoch": 15.94, "learning_rate": 4.2034820477491664e-05, "loss": 2.2721, "step": 5505500 }, { "epoch": 15.94, "learning_rate": 4.2034096829844386e-05, "loss": 2.2438, "step": 5506000 }, { "epoch": 15.94, "learning_rate": 4.203337462949241e-05, "loss": 2.2481, "step": 5506500 }, { "epoch": 15.94, "learning_rate": 4.203265098184513e-05, "loss": 2.2613, "step": 5507000 }, { "epoch": 15.94, "learning_rate": 4.203192733419785e-05, "loss": 2.2426, "step": 5507500 }, { "epoch": 15.94, "learning_rate": 4.2031203686550575e-05, "loss": 2.2584, "step": 5508000 }, { "epoch": 15.94, "learning_rate": 4.203048148619859e-05, "loss": 2.2542, "step": 5508500 }, { "epoch": 15.95, "learning_rate": 4.202975783855131e-05, "loss": 2.2603, "step": 5509000 }, { "epoch": 15.95, "learning_rate": 4.202903419090404e-05, "loss": 2.2483, "step": 5509500 }, { "epoch": 15.95, "learning_rate": 4.2028310543256764e-05, "loss": 2.2544, "step": 5510000 }, { "epoch": 15.95, "learning_rate": 4.2027588342904786e-05, "loss": 2.2317, "step": 5510500 }, { "epoch": 15.95, "learning_rate": 4.202686469525751e-05, "loss": 2.239, "step": 5511000 }, { "epoch": 15.95, "learning_rate": 4.202614104761023e-05, "loss": 2.2304, "step": 5511500 }, { "epoch": 15.95, "learning_rate": 4.202541739996295e-05, "loss": 2.2476, "step": 5512000 }, { "epoch": 15.96, "learning_rate": 4.202469519961097e-05, "loss": 2.2388, "step": 5512500 }, { "epoch": 15.96, "learning_rate": 4.202397155196369e-05, "loss": 2.2334, "step": 5513000 }, { "epoch": 15.96, "learning_rate": 4.202324790431641e-05, "loss": 2.2499, "step": 5513500 }, { "epoch": 15.96, "learning_rate": 4.2022524256669135e-05, "loss": 2.247, "step": 5514000 }, { "epoch": 15.96, "learning_rate": 4.202180060902186e-05, "loss": 2.2459, "step": 5514500 }, { "epoch": 15.96, "learning_rate": 4.2021076961374586e-05, "loss": 2.237, "step": 5515000 }, { "epoch": 15.97, "learning_rate": 4.202035331372731e-05, "loss": 2.247, "step": 5515500 }, { "epoch": 15.97, "learning_rate": 4.201962966608003e-05, "loss": 2.248, "step": 5516000 }, { "epoch": 15.97, "learning_rate": 4.201890601843276e-05, "loss": 2.2628, "step": 5516500 }, { "epoch": 15.97, "learning_rate": 4.201818237078548e-05, "loss": 2.2333, "step": 5517000 }, { "epoch": 15.97, "learning_rate": 4.2017458723138204e-05, "loss": 2.2472, "step": 5517500 }, { "epoch": 15.97, "learning_rate": 4.2016735075490927e-05, "loss": 2.2611, "step": 5518000 }, { "epoch": 15.97, "learning_rate": 4.201601287513894e-05, "loss": 2.2372, "step": 5518500 }, { "epoch": 15.98, "learning_rate": 4.2015289227491664e-05, "loss": 2.258, "step": 5519000 }, { "epoch": 15.98, "learning_rate": 4.201456557984439e-05, "loss": 2.255, "step": 5519500 }, { "epoch": 15.98, "learning_rate": 4.201384193219711e-05, "loss": 2.2459, "step": 5520000 }, { "epoch": 15.98, "learning_rate": 4.201311828454984e-05, "loss": 2.2439, "step": 5520500 }, { "epoch": 15.98, "learning_rate": 4.2012396084197853e-05, "loss": 2.235, "step": 5521000 }, { "epoch": 15.98, "learning_rate": 4.2011672436550576e-05, "loss": 2.2701, "step": 5521500 }, { "epoch": 15.98, "learning_rate": 4.20109487889033e-05, "loss": 2.2511, "step": 5522000 }, { "epoch": 15.99, "learning_rate": 4.201022514125602e-05, "loss": 2.256, "step": 5522500 }, { "epoch": 15.99, "learning_rate": 4.200950149360874e-05, "loss": 2.2592, "step": 5523000 }, { "epoch": 15.99, "learning_rate": 4.2008777845961465e-05, "loss": 2.2268, "step": 5523500 }, { "epoch": 15.99, "learning_rate": 4.200805564560949e-05, "loss": 2.2613, "step": 5524000 }, { "epoch": 15.99, "learning_rate": 4.2007331997962216e-05, "loss": 2.235, "step": 5524500 }, { "epoch": 15.99, "learning_rate": 4.200660835031494e-05, "loss": 2.2299, "step": 5525000 }, { "epoch": 15.99, "learning_rate": 4.200588470266766e-05, "loss": 2.2464, "step": 5525500 }, { "epoch": 16.0, "learning_rate": 4.200516105502038e-05, "loss": 2.254, "step": 5526000 }, { "epoch": 16.0, "learning_rate": 4.20044388546684e-05, "loss": 2.252, "step": 5526500 }, { "epoch": 16.0, "learning_rate": 4.200371520702112e-05, "loss": 2.252, "step": 5527000 }, { "epoch": 16.0, "learning_rate": 4.2002993006669136e-05, "loss": 2.2268, "step": 5527500 }, { "epoch": 16.0, "eval_accuracy": 0.655441833614539, "eval_accuracy_mlm": 0.6187079602036782, "eval_accuracy_nsp": 0.8525960961167407, "eval_loss": 2.2606470584869385, "eval_runtime": 330.7167, "eval_samples_per_second": 1319.516, "eval_steps_per_second": 54.981, "step": 5527552 }, { "epoch": 16.0, "learning_rate": 4.2002269359021865e-05, "loss": 2.2287, "step": 5528000 }, { "epoch": 16.0, "learning_rate": 4.200154571137459e-05, "loss": 2.221, "step": 5528500 }, { "epoch": 16.0, "learning_rate": 4.200082206372731e-05, "loss": 2.2238, "step": 5529000 }, { "epoch": 16.01, "learning_rate": 4.200009841608003e-05, "loss": 2.2213, "step": 5529500 }, { "epoch": 16.01, "learning_rate": 4.1999374768432754e-05, "loss": 2.2253, "step": 5530000 }, { "epoch": 16.01, "learning_rate": 4.1998651120785476e-05, "loss": 2.2198, "step": 5530500 }, { "epoch": 16.01, "learning_rate": 4.19979274731382e-05, "loss": 2.2025, "step": 5531000 }, { "epoch": 16.01, "learning_rate": 4.1997205272786214e-05, "loss": 2.22, "step": 5531500 }, { "epoch": 16.01, "learning_rate": 4.1996483072434236e-05, "loss": 2.2223, "step": 5532000 }, { "epoch": 16.01, "learning_rate": 4.1995759424786965e-05, "loss": 2.2348, "step": 5532500 }, { "epoch": 16.02, "learning_rate": 4.199503577713969e-05, "loss": 2.2247, "step": 5533000 }, { "epoch": 16.02, "learning_rate": 4.199431212949241e-05, "loss": 2.223, "step": 5533500 }, { "epoch": 16.02, "learning_rate": 4.199358848184513e-05, "loss": 2.2164, "step": 5534000 }, { "epoch": 16.02, "learning_rate": 4.1992864834197854e-05, "loss": 2.1993, "step": 5534500 }, { "epoch": 16.02, "learning_rate": 4.1992141186550576e-05, "loss": 2.2362, "step": 5535000 }, { "epoch": 16.02, "learning_rate": 4.19914175389033e-05, "loss": 2.24, "step": 5535500 }, { "epoch": 16.02, "learning_rate": 4.199069389125602e-05, "loss": 2.2435, "step": 5536000 }, { "epoch": 16.03, "learning_rate": 4.1989971690904036e-05, "loss": 2.2546, "step": 5536500 }, { "epoch": 16.03, "learning_rate": 4.1989248043256765e-05, "loss": 2.2314, "step": 5537000 }, { "epoch": 16.03, "learning_rate": 4.198852439560949e-05, "loss": 2.2112, "step": 5537500 }, { "epoch": 16.03, "learning_rate": 4.198780074796221e-05, "loss": 2.2135, "step": 5538000 }, { "epoch": 16.03, "learning_rate": 4.198707710031493e-05, "loss": 2.2457, "step": 5538500 }, { "epoch": 16.03, "learning_rate": 4.198635345266766e-05, "loss": 2.2064, "step": 5539000 }, { "epoch": 16.03, "learning_rate": 4.1985629805020383e-05, "loss": 2.2249, "step": 5539500 }, { "epoch": 16.04, "learning_rate": 4.1984906157373106e-05, "loss": 2.2235, "step": 5540000 }, { "epoch": 16.04, "learning_rate": 4.198418250972583e-05, "loss": 2.2096, "step": 5540500 }, { "epoch": 16.04, "learning_rate": 4.1983460309373843e-05, "loss": 2.2385, "step": 5541000 }, { "epoch": 16.04, "learning_rate": 4.1982736661726566e-05, "loss": 2.2394, "step": 5541500 }, { "epoch": 16.04, "learning_rate": 4.198201301407929e-05, "loss": 2.2266, "step": 5542000 }, { "epoch": 16.04, "learning_rate": 4.198128936643202e-05, "loss": 2.2331, "step": 5542500 }, { "epoch": 16.04, "learning_rate": 4.198056716608003e-05, "loss": 2.2257, "step": 5543000 }, { "epoch": 16.05, "learning_rate": 4.1979843518432755e-05, "loss": 2.2324, "step": 5543500 }, { "epoch": 16.05, "learning_rate": 4.197912131808077e-05, "loss": 2.2309, "step": 5544000 }, { "epoch": 16.05, "learning_rate": 4.197839767043349e-05, "loss": 2.2203, "step": 5544500 }, { "epoch": 16.05, "learning_rate": 4.197767691737681e-05, "loss": 2.2191, "step": 5545000 }, { "epoch": 16.05, "learning_rate": 4.197695326972953e-05, "loss": 2.2322, "step": 5545500 }, { "epoch": 16.05, "learning_rate": 4.197622962208225e-05, "loss": 2.2247, "step": 5546000 }, { "epoch": 16.05, "learning_rate": 4.197550742173027e-05, "loss": 2.241, "step": 5546500 }, { "epoch": 16.06, "learning_rate": 4.197478377408299e-05, "loss": 2.2305, "step": 5547000 }, { "epoch": 16.06, "learning_rate": 4.197406012643572e-05, "loss": 2.226, "step": 5547500 }, { "epoch": 16.06, "learning_rate": 4.197333647878844e-05, "loss": 2.238, "step": 5548000 }, { "epoch": 16.06, "learning_rate": 4.1972612831141164e-05, "loss": 2.2386, "step": 5548500 }, { "epoch": 16.06, "learning_rate": 4.197188918349389e-05, "loss": 2.2195, "step": 5549000 }, { "epoch": 16.06, "learning_rate": 4.1971165535846615e-05, "loss": 2.2467, "step": 5549500 }, { "epoch": 16.06, "learning_rate": 4.197044188819934e-05, "loss": 2.2445, "step": 5550000 }, { "epoch": 16.07, "learning_rate": 4.196971824055206e-05, "loss": 2.229, "step": 5550500 }, { "epoch": 16.07, "learning_rate": 4.196899459290478e-05, "loss": 2.2299, "step": 5551000 }, { "epoch": 16.07, "learning_rate": 4.1968270945257504e-05, "loss": 2.218, "step": 5551500 }, { "epoch": 16.07, "learning_rate": 4.1967547297610226e-05, "loss": 2.2262, "step": 5552000 }, { "epoch": 16.07, "learning_rate": 4.196682509725824e-05, "loss": 2.2215, "step": 5552500 }, { "epoch": 16.07, "learning_rate": 4.1966101449610964e-05, "loss": 2.2128, "step": 5553000 }, { "epoch": 16.08, "learning_rate": 4.196537780196369e-05, "loss": 2.25, "step": 5553500 }, { "epoch": 16.08, "learning_rate": 4.1964654154316415e-05, "loss": 2.2169, "step": 5554000 }, { "epoch": 16.08, "learning_rate": 4.1963930506669144e-05, "loss": 2.2652, "step": 5554500 }, { "epoch": 16.08, "learning_rate": 4.1963206859021867e-05, "loss": 2.2226, "step": 5555000 }, { "epoch": 16.08, "learning_rate": 4.196248321137459e-05, "loss": 2.2309, "step": 5555500 }, { "epoch": 16.08, "learning_rate": 4.196175956372731e-05, "loss": 2.21, "step": 5556000 }, { "epoch": 16.08, "learning_rate": 4.196103591608003e-05, "loss": 2.2112, "step": 5556500 }, { "epoch": 16.09, "learning_rate": 4.1960312268432756e-05, "loss": 2.2068, "step": 5557000 }, { "epoch": 16.09, "learning_rate": 4.195958862078548e-05, "loss": 2.2362, "step": 5557500 }, { "epoch": 16.09, "learning_rate": 4.195886642043349e-05, "loss": 2.2106, "step": 5558000 }, { "epoch": 16.09, "learning_rate": 4.1958142772786216e-05, "loss": 2.2272, "step": 5558500 }, { "epoch": 16.09, "learning_rate": 4.1957419125138945e-05, "loss": 2.2399, "step": 5559000 }, { "epoch": 16.09, "learning_rate": 4.195669692478696e-05, "loss": 2.2472, "step": 5559500 }, { "epoch": 16.09, "learning_rate": 4.195597327713968e-05, "loss": 2.21, "step": 5560000 }, { "epoch": 16.1, "learning_rate": 4.1955249629492405e-05, "loss": 2.1966, "step": 5560500 }, { "epoch": 16.1, "learning_rate": 4.195452598184513e-05, "loss": 2.2274, "step": 5561000 }, { "epoch": 16.1, "learning_rate": 4.195380233419785e-05, "loss": 2.2563, "step": 5561500 }, { "epoch": 16.1, "learning_rate": 4.195307868655058e-05, "loss": 2.2069, "step": 5562000 }, { "epoch": 16.1, "learning_rate": 4.19523550389033e-05, "loss": 2.2139, "step": 5562500 }, { "epoch": 16.1, "learning_rate": 4.195163139125602e-05, "loss": 2.2167, "step": 5563000 }, { "epoch": 16.1, "learning_rate": 4.1950907743608745e-05, "loss": 2.2355, "step": 5563500 }, { "epoch": 16.11, "learning_rate": 4.195018554325677e-05, "loss": 2.2251, "step": 5564000 }, { "epoch": 16.11, "learning_rate": 4.194946334290478e-05, "loss": 2.2395, "step": 5564500 }, { "epoch": 16.11, "learning_rate": 4.1948739695257505e-05, "loss": 2.2062, "step": 5565000 }, { "epoch": 16.11, "learning_rate": 4.194801604761023e-05, "loss": 2.2331, "step": 5565500 }, { "epoch": 16.11, "learning_rate": 4.194729239996295e-05, "loss": 2.2285, "step": 5566000 }, { "epoch": 16.11, "learning_rate": 4.194656875231567e-05, "loss": 2.244, "step": 5566500 }, { "epoch": 16.11, "learning_rate": 4.1945845104668394e-05, "loss": 2.2575, "step": 5567000 }, { "epoch": 16.12, "learning_rate": 4.1945121457021116e-05, "loss": 2.2243, "step": 5567500 }, { "epoch": 16.12, "learning_rate": 4.1944397809373845e-05, "loss": 2.233, "step": 5568000 }, { "epoch": 16.12, "learning_rate": 4.194367416172657e-05, "loss": 2.2259, "step": 5568500 }, { "epoch": 16.12, "learning_rate": 4.1942950514079296e-05, "loss": 2.2495, "step": 5569000 }, { "epoch": 16.12, "learning_rate": 4.194222686643202e-05, "loss": 2.2121, "step": 5569500 }, { "epoch": 16.12, "learning_rate": 4.194150321878474e-05, "loss": 2.2291, "step": 5570000 }, { "epoch": 16.12, "learning_rate": 4.1940781018432756e-05, "loss": 2.2322, "step": 5570500 }, { "epoch": 16.13, "learning_rate": 4.194005737078548e-05, "loss": 2.2298, "step": 5571000 }, { "epoch": 16.13, "learning_rate": 4.19393337231382e-05, "loss": 2.2197, "step": 5571500 }, { "epoch": 16.13, "learning_rate": 4.193861007549092e-05, "loss": 2.2271, "step": 5572000 }, { "epoch": 16.13, "learning_rate": 4.1937886427843645e-05, "loss": 2.2166, "step": 5572500 }, { "epoch": 16.13, "learning_rate": 4.193716278019637e-05, "loss": 2.2321, "step": 5573000 }, { "epoch": 16.13, "learning_rate": 4.1936439132549096e-05, "loss": 2.2215, "step": 5573500 }, { "epoch": 16.13, "learning_rate": 4.193571548490182e-05, "loss": 2.2013, "step": 5574000 }, { "epoch": 16.14, "learning_rate": 4.193499183725454e-05, "loss": 2.2184, "step": 5574500 }, { "epoch": 16.14, "learning_rate": 4.193426818960726e-05, "loss": 2.2162, "step": 5575000 }, { "epoch": 16.14, "learning_rate": 4.193354598925528e-05, "loss": 2.2216, "step": 5575500 }, { "epoch": 16.14, "learning_rate": 4.1932822341608e-05, "loss": 2.2355, "step": 5576000 }, { "epoch": 16.14, "learning_rate": 4.193209869396073e-05, "loss": 2.2284, "step": 5576500 }, { "epoch": 16.14, "learning_rate": 4.193137504631345e-05, "loss": 2.2499, "step": 5577000 }, { "epoch": 16.14, "learning_rate": 4.193065284596147e-05, "loss": 2.2401, "step": 5577500 }, { "epoch": 16.15, "learning_rate": 4.19299291983142e-05, "loss": 2.2365, "step": 5578000 }, { "epoch": 16.15, "learning_rate": 4.192920699796221e-05, "loss": 2.2208, "step": 5578500 }, { "epoch": 16.15, "learning_rate": 4.1928483350314935e-05, "loss": 2.2414, "step": 5579000 }, { "epoch": 16.15, "learning_rate": 4.192775970266766e-05, "loss": 2.2368, "step": 5579500 }, { "epoch": 16.15, "learning_rate": 4.192703750231567e-05, "loss": 2.221, "step": 5580000 }, { "epoch": 16.15, "learning_rate": 4.1926313854668395e-05, "loss": 2.2461, "step": 5580500 }, { "epoch": 16.15, "learning_rate": 4.1925590207021124e-05, "loss": 2.2478, "step": 5581000 }, { "epoch": 16.16, "learning_rate": 4.1924866559373846e-05, "loss": 2.2468, "step": 5581500 }, { "epoch": 16.16, "learning_rate": 4.192414291172657e-05, "loss": 2.2126, "step": 5582000 }, { "epoch": 16.16, "learning_rate": 4.192341926407929e-05, "loss": 2.2398, "step": 5582500 }, { "epoch": 16.16, "learning_rate": 4.192269561643201e-05, "loss": 2.2373, "step": 5583000 }, { "epoch": 16.16, "learning_rate": 4.1921971968784735e-05, "loss": 2.2336, "step": 5583500 }, { "epoch": 16.16, "learning_rate": 4.1921248321137464e-05, "loss": 2.2408, "step": 5584000 }, { "epoch": 16.16, "learning_rate": 4.1920524673490186e-05, "loss": 2.2273, "step": 5584500 }, { "epoch": 16.17, "learning_rate": 4.191980102584291e-05, "loss": 2.2299, "step": 5585000 }, { "epoch": 16.17, "learning_rate": 4.191907737819563e-05, "loss": 2.2128, "step": 5585500 }, { "epoch": 16.17, "learning_rate": 4.191835373054835e-05, "loss": 2.2227, "step": 5586000 }, { "epoch": 16.17, "learning_rate": 4.1917630082901075e-05, "loss": 2.2438, "step": 5586500 }, { "epoch": 16.17, "learning_rate": 4.19169078825491e-05, "loss": 2.2137, "step": 5587000 }, { "epoch": 16.17, "learning_rate": 4.191618423490182e-05, "loss": 2.2271, "step": 5587500 }, { "epoch": 16.17, "learning_rate": 4.191546058725454e-05, "loss": 2.2161, "step": 5588000 }, { "epoch": 16.18, "learning_rate": 4.1914736939607264e-05, "loss": 2.244, "step": 5588500 }, { "epoch": 16.18, "learning_rate": 4.1914013291959986e-05, "loss": 2.2635, "step": 5589000 }, { "epoch": 16.18, "learning_rate": 4.191328964431271e-05, "loss": 2.2237, "step": 5589500 }, { "epoch": 16.18, "learning_rate": 4.191256599666543e-05, "loss": 2.2404, "step": 5590000 }, { "epoch": 16.18, "learning_rate": 4.1911843796313446e-05, "loss": 2.2377, "step": 5590500 }, { "epoch": 16.18, "learning_rate": 4.1911120148666175e-05, "loss": 2.2428, "step": 5591000 }, { "epoch": 16.19, "learning_rate": 4.19103979483142e-05, "loss": 2.2242, "step": 5591500 }, { "epoch": 16.19, "learning_rate": 4.190967430066692e-05, "loss": 2.212, "step": 5592000 }, { "epoch": 16.19, "learning_rate": 4.190895065301964e-05, "loss": 2.2253, "step": 5592500 }, { "epoch": 16.19, "learning_rate": 4.1908227005372364e-05, "loss": 2.2305, "step": 5593000 }, { "epoch": 16.19, "learning_rate": 4.1907503357725087e-05, "loss": 2.2244, "step": 5593500 }, { "epoch": 16.19, "learning_rate": 4.190677971007781e-05, "loss": 2.2705, "step": 5594000 }, { "epoch": 16.19, "learning_rate": 4.1906057509725824e-05, "loss": 2.1855, "step": 5594500 }, { "epoch": 16.2, "learning_rate": 4.1905333862078547e-05, "loss": 2.2597, "step": 5595000 }, { "epoch": 16.2, "learning_rate": 4.1904610214431276e-05, "loss": 2.245, "step": 5595500 }, { "epoch": 16.2, "learning_rate": 4.1903886566784e-05, "loss": 2.246, "step": 5596000 }, { "epoch": 16.2, "learning_rate": 4.190316436643201e-05, "loss": 2.2371, "step": 5596500 }, { "epoch": 16.2, "learning_rate": 4.1902440718784736e-05, "loss": 2.2303, "step": 5597000 }, { "epoch": 16.2, "learning_rate": 4.190171707113746e-05, "loss": 2.2262, "step": 5597500 }, { "epoch": 16.2, "learning_rate": 4.190099342349018e-05, "loss": 2.2245, "step": 5598000 }, { "epoch": 16.21, "learning_rate": 4.19002697758429e-05, "loss": 2.2495, "step": 5598500 }, { "epoch": 16.21, "learning_rate": 4.189954612819563e-05, "loss": 2.25, "step": 5599000 }, { "epoch": 16.21, "learning_rate": 4.1898822480548354e-05, "loss": 2.2449, "step": 5599500 }, { "epoch": 16.21, "learning_rate": 4.1898098832901076e-05, "loss": 2.2217, "step": 5600000 }, { "epoch": 16.21, "learning_rate": 4.18973751852538e-05, "loss": 2.2515, "step": 5600500 }, { "epoch": 16.21, "learning_rate": 4.189665298490182e-05, "loss": 2.2507, "step": 5601000 }, { "epoch": 16.21, "learning_rate": 4.189592933725454e-05, "loss": 2.205, "step": 5601500 }, { "epoch": 16.22, "learning_rate": 4.1895205689607265e-05, "loss": 2.2308, "step": 5602000 }, { "epoch": 16.22, "learning_rate": 4.189448204195999e-05, "loss": 2.234, "step": 5602500 }, { "epoch": 16.22, "learning_rate": 4.189375839431271e-05, "loss": 2.2219, "step": 5603000 }, { "epoch": 16.22, "learning_rate": 4.189303474666543e-05, "loss": 2.2263, "step": 5603500 }, { "epoch": 16.22, "learning_rate": 4.1892311099018154e-05, "loss": 2.2391, "step": 5604000 }, { "epoch": 16.22, "learning_rate": 4.1891588898666176e-05, "loss": 2.2346, "step": 5604500 }, { "epoch": 16.22, "learning_rate": 4.18908652510189e-05, "loss": 2.2177, "step": 5605000 }, { "epoch": 16.23, "learning_rate": 4.189014160337162e-05, "loss": 2.2259, "step": 5605500 }, { "epoch": 16.23, "learning_rate": 4.188941795572435e-05, "loss": 2.2296, "step": 5606000 }, { "epoch": 16.23, "learning_rate": 4.188869430807707e-05, "loss": 2.237, "step": 5606500 }, { "epoch": 16.23, "learning_rate": 4.188797210772509e-05, "loss": 2.2397, "step": 5607000 }, { "epoch": 16.23, "learning_rate": 4.188724846007781e-05, "loss": 2.2506, "step": 5607500 }, { "epoch": 16.23, "learning_rate": 4.188652481243053e-05, "loss": 2.2301, "step": 5608000 }, { "epoch": 16.23, "learning_rate": 4.1885801164783254e-05, "loss": 2.2392, "step": 5608500 }, { "epoch": 16.24, "learning_rate": 4.1885077517135976e-05, "loss": 2.2213, "step": 5609000 }, { "epoch": 16.24, "learning_rate": 4.1884355316784e-05, "loss": 2.2321, "step": 5609500 }, { "epoch": 16.24, "learning_rate": 4.188363166913672e-05, "loss": 2.2428, "step": 5610000 }, { "epoch": 16.24, "learning_rate": 4.188290802148944e-05, "loss": 2.2231, "step": 5610500 }, { "epoch": 16.24, "learning_rate": 4.1882184373842165e-05, "loss": 2.2432, "step": 5611000 }, { "epoch": 16.24, "learning_rate": 4.188146217349018e-05, "loss": 2.2551, "step": 5611500 }, { "epoch": 16.24, "learning_rate": 4.18807385258429e-05, "loss": 2.2282, "step": 5612000 }, { "epoch": 16.25, "learning_rate": 4.1880014878195625e-05, "loss": 2.2361, "step": 5612500 }, { "epoch": 16.25, "learning_rate": 4.187929123054835e-05, "loss": 2.234, "step": 5613000 }, { "epoch": 16.25, "learning_rate": 4.1878567582901077e-05, "loss": 2.2373, "step": 5613500 }, { "epoch": 16.25, "learning_rate": 4.18778439352538e-05, "loss": 2.2232, "step": 5614000 }, { "epoch": 16.25, "learning_rate": 4.187712028760653e-05, "loss": 2.254, "step": 5614500 }, { "epoch": 16.25, "learning_rate": 4.187639663995925e-05, "loss": 2.2519, "step": 5615000 }, { "epoch": 16.25, "learning_rate": 4.187567299231197e-05, "loss": 2.2284, "step": 5615500 }, { "epoch": 16.26, "learning_rate": 4.1874949344664694e-05, "loss": 2.2264, "step": 5616000 }, { "epoch": 16.26, "learning_rate": 4.187422569701742e-05, "loss": 2.2542, "step": 5616500 }, { "epoch": 16.26, "learning_rate": 4.187350204937014e-05, "loss": 2.2169, "step": 5617000 }, { "epoch": 16.26, "learning_rate": 4.187277840172286e-05, "loss": 2.2343, "step": 5617500 }, { "epoch": 16.26, "learning_rate": 4.1872054754075583e-05, "loss": 2.2148, "step": 5618000 }, { "epoch": 16.26, "learning_rate": 4.1871331106428306e-05, "loss": 2.2457, "step": 5618500 }, { "epoch": 16.26, "learning_rate": 4.187060745878103e-05, "loss": 2.2466, "step": 5619000 }, { "epoch": 16.27, "learning_rate": 4.1869886705724344e-05, "loss": 2.2501, "step": 5619500 }, { "epoch": 16.27, "learning_rate": 4.1869163058077066e-05, "loss": 2.2314, "step": 5620000 }, { "epoch": 16.27, "learning_rate": 4.186843941042979e-05, "loss": 2.2294, "step": 5620500 }, { "epoch": 16.27, "learning_rate": 4.186771576278252e-05, "loss": 2.2486, "step": 5621000 }, { "epoch": 16.27, "learning_rate": 4.186699356243053e-05, "loss": 2.2374, "step": 5621500 }, { "epoch": 16.27, "learning_rate": 4.1866269914783255e-05, "loss": 2.2331, "step": 5622000 }, { "epoch": 16.27, "learning_rate": 4.186554626713598e-05, "loss": 2.2415, "step": 5622500 }, { "epoch": 16.28, "learning_rate": 4.1864822619488706e-05, "loss": 2.2336, "step": 5623000 }, { "epoch": 16.28, "learning_rate": 4.186410041913672e-05, "loss": 2.2345, "step": 5623500 }, { "epoch": 16.28, "learning_rate": 4.1863376771489444e-05, "loss": 2.2415, "step": 5624000 }, { "epoch": 16.28, "learning_rate": 4.1862653123842166e-05, "loss": 2.2324, "step": 5624500 }, { "epoch": 16.28, "learning_rate": 4.186192947619489e-05, "loss": 2.2349, "step": 5625000 }, { "epoch": 16.28, "learning_rate": 4.186120582854761e-05, "loss": 2.2368, "step": 5625500 }, { "epoch": 16.28, "learning_rate": 4.186048218090033e-05, "loss": 2.2174, "step": 5626000 }, { "epoch": 16.29, "learning_rate": 4.1859758533253055e-05, "loss": 2.2265, "step": 5626500 }, { "epoch": 16.29, "learning_rate": 4.185903488560578e-05, "loss": 2.2354, "step": 5627000 }, { "epoch": 16.29, "learning_rate": 4.1858311237958506e-05, "loss": 2.211, "step": 5627500 }, { "epoch": 16.29, "learning_rate": 4.185758759031123e-05, "loss": 2.2134, "step": 5628000 }, { "epoch": 16.29, "learning_rate": 4.185686394266396e-05, "loss": 2.2071, "step": 5628500 }, { "epoch": 16.29, "learning_rate": 4.185614029501668e-05, "loss": 2.2237, "step": 5629000 }, { "epoch": 16.3, "learning_rate": 4.1855418094664695e-05, "loss": 2.237, "step": 5629500 }, { "epoch": 16.3, "learning_rate": 4.185469444701742e-05, "loss": 2.2385, "step": 5630000 }, { "epoch": 16.3, "learning_rate": 4.185397224666543e-05, "loss": 2.2286, "step": 5630500 }, { "epoch": 16.3, "learning_rate": 4.1853248599018155e-05, "loss": 2.2616, "step": 5631000 }, { "epoch": 16.3, "learning_rate": 4.185252495137088e-05, "loss": 2.2557, "step": 5631500 }, { "epoch": 16.3, "learning_rate": 4.1851801303723607e-05, "loss": 2.2513, "step": 5632000 }, { "epoch": 16.3, "learning_rate": 4.185107765607633e-05, "loss": 2.2277, "step": 5632500 }, { "epoch": 16.31, "learning_rate": 4.185035400842905e-05, "loss": 2.2519, "step": 5633000 }, { "epoch": 16.31, "learning_rate": 4.184963036078177e-05, "loss": 2.2442, "step": 5633500 }, { "epoch": 16.31, "learning_rate": 4.1848906713134495e-05, "loss": 2.2475, "step": 5634000 }, { "epoch": 16.31, "learning_rate": 4.1848185960077804e-05, "loss": 2.218, "step": 5634500 }, { "epoch": 16.31, "learning_rate": 4.1847462312430527e-05, "loss": 2.2038, "step": 5635000 }, { "epoch": 16.31, "learning_rate": 4.1846738664783256e-05, "loss": 2.2209, "step": 5635500 }, { "epoch": 16.31, "learning_rate": 4.184601501713598e-05, "loss": 2.2108, "step": 5636000 }, { "epoch": 16.32, "learning_rate": 4.184529136948871e-05, "loss": 2.2071, "step": 5636500 }, { "epoch": 16.32, "learning_rate": 4.184456772184143e-05, "loss": 2.2402, "step": 5637000 }, { "epoch": 16.32, "learning_rate": 4.184384407419415e-05, "loss": 2.2289, "step": 5637500 }, { "epoch": 16.32, "learning_rate": 4.1843120426546874e-05, "loss": 2.2554, "step": 5638000 }, { "epoch": 16.32, "learning_rate": 4.1842396778899596e-05, "loss": 2.2403, "step": 5638500 }, { "epoch": 16.32, "learning_rate": 4.184167313125232e-05, "loss": 2.2406, "step": 5639000 }, { "epoch": 16.32, "learning_rate": 4.184094948360504e-05, "loss": 2.2435, "step": 5639500 }, { "epoch": 16.33, "learning_rate": 4.184022583595776e-05, "loss": 2.2417, "step": 5640000 }, { "epoch": 16.33, "learning_rate": 4.1839502188310485e-05, "loss": 2.2454, "step": 5640500 }, { "epoch": 16.33, "learning_rate": 4.183877854066321e-05, "loss": 2.2558, "step": 5641000 }, { "epoch": 16.33, "learning_rate": 4.183805634031123e-05, "loss": 2.2111, "step": 5641500 }, { "epoch": 16.33, "learning_rate": 4.183733269266395e-05, "loss": 2.2361, "step": 5642000 }, { "epoch": 16.33, "learning_rate": 4.1836609045016674e-05, "loss": 2.2533, "step": 5642500 }, { "epoch": 16.33, "learning_rate": 4.1835885397369396e-05, "loss": 2.2463, "step": 5643000 }, { "epoch": 16.34, "learning_rate": 4.183516319701742e-05, "loss": 2.2328, "step": 5643500 }, { "epoch": 16.34, "learning_rate": 4.183443954937014e-05, "loss": 2.2235, "step": 5644000 }, { "epoch": 16.34, "learning_rate": 4.183371590172286e-05, "loss": 2.2707, "step": 5644500 }, { "epoch": 16.34, "learning_rate": 4.1832992254075585e-05, "loss": 2.2367, "step": 5645000 }, { "epoch": 16.34, "learning_rate": 4.183226860642831e-05, "loss": 2.2465, "step": 5645500 }, { "epoch": 16.34, "learning_rate": 4.183154495878103e-05, "loss": 2.255, "step": 5646000 }, { "epoch": 16.34, "learning_rate": 4.183082131113376e-05, "loss": 2.2255, "step": 5646500 }, { "epoch": 16.35, "learning_rate": 4.183009766348648e-05, "loss": 2.2323, "step": 5647000 }, { "epoch": 16.35, "learning_rate": 4.18293740158392e-05, "loss": 2.2454, "step": 5647500 }, { "epoch": 16.35, "learning_rate": 4.1828650368191925e-05, "loss": 2.2222, "step": 5648000 }, { "epoch": 16.35, "learning_rate": 4.182792672054465e-05, "loss": 2.2534, "step": 5648500 }, { "epoch": 16.35, "learning_rate": 4.182720307289737e-05, "loss": 2.2489, "step": 5649000 }, { "epoch": 16.35, "learning_rate": 4.182647942525009e-05, "loss": 2.2369, "step": 5649500 }, { "epoch": 16.35, "learning_rate": 4.182575722489811e-05, "loss": 2.2381, "step": 5650000 }, { "epoch": 16.36, "learning_rate": 4.1825033577250836e-05, "loss": 2.241, "step": 5650500 }, { "epoch": 16.36, "learning_rate": 4.182430992960356e-05, "loss": 2.2202, "step": 5651000 }, { "epoch": 16.36, "learning_rate": 4.1823589176546874e-05, "loss": 2.2406, "step": 5651500 }, { "epoch": 16.36, "learning_rate": 4.1822865528899597e-05, "loss": 2.2237, "step": 5652000 }, { "epoch": 16.36, "learning_rate": 4.182214188125232e-05, "loss": 2.2375, "step": 5652500 }, { "epoch": 16.36, "learning_rate": 4.182141823360504e-05, "loss": 2.2462, "step": 5653000 }, { "epoch": 16.36, "learning_rate": 4.182069458595776e-05, "loss": 2.2447, "step": 5653500 }, { "epoch": 16.37, "learning_rate": 4.1819970938310486e-05, "loss": 2.2473, "step": 5654000 }, { "epoch": 16.37, "learning_rate": 4.181924729066321e-05, "loss": 2.2169, "step": 5654500 }, { "epoch": 16.37, "learning_rate": 4.181852364301593e-05, "loss": 2.2293, "step": 5655000 }, { "epoch": 16.37, "learning_rate": 4.181780144266395e-05, "loss": 2.2597, "step": 5655500 }, { "epoch": 16.37, "learning_rate": 4.1817077795016675e-05, "loss": 2.2211, "step": 5656000 }, { "epoch": 16.37, "learning_rate": 4.18163541473694e-05, "loss": 2.2341, "step": 5656500 }, { "epoch": 16.37, "learning_rate": 4.181563049972212e-05, "loss": 2.2324, "step": 5657000 }, { "epoch": 16.38, "learning_rate": 4.181490685207484e-05, "loss": 2.2299, "step": 5657500 }, { "epoch": 16.38, "learning_rate": 4.181418320442757e-05, "loss": 2.254, "step": 5658000 }, { "epoch": 16.38, "learning_rate": 4.181345955678029e-05, "loss": 2.234, "step": 5658500 }, { "epoch": 16.38, "learning_rate": 4.1812735909133015e-05, "loss": 2.2259, "step": 5659000 }, { "epoch": 16.38, "learning_rate": 4.181201226148574e-05, "loss": 2.2423, "step": 5659500 }, { "epoch": 16.38, "learning_rate": 4.181128861383846e-05, "loss": 2.2242, "step": 5660000 }, { "epoch": 16.38, "learning_rate": 4.181056496619118e-05, "loss": 2.2298, "step": 5660500 }, { "epoch": 16.39, "learning_rate": 4.1809842765839204e-05, "loss": 2.2365, "step": 5661000 }, { "epoch": 16.39, "learning_rate": 4.1809119118191926e-05, "loss": 2.22, "step": 5661500 }, { "epoch": 16.39, "learning_rate": 4.180839691783994e-05, "loss": 2.2399, "step": 5662000 }, { "epoch": 16.39, "learning_rate": 4.1807673270192664e-05, "loss": 2.2538, "step": 5662500 }, { "epoch": 16.39, "learning_rate": 4.1806949622545386e-05, "loss": 2.2378, "step": 5663000 }, { "epoch": 16.39, "learning_rate": 4.180622597489811e-05, "loss": 2.2366, "step": 5663500 }, { "epoch": 16.39, "learning_rate": 4.180550232725084e-05, "loss": 2.2114, "step": 5664000 }, { "epoch": 16.4, "learning_rate": 4.180477867960356e-05, "loss": 2.2601, "step": 5664500 }, { "epoch": 16.4, "learning_rate": 4.180405503195628e-05, "loss": 2.2517, "step": 5665000 }, { "epoch": 16.4, "learning_rate": 4.180333138430901e-05, "loss": 2.2438, "step": 5665500 }, { "epoch": 16.4, "learning_rate": 4.180260773666173e-05, "loss": 2.221, "step": 5666000 }, { "epoch": 16.4, "learning_rate": 4.1801884089014455e-05, "loss": 2.2364, "step": 5666500 }, { "epoch": 16.4, "learning_rate": 4.180116188866247e-05, "loss": 2.2344, "step": 5667000 }, { "epoch": 16.41, "learning_rate": 4.180043824101519e-05, "loss": 2.2508, "step": 5667500 }, { "epoch": 16.41, "learning_rate": 4.1799714593367915e-05, "loss": 2.2359, "step": 5668000 }, { "epoch": 16.41, "learning_rate": 4.179899094572064e-05, "loss": 2.2353, "step": 5668500 }, { "epoch": 16.41, "learning_rate": 4.179826729807336e-05, "loss": 2.2505, "step": 5669000 }, { "epoch": 16.41, "learning_rate": 4.179754365042609e-05, "loss": 2.2424, "step": 5669500 }, { "epoch": 16.41, "learning_rate": 4.17968228973694e-05, "loss": 2.2313, "step": 5670000 }, { "epoch": 16.41, "learning_rate": 4.179609924972212e-05, "loss": 2.2202, "step": 5670500 }, { "epoch": 16.42, "learning_rate": 4.179537560207484e-05, "loss": 2.2313, "step": 5671000 }, { "epoch": 16.42, "learning_rate": 4.1794651954427564e-05, "loss": 2.2454, "step": 5671500 }, { "epoch": 16.42, "learning_rate": 4.1793928306780286e-05, "loss": 2.2252, "step": 5672000 }, { "epoch": 16.42, "learning_rate": 4.179320465913301e-05, "loss": 2.2402, "step": 5672500 }, { "epoch": 16.42, "learning_rate": 4.179248101148574e-05, "loss": 2.2161, "step": 5673000 }, { "epoch": 16.42, "learning_rate": 4.179175736383846e-05, "loss": 2.246, "step": 5673500 }, { "epoch": 16.42, "learning_rate": 4.179103371619119e-05, "loss": 2.2289, "step": 5674000 }, { "epoch": 16.43, "learning_rate": 4.1790311515839205e-05, "loss": 2.2392, "step": 5674500 }, { "epoch": 16.43, "learning_rate": 4.178958786819193e-05, "loss": 2.2293, "step": 5675000 }, { "epoch": 16.43, "learning_rate": 4.178886422054465e-05, "loss": 2.2209, "step": 5675500 }, { "epoch": 16.43, "learning_rate": 4.178814057289737e-05, "loss": 2.2485, "step": 5676000 }, { "epoch": 16.43, "learning_rate": 4.1787416925250093e-05, "loss": 2.2409, "step": 5676500 }, { "epoch": 16.43, "learning_rate": 4.178669472489811e-05, "loss": 2.2301, "step": 5677000 }, { "epoch": 16.43, "learning_rate": 4.178597107725084e-05, "loss": 2.2342, "step": 5677500 }, { "epoch": 16.44, "learning_rate": 4.178524742960356e-05, "loss": 2.2484, "step": 5678000 }, { "epoch": 16.44, "learning_rate": 4.178452378195628e-05, "loss": 2.2287, "step": 5678500 }, { "epoch": 16.44, "learning_rate": 4.1783800134309005e-05, "loss": 2.2585, "step": 5679000 }, { "epoch": 16.44, "learning_rate": 4.178307793395702e-05, "loss": 2.2429, "step": 5679500 }, { "epoch": 16.44, "learning_rate": 4.178235428630974e-05, "loss": 2.2592, "step": 5680000 }, { "epoch": 16.44, "learning_rate": 4.178163063866247e-05, "loss": 2.2446, "step": 5680500 }, { "epoch": 16.44, "learning_rate": 4.1780906991015194e-05, "loss": 2.2082, "step": 5681000 }, { "epoch": 16.45, "learning_rate": 4.178018479066321e-05, "loss": 2.2327, "step": 5681500 }, { "epoch": 16.45, "learning_rate": 4.177946114301594e-05, "loss": 2.2111, "step": 5682000 }, { "epoch": 16.45, "learning_rate": 4.177873749536866e-05, "loss": 2.2348, "step": 5682500 }, { "epoch": 16.45, "learning_rate": 4.1778015295016676e-05, "loss": 2.2216, "step": 5683000 }, { "epoch": 16.45, "learning_rate": 4.17772916473694e-05, "loss": 2.2528, "step": 5683500 }, { "epoch": 16.45, "learning_rate": 4.1776569447017414e-05, "loss": 2.2326, "step": 5684000 }, { "epoch": 16.45, "learning_rate": 4.1775845799370136e-05, "loss": 2.2157, "step": 5684500 }, { "epoch": 16.46, "learning_rate": 4.1775122151722865e-05, "loss": 2.2292, "step": 5685000 }, { "epoch": 16.46, "learning_rate": 4.177439850407559e-05, "loss": 2.2369, "step": 5685500 }, { "epoch": 16.46, "learning_rate": 4.177367485642831e-05, "loss": 2.2459, "step": 5686000 }, { "epoch": 16.46, "learning_rate": 4.177295120878103e-05, "loss": 2.2292, "step": 5686500 }, { "epoch": 16.46, "learning_rate": 4.1772227561133754e-05, "loss": 2.2198, "step": 5687000 }, { "epoch": 16.46, "learning_rate": 4.1771503913486476e-05, "loss": 2.2485, "step": 5687500 }, { "epoch": 16.46, "learning_rate": 4.1770780265839205e-05, "loss": 2.2574, "step": 5688000 }, { "epoch": 16.47, "learning_rate": 4.177005661819193e-05, "loss": 2.2684, "step": 5688500 }, { "epoch": 16.47, "learning_rate": 4.176933297054465e-05, "loss": 2.2451, "step": 5689000 }, { "epoch": 16.47, "learning_rate": 4.176860932289737e-05, "loss": 2.2268, "step": 5689500 }, { "epoch": 16.47, "learning_rate": 4.1767885675250094e-05, "loss": 2.2282, "step": 5690000 }, { "epoch": 16.47, "learning_rate": 4.1767162027602817e-05, "loss": 2.2417, "step": 5690500 }, { "epoch": 16.47, "learning_rate": 4.176643837995554e-05, "loss": 2.2371, "step": 5691000 }, { "epoch": 16.47, "learning_rate": 4.176571617960356e-05, "loss": 2.249, "step": 5691500 }, { "epoch": 16.48, "learning_rate": 4.176499253195628e-05, "loss": 2.2767, "step": 5692000 }, { "epoch": 16.48, "learning_rate": 4.17642703316043e-05, "loss": 2.2224, "step": 5692500 }, { "epoch": 16.48, "learning_rate": 4.176354668395702e-05, "loss": 2.2457, "step": 5693000 }, { "epoch": 16.48, "learning_rate": 4.176282303630974e-05, "loss": 2.2315, "step": 5693500 }, { "epoch": 16.48, "learning_rate": 4.1762099388662466e-05, "loss": 2.2362, "step": 5694000 }, { "epoch": 16.48, "learning_rate": 4.176137718831049e-05, "loss": 2.2277, "step": 5694500 }, { "epoch": 16.48, "learning_rate": 4.176065354066321e-05, "loss": 2.2375, "step": 5695000 }, { "epoch": 16.49, "learning_rate": 4.175992989301593e-05, "loss": 2.2313, "step": 5695500 }, { "epoch": 16.49, "learning_rate": 4.175920624536866e-05, "loss": 2.2417, "step": 5696000 }, { "epoch": 16.49, "learning_rate": 4.1758482597721384e-05, "loss": 2.2446, "step": 5696500 }, { "epoch": 16.49, "learning_rate": 4.1757758950074106e-05, "loss": 2.2432, "step": 5697000 }, { "epoch": 16.49, "learning_rate": 4.175703530242683e-05, "loss": 2.2542, "step": 5697500 }, { "epoch": 16.49, "learning_rate": 4.175631165477955e-05, "loss": 2.2396, "step": 5698000 }, { "epoch": 16.49, "learning_rate": 4.175558800713227e-05, "loss": 2.2327, "step": 5698500 }, { "epoch": 16.5, "learning_rate": 4.175486580678029e-05, "loss": 2.2343, "step": 5699000 }, { "epoch": 16.5, "learning_rate": 4.175414215913302e-05, "loss": 2.2417, "step": 5699500 }, { "epoch": 16.5, "learning_rate": 4.175341995878103e-05, "loss": 2.2374, "step": 5700000 }, { "epoch": 16.5, "learning_rate": 4.1752696311133755e-05, "loss": 2.2414, "step": 5700500 }, { "epoch": 16.5, "learning_rate": 4.175197266348648e-05, "loss": 2.2407, "step": 5701000 }, { "epoch": 16.5, "learning_rate": 4.17512490158392e-05, "loss": 2.2296, "step": 5701500 }, { "epoch": 16.5, "learning_rate": 4.175052536819192e-05, "loss": 2.2397, "step": 5702000 }, { "epoch": 16.51, "learning_rate": 4.1749801720544644e-05, "loss": 2.2238, "step": 5702500 }, { "epoch": 16.51, "learning_rate": 4.174907807289737e-05, "loss": 2.2374, "step": 5703000 }, { "epoch": 16.51, "learning_rate": 4.1748354425250095e-05, "loss": 2.23, "step": 5703500 }, { "epoch": 16.51, "learning_rate": 4.174763077760282e-05, "loss": 2.2301, "step": 5704000 }, { "epoch": 16.51, "learning_rate": 4.174690712995554e-05, "loss": 2.2522, "step": 5704500 }, { "epoch": 16.51, "learning_rate": 4.174618348230827e-05, "loss": 2.2448, "step": 5705000 }, { "epoch": 16.52, "learning_rate": 4.1745461281956284e-05, "loss": 2.2195, "step": 5705500 }, { "epoch": 16.52, "learning_rate": 4.1744737634309006e-05, "loss": 2.2079, "step": 5706000 }, { "epoch": 16.52, "learning_rate": 4.174401398666173e-05, "loss": 2.2534, "step": 5706500 }, { "epoch": 16.52, "learning_rate": 4.174329033901445e-05, "loss": 2.2082, "step": 5707000 }, { "epoch": 16.52, "learning_rate": 4.174256669136717e-05, "loss": 2.2119, "step": 5707500 }, { "epoch": 16.52, "learning_rate": 4.174184449101519e-05, "loss": 2.2487, "step": 5708000 }, { "epoch": 16.52, "learning_rate": 4.174112084336792e-05, "loss": 2.2309, "step": 5708500 }, { "epoch": 16.53, "learning_rate": 4.174039719572064e-05, "loss": 2.2196, "step": 5709000 }, { "epoch": 16.53, "learning_rate": 4.173967354807336e-05, "loss": 2.247, "step": 5709500 }, { "epoch": 16.53, "learning_rate": 4.1738949900426084e-05, "loss": 2.2404, "step": 5710000 }, { "epoch": 16.53, "learning_rate": 4.173822770007411e-05, "loss": 2.2563, "step": 5710500 }, { "epoch": 16.53, "learning_rate": 4.173750405242683e-05, "loss": 2.2366, "step": 5711000 }, { "epoch": 16.53, "learning_rate": 4.173678040477955e-05, "loss": 2.2521, "step": 5711500 }, { "epoch": 16.53, "learning_rate": 4.173605675713227e-05, "loss": 2.2518, "step": 5712000 }, { "epoch": 16.54, "learning_rate": 4.173533455678029e-05, "loss": 2.2222, "step": 5712500 }, { "epoch": 16.54, "learning_rate": 4.173461090913302e-05, "loss": 2.2222, "step": 5713000 }, { "epoch": 16.54, "learning_rate": 4.173388726148574e-05, "loss": 2.2318, "step": 5713500 }, { "epoch": 16.54, "learning_rate": 4.173316361383846e-05, "loss": 2.2405, "step": 5714000 }, { "epoch": 16.54, "learning_rate": 4.1732439966191185e-05, "loss": 2.2491, "step": 5714500 }, { "epoch": 16.54, "learning_rate": 4.173171631854391e-05, "loss": 2.2231, "step": 5715000 }, { "epoch": 16.54, "learning_rate": 4.173099267089663e-05, "loss": 2.223, "step": 5715500 }, { "epoch": 16.55, "learning_rate": 4.173026902324935e-05, "loss": 2.2125, "step": 5716000 }, { "epoch": 16.55, "learning_rate": 4.1729545375602074e-05, "loss": 2.24, "step": 5716500 }, { "epoch": 16.55, "learning_rate": 4.1728821727954796e-05, "loss": 2.2139, "step": 5717000 }, { "epoch": 16.55, "learning_rate": 4.172810097489811e-05, "loss": 2.235, "step": 5717500 }, { "epoch": 16.55, "learning_rate": 4.172737732725084e-05, "loss": 2.2503, "step": 5718000 }, { "epoch": 16.55, "learning_rate": 4.172665367960356e-05, "loss": 2.2294, "step": 5718500 }, { "epoch": 16.55, "learning_rate": 4.1725930031956285e-05, "loss": 2.2248, "step": 5719000 }, { "epoch": 16.56, "learning_rate": 4.17252078316043e-05, "loss": 2.2393, "step": 5719500 }, { "epoch": 16.56, "learning_rate": 4.172448418395702e-05, "loss": 2.2509, "step": 5720000 }, { "epoch": 16.56, "learning_rate": 4.1723760536309745e-05, "loss": 2.2285, "step": 5720500 }, { "epoch": 16.56, "learning_rate": 4.172303688866247e-05, "loss": 2.2661, "step": 5721000 }, { "epoch": 16.56, "learning_rate": 4.1722313241015196e-05, "loss": 2.2461, "step": 5721500 }, { "epoch": 16.56, "learning_rate": 4.172158959336792e-05, "loss": 2.2367, "step": 5722000 }, { "epoch": 16.56, "learning_rate": 4.1720867393015934e-05, "loss": 2.2276, "step": 5722500 }, { "epoch": 16.57, "learning_rate": 4.1720143745368656e-05, "loss": 2.2405, "step": 5723000 }, { "epoch": 16.57, "learning_rate": 4.171942009772138e-05, "loss": 2.2341, "step": 5723500 }, { "epoch": 16.57, "learning_rate": 4.17186964500741e-05, "loss": 2.2287, "step": 5724000 }, { "epoch": 16.57, "learning_rate": 4.1717974249722116e-05, "loss": 2.2368, "step": 5724500 }, { "epoch": 16.57, "learning_rate": 4.1717250602074845e-05, "loss": 2.2286, "step": 5725000 }, { "epoch": 16.57, "learning_rate": 4.171652695442757e-05, "loss": 2.231, "step": 5725500 }, { "epoch": 16.57, "learning_rate": 4.1715803306780296e-05, "loss": 2.2326, "step": 5726000 }, { "epoch": 16.58, "learning_rate": 4.171508110642831e-05, "loss": 2.2185, "step": 5726500 }, { "epoch": 16.58, "learning_rate": 4.1714357458781034e-05, "loss": 2.2232, "step": 5727000 }, { "epoch": 16.58, "learning_rate": 4.1713633811133756e-05, "loss": 2.2387, "step": 5727500 }, { "epoch": 16.58, "learning_rate": 4.171291016348648e-05, "loss": 2.2548, "step": 5728000 }, { "epoch": 16.58, "learning_rate": 4.17121865158392e-05, "loss": 2.242, "step": 5728500 }, { "epoch": 16.58, "learning_rate": 4.171146286819192e-05, "loss": 2.2322, "step": 5729000 }, { "epoch": 16.58, "learning_rate": 4.1710739220544645e-05, "loss": 2.2383, "step": 5729500 }, { "epoch": 16.59, "learning_rate": 4.171001557289737e-05, "loss": 2.2156, "step": 5730000 }, { "epoch": 16.59, "learning_rate": 4.170929337254539e-05, "loss": 2.2433, "step": 5730500 }, { "epoch": 16.59, "learning_rate": 4.170856972489811e-05, "loss": 2.2458, "step": 5731000 }, { "epoch": 16.59, "learning_rate": 4.1707846077250834e-05, "loss": 2.2423, "step": 5731500 }, { "epoch": 16.59, "learning_rate": 4.170712242960356e-05, "loss": 2.2325, "step": 5732000 }, { "epoch": 16.59, "learning_rate": 4.170639878195628e-05, "loss": 2.2155, "step": 5732500 }, { "epoch": 16.59, "learning_rate": 4.170567513430901e-05, "loss": 2.2468, "step": 5733000 }, { "epoch": 16.6, "learning_rate": 4.170495148666173e-05, "loss": 2.2331, "step": 5733500 }, { "epoch": 16.6, "learning_rate": 4.170422783901445e-05, "loss": 2.241, "step": 5734000 }, { "epoch": 16.6, "learning_rate": 4.1703504191367175e-05, "loss": 2.2379, "step": 5734500 }, { "epoch": 16.6, "learning_rate": 4.17027805437199e-05, "loss": 2.2281, "step": 5735000 }, { "epoch": 16.6, "learning_rate": 4.170205689607262e-05, "loss": 2.2408, "step": 5735500 }, { "epoch": 16.6, "learning_rate": 4.170133324842535e-05, "loss": 2.2588, "step": 5736000 }, { "epoch": 16.6, "learning_rate": 4.1700611048073364e-05, "loss": 2.2789, "step": 5736500 }, { "epoch": 16.61, "learning_rate": 4.169988884772138e-05, "loss": 2.2652, "step": 5737000 }, { "epoch": 16.61, "learning_rate": 4.16991652000741e-05, "loss": 2.288, "step": 5737500 }, { "epoch": 16.61, "learning_rate": 4.1698441552426824e-05, "loss": 2.2229, "step": 5738000 }, { "epoch": 16.61, "learning_rate": 4.1697717904779546e-05, "loss": 2.2114, "step": 5738500 }, { "epoch": 16.61, "learning_rate": 4.169699425713227e-05, "loss": 2.2301, "step": 5739000 }, { "epoch": 16.61, "learning_rate": 4.1696270609485e-05, "loss": 2.2416, "step": 5739500 }, { "epoch": 16.61, "learning_rate": 4.169554696183772e-05, "loss": 2.1979, "step": 5740000 }, { "epoch": 16.62, "learning_rate": 4.169482331419045e-05, "loss": 2.2418, "step": 5740500 }, { "epoch": 16.62, "learning_rate": 4.169409966654317e-05, "loss": 2.2357, "step": 5741000 }, { "epoch": 16.62, "learning_rate": 4.169337891348648e-05, "loss": 2.2537, "step": 5741500 }, { "epoch": 16.62, "learning_rate": 4.16926552658392e-05, "loss": 2.2484, "step": 5742000 }, { "epoch": 16.62, "learning_rate": 4.1691931618191924e-05, "loss": 2.2569, "step": 5742500 }, { "epoch": 16.62, "learning_rate": 4.1691207970544646e-05, "loss": 2.241, "step": 5743000 }, { "epoch": 16.63, "learning_rate": 4.169048432289737e-05, "loss": 2.2407, "step": 5743500 }, { "epoch": 16.63, "learning_rate": 4.16897606752501e-05, "loss": 2.2411, "step": 5744000 }, { "epoch": 16.63, "learning_rate": 4.168903847489811e-05, "loss": 2.2247, "step": 5744500 }, { "epoch": 16.63, "learning_rate": 4.1688314827250835e-05, "loss": 2.2569, "step": 5745000 }, { "epoch": 16.63, "learning_rate": 4.168759117960356e-05, "loss": 2.2313, "step": 5745500 }, { "epoch": 16.63, "learning_rate": 4.168686753195628e-05, "loss": 2.2411, "step": 5746000 }, { "epoch": 16.63, "learning_rate": 4.1686143884309e-05, "loss": 2.247, "step": 5746500 }, { "epoch": 16.64, "learning_rate": 4.1685420236661724e-05, "loss": 2.229, "step": 5747000 }, { "epoch": 16.64, "learning_rate": 4.1684696589014446e-05, "loss": 2.2417, "step": 5747500 }, { "epoch": 16.64, "learning_rate": 4.1683972941367175e-05, "loss": 2.2066, "step": 5748000 }, { "epoch": 16.64, "learning_rate": 4.16832492937199e-05, "loss": 2.2179, "step": 5748500 }, { "epoch": 16.64, "learning_rate": 4.168252709336792e-05, "loss": 2.2387, "step": 5749000 }, { "epoch": 16.64, "learning_rate": 4.1681804893015936e-05, "loss": 2.2503, "step": 5749500 }, { "epoch": 16.64, "learning_rate": 4.168108124536866e-05, "loss": 2.2316, "step": 5750000 }, { "epoch": 16.65, "learning_rate": 4.168035759772138e-05, "loss": 2.2192, "step": 5750500 }, { "epoch": 16.65, "learning_rate": 4.1679635397369396e-05, "loss": 2.2361, "step": 5751000 }, { "epoch": 16.65, "learning_rate": 4.1678911749722125e-05, "loss": 2.2306, "step": 5751500 }, { "epoch": 16.65, "learning_rate": 4.167818810207485e-05, "loss": 2.2249, "step": 5752000 }, { "epoch": 16.65, "learning_rate": 4.167746445442757e-05, "loss": 2.2277, "step": 5752500 }, { "epoch": 16.65, "learning_rate": 4.167674080678029e-05, "loss": 2.2393, "step": 5753000 }, { "epoch": 16.65, "learning_rate": 4.1676017159133014e-05, "loss": 2.2244, "step": 5753500 }, { "epoch": 16.66, "learning_rate": 4.1675293511485736e-05, "loss": 2.2423, "step": 5754000 }, { "epoch": 16.66, "learning_rate": 4.167456986383846e-05, "loss": 2.237, "step": 5754500 }, { "epoch": 16.66, "learning_rate": 4.167384621619118e-05, "loss": 2.2193, "step": 5755000 }, { "epoch": 16.66, "learning_rate": 4.167312256854391e-05, "loss": 2.2338, "step": 5755500 }, { "epoch": 16.66, "learning_rate": 4.1672400368191925e-05, "loss": 2.2577, "step": 5756000 }, { "epoch": 16.66, "learning_rate": 4.167167672054465e-05, "loss": 2.2614, "step": 5756500 }, { "epoch": 16.66, "learning_rate": 4.1670953072897376e-05, "loss": 2.2349, "step": 5757000 }, { "epoch": 16.67, "learning_rate": 4.167023087254539e-05, "loss": 2.2253, "step": 5757500 }, { "epoch": 16.67, "learning_rate": 4.1669507224898114e-05, "loss": 2.2148, "step": 5758000 }, { "epoch": 16.67, "learning_rate": 4.1668783577250836e-05, "loss": 2.2224, "step": 5758500 }, { "epoch": 16.67, "learning_rate": 4.166805992960356e-05, "loss": 2.2531, "step": 5759000 }, { "epoch": 16.67, "learning_rate": 4.166733628195628e-05, "loss": 2.2355, "step": 5759500 }, { "epoch": 16.67, "learning_rate": 4.1666612634309e-05, "loss": 2.2364, "step": 5760000 }, { "epoch": 16.67, "learning_rate": 4.1665888986661725e-05, "loss": 2.2075, "step": 5760500 }, { "epoch": 16.68, "learning_rate": 4.166516533901445e-05, "loss": 2.2489, "step": 5761000 }, { "epoch": 16.68, "learning_rate": 4.1664441691367176e-05, "loss": 2.2393, "step": 5761500 }, { "epoch": 16.68, "learning_rate": 4.16637180437199e-05, "loss": 2.2328, "step": 5762000 }, { "epoch": 16.68, "learning_rate": 4.1662995843367914e-05, "loss": 2.2459, "step": 5762500 }, { "epoch": 16.68, "learning_rate": 4.166227219572064e-05, "loss": 2.2518, "step": 5763000 }, { "epoch": 16.68, "learning_rate": 4.1661548548073365e-05, "loss": 2.2193, "step": 5763500 }, { "epoch": 16.68, "learning_rate": 4.166082490042609e-05, "loss": 2.2464, "step": 5764000 }, { "epoch": 16.69, "learning_rate": 4.166010125277881e-05, "loss": 2.2572, "step": 5764500 }, { "epoch": 16.69, "learning_rate": 4.165937760513153e-05, "loss": 2.2372, "step": 5765000 }, { "epoch": 16.69, "learning_rate": 4.1658653957484254e-05, "loss": 2.2386, "step": 5765500 }, { "epoch": 16.69, "learning_rate": 4.1657930309836976e-05, "loss": 2.2377, "step": 5766000 }, { "epoch": 16.69, "learning_rate": 4.16572066621897e-05, "loss": 2.2366, "step": 5766500 }, { "epoch": 16.69, "learning_rate": 4.165648446183772e-05, "loss": 2.2515, "step": 5767000 }, { "epoch": 16.69, "learning_rate": 4.165576081419044e-05, "loss": 2.2313, "step": 5767500 }, { "epoch": 16.7, "learning_rate": 4.1655037166543165e-05, "loss": 2.2354, "step": 5768000 }, { "epoch": 16.7, "learning_rate": 4.165431351889589e-05, "loss": 2.2774, "step": 5768500 }, { "epoch": 16.7, "learning_rate": 4.16535913185439e-05, "loss": 2.2458, "step": 5769000 }, { "epoch": 16.7, "learning_rate": 4.1652867670896625e-05, "loss": 2.2429, "step": 5769500 }, { "epoch": 16.7, "learning_rate": 4.165214402324935e-05, "loss": 2.2365, "step": 5770000 }, { "epoch": 16.7, "learning_rate": 4.165142037560208e-05, "loss": 2.2252, "step": 5770500 }, { "epoch": 16.7, "learning_rate": 4.16506967279548e-05, "loss": 2.2382, "step": 5771000 }, { "epoch": 16.71, "learning_rate": 4.164997452760282e-05, "loss": 2.271, "step": 5771500 }, { "epoch": 16.71, "learning_rate": 4.1649250879955544e-05, "loss": 2.2503, "step": 5772000 }, { "epoch": 16.71, "learning_rate": 4.1648527232308266e-05, "loss": 2.2463, "step": 5772500 }, { "epoch": 16.71, "learning_rate": 4.1647806479251575e-05, "loss": 2.2311, "step": 5773000 }, { "epoch": 16.71, "learning_rate": 4.1647082831604304e-05, "loss": 2.2343, "step": 5773500 }, { "epoch": 16.71, "learning_rate": 4.1646359183957026e-05, "loss": 2.2162, "step": 5774000 }, { "epoch": 16.71, "learning_rate": 4.164563553630975e-05, "loss": 2.2256, "step": 5774500 }, { "epoch": 16.72, "learning_rate": 4.164491188866247e-05, "loss": 2.2356, "step": 5775000 }, { "epoch": 16.72, "learning_rate": 4.164418824101519e-05, "loss": 2.2309, "step": 5775500 }, { "epoch": 16.72, "learning_rate": 4.1643464593367915e-05, "loss": 2.2361, "step": 5776000 }, { "epoch": 16.72, "learning_rate": 4.164274094572064e-05, "loss": 2.2515, "step": 5776500 }, { "epoch": 16.72, "learning_rate": 4.164201729807336e-05, "loss": 2.2525, "step": 5777000 }, { "epoch": 16.72, "learning_rate": 4.164129365042608e-05, "loss": 2.2281, "step": 5777500 }, { "epoch": 16.72, "learning_rate": 4.164057000277881e-05, "loss": 2.2653, "step": 5778000 }, { "epoch": 16.73, "learning_rate": 4.163984635513153e-05, "loss": 2.2335, "step": 5778500 }, { "epoch": 16.73, "learning_rate": 4.1639122707484255e-05, "loss": 2.2387, "step": 5779000 }, { "epoch": 16.73, "learning_rate": 4.163839905983698e-05, "loss": 2.2384, "step": 5779500 }, { "epoch": 16.73, "learning_rate": 4.16376754121897e-05, "loss": 2.2411, "step": 5780000 }, { "epoch": 16.73, "learning_rate": 4.163695321183772e-05, "loss": 2.21, "step": 5780500 }, { "epoch": 16.73, "learning_rate": 4.1636229564190444e-05, "loss": 2.2166, "step": 5781000 }, { "epoch": 16.74, "learning_rate": 4.1635505916543166e-05, "loss": 2.236, "step": 5781500 }, { "epoch": 16.74, "learning_rate": 4.163478226889589e-05, "loss": 2.2334, "step": 5782000 }, { "epoch": 16.74, "learning_rate": 4.163405862124861e-05, "loss": 2.2427, "step": 5782500 }, { "epoch": 16.74, "learning_rate": 4.163333497360133e-05, "loss": 2.2024, "step": 5783000 }, { "epoch": 16.74, "learning_rate": 4.1632612773249355e-05, "loss": 2.2413, "step": 5783500 }, { "epoch": 16.74, "learning_rate": 4.163188912560208e-05, "loss": 2.2609, "step": 5784000 }, { "epoch": 16.74, "learning_rate": 4.16311654779548e-05, "loss": 2.2348, "step": 5784500 }, { "epoch": 16.75, "learning_rate": 4.1630443277602815e-05, "loss": 2.2206, "step": 5785000 }, { "epoch": 16.75, "learning_rate": 4.1629719629955544e-05, "loss": 2.2226, "step": 5785500 }, { "epoch": 16.75, "learning_rate": 4.1628995982308267e-05, "loss": 2.2396, "step": 5786000 }, { "epoch": 16.75, "learning_rate": 4.162827233466099e-05, "loss": 2.233, "step": 5786500 }, { "epoch": 16.75, "learning_rate": 4.162754868701371e-05, "loss": 2.2094, "step": 5787000 }, { "epoch": 16.75, "learning_rate": 4.1626826486661727e-05, "loss": 2.2519, "step": 5787500 }, { "epoch": 16.75, "learning_rate": 4.1626102839014456e-05, "loss": 2.254, "step": 5788000 }, { "epoch": 16.76, "learning_rate": 4.162537919136718e-05, "loss": 2.2462, "step": 5788500 }, { "epoch": 16.76, "learning_rate": 4.16246555437199e-05, "loss": 2.2503, "step": 5789000 }, { "epoch": 16.76, "learning_rate": 4.162393189607262e-05, "loss": 2.2252, "step": 5789500 }, { "epoch": 16.76, "learning_rate": 4.1623208248425345e-05, "loss": 2.2331, "step": 5790000 }, { "epoch": 16.76, "learning_rate": 4.162248604807336e-05, "loss": 2.2449, "step": 5790500 }, { "epoch": 16.76, "learning_rate": 4.162176240042608e-05, "loss": 2.2385, "step": 5791000 }, { "epoch": 16.76, "learning_rate": 4.1621038752778805e-05, "loss": 2.2394, "step": 5791500 }, { "epoch": 16.77, "learning_rate": 4.162031510513153e-05, "loss": 2.2381, "step": 5792000 }, { "epoch": 16.77, "learning_rate": 4.1619591457484256e-05, "loss": 2.2194, "step": 5792500 }, { "epoch": 16.77, "learning_rate": 4.161886780983698e-05, "loss": 2.2398, "step": 5793000 }, { "epoch": 16.77, "learning_rate": 4.161814416218971e-05, "loss": 2.254, "step": 5793500 }, { "epoch": 16.77, "learning_rate": 4.161742196183772e-05, "loss": 2.2258, "step": 5794000 }, { "epoch": 16.77, "learning_rate": 4.1616698314190445e-05, "loss": 2.2349, "step": 5794500 }, { "epoch": 16.77, "learning_rate": 4.161597466654317e-05, "loss": 2.2383, "step": 5795000 }, { "epoch": 16.78, "learning_rate": 4.161525101889589e-05, "loss": 2.2305, "step": 5795500 }, { "epoch": 16.78, "learning_rate": 4.161452737124861e-05, "loss": 2.2364, "step": 5796000 }, { "epoch": 16.78, "learning_rate": 4.1613803723601334e-05, "loss": 2.2383, "step": 5796500 }, { "epoch": 16.78, "learning_rate": 4.1613080075954056e-05, "loss": 2.2558, "step": 5797000 }, { "epoch": 16.78, "learning_rate": 4.161235787560208e-05, "loss": 2.2395, "step": 5797500 }, { "epoch": 16.78, "learning_rate": 4.16116342279548e-05, "loss": 2.2237, "step": 5798000 }, { "epoch": 16.78, "learning_rate": 4.161091058030752e-05, "loss": 2.2406, "step": 5798500 }, { "epoch": 16.79, "learning_rate": 4.1610186932660245e-05, "loss": 2.2415, "step": 5799000 }, { "epoch": 16.79, "learning_rate": 4.160946328501297e-05, "loss": 2.2545, "step": 5799500 }, { "epoch": 16.79, "learning_rate": 4.1608739637365696e-05, "loss": 2.2262, "step": 5800000 }, { "epoch": 16.79, "learning_rate": 4.160801598971842e-05, "loss": 2.2494, "step": 5800500 }, { "epoch": 16.79, "learning_rate": 4.160729234207114e-05, "loss": 2.2401, "step": 5801000 }, { "epoch": 16.79, "learning_rate": 4.160656869442386e-05, "loss": 2.2361, "step": 5801500 }, { "epoch": 16.79, "learning_rate": 4.1605845046776585e-05, "loss": 2.2487, "step": 5802000 }, { "epoch": 16.8, "learning_rate": 4.160512139912931e-05, "loss": 2.2181, "step": 5802500 }, { "epoch": 16.8, "learning_rate": 4.160439775148203e-05, "loss": 2.2287, "step": 5803000 }, { "epoch": 16.8, "learning_rate": 4.160367410383476e-05, "loss": 2.2133, "step": 5803500 }, { "epoch": 16.8, "learning_rate": 4.160295045618748e-05, "loss": 2.2393, "step": 5804000 }, { "epoch": 16.8, "learning_rate": 4.1602228255835496e-05, "loss": 2.2445, "step": 5804500 }, { "epoch": 16.8, "learning_rate": 4.160150460818822e-05, "loss": 2.2198, "step": 5805000 }, { "epoch": 16.8, "learning_rate": 4.160078096054094e-05, "loss": 2.2629, "step": 5805500 }, { "epoch": 16.81, "learning_rate": 4.160005731289366e-05, "loss": 2.2234, "step": 5806000 }, { "epoch": 16.81, "learning_rate": 4.1599333665246385e-05, "loss": 2.2328, "step": 5806500 }, { "epoch": 16.81, "learning_rate": 4.159861146489441e-05, "loss": 2.2305, "step": 5807000 }, { "epoch": 16.81, "learning_rate": 4.159788781724713e-05, "loss": 2.2328, "step": 5807500 }, { "epoch": 16.81, "learning_rate": 4.159716416959986e-05, "loss": 2.2135, "step": 5808000 }, { "epoch": 16.81, "learning_rate": 4.159644052195258e-05, "loss": 2.2485, "step": 5808500 }, { "epoch": 16.81, "learning_rate": 4.1595716874305303e-05, "loss": 2.2221, "step": 5809000 }, { "epoch": 16.82, "learning_rate": 4.159499467395332e-05, "loss": 2.235, "step": 5809500 }, { "epoch": 16.82, "learning_rate": 4.1594272473601335e-05, "loss": 2.2443, "step": 5810000 }, { "epoch": 16.82, "learning_rate": 4.159354882595406e-05, "loss": 2.2377, "step": 5810500 }, { "epoch": 16.82, "learning_rate": 4.159282517830678e-05, "loss": 2.2207, "step": 5811000 }, { "epoch": 16.82, "learning_rate": 4.159210153065951e-05, "loss": 2.2416, "step": 5811500 }, { "epoch": 16.82, "learning_rate": 4.159137788301223e-05, "loss": 2.2351, "step": 5812000 }, { "epoch": 16.82, "learning_rate": 4.159065423536495e-05, "loss": 2.2214, "step": 5812500 }, { "epoch": 16.83, "learning_rate": 4.1589930587717675e-05, "loss": 2.2409, "step": 5813000 }, { "epoch": 16.83, "learning_rate": 4.158920838736569e-05, "loss": 2.2329, "step": 5813500 }, { "epoch": 16.83, "learning_rate": 4.158848473971841e-05, "loss": 2.2352, "step": 5814000 }, { "epoch": 16.83, "learning_rate": 4.1587761092071135e-05, "loss": 2.224, "step": 5814500 }, { "epoch": 16.83, "learning_rate": 4.1587037444423864e-05, "loss": 2.2227, "step": 5815000 }, { "epoch": 16.83, "learning_rate": 4.1586313796776586e-05, "loss": 2.2679, "step": 5815500 }, { "epoch": 16.83, "learning_rate": 4.158559159642461e-05, "loss": 2.2454, "step": 5816000 }, { "epoch": 16.84, "learning_rate": 4.158486794877733e-05, "loss": 2.2545, "step": 5816500 }, { "epoch": 16.84, "learning_rate": 4.158414430113005e-05, "loss": 2.2233, "step": 5817000 }, { "epoch": 16.84, "learning_rate": 4.158342210077807e-05, "loss": 2.2413, "step": 5817500 }, { "epoch": 16.84, "learning_rate": 4.158269845313079e-05, "loss": 2.2272, "step": 5818000 }, { "epoch": 16.84, "learning_rate": 4.158197480548351e-05, "loss": 2.2422, "step": 5818500 }, { "epoch": 16.84, "learning_rate": 4.1581251157836235e-05, "loss": 2.2477, "step": 5819000 }, { "epoch": 16.85, "learning_rate": 4.158052751018896e-05, "loss": 2.225, "step": 5819500 }, { "epoch": 16.85, "learning_rate": 4.1579803862541686e-05, "loss": 2.2269, "step": 5820000 }, { "epoch": 16.85, "learning_rate": 4.15790816621897e-05, "loss": 2.2428, "step": 5820500 }, { "epoch": 16.85, "learning_rate": 4.1578358014542424e-05, "loss": 2.2336, "step": 5821000 }, { "epoch": 16.85, "learning_rate": 4.1577634366895146e-05, "loss": 2.2318, "step": 5821500 }, { "epoch": 16.85, "learning_rate": 4.157691071924787e-05, "loss": 2.2279, "step": 5822000 }, { "epoch": 16.85, "learning_rate": 4.15761870716006e-05, "loss": 2.2411, "step": 5822500 }, { "epoch": 16.86, "learning_rate": 4.157546342395332e-05, "loss": 2.2399, "step": 5823000 }, { "epoch": 16.86, "learning_rate": 4.157473977630604e-05, "loss": 2.2381, "step": 5823500 }, { "epoch": 16.86, "learning_rate": 4.1574016128658764e-05, "loss": 2.2219, "step": 5824000 }, { "epoch": 16.86, "learning_rate": 4.1573292481011486e-05, "loss": 2.2758, "step": 5824500 }, { "epoch": 16.86, "learning_rate": 4.157256883336421e-05, "loss": 2.2148, "step": 5825000 }, { "epoch": 16.86, "learning_rate": 4.157184518571694e-05, "loss": 2.2355, "step": 5825500 }, { "epoch": 16.86, "learning_rate": 4.157112153806966e-05, "loss": 2.2321, "step": 5826000 }, { "epoch": 16.87, "learning_rate": 4.157039789042238e-05, "loss": 2.2578, "step": 5826500 }, { "epoch": 16.87, "learning_rate": 4.1569674242775104e-05, "loss": 2.253, "step": 5827000 }, { "epoch": 16.87, "learning_rate": 4.156895204242312e-05, "loss": 2.2441, "step": 5827500 }, { "epoch": 16.87, "learning_rate": 4.1568229842071136e-05, "loss": 2.2211, "step": 5828000 }, { "epoch": 16.87, "learning_rate": 4.156750619442386e-05, "loss": 2.2403, "step": 5828500 }, { "epoch": 16.87, "learning_rate": 4.156678254677659e-05, "loss": 2.2379, "step": 5829000 }, { "epoch": 16.87, "learning_rate": 4.156605889912931e-05, "loss": 2.2092, "step": 5829500 }, { "epoch": 16.88, "learning_rate": 4.156533525148204e-05, "loss": 2.2415, "step": 5830000 }, { "epoch": 16.88, "learning_rate": 4.156461160383476e-05, "loss": 2.2189, "step": 5830500 }, { "epoch": 16.88, "learning_rate": 4.156388795618748e-05, "loss": 2.2667, "step": 5831000 }, { "epoch": 16.88, "learning_rate": 4.1563164308540205e-05, "loss": 2.2317, "step": 5831500 }, { "epoch": 16.88, "learning_rate": 4.156244066089293e-05, "loss": 2.2311, "step": 5832000 }, { "epoch": 16.88, "learning_rate": 4.156171846054094e-05, "loss": 2.2296, "step": 5832500 }, { "epoch": 16.88, "learning_rate": 4.1560994812893665e-05, "loss": 2.2225, "step": 5833000 }, { "epoch": 16.89, "learning_rate": 4.156027116524639e-05, "loss": 2.2423, "step": 5833500 }, { "epoch": 16.89, "learning_rate": 4.155954751759911e-05, "loss": 2.236, "step": 5834000 }, { "epoch": 16.89, "learning_rate": 4.155882531724713e-05, "loss": 2.2155, "step": 5834500 }, { "epoch": 16.89, "learning_rate": 4.1558101669599854e-05, "loss": 2.2543, "step": 5835000 }, { "epoch": 16.89, "learning_rate": 4.1557378021952576e-05, "loss": 2.2134, "step": 5835500 }, { "epoch": 16.89, "learning_rate": 4.155665582160059e-05, "loss": 2.2532, "step": 5836000 }, { "epoch": 16.89, "learning_rate": 4.1555932173953314e-05, "loss": 2.2303, "step": 5836500 }, { "epoch": 16.9, "learning_rate": 4.1555208526306036e-05, "loss": 2.2281, "step": 5837000 }, { "epoch": 16.9, "learning_rate": 4.1554484878658765e-05, "loss": 2.2308, "step": 5837500 }, { "epoch": 16.9, "learning_rate": 4.155376267830679e-05, "loss": 2.2503, "step": 5838000 }, { "epoch": 16.9, "learning_rate": 4.155303903065951e-05, "loss": 2.2142, "step": 5838500 }, { "epoch": 16.9, "learning_rate": 4.155231538301223e-05, "loss": 2.2159, "step": 5839000 }, { "epoch": 16.9, "learning_rate": 4.1551591735364954e-05, "loss": 2.2449, "step": 5839500 }, { "epoch": 16.9, "learning_rate": 4.1550868087717676e-05, "loss": 2.261, "step": 5840000 }, { "epoch": 16.91, "learning_rate": 4.15501444400704e-05, "loss": 2.2489, "step": 5840500 }, { "epoch": 16.91, "learning_rate": 4.154942079242312e-05, "loss": 2.2154, "step": 5841000 }, { "epoch": 16.91, "learning_rate": 4.1548698592071136e-05, "loss": 2.2447, "step": 5841500 }, { "epoch": 16.91, "learning_rate": 4.1547974944423865e-05, "loss": 2.2391, "step": 5842000 }, { "epoch": 16.91, "learning_rate": 4.154725129677659e-05, "loss": 2.2333, "step": 5842500 }, { "epoch": 16.91, "learning_rate": 4.154652764912931e-05, "loss": 2.2209, "step": 5843000 }, { "epoch": 16.91, "learning_rate": 4.154580400148203e-05, "loss": 2.2522, "step": 5843500 }, { "epoch": 16.92, "learning_rate": 4.154508180113005e-05, "loss": 2.2379, "step": 5844000 }, { "epoch": 16.92, "learning_rate": 4.154435815348277e-05, "loss": 2.222, "step": 5844500 }, { "epoch": 16.92, "learning_rate": 4.15436345058355e-05, "loss": 2.2342, "step": 5845000 }, { "epoch": 16.92, "learning_rate": 4.154291085818822e-05, "loss": 2.2183, "step": 5845500 }, { "epoch": 16.92, "learning_rate": 4.154218721054094e-05, "loss": 2.2462, "step": 5846000 }, { "epoch": 16.92, "learning_rate": 4.1541465010188966e-05, "loss": 2.222, "step": 5846500 }, { "epoch": 16.92, "learning_rate": 4.154074136254169e-05, "loss": 2.2497, "step": 5847000 }, { "epoch": 16.93, "learning_rate": 4.154001771489441e-05, "loss": 2.2047, "step": 5847500 }, { "epoch": 16.93, "learning_rate": 4.153929406724713e-05, "loss": 2.2544, "step": 5848000 }, { "epoch": 16.93, "learning_rate": 4.153857186689515e-05, "loss": 2.2127, "step": 5848500 }, { "epoch": 16.93, "learning_rate": 4.153784821924787e-05, "loss": 2.2232, "step": 5849000 }, { "epoch": 16.93, "learning_rate": 4.153712457160059e-05, "loss": 2.2222, "step": 5849500 }, { "epoch": 16.93, "learning_rate": 4.1536400923953315e-05, "loss": 2.2445, "step": 5850000 }, { "epoch": 16.93, "learning_rate": 4.153567727630604e-05, "loss": 2.2141, "step": 5850500 }, { "epoch": 16.94, "learning_rate": 4.1534953628658766e-05, "loss": 2.2186, "step": 5851000 }, { "epoch": 16.94, "learning_rate": 4.153422998101149e-05, "loss": 2.2291, "step": 5851500 }, { "epoch": 16.94, "learning_rate": 4.153350633336421e-05, "loss": 2.248, "step": 5852000 }, { "epoch": 16.94, "learning_rate": 4.153278268571694e-05, "loss": 2.236, "step": 5852500 }, { "epoch": 16.94, "learning_rate": 4.153205903806966e-05, "loss": 2.2105, "step": 5853000 }, { "epoch": 16.94, "learning_rate": 4.1531335390422384e-05, "loss": 2.2426, "step": 5853500 }, { "epoch": 16.94, "learning_rate": 4.1530611742775106e-05, "loss": 2.2113, "step": 5854000 }, { "epoch": 16.95, "learning_rate": 4.152988809512783e-05, "loss": 2.2641, "step": 5854500 }, { "epoch": 16.95, "learning_rate": 4.152916444748055e-05, "loss": 2.2335, "step": 5855000 }, { "epoch": 16.95, "learning_rate": 4.1528442247128566e-05, "loss": 2.2452, "step": 5855500 }, { "epoch": 16.95, "learning_rate": 4.152771859948129e-05, "loss": 2.2259, "step": 5856000 }, { "epoch": 16.95, "learning_rate": 4.152699639912931e-05, "loss": 2.2285, "step": 5856500 }, { "epoch": 16.95, "learning_rate": 4.152627275148203e-05, "loss": 2.2446, "step": 5857000 }, { "epoch": 16.96, "learning_rate": 4.152555055113005e-05, "loss": 2.2468, "step": 5857500 }, { "epoch": 16.96, "learning_rate": 4.152482690348277e-05, "loss": 2.2476, "step": 5858000 }, { "epoch": 16.96, "learning_rate": 4.152410325583549e-05, "loss": 2.2425, "step": 5858500 }, { "epoch": 16.96, "learning_rate": 4.1523379608188215e-05, "loss": 2.2513, "step": 5859000 }, { "epoch": 16.96, "learning_rate": 4.152265596054094e-05, "loss": 2.2321, "step": 5859500 }, { "epoch": 16.96, "learning_rate": 4.1521932312893666e-05, "loss": 2.2287, "step": 5860000 }, { "epoch": 16.96, "learning_rate": 4.152120866524639e-05, "loss": 2.2262, "step": 5860500 }, { "epoch": 16.97, "learning_rate": 4.152048501759912e-05, "loss": 2.2664, "step": 5861000 }, { "epoch": 16.97, "learning_rate": 4.151976136995184e-05, "loss": 2.2249, "step": 5861500 }, { "epoch": 16.97, "learning_rate": 4.151903772230456e-05, "loss": 2.222, "step": 5862000 }, { "epoch": 16.97, "learning_rate": 4.1518314074657284e-05, "loss": 2.2338, "step": 5862500 }, { "epoch": 16.97, "learning_rate": 4.15175918743053e-05, "loss": 2.2427, "step": 5863000 }, { "epoch": 16.97, "learning_rate": 4.151686822665802e-05, "loss": 2.2541, "step": 5863500 }, { "epoch": 16.97, "learning_rate": 4.1516144579010744e-05, "loss": 2.2571, "step": 5864000 }, { "epoch": 16.98, "learning_rate": 4.1515420931363467e-05, "loss": 2.233, "step": 5864500 }, { "epoch": 16.98, "learning_rate": 4.151469873101149e-05, "loss": 2.221, "step": 5865000 }, { "epoch": 16.98, "learning_rate": 4.151397508336421e-05, "loss": 2.2408, "step": 5865500 }, { "epoch": 16.98, "learning_rate": 4.151325143571693e-05, "loss": 2.2213, "step": 5866000 }, { "epoch": 16.98, "learning_rate": 4.1512527788069656e-05, "loss": 2.2391, "step": 5866500 }, { "epoch": 16.98, "learning_rate": 4.1511804140422385e-05, "loss": 2.2071, "step": 5867000 }, { "epoch": 16.98, "learning_rate": 4.15110819400704e-05, "loss": 2.2323, "step": 5867500 }, { "epoch": 16.99, "learning_rate": 4.1510359739718416e-05, "loss": 2.2054, "step": 5868000 }, { "epoch": 16.99, "learning_rate": 4.1509636092071145e-05, "loss": 2.2437, "step": 5868500 }, { "epoch": 16.99, "learning_rate": 4.150891244442387e-05, "loss": 2.2286, "step": 5869000 }, { "epoch": 16.99, "learning_rate": 4.150818879677659e-05, "loss": 2.2207, "step": 5869500 }, { "epoch": 16.99, "learning_rate": 4.150746514912931e-05, "loss": 2.2348, "step": 5870000 }, { "epoch": 16.99, "learning_rate": 4.1506741501482034e-05, "loss": 2.2055, "step": 5870500 }, { "epoch": 16.99, "learning_rate": 4.1506017853834756e-05, "loss": 2.2378, "step": 5871000 }, { "epoch": 17.0, "learning_rate": 4.150529420618748e-05, "loss": 2.2333, "step": 5871500 }, { "epoch": 17.0, "learning_rate": 4.1504572005835494e-05, "loss": 2.2233, "step": 5872000 }, { "epoch": 17.0, "learning_rate": 4.1503848358188216e-05, "loss": 2.2372, "step": 5872500 }, { "epoch": 17.0, "learning_rate": 4.1503124710540945e-05, "loss": 2.231, "step": 5873000 }, { "epoch": 17.0, "eval_accuracy": 0.6563087216406899, "eval_accuracy_mlm": 0.6194239431791272, "eval_accuracy_nsp": 0.8540443552267947, "eval_loss": 2.2509899139404297, "eval_runtime": 330.669, "eval_samples_per_second": 1319.707, "eval_steps_per_second": 54.989, "step": 5873024 }, { "epoch": 17.0, "learning_rate": 4.150240106289367e-05, "loss": 2.2432, "step": 5873500 }, { "epoch": 17.0, "learning_rate": 4.150167886254168e-05, "loss": 2.2048, "step": 5874000 }, { "epoch": 17.0, "learning_rate": 4.1500955214894405e-05, "loss": 2.2046, "step": 5874500 }, { "epoch": 17.01, "learning_rate": 4.1500231567247134e-05, "loss": 2.2007, "step": 5875000 }, { "epoch": 17.01, "learning_rate": 4.1499507919599856e-05, "loss": 2.1957, "step": 5875500 }, { "epoch": 17.01, "learning_rate": 4.149878427195258e-05, "loss": 2.2181, "step": 5876000 }, { "epoch": 17.01, "learning_rate": 4.14980606243053e-05, "loss": 2.2094, "step": 5876500 }, { "epoch": 17.01, "learning_rate": 4.149733697665802e-05, "loss": 2.2268, "step": 5877000 }, { "epoch": 17.01, "learning_rate": 4.1496613329010745e-05, "loss": 2.2206, "step": 5877500 }, { "epoch": 17.01, "learning_rate": 4.149588968136347e-05, "loss": 2.2188, "step": 5878000 }, { "epoch": 17.02, "learning_rate": 4.149516748101149e-05, "loss": 2.1926, "step": 5878500 }, { "epoch": 17.02, "learning_rate": 4.149444383336421e-05, "loss": 2.2231, "step": 5879000 }, { "epoch": 17.02, "learning_rate": 4.1493720185716934e-05, "loss": 2.2437, "step": 5879500 }, { "epoch": 17.02, "learning_rate": 4.1492996538069656e-05, "loss": 2.2163, "step": 5880000 }, { "epoch": 17.02, "learning_rate": 4.149227289042238e-05, "loss": 2.2195, "step": 5880500 }, { "epoch": 17.02, "learning_rate": 4.14915492427751e-05, "loss": 2.2178, "step": 5881000 }, { "epoch": 17.02, "learning_rate": 4.1490827042423116e-05, "loss": 2.2021, "step": 5881500 }, { "epoch": 17.03, "learning_rate": 4.1490103394775845e-05, "loss": 2.198, "step": 5882000 }, { "epoch": 17.03, "learning_rate": 4.148937974712857e-05, "loss": 2.2219, "step": 5882500 }, { "epoch": 17.03, "learning_rate": 4.14886560994813e-05, "loss": 2.2026, "step": 5883000 }, { "epoch": 17.03, "learning_rate": 4.148793245183402e-05, "loss": 2.1919, "step": 5883500 }, { "epoch": 17.03, "learning_rate": 4.148720880418674e-05, "loss": 2.2059, "step": 5884000 }, { "epoch": 17.03, "learning_rate": 4.148648515653946e-05, "loss": 2.2266, "step": 5884500 }, { "epoch": 17.03, "learning_rate": 4.1485761508892186e-05, "loss": 2.1901, "step": 5885000 }, { "epoch": 17.04, "learning_rate": 4.148503786124491e-05, "loss": 2.2351, "step": 5885500 }, { "epoch": 17.04, "learning_rate": 4.148431421359763e-05, "loss": 2.2107, "step": 5886000 }, { "epoch": 17.04, "learning_rate": 4.1483592013245646e-05, "loss": 2.1958, "step": 5886500 }, { "epoch": 17.04, "learning_rate": 4.148286836559837e-05, "loss": 2.2222, "step": 5887000 }, { "epoch": 17.04, "learning_rate": 4.14821447179511e-05, "loss": 2.2089, "step": 5887500 }, { "epoch": 17.04, "learning_rate": 4.148142107030382e-05, "loss": 2.2168, "step": 5888000 }, { "epoch": 17.04, "learning_rate": 4.148069742265654e-05, "loss": 2.2349, "step": 5888500 }, { "epoch": 17.05, "learning_rate": 4.147997522230456e-05, "loss": 2.2073, "step": 5889000 }, { "epoch": 17.05, "learning_rate": 4.1479251574657286e-05, "loss": 2.2031, "step": 5889500 }, { "epoch": 17.05, "learning_rate": 4.147852792701001e-05, "loss": 2.2241, "step": 5890000 }, { "epoch": 17.05, "learning_rate": 4.147780427936273e-05, "loss": 2.2123, "step": 5890500 }, { "epoch": 17.05, "learning_rate": 4.1477082079010746e-05, "loss": 2.2164, "step": 5891000 }, { "epoch": 17.05, "learning_rate": 4.147635843136347e-05, "loss": 2.215, "step": 5891500 }, { "epoch": 17.05, "learning_rate": 4.14756347837162e-05, "loss": 2.2231, "step": 5892000 }, { "epoch": 17.06, "learning_rate": 4.147491258336421e-05, "loss": 2.2469, "step": 5892500 }, { "epoch": 17.06, "learning_rate": 4.1474188935716935e-05, "loss": 2.2116, "step": 5893000 }, { "epoch": 17.06, "learning_rate": 4.147346528806966e-05, "loss": 2.1982, "step": 5893500 }, { "epoch": 17.06, "learning_rate": 4.147274164042238e-05, "loss": 2.2192, "step": 5894000 }, { "epoch": 17.06, "learning_rate": 4.14720179927751e-05, "loss": 2.2086, "step": 5894500 }, { "epoch": 17.06, "learning_rate": 4.147129723971842e-05, "loss": 2.2327, "step": 5895000 }, { "epoch": 17.07, "learning_rate": 4.147057359207114e-05, "loss": 2.223, "step": 5895500 }, { "epoch": 17.07, "learning_rate": 4.146984994442386e-05, "loss": 2.2192, "step": 5896000 }, { "epoch": 17.07, "learning_rate": 4.1469126296776584e-05, "loss": 2.2157, "step": 5896500 }, { "epoch": 17.07, "learning_rate": 4.1468402649129306e-05, "loss": 2.2177, "step": 5897000 }, { "epoch": 17.07, "learning_rate": 4.1467679001482035e-05, "loss": 2.2244, "step": 5897500 }, { "epoch": 17.07, "learning_rate": 4.146695535383476e-05, "loss": 2.2085, "step": 5898000 }, { "epoch": 17.07, "learning_rate": 4.146623170618748e-05, "loss": 2.2109, "step": 5898500 }, { "epoch": 17.08, "learning_rate": 4.14655080585402e-05, "loss": 2.2346, "step": 5899000 }, { "epoch": 17.08, "learning_rate": 4.1464785858188224e-05, "loss": 2.2155, "step": 5899500 }, { "epoch": 17.08, "learning_rate": 4.1464062210540946e-05, "loss": 2.2064, "step": 5900000 }, { "epoch": 17.08, "learning_rate": 4.146333856289367e-05, "loss": 2.2143, "step": 5900500 }, { "epoch": 17.08, "learning_rate": 4.146261491524639e-05, "loss": 2.2289, "step": 5901000 }, { "epoch": 17.08, "learning_rate": 4.1461892714894407e-05, "loss": 2.2226, "step": 5901500 }, { "epoch": 17.08, "learning_rate": 4.146116906724713e-05, "loss": 2.2197, "step": 5902000 }, { "epoch": 17.09, "learning_rate": 4.1460446866895144e-05, "loss": 2.2094, "step": 5902500 }, { "epoch": 17.09, "learning_rate": 4.145972466654317e-05, "loss": 2.2335, "step": 5903000 }, { "epoch": 17.09, "learning_rate": 4.145900101889589e-05, "loss": 2.2047, "step": 5903500 }, { "epoch": 17.09, "learning_rate": 4.1458278818543904e-05, "loss": 2.2183, "step": 5904000 }, { "epoch": 17.09, "learning_rate": 4.145755517089663e-05, "loss": 2.219, "step": 5904500 }, { "epoch": 17.09, "learning_rate": 4.145683152324935e-05, "loss": 2.2002, "step": 5905000 }, { "epoch": 17.09, "learning_rate": 4.145610787560208e-05, "loss": 2.2295, "step": 5905500 }, { "epoch": 17.1, "learning_rate": 4.14553842279548e-05, "loss": 2.2174, "step": 5906000 }, { "epoch": 17.1, "learning_rate": 4.145466058030752e-05, "loss": 2.2399, "step": 5906500 }, { "epoch": 17.1, "learning_rate": 4.1453936932660245e-05, "loss": 2.207, "step": 5907000 }, { "epoch": 17.1, "learning_rate": 4.1453213285012974e-05, "loss": 2.2205, "step": 5907500 }, { "epoch": 17.1, "learning_rate": 4.1452489637365696e-05, "loss": 2.223, "step": 5908000 }, { "epoch": 17.1, "learning_rate": 4.145176598971842e-05, "loss": 2.1971, "step": 5908500 }, { "epoch": 17.1, "learning_rate": 4.145104234207114e-05, "loss": 2.215, "step": 5909000 }, { "epoch": 17.11, "learning_rate": 4.145031869442386e-05, "loss": 2.2072, "step": 5909500 }, { "epoch": 17.11, "learning_rate": 4.1449595046776585e-05, "loss": 2.2047, "step": 5910000 }, { "epoch": 17.11, "learning_rate": 4.144887139912931e-05, "loss": 2.2288, "step": 5910500 }, { "epoch": 17.11, "learning_rate": 4.144814775148203e-05, "loss": 2.2494, "step": 5911000 }, { "epoch": 17.11, "learning_rate": 4.144742410383475e-05, "loss": 2.2137, "step": 5911500 }, { "epoch": 17.11, "learning_rate": 4.1446700456187474e-05, "loss": 2.2265, "step": 5912000 }, { "epoch": 17.11, "learning_rate": 4.14459768085402e-05, "loss": 2.2262, "step": 5912500 }, { "epoch": 17.12, "learning_rate": 4.1445253160892925e-05, "loss": 2.2159, "step": 5913000 }, { "epoch": 17.12, "learning_rate": 4.144452951324565e-05, "loss": 2.2215, "step": 5913500 }, { "epoch": 17.12, "learning_rate": 4.1443805865598376e-05, "loss": 2.2228, "step": 5914000 }, { "epoch": 17.12, "learning_rate": 4.14430822179511e-05, "loss": 2.2173, "step": 5914500 }, { "epoch": 17.12, "learning_rate": 4.144235857030382e-05, "loss": 2.1959, "step": 5915000 }, { "epoch": 17.12, "learning_rate": 4.144163492265654e-05, "loss": 2.2273, "step": 5915500 }, { "epoch": 17.12, "learning_rate": 4.1440911275009265e-05, "loss": 2.2099, "step": 5916000 }, { "epoch": 17.13, "learning_rate": 4.144018762736199e-05, "loss": 2.2272, "step": 5916500 }, { "epoch": 17.13, "learning_rate": 4.143946397971471e-05, "loss": 2.2301, "step": 5917000 }, { "epoch": 17.13, "learning_rate": 4.143874033206743e-05, "loss": 2.2488, "step": 5917500 }, { "epoch": 17.13, "learning_rate": 4.1438016684420154e-05, "loss": 2.2151, "step": 5918000 }, { "epoch": 17.13, "learning_rate": 4.1437293036772876e-05, "loss": 2.2305, "step": 5918500 }, { "epoch": 17.13, "learning_rate": 4.14365708364209e-05, "loss": 2.2166, "step": 5919000 }, { "epoch": 17.13, "learning_rate": 4.143585008336421e-05, "loss": 2.2412, "step": 5919500 }, { "epoch": 17.14, "learning_rate": 4.1435126435716937e-05, "loss": 2.2195, "step": 5920000 }, { "epoch": 17.14, "learning_rate": 4.143440278806966e-05, "loss": 2.2218, "step": 5920500 }, { "epoch": 17.14, "learning_rate": 4.1433680587717674e-05, "loss": 2.2022, "step": 5921000 }, { "epoch": 17.14, "learning_rate": 4.1432956940070397e-05, "loss": 2.2195, "step": 5921500 }, { "epoch": 17.14, "learning_rate": 4.1432233292423126e-05, "loss": 2.207, "step": 5922000 }, { "epoch": 17.14, "learning_rate": 4.143150964477585e-05, "loss": 2.2218, "step": 5922500 }, { "epoch": 17.14, "learning_rate": 4.143078599712857e-05, "loss": 2.2182, "step": 5923000 }, { "epoch": 17.15, "learning_rate": 4.143006234948129e-05, "loss": 2.2339, "step": 5923500 }, { "epoch": 17.15, "learning_rate": 4.1429338701834014e-05, "loss": 2.2244, "step": 5924000 }, { "epoch": 17.15, "learning_rate": 4.142861505418674e-05, "loss": 2.1978, "step": 5924500 }, { "epoch": 17.15, "learning_rate": 4.142789140653946e-05, "loss": 2.2086, "step": 5925000 }, { "epoch": 17.15, "learning_rate": 4.142716775889218e-05, "loss": 2.2234, "step": 5925500 }, { "epoch": 17.15, "learning_rate": 4.1426444111244903e-05, "loss": 2.23, "step": 5926000 }, { "epoch": 17.15, "learning_rate": 4.1425720463597626e-05, "loss": 2.205, "step": 5926500 }, { "epoch": 17.16, "learning_rate": 4.1424996815950355e-05, "loss": 2.2129, "step": 5927000 }, { "epoch": 17.16, "learning_rate": 4.142427316830308e-05, "loss": 2.2143, "step": 5927500 }, { "epoch": 17.16, "learning_rate": 4.14235509679511e-05, "loss": 2.2141, "step": 5928000 }, { "epoch": 17.16, "learning_rate": 4.142282732030382e-05, "loss": 2.2295, "step": 5928500 }, { "epoch": 17.16, "learning_rate": 4.142210511995184e-05, "loss": 2.2348, "step": 5929000 }, { "epoch": 17.16, "learning_rate": 4.142138147230456e-05, "loss": 2.2171, "step": 5929500 }, { "epoch": 17.16, "learning_rate": 4.142065782465728e-05, "loss": 2.1984, "step": 5930000 }, { "epoch": 17.17, "learning_rate": 4.1419934177010004e-05, "loss": 2.2053, "step": 5930500 }, { "epoch": 17.17, "learning_rate": 4.1419210529362726e-05, "loss": 2.2126, "step": 5931000 }, { "epoch": 17.17, "learning_rate": 4.141848688171545e-05, "loss": 2.2197, "step": 5931500 }, { "epoch": 17.17, "learning_rate": 4.141776323406818e-05, "loss": 2.2128, "step": 5932000 }, { "epoch": 17.17, "learning_rate": 4.14170395864209e-05, "loss": 2.2148, "step": 5932500 }, { "epoch": 17.17, "learning_rate": 4.141631593877362e-05, "loss": 2.2113, "step": 5933000 }, { "epoch": 17.18, "learning_rate": 4.141559518571693e-05, "loss": 2.218, "step": 5933500 }, { "epoch": 17.18, "learning_rate": 4.141487153806965e-05, "loss": 2.2406, "step": 5934000 }, { "epoch": 17.18, "learning_rate": 4.141414789042238e-05, "loss": 2.2155, "step": 5934500 }, { "epoch": 17.18, "learning_rate": 4.1413424242775104e-05, "loss": 2.209, "step": 5935000 }, { "epoch": 17.18, "learning_rate": 4.1412700595127826e-05, "loss": 2.2397, "step": 5935500 }, { "epoch": 17.18, "learning_rate": 4.1411976947480555e-05, "loss": 2.2202, "step": 5936000 }, { "epoch": 17.18, "learning_rate": 4.141125329983328e-05, "loss": 2.2082, "step": 5936500 }, { "epoch": 17.19, "learning_rate": 4.1410529652186e-05, "loss": 2.1924, "step": 5937000 }, { "epoch": 17.19, "learning_rate": 4.140980600453872e-05, "loss": 2.1974, "step": 5937500 }, { "epoch": 17.19, "learning_rate": 4.1409082356891444e-05, "loss": 2.239, "step": 5938000 }, { "epoch": 17.19, "learning_rate": 4.140836015653946e-05, "loss": 2.1973, "step": 5938500 }, { "epoch": 17.19, "learning_rate": 4.140763650889218e-05, "loss": 2.2194, "step": 5939000 }, { "epoch": 17.19, "learning_rate": 4.1406912861244904e-05, "loss": 2.2259, "step": 5939500 }, { "epoch": 17.19, "learning_rate": 4.1406189213597626e-05, "loss": 2.2389, "step": 5940000 }, { "epoch": 17.2, "learning_rate": 4.140546701324565e-05, "loss": 2.2197, "step": 5940500 }, { "epoch": 17.2, "learning_rate": 4.140474336559837e-05, "loss": 2.2306, "step": 5941000 }, { "epoch": 17.2, "learning_rate": 4.140401971795109e-05, "loss": 2.2155, "step": 5941500 }, { "epoch": 17.2, "learning_rate": 4.140329751759911e-05, "loss": 2.2033, "step": 5942000 }, { "epoch": 17.2, "learning_rate": 4.140257386995184e-05, "loss": 2.2075, "step": 5942500 }, { "epoch": 17.2, "learning_rate": 4.140185022230456e-05, "loss": 2.1984, "step": 5943000 }, { "epoch": 17.2, "learning_rate": 4.140112657465728e-05, "loss": 2.1966, "step": 5943500 }, { "epoch": 17.21, "learning_rate": 4.1400402927010005e-05, "loss": 2.2212, "step": 5944000 }, { "epoch": 17.21, "learning_rate": 4.139967927936273e-05, "loss": 2.2123, "step": 5944500 }, { "epoch": 17.21, "learning_rate": 4.1398955631715456e-05, "loss": 2.2164, "step": 5945000 }, { "epoch": 17.21, "learning_rate": 4.139823198406818e-05, "loss": 2.2303, "step": 5945500 }, { "epoch": 17.21, "learning_rate": 4.13975083364209e-05, "loss": 2.2319, "step": 5946000 }, { "epoch": 17.21, "learning_rate": 4.139678468877362e-05, "loss": 2.1922, "step": 5946500 }, { "epoch": 17.21, "learning_rate": 4.139606248842164e-05, "loss": 2.2289, "step": 5947000 }, { "epoch": 17.22, "learning_rate": 4.139533884077436e-05, "loss": 2.2193, "step": 5947500 }, { "epoch": 17.22, "learning_rate": 4.139461519312708e-05, "loss": 2.2099, "step": 5948000 }, { "epoch": 17.22, "learning_rate": 4.1393891545479805e-05, "loss": 2.2326, "step": 5948500 }, { "epoch": 17.22, "learning_rate": 4.139316789783253e-05, "loss": 2.2233, "step": 5949000 }, { "epoch": 17.22, "learning_rate": 4.1392444250185256e-05, "loss": 2.2409, "step": 5949500 }, { "epoch": 17.22, "learning_rate": 4.139172060253798e-05, "loss": 2.2376, "step": 5950000 }, { "epoch": 17.22, "learning_rate": 4.139099695489071e-05, "loss": 2.1995, "step": 5950500 }, { "epoch": 17.23, "learning_rate": 4.139027475453872e-05, "loss": 2.2243, "step": 5951000 }, { "epoch": 17.23, "learning_rate": 4.1389551106891445e-05, "loss": 2.2207, "step": 5951500 }, { "epoch": 17.23, "learning_rate": 4.138882745924417e-05, "loss": 2.2066, "step": 5952000 }, { "epoch": 17.23, "learning_rate": 4.138810381159689e-05, "loss": 2.2117, "step": 5952500 }, { "epoch": 17.23, "learning_rate": 4.138738016394961e-05, "loss": 2.2007, "step": 5953000 }, { "epoch": 17.23, "learning_rate": 4.1386656516302334e-05, "loss": 2.229, "step": 5953500 }, { "epoch": 17.23, "learning_rate": 4.1385932868655056e-05, "loss": 2.1929, "step": 5954000 }, { "epoch": 17.24, "learning_rate": 4.138520922100778e-05, "loss": 2.2269, "step": 5954500 }, { "epoch": 17.24, "learning_rate": 4.1384488467951094e-05, "loss": 2.2037, "step": 5955000 }, { "epoch": 17.24, "learning_rate": 4.1383764820303816e-05, "loss": 2.2114, "step": 5955500 }, { "epoch": 17.24, "learning_rate": 4.138304117265654e-05, "loss": 2.2164, "step": 5956000 }, { "epoch": 17.24, "learning_rate": 4.138231752500926e-05, "loss": 2.2139, "step": 5956500 }, { "epoch": 17.24, "learning_rate": 4.138159532465728e-05, "loss": 2.2319, "step": 5957000 }, { "epoch": 17.24, "learning_rate": 4.1380871677010005e-05, "loss": 2.2393, "step": 5957500 }, { "epoch": 17.25, "learning_rate": 4.138014802936273e-05, "loss": 2.2116, "step": 5958000 }, { "epoch": 17.25, "learning_rate": 4.1379424381715457e-05, "loss": 2.1985, "step": 5958500 }, { "epoch": 17.25, "learning_rate": 4.137870073406818e-05, "loss": 2.2126, "step": 5959000 }, { "epoch": 17.25, "learning_rate": 4.1377978533716194e-05, "loss": 2.2138, "step": 5959500 }, { "epoch": 17.25, "learning_rate": 4.1377254886068917e-05, "loss": 2.2288, "step": 5960000 }, { "epoch": 17.25, "learning_rate": 4.137653268571693e-05, "loss": 2.1827, "step": 5960500 }, { "epoch": 17.25, "learning_rate": 4.1375809038069654e-05, "loss": 2.2089, "step": 5961000 }, { "epoch": 17.26, "learning_rate": 4.137508539042238e-05, "loss": 2.2098, "step": 5961500 }, { "epoch": 17.26, "learning_rate": 4.1374361742775106e-05, "loss": 2.2312, "step": 5962000 }, { "epoch": 17.26, "learning_rate": 4.137363809512783e-05, "loss": 2.2421, "step": 5962500 }, { "epoch": 17.26, "learning_rate": 4.137291444748055e-05, "loss": 2.2033, "step": 5963000 }, { "epoch": 17.26, "learning_rate": 4.137219079983327e-05, "loss": 2.1985, "step": 5963500 }, { "epoch": 17.26, "learning_rate": 4.1371467152185995e-05, "loss": 2.2188, "step": 5964000 }, { "epoch": 17.26, "learning_rate": 4.1370743504538724e-05, "loss": 2.2023, "step": 5964500 }, { "epoch": 17.27, "learning_rate": 4.1370019856891446e-05, "loss": 2.2072, "step": 5965000 }, { "epoch": 17.27, "learning_rate": 4.136929765653946e-05, "loss": 2.206, "step": 5965500 }, { "epoch": 17.27, "learning_rate": 4.1368574008892184e-05, "loss": 2.2288, "step": 5966000 }, { "epoch": 17.27, "learning_rate": 4.1367851808540206e-05, "loss": 2.2124, "step": 5966500 }, { "epoch": 17.27, "learning_rate": 4.136712816089293e-05, "loss": 2.2302, "step": 5967000 }, { "epoch": 17.27, "learning_rate": 4.136640451324565e-05, "loss": 2.2224, "step": 5967500 }, { "epoch": 17.27, "learning_rate": 4.136568086559837e-05, "loss": 2.2095, "step": 5968000 }, { "epoch": 17.28, "learning_rate": 4.136495866524639e-05, "loss": 2.2347, "step": 5968500 }, { "epoch": 17.28, "learning_rate": 4.136423501759911e-05, "loss": 2.2154, "step": 5969000 }, { "epoch": 17.28, "learning_rate": 4.136351136995183e-05, "loss": 2.2199, "step": 5969500 }, { "epoch": 17.28, "learning_rate": 4.1362787722304555e-05, "loss": 2.2188, "step": 5970000 }, { "epoch": 17.28, "learning_rate": 4.136206552195258e-05, "loss": 2.177, "step": 5970500 }, { "epoch": 17.28, "learning_rate": 4.13613418743053e-05, "loss": 2.2213, "step": 5971000 }, { "epoch": 17.29, "learning_rate": 4.136061822665802e-05, "loss": 2.2215, "step": 5971500 }, { "epoch": 17.29, "learning_rate": 4.1359894579010744e-05, "loss": 2.2378, "step": 5972000 }, { "epoch": 17.29, "learning_rate": 4.1359172378658766e-05, "loss": 2.2098, "step": 5972500 }, { "epoch": 17.29, "learning_rate": 4.135844873101149e-05, "loss": 2.2346, "step": 5973000 }, { "epoch": 17.29, "learning_rate": 4.135772508336421e-05, "loss": 2.2305, "step": 5973500 }, { "epoch": 17.29, "learning_rate": 4.135700143571693e-05, "loss": 2.2048, "step": 5974000 }, { "epoch": 17.29, "learning_rate": 4.1356277788069655e-05, "loss": 2.2146, "step": 5974500 }, { "epoch": 17.3, "learning_rate": 4.1355554140422384e-05, "loss": 2.2224, "step": 5975000 }, { "epoch": 17.3, "learning_rate": 4.1354830492775106e-05, "loss": 2.2176, "step": 5975500 }, { "epoch": 17.3, "learning_rate": 4.135410684512783e-05, "loss": 2.2196, "step": 5976000 }, { "epoch": 17.3, "learning_rate": 4.135338319748055e-05, "loss": 2.1781, "step": 5976500 }, { "epoch": 17.3, "learning_rate": 4.135265954983327e-05, "loss": 2.2166, "step": 5977000 }, { "epoch": 17.3, "learning_rate": 4.1351935902185995e-05, "loss": 2.2151, "step": 5977500 }, { "epoch": 17.3, "learning_rate": 4.135121225453872e-05, "loss": 2.2095, "step": 5978000 }, { "epoch": 17.31, "learning_rate": 4.135049005418673e-05, "loss": 2.226, "step": 5978500 }, { "epoch": 17.31, "learning_rate": 4.1349766406539455e-05, "loss": 2.2153, "step": 5979000 }, { "epoch": 17.31, "learning_rate": 4.1349042758892184e-05, "loss": 2.2189, "step": 5979500 }, { "epoch": 17.31, "learning_rate": 4.134832055854021e-05, "loss": 2.1914, "step": 5980000 }, { "epoch": 17.31, "learning_rate": 4.134759691089293e-05, "loss": 2.2224, "step": 5980500 }, { "epoch": 17.31, "learning_rate": 4.134687326324565e-05, "loss": 2.2207, "step": 5981000 }, { "epoch": 17.31, "learning_rate": 4.1346149615598373e-05, "loss": 2.242, "step": 5981500 }, { "epoch": 17.32, "learning_rate": 4.1345425967951096e-05, "loss": 2.2178, "step": 5982000 }, { "epoch": 17.32, "learning_rate": 4.134470232030382e-05, "loss": 2.2047, "step": 5982500 }, { "epoch": 17.32, "learning_rate": 4.134397867265654e-05, "loss": 2.2049, "step": 5983000 }, { "epoch": 17.32, "learning_rate": 4.134325502500926e-05, "loss": 2.2196, "step": 5983500 }, { "epoch": 17.32, "learning_rate": 4.1342531377361985e-05, "loss": 2.2375, "step": 5984000 }, { "epoch": 17.32, "learning_rate": 4.134180772971471e-05, "loss": 2.2045, "step": 5984500 }, { "epoch": 17.32, "learning_rate": 4.1341084082067436e-05, "loss": 2.2109, "step": 5985000 }, { "epoch": 17.33, "learning_rate": 4.134036043442016e-05, "loss": 2.2308, "step": 5985500 }, { "epoch": 17.33, "learning_rate": 4.133963678677288e-05, "loss": 2.2214, "step": 5986000 }, { "epoch": 17.33, "learning_rate": 4.133891313912561e-05, "loss": 2.2142, "step": 5986500 }, { "epoch": 17.33, "learning_rate": 4.133818949147833e-05, "loss": 2.2088, "step": 5987000 }, { "epoch": 17.33, "learning_rate": 4.133746729112635e-05, "loss": 2.2259, "step": 5987500 }, { "epoch": 17.33, "learning_rate": 4.133674364347907e-05, "loss": 2.2281, "step": 5988000 }, { "epoch": 17.33, "learning_rate": 4.133601999583179e-05, "loss": 2.2221, "step": 5988500 }, { "epoch": 17.34, "learning_rate": 4.1335296348184514e-05, "loss": 2.2335, "step": 5989000 }, { "epoch": 17.34, "learning_rate": 4.1334572700537236e-05, "loss": 2.2249, "step": 5989500 }, { "epoch": 17.34, "learning_rate": 4.133384905288996e-05, "loss": 2.2307, "step": 5990000 }, { "epoch": 17.34, "learning_rate": 4.133312685253798e-05, "loss": 2.2204, "step": 5990500 }, { "epoch": 17.34, "learning_rate": 4.13324032048907e-05, "loss": 2.2362, "step": 5991000 }, { "epoch": 17.34, "learning_rate": 4.1331679557243425e-05, "loss": 2.2133, "step": 5991500 }, { "epoch": 17.34, "learning_rate": 4.133095590959615e-05, "loss": 2.225, "step": 5992000 }, { "epoch": 17.35, "learning_rate": 4.133023226194887e-05, "loss": 2.2128, "step": 5992500 }, { "epoch": 17.35, "learning_rate": 4.132950861430159e-05, "loss": 2.2247, "step": 5993000 }, { "epoch": 17.35, "learning_rate": 4.1328784966654314e-05, "loss": 2.2085, "step": 5993500 }, { "epoch": 17.35, "learning_rate": 4.132806131900704e-05, "loss": 2.2409, "step": 5994000 }, { "epoch": 17.35, "learning_rate": 4.1327337671359765e-05, "loss": 2.2208, "step": 5994500 }, { "epoch": 17.35, "learning_rate": 4.132661402371249e-05, "loss": 2.2177, "step": 5995000 }, { "epoch": 17.35, "learning_rate": 4.132589037606521e-05, "loss": 2.2187, "step": 5995500 }, { "epoch": 17.36, "learning_rate": 4.132516817571323e-05, "loss": 2.2128, "step": 5996000 }, { "epoch": 17.36, "learning_rate": 4.132444597536125e-05, "loss": 2.2088, "step": 5996500 }, { "epoch": 17.36, "learning_rate": 4.132372232771397e-05, "loss": 2.1993, "step": 5997000 }, { "epoch": 17.36, "learning_rate": 4.132299868006669e-05, "loss": 2.2243, "step": 5997500 }, { "epoch": 17.36, "learning_rate": 4.1322275032419414e-05, "loss": 2.2051, "step": 5998000 }, { "epoch": 17.36, "learning_rate": 4.1321551384772137e-05, "loss": 2.2305, "step": 5998500 }, { "epoch": 17.36, "learning_rate": 4.132082918442016e-05, "loss": 2.2103, "step": 5999000 }, { "epoch": 17.37, "learning_rate": 4.132010553677288e-05, "loss": 2.2432, "step": 5999500 }, { "epoch": 17.37, "learning_rate": 4.13193818891256e-05, "loss": 2.2062, "step": 6000000 }, { "epoch": 17.37, "learning_rate": 4.131865968877362e-05, "loss": 2.2226, "step": 6000500 }, { "epoch": 17.37, "learning_rate": 4.1317937488421634e-05, "loss": 2.231, "step": 6001000 }, { "epoch": 17.37, "learning_rate": 4.1317213840774363e-05, "loss": 2.2241, "step": 6001500 }, { "epoch": 17.37, "learning_rate": 4.1316490193127086e-05, "loss": 2.2295, "step": 6002000 }, { "epoch": 17.37, "learning_rate": 4.1315766545479815e-05, "loss": 2.2568, "step": 6002500 }, { "epoch": 17.38, "learning_rate": 4.131504289783254e-05, "loss": 2.1964, "step": 6003000 }, { "epoch": 17.38, "learning_rate": 4.131431925018526e-05, "loss": 2.2464, "step": 6003500 }, { "epoch": 17.38, "learning_rate": 4.131359560253798e-05, "loss": 2.2329, "step": 6004000 }, { "epoch": 17.38, "learning_rate": 4.1312871954890704e-05, "loss": 2.2213, "step": 6004500 }, { "epoch": 17.38, "learning_rate": 4.1312148307243426e-05, "loss": 2.2195, "step": 6005000 }, { "epoch": 17.38, "learning_rate": 4.131142465959615e-05, "loss": 2.2116, "step": 6005500 }, { "epoch": 17.38, "learning_rate": 4.131070101194887e-05, "loss": 2.2256, "step": 6006000 }, { "epoch": 17.39, "learning_rate": 4.130997736430159e-05, "loss": 2.2202, "step": 6006500 }, { "epoch": 17.39, "learning_rate": 4.1309253716654315e-05, "loss": 2.2189, "step": 6007000 }, { "epoch": 17.39, "learning_rate": 4.130853006900704e-05, "loss": 2.2343, "step": 6007500 }, { "epoch": 17.39, "learning_rate": 4.1307806421359766e-05, "loss": 2.2334, "step": 6008000 }, { "epoch": 17.39, "learning_rate": 4.130708277371249e-05, "loss": 2.2242, "step": 6008500 }, { "epoch": 17.39, "learning_rate": 4.130635912606521e-05, "loss": 2.2384, "step": 6009000 }, { "epoch": 17.4, "learning_rate": 4.130563692571323e-05, "loss": 2.2254, "step": 6009500 }, { "epoch": 17.4, "learning_rate": 4.130491472536125e-05, "loss": 2.2237, "step": 6010000 }, { "epoch": 17.4, "learning_rate": 4.130419107771397e-05, "loss": 2.2459, "step": 6010500 }, { "epoch": 17.4, "learning_rate": 4.130346743006669e-05, "loss": 2.1959, "step": 6011000 }, { "epoch": 17.4, "learning_rate": 4.1302743782419415e-05, "loss": 2.227, "step": 6011500 }, { "epoch": 17.4, "learning_rate": 4.130202013477214e-05, "loss": 2.2292, "step": 6012000 }, { "epoch": 17.4, "learning_rate": 4.130129938171545e-05, "loss": 2.2362, "step": 6012500 }, { "epoch": 17.41, "learning_rate": 4.1300575734068175e-05, "loss": 2.2331, "step": 6013000 }, { "epoch": 17.41, "learning_rate": 4.12998520864209e-05, "loss": 2.1948, "step": 6013500 }, { "epoch": 17.41, "learning_rate": 4.129912843877362e-05, "loss": 2.2116, "step": 6014000 }, { "epoch": 17.41, "learning_rate": 4.129840479112634e-05, "loss": 2.213, "step": 6014500 }, { "epoch": 17.41, "learning_rate": 4.1297682590774364e-05, "loss": 2.2589, "step": 6015000 }, { "epoch": 17.41, "learning_rate": 4.1296958943127086e-05, "loss": 2.2063, "step": 6015500 }, { "epoch": 17.41, "learning_rate": 4.129623529547981e-05, "loss": 2.2267, "step": 6016000 }, { "epoch": 17.42, "learning_rate": 4.129551164783253e-05, "loss": 2.2402, "step": 6016500 }, { "epoch": 17.42, "learning_rate": 4.129478800018526e-05, "loss": 2.2364, "step": 6017000 }, { "epoch": 17.42, "learning_rate": 4.129406435253798e-05, "loss": 2.2024, "step": 6017500 }, { "epoch": 17.42, "learning_rate": 4.1293340704890704e-05, "loss": 2.2027, "step": 6018000 }, { "epoch": 17.42, "learning_rate": 4.129261705724343e-05, "loss": 2.2173, "step": 6018500 }, { "epoch": 17.42, "learning_rate": 4.129189340959615e-05, "loss": 2.2015, "step": 6019000 }, { "epoch": 17.42, "learning_rate": 4.129116976194887e-05, "loss": 2.2159, "step": 6019500 }, { "epoch": 17.43, "learning_rate": 4.129044611430159e-05, "loss": 2.2043, "step": 6020000 }, { "epoch": 17.43, "learning_rate": 4.1289722466654316e-05, "loss": 2.2265, "step": 6020500 }, { "epoch": 17.43, "learning_rate": 4.128899881900704e-05, "loss": 2.2361, "step": 6021000 }, { "epoch": 17.43, "learning_rate": 4.128827661865506e-05, "loss": 2.2174, "step": 6021500 }, { "epoch": 17.43, "learning_rate": 4.128755297100778e-05, "loss": 2.2198, "step": 6022000 }, { "epoch": 17.43, "learning_rate": 4.12868307706558e-05, "loss": 2.2458, "step": 6022500 }, { "epoch": 17.43, "learning_rate": 4.128610712300852e-05, "loss": 2.2296, "step": 6023000 }, { "epoch": 17.44, "learning_rate": 4.128538347536124e-05, "loss": 2.2208, "step": 6023500 }, { "epoch": 17.44, "learning_rate": 4.1284659827713965e-05, "loss": 2.2054, "step": 6024000 }, { "epoch": 17.44, "learning_rate": 4.1283936180066694e-05, "loss": 2.2295, "step": 6024500 }, { "epoch": 17.44, "learning_rate": 4.1283212532419416e-05, "loss": 2.2287, "step": 6025000 }, { "epoch": 17.44, "learning_rate": 4.128248888477214e-05, "loss": 2.2409, "step": 6025500 }, { "epoch": 17.44, "learning_rate": 4.128176668442016e-05, "loss": 2.242, "step": 6026000 }, { "epoch": 17.44, "learning_rate": 4.1281044484068176e-05, "loss": 2.208, "step": 6026500 }, { "epoch": 17.45, "learning_rate": 4.12803208364209e-05, "loss": 2.2134, "step": 6027000 }, { "epoch": 17.45, "learning_rate": 4.127959718877362e-05, "loss": 2.2397, "step": 6027500 }, { "epoch": 17.45, "learning_rate": 4.127887498842164e-05, "loss": 2.2007, "step": 6028000 }, { "epoch": 17.45, "learning_rate": 4.1278151340774365e-05, "loss": 2.2172, "step": 6028500 }, { "epoch": 17.45, "learning_rate": 4.127742769312709e-05, "loss": 2.2347, "step": 6029000 }, { "epoch": 17.45, "learning_rate": 4.127670404547981e-05, "loss": 2.2233, "step": 6029500 }, { "epoch": 17.45, "learning_rate": 4.127598039783253e-05, "loss": 2.2268, "step": 6030000 }, { "epoch": 17.46, "learning_rate": 4.1275256750185254e-05, "loss": 2.2189, "step": 6030500 }, { "epoch": 17.46, "learning_rate": 4.1274533102537976e-05, "loss": 2.2185, "step": 6031000 }, { "epoch": 17.46, "learning_rate": 4.12738094548907e-05, "loss": 2.2227, "step": 6031500 }, { "epoch": 17.46, "learning_rate": 4.127308580724343e-05, "loss": 2.2355, "step": 6032000 }, { "epoch": 17.46, "learning_rate": 4.127236360689144e-05, "loss": 2.2285, "step": 6032500 }, { "epoch": 17.46, "learning_rate": 4.1271639959244165e-05, "loss": 2.2006, "step": 6033000 }, { "epoch": 17.46, "learning_rate": 4.1270916311596894e-05, "loss": 2.2495, "step": 6033500 }, { "epoch": 17.47, "learning_rate": 4.1270192663949616e-05, "loss": 2.2099, "step": 6034000 }, { "epoch": 17.47, "learning_rate": 4.126947046359763e-05, "loss": 2.2141, "step": 6034500 }, { "epoch": 17.47, "learning_rate": 4.1268746815950354e-05, "loss": 2.2411, "step": 6035000 }, { "epoch": 17.47, "learning_rate": 4.1268023168303076e-05, "loss": 2.2326, "step": 6035500 }, { "epoch": 17.47, "learning_rate": 4.12672995206558e-05, "loss": 2.2273, "step": 6036000 }, { "epoch": 17.47, "learning_rate": 4.126657587300852e-05, "loss": 2.2293, "step": 6036500 }, { "epoch": 17.47, "learning_rate": 4.126585222536124e-05, "loss": 2.2394, "step": 6037000 }, { "epoch": 17.48, "learning_rate": 4.1265130025009266e-05, "loss": 2.2301, "step": 6037500 }, { "epoch": 17.48, "learning_rate": 4.126440637736199e-05, "loss": 2.1974, "step": 6038000 }, { "epoch": 17.48, "learning_rate": 4.126368272971471e-05, "loss": 2.2347, "step": 6038500 }, { "epoch": 17.48, "learning_rate": 4.126295908206743e-05, "loss": 2.2152, "step": 6039000 }, { "epoch": 17.48, "learning_rate": 4.126223832901074e-05, "loss": 2.2113, "step": 6039500 }, { "epoch": 17.48, "learning_rate": 4.126151468136347e-05, "loss": 2.2217, "step": 6040000 }, { "epoch": 17.48, "learning_rate": 4.126079103371619e-05, "loss": 2.2138, "step": 6040500 }, { "epoch": 17.49, "learning_rate": 4.126006738606892e-05, "loss": 2.2212, "step": 6041000 }, { "epoch": 17.49, "learning_rate": 4.1259343738421644e-05, "loss": 2.2251, "step": 6041500 }, { "epoch": 17.49, "learning_rate": 4.1258620090774366e-05, "loss": 2.2265, "step": 6042000 }, { "epoch": 17.49, "learning_rate": 4.125789644312709e-05, "loss": 2.2327, "step": 6042500 }, { "epoch": 17.49, "learning_rate": 4.1257174242775104e-05, "loss": 2.2097, "step": 6043000 }, { "epoch": 17.49, "learning_rate": 4.1256450595127826e-05, "loss": 2.23, "step": 6043500 }, { "epoch": 17.49, "learning_rate": 4.125572694748055e-05, "loss": 2.2493, "step": 6044000 }, { "epoch": 17.5, "learning_rate": 4.125500329983327e-05, "loss": 2.2419, "step": 6044500 }, { "epoch": 17.5, "learning_rate": 4.125427965218599e-05, "loss": 2.2104, "step": 6045000 }, { "epoch": 17.5, "learning_rate": 4.125355600453872e-05, "loss": 2.2295, "step": 6045500 }, { "epoch": 17.5, "learning_rate": 4.1252832356891444e-05, "loss": 2.2112, "step": 6046000 }, { "epoch": 17.5, "learning_rate": 4.1252108709244166e-05, "loss": 2.2246, "step": 6046500 }, { "epoch": 17.5, "learning_rate": 4.1251385061596895e-05, "loss": 2.2191, "step": 6047000 }, { "epoch": 17.51, "learning_rate": 4.125066141394962e-05, "loss": 2.233, "step": 6047500 }, { "epoch": 17.51, "learning_rate": 4.124993776630234e-05, "loss": 2.2061, "step": 6048000 }, { "epoch": 17.51, "learning_rate": 4.124921411865506e-05, "loss": 2.2151, "step": 6048500 }, { "epoch": 17.51, "learning_rate": 4.1248490471007784e-05, "loss": 2.1958, "step": 6049000 }, { "epoch": 17.51, "learning_rate": 4.1247766823360506e-05, "loss": 2.2197, "step": 6049500 }, { "epoch": 17.51, "learning_rate": 4.124704462300852e-05, "loss": 2.225, "step": 6050000 }, { "epoch": 17.51, "learning_rate": 4.1246320975361244e-05, "loss": 2.2214, "step": 6050500 }, { "epoch": 17.52, "learning_rate": 4.1245597327713966e-05, "loss": 2.2105, "step": 6051000 }, { "epoch": 17.52, "learning_rate": 4.1244873680066695e-05, "loss": 2.2327, "step": 6051500 }, { "epoch": 17.52, "learning_rate": 4.124415003241942e-05, "loss": 2.2329, "step": 6052000 }, { "epoch": 17.52, "learning_rate": 4.124342638477214e-05, "loss": 2.2132, "step": 6052500 }, { "epoch": 17.52, "learning_rate": 4.124270273712486e-05, "loss": 2.2321, "step": 6053000 }, { "epoch": 17.52, "learning_rate": 4.1241979089477584e-05, "loss": 2.2417, "step": 6053500 }, { "epoch": 17.52, "learning_rate": 4.124125544183031e-05, "loss": 2.2298, "step": 6054000 }, { "epoch": 17.53, "learning_rate": 4.124053324147833e-05, "loss": 2.2118, "step": 6054500 }, { "epoch": 17.53, "learning_rate": 4.123980959383105e-05, "loss": 2.2412, "step": 6055000 }, { "epoch": 17.53, "learning_rate": 4.123908594618377e-05, "loss": 2.2222, "step": 6055500 }, { "epoch": 17.53, "learning_rate": 4.1238362298536495e-05, "loss": 2.1998, "step": 6056000 }, { "epoch": 17.53, "learning_rate": 4.123763865088922e-05, "loss": 2.2234, "step": 6056500 }, { "epoch": 17.53, "learning_rate": 4.123691645053724e-05, "loss": 2.2174, "step": 6057000 }, { "epoch": 17.53, "learning_rate": 4.1236194250185256e-05, "loss": 2.195, "step": 6057500 }, { "epoch": 17.54, "learning_rate": 4.123547204983327e-05, "loss": 2.2192, "step": 6058000 }, { "epoch": 17.54, "learning_rate": 4.1234749849481293e-05, "loss": 2.2076, "step": 6058500 }, { "epoch": 17.54, "learning_rate": 4.1234026201834016e-05, "loss": 2.2161, "step": 6059000 }, { "epoch": 17.54, "learning_rate": 4.123330255418674e-05, "loss": 2.2214, "step": 6059500 }, { "epoch": 17.54, "learning_rate": 4.123257890653946e-05, "loss": 2.2222, "step": 6060000 }, { "epoch": 17.54, "learning_rate": 4.123185525889218e-05, "loss": 2.2129, "step": 6060500 }, { "epoch": 17.54, "learning_rate": 4.1231131611244905e-05, "loss": 2.2087, "step": 6061000 }, { "epoch": 17.55, "learning_rate": 4.123040796359763e-05, "loss": 2.223, "step": 6061500 }, { "epoch": 17.55, "learning_rate": 4.1229684315950356e-05, "loss": 2.2042, "step": 6062000 }, { "epoch": 17.55, "learning_rate": 4.122896066830308e-05, "loss": 2.2036, "step": 6062500 }, { "epoch": 17.55, "learning_rate": 4.1228238467951094e-05, "loss": 2.2261, "step": 6063000 }, { "epoch": 17.55, "learning_rate": 4.122751482030382e-05, "loss": 2.2348, "step": 6063500 }, { "epoch": 17.55, "learning_rate": 4.1226791172656545e-05, "loss": 2.2132, "step": 6064000 }, { "epoch": 17.55, "learning_rate": 4.122606752500927e-05, "loss": 2.2318, "step": 6064500 }, { "epoch": 17.56, "learning_rate": 4.122534387736199e-05, "loss": 2.2149, "step": 6065000 }, { "epoch": 17.56, "learning_rate": 4.122462022971471e-05, "loss": 2.192, "step": 6065500 }, { "epoch": 17.56, "learning_rate": 4.1223896582067434e-05, "loss": 2.2255, "step": 6066000 }, { "epoch": 17.56, "learning_rate": 4.1223172934420156e-05, "loss": 2.2362, "step": 6066500 }, { "epoch": 17.56, "learning_rate": 4.122244928677288e-05, "loss": 2.2435, "step": 6067000 }, { "epoch": 17.56, "learning_rate": 4.1221727086420894e-05, "loss": 2.2173, "step": 6067500 }, { "epoch": 17.56, "learning_rate": 4.122100343877362e-05, "loss": 2.2052, "step": 6068000 }, { "epoch": 17.57, "learning_rate": 4.122028123842164e-05, "loss": 2.2291, "step": 6068500 }, { "epoch": 17.57, "learning_rate": 4.1219559038069654e-05, "loss": 2.2237, "step": 6069000 }, { "epoch": 17.57, "learning_rate": 4.121883539042238e-05, "loss": 2.2181, "step": 6069500 }, { "epoch": 17.57, "learning_rate": 4.1218111742775105e-05, "loss": 2.2134, "step": 6070000 }, { "epoch": 17.57, "learning_rate": 4.121738809512783e-05, "loss": 2.1953, "step": 6070500 }, { "epoch": 17.57, "learning_rate": 4.121666444748055e-05, "loss": 2.2024, "step": 6071000 }, { "epoch": 17.57, "learning_rate": 4.121594079983327e-05, "loss": 2.2382, "step": 6071500 }, { "epoch": 17.58, "learning_rate": 4.1215217152186e-05, "loss": 2.2323, "step": 6072000 }, { "epoch": 17.58, "learning_rate": 4.121449350453872e-05, "loss": 2.237, "step": 6072500 }, { "epoch": 17.58, "learning_rate": 4.1213769856891445e-05, "loss": 2.2239, "step": 6073000 }, { "epoch": 17.58, "learning_rate": 4.121304765653946e-05, "loss": 2.228, "step": 6073500 }, { "epoch": 17.58, "learning_rate": 4.121232400889218e-05, "loss": 2.2043, "step": 6074000 }, { "epoch": 17.58, "learning_rate": 4.1211600361244905e-05, "loss": 2.2326, "step": 6074500 }, { "epoch": 17.58, "learning_rate": 4.121087671359763e-05, "loss": 2.2348, "step": 6075000 }, { "epoch": 17.59, "learning_rate": 4.121015306595035e-05, "loss": 2.214, "step": 6075500 }, { "epoch": 17.59, "learning_rate": 4.120942941830307e-05, "loss": 2.1983, "step": 6076000 }, { "epoch": 17.59, "learning_rate": 4.12087057706558e-05, "loss": 2.219, "step": 6076500 }, { "epoch": 17.59, "learning_rate": 4.120798212300852e-05, "loss": 2.2336, "step": 6077000 }, { "epoch": 17.59, "learning_rate": 4.1207258475361246e-05, "loss": 2.2229, "step": 6077500 }, { "epoch": 17.59, "learning_rate": 4.1206534827713975e-05, "loss": 2.2196, "step": 6078000 }, { "epoch": 17.59, "learning_rate": 4.12058111800667e-05, "loss": 2.246, "step": 6078500 }, { "epoch": 17.6, "learning_rate": 4.120508897971471e-05, "loss": 2.2154, "step": 6079000 }, { "epoch": 17.6, "learning_rate": 4.1204365332067435e-05, "loss": 2.2153, "step": 6079500 }, { "epoch": 17.6, "learning_rate": 4.120364168442016e-05, "loss": 2.2236, "step": 6080000 }, { "epoch": 17.6, "learning_rate": 4.120291803677288e-05, "loss": 2.2391, "step": 6080500 }, { "epoch": 17.6, "learning_rate": 4.12021943891256e-05, "loss": 2.2307, "step": 6081000 }, { "epoch": 17.6, "learning_rate": 4.1201470741478324e-05, "loss": 2.2105, "step": 6081500 }, { "epoch": 17.6, "learning_rate": 4.1200748541126346e-05, "loss": 2.2257, "step": 6082000 }, { "epoch": 17.61, "learning_rate": 4.120002489347907e-05, "loss": 2.1817, "step": 6082500 }, { "epoch": 17.61, "learning_rate": 4.119930124583179e-05, "loss": 2.2259, "step": 6083000 }, { "epoch": 17.61, "learning_rate": 4.119857759818451e-05, "loss": 2.226, "step": 6083500 }, { "epoch": 17.61, "learning_rate": 4.119785395053724e-05, "loss": 2.2229, "step": 6084000 }, { "epoch": 17.61, "learning_rate": 4.1197130302889964e-05, "loss": 2.2286, "step": 6084500 }, { "epoch": 17.61, "learning_rate": 4.119640810253798e-05, "loss": 2.2414, "step": 6085000 }, { "epoch": 17.62, "learning_rate": 4.11956844548907e-05, "loss": 2.2023, "step": 6085500 }, { "epoch": 17.62, "learning_rate": 4.1194960807243424e-05, "loss": 2.2016, "step": 6086000 }, { "epoch": 17.62, "learning_rate": 4.119423715959615e-05, "loss": 2.2307, "step": 6086500 }, { "epoch": 17.62, "learning_rate": 4.1193513511948875e-05, "loss": 2.2559, "step": 6087000 }, { "epoch": 17.62, "learning_rate": 4.11927898643016e-05, "loss": 2.2428, "step": 6087500 }, { "epoch": 17.62, "learning_rate": 4.119206766394961e-05, "loss": 2.2119, "step": 6088000 }, { "epoch": 17.62, "learning_rate": 4.1191344016302335e-05, "loss": 2.2219, "step": 6088500 }, { "epoch": 17.63, "learning_rate": 4.119062036865506e-05, "loss": 2.1981, "step": 6089000 }, { "epoch": 17.63, "learning_rate": 4.118989672100778e-05, "loss": 2.2025, "step": 6089500 }, { "epoch": 17.63, "learning_rate": 4.11891745206558e-05, "loss": 2.1952, "step": 6090000 }, { "epoch": 17.63, "learning_rate": 4.1188450873008524e-05, "loss": 2.2077, "step": 6090500 }, { "epoch": 17.63, "learning_rate": 4.1187727225361246e-05, "loss": 2.2174, "step": 6091000 }, { "epoch": 17.63, "learning_rate": 4.118700502500926e-05, "loss": 2.226, "step": 6091500 }, { "epoch": 17.63, "learning_rate": 4.118628137736199e-05, "loss": 2.2276, "step": 6092000 }, { "epoch": 17.64, "learning_rate": 4.118555772971471e-05, "loss": 2.2052, "step": 6092500 }, { "epoch": 17.64, "learning_rate": 4.1184834082067435e-05, "loss": 2.2095, "step": 6093000 }, { "epoch": 17.64, "learning_rate": 4.118411043442016e-05, "loss": 2.2411, "step": 6093500 }, { "epoch": 17.64, "learning_rate": 4.118338823406817e-05, "loss": 2.2379, "step": 6094000 }, { "epoch": 17.64, "learning_rate": 4.11826645864209e-05, "loss": 2.201, "step": 6094500 }, { "epoch": 17.64, "learning_rate": 4.1181940938773624e-05, "loss": 2.2073, "step": 6095000 }, { "epoch": 17.64, "learning_rate": 4.118121729112635e-05, "loss": 2.2152, "step": 6095500 }, { "epoch": 17.65, "learning_rate": 4.118049364347907e-05, "loss": 2.2174, "step": 6096000 }, { "epoch": 17.65, "learning_rate": 4.117976999583179e-05, "loss": 2.2254, "step": 6096500 }, { "epoch": 17.65, "learning_rate": 4.117904634818451e-05, "loss": 2.2057, "step": 6097000 }, { "epoch": 17.65, "learning_rate": 4.1178322700537236e-05, "loss": 2.2266, "step": 6097500 }, { "epoch": 17.65, "learning_rate": 4.117759905288996e-05, "loss": 2.2234, "step": 6098000 }, { "epoch": 17.65, "learning_rate": 4.117687540524268e-05, "loss": 2.2011, "step": 6098500 }, { "epoch": 17.65, "learning_rate": 4.11761532048907e-05, "loss": 2.2481, "step": 6099000 }, { "epoch": 17.66, "learning_rate": 4.1175429557243425e-05, "loss": 2.2212, "step": 6099500 }, { "epoch": 17.66, "learning_rate": 4.1174705909596154e-05, "loss": 2.2289, "step": 6100000 }, { "epoch": 17.66, "learning_rate": 4.1173982261948876e-05, "loss": 2.2217, "step": 6100500 }, { "epoch": 17.66, "learning_rate": 4.11732586143016e-05, "loss": 2.2343, "step": 6101000 }, { "epoch": 17.66, "learning_rate": 4.1172536413949614e-05, "loss": 2.2431, "step": 6101500 }, { "epoch": 17.66, "learning_rate": 4.1171812766302336e-05, "loss": 2.2407, "step": 6102000 }, { "epoch": 17.66, "learning_rate": 4.117108911865506e-05, "loss": 2.1995, "step": 6102500 }, { "epoch": 17.67, "learning_rate": 4.117036691830308e-05, "loss": 2.2302, "step": 6103000 }, { "epoch": 17.67, "learning_rate": 4.11696432706558e-05, "loss": 2.1885, "step": 6103500 }, { "epoch": 17.67, "learning_rate": 4.1168919623008525e-05, "loss": 2.2365, "step": 6104000 }, { "epoch": 17.67, "learning_rate": 4.116819597536125e-05, "loss": 2.2159, "step": 6104500 }, { "epoch": 17.67, "learning_rate": 4.116747232771397e-05, "loss": 2.1995, "step": 6105000 }, { "epoch": 17.67, "learning_rate": 4.116674868006669e-05, "loss": 2.2191, "step": 6105500 }, { "epoch": 17.67, "learning_rate": 4.1166025032419414e-05, "loss": 2.2345, "step": 6106000 }, { "epoch": 17.68, "learning_rate": 4.116530138477214e-05, "loss": 2.2477, "step": 6106500 }, { "epoch": 17.68, "learning_rate": 4.1164577737124865e-05, "loss": 2.2352, "step": 6107000 }, { "epoch": 17.68, "learning_rate": 4.116385408947759e-05, "loss": 2.2065, "step": 6107500 }, { "epoch": 17.68, "learning_rate": 4.116313044183031e-05, "loss": 2.2222, "step": 6108000 }, { "epoch": 17.68, "learning_rate": 4.116240679418303e-05, "loss": 2.2218, "step": 6108500 }, { "epoch": 17.68, "learning_rate": 4.1161683146535754e-05, "loss": 2.2016, "step": 6109000 }, { "epoch": 17.68, "learning_rate": 4.1160960946183776e-05, "loss": 2.2269, "step": 6109500 }, { "epoch": 17.69, "learning_rate": 4.11602372985365e-05, "loss": 2.1999, "step": 6110000 }, { "epoch": 17.69, "learning_rate": 4.115951365088922e-05, "loss": 2.1942, "step": 6110500 }, { "epoch": 17.69, "learning_rate": 4.115879000324194e-05, "loss": 2.2393, "step": 6111000 }, { "epoch": 17.69, "learning_rate": 4.1158066355594665e-05, "loss": 2.2088, "step": 6111500 }, { "epoch": 17.69, "learning_rate": 4.115734560253798e-05, "loss": 2.2279, "step": 6112000 }, { "epoch": 17.69, "learning_rate": 4.11566219548907e-05, "loss": 2.2284, "step": 6112500 }, { "epoch": 17.69, "learning_rate": 4.1155898307243425e-05, "loss": 2.2312, "step": 6113000 }, { "epoch": 17.7, "learning_rate": 4.115517610689144e-05, "loss": 2.2016, "step": 6113500 }, { "epoch": 17.7, "learning_rate": 4.115445245924416e-05, "loss": 2.2313, "step": 6114000 }, { "epoch": 17.7, "learning_rate": 4.115372881159689e-05, "loss": 2.2438, "step": 6114500 }, { "epoch": 17.7, "learning_rate": 4.1153005163949614e-05, "loss": 2.2156, "step": 6115000 }, { "epoch": 17.7, "learning_rate": 4.115228151630234e-05, "loss": 2.2167, "step": 6115500 }, { "epoch": 17.7, "learning_rate": 4.115155786865506e-05, "loss": 2.2287, "step": 6116000 }, { "epoch": 17.7, "learning_rate": 4.115083422100778e-05, "loss": 2.2012, "step": 6116500 }, { "epoch": 17.71, "learning_rate": 4.11501105733605e-05, "loss": 2.2084, "step": 6117000 }, { "epoch": 17.71, "learning_rate": 4.114938692571323e-05, "loss": 2.2286, "step": 6117500 }, { "epoch": 17.71, "learning_rate": 4.1148663278065955e-05, "loss": 2.233, "step": 6118000 }, { "epoch": 17.71, "learning_rate": 4.114793963041868e-05, "loss": 2.2077, "step": 6118500 }, { "epoch": 17.71, "learning_rate": 4.11472159827714e-05, "loss": 2.2178, "step": 6119000 }, { "epoch": 17.71, "learning_rate": 4.114649233512412e-05, "loss": 2.235, "step": 6119500 }, { "epoch": 17.71, "learning_rate": 4.1145768687476844e-05, "loss": 2.2326, "step": 6120000 }, { "epoch": 17.72, "learning_rate": 4.114504648712486e-05, "loss": 2.2315, "step": 6120500 }, { "epoch": 17.72, "learning_rate": 4.114432283947758e-05, "loss": 2.2128, "step": 6121000 }, { "epoch": 17.72, "learning_rate": 4.114359919183031e-05, "loss": 2.223, "step": 6121500 }, { "epoch": 17.72, "learning_rate": 4.114287554418303e-05, "loss": 2.2155, "step": 6122000 }, { "epoch": 17.72, "learning_rate": 4.1142153343831055e-05, "loss": 2.2165, "step": 6122500 }, { "epoch": 17.72, "learning_rate": 4.114142969618378e-05, "loss": 2.2226, "step": 6123000 }, { "epoch": 17.73, "learning_rate": 4.114070749583179e-05, "loss": 2.2161, "step": 6123500 }, { "epoch": 17.73, "learning_rate": 4.1139983848184515e-05, "loss": 2.2129, "step": 6124000 }, { "epoch": 17.73, "learning_rate": 4.113926020053724e-05, "loss": 2.2142, "step": 6124500 }, { "epoch": 17.73, "learning_rate": 4.113853655288996e-05, "loss": 2.221, "step": 6125000 }, { "epoch": 17.73, "learning_rate": 4.113781290524268e-05, "loss": 2.2161, "step": 6125500 }, { "epoch": 17.73, "learning_rate": 4.1137089257595404e-05, "loss": 2.2488, "step": 6126000 }, { "epoch": 17.73, "learning_rate": 4.113636560994813e-05, "loss": 2.2175, "step": 6126500 }, { "epoch": 17.74, "learning_rate": 4.1135641962300855e-05, "loss": 2.2328, "step": 6127000 }, { "epoch": 17.74, "learning_rate": 4.113491831465358e-05, "loss": 2.2208, "step": 6127500 }, { "epoch": 17.74, "learning_rate": 4.11341946670063e-05, "loss": 2.2483, "step": 6128000 }, { "epoch": 17.74, "learning_rate": 4.1133472466654315e-05, "loss": 2.2177, "step": 6128500 }, { "epoch": 17.74, "learning_rate": 4.1132748819007044e-05, "loss": 2.2301, "step": 6129000 }, { "epoch": 17.74, "learning_rate": 4.113202806595036e-05, "loss": 2.1951, "step": 6129500 }, { "epoch": 17.74, "learning_rate": 4.113130441830308e-05, "loss": 2.256, "step": 6130000 }, { "epoch": 17.75, "learning_rate": 4.1130580770655804e-05, "loss": 2.2136, "step": 6130500 }, { "epoch": 17.75, "learning_rate": 4.1129857123008527e-05, "loss": 2.2035, "step": 6131000 }, { "epoch": 17.75, "learning_rate": 4.112913347536125e-05, "loss": 2.219, "step": 6131500 }, { "epoch": 17.75, "learning_rate": 4.112840982771397e-05, "loss": 2.2452, "step": 6132000 }, { "epoch": 17.75, "learning_rate": 4.112768618006669e-05, "loss": 2.2118, "step": 6132500 }, { "epoch": 17.75, "learning_rate": 4.1126962532419415e-05, "loss": 2.2507, "step": 6133000 }, { "epoch": 17.75, "learning_rate": 4.112623888477214e-05, "loss": 2.2171, "step": 6133500 }, { "epoch": 17.76, "learning_rate": 4.112551523712486e-05, "loss": 2.2377, "step": 6134000 }, { "epoch": 17.76, "learning_rate": 4.112479158947758e-05, "loss": 2.2333, "step": 6134500 }, { "epoch": 17.76, "learning_rate": 4.1124069389125604e-05, "loss": 2.2029, "step": 6135000 }, { "epoch": 17.76, "learning_rate": 4.112334574147833e-05, "loss": 2.2402, "step": 6135500 }, { "epoch": 17.76, "learning_rate": 4.112262209383105e-05, "loss": 2.2146, "step": 6136000 }, { "epoch": 17.76, "learning_rate": 4.112189844618378e-05, "loss": 2.244, "step": 6136500 }, { "epoch": 17.76, "learning_rate": 4.11211747985365e-05, "loss": 2.1881, "step": 6137000 }, { "epoch": 17.77, "learning_rate": 4.112045115088922e-05, "loss": 2.2065, "step": 6137500 }, { "epoch": 17.77, "learning_rate": 4.1119727503241945e-05, "loss": 2.241, "step": 6138000 }, { "epoch": 17.77, "learning_rate": 4.111900385559467e-05, "loss": 2.2098, "step": 6138500 }, { "epoch": 17.77, "learning_rate": 4.111828310253798e-05, "loss": 2.2299, "step": 6139000 }, { "epoch": 17.77, "learning_rate": 4.1117560902186e-05, "loss": 2.2085, "step": 6139500 }, { "epoch": 17.77, "learning_rate": 4.111683725453872e-05, "loss": 2.2023, "step": 6140000 }, { "epoch": 17.77, "learning_rate": 4.111611360689144e-05, "loss": 2.2353, "step": 6140500 }, { "epoch": 17.78, "learning_rate": 4.1115389959244165e-05, "loss": 2.2297, "step": 6141000 }, { "epoch": 17.78, "learning_rate": 4.111466631159689e-05, "loss": 2.219, "step": 6141500 }, { "epoch": 17.78, "learning_rate": 4.111394266394961e-05, "loss": 2.2216, "step": 6142000 }, { "epoch": 17.78, "learning_rate": 4.111321901630233e-05, "loss": 2.2246, "step": 6142500 }, { "epoch": 17.78, "learning_rate": 4.111249536865506e-05, "loss": 2.2362, "step": 6143000 }, { "epoch": 17.78, "learning_rate": 4.111177172100778e-05, "loss": 2.2401, "step": 6143500 }, { "epoch": 17.78, "learning_rate": 4.111104807336051e-05, "loss": 2.218, "step": 6144000 }, { "epoch": 17.79, "learning_rate": 4.1110324425713234e-05, "loss": 2.2238, "step": 6144500 }, { "epoch": 17.79, "learning_rate": 4.1109600778065956e-05, "loss": 2.2212, "step": 6145000 }, { "epoch": 17.79, "learning_rate": 4.110887713041868e-05, "loss": 2.2137, "step": 6145500 }, { "epoch": 17.79, "learning_rate": 4.11081534827714e-05, "loss": 2.2113, "step": 6146000 }, { "epoch": 17.79, "learning_rate": 4.110742983512412e-05, "loss": 2.2021, "step": 6146500 }, { "epoch": 17.79, "learning_rate": 4.1106706187476845e-05, "loss": 2.2264, "step": 6147000 }, { "epoch": 17.79, "learning_rate": 4.110598253982957e-05, "loss": 2.2426, "step": 6147500 }, { "epoch": 17.8, "learning_rate": 4.110526033947758e-05, "loss": 2.2332, "step": 6148000 }, { "epoch": 17.8, "learning_rate": 4.1104538139125605e-05, "loss": 2.2281, "step": 6148500 }, { "epoch": 17.8, "learning_rate": 4.110381449147833e-05, "loss": 2.2247, "step": 6149000 }, { "epoch": 17.8, "learning_rate": 4.110309084383105e-05, "loss": 2.2245, "step": 6149500 }, { "epoch": 17.8, "learning_rate": 4.110236719618377e-05, "loss": 2.1833, "step": 6150000 }, { "epoch": 17.8, "learning_rate": 4.1101643548536494e-05, "loss": 2.2152, "step": 6150500 }, { "epoch": 17.8, "learning_rate": 4.110092134818451e-05, "loss": 2.241, "step": 6151000 }, { "epoch": 17.81, "learning_rate": 4.110019770053724e-05, "loss": 2.2239, "step": 6151500 }, { "epoch": 17.81, "learning_rate": 4.109947405288996e-05, "loss": 2.2148, "step": 6152000 }, { "epoch": 17.81, "learning_rate": 4.109875040524268e-05, "loss": 2.2147, "step": 6152500 }, { "epoch": 17.81, "learning_rate": 4.109802675759541e-05, "loss": 2.212, "step": 6153000 }, { "epoch": 17.81, "learning_rate": 4.1097303109948134e-05, "loss": 2.2366, "step": 6153500 }, { "epoch": 17.81, "learning_rate": 4.109657946230086e-05, "loss": 2.219, "step": 6154000 }, { "epoch": 17.81, "learning_rate": 4.109585581465358e-05, "loss": 2.2265, "step": 6154500 }, { "epoch": 17.82, "learning_rate": 4.1095133614301595e-05, "loss": 2.2346, "step": 6155000 }, { "epoch": 17.82, "learning_rate": 4.109441141394961e-05, "loss": 2.2224, "step": 6155500 }, { "epoch": 17.82, "learning_rate": 4.109368776630233e-05, "loss": 2.222, "step": 6156000 }, { "epoch": 17.82, "learning_rate": 4.109296411865506e-05, "loss": 2.2319, "step": 6156500 }, { "epoch": 17.82, "learning_rate": 4.1092240471007784e-05, "loss": 2.2326, "step": 6157000 }, { "epoch": 17.82, "learning_rate": 4.1091516823360506e-05, "loss": 2.2187, "step": 6157500 }, { "epoch": 17.82, "learning_rate": 4.109079317571323e-05, "loss": 2.2278, "step": 6158000 }, { "epoch": 17.83, "learning_rate": 4.109007242265654e-05, "loss": 2.2374, "step": 6158500 }, { "epoch": 17.83, "learning_rate": 4.108934877500926e-05, "loss": 2.214, "step": 6159000 }, { "epoch": 17.83, "learning_rate": 4.108862512736199e-05, "loss": 2.2243, "step": 6159500 }, { "epoch": 17.83, "learning_rate": 4.108790147971471e-05, "loss": 2.2182, "step": 6160000 }, { "epoch": 17.83, "learning_rate": 4.108717783206744e-05, "loss": 2.2574, "step": 6160500 }, { "epoch": 17.83, "learning_rate": 4.108645418442016e-05, "loss": 2.2138, "step": 6161000 }, { "epoch": 17.84, "learning_rate": 4.1085730536772884e-05, "loss": 2.237, "step": 6161500 }, { "epoch": 17.84, "learning_rate": 4.1085006889125606e-05, "loss": 2.2166, "step": 6162000 }, { "epoch": 17.84, "learning_rate": 4.108428324147833e-05, "loss": 2.2385, "step": 6162500 }, { "epoch": 17.84, "learning_rate": 4.108356248842164e-05, "loss": 2.2419, "step": 6163000 }, { "epoch": 17.84, "learning_rate": 4.108283884077436e-05, "loss": 2.2235, "step": 6163500 }, { "epoch": 17.84, "learning_rate": 4.108211519312709e-05, "loss": 2.2321, "step": 6164000 }, { "epoch": 17.84, "learning_rate": 4.108139154547981e-05, "loss": 2.231, "step": 6164500 }, { "epoch": 17.85, "learning_rate": 4.108066789783253e-05, "loss": 2.2133, "step": 6165000 }, { "epoch": 17.85, "learning_rate": 4.1079944250185255e-05, "loss": 2.2339, "step": 6165500 }, { "epoch": 17.85, "learning_rate": 4.107922204983327e-05, "loss": 2.2349, "step": 6166000 }, { "epoch": 17.85, "learning_rate": 4.1078499849481286e-05, "loss": 2.2299, "step": 6166500 }, { "epoch": 17.85, "learning_rate": 4.1077776201834015e-05, "loss": 2.2272, "step": 6167000 }, { "epoch": 17.85, "learning_rate": 4.107705255418674e-05, "loss": 2.2492, "step": 6167500 }, { "epoch": 17.85, "learning_rate": 4.107632890653946e-05, "loss": 2.2336, "step": 6168000 }, { "epoch": 17.86, "learning_rate": 4.107560525889219e-05, "loss": 2.1937, "step": 6168500 }, { "epoch": 17.86, "learning_rate": 4.107488161124491e-05, "loss": 2.234, "step": 6169000 }, { "epoch": 17.86, "learning_rate": 4.107415796359763e-05, "loss": 2.1948, "step": 6169500 }, { "epoch": 17.86, "learning_rate": 4.1073434315950355e-05, "loss": 2.2248, "step": 6170000 }, { "epoch": 17.86, "learning_rate": 4.107271066830308e-05, "loss": 2.2354, "step": 6170500 }, { "epoch": 17.86, "learning_rate": 4.10719870206558e-05, "loss": 2.2439, "step": 6171000 }, { "epoch": 17.86, "learning_rate": 4.1071264820303815e-05, "loss": 2.2308, "step": 6171500 }, { "epoch": 17.87, "learning_rate": 4.107054117265654e-05, "loss": 2.2303, "step": 6172000 }, { "epoch": 17.87, "learning_rate": 4.106981752500926e-05, "loss": 2.2299, "step": 6172500 }, { "epoch": 17.87, "learning_rate": 4.106909387736199e-05, "loss": 2.2173, "step": 6173000 }, { "epoch": 17.87, "learning_rate": 4.106837022971471e-05, "loss": 2.2232, "step": 6173500 }, { "epoch": 17.87, "learning_rate": 4.1067646582067433e-05, "loss": 2.2132, "step": 6174000 }, { "epoch": 17.87, "learning_rate": 4.106692293442016e-05, "loss": 2.1987, "step": 6174500 }, { "epoch": 17.87, "learning_rate": 4.1066199286772885e-05, "loss": 2.1875, "step": 6175000 }, { "epoch": 17.88, "learning_rate": 4.10654770864209e-05, "loss": 2.2302, "step": 6175500 }, { "epoch": 17.88, "learning_rate": 4.1064754886068916e-05, "loss": 2.2089, "step": 6176000 }, { "epoch": 17.88, "learning_rate": 4.106403123842164e-05, "loss": 2.2312, "step": 6176500 }, { "epoch": 17.88, "learning_rate": 4.106330759077437e-05, "loss": 2.2292, "step": 6177000 }, { "epoch": 17.88, "learning_rate": 4.106258394312709e-05, "loss": 2.2014, "step": 6177500 }, { "epoch": 17.88, "learning_rate": 4.106186029547981e-05, "loss": 2.2352, "step": 6178000 }, { "epoch": 17.88, "learning_rate": 4.1061136647832534e-05, "loss": 2.2417, "step": 6178500 }, { "epoch": 17.89, "learning_rate": 4.1060413000185256e-05, "loss": 2.1997, "step": 6179000 }, { "epoch": 17.89, "learning_rate": 4.105968935253798e-05, "loss": 2.1903, "step": 6179500 }, { "epoch": 17.89, "learning_rate": 4.10589657048907e-05, "loss": 2.2158, "step": 6180000 }, { "epoch": 17.89, "learning_rate": 4.105824205724342e-05, "loss": 2.229, "step": 6180500 }, { "epoch": 17.89, "learning_rate": 4.1057518409596145e-05, "loss": 2.2159, "step": 6181000 }, { "epoch": 17.89, "learning_rate": 4.1056794761948874e-05, "loss": 2.2222, "step": 6181500 }, { "epoch": 17.89, "learning_rate": 4.1056071114301596e-05, "loss": 2.2069, "step": 6182000 }, { "epoch": 17.9, "learning_rate": 4.105534891394961e-05, "loss": 2.2155, "step": 6182500 }, { "epoch": 17.9, "learning_rate": 4.1054626713597634e-05, "loss": 2.2371, "step": 6183000 }, { "epoch": 17.9, "learning_rate": 4.1053903065950356e-05, "loss": 2.2003, "step": 6183500 }, { "epoch": 17.9, "learning_rate": 4.105317941830308e-05, "loss": 2.2234, "step": 6184000 }, { "epoch": 17.9, "learning_rate": 4.1052457217951094e-05, "loss": 2.1885, "step": 6184500 }, { "epoch": 17.9, "learning_rate": 4.1051733570303816e-05, "loss": 2.2106, "step": 6185000 }, { "epoch": 17.9, "learning_rate": 4.105100992265654e-05, "loss": 2.2027, "step": 6185500 }, { "epoch": 17.91, "learning_rate": 4.105028627500927e-05, "loss": 2.1959, "step": 6186000 }, { "epoch": 17.91, "learning_rate": 4.104956262736199e-05, "loss": 2.2362, "step": 6186500 }, { "epoch": 17.91, "learning_rate": 4.104883897971471e-05, "loss": 2.2103, "step": 6187000 }, { "epoch": 17.91, "learning_rate": 4.1048115332067434e-05, "loss": 2.2003, "step": 6187500 }, { "epoch": 17.91, "learning_rate": 4.1047391684420156e-05, "loss": 2.2204, "step": 6188000 }, { "epoch": 17.91, "learning_rate": 4.104666948406817e-05, "loss": 2.2192, "step": 6188500 }, { "epoch": 17.91, "learning_rate": 4.1045945836420894e-05, "loss": 2.2116, "step": 6189000 }, { "epoch": 17.92, "learning_rate": 4.104522218877362e-05, "loss": 2.2272, "step": 6189500 }, { "epoch": 17.92, "learning_rate": 4.104449998842164e-05, "loss": 2.2411, "step": 6190000 }, { "epoch": 17.92, "learning_rate": 4.104377634077437e-05, "loss": 2.2034, "step": 6190500 }, { "epoch": 17.92, "learning_rate": 4.104305269312709e-05, "loss": 2.2241, "step": 6191000 }, { "epoch": 17.92, "learning_rate": 4.104232904547981e-05, "loss": 2.2231, "step": 6191500 }, { "epoch": 17.92, "learning_rate": 4.1041605397832534e-05, "loss": 2.1969, "step": 6192000 }, { "epoch": 17.92, "learning_rate": 4.104088175018526e-05, "loss": 2.2181, "step": 6192500 }, { "epoch": 17.93, "learning_rate": 4.104015810253798e-05, "loss": 2.2089, "step": 6193000 }, { "epoch": 17.93, "learning_rate": 4.10394344548907e-05, "loss": 2.2246, "step": 6193500 }, { "epoch": 17.93, "learning_rate": 4.1038710807243423e-05, "loss": 2.2194, "step": 6194000 }, { "epoch": 17.93, "learning_rate": 4.1037987159596146e-05, "loss": 2.2371, "step": 6194500 }, { "epoch": 17.93, "learning_rate": 4.103726495924417e-05, "loss": 2.2185, "step": 6195000 }, { "epoch": 17.93, "learning_rate": 4.103654131159689e-05, "loss": 2.2024, "step": 6195500 }, { "epoch": 17.93, "learning_rate": 4.103581766394961e-05, "loss": 2.2258, "step": 6196000 }, { "epoch": 17.94, "learning_rate": 4.1035094016302335e-05, "loss": 2.2222, "step": 6196500 }, { "epoch": 17.94, "learning_rate": 4.103437181595036e-05, "loss": 2.2053, "step": 6197000 }, { "epoch": 17.94, "learning_rate": 4.103364816830308e-05, "loss": 2.2281, "step": 6197500 }, { "epoch": 17.94, "learning_rate": 4.10329245206558e-05, "loss": 2.2462, "step": 6198000 }, { "epoch": 17.94, "learning_rate": 4.1032200873008524e-05, "loss": 2.2296, "step": 6198500 }, { "epoch": 17.94, "learning_rate": 4.103147867265654e-05, "loss": 2.206, "step": 6199000 }, { "epoch": 17.95, "learning_rate": 4.103075647230456e-05, "loss": 2.2012, "step": 6199500 }, { "epoch": 17.95, "learning_rate": 4.1030032824657284e-05, "loss": 2.2374, "step": 6200000 }, { "epoch": 17.95, "learning_rate": 4.1029309177010006e-05, "loss": 2.2485, "step": 6200500 }, { "epoch": 17.95, "learning_rate": 4.102858697665802e-05, "loss": 2.2132, "step": 6201000 }, { "epoch": 17.95, "learning_rate": 4.1027863329010744e-05, "loss": 2.2252, "step": 6201500 }, { "epoch": 17.95, "learning_rate": 4.1027139681363466e-05, "loss": 2.2083, "step": 6202000 }, { "epoch": 17.95, "learning_rate": 4.1026416033716195e-05, "loss": 2.203, "step": 6202500 }, { "epoch": 17.96, "learning_rate": 4.102569238606892e-05, "loss": 2.2262, "step": 6203000 }, { "epoch": 17.96, "learning_rate": 4.102497018571693e-05, "loss": 2.2281, "step": 6203500 }, { "epoch": 17.96, "learning_rate": 4.1024246538069655e-05, "loss": 2.2032, "step": 6204000 }, { "epoch": 17.96, "learning_rate": 4.1023522890422384e-05, "loss": 2.2317, "step": 6204500 }, { "epoch": 17.96, "learning_rate": 4.1022799242775106e-05, "loss": 2.2337, "step": 6205000 }, { "epoch": 17.96, "learning_rate": 4.102207559512783e-05, "loss": 2.2321, "step": 6205500 }, { "epoch": 17.96, "learning_rate": 4.102135194748055e-05, "loss": 2.2385, "step": 6206000 }, { "epoch": 17.97, "learning_rate": 4.102062829983327e-05, "loss": 2.2238, "step": 6206500 }, { "epoch": 17.97, "learning_rate": 4.1019904652185995e-05, "loss": 2.2098, "step": 6207000 }, { "epoch": 17.97, "learning_rate": 4.101918100453872e-05, "loss": 2.2322, "step": 6207500 }, { "epoch": 17.97, "learning_rate": 4.1018457356891447e-05, "loss": 2.2176, "step": 6208000 }, { "epoch": 17.97, "learning_rate": 4.101773370924417e-05, "loss": 2.2117, "step": 6208500 }, { "epoch": 17.97, "learning_rate": 4.1017011508892184e-05, "loss": 2.2323, "step": 6209000 }, { "epoch": 17.97, "learning_rate": 4.1016287861244907e-05, "loss": 2.2372, "step": 6209500 }, { "epoch": 17.98, "learning_rate": 4.101556421359763e-05, "loss": 2.2379, "step": 6210000 }, { "epoch": 17.98, "learning_rate": 4.101484056595035e-05, "loss": 2.2254, "step": 6210500 }, { "epoch": 17.98, "learning_rate": 4.101411691830307e-05, "loss": 2.221, "step": 6211000 }, { "epoch": 17.98, "learning_rate": 4.1013393270655796e-05, "loss": 2.2158, "step": 6211500 }, { "epoch": 17.98, "learning_rate": 4.1012669623008525e-05, "loss": 2.2298, "step": 6212000 }, { "epoch": 17.98, "learning_rate": 4.101194597536125e-05, "loss": 2.2545, "step": 6212500 }, { "epoch": 17.98, "learning_rate": 4.101122232771397e-05, "loss": 2.2317, "step": 6213000 }, { "epoch": 17.99, "learning_rate": 4.101049868006669e-05, "loss": 2.1958, "step": 6213500 }, { "epoch": 17.99, "learning_rate": 4.1009776479714714e-05, "loss": 2.2104, "step": 6214000 }, { "epoch": 17.99, "learning_rate": 4.100905427936273e-05, "loss": 2.2432, "step": 6214500 }, { "epoch": 17.99, "learning_rate": 4.100833063171545e-05, "loss": 2.2369, "step": 6215000 }, { "epoch": 17.99, "learning_rate": 4.1007606984068174e-05, "loss": 2.2299, "step": 6215500 }, { "epoch": 17.99, "learning_rate": 4.1006883336420896e-05, "loss": 2.2177, "step": 6216000 }, { "epoch": 17.99, "learning_rate": 4.100615968877362e-05, "loss": 2.2005, "step": 6216500 }, { "epoch": 18.0, "learning_rate": 4.100543604112635e-05, "loss": 2.2107, "step": 6217000 }, { "epoch": 18.0, "learning_rate": 4.100471239347907e-05, "loss": 2.1942, "step": 6217500 }, { "epoch": 18.0, "learning_rate": 4.100398874583179e-05, "loss": 2.1974, "step": 6218000 }, { "epoch": 18.0, "eval_accuracy": 0.6574707416013661, "eval_accuracy_mlm": 0.621008649420185, "eval_accuracy_nsp": 0.8532239806043274, "eval_loss": 2.247318744659424, "eval_runtime": 330.4145, "eval_samples_per_second": 1320.723, "eval_steps_per_second": 55.031, "step": 6218496 }, { "epoch": 18.0, "learning_rate": 4.1003265098184514e-05, "loss": 2.2094, "step": 6218500 }, { "epoch": 18.0, "learning_rate": 4.100254145053724e-05, "loss": 2.1805, "step": 6219000 }, { "epoch": 18.0, "learning_rate": 4.1001817802889965e-05, "loss": 2.2035, "step": 6219500 }, { "epoch": 18.0, "learning_rate": 4.100109415524269e-05, "loss": 2.186, "step": 6220000 }, { "epoch": 18.01, "learning_rate": 4.100037050759541e-05, "loss": 2.187, "step": 6220500 }, { "epoch": 18.01, "learning_rate": 4.0999648307243425e-05, "loss": 2.1996, "step": 6221000 }, { "epoch": 18.01, "learning_rate": 4.099892465959615e-05, "loss": 2.1806, "step": 6221500 }, { "epoch": 18.01, "learning_rate": 4.099820101194887e-05, "loss": 2.2009, "step": 6222000 }, { "epoch": 18.01, "learning_rate": 4.09974773643016e-05, "loss": 2.1903, "step": 6222500 }, { "epoch": 18.01, "learning_rate": 4.099675371665432e-05, "loss": 2.1659, "step": 6223000 }, { "epoch": 18.01, "learning_rate": 4.099603006900704e-05, "loss": 2.2044, "step": 6223500 }, { "epoch": 18.02, "learning_rate": 4.0995306421359765e-05, "loss": 2.1927, "step": 6224000 }, { "epoch": 18.02, "learning_rate": 4.099458277371249e-05, "loss": 2.1789, "step": 6224500 }, { "epoch": 18.02, "learning_rate": 4.099385912606521e-05, "loss": 2.1911, "step": 6225000 }, { "epoch": 18.02, "learning_rate": 4.099313837300852e-05, "loss": 2.2013, "step": 6225500 }, { "epoch": 18.02, "learning_rate": 4.099241472536125e-05, "loss": 2.2245, "step": 6226000 }, { "epoch": 18.02, "learning_rate": 4.099169107771397e-05, "loss": 2.1782, "step": 6226500 }, { "epoch": 18.02, "learning_rate": 4.09909674300667e-05, "loss": 2.2054, "step": 6227000 }, { "epoch": 18.03, "learning_rate": 4.099024378241942e-05, "loss": 2.2007, "step": 6227500 }, { "epoch": 18.03, "learning_rate": 4.098952013477214e-05, "loss": 2.1931, "step": 6228000 }, { "epoch": 18.03, "learning_rate": 4.098879793442016e-05, "loss": 2.1865, "step": 6228500 }, { "epoch": 18.03, "learning_rate": 4.098807428677288e-05, "loss": 2.1988, "step": 6229000 }, { "epoch": 18.03, "learning_rate": 4.09873506391256e-05, "loss": 2.2037, "step": 6229500 }, { "epoch": 18.03, "learning_rate": 4.098662843877362e-05, "loss": 2.203, "step": 6230000 }, { "epoch": 18.03, "learning_rate": 4.098590623842164e-05, "loss": 2.1995, "step": 6230500 }, { "epoch": 18.04, "learning_rate": 4.098518403806966e-05, "loss": 2.1877, "step": 6231000 }, { "epoch": 18.04, "learning_rate": 4.098446039042238e-05, "loss": 2.2022, "step": 6231500 }, { "epoch": 18.04, "learning_rate": 4.09837367427751e-05, "loss": 2.1848, "step": 6232000 }, { "epoch": 18.04, "learning_rate": 4.0983013095127823e-05, "loss": 2.1901, "step": 6232500 }, { "epoch": 18.04, "learning_rate": 4.0982289447480546e-05, "loss": 2.1981, "step": 6233000 }, { "epoch": 18.04, "learning_rate": 4.0981565799833275e-05, "loss": 2.2141, "step": 6233500 }, { "epoch": 18.04, "learning_rate": 4.0980842152186e-05, "loss": 2.2312, "step": 6234000 }, { "epoch": 18.05, "learning_rate": 4.0980118504538726e-05, "loss": 2.2281, "step": 6234500 }, { "epoch": 18.05, "learning_rate": 4.097939485689145e-05, "loss": 2.2172, "step": 6235000 }, { "epoch": 18.05, "learning_rate": 4.097867120924417e-05, "loss": 2.2217, "step": 6235500 }, { "epoch": 18.05, "learning_rate": 4.097794756159689e-05, "loss": 2.1958, "step": 6236000 }, { "epoch": 18.05, "learning_rate": 4.0977223913949615e-05, "loss": 2.1713, "step": 6236500 }, { "epoch": 18.05, "learning_rate": 4.097650026630234e-05, "loss": 2.2056, "step": 6237000 }, { "epoch": 18.06, "learning_rate": 4.097577661865506e-05, "loss": 2.2119, "step": 6237500 }, { "epoch": 18.06, "learning_rate": 4.0975054418303075e-05, "loss": 2.236, "step": 6238000 }, { "epoch": 18.06, "learning_rate": 4.09743307706558e-05, "loss": 2.1786, "step": 6238500 }, { "epoch": 18.06, "learning_rate": 4.0973607123008526e-05, "loss": 2.2054, "step": 6239000 }, { "epoch": 18.06, "learning_rate": 4.097288492265654e-05, "loss": 2.2009, "step": 6239500 }, { "epoch": 18.06, "learning_rate": 4.0972161275009264e-05, "loss": 2.2083, "step": 6240000 }, { "epoch": 18.06, "learning_rate": 4.0971437627361986e-05, "loss": 2.1918, "step": 6240500 }, { "epoch": 18.07, "learning_rate": 4.097071397971471e-05, "loss": 2.2003, "step": 6241000 }, { "epoch": 18.07, "learning_rate": 4.096999033206743e-05, "loss": 2.1908, "step": 6241500 }, { "epoch": 18.07, "learning_rate": 4.096926668442016e-05, "loss": 2.1986, "step": 6242000 }, { "epoch": 18.07, "learning_rate": 4.096854303677288e-05, "loss": 2.2173, "step": 6242500 }, { "epoch": 18.07, "learning_rate": 4.0967819389125604e-05, "loss": 2.185, "step": 6243000 }, { "epoch": 18.07, "learning_rate": 4.0967095741478326e-05, "loss": 2.2058, "step": 6243500 }, { "epoch": 18.07, "learning_rate": 4.096637209383105e-05, "loss": 2.2128, "step": 6244000 }, { "epoch": 18.08, "learning_rate": 4.096564989347907e-05, "loss": 2.1966, "step": 6244500 }, { "epoch": 18.08, "learning_rate": 4.096492624583179e-05, "loss": 2.1975, "step": 6245000 }, { "epoch": 18.08, "learning_rate": 4.0964202598184515e-05, "loss": 2.2148, "step": 6245500 }, { "epoch": 18.08, "learning_rate": 4.096348039783253e-05, "loss": 2.207, "step": 6246000 }, { "epoch": 18.08, "learning_rate": 4.096275675018525e-05, "loss": 2.2186, "step": 6246500 }, { "epoch": 18.08, "learning_rate": 4.0962033102537975e-05, "loss": 2.1778, "step": 6247000 }, { "epoch": 18.08, "learning_rate": 4.09613094548907e-05, "loss": 2.221, "step": 6247500 }, { "epoch": 18.09, "learning_rate": 4.0960585807243427e-05, "loss": 2.213, "step": 6248000 }, { "epoch": 18.09, "learning_rate": 4.095986360689144e-05, "loss": 2.212, "step": 6248500 }, { "epoch": 18.09, "learning_rate": 4.0959139959244164e-05, "loss": 2.1738, "step": 6249000 }, { "epoch": 18.09, "learning_rate": 4.0958416311596893e-05, "loss": 2.2217, "step": 6249500 }, { "epoch": 18.09, "learning_rate": 4.0957692663949616e-05, "loss": 2.2128, "step": 6250000 }, { "epoch": 18.09, "learning_rate": 4.095696901630234e-05, "loss": 2.2102, "step": 6250500 }, { "epoch": 18.09, "learning_rate": 4.095624536865506e-05, "loss": 2.1828, "step": 6251000 }, { "epoch": 18.1, "learning_rate": 4.095552172100778e-05, "loss": 2.2074, "step": 6251500 }, { "epoch": 18.1, "learning_rate": 4.0954798073360505e-05, "loss": 2.2163, "step": 6252000 }, { "epoch": 18.1, "learning_rate": 4.095407442571323e-05, "loss": 2.2057, "step": 6252500 }, { "epoch": 18.1, "learning_rate": 4.095335077806595e-05, "loss": 2.2075, "step": 6253000 }, { "epoch": 18.1, "learning_rate": 4.095262857771397e-05, "loss": 2.2223, "step": 6253500 }, { "epoch": 18.1, "learning_rate": 4.095190637736199e-05, "loss": 2.1895, "step": 6254000 }, { "epoch": 18.1, "learning_rate": 4.095118272971471e-05, "loss": 2.2077, "step": 6254500 }, { "epoch": 18.11, "learning_rate": 4.095045908206743e-05, "loss": 2.1956, "step": 6255000 }, { "epoch": 18.11, "learning_rate": 4.0949735434420154e-05, "loss": 2.2121, "step": 6255500 }, { "epoch": 18.11, "learning_rate": 4.0949011786772876e-05, "loss": 2.224, "step": 6256000 }, { "epoch": 18.11, "learning_rate": 4.0948288139125605e-05, "loss": 2.1935, "step": 6256500 }, { "epoch": 18.11, "learning_rate": 4.094756593877363e-05, "loss": 2.2131, "step": 6257000 }, { "epoch": 18.11, "learning_rate": 4.094684373842164e-05, "loss": 2.1954, "step": 6257500 }, { "epoch": 18.11, "learning_rate": 4.0946120090774365e-05, "loss": 2.2209, "step": 6258000 }, { "epoch": 18.12, "learning_rate": 4.094539644312709e-05, "loss": 2.1848, "step": 6258500 }, { "epoch": 18.12, "learning_rate": 4.094467279547981e-05, "loss": 2.2127, "step": 6259000 }, { "epoch": 18.12, "learning_rate": 4.0943950595127825e-05, "loss": 2.1965, "step": 6259500 }, { "epoch": 18.12, "learning_rate": 4.0943226947480554e-05, "loss": 2.1881, "step": 6260000 }, { "epoch": 18.12, "learning_rate": 4.094250474712857e-05, "loss": 2.1915, "step": 6260500 }, { "epoch": 18.12, "learning_rate": 4.094178109948129e-05, "loss": 2.2042, "step": 6261000 }, { "epoch": 18.12, "learning_rate": 4.0941057451834014e-05, "loss": 2.2127, "step": 6261500 }, { "epoch": 18.13, "learning_rate": 4.0940333804186736e-05, "loss": 2.195, "step": 6262000 }, { "epoch": 18.13, "learning_rate": 4.093961015653946e-05, "loss": 2.1942, "step": 6262500 }, { "epoch": 18.13, "learning_rate": 4.093888650889218e-05, "loss": 2.1827, "step": 6263000 }, { "epoch": 18.13, "learning_rate": 4.09381628612449e-05, "loss": 2.1874, "step": 6263500 }, { "epoch": 18.13, "learning_rate": 4.0937439213597625e-05, "loss": 2.215, "step": 6264000 }, { "epoch": 18.13, "learning_rate": 4.0936715565950354e-05, "loss": 2.2031, "step": 6264500 }, { "epoch": 18.13, "learning_rate": 4.0935991918303076e-05, "loss": 2.252, "step": 6265000 }, { "epoch": 18.14, "learning_rate": 4.0935268270655805e-05, "loss": 2.2281, "step": 6265500 }, { "epoch": 18.14, "learning_rate": 4.093454462300853e-05, "loss": 2.1939, "step": 6266000 }, { "epoch": 18.14, "learning_rate": 4.093382097536125e-05, "loss": 2.2146, "step": 6266500 }, { "epoch": 18.14, "learning_rate": 4.0933098775009265e-05, "loss": 2.1937, "step": 6267000 }, { "epoch": 18.14, "learning_rate": 4.093237512736199e-05, "loss": 2.202, "step": 6267500 }, { "epoch": 18.14, "learning_rate": 4.093165147971471e-05, "loss": 2.1918, "step": 6268000 }, { "epoch": 18.14, "learning_rate": 4.093092783206743e-05, "loss": 2.207, "step": 6268500 }, { "epoch": 18.15, "learning_rate": 4.0930205631715455e-05, "loss": 2.2055, "step": 6269000 }, { "epoch": 18.15, "learning_rate": 4.092948343136347e-05, "loss": 2.2198, "step": 6269500 }, { "epoch": 18.15, "learning_rate": 4.0928761231011486e-05, "loss": 2.22, "step": 6270000 }, { "epoch": 18.15, "learning_rate": 4.092803758336421e-05, "loss": 2.2112, "step": 6270500 }, { "epoch": 18.15, "learning_rate": 4.092731393571693e-05, "loss": 2.1897, "step": 6271000 }, { "epoch": 18.15, "learning_rate": 4.092659028806965e-05, "loss": 2.2115, "step": 6271500 }, { "epoch": 18.15, "learning_rate": 4.092586664042238e-05, "loss": 2.2236, "step": 6272000 }, { "epoch": 18.16, "learning_rate": 4.0925142992775104e-05, "loss": 2.2052, "step": 6272500 }, { "epoch": 18.16, "learning_rate": 4.0924420792423126e-05, "loss": 2.2038, "step": 6273000 }, { "epoch": 18.16, "learning_rate": 4.092369714477585e-05, "loss": 2.2078, "step": 6273500 }, { "epoch": 18.16, "learning_rate": 4.092297349712857e-05, "loss": 2.2206, "step": 6274000 }, { "epoch": 18.16, "learning_rate": 4.0922251296776586e-05, "loss": 2.1661, "step": 6274500 }, { "epoch": 18.16, "learning_rate": 4.092152764912931e-05, "loss": 2.2155, "step": 6275000 }, { "epoch": 18.17, "learning_rate": 4.092080400148203e-05, "loss": 2.2066, "step": 6275500 }, { "epoch": 18.17, "learning_rate": 4.092008035383475e-05, "loss": 2.2096, "step": 6276000 }, { "epoch": 18.17, "learning_rate": 4.091935670618748e-05, "loss": 2.2022, "step": 6276500 }, { "epoch": 18.17, "learning_rate": 4.0918633058540204e-05, "loss": 2.195, "step": 6277000 }, { "epoch": 18.17, "learning_rate": 4.091791085818822e-05, "loss": 2.2153, "step": 6277500 }, { "epoch": 18.17, "learning_rate": 4.091718721054094e-05, "loss": 2.1798, "step": 6278000 }, { "epoch": 18.17, "learning_rate": 4.0916463562893664e-05, "loss": 2.1871, "step": 6278500 }, { "epoch": 18.18, "learning_rate": 4.091574136254168e-05, "loss": 2.1961, "step": 6279000 }, { "epoch": 18.18, "learning_rate": 4.09150177148944e-05, "loss": 2.2195, "step": 6279500 }, { "epoch": 18.18, "learning_rate": 4.091429406724713e-05, "loss": 2.2057, "step": 6280000 }, { "epoch": 18.18, "learning_rate": 4.091357041959985e-05, "loss": 2.1976, "step": 6280500 }, { "epoch": 18.18, "learning_rate": 4.091284677195258e-05, "loss": 2.212, "step": 6281000 }, { "epoch": 18.18, "learning_rate": 4.0912123124305304e-05, "loss": 2.2237, "step": 6281500 }, { "epoch": 18.18, "learning_rate": 4.0911399476658026e-05, "loss": 2.2127, "step": 6282000 }, { "epoch": 18.19, "learning_rate": 4.091067582901075e-05, "loss": 2.1955, "step": 6282500 }, { "epoch": 18.19, "learning_rate": 4.090995218136347e-05, "loss": 2.1986, "step": 6283000 }, { "epoch": 18.19, "learning_rate": 4.090922853371619e-05, "loss": 2.2097, "step": 6283500 }, { "epoch": 18.19, "learning_rate": 4.0908504886068915e-05, "loss": 2.213, "step": 6284000 }, { "epoch": 18.19, "learning_rate": 4.090778123842164e-05, "loss": 2.1966, "step": 6284500 }, { "epoch": 18.19, "learning_rate": 4.090705759077436e-05, "loss": 2.1968, "step": 6285000 }, { "epoch": 18.19, "learning_rate": 4.090633394312708e-05, "loss": 2.2055, "step": 6285500 }, { "epoch": 18.2, "learning_rate": 4.0905611742775104e-05, "loss": 2.2165, "step": 6286000 }, { "epoch": 18.2, "learning_rate": 4.0904888095127827e-05, "loss": 2.1856, "step": 6286500 }, { "epoch": 18.2, "learning_rate": 4.0904164447480556e-05, "loss": 2.2236, "step": 6287000 }, { "epoch": 18.2, "learning_rate": 4.090344079983328e-05, "loss": 2.203, "step": 6287500 }, { "epoch": 18.2, "learning_rate": 4.0902717152186e-05, "loss": 2.2089, "step": 6288000 }, { "epoch": 18.2, "learning_rate": 4.090199350453872e-05, "loss": 2.1855, "step": 6288500 }, { "epoch": 18.2, "learning_rate": 4.0901269856891445e-05, "loss": 2.1925, "step": 6289000 }, { "epoch": 18.21, "learning_rate": 4.090054620924417e-05, "loss": 2.187, "step": 6289500 }, { "epoch": 18.21, "learning_rate": 4.089982256159689e-05, "loss": 2.1972, "step": 6290000 }, { "epoch": 18.21, "learning_rate": 4.0899100361244905e-05, "loss": 2.2174, "step": 6290500 }, { "epoch": 18.21, "learning_rate": 4.0898376713597634e-05, "loss": 2.2285, "step": 6291000 }, { "epoch": 18.21, "learning_rate": 4.0897653065950356e-05, "loss": 2.2031, "step": 6291500 }, { "epoch": 18.21, "learning_rate": 4.089692941830308e-05, "loss": 2.1896, "step": 6292000 }, { "epoch": 18.21, "learning_rate": 4.08962057706558e-05, "loss": 2.1824, "step": 6292500 }, { "epoch": 18.22, "learning_rate": 4.089548212300852e-05, "loss": 2.214, "step": 6293000 }, { "epoch": 18.22, "learning_rate": 4.089475992265654e-05, "loss": 2.1879, "step": 6293500 }, { "epoch": 18.22, "learning_rate": 4.089403627500926e-05, "loss": 2.2092, "step": 6294000 }, { "epoch": 18.22, "learning_rate": 4.089331262736199e-05, "loss": 2.2006, "step": 6294500 }, { "epoch": 18.22, "learning_rate": 4.089258897971471e-05, "loss": 2.1957, "step": 6295000 }, { "epoch": 18.22, "learning_rate": 4.0891865332067434e-05, "loss": 2.2209, "step": 6295500 }, { "epoch": 18.22, "learning_rate": 4.0891141684420156e-05, "loss": 2.2028, "step": 6296000 }, { "epoch": 18.23, "learning_rate": 4.0890418036772885e-05, "loss": 2.1951, "step": 6296500 }, { "epoch": 18.23, "learning_rate": 4.088969438912561e-05, "loss": 2.2108, "step": 6297000 }, { "epoch": 18.23, "learning_rate": 4.088897074147833e-05, "loss": 2.238, "step": 6297500 }, { "epoch": 18.23, "learning_rate": 4.088824709383105e-05, "loss": 2.1793, "step": 6298000 }, { "epoch": 18.23, "learning_rate": 4.0887523446183774e-05, "loss": 2.2006, "step": 6298500 }, { "epoch": 18.23, "learning_rate": 4.0886799798536496e-05, "loss": 2.2033, "step": 6299000 }, { "epoch": 18.23, "learning_rate": 4.088607759818451e-05, "loss": 2.2051, "step": 6299500 }, { "epoch": 18.24, "learning_rate": 4.0885353950537234e-05, "loss": 2.2029, "step": 6300000 }, { "epoch": 18.24, "learning_rate": 4.0884630302889956e-05, "loss": 2.218, "step": 6300500 }, { "epoch": 18.24, "learning_rate": 4.0883906655242685e-05, "loss": 2.1918, "step": 6301000 }, { "epoch": 18.24, "learning_rate": 4.088318300759541e-05, "loss": 2.1989, "step": 6301500 }, { "epoch": 18.24, "learning_rate": 4.0882459359948136e-05, "loss": 2.1754, "step": 6302000 }, { "epoch": 18.24, "learning_rate": 4.088173571230086e-05, "loss": 2.1951, "step": 6302500 }, { "epoch": 18.24, "learning_rate": 4.088101206465358e-05, "loss": 2.2137, "step": 6303000 }, { "epoch": 18.25, "learning_rate": 4.08802884170063e-05, "loss": 2.2152, "step": 6303500 }, { "epoch": 18.25, "learning_rate": 4.0879564769359025e-05, "loss": 2.2405, "step": 6304000 }, { "epoch": 18.25, "learning_rate": 4.087884112171175e-05, "loss": 2.2363, "step": 6304500 }, { "epoch": 18.25, "learning_rate": 4.087811747406447e-05, "loss": 2.2048, "step": 6305000 }, { "epoch": 18.25, "learning_rate": 4.087739382641719e-05, "loss": 2.2139, "step": 6305500 }, { "epoch": 18.25, "learning_rate": 4.087667162606521e-05, "loss": 2.203, "step": 6306000 }, { "epoch": 18.25, "learning_rate": 4.087594797841794e-05, "loss": 2.2341, "step": 6306500 }, { "epoch": 18.26, "learning_rate": 4.087522433077066e-05, "loss": 2.2078, "step": 6307000 }, { "epoch": 18.26, "learning_rate": 4.0874502130418674e-05, "loss": 2.2169, "step": 6307500 }, { "epoch": 18.26, "learning_rate": 4.08737784827714e-05, "loss": 2.1963, "step": 6308000 }, { "epoch": 18.26, "learning_rate": 4.087305483512412e-05, "loss": 2.2266, "step": 6308500 }, { "epoch": 18.26, "learning_rate": 4.087233118747685e-05, "loss": 2.2118, "step": 6309000 }, { "epoch": 18.26, "learning_rate": 4.087160753982957e-05, "loss": 2.2246, "step": 6309500 }, { "epoch": 18.26, "learning_rate": 4.087088389218229e-05, "loss": 2.168, "step": 6310000 }, { "epoch": 18.27, "learning_rate": 4.087016169183031e-05, "loss": 2.2174, "step": 6310500 }, { "epoch": 18.27, "learning_rate": 4.086943949147833e-05, "loss": 2.1978, "step": 6311000 }, { "epoch": 18.27, "learning_rate": 4.086871584383105e-05, "loss": 2.2121, "step": 6311500 }, { "epoch": 18.27, "learning_rate": 4.0867992196183775e-05, "loss": 2.2253, "step": 6312000 }, { "epoch": 18.27, "learning_rate": 4.086726999583179e-05, "loss": 2.2224, "step": 6312500 }, { "epoch": 18.27, "learning_rate": 4.086654634818451e-05, "loss": 2.1889, "step": 6313000 }, { "epoch": 18.27, "learning_rate": 4.0865822700537235e-05, "loss": 2.1993, "step": 6313500 }, { "epoch": 18.28, "learning_rate": 4.086509905288996e-05, "loss": 2.2033, "step": 6314000 }, { "epoch": 18.28, "learning_rate": 4.0864375405242686e-05, "loss": 2.213, "step": 6314500 }, { "epoch": 18.28, "learning_rate": 4.086365175759541e-05, "loss": 2.1991, "step": 6315000 }, { "epoch": 18.28, "learning_rate": 4.086292810994813e-05, "loss": 2.223, "step": 6315500 }, { "epoch": 18.28, "learning_rate": 4.086220446230085e-05, "loss": 2.2149, "step": 6316000 }, { "epoch": 18.28, "learning_rate": 4.086148081465358e-05, "loss": 2.1947, "step": 6316500 }, { "epoch": 18.29, "learning_rate": 4.0860757167006304e-05, "loss": 2.207, "step": 6317000 }, { "epoch": 18.29, "learning_rate": 4.0860033519359026e-05, "loss": 2.198, "step": 6317500 }, { "epoch": 18.29, "learning_rate": 4.0859312766302335e-05, "loss": 2.194, "step": 6318000 }, { "epoch": 18.29, "learning_rate": 4.085858911865506e-05, "loss": 2.2228, "step": 6318500 }, { "epoch": 18.29, "learning_rate": 4.0857865471007786e-05, "loss": 2.2279, "step": 6319000 }, { "epoch": 18.29, "learning_rate": 4.08571432706558e-05, "loss": 2.2013, "step": 6319500 }, { "epoch": 18.29, "learning_rate": 4.0856419623008524e-05, "loss": 2.1864, "step": 6320000 }, { "epoch": 18.3, "learning_rate": 4.0855695975361246e-05, "loss": 2.1955, "step": 6320500 }, { "epoch": 18.3, "learning_rate": 4.085497232771397e-05, "loss": 2.2256, "step": 6321000 }, { "epoch": 18.3, "learning_rate": 4.085424868006669e-05, "loss": 2.2078, "step": 6321500 }, { "epoch": 18.3, "learning_rate": 4.085352503241941e-05, "loss": 2.2152, "step": 6322000 }, { "epoch": 18.3, "learning_rate": 4.0852801384772135e-05, "loss": 2.2024, "step": 6322500 }, { "epoch": 18.3, "learning_rate": 4.085207773712486e-05, "loss": 2.1948, "step": 6323000 }, { "epoch": 18.3, "learning_rate": 4.0851354089477587e-05, "loss": 2.2103, "step": 6323500 }, { "epoch": 18.31, "learning_rate": 4.085063044183031e-05, "loss": 2.2078, "step": 6324000 }, { "epoch": 18.31, "learning_rate": 4.084990679418304e-05, "loss": 2.1948, "step": 6324500 }, { "epoch": 18.31, "learning_rate": 4.084918314653576e-05, "loss": 2.2235, "step": 6325000 }, { "epoch": 18.31, "learning_rate": 4.084845949888848e-05, "loss": 2.1937, "step": 6325500 }, { "epoch": 18.31, "learning_rate": 4.0847735851241204e-05, "loss": 2.1923, "step": 6326000 }, { "epoch": 18.31, "learning_rate": 4.084701220359393e-05, "loss": 2.1962, "step": 6326500 }, { "epoch": 18.31, "learning_rate": 4.084628855594665e-05, "loss": 2.1993, "step": 6327000 }, { "epoch": 18.32, "learning_rate": 4.084556490829937e-05, "loss": 2.2362, "step": 6327500 }, { "epoch": 18.32, "learning_rate": 4.0844841260652093e-05, "loss": 2.2172, "step": 6328000 }, { "epoch": 18.32, "learning_rate": 4.0844117613004816e-05, "loss": 2.2263, "step": 6328500 }, { "epoch": 18.32, "learning_rate": 4.084339685994813e-05, "loss": 2.2187, "step": 6329000 }, { "epoch": 18.32, "learning_rate": 4.0842673212300854e-05, "loss": 2.2171, "step": 6329500 }, { "epoch": 18.32, "learning_rate": 4.084195101194887e-05, "loss": 2.2261, "step": 6330000 }, { "epoch": 18.32, "learning_rate": 4.084122736430159e-05, "loss": 2.2247, "step": 6330500 }, { "epoch": 18.33, "learning_rate": 4.0840503716654314e-05, "loss": 2.2004, "step": 6331000 }, { "epoch": 18.33, "learning_rate": 4.083978006900704e-05, "loss": 2.218, "step": 6331500 }, { "epoch": 18.33, "learning_rate": 4.0839056421359765e-05, "loss": 2.2125, "step": 6332000 }, { "epoch": 18.33, "learning_rate": 4.083833422100779e-05, "loss": 2.2138, "step": 6332500 }, { "epoch": 18.33, "learning_rate": 4.083761057336051e-05, "loss": 2.2387, "step": 6333000 }, { "epoch": 18.33, "learning_rate": 4.083688692571323e-05, "loss": 2.2185, "step": 6333500 }, { "epoch": 18.33, "learning_rate": 4.0836163278065954e-05, "loss": 2.2142, "step": 6334000 }, { "epoch": 18.34, "learning_rate": 4.0835439630418676e-05, "loss": 2.2031, "step": 6334500 }, { "epoch": 18.34, "learning_rate": 4.08347159827714e-05, "loss": 2.2188, "step": 6335000 }, { "epoch": 18.34, "learning_rate": 4.083399233512412e-05, "loss": 2.2132, "step": 6335500 }, { "epoch": 18.34, "learning_rate": 4.083326868747684e-05, "loss": 2.2129, "step": 6336000 }, { "epoch": 18.34, "learning_rate": 4.0832545039829565e-05, "loss": 2.179, "step": 6336500 }, { "epoch": 18.34, "learning_rate": 4.083182139218229e-05, "loss": 2.2442, "step": 6337000 }, { "epoch": 18.34, "learning_rate": 4.0831097744535016e-05, "loss": 2.2008, "step": 6337500 }, { "epoch": 18.35, "learning_rate": 4.083037554418303e-05, "loss": 2.2204, "step": 6338000 }, { "epoch": 18.35, "learning_rate": 4.082965334383105e-05, "loss": 2.22, "step": 6338500 }, { "epoch": 18.35, "learning_rate": 4.0828929696183776e-05, "loss": 2.209, "step": 6339000 }, { "epoch": 18.35, "learning_rate": 4.08282060485365e-05, "loss": 2.2159, "step": 6339500 }, { "epoch": 18.35, "learning_rate": 4.082748240088922e-05, "loss": 2.2165, "step": 6340000 }, { "epoch": 18.35, "learning_rate": 4.082675875324194e-05, "loss": 2.226, "step": 6340500 }, { "epoch": 18.35, "learning_rate": 4.0826035105594665e-05, "loss": 2.2199, "step": 6341000 }, { "epoch": 18.36, "learning_rate": 4.082531145794739e-05, "loss": 2.2158, "step": 6341500 }, { "epoch": 18.36, "learning_rate": 4.0824587810300117e-05, "loss": 2.2275, "step": 6342000 }, { "epoch": 18.36, "learning_rate": 4.082386416265284e-05, "loss": 2.2004, "step": 6342500 }, { "epoch": 18.36, "learning_rate": 4.082314051500556e-05, "loss": 2.2055, "step": 6343000 }, { "epoch": 18.36, "learning_rate": 4.082241686735828e-05, "loss": 2.2131, "step": 6343500 }, { "epoch": 18.36, "learning_rate": 4.08216946670063e-05, "loss": 2.1978, "step": 6344000 }, { "epoch": 18.36, "learning_rate": 4.082097101935902e-05, "loss": 2.2166, "step": 6344500 }, { "epoch": 18.37, "learning_rate": 4.082024737171174e-05, "loss": 2.1994, "step": 6345000 }, { "epoch": 18.37, "learning_rate": 4.0819523724064465e-05, "loss": 2.2165, "step": 6345500 }, { "epoch": 18.37, "learning_rate": 4.081880152371249e-05, "loss": 2.2125, "step": 6346000 }, { "epoch": 18.37, "learning_rate": 4.081807787606522e-05, "loss": 2.2272, "step": 6346500 }, { "epoch": 18.37, "learning_rate": 4.081735422841794e-05, "loss": 2.2166, "step": 6347000 }, { "epoch": 18.37, "learning_rate": 4.081663058077066e-05, "loss": 2.2138, "step": 6347500 }, { "epoch": 18.37, "learning_rate": 4.081590838041868e-05, "loss": 2.2211, "step": 6348000 }, { "epoch": 18.38, "learning_rate": 4.08151847327714e-05, "loss": 2.2084, "step": 6348500 }, { "epoch": 18.38, "learning_rate": 4.081446108512412e-05, "loss": 2.2263, "step": 6349000 }, { "epoch": 18.38, "learning_rate": 4.0813737437476844e-05, "loss": 2.2108, "step": 6349500 }, { "epoch": 18.38, "learning_rate": 4.0813013789829566e-05, "loss": 2.2173, "step": 6350000 }, { "epoch": 18.38, "learning_rate": 4.081229014218229e-05, "loss": 2.1767, "step": 6350500 }, { "epoch": 18.38, "learning_rate": 4.081156649453502e-05, "loss": 2.2156, "step": 6351000 }, { "epoch": 18.38, "learning_rate": 4.081084284688774e-05, "loss": 2.2319, "step": 6351500 }, { "epoch": 18.39, "learning_rate": 4.081011919924046e-05, "loss": 2.198, "step": 6352000 }, { "epoch": 18.39, "learning_rate": 4.0809395551593184e-05, "loss": 2.2356, "step": 6352500 }, { "epoch": 18.39, "learning_rate": 4.08086733512412e-05, "loss": 2.2196, "step": 6353000 }, { "epoch": 18.39, "learning_rate": 4.080794970359392e-05, "loss": 2.2042, "step": 6353500 }, { "epoch": 18.39, "learning_rate": 4.080722605594665e-05, "loss": 2.1989, "step": 6354000 }, { "epoch": 18.39, "learning_rate": 4.080650240829937e-05, "loss": 2.2192, "step": 6354500 }, { "epoch": 18.4, "learning_rate": 4.0805778760652095e-05, "loss": 2.2049, "step": 6355000 }, { "epoch": 18.4, "learning_rate": 4.080505511300482e-05, "loss": 2.2, "step": 6355500 }, { "epoch": 18.4, "learning_rate": 4.080433146535754e-05, "loss": 2.1992, "step": 6356000 }, { "epoch": 18.4, "learning_rate": 4.080360781771027e-05, "loss": 2.213, "step": 6356500 }, { "epoch": 18.4, "learning_rate": 4.080288417006299e-05, "loss": 2.1852, "step": 6357000 }, { "epoch": 18.4, "learning_rate": 4.0802161969711006e-05, "loss": 2.2163, "step": 6357500 }, { "epoch": 18.4, "learning_rate": 4.080143832206373e-05, "loss": 2.1719, "step": 6358000 }, { "epoch": 18.41, "learning_rate": 4.080071467441645e-05, "loss": 2.2057, "step": 6358500 }, { "epoch": 18.41, "learning_rate": 4.079999102676917e-05, "loss": 2.194, "step": 6359000 }, { "epoch": 18.41, "learning_rate": 4.0799267379121895e-05, "loss": 2.1995, "step": 6359500 }, { "epoch": 18.41, "learning_rate": 4.079854373147462e-05, "loss": 2.1933, "step": 6360000 }, { "epoch": 18.41, "learning_rate": 4.079782008382734e-05, "loss": 2.1939, "step": 6360500 }, { "epoch": 18.41, "learning_rate": 4.079709643618007e-05, "loss": 2.1999, "step": 6361000 }, { "epoch": 18.41, "learning_rate": 4.079637423582809e-05, "loss": 2.2173, "step": 6361500 }, { "epoch": 18.42, "learning_rate": 4.079565058818081e-05, "loss": 2.2237, "step": 6362000 }, { "epoch": 18.42, "learning_rate": 4.0794926940533535e-05, "loss": 2.2159, "step": 6362500 }, { "epoch": 18.42, "learning_rate": 4.079420329288626e-05, "loss": 2.2205, "step": 6363000 }, { "epoch": 18.42, "learning_rate": 4.079347964523898e-05, "loss": 2.2027, "step": 6363500 }, { "epoch": 18.42, "learning_rate": 4.0792758892182296e-05, "loss": 2.2153, "step": 6364000 }, { "epoch": 18.42, "learning_rate": 4.079203524453502e-05, "loss": 2.204, "step": 6364500 }, { "epoch": 18.42, "learning_rate": 4.079131159688774e-05, "loss": 2.2055, "step": 6365000 }, { "epoch": 18.43, "learning_rate": 4.079058794924046e-05, "loss": 2.1951, "step": 6365500 }, { "epoch": 18.43, "learning_rate": 4.0789864301593185e-05, "loss": 2.2341, "step": 6366000 }, { "epoch": 18.43, "learning_rate": 4.078914065394591e-05, "loss": 2.2103, "step": 6366500 }, { "epoch": 18.43, "learning_rate": 4.078841700629863e-05, "loss": 2.2011, "step": 6367000 }, { "epoch": 18.43, "learning_rate": 4.078769335865135e-05, "loss": 2.22, "step": 6367500 }, { "epoch": 18.43, "learning_rate": 4.0786969711004073e-05, "loss": 2.2226, "step": 6368000 }, { "epoch": 18.43, "learning_rate": 4.07862460633568e-05, "loss": 2.192, "step": 6368500 }, { "epoch": 18.44, "learning_rate": 4.078552386300482e-05, "loss": 2.189, "step": 6369000 }, { "epoch": 18.44, "learning_rate": 4.078480021535755e-05, "loss": 2.217, "step": 6369500 }, { "epoch": 18.44, "learning_rate": 4.078407656771027e-05, "loss": 2.1926, "step": 6370000 }, { "epoch": 18.44, "learning_rate": 4.078335292006299e-05, "loss": 2.2171, "step": 6370500 }, { "epoch": 18.44, "learning_rate": 4.0782629272415714e-05, "loss": 2.2168, "step": 6371000 }, { "epoch": 18.44, "learning_rate": 4.078190707206373e-05, "loss": 2.2005, "step": 6371500 }, { "epoch": 18.44, "learning_rate": 4.078118342441645e-05, "loss": 2.2163, "step": 6372000 }, { "epoch": 18.45, "learning_rate": 4.0780459776769174e-05, "loss": 2.2096, "step": 6372500 }, { "epoch": 18.45, "learning_rate": 4.0779736129121896e-05, "loss": 2.1958, "step": 6373000 }, { "epoch": 18.45, "learning_rate": 4.077901248147462e-05, "loss": 2.1901, "step": 6373500 }, { "epoch": 18.45, "learning_rate": 4.077828883382735e-05, "loss": 2.1985, "step": 6374000 }, { "epoch": 18.45, "learning_rate": 4.077756518618007e-05, "loss": 2.2001, "step": 6374500 }, { "epoch": 18.45, "learning_rate": 4.077684153853279e-05, "loss": 2.2285, "step": 6375000 }, { "epoch": 18.45, "learning_rate": 4.0776117890885514e-05, "loss": 2.1884, "step": 6375500 }, { "epoch": 18.46, "learning_rate": 4.077539424323824e-05, "loss": 2.2339, "step": 6376000 }, { "epoch": 18.46, "learning_rate": 4.077467204288626e-05, "loss": 2.223, "step": 6376500 }, { "epoch": 18.46, "learning_rate": 4.077394839523898e-05, "loss": 2.2426, "step": 6377000 }, { "epoch": 18.46, "learning_rate": 4.0773226194886996e-05, "loss": 2.2088, "step": 6377500 }, { "epoch": 18.46, "learning_rate": 4.077250254723972e-05, "loss": 2.1883, "step": 6378000 }, { "epoch": 18.46, "learning_rate": 4.077177889959245e-05, "loss": 2.192, "step": 6378500 }, { "epoch": 18.46, "learning_rate": 4.077105525194517e-05, "loss": 2.2184, "step": 6379000 }, { "epoch": 18.47, "learning_rate": 4.077033160429789e-05, "loss": 2.2105, "step": 6379500 }, { "epoch": 18.47, "learning_rate": 4.0769607956650614e-05, "loss": 2.2115, "step": 6380000 }, { "epoch": 18.47, "learning_rate": 4.0768884309003336e-05, "loss": 2.2089, "step": 6380500 }, { "epoch": 18.47, "learning_rate": 4.076816066135606e-05, "loss": 2.2202, "step": 6381000 }, { "epoch": 18.47, "learning_rate": 4.076743701370878e-05, "loss": 2.2147, "step": 6381500 }, { "epoch": 18.47, "learning_rate": 4.0766716260652097e-05, "loss": 2.2254, "step": 6382000 }, { "epoch": 18.47, "learning_rate": 4.076599406030011e-05, "loss": 2.2161, "step": 6382500 }, { "epoch": 18.48, "learning_rate": 4.0765270412652834e-05, "loss": 2.1944, "step": 6383000 }, { "epoch": 18.48, "learning_rate": 4.0764546765005557e-05, "loss": 2.2124, "step": 6383500 }, { "epoch": 18.48, "learning_rate": 4.0763823117358286e-05, "loss": 2.2334, "step": 6384000 }, { "epoch": 18.48, "learning_rate": 4.076309946971101e-05, "loss": 2.1812, "step": 6384500 }, { "epoch": 18.48, "learning_rate": 4.076237582206373e-05, "loss": 2.1969, "step": 6385000 }, { "epoch": 18.48, "learning_rate": 4.076165217441645e-05, "loss": 2.2115, "step": 6385500 }, { "epoch": 18.48, "learning_rate": 4.076092997406447e-05, "loss": 2.2127, "step": 6386000 }, { "epoch": 18.49, "learning_rate": 4.07602063264172e-05, "loss": 2.2198, "step": 6386500 }, { "epoch": 18.49, "learning_rate": 4.075948267876992e-05, "loss": 2.2301, "step": 6387000 }, { "epoch": 18.49, "learning_rate": 4.075875903112264e-05, "loss": 2.2226, "step": 6387500 }, { "epoch": 18.49, "learning_rate": 4.0758035383475364e-05, "loss": 2.2228, "step": 6388000 }, { "epoch": 18.49, "learning_rate": 4.0757311735828086e-05, "loss": 2.2135, "step": 6388500 }, { "epoch": 18.49, "learning_rate": 4.075658808818081e-05, "loss": 2.1943, "step": 6389000 }, { "epoch": 18.49, "learning_rate": 4.075586444053353e-05, "loss": 2.1971, "step": 6389500 }, { "epoch": 18.5, "learning_rate": 4.075514079288625e-05, "loss": 2.2041, "step": 6390000 }, { "epoch": 18.5, "learning_rate": 4.0754417145238975e-05, "loss": 2.1765, "step": 6390500 }, { "epoch": 18.5, "learning_rate": 4.0753693497591704e-05, "loss": 2.2086, "step": 6391000 }, { "epoch": 18.5, "learning_rate": 4.0752969849944426e-05, "loss": 2.2056, "step": 6391500 }, { "epoch": 18.5, "learning_rate": 4.075224764959245e-05, "loss": 2.22, "step": 6392000 }, { "epoch": 18.5, "learning_rate": 4.075152400194517e-05, "loss": 2.2204, "step": 6392500 }, { "epoch": 18.51, "learning_rate": 4.075080035429789e-05, "loss": 2.2235, "step": 6393000 }, { "epoch": 18.51, "learning_rate": 4.0750076706650615e-05, "loss": 2.2091, "step": 6393500 }, { "epoch": 18.51, "learning_rate": 4.074935305900334e-05, "loss": 2.2001, "step": 6394000 }, { "epoch": 18.51, "learning_rate": 4.0748632305946646e-05, "loss": 2.2021, "step": 6394500 }, { "epoch": 18.51, "learning_rate": 4.0747908658299375e-05, "loss": 2.2117, "step": 6395000 }, { "epoch": 18.51, "learning_rate": 4.07471850106521e-05, "loss": 2.2492, "step": 6395500 }, { "epoch": 18.51, "learning_rate": 4.074646136300482e-05, "loss": 2.2102, "step": 6396000 }, { "epoch": 18.52, "learning_rate": 4.074573771535754e-05, "loss": 2.2228, "step": 6396500 }, { "epoch": 18.52, "learning_rate": 4.0745014067710264e-05, "loss": 2.2011, "step": 6397000 }, { "epoch": 18.52, "learning_rate": 4.0744290420062986e-05, "loss": 2.2178, "step": 6397500 }, { "epoch": 18.52, "learning_rate": 4.0743568219711e-05, "loss": 2.2099, "step": 6398000 }, { "epoch": 18.52, "learning_rate": 4.074284457206373e-05, "loss": 2.2277, "step": 6398500 }, { "epoch": 18.52, "learning_rate": 4.074212092441645e-05, "loss": 2.1893, "step": 6399000 }, { "epoch": 18.52, "learning_rate": 4.0741397276769175e-05, "loss": 2.2072, "step": 6399500 }, { "epoch": 18.53, "learning_rate": 4.07406736291219e-05, "loss": 2.1991, "step": 6400000 }, { "epoch": 18.53, "learning_rate": 4.0739949981474627e-05, "loss": 2.2167, "step": 6400500 }, { "epoch": 18.53, "learning_rate": 4.073922633382735e-05, "loss": 2.2123, "step": 6401000 }, { "epoch": 18.53, "learning_rate": 4.073850268618007e-05, "loss": 2.2247, "step": 6401500 }, { "epoch": 18.53, "learning_rate": 4.073777903853279e-05, "loss": 2.2172, "step": 6402000 }, { "epoch": 18.53, "learning_rate": 4.0737055390885516e-05, "loss": 2.2101, "step": 6402500 }, { "epoch": 18.53, "learning_rate": 4.073633319053353e-05, "loss": 2.2082, "step": 6403000 }, { "epoch": 18.54, "learning_rate": 4.073560954288625e-05, "loss": 2.1909, "step": 6403500 }, { "epoch": 18.54, "learning_rate": 4.0734885895238976e-05, "loss": 2.1951, "step": 6404000 }, { "epoch": 18.54, "learning_rate": 4.07341622475917e-05, "loss": 2.2012, "step": 6404500 }, { "epoch": 18.54, "learning_rate": 4.073344004723972e-05, "loss": 2.2279, "step": 6405000 }, { "epoch": 18.54, "learning_rate": 4.073271639959244e-05, "loss": 2.1974, "step": 6405500 }, { "epoch": 18.54, "learning_rate": 4.0731994199240465e-05, "loss": 2.2066, "step": 6406000 }, { "epoch": 18.54, "learning_rate": 4.073127055159319e-05, "loss": 2.2027, "step": 6406500 }, { "epoch": 18.55, "learning_rate": 4.073054690394591e-05, "loss": 2.1847, "step": 6407000 }, { "epoch": 18.55, "learning_rate": 4.072982325629863e-05, "loss": 2.2049, "step": 6407500 }, { "epoch": 18.55, "learning_rate": 4.0729099608651354e-05, "loss": 2.22, "step": 6408000 }, { "epoch": 18.55, "learning_rate": 4.0728375961004076e-05, "loss": 2.1872, "step": 6408500 }, { "epoch": 18.55, "learning_rate": 4.07276523133568e-05, "loss": 2.1968, "step": 6409000 }, { "epoch": 18.55, "learning_rate": 4.072692866570953e-05, "loss": 2.2271, "step": 6409500 }, { "epoch": 18.55, "learning_rate": 4.072620501806225e-05, "loss": 2.2086, "step": 6410000 }, { "epoch": 18.56, "learning_rate": 4.072548137041497e-05, "loss": 2.2094, "step": 6410500 }, { "epoch": 18.56, "learning_rate": 4.0724757722767694e-05, "loss": 2.1971, "step": 6411000 }, { "epoch": 18.56, "learning_rate": 4.0724034075120416e-05, "loss": 2.218, "step": 6411500 }, { "epoch": 18.56, "learning_rate": 4.072331042747314e-05, "loss": 2.2173, "step": 6412000 }, { "epoch": 18.56, "learning_rate": 4.072258967441645e-05, "loss": 2.1927, "step": 6412500 }, { "epoch": 18.56, "learning_rate": 4.0721866026769176e-05, "loss": 2.2, "step": 6413000 }, { "epoch": 18.56, "learning_rate": 4.07211423791219e-05, "loss": 2.2176, "step": 6413500 }, { "epoch": 18.57, "learning_rate": 4.072041873147463e-05, "loss": 2.205, "step": 6414000 }, { "epoch": 18.57, "learning_rate": 4.071969508382735e-05, "loss": 2.1966, "step": 6414500 }, { "epoch": 18.57, "learning_rate": 4.071897143618007e-05, "loss": 2.1922, "step": 6415000 }, { "epoch": 18.57, "learning_rate": 4.0718247788532794e-05, "loss": 2.2279, "step": 6415500 }, { "epoch": 18.57, "learning_rate": 4.0717524140885516e-05, "loss": 2.2025, "step": 6416000 }, { "epoch": 18.57, "learning_rate": 4.071680194053353e-05, "loss": 2.1905, "step": 6416500 }, { "epoch": 18.57, "learning_rate": 4.071607974018155e-05, "loss": 2.1931, "step": 6417000 }, { "epoch": 18.58, "learning_rate": 4.0715356092534276e-05, "loss": 2.2224, "step": 6417500 }, { "epoch": 18.58, "learning_rate": 4.071463389218229e-05, "loss": 2.2145, "step": 6418000 }, { "epoch": 18.58, "learning_rate": 4.0713910244535014e-05, "loss": 2.1937, "step": 6418500 }, { "epoch": 18.58, "learning_rate": 4.0713186596887736e-05, "loss": 2.2067, "step": 6419000 }, { "epoch": 18.58, "learning_rate": 4.071246294924046e-05, "loss": 2.1892, "step": 6419500 }, { "epoch": 18.58, "learning_rate": 4.071173930159318e-05, "loss": 2.2333, "step": 6420000 }, { "epoch": 18.58, "learning_rate": 4.07110156539459e-05, "loss": 2.2102, "step": 6420500 }, { "epoch": 18.59, "learning_rate": 4.071029200629863e-05, "loss": 2.2314, "step": 6421000 }, { "epoch": 18.59, "learning_rate": 4.0709568358651354e-05, "loss": 2.1897, "step": 6421500 }, { "epoch": 18.59, "learning_rate": 4.070884471100408e-05, "loss": 2.2129, "step": 6422000 }, { "epoch": 18.59, "learning_rate": 4.07081225106521e-05, "loss": 2.1974, "step": 6422500 }, { "epoch": 18.59, "learning_rate": 4.070739886300482e-05, "loss": 2.2034, "step": 6423000 }, { "epoch": 18.59, "learning_rate": 4.070667666265284e-05, "loss": 2.1997, "step": 6423500 }, { "epoch": 18.59, "learning_rate": 4.070595301500556e-05, "loss": 2.2016, "step": 6424000 }, { "epoch": 18.6, "learning_rate": 4.070522936735828e-05, "loss": 2.2145, "step": 6424500 }, { "epoch": 18.6, "learning_rate": 4.0704505719711003e-05, "loss": 2.2172, "step": 6425000 }, { "epoch": 18.6, "learning_rate": 4.0703782072063726e-05, "loss": 2.2065, "step": 6425500 }, { "epoch": 18.6, "learning_rate": 4.0703058424416455e-05, "loss": 2.242, "step": 6426000 }, { "epoch": 18.6, "learning_rate": 4.070233477676918e-05, "loss": 2.2194, "step": 6426500 }, { "epoch": 18.6, "learning_rate": 4.07016111291219e-05, "loss": 2.2052, "step": 6427000 }, { "epoch": 18.6, "learning_rate": 4.070088748147462e-05, "loss": 2.1961, "step": 6427500 }, { "epoch": 18.61, "learning_rate": 4.070016528112264e-05, "loss": 2.2148, "step": 6428000 }, { "epoch": 18.61, "learning_rate": 4.069944308077065e-05, "loss": 2.1986, "step": 6428500 }, { "epoch": 18.61, "learning_rate": 4.069871943312338e-05, "loss": 2.2028, "step": 6429000 }, { "epoch": 18.61, "learning_rate": 4.0697995785476104e-05, "loss": 2.2003, "step": 6429500 }, { "epoch": 18.61, "learning_rate": 4.0697272137828826e-05, "loss": 2.2164, "step": 6430000 }, { "epoch": 18.61, "learning_rate": 4.0696548490181555e-05, "loss": 2.2049, "step": 6430500 }, { "epoch": 18.62, "learning_rate": 4.069582484253428e-05, "loss": 2.2078, "step": 6431000 }, { "epoch": 18.62, "learning_rate": 4.0695101194887e-05, "loss": 2.1875, "step": 6431500 }, { "epoch": 18.62, "learning_rate": 4.069437754723972e-05, "loss": 2.1991, "step": 6432000 }, { "epoch": 18.62, "learning_rate": 4.0693653899592444e-05, "loss": 2.2108, "step": 6432500 }, { "epoch": 18.62, "learning_rate": 4.0692930251945166e-05, "loss": 2.2104, "step": 6433000 }, { "epoch": 18.62, "learning_rate": 4.069220660429789e-05, "loss": 2.2093, "step": 6433500 }, { "epoch": 18.62, "learning_rate": 4.069148295665061e-05, "loss": 2.2284, "step": 6434000 }, { "epoch": 18.63, "learning_rate": 4.069075930900333e-05, "loss": 2.2032, "step": 6434500 }, { "epoch": 18.63, "learning_rate": 4.0690037108651355e-05, "loss": 2.2033, "step": 6435000 }, { "epoch": 18.63, "learning_rate": 4.068931490829937e-05, "loss": 2.2287, "step": 6435500 }, { "epoch": 18.63, "learning_rate": 4.06885912606521e-05, "loss": 2.2279, "step": 6436000 }, { "epoch": 18.63, "learning_rate": 4.068786761300482e-05, "loss": 2.2097, "step": 6436500 }, { "epoch": 18.63, "learning_rate": 4.068714541265284e-05, "loss": 2.2368, "step": 6437000 }, { "epoch": 18.63, "learning_rate": 4.068642176500556e-05, "loss": 2.2298, "step": 6437500 }, { "epoch": 18.64, "learning_rate": 4.068569811735828e-05, "loss": 2.1628, "step": 6438000 }, { "epoch": 18.64, "learning_rate": 4.0684974469711004e-05, "loss": 2.2073, "step": 6438500 }, { "epoch": 18.64, "learning_rate": 4.0684250822063726e-05, "loss": 2.1907, "step": 6439000 }, { "epoch": 18.64, "learning_rate": 4.0683527174416455e-05, "loss": 2.2381, "step": 6439500 }, { "epoch": 18.64, "learning_rate": 4.068280352676918e-05, "loss": 2.2224, "step": 6440000 }, { "epoch": 18.64, "learning_rate": 4.06820798791219e-05, "loss": 2.2251, "step": 6440500 }, { "epoch": 18.64, "learning_rate": 4.068135623147462e-05, "loss": 2.2169, "step": 6441000 }, { "epoch": 18.65, "learning_rate": 4.0680632583827344e-05, "loss": 2.2073, "step": 6441500 }, { "epoch": 18.65, "learning_rate": 4.067990893618007e-05, "loss": 2.1777, "step": 6442000 }, { "epoch": 18.65, "learning_rate": 4.067918528853279e-05, "loss": 2.2194, "step": 6442500 }, { "epoch": 18.65, "learning_rate": 4.067846164088551e-05, "loss": 2.192, "step": 6443000 }, { "epoch": 18.65, "learning_rate": 4.0677740887828834e-05, "loss": 2.2177, "step": 6443500 }, { "epoch": 18.65, "learning_rate": 4.0677017240181556e-05, "loss": 2.1985, "step": 6444000 }, { "epoch": 18.65, "learning_rate": 4.067629359253428e-05, "loss": 2.214, "step": 6444500 }, { "epoch": 18.66, "learning_rate": 4.0675569944887e-05, "loss": 2.2131, "step": 6445000 }, { "epoch": 18.66, "learning_rate": 4.067484629723972e-05, "loss": 2.2063, "step": 6445500 }, { "epoch": 18.66, "learning_rate": 4.0674122649592445e-05, "loss": 2.198, "step": 6446000 }, { "epoch": 18.66, "learning_rate": 4.067339900194517e-05, "loss": 2.2155, "step": 6446500 }, { "epoch": 18.66, "learning_rate": 4.067267535429789e-05, "loss": 2.2089, "step": 6447000 }, { "epoch": 18.66, "learning_rate": 4.067195170665061e-05, "loss": 2.2217, "step": 6447500 }, { "epoch": 18.66, "learning_rate": 4.0671228059003334e-05, "loss": 2.189, "step": 6448000 }, { "epoch": 18.67, "learning_rate": 4.067050730594665e-05, "loss": 2.2384, "step": 6448500 }, { "epoch": 18.67, "learning_rate": 4.066978365829937e-05, "loss": 2.2298, "step": 6449000 }, { "epoch": 18.67, "learning_rate": 4.0669060010652094e-05, "loss": 2.2055, "step": 6449500 }, { "epoch": 18.67, "learning_rate": 4.066833781030011e-05, "loss": 2.2142, "step": 6450000 }, { "epoch": 18.67, "learning_rate": 4.066761416265283e-05, "loss": 2.2111, "step": 6450500 }, { "epoch": 18.67, "learning_rate": 4.0666890515005554e-05, "loss": 2.2139, "step": 6451000 }, { "epoch": 18.67, "learning_rate": 4.066616686735828e-05, "loss": 2.2324, "step": 6451500 }, { "epoch": 18.68, "learning_rate": 4.0665443219711005e-05, "loss": 2.214, "step": 6452000 }, { "epoch": 18.68, "learning_rate": 4.0664719572063734e-05, "loss": 2.2089, "step": 6452500 }, { "epoch": 18.68, "learning_rate": 4.0663995924416456e-05, "loss": 2.2126, "step": 6453000 }, { "epoch": 18.68, "learning_rate": 4.066327372406447e-05, "loss": 2.202, "step": 6453500 }, { "epoch": 18.68, "learning_rate": 4.0662550076417194e-05, "loss": 2.232, "step": 6454000 }, { "epoch": 18.68, "learning_rate": 4.066182787606521e-05, "loss": 2.2082, "step": 6454500 }, { "epoch": 18.68, "learning_rate": 4.066110422841793e-05, "loss": 2.2551, "step": 6455000 }, { "epoch": 18.69, "learning_rate": 4.0660380580770654e-05, "loss": 2.212, "step": 6455500 }, { "epoch": 18.69, "learning_rate": 4.065965693312338e-05, "loss": 2.2215, "step": 6456000 }, { "epoch": 18.69, "learning_rate": 4.0658933285476105e-05, "loss": 2.212, "step": 6456500 }, { "epoch": 18.69, "learning_rate": 4.065820963782883e-05, "loss": 2.2087, "step": 6457000 }, { "epoch": 18.69, "learning_rate": 4.065748599018155e-05, "loss": 2.1818, "step": 6457500 }, { "epoch": 18.69, "learning_rate": 4.065676234253427e-05, "loss": 2.2234, "step": 6458000 }, { "epoch": 18.69, "learning_rate": 4.0656038694887e-05, "loss": 2.2052, "step": 6458500 }, { "epoch": 18.7, "learning_rate": 4.065531504723972e-05, "loss": 2.1988, "step": 6459000 }, { "epoch": 18.7, "learning_rate": 4.065459284688774e-05, "loss": 2.1825, "step": 6459500 }, { "epoch": 18.7, "learning_rate": 4.065386919924046e-05, "loss": 2.1943, "step": 6460000 }, { "epoch": 18.7, "learning_rate": 4.065314555159318e-05, "loss": 2.1997, "step": 6460500 }, { "epoch": 18.7, "learning_rate": 4.0652421903945906e-05, "loss": 2.2089, "step": 6461000 }, { "epoch": 18.7, "learning_rate": 4.0651698256298635e-05, "loss": 2.2063, "step": 6461500 }, { "epoch": 18.7, "learning_rate": 4.065097460865136e-05, "loss": 2.2328, "step": 6462000 }, { "epoch": 18.71, "learning_rate": 4.065025096100408e-05, "loss": 2.2301, "step": 6462500 }, { "epoch": 18.71, "learning_rate": 4.06495273133568e-05, "loss": 2.2358, "step": 6463000 }, { "epoch": 18.71, "learning_rate": 4.0648803665709523e-05, "loss": 2.2062, "step": 6463500 }, { "epoch": 18.71, "learning_rate": 4.0648080018062246e-05, "loss": 2.2141, "step": 6464000 }, { "epoch": 18.71, "learning_rate": 4.064735781771026e-05, "loss": 2.2126, "step": 6464500 }, { "epoch": 18.71, "learning_rate": 4.0646634170062984e-05, "loss": 2.2049, "step": 6465000 }, { "epoch": 18.71, "learning_rate": 4.0645910522415706e-05, "loss": 2.219, "step": 6465500 }, { "epoch": 18.72, "learning_rate": 4.0645186874768435e-05, "loss": 2.2115, "step": 6466000 }, { "epoch": 18.72, "learning_rate": 4.064446322712116e-05, "loss": 2.2268, "step": 6466500 }, { "epoch": 18.72, "learning_rate": 4.0643739579473886e-05, "loss": 2.2169, "step": 6467000 }, { "epoch": 18.72, "learning_rate": 4.064301593182661e-05, "loss": 2.2272, "step": 6467500 }, { "epoch": 18.72, "learning_rate": 4.064229228417933e-05, "loss": 2.22, "step": 6468000 }, { "epoch": 18.72, "learning_rate": 4.064156863653205e-05, "loss": 2.2024, "step": 6468500 }, { "epoch": 18.73, "learning_rate": 4.0640844988884775e-05, "loss": 2.2187, "step": 6469000 }, { "epoch": 18.73, "learning_rate": 4.06401213412375e-05, "loss": 2.1841, "step": 6469500 }, { "epoch": 18.73, "learning_rate": 4.063939769359022e-05, "loss": 2.2127, "step": 6470000 }, { "epoch": 18.73, "learning_rate": 4.0638675493238235e-05, "loss": 2.1938, "step": 6470500 }, { "epoch": 18.73, "learning_rate": 4.063795329288626e-05, "loss": 2.2124, "step": 6471000 }, { "epoch": 18.73, "learning_rate": 4.063722964523898e-05, "loss": 2.1897, "step": 6471500 }, { "epoch": 18.73, "learning_rate": 4.06365059975917e-05, "loss": 2.1927, "step": 6472000 }, { "epoch": 18.74, "learning_rate": 4.0635782349944424e-05, "loss": 2.1959, "step": 6472500 }, { "epoch": 18.74, "learning_rate": 4.0635058702297146e-05, "loss": 2.2161, "step": 6473000 }, { "epoch": 18.74, "learning_rate": 4.0634335054649875e-05, "loss": 2.2139, "step": 6473500 }, { "epoch": 18.74, "learning_rate": 4.06336114070026e-05, "loss": 2.1892, "step": 6474000 }, { "epoch": 18.74, "learning_rate": 4.063288775935532e-05, "loss": 2.1936, "step": 6474500 }, { "epoch": 18.74, "learning_rate": 4.063216411170804e-05, "loss": 2.2137, "step": 6475000 }, { "epoch": 18.74, "learning_rate": 4.063144335865136e-05, "loss": 2.2072, "step": 6475500 }, { "epoch": 18.75, "learning_rate": 4.063071971100408e-05, "loss": 2.2084, "step": 6476000 }, { "epoch": 18.75, "learning_rate": 4.0629997510652095e-05, "loss": 2.2148, "step": 6476500 }, { "epoch": 18.75, "learning_rate": 4.062927386300482e-05, "loss": 2.2339, "step": 6477000 }, { "epoch": 18.75, "learning_rate": 4.062855021535754e-05, "loss": 2.1988, "step": 6477500 }, { "epoch": 18.75, "learning_rate": 4.062782656771026e-05, "loss": 2.2251, "step": 6478000 }, { "epoch": 18.75, "learning_rate": 4.0627102920062984e-05, "loss": 2.2126, "step": 6478500 }, { "epoch": 18.75, "learning_rate": 4.062637927241571e-05, "loss": 2.2027, "step": 6479000 }, { "epoch": 18.76, "learning_rate": 4.0625655624768436e-05, "loss": 2.2218, "step": 6479500 }, { "epoch": 18.76, "learning_rate": 4.062493197712116e-05, "loss": 2.2059, "step": 6480000 }, { "epoch": 18.76, "learning_rate": 4.062420832947388e-05, "loss": 2.2219, "step": 6480500 }, { "epoch": 18.76, "learning_rate": 4.062348468182661e-05, "loss": 2.2047, "step": 6481000 }, { "epoch": 18.76, "learning_rate": 4.0622762481474625e-05, "loss": 2.2378, "step": 6481500 }, { "epoch": 18.76, "learning_rate": 4.062203883382735e-05, "loss": 2.1982, "step": 6482000 }, { "epoch": 18.76, "learning_rate": 4.062131518618007e-05, "loss": 2.229, "step": 6482500 }, { "epoch": 18.77, "learning_rate": 4.062059153853279e-05, "loss": 2.1974, "step": 6483000 }, { "epoch": 18.77, "learning_rate": 4.0619867890885514e-05, "loss": 2.2119, "step": 6483500 }, { "epoch": 18.77, "learning_rate": 4.0619144243238236e-05, "loss": 2.2091, "step": 6484000 }, { "epoch": 18.77, "learning_rate": 4.061842059559096e-05, "loss": 2.2292, "step": 6484500 }, { "epoch": 18.77, "learning_rate": 4.061769694794369e-05, "loss": 2.2074, "step": 6485000 }, { "epoch": 18.77, "learning_rate": 4.061697330029641e-05, "loss": 2.1986, "step": 6485500 }, { "epoch": 18.77, "learning_rate": 4.061625254723972e-05, "loss": 2.1964, "step": 6486000 }, { "epoch": 18.78, "learning_rate": 4.0615530346887734e-05, "loss": 2.2231, "step": 6486500 }, { "epoch": 18.78, "learning_rate": 4.061480669924046e-05, "loss": 2.2258, "step": 6487000 }, { "epoch": 18.78, "learning_rate": 4.0614083051593185e-05, "loss": 2.2199, "step": 6487500 }, { "epoch": 18.78, "learning_rate": 4.061335940394591e-05, "loss": 2.2291, "step": 6488000 }, { "epoch": 18.78, "learning_rate": 4.061263720359392e-05, "loss": 2.1963, "step": 6488500 }, { "epoch": 18.78, "learning_rate": 4.061191355594665e-05, "loss": 2.1982, "step": 6489000 }, { "epoch": 18.78, "learning_rate": 4.0611189908299374e-05, "loss": 2.2184, "step": 6489500 }, { "epoch": 18.79, "learning_rate": 4.061046770794739e-05, "loss": 2.2305, "step": 6490000 }, { "epoch": 18.79, "learning_rate": 4.060974406030011e-05, "loss": 2.2025, "step": 6490500 }, { "epoch": 18.79, "learning_rate": 4.0609020412652834e-05, "loss": 2.2261, "step": 6491000 }, { "epoch": 18.79, "learning_rate": 4.060829676500556e-05, "loss": 2.1933, "step": 6491500 }, { "epoch": 18.79, "learning_rate": 4.0607573117358285e-05, "loss": 2.1909, "step": 6492000 }, { "epoch": 18.79, "learning_rate": 4.060684946971101e-05, "loss": 2.2286, "step": 6492500 }, { "epoch": 18.79, "learning_rate": 4.060612582206373e-05, "loss": 2.1838, "step": 6493000 }, { "epoch": 18.8, "learning_rate": 4.060540217441645e-05, "loss": 2.2074, "step": 6493500 }, { "epoch": 18.8, "learning_rate": 4.0604678526769174e-05, "loss": 2.223, "step": 6494000 }, { "epoch": 18.8, "learning_rate": 4.060395632641719e-05, "loss": 2.2016, "step": 6494500 }, { "epoch": 18.8, "learning_rate": 4.060323267876991e-05, "loss": 2.2099, "step": 6495000 }, { "epoch": 18.8, "learning_rate": 4.0602509031122634e-05, "loss": 2.2372, "step": 6495500 }, { "epoch": 18.8, "learning_rate": 4.060178538347536e-05, "loss": 2.2179, "step": 6496000 }, { "epoch": 18.8, "learning_rate": 4.0601061735828085e-05, "loss": 2.2069, "step": 6496500 }, { "epoch": 18.81, "learning_rate": 4.0600338088180814e-05, "loss": 2.2121, "step": 6497000 }, { "epoch": 18.81, "learning_rate": 4.059961444053354e-05, "loss": 2.1871, "step": 6497500 }, { "epoch": 18.81, "learning_rate": 4.059889079288626e-05, "loss": 2.198, "step": 6498000 }, { "epoch": 18.81, "learning_rate": 4.059816714523898e-05, "loss": 2.2216, "step": 6498500 }, { "epoch": 18.81, "learning_rate": 4.05974434975917e-05, "loss": 2.1853, "step": 6499000 }, { "epoch": 18.81, "learning_rate": 4.059672129723972e-05, "loss": 2.2291, "step": 6499500 }, { "epoch": 18.81, "learning_rate": 4.059599764959244e-05, "loss": 2.1993, "step": 6500000 }, { "epoch": 18.82, "learning_rate": 4.059527400194516e-05, "loss": 2.2146, "step": 6500500 }, { "epoch": 18.82, "learning_rate": 4.0594551801593186e-05, "loss": 2.2145, "step": 6501000 }, { "epoch": 18.82, "learning_rate": 4.059382815394591e-05, "loss": 2.1997, "step": 6501500 }, { "epoch": 18.82, "learning_rate": 4.059310450629863e-05, "loss": 2.2138, "step": 6502000 }, { "epoch": 18.82, "learning_rate": 4.059238085865135e-05, "loss": 2.2072, "step": 6502500 }, { "epoch": 18.82, "learning_rate": 4.0591657211004075e-05, "loss": 2.211, "step": 6503000 }, { "epoch": 18.82, "learning_rate": 4.05909350106521e-05, "loss": 2.1991, "step": 6503500 }, { "epoch": 18.83, "learning_rate": 4.059021136300482e-05, "loss": 2.2079, "step": 6504000 }, { "epoch": 18.83, "learning_rate": 4.058948771535754e-05, "loss": 2.1962, "step": 6504500 }, { "epoch": 18.83, "learning_rate": 4.0588764067710264e-05, "loss": 2.193, "step": 6505000 }, { "epoch": 18.83, "learning_rate": 4.058804042006299e-05, "loss": 2.2241, "step": 6505500 }, { "epoch": 18.83, "learning_rate": 4.058731821971101e-05, "loss": 2.2187, "step": 6506000 }, { "epoch": 18.83, "learning_rate": 4.0586596019359024e-05, "loss": 2.1999, "step": 6506500 }, { "epoch": 18.84, "learning_rate": 4.0585872371711746e-05, "loss": 2.2268, "step": 6507000 }, { "epoch": 18.84, "learning_rate": 4.058514872406447e-05, "loss": 2.2309, "step": 6507500 }, { "epoch": 18.84, "learning_rate": 4.058442507641719e-05, "loss": 2.2105, "step": 6508000 }, { "epoch": 18.84, "learning_rate": 4.058370142876991e-05, "loss": 2.2008, "step": 6508500 }, { "epoch": 18.84, "learning_rate": 4.058297778112264e-05, "loss": 2.2343, "step": 6509000 }, { "epoch": 18.84, "learning_rate": 4.0582254133475364e-05, "loss": 2.2046, "step": 6509500 }, { "epoch": 18.84, "learning_rate": 4.0581530485828086e-05, "loss": 2.1807, "step": 6510000 }, { "epoch": 18.85, "learning_rate": 4.058080683818081e-05, "loss": 2.223, "step": 6510500 }, { "epoch": 18.85, "learning_rate": 4.058008319053354e-05, "loss": 2.2139, "step": 6511000 }, { "epoch": 18.85, "learning_rate": 4.057935954288626e-05, "loss": 2.2075, "step": 6511500 }, { "epoch": 18.85, "learning_rate": 4.057863589523898e-05, "loss": 2.1946, "step": 6512000 }, { "epoch": 18.85, "learning_rate": 4.0577913694887e-05, "loss": 2.2106, "step": 6512500 }, { "epoch": 18.85, "learning_rate": 4.057719004723972e-05, "loss": 2.2311, "step": 6513000 }, { "epoch": 18.85, "learning_rate": 4.057646639959244e-05, "loss": 2.2392, "step": 6513500 }, { "epoch": 18.86, "learning_rate": 4.0575742751945164e-05, "loss": 2.2137, "step": 6514000 }, { "epoch": 18.86, "learning_rate": 4.057501910429789e-05, "loss": 2.2066, "step": 6514500 }, { "epoch": 18.86, "learning_rate": 4.0574295456650615e-05, "loss": 2.2152, "step": 6515000 }, { "epoch": 18.86, "learning_rate": 4.057357180900334e-05, "loss": 2.1989, "step": 6515500 }, { "epoch": 18.86, "learning_rate": 4.057284960865135e-05, "loss": 2.2212, "step": 6516000 }, { "epoch": 18.86, "learning_rate": 4.0572125961004075e-05, "loss": 2.2147, "step": 6516500 }, { "epoch": 18.86, "learning_rate": 4.05714023133568e-05, "loss": 2.1978, "step": 6517000 }, { "epoch": 18.87, "learning_rate": 4.057067866570952e-05, "loss": 2.2133, "step": 6517500 }, { "epoch": 18.87, "learning_rate": 4.056995501806224e-05, "loss": 2.2104, "step": 6518000 }, { "epoch": 18.87, "learning_rate": 4.056923137041497e-05, "loss": 2.221, "step": 6518500 }, { "epoch": 18.87, "learning_rate": 4.056850772276769e-05, "loss": 2.1952, "step": 6519000 }, { "epoch": 18.87, "learning_rate": 4.0567784075120416e-05, "loss": 2.2018, "step": 6519500 }, { "epoch": 18.87, "learning_rate": 4.0567060427473145e-05, "loss": 2.217, "step": 6520000 }, { "epoch": 18.87, "learning_rate": 4.056633822712116e-05, "loss": 2.2201, "step": 6520500 }, { "epoch": 18.88, "learning_rate": 4.056561457947388e-05, "loss": 2.2014, "step": 6521000 }, { "epoch": 18.88, "learning_rate": 4.0564890931826605e-05, "loss": 2.2018, "step": 6521500 }, { "epoch": 18.88, "learning_rate": 4.056416728417933e-05, "loss": 2.2116, "step": 6522000 }, { "epoch": 18.88, "learning_rate": 4.056344508382734e-05, "loss": 2.2175, "step": 6522500 }, { "epoch": 18.88, "learning_rate": 4.0562722883475365e-05, "loss": 2.2371, "step": 6523000 }, { "epoch": 18.88, "learning_rate": 4.056199923582809e-05, "loss": 2.2433, "step": 6523500 }, { "epoch": 18.88, "learning_rate": 4.056127558818081e-05, "loss": 2.2253, "step": 6524000 }, { "epoch": 18.89, "learning_rate": 4.056055194053353e-05, "loss": 2.2185, "step": 6524500 }, { "epoch": 18.89, "learning_rate": 4.0559828292886254e-05, "loss": 2.1982, "step": 6525000 }, { "epoch": 18.89, "learning_rate": 4.0559104645238976e-05, "loss": 2.2099, "step": 6525500 }, { "epoch": 18.89, "learning_rate": 4.0558380997591705e-05, "loss": 2.2191, "step": 6526000 }, { "epoch": 18.89, "learning_rate": 4.055765734994443e-05, "loss": 2.2015, "step": 6526500 }, { "epoch": 18.89, "learning_rate": 4.055693370229715e-05, "loss": 2.1937, "step": 6527000 }, { "epoch": 18.89, "learning_rate": 4.0556211501945165e-05, "loss": 2.1924, "step": 6527500 }, { "epoch": 18.9, "learning_rate": 4.0555487854297894e-05, "loss": 2.2068, "step": 6528000 }, { "epoch": 18.9, "learning_rate": 4.0554764206650616e-05, "loss": 2.2166, "step": 6528500 }, { "epoch": 18.9, "learning_rate": 4.055404055900334e-05, "loss": 2.2143, "step": 6529000 }, { "epoch": 18.9, "learning_rate": 4.055331691135606e-05, "loss": 2.2003, "step": 6529500 }, { "epoch": 18.9, "learning_rate": 4.0552594711004076e-05, "loss": 2.2035, "step": 6530000 }, { "epoch": 18.9, "learning_rate": 4.05518710633568e-05, "loss": 2.2089, "step": 6530500 }, { "epoch": 18.9, "learning_rate": 4.055114741570952e-05, "loss": 2.1762, "step": 6531000 }, { "epoch": 18.91, "learning_rate": 4.055042376806224e-05, "loss": 2.2093, "step": 6531500 }, { "epoch": 18.91, "learning_rate": 4.0549700120414965e-05, "loss": 2.226, "step": 6532000 }, { "epoch": 18.91, "learning_rate": 4.054897792006299e-05, "loss": 2.2242, "step": 6532500 }, { "epoch": 18.91, "learning_rate": 4.054825427241571e-05, "loss": 2.1848, "step": 6533000 }, { "epoch": 18.91, "learning_rate": 4.054753062476844e-05, "loss": 2.2157, "step": 6533500 }, { "epoch": 18.91, "learning_rate": 4.054680697712116e-05, "loss": 2.2024, "step": 6534000 }, { "epoch": 18.91, "learning_rate": 4.054608332947388e-05, "loss": 2.2154, "step": 6534500 }, { "epoch": 18.92, "learning_rate": 4.0545359681826605e-05, "loss": 2.2064, "step": 6535000 }, { "epoch": 18.92, "learning_rate": 4.054463603417933e-05, "loss": 2.2277, "step": 6535500 }, { "epoch": 18.92, "learning_rate": 4.054391238653205e-05, "loss": 2.1986, "step": 6536000 }, { "epoch": 18.92, "learning_rate": 4.054319018618007e-05, "loss": 2.2211, "step": 6536500 }, { "epoch": 18.92, "learning_rate": 4.0542466538532794e-05, "loss": 2.211, "step": 6537000 }, { "epoch": 18.92, "learning_rate": 4.054174433818081e-05, "loss": 2.2004, "step": 6537500 }, { "epoch": 18.92, "learning_rate": 4.054102069053353e-05, "loss": 2.1953, "step": 6538000 }, { "epoch": 18.93, "learning_rate": 4.0540297042886254e-05, "loss": 2.2121, "step": 6538500 }, { "epoch": 18.93, "learning_rate": 4.053957339523898e-05, "loss": 2.2147, "step": 6539000 }, { "epoch": 18.93, "learning_rate": 4.053885264218229e-05, "loss": 2.2284, "step": 6539500 }, { "epoch": 18.93, "learning_rate": 4.0538128994535015e-05, "loss": 2.2147, "step": 6540000 }, { "epoch": 18.93, "learning_rate": 4.053740534688774e-05, "loss": 2.1942, "step": 6540500 }, { "epoch": 18.93, "learning_rate": 4.0536681699240466e-05, "loss": 2.2145, "step": 6541000 }, { "epoch": 18.93, "learning_rate": 4.053595805159319e-05, "loss": 2.2282, "step": 6541500 }, { "epoch": 18.94, "learning_rate": 4.053523440394591e-05, "loss": 2.1757, "step": 6542000 }, { "epoch": 18.94, "learning_rate": 4.0534512203593926e-05, "loss": 2.2036, "step": 6542500 }, { "epoch": 18.94, "learning_rate": 4.053378855594665e-05, "loss": 2.204, "step": 6543000 }, { "epoch": 18.94, "learning_rate": 4.053306490829937e-05, "loss": 2.2285, "step": 6543500 }, { "epoch": 18.94, "learning_rate": 4.053234126065209e-05, "loss": 2.1902, "step": 6544000 }, { "epoch": 18.94, "learning_rate": 4.053161761300482e-05, "loss": 2.1909, "step": 6544500 }, { "epoch": 18.95, "learning_rate": 4.0530893965357544e-05, "loss": 2.2006, "step": 6545000 }, { "epoch": 18.95, "learning_rate": 4.0530170317710266e-05, "loss": 2.2041, "step": 6545500 }, { "epoch": 18.95, "learning_rate": 4.052944667006299e-05, "loss": 2.2416, "step": 6546000 }, { "epoch": 18.95, "learning_rate": 4.052872302241571e-05, "loss": 2.2253, "step": 6546500 }, { "epoch": 18.95, "learning_rate": 4.0528000822063726e-05, "loss": 2.2375, "step": 6547000 }, { "epoch": 18.95, "learning_rate": 4.052727717441645e-05, "loss": 2.2126, "step": 6547500 }, { "epoch": 18.95, "learning_rate": 4.052655352676917e-05, "loss": 2.2172, "step": 6548000 }, { "epoch": 18.96, "learning_rate": 4.05258298791219e-05, "loss": 2.1745, "step": 6548500 }, { "epoch": 18.96, "learning_rate": 4.052510623147462e-05, "loss": 2.2124, "step": 6549000 }, { "epoch": 18.96, "learning_rate": 4.0524384031122644e-05, "loss": 2.1835, "step": 6549500 }, { "epoch": 18.96, "learning_rate": 4.052366183077066e-05, "loss": 2.209, "step": 6550000 }, { "epoch": 18.96, "learning_rate": 4.052293818312338e-05, "loss": 2.21, "step": 6550500 }, { "epoch": 18.96, "learning_rate": 4.0522214535476104e-05, "loss": 2.1825, "step": 6551000 }, { "epoch": 18.96, "learning_rate": 4.0521490887828826e-05, "loss": 2.2039, "step": 6551500 }, { "epoch": 18.97, "learning_rate": 4.052076868747685e-05, "loss": 2.2155, "step": 6552000 }, { "epoch": 18.97, "learning_rate": 4.052004503982957e-05, "loss": 2.2147, "step": 6552500 }, { "epoch": 18.97, "learning_rate": 4.051932139218229e-05, "loss": 2.1927, "step": 6553000 }, { "epoch": 18.97, "learning_rate": 4.0518597744535015e-05, "loss": 2.2009, "step": 6553500 }, { "epoch": 18.97, "learning_rate": 4.051787409688774e-05, "loss": 2.1889, "step": 6554000 }, { "epoch": 18.97, "learning_rate": 4.051715044924046e-05, "loss": 2.2055, "step": 6554500 }, { "epoch": 18.97, "learning_rate": 4.051642680159318e-05, "loss": 2.2273, "step": 6555000 }, { "epoch": 18.98, "learning_rate": 4.0515703153945904e-05, "loss": 2.2143, "step": 6555500 }, { "epoch": 18.98, "learning_rate": 4.051498095359392e-05, "loss": 2.2055, "step": 6556000 }, { "epoch": 18.98, "learning_rate": 4.051425730594665e-05, "loss": 2.1922, "step": 6556500 }, { "epoch": 18.98, "learning_rate": 4.051353365829937e-05, "loss": 2.2235, "step": 6557000 }, { "epoch": 18.98, "learning_rate": 4.05128100106521e-05, "loss": 2.2118, "step": 6557500 }, { "epoch": 18.98, "learning_rate": 4.051208636300482e-05, "loss": 2.1868, "step": 6558000 }, { "epoch": 18.98, "learning_rate": 4.0511362715357545e-05, "loss": 2.2065, "step": 6558500 }, { "epoch": 18.99, "learning_rate": 4.051063906771027e-05, "loss": 2.192, "step": 6559000 }, { "epoch": 18.99, "learning_rate": 4.050991542006299e-05, "loss": 2.2365, "step": 6559500 }, { "epoch": 18.99, "learning_rate": 4.050919177241571e-05, "loss": 2.198, "step": 6560000 }, { "epoch": 18.99, "learning_rate": 4.0508468124768434e-05, "loss": 2.2093, "step": 6560500 }, { "epoch": 18.99, "learning_rate": 4.0507744477121156e-05, "loss": 2.1989, "step": 6561000 }, { "epoch": 18.99, "learning_rate": 4.050702082947388e-05, "loss": 2.2166, "step": 6561500 }, { "epoch": 18.99, "learning_rate": 4.05062971818266e-05, "loss": 2.1775, "step": 6562000 }, { "epoch": 19.0, "learning_rate": 4.050557498147462e-05, "loss": 2.1896, "step": 6562500 }, { "epoch": 19.0, "learning_rate": 4.050485278112264e-05, "loss": 2.1944, "step": 6563000 }, { "epoch": 19.0, "learning_rate": 4.050412913347537e-05, "loss": 2.2476, "step": 6563500 }, { "epoch": 19.0, "eval_accuracy": 0.6590668850698368, "eval_accuracy_mlm": 0.6224593176492826, "eval_accuracy_nsp": 0.8552222115283259, "eval_loss": 2.2322213649749756, "eval_runtime": 330.6144, "eval_samples_per_second": 1319.924, "eval_steps_per_second": 54.998, "step": 6563968 }, { "epoch": 19.0, "learning_rate": 4.050340548582809e-05, "loss": 2.2257, "step": 6564000 }, { "epoch": 19.0, "learning_rate": 4.050268183818081e-05, "loss": 2.209, "step": 6564500 }, { "epoch": 19.0, "learning_rate": 4.0501958190533534e-05, "loss": 2.1808, "step": 6565000 }, { "epoch": 19.0, "learning_rate": 4.0501234542886256e-05, "loss": 2.194, "step": 6565500 }, { "epoch": 19.01, "learning_rate": 4.050051089523898e-05, "loss": 2.2056, "step": 6566000 }, { "epoch": 19.01, "learning_rate": 4.04997872475917e-05, "loss": 2.2181, "step": 6566500 }, { "epoch": 19.01, "learning_rate": 4.049906504723972e-05, "loss": 2.1933, "step": 6567000 }, { "epoch": 19.01, "learning_rate": 4.0498341399592445e-05, "loss": 2.1761, "step": 6567500 }, { "epoch": 19.01, "learning_rate": 4.049761775194517e-05, "loss": 2.1707, "step": 6568000 }, { "epoch": 19.01, "learning_rate": 4.049689410429789e-05, "loss": 2.1927, "step": 6568500 }, { "epoch": 19.01, "learning_rate": 4.049617045665061e-05, "loss": 2.2015, "step": 6569000 }, { "epoch": 19.02, "learning_rate": 4.049544825629863e-05, "loss": 2.1803, "step": 6569500 }, { "epoch": 19.02, "learning_rate": 4.049472460865135e-05, "loss": 2.1729, "step": 6570000 }, { "epoch": 19.02, "learning_rate": 4.049400096100407e-05, "loss": 2.1727, "step": 6570500 }, { "epoch": 19.02, "learning_rate": 4.04932773133568e-05, "loss": 2.1625, "step": 6571000 }, { "epoch": 19.02, "learning_rate": 4.049255511300482e-05, "loss": 2.1767, "step": 6571500 }, { "epoch": 19.02, "learning_rate": 4.0491831465357545e-05, "loss": 2.1767, "step": 6572000 }, { "epoch": 19.02, "learning_rate": 4.049110781771027e-05, "loss": 2.1841, "step": 6572500 }, { "epoch": 19.03, "learning_rate": 4.049038417006299e-05, "loss": 2.1705, "step": 6573000 }, { "epoch": 19.03, "learning_rate": 4.048966052241571e-05, "loss": 2.1885, "step": 6573500 }, { "epoch": 19.03, "learning_rate": 4.048893832206373e-05, "loss": 2.1676, "step": 6574000 }, { "epoch": 19.03, "learning_rate": 4.048821467441645e-05, "loss": 2.1838, "step": 6574500 }, { "epoch": 19.03, "learning_rate": 4.048749247406447e-05, "loss": 2.1708, "step": 6575000 }, { "epoch": 19.03, "learning_rate": 4.0486768826417194e-05, "loss": 2.1954, "step": 6575500 }, { "epoch": 19.03, "learning_rate": 4.048604662606521e-05, "loss": 2.1827, "step": 6576000 }, { "epoch": 19.04, "learning_rate": 4.048532297841793e-05, "loss": 2.1804, "step": 6576500 }, { "epoch": 19.04, "learning_rate": 4.0484599330770654e-05, "loss": 2.1918, "step": 6577000 }, { "epoch": 19.04, "learning_rate": 4.048387713041868e-05, "loss": 2.2117, "step": 6577500 }, { "epoch": 19.04, "learning_rate": 4.04831534827714e-05, "loss": 2.2157, "step": 6578000 }, { "epoch": 19.04, "learning_rate": 4.048242983512412e-05, "loss": 2.1904, "step": 6578500 }, { "epoch": 19.04, "learning_rate": 4.048170618747685e-05, "loss": 2.1928, "step": 6579000 }, { "epoch": 19.04, "learning_rate": 4.048098253982957e-05, "loss": 2.1847, "step": 6579500 }, { "epoch": 19.05, "learning_rate": 4.0480258892182295e-05, "loss": 2.1988, "step": 6580000 }, { "epoch": 19.05, "learning_rate": 4.047953669183031e-05, "loss": 2.1895, "step": 6580500 }, { "epoch": 19.05, "learning_rate": 4.047881304418303e-05, "loss": 2.1924, "step": 6581000 }, { "epoch": 19.05, "learning_rate": 4.0478089396535755e-05, "loss": 2.2204, "step": 6581500 }, { "epoch": 19.05, "learning_rate": 4.047736574888848e-05, "loss": 2.2065, "step": 6582000 }, { "epoch": 19.05, "learning_rate": 4.04766421012412e-05, "loss": 2.1929, "step": 6582500 }, { "epoch": 19.06, "learning_rate": 4.047591845359393e-05, "loss": 2.2114, "step": 6583000 }, { "epoch": 19.06, "learning_rate": 4.047519480594665e-05, "loss": 2.1758, "step": 6583500 }, { "epoch": 19.06, "learning_rate": 4.047447115829937e-05, "loss": 2.1846, "step": 6584000 }, { "epoch": 19.06, "learning_rate": 4.0473747510652095e-05, "loss": 2.204, "step": 6584500 }, { "epoch": 19.06, "learning_rate": 4.047302386300482e-05, "loss": 2.1918, "step": 6585000 }, { "epoch": 19.06, "learning_rate": 4.047230021535754e-05, "loss": 2.185, "step": 6585500 }, { "epoch": 19.06, "learning_rate": 4.047157656771027e-05, "loss": 2.1966, "step": 6586000 }, { "epoch": 19.07, "learning_rate": 4.047085292006299e-05, "loss": 2.2302, "step": 6586500 }, { "epoch": 19.07, "learning_rate": 4.0470130719711006e-05, "loss": 2.2058, "step": 6587000 }, { "epoch": 19.07, "learning_rate": 4.046940851935903e-05, "loss": 2.2018, "step": 6587500 }, { "epoch": 19.07, "learning_rate": 4.046868487171175e-05, "loss": 2.2082, "step": 6588000 }, { "epoch": 19.07, "learning_rate": 4.046796122406447e-05, "loss": 2.2063, "step": 6588500 }, { "epoch": 19.07, "learning_rate": 4.0467237576417195e-05, "loss": 2.1953, "step": 6589000 }, { "epoch": 19.07, "learning_rate": 4.046651537606521e-05, "loss": 2.1798, "step": 6589500 }, { "epoch": 19.08, "learning_rate": 4.046579172841793e-05, "loss": 2.2021, "step": 6590000 }, { "epoch": 19.08, "learning_rate": 4.0465068080770655e-05, "loss": 2.1942, "step": 6590500 }, { "epoch": 19.08, "learning_rate": 4.046434443312338e-05, "loss": 2.1842, "step": 6591000 }, { "epoch": 19.08, "learning_rate": 4.04636207854761e-05, "loss": 2.1926, "step": 6591500 }, { "epoch": 19.08, "learning_rate": 4.046289713782883e-05, "loss": 2.1996, "step": 6592000 }, { "epoch": 19.08, "learning_rate": 4.046217349018155e-05, "loss": 2.2088, "step": 6592500 }, { "epoch": 19.08, "learning_rate": 4.046144984253427e-05, "loss": 2.1997, "step": 6593000 }, { "epoch": 19.09, "learning_rate": 4.046072764218229e-05, "loss": 2.1806, "step": 6593500 }, { "epoch": 19.09, "learning_rate": 4.046000544183031e-05, "loss": 2.1762, "step": 6594000 }, { "epoch": 19.09, "learning_rate": 4.045928179418303e-05, "loss": 2.2227, "step": 6594500 }, { "epoch": 19.09, "learning_rate": 4.0458558146535756e-05, "loss": 2.1969, "step": 6595000 }, { "epoch": 19.09, "learning_rate": 4.045783449888848e-05, "loss": 2.2016, "step": 6595500 }, { "epoch": 19.09, "learning_rate": 4.04571122985365e-05, "loss": 2.1938, "step": 6596000 }, { "epoch": 19.09, "learning_rate": 4.045638865088922e-05, "loss": 2.1893, "step": 6596500 }, { "epoch": 19.1, "learning_rate": 4.0455665003241945e-05, "loss": 2.1763, "step": 6597000 }, { "epoch": 19.1, "learning_rate": 4.045494135559467e-05, "loss": 2.2101, "step": 6597500 }, { "epoch": 19.1, "learning_rate": 4.045421770794739e-05, "loss": 2.1869, "step": 6598000 }, { "epoch": 19.1, "learning_rate": 4.045349406030011e-05, "loss": 2.1845, "step": 6598500 }, { "epoch": 19.1, "learning_rate": 4.045277185994813e-05, "loss": 2.159, "step": 6599000 }, { "epoch": 19.1, "learning_rate": 4.0452048212300856e-05, "loss": 2.1834, "step": 6599500 }, { "epoch": 19.1, "learning_rate": 4.045132456465358e-05, "loss": 2.2199, "step": 6600000 }, { "epoch": 19.11, "learning_rate": 4.04506009170063e-05, "loss": 2.1982, "step": 6600500 }, { "epoch": 19.11, "learning_rate": 4.044987726935902e-05, "loss": 2.1871, "step": 6601000 }, { "epoch": 19.11, "learning_rate": 4.044915362171175e-05, "loss": 2.1988, "step": 6601500 }, { "epoch": 19.11, "learning_rate": 4.0448429974064474e-05, "loss": 2.181, "step": 6602000 }, { "epoch": 19.11, "learning_rate": 4.0447706326417196e-05, "loss": 2.2015, "step": 6602500 }, { "epoch": 19.11, "learning_rate": 4.044698267876992e-05, "loss": 2.199, "step": 6603000 }, { "epoch": 19.11, "learning_rate": 4.044625903112264e-05, "loss": 2.1809, "step": 6603500 }, { "epoch": 19.12, "learning_rate": 4.0445536830770656e-05, "loss": 2.1746, "step": 6604000 }, { "epoch": 19.12, "learning_rate": 4.044481318312338e-05, "loss": 2.1994, "step": 6604500 }, { "epoch": 19.12, "learning_rate": 4.044408953547611e-05, "loss": 2.1937, "step": 6605000 }, { "epoch": 19.12, "learning_rate": 4.044336733512412e-05, "loss": 2.1929, "step": 6605500 }, { "epoch": 19.12, "learning_rate": 4.0442643687476845e-05, "loss": 2.1964, "step": 6606000 }, { "epoch": 19.12, "learning_rate": 4.044192003982957e-05, "loss": 2.1868, "step": 6606500 }, { "epoch": 19.12, "learning_rate": 4.044119639218229e-05, "loss": 2.1799, "step": 6607000 }, { "epoch": 19.13, "learning_rate": 4.044047274453501e-05, "loss": 2.186, "step": 6607500 }, { "epoch": 19.13, "learning_rate": 4.0439749096887734e-05, "loss": 2.1848, "step": 6608000 }, { "epoch": 19.13, "learning_rate": 4.043902544924046e-05, "loss": 2.2073, "step": 6608500 }, { "epoch": 19.13, "learning_rate": 4.043830324888848e-05, "loss": 2.1942, "step": 6609000 }, { "epoch": 19.13, "learning_rate": 4.043757960124121e-05, "loss": 2.1969, "step": 6609500 }, { "epoch": 19.13, "learning_rate": 4.043685595359393e-05, "loss": 2.1843, "step": 6610000 }, { "epoch": 19.13, "learning_rate": 4.043613230594665e-05, "loss": 2.1945, "step": 6610500 }, { "epoch": 19.14, "learning_rate": 4.0435408658299374e-05, "loss": 2.1787, "step": 6611000 }, { "epoch": 19.14, "learning_rate": 4.0434685010652097e-05, "loss": 2.2042, "step": 6611500 }, { "epoch": 19.14, "learning_rate": 4.043396136300482e-05, "loss": 2.1861, "step": 6612000 }, { "epoch": 19.14, "learning_rate": 4.043323771535754e-05, "loss": 2.213, "step": 6612500 }, { "epoch": 19.14, "learning_rate": 4.043251406771026e-05, "loss": 2.1987, "step": 6613000 }, { "epoch": 19.14, "learning_rate": 4.0431790420062985e-05, "loss": 2.1976, "step": 6613500 }, { "epoch": 19.14, "learning_rate": 4.043106677241571e-05, "loss": 2.185, "step": 6614000 }, { "epoch": 19.15, "learning_rate": 4.043034312476843e-05, "loss": 2.1997, "step": 6614500 }, { "epoch": 19.15, "learning_rate": 4.042961947712116e-05, "loss": 2.1999, "step": 6615000 }, { "epoch": 19.15, "learning_rate": 4.042889582947388e-05, "loss": 2.1979, "step": 6615500 }, { "epoch": 19.15, "learning_rate": 4.042817218182661e-05, "loss": 2.2022, "step": 6616000 }, { "epoch": 19.15, "learning_rate": 4.0427449981474626e-05, "loss": 2.1927, "step": 6616500 }, { "epoch": 19.15, "learning_rate": 4.042672633382735e-05, "loss": 2.1835, "step": 6617000 }, { "epoch": 19.15, "learning_rate": 4.042600268618007e-05, "loss": 2.1965, "step": 6617500 }, { "epoch": 19.16, "learning_rate": 4.042527903853279e-05, "loss": 2.1653, "step": 6618000 }, { "epoch": 19.16, "learning_rate": 4.0424555390885515e-05, "loss": 2.1848, "step": 6618500 }, { "epoch": 19.16, "learning_rate": 4.042383319053353e-05, "loss": 2.1851, "step": 6619000 }, { "epoch": 19.16, "learning_rate": 4.042310954288626e-05, "loss": 2.2026, "step": 6619500 }, { "epoch": 19.16, "learning_rate": 4.042238589523898e-05, "loss": 2.1681, "step": 6620000 }, { "epoch": 19.16, "learning_rate": 4.0421662247591704e-05, "loss": 2.1686, "step": 6620500 }, { "epoch": 19.17, "learning_rate": 4.0420938599944426e-05, "loss": 2.2037, "step": 6621000 }, { "epoch": 19.17, "learning_rate": 4.042021495229715e-05, "loss": 2.1938, "step": 6621500 }, { "epoch": 19.17, "learning_rate": 4.0419492751945164e-05, "loss": 2.1828, "step": 6622000 }, { "epoch": 19.17, "learning_rate": 4.0418769104297886e-05, "loss": 2.1805, "step": 6622500 }, { "epoch": 19.17, "learning_rate": 4.041804545665061e-05, "loss": 2.1885, "step": 6623000 }, { "epoch": 19.17, "learning_rate": 4.041732180900334e-05, "loss": 2.1707, "step": 6623500 }, { "epoch": 19.17, "learning_rate": 4.041659816135606e-05, "loss": 2.1856, "step": 6624000 }, { "epoch": 19.18, "learning_rate": 4.041587451370878e-05, "loss": 2.2082, "step": 6624500 }, { "epoch": 19.18, "learning_rate": 4.041515086606151e-05, "loss": 2.1944, "step": 6625000 }, { "epoch": 19.18, "learning_rate": 4.041442721841423e-05, "loss": 2.1683, "step": 6625500 }, { "epoch": 19.18, "learning_rate": 4.0413703570766955e-05, "loss": 2.1901, "step": 6626000 }, { "epoch": 19.18, "learning_rate": 4.0412982817710264e-05, "loss": 2.2121, "step": 6626500 }, { "epoch": 19.18, "learning_rate": 4.0412259170062986e-05, "loss": 2.2108, "step": 6627000 }, { "epoch": 19.18, "learning_rate": 4.041153552241571e-05, "loss": 2.1826, "step": 6627500 }, { "epoch": 19.19, "learning_rate": 4.041081187476843e-05, "loss": 2.1906, "step": 6628000 }, { "epoch": 19.19, "learning_rate": 4.041008822712116e-05, "loss": 2.1902, "step": 6628500 }, { "epoch": 19.19, "learning_rate": 4.040936457947388e-05, "loss": 2.2146, "step": 6629000 }, { "epoch": 19.19, "learning_rate": 4.0408640931826604e-05, "loss": 2.1805, "step": 6629500 }, { "epoch": 19.19, "learning_rate": 4.040791873147462e-05, "loss": 2.2137, "step": 6630000 }, { "epoch": 19.19, "learning_rate": 4.040719508382734e-05, "loss": 2.1918, "step": 6630500 }, { "epoch": 19.19, "learning_rate": 4.0406472883475364e-05, "loss": 2.1649, "step": 6631000 }, { "epoch": 19.2, "learning_rate": 4.0405749235828087e-05, "loss": 2.1951, "step": 6631500 }, { "epoch": 19.2, "learning_rate": 4.040502558818081e-05, "loss": 2.1928, "step": 6632000 }, { "epoch": 19.2, "learning_rate": 4.040430194053353e-05, "loss": 2.2087, "step": 6632500 }, { "epoch": 19.2, "learning_rate": 4.040357974018155e-05, "loss": 2.2152, "step": 6633000 }, { "epoch": 19.2, "learning_rate": 4.0402856092534276e-05, "loss": 2.1875, "step": 6633500 }, { "epoch": 19.2, "learning_rate": 4.0402132444887e-05, "loss": 2.1719, "step": 6634000 }, { "epoch": 19.2, "learning_rate": 4.040140879723972e-05, "loss": 2.1662, "step": 6634500 }, { "epoch": 19.21, "learning_rate": 4.040068514959244e-05, "loss": 2.2123, "step": 6635000 }, { "epoch": 19.21, "learning_rate": 4.0399961501945165e-05, "loss": 2.2067, "step": 6635500 }, { "epoch": 19.21, "learning_rate": 4.039923785429789e-05, "loss": 2.2215, "step": 6636000 }, { "epoch": 19.21, "learning_rate": 4.039851420665061e-05, "loss": 2.1941, "step": 6636500 }, { "epoch": 19.21, "learning_rate": 4.039779055900333e-05, "loss": 2.1735, "step": 6637000 }, { "epoch": 19.21, "learning_rate": 4.0397068358651354e-05, "loss": 2.1873, "step": 6637500 }, { "epoch": 19.21, "learning_rate": 4.039634615829937e-05, "loss": 2.19, "step": 6638000 }, { "epoch": 19.22, "learning_rate": 4.03956225106521e-05, "loss": 2.1955, "step": 6638500 }, { "epoch": 19.22, "learning_rate": 4.039489886300482e-05, "loss": 2.1935, "step": 6639000 }, { "epoch": 19.22, "learning_rate": 4.039417521535754e-05, "loss": 2.1985, "step": 6639500 }, { "epoch": 19.22, "learning_rate": 4.0393451567710265e-05, "loss": 2.1651, "step": 6640000 }, { "epoch": 19.22, "learning_rate": 4.039272792006299e-05, "loss": 2.2108, "step": 6640500 }, { "epoch": 19.22, "learning_rate": 4.039200427241571e-05, "loss": 2.1798, "step": 6641000 }, { "epoch": 19.22, "learning_rate": 4.039128062476844e-05, "loss": 2.1984, "step": 6641500 }, { "epoch": 19.23, "learning_rate": 4.039055697712116e-05, "loss": 2.1909, "step": 6642000 }, { "epoch": 19.23, "learning_rate": 4.0389834776769176e-05, "loss": 2.1784, "step": 6642500 }, { "epoch": 19.23, "learning_rate": 4.03891111291219e-05, "loss": 2.1895, "step": 6643000 }, { "epoch": 19.23, "learning_rate": 4.038838748147462e-05, "loss": 2.2007, "step": 6643500 }, { "epoch": 19.23, "learning_rate": 4.0387665281122636e-05, "loss": 2.1809, "step": 6644000 }, { "epoch": 19.23, "learning_rate": 4.038694163347536e-05, "loss": 2.2148, "step": 6644500 }, { "epoch": 19.23, "learning_rate": 4.038621798582809e-05, "loss": 2.1965, "step": 6645000 }, { "epoch": 19.24, "learning_rate": 4.038549433818081e-05, "loss": 2.2322, "step": 6645500 }, { "epoch": 19.24, "learning_rate": 4.038477069053354e-05, "loss": 2.2017, "step": 6646000 }, { "epoch": 19.24, "learning_rate": 4.038404704288626e-05, "loss": 2.181, "step": 6646500 }, { "epoch": 19.24, "learning_rate": 4.038332339523898e-05, "loss": 2.196, "step": 6647000 }, { "epoch": 19.24, "learning_rate": 4.0382599747591705e-05, "loss": 2.1998, "step": 6647500 }, { "epoch": 19.24, "learning_rate": 4.038187754723972e-05, "loss": 2.2068, "step": 6648000 }, { "epoch": 19.24, "learning_rate": 4.038115389959244e-05, "loss": 2.1861, "step": 6648500 }, { "epoch": 19.25, "learning_rate": 4.038043169924046e-05, "loss": 2.2148, "step": 6649000 }, { "epoch": 19.25, "learning_rate": 4.037970805159319e-05, "loss": 2.2102, "step": 6649500 }, { "epoch": 19.25, "learning_rate": 4.037898440394591e-05, "loss": 2.1926, "step": 6650000 }, { "epoch": 19.25, "learning_rate": 4.037826075629863e-05, "loss": 2.1843, "step": 6650500 }, { "epoch": 19.25, "learning_rate": 4.0377537108651354e-05, "loss": 2.1763, "step": 6651000 }, { "epoch": 19.25, "learning_rate": 4.0376813461004077e-05, "loss": 2.1979, "step": 6651500 }, { "epoch": 19.25, "learning_rate": 4.03760898133568e-05, "loss": 2.1776, "step": 6652000 }, { "epoch": 19.26, "learning_rate": 4.0375367613004814e-05, "loss": 2.1904, "step": 6652500 }, { "epoch": 19.26, "learning_rate": 4.037464396535754e-05, "loss": 2.1847, "step": 6653000 }, { "epoch": 19.26, "learning_rate": 4.0373920317710266e-05, "loss": 2.2326, "step": 6653500 }, { "epoch": 19.26, "learning_rate": 4.037319667006299e-05, "loss": 2.2081, "step": 6654000 }, { "epoch": 19.26, "learning_rate": 4.037247302241571e-05, "loss": 2.2012, "step": 6654500 }, { "epoch": 19.26, "learning_rate": 4.037174937476844e-05, "loss": 2.1606, "step": 6655000 }, { "epoch": 19.26, "learning_rate": 4.037102572712116e-05, "loss": 2.1812, "step": 6655500 }, { "epoch": 19.27, "learning_rate": 4.0370302079473884e-05, "loss": 2.2279, "step": 6656000 }, { "epoch": 19.27, "learning_rate": 4.03695798791219e-05, "loss": 2.2002, "step": 6656500 }, { "epoch": 19.27, "learning_rate": 4.036885623147462e-05, "loss": 2.207, "step": 6657000 }, { "epoch": 19.27, "learning_rate": 4.0368132583827344e-05, "loss": 2.1946, "step": 6657500 }, { "epoch": 19.27, "learning_rate": 4.0367408936180066e-05, "loss": 2.189, "step": 6658000 }, { "epoch": 19.27, "learning_rate": 4.036668673582809e-05, "loss": 2.1887, "step": 6658500 }, { "epoch": 19.28, "learning_rate": 4.036596308818081e-05, "loss": 2.2034, "step": 6659000 }, { "epoch": 19.28, "learning_rate": 4.0365240887828826e-05, "loss": 2.2004, "step": 6659500 }, { "epoch": 19.28, "learning_rate": 4.036451724018155e-05, "loss": 2.2219, "step": 6660000 }, { "epoch": 19.28, "learning_rate": 4.036379359253427e-05, "loss": 2.2028, "step": 6660500 }, { "epoch": 19.28, "learning_rate": 4.0363069944887e-05, "loss": 2.2232, "step": 6661000 }, { "epoch": 19.28, "learning_rate": 4.0362347744535015e-05, "loss": 2.2095, "step": 6661500 }, { "epoch": 19.28, "learning_rate": 4.036162409688774e-05, "loss": 2.2054, "step": 6662000 }, { "epoch": 19.29, "learning_rate": 4.0360900449240466e-05, "loss": 2.2042, "step": 6662500 }, { "epoch": 19.29, "learning_rate": 4.036017680159319e-05, "loss": 2.2078, "step": 6663000 }, { "epoch": 19.29, "learning_rate": 4.035945315394591e-05, "loss": 2.2, "step": 6663500 }, { "epoch": 19.29, "learning_rate": 4.0358730953593926e-05, "loss": 2.195, "step": 6664000 }, { "epoch": 19.29, "learning_rate": 4.035800730594665e-05, "loss": 2.1939, "step": 6664500 }, { "epoch": 19.29, "learning_rate": 4.035728365829937e-05, "loss": 2.1852, "step": 6665000 }, { "epoch": 19.29, "learning_rate": 4.035656001065209e-05, "loss": 2.1952, "step": 6665500 }, { "epoch": 19.3, "learning_rate": 4.0355836363004815e-05, "loss": 2.1878, "step": 6666000 }, { "epoch": 19.3, "learning_rate": 4.035511271535754e-05, "loss": 2.1727, "step": 6666500 }, { "epoch": 19.3, "learning_rate": 4.0354389067710266e-05, "loss": 2.2001, "step": 6667000 }, { "epoch": 19.3, "learning_rate": 4.035366686735828e-05, "loss": 2.1879, "step": 6667500 }, { "epoch": 19.3, "learning_rate": 4.0352943219711004e-05, "loss": 2.187, "step": 6668000 }, { "epoch": 19.3, "learning_rate": 4.035221957206373e-05, "loss": 2.1977, "step": 6668500 }, { "epoch": 19.3, "learning_rate": 4.0351495924416455e-05, "loss": 2.2013, "step": 6669000 }, { "epoch": 19.31, "learning_rate": 4.035077227676918e-05, "loss": 2.2012, "step": 6669500 }, { "epoch": 19.31, "learning_rate": 4.03500486291219e-05, "loss": 2.218, "step": 6670000 }, { "epoch": 19.31, "learning_rate": 4.034932498147462e-05, "loss": 2.1839, "step": 6670500 }, { "epoch": 19.31, "learning_rate": 4.0348601333827344e-05, "loss": 2.1914, "step": 6671000 }, { "epoch": 19.31, "learning_rate": 4.034787768618007e-05, "loss": 2.1932, "step": 6671500 }, { "epoch": 19.31, "learning_rate": 4.034715403853279e-05, "loss": 2.1835, "step": 6672000 }, { "epoch": 19.31, "learning_rate": 4.034643039088552e-05, "loss": 2.2225, "step": 6672500 }, { "epoch": 19.32, "learning_rate": 4.0345708190533533e-05, "loss": 2.1971, "step": 6673000 }, { "epoch": 19.32, "learning_rate": 4.0344984542886256e-05, "loss": 2.2181, "step": 6673500 }, { "epoch": 19.32, "learning_rate": 4.034426089523898e-05, "loss": 2.2077, "step": 6674000 }, { "epoch": 19.32, "learning_rate": 4.03435372475917e-05, "loss": 2.1748, "step": 6674500 }, { "epoch": 19.32, "learning_rate": 4.034281359994442e-05, "loss": 2.1993, "step": 6675000 }, { "epoch": 19.32, "learning_rate": 4.0342089952297145e-05, "loss": 2.2028, "step": 6675500 }, { "epoch": 19.32, "learning_rate": 4.034136775194517e-05, "loss": 2.1766, "step": 6676000 }, { "epoch": 19.33, "learning_rate": 4.034064410429789e-05, "loss": 2.2037, "step": 6676500 }, { "epoch": 19.33, "learning_rate": 4.033992045665062e-05, "loss": 2.1895, "step": 6677000 }, { "epoch": 19.33, "learning_rate": 4.033919680900334e-05, "loss": 2.2126, "step": 6677500 }, { "epoch": 19.33, "learning_rate": 4.0338474608651356e-05, "loss": 2.1948, "step": 6678000 }, { "epoch": 19.33, "learning_rate": 4.033775096100408e-05, "loss": 2.1996, "step": 6678500 }, { "epoch": 19.33, "learning_rate": 4.03370273133568e-05, "loss": 2.1799, "step": 6679000 }, { "epoch": 19.33, "learning_rate": 4.0336305113004816e-05, "loss": 2.1976, "step": 6679500 }, { "epoch": 19.34, "learning_rate": 4.033558146535754e-05, "loss": 2.2098, "step": 6680000 }, { "epoch": 19.34, "learning_rate": 4.033485781771027e-05, "loss": 2.2152, "step": 6680500 }, { "epoch": 19.34, "learning_rate": 4.033413417006299e-05, "loss": 2.1822, "step": 6681000 }, { "epoch": 19.34, "learning_rate": 4.033341052241571e-05, "loss": 2.1933, "step": 6681500 }, { "epoch": 19.34, "learning_rate": 4.0332686874768434e-05, "loss": 2.1993, "step": 6682000 }, { "epoch": 19.34, "learning_rate": 4.0331963227121156e-05, "loss": 2.213, "step": 6682500 }, { "epoch": 19.34, "learning_rate": 4.033123957947388e-05, "loss": 2.2035, "step": 6683000 }, { "epoch": 19.35, "learning_rate": 4.033051593182661e-05, "loss": 2.2032, "step": 6683500 }, { "epoch": 19.35, "learning_rate": 4.032979228417933e-05, "loss": 2.1806, "step": 6684000 }, { "epoch": 19.35, "learning_rate": 4.032906863653205e-05, "loss": 2.1933, "step": 6684500 }, { "epoch": 19.35, "learning_rate": 4.0328344988884774e-05, "loss": 2.2224, "step": 6685000 }, { "epoch": 19.35, "learning_rate": 4.0327621341237496e-05, "loss": 2.205, "step": 6685500 }, { "epoch": 19.35, "learning_rate": 4.032689914088552e-05, "loss": 2.2125, "step": 6686000 }, { "epoch": 19.35, "learning_rate": 4.032617549323824e-05, "loss": 2.2165, "step": 6686500 }, { "epoch": 19.36, "learning_rate": 4.032545184559096e-05, "loss": 2.2037, "step": 6687000 }, { "epoch": 19.36, "learning_rate": 4.0324728197943685e-05, "loss": 2.2124, "step": 6687500 }, { "epoch": 19.36, "learning_rate": 4.03240059975917e-05, "loss": 2.1957, "step": 6688000 }, { "epoch": 19.36, "learning_rate": 4.032328234994442e-05, "loss": 2.1977, "step": 6688500 }, { "epoch": 19.36, "learning_rate": 4.0322558702297145e-05, "loss": 2.1854, "step": 6689000 }, { "epoch": 19.36, "learning_rate": 4.032183505464987e-05, "loss": 2.1769, "step": 6689500 }, { "epoch": 19.36, "learning_rate": 4.032111140700259e-05, "loss": 2.1957, "step": 6690000 }, { "epoch": 19.37, "learning_rate": 4.032038775935532e-05, "loss": 2.1904, "step": 6690500 }, { "epoch": 19.37, "learning_rate": 4.031966555900334e-05, "loss": 2.1974, "step": 6691000 }, { "epoch": 19.37, "learning_rate": 4.0318941911356063e-05, "loss": 2.2067, "step": 6691500 }, { "epoch": 19.37, "learning_rate": 4.0318218263708786e-05, "loss": 2.1782, "step": 6692000 }, { "epoch": 19.37, "learning_rate": 4.031749461606151e-05, "loss": 2.1948, "step": 6692500 }, { "epoch": 19.37, "learning_rate": 4.031677096841423e-05, "loss": 2.1947, "step": 6693000 }, { "epoch": 19.37, "learning_rate": 4.031604732076695e-05, "loss": 2.1972, "step": 6693500 }, { "epoch": 19.38, "learning_rate": 4.0315323673119675e-05, "loss": 2.1981, "step": 6694000 }, { "epoch": 19.38, "learning_rate": 4.03146000254724e-05, "loss": 2.2073, "step": 6694500 }, { "epoch": 19.38, "learning_rate": 4.031387637782512e-05, "loss": 2.2195, "step": 6695000 }, { "epoch": 19.38, "learning_rate": 4.0313155624768435e-05, "loss": 2.195, "step": 6695500 }, { "epoch": 19.38, "learning_rate": 4.031243197712116e-05, "loss": 2.2027, "step": 6696000 }, { "epoch": 19.38, "learning_rate": 4.031170832947388e-05, "loss": 2.2058, "step": 6696500 }, { "epoch": 19.39, "learning_rate": 4.03109846818266e-05, "loss": 2.1848, "step": 6697000 }, { "epoch": 19.39, "learning_rate": 4.0310261034179324e-05, "loss": 2.1989, "step": 6697500 }, { "epoch": 19.39, "learning_rate": 4.0309538833827346e-05, "loss": 2.2232, "step": 6698000 }, { "epoch": 19.39, "learning_rate": 4.030881518618007e-05, "loss": 2.19, "step": 6698500 }, { "epoch": 19.39, "learning_rate": 4.03080915385328e-05, "loss": 2.1857, "step": 6699000 }, { "epoch": 19.39, "learning_rate": 4.030736789088552e-05, "loss": 2.2108, "step": 6699500 }, { "epoch": 19.39, "learning_rate": 4.030664424323824e-05, "loss": 2.1965, "step": 6700000 }, { "epoch": 19.4, "learning_rate": 4.0305920595590964e-05, "loss": 2.2032, "step": 6700500 }, { "epoch": 19.4, "learning_rate": 4.0305196947943686e-05, "loss": 2.2082, "step": 6701000 }, { "epoch": 19.4, "learning_rate": 4.030447330029641e-05, "loss": 2.1999, "step": 6701500 }, { "epoch": 19.4, "learning_rate": 4.0303751099944424e-05, "loss": 2.2083, "step": 6702000 }, { "epoch": 19.4, "learning_rate": 4.0303027452297146e-05, "loss": 2.1802, "step": 6702500 }, { "epoch": 19.4, "learning_rate": 4.030230380464987e-05, "loss": 2.195, "step": 6703000 }, { "epoch": 19.4, "learning_rate": 4.030158160429789e-05, "loss": 2.1774, "step": 6703500 }, { "epoch": 19.41, "learning_rate": 4.030085795665061e-05, "loss": 2.1838, "step": 6704000 }, { "epoch": 19.41, "learning_rate": 4.0300134309003335e-05, "loss": 2.2008, "step": 6704500 }, { "epoch": 19.41, "learning_rate": 4.029941066135606e-05, "loss": 2.1958, "step": 6705000 }, { "epoch": 19.41, "learning_rate": 4.029868701370878e-05, "loss": 2.2014, "step": 6705500 }, { "epoch": 19.41, "learning_rate": 4.029796336606151e-05, "loss": 2.2067, "step": 6706000 }, { "epoch": 19.41, "learning_rate": 4.029723971841423e-05, "loss": 2.1841, "step": 6706500 }, { "epoch": 19.41, "learning_rate": 4.029651607076695e-05, "loss": 2.1804, "step": 6707000 }, { "epoch": 19.42, "learning_rate": 4.029579531771027e-05, "loss": 2.1914, "step": 6707500 }, { "epoch": 19.42, "learning_rate": 4.029507167006299e-05, "loss": 2.1927, "step": 6708000 }, { "epoch": 19.42, "learning_rate": 4.029434802241571e-05, "loss": 2.2043, "step": 6708500 }, { "epoch": 19.42, "learning_rate": 4.0293624374768436e-05, "loss": 2.2168, "step": 6709000 }, { "epoch": 19.42, "learning_rate": 4.029290072712116e-05, "loss": 2.1887, "step": 6709500 }, { "epoch": 19.42, "learning_rate": 4.029217707947388e-05, "loss": 2.2023, "step": 6710000 }, { "epoch": 19.42, "learning_rate": 4.0291454879121896e-05, "loss": 2.2045, "step": 6710500 }, { "epoch": 19.43, "learning_rate": 4.029073123147462e-05, "loss": 2.209, "step": 6711000 }, { "epoch": 19.43, "learning_rate": 4.029000758382735e-05, "loss": 2.1897, "step": 6711500 }, { "epoch": 19.43, "learning_rate": 4.028928393618007e-05, "loss": 2.1852, "step": 6712000 }, { "epoch": 19.43, "learning_rate": 4.028856028853279e-05, "loss": 2.1949, "step": 6712500 }, { "epoch": 19.43, "learning_rate": 4.0287836640885513e-05, "loss": 2.2071, "step": 6713000 }, { "epoch": 19.43, "learning_rate": 4.028711299323824e-05, "loss": 2.2086, "step": 6713500 }, { "epoch": 19.43, "learning_rate": 4.0286389345590965e-05, "loss": 2.2093, "step": 6714000 }, { "epoch": 19.44, "learning_rate": 4.028566569794369e-05, "loss": 2.189, "step": 6714500 }, { "epoch": 19.44, "learning_rate": 4.028494205029641e-05, "loss": 2.2057, "step": 6715000 }, { "epoch": 19.44, "learning_rate": 4.028421840264913e-05, "loss": 2.1859, "step": 6715500 }, { "epoch": 19.44, "learning_rate": 4.0283494755001854e-05, "loss": 2.1977, "step": 6716000 }, { "epoch": 19.44, "learning_rate": 4.0282771107354576e-05, "loss": 2.1706, "step": 6716500 }, { "epoch": 19.44, "learning_rate": 4.02820474597073e-05, "loss": 2.1653, "step": 6717000 }, { "epoch": 19.44, "learning_rate": 4.028132381206002e-05, "loss": 2.2289, "step": 6717500 }, { "epoch": 19.45, "learning_rate": 4.028060016441275e-05, "loss": 2.1964, "step": 6718000 }, { "epoch": 19.45, "learning_rate": 4.0279877964060765e-05, "loss": 2.2091, "step": 6718500 }, { "epoch": 19.45, "learning_rate": 4.027915431641349e-05, "loss": 2.2033, "step": 6719000 }, { "epoch": 19.45, "learning_rate": 4.027843066876621e-05, "loss": 2.2245, "step": 6719500 }, { "epoch": 19.45, "learning_rate": 4.027770702111893e-05, "loss": 2.1863, "step": 6720000 }, { "epoch": 19.45, "learning_rate": 4.027698337347166e-05, "loss": 2.2144, "step": 6720500 }, { "epoch": 19.45, "learning_rate": 4.0276261173119676e-05, "loss": 2.19, "step": 6721000 }, { "epoch": 19.46, "learning_rate": 4.02755375254724e-05, "loss": 2.2027, "step": 6721500 }, { "epoch": 19.46, "learning_rate": 4.027481387782512e-05, "loss": 2.1907, "step": 6722000 }, { "epoch": 19.46, "learning_rate": 4.027409023017785e-05, "loss": 2.2343, "step": 6722500 }, { "epoch": 19.46, "learning_rate": 4.027336658253057e-05, "loss": 2.2079, "step": 6723000 }, { "epoch": 19.46, "learning_rate": 4.027264438217859e-05, "loss": 2.2208, "step": 6723500 }, { "epoch": 19.46, "learning_rate": 4.027192073453131e-05, "loss": 2.1946, "step": 6724000 }, { "epoch": 19.46, "learning_rate": 4.027119708688403e-05, "loss": 2.211, "step": 6724500 }, { "epoch": 19.47, "learning_rate": 4.0270473439236754e-05, "loss": 2.1931, "step": 6725000 }, { "epoch": 19.47, "learning_rate": 4.026975123888477e-05, "loss": 2.1996, "step": 6725500 }, { "epoch": 19.47, "learning_rate": 4.026902903853279e-05, "loss": 2.186, "step": 6726000 }, { "epoch": 19.47, "learning_rate": 4.0268305390885514e-05, "loss": 2.1992, "step": 6726500 }, { "epoch": 19.47, "learning_rate": 4.0267581743238237e-05, "loss": 2.1925, "step": 6727000 }, { "epoch": 19.47, "learning_rate": 4.026685809559096e-05, "loss": 2.2058, "step": 6727500 }, { "epoch": 19.47, "learning_rate": 4.026613444794369e-05, "loss": 2.1998, "step": 6728000 }, { "epoch": 19.48, "learning_rate": 4.02654122475917e-05, "loss": 2.1805, "step": 6728500 }, { "epoch": 19.48, "learning_rate": 4.0264688599944426e-05, "loss": 2.1829, "step": 6729000 }, { "epoch": 19.48, "learning_rate": 4.026396495229715e-05, "loss": 2.19, "step": 6729500 }, { "epoch": 19.48, "learning_rate": 4.026324130464988e-05, "loss": 2.1806, "step": 6730000 }, { "epoch": 19.48, "learning_rate": 4.02625176570026e-05, "loss": 2.2078, "step": 6730500 }, { "epoch": 19.48, "learning_rate": 4.026179400935532e-05, "loss": 2.1777, "step": 6731000 }, { "epoch": 19.48, "learning_rate": 4.0261070361708043e-05, "loss": 2.2134, "step": 6731500 }, { "epoch": 19.49, "learning_rate": 4.0260346714060766e-05, "loss": 2.1964, "step": 6732000 }, { "epoch": 19.49, "learning_rate": 4.025962306641349e-05, "loss": 2.1955, "step": 6732500 }, { "epoch": 19.49, "learning_rate": 4.025889941876621e-05, "loss": 2.1721, "step": 6733000 }, { "epoch": 19.49, "learning_rate": 4.025817577111893e-05, "loss": 2.2037, "step": 6733500 }, { "epoch": 19.49, "learning_rate": 4.0257452123471655e-05, "loss": 2.1858, "step": 6734000 }, { "epoch": 19.49, "learning_rate": 4.025672847582438e-05, "loss": 2.1963, "step": 6734500 }, { "epoch": 19.5, "learning_rate": 4.02560048281771e-05, "loss": 2.1717, "step": 6735000 }, { "epoch": 19.5, "learning_rate": 4.025528118052983e-05, "loss": 2.1703, "step": 6735500 }, { "epoch": 19.5, "learning_rate": 4.025455753288255e-05, "loss": 2.1896, "step": 6736000 }, { "epoch": 19.5, "learning_rate": 4.025383533253057e-05, "loss": 2.1724, "step": 6736500 }, { "epoch": 19.5, "learning_rate": 4.025311313217859e-05, "loss": 2.2151, "step": 6737000 }, { "epoch": 19.5, "learning_rate": 4.025238948453131e-05, "loss": 2.2026, "step": 6737500 }, { "epoch": 19.5, "learning_rate": 4.025166583688403e-05, "loss": 2.1847, "step": 6738000 }, { "epoch": 19.51, "learning_rate": 4.0250942189236755e-05, "loss": 2.2017, "step": 6738500 }, { "epoch": 19.51, "learning_rate": 4.025021854158948e-05, "loss": 2.2132, "step": 6739000 }, { "epoch": 19.51, "learning_rate": 4.02494948939422e-05, "loss": 2.1742, "step": 6739500 }, { "epoch": 19.51, "learning_rate": 4.024877124629493e-05, "loss": 2.1921, "step": 6740000 }, { "epoch": 19.51, "learning_rate": 4.024804759864765e-05, "loss": 2.2057, "step": 6740500 }, { "epoch": 19.51, "learning_rate": 4.024732395100037e-05, "loss": 2.2099, "step": 6741000 }, { "epoch": 19.51, "learning_rate": 4.0246600303353095e-05, "loss": 2.2007, "step": 6741500 }, { "epoch": 19.52, "learning_rate": 4.024587665570582e-05, "loss": 2.1931, "step": 6742000 }, { "epoch": 19.52, "learning_rate": 4.0245153008058546e-05, "loss": 2.2025, "step": 6742500 }, { "epoch": 19.52, "learning_rate": 4.024443080770656e-05, "loss": 2.1922, "step": 6743000 }, { "epoch": 19.52, "learning_rate": 4.024371005464988e-05, "loss": 2.1923, "step": 6743500 }, { "epoch": 19.52, "learning_rate": 4.02429864070026e-05, "loss": 2.1979, "step": 6744000 }, { "epoch": 19.52, "learning_rate": 4.024226275935532e-05, "loss": 2.2192, "step": 6744500 }, { "epoch": 19.52, "learning_rate": 4.0241539111708044e-05, "loss": 2.2139, "step": 6745000 }, { "epoch": 19.53, "learning_rate": 4.0240815464060767e-05, "loss": 2.1979, "step": 6745500 }, { "epoch": 19.53, "learning_rate": 4.024009181641349e-05, "loss": 2.2098, "step": 6746000 }, { "epoch": 19.53, "learning_rate": 4.023936816876621e-05, "loss": 2.1757, "step": 6746500 }, { "epoch": 19.53, "learning_rate": 4.0238645968414227e-05, "loss": 2.1977, "step": 6747000 }, { "epoch": 19.53, "learning_rate": 4.023792232076695e-05, "loss": 2.1954, "step": 6747500 }, { "epoch": 19.53, "learning_rate": 4.023719867311968e-05, "loss": 2.2057, "step": 6748000 }, { "epoch": 19.53, "learning_rate": 4.02364750254724e-05, "loss": 2.195, "step": 6748500 }, { "epoch": 19.54, "learning_rate": 4.023575137782512e-05, "loss": 2.184, "step": 6749000 }, { "epoch": 19.54, "learning_rate": 4.0235027730177844e-05, "loss": 2.1932, "step": 6749500 }, { "epoch": 19.54, "learning_rate": 4.023430408253057e-05, "loss": 2.1906, "step": 6750000 }, { "epoch": 19.54, "learning_rate": 4.0233580434883296e-05, "loss": 2.2255, "step": 6750500 }, { "epoch": 19.54, "learning_rate": 4.023285678723602e-05, "loss": 2.1901, "step": 6751000 }, { "epoch": 19.54, "learning_rate": 4.0232134586884034e-05, "loss": 2.218, "step": 6751500 }, { "epoch": 19.54, "learning_rate": 4.0231410939236756e-05, "loss": 2.1764, "step": 6752000 }, { "epoch": 19.55, "learning_rate": 4.023068873888478e-05, "loss": 2.21, "step": 6752500 }, { "epoch": 19.55, "learning_rate": 4.02299650912375e-05, "loss": 2.216, "step": 6753000 }, { "epoch": 19.55, "learning_rate": 4.022924144359022e-05, "loss": 2.1812, "step": 6753500 }, { "epoch": 19.55, "learning_rate": 4.0228517795942945e-05, "loss": 2.1996, "step": 6754000 }, { "epoch": 19.55, "learning_rate": 4.022779559559096e-05, "loss": 2.1841, "step": 6754500 }, { "epoch": 19.55, "learning_rate": 4.0227073395238976e-05, "loss": 2.2002, "step": 6755000 }, { "epoch": 19.55, "learning_rate": 4.0226349747591705e-05, "loss": 2.208, "step": 6755500 }, { "epoch": 19.56, "learning_rate": 4.022562609994443e-05, "loss": 2.2125, "step": 6756000 }, { "epoch": 19.56, "learning_rate": 4.022490245229715e-05, "loss": 2.2029, "step": 6756500 }, { "epoch": 19.56, "learning_rate": 4.022417880464987e-05, "loss": 2.1934, "step": 6757000 }, { "epoch": 19.56, "learning_rate": 4.0223455157002594e-05, "loss": 2.165, "step": 6757500 }, { "epoch": 19.56, "learning_rate": 4.022273150935532e-05, "loss": 2.1966, "step": 6758000 }, { "epoch": 19.56, "learning_rate": 4.0222007861708045e-05, "loss": 2.2091, "step": 6758500 }, { "epoch": 19.56, "learning_rate": 4.022128421406077e-05, "loss": 2.2038, "step": 6759000 }, { "epoch": 19.57, "learning_rate": 4.022056201370878e-05, "loss": 2.1732, "step": 6759500 }, { "epoch": 19.57, "learning_rate": 4.0219838366061505e-05, "loss": 2.1909, "step": 6760000 }, { "epoch": 19.57, "learning_rate": 4.021911471841423e-05, "loss": 2.1956, "step": 6760500 }, { "epoch": 19.57, "learning_rate": 4.021839251806225e-05, "loss": 2.1845, "step": 6761000 }, { "epoch": 19.57, "learning_rate": 4.021766887041497e-05, "loss": 2.2145, "step": 6761500 }, { "epoch": 19.57, "learning_rate": 4.0216945222767694e-05, "loss": 2.2101, "step": 6762000 }, { "epoch": 19.57, "learning_rate": 4.0216221575120416e-05, "loss": 2.2059, "step": 6762500 }, { "epoch": 19.58, "learning_rate": 4.021549792747314e-05, "loss": 2.2105, "step": 6763000 }, { "epoch": 19.58, "learning_rate": 4.0214775727121154e-05, "loss": 2.1938, "step": 6763500 }, { "epoch": 19.58, "learning_rate": 4.0214053526769176e-05, "loss": 2.193, "step": 6764000 }, { "epoch": 19.58, "learning_rate": 4.02133298791219e-05, "loss": 2.1882, "step": 6764500 }, { "epoch": 19.58, "learning_rate": 4.021260623147462e-05, "loss": 2.1961, "step": 6765000 }, { "epoch": 19.58, "learning_rate": 4.021188258382734e-05, "loss": 2.1918, "step": 6765500 }, { "epoch": 19.58, "learning_rate": 4.021115893618007e-05, "loss": 2.2056, "step": 6766000 }, { "epoch": 19.59, "learning_rate": 4.0210435288532794e-05, "loss": 2.1806, "step": 6766500 }, { "epoch": 19.59, "learning_rate": 4.020971164088552e-05, "loss": 2.2175, "step": 6767000 }, { "epoch": 19.59, "learning_rate": 4.020898799323824e-05, "loss": 2.1912, "step": 6767500 }, { "epoch": 19.59, "learning_rate": 4.020826434559096e-05, "loss": 2.2138, "step": 6768000 }, { "epoch": 19.59, "learning_rate": 4.020754069794368e-05, "loss": 2.2036, "step": 6768500 }, { "epoch": 19.59, "learning_rate": 4.0206817050296406e-05, "loss": 2.183, "step": 6769000 }, { "epoch": 19.59, "learning_rate": 4.020609340264913e-05, "loss": 2.1992, "step": 6769500 }, { "epoch": 19.6, "learning_rate": 4.020536975500186e-05, "loss": 2.209, "step": 6770000 }, { "epoch": 19.6, "learning_rate": 4.020464755464987e-05, "loss": 2.1808, "step": 6770500 }, { "epoch": 19.6, "learning_rate": 4.0203923907002595e-05, "loss": 2.1888, "step": 6771000 }, { "epoch": 19.6, "learning_rate": 4.020320025935532e-05, "loss": 2.207, "step": 6771500 }, { "epoch": 19.6, "learning_rate": 4.020247661170804e-05, "loss": 2.2042, "step": 6772000 }, { "epoch": 19.6, "learning_rate": 4.0201754411356055e-05, "loss": 2.1934, "step": 6772500 }, { "epoch": 19.61, "learning_rate": 4.020103076370878e-05, "loss": 2.2191, "step": 6773000 }, { "epoch": 19.61, "learning_rate": 4.0200307116061506e-05, "loss": 2.1844, "step": 6773500 }, { "epoch": 19.61, "learning_rate": 4.019958346841423e-05, "loss": 2.21, "step": 6774000 }, { "epoch": 19.61, "learning_rate": 4.019885982076696e-05, "loss": 2.1987, "step": 6774500 }, { "epoch": 19.61, "learning_rate": 4.019813617311968e-05, "loss": 2.2102, "step": 6775000 }, { "epoch": 19.61, "learning_rate": 4.01974125254724e-05, "loss": 2.1867, "step": 6775500 }, { "epoch": 19.61, "learning_rate": 4.019669032512042e-05, "loss": 2.2003, "step": 6776000 }, { "epoch": 19.62, "learning_rate": 4.0195971019359026e-05, "loss": 2.2015, "step": 6776500 }, { "epoch": 19.62, "learning_rate": 4.019524737171175e-05, "loss": 2.2109, "step": 6777000 }, { "epoch": 19.62, "learning_rate": 4.019452372406447e-05, "loss": 2.2223, "step": 6777500 }, { "epoch": 19.62, "learning_rate": 4.019380007641719e-05, "loss": 2.1982, "step": 6778000 }, { "epoch": 19.62, "learning_rate": 4.0193076428769915e-05, "loss": 2.1773, "step": 6778500 }, { "epoch": 19.62, "learning_rate": 4.019235278112264e-05, "loss": 2.2017, "step": 6779000 }, { "epoch": 19.62, "learning_rate": 4.019163058077065e-05, "loss": 2.2142, "step": 6779500 }, { "epoch": 19.63, "learning_rate": 4.019090693312338e-05, "loss": 2.1963, "step": 6780000 }, { "epoch": 19.63, "learning_rate": 4.0190183285476104e-05, "loss": 2.1852, "step": 6780500 }, { "epoch": 19.63, "learning_rate": 4.018945963782883e-05, "loss": 2.1988, "step": 6781000 }, { "epoch": 19.63, "learning_rate": 4.0188735990181555e-05, "loss": 2.1826, "step": 6781500 }, { "epoch": 19.63, "learning_rate": 4.018801234253428e-05, "loss": 2.2059, "step": 6782000 }, { "epoch": 19.63, "learning_rate": 4.0187288694887e-05, "loss": 2.2237, "step": 6782500 }, { "epoch": 19.63, "learning_rate": 4.018656504723972e-05, "loss": 2.1972, "step": 6783000 }, { "epoch": 19.64, "learning_rate": 4.0185841399592444e-05, "loss": 2.1832, "step": 6783500 }, { "epoch": 19.64, "learning_rate": 4.0185117751945167e-05, "loss": 2.1942, "step": 6784000 }, { "epoch": 19.64, "learning_rate": 4.018439410429789e-05, "loss": 2.2242, "step": 6784500 }, { "epoch": 19.64, "learning_rate": 4.018367045665061e-05, "loss": 2.22, "step": 6785000 }, { "epoch": 19.64, "learning_rate": 4.018294680900333e-05, "loss": 2.2027, "step": 6785500 }, { "epoch": 19.64, "learning_rate": 4.0182223161356055e-05, "loss": 2.1904, "step": 6786000 }, { "epoch": 19.64, "learning_rate": 4.0181499513708784e-05, "loss": 2.1912, "step": 6786500 }, { "epoch": 19.65, "learning_rate": 4.018077586606151e-05, "loss": 2.1829, "step": 6787000 }, { "epoch": 19.65, "learning_rate": 4.018005366570952e-05, "loss": 2.1843, "step": 6787500 }, { "epoch": 19.65, "learning_rate": 4.0179330018062244e-05, "loss": 2.2278, "step": 6788000 }, { "epoch": 19.65, "learning_rate": 4.0178606370414973e-05, "loss": 2.1986, "step": 6788500 }, { "epoch": 19.65, "learning_rate": 4.0177882722767696e-05, "loss": 2.2172, "step": 6789000 }, { "epoch": 19.65, "learning_rate": 4.017715907512042e-05, "loss": 2.2212, "step": 6789500 }, { "epoch": 19.65, "learning_rate": 4.017643542747314e-05, "loss": 2.2027, "step": 6790000 }, { "epoch": 19.66, "learning_rate": 4.0175713227121156e-05, "loss": 2.208, "step": 6790500 }, { "epoch": 19.66, "learning_rate": 4.017499102676918e-05, "loss": 2.2088, "step": 6791000 }, { "epoch": 19.66, "learning_rate": 4.01742673791219e-05, "loss": 2.207, "step": 6791500 }, { "epoch": 19.66, "learning_rate": 4.017354373147462e-05, "loss": 2.1959, "step": 6792000 }, { "epoch": 19.66, "learning_rate": 4.0172820083827345e-05, "loss": 2.1899, "step": 6792500 }, { "epoch": 19.66, "learning_rate": 4.017209643618007e-05, "loss": 2.2128, "step": 6793000 }, { "epoch": 19.66, "learning_rate": 4.017137423582808e-05, "loss": 2.2035, "step": 6793500 }, { "epoch": 19.67, "learning_rate": 4.0170650588180805e-05, "loss": 2.1831, "step": 6794000 }, { "epoch": 19.67, "learning_rate": 4.0169926940533534e-05, "loss": 2.2129, "step": 6794500 }, { "epoch": 19.67, "learning_rate": 4.0169203292886256e-05, "loss": 2.197, "step": 6795000 }, { "epoch": 19.67, "learning_rate": 4.016847964523898e-05, "loss": 2.1835, "step": 6795500 }, { "epoch": 19.67, "learning_rate": 4.016775599759171e-05, "loss": 2.1974, "step": 6796000 }, { "epoch": 19.67, "learning_rate": 4.016703234994443e-05, "loss": 2.212, "step": 6796500 }, { "epoch": 19.67, "learning_rate": 4.016630870229715e-05, "loss": 2.2273, "step": 6797000 }, { "epoch": 19.68, "learning_rate": 4.0165585054649874e-05, "loss": 2.2138, "step": 6797500 }, { "epoch": 19.68, "learning_rate": 4.0164861407002596e-05, "loss": 2.2346, "step": 6798000 }, { "epoch": 19.68, "learning_rate": 4.016413775935532e-05, "loss": 2.1942, "step": 6798500 }, { "epoch": 19.68, "learning_rate": 4.016341411170804e-05, "loss": 2.1815, "step": 6799000 }, { "epoch": 19.68, "learning_rate": 4.016269046406076e-05, "loss": 2.2309, "step": 6799500 }, { "epoch": 19.68, "learning_rate": 4.0161966816413485e-05, "loss": 2.1963, "step": 6800000 }, { "epoch": 19.68, "learning_rate": 4.016124316876621e-05, "loss": 2.2142, "step": 6800500 }, { "epoch": 19.69, "learning_rate": 4.016052096841423e-05, "loss": 2.2109, "step": 6801000 }, { "epoch": 19.69, "learning_rate": 4.015979732076695e-05, "loss": 2.1888, "step": 6801500 }, { "epoch": 19.69, "learning_rate": 4.0159073673119674e-05, "loss": 2.2088, "step": 6802000 }, { "epoch": 19.69, "learning_rate": 4.015835147276769e-05, "loss": 2.1671, "step": 6802500 }, { "epoch": 19.69, "learning_rate": 4.015762782512042e-05, "loss": 2.2049, "step": 6803000 }, { "epoch": 19.69, "learning_rate": 4.0156905624768434e-05, "loss": 2.2036, "step": 6803500 }, { "epoch": 19.69, "learning_rate": 4.015618197712116e-05, "loss": 2.2175, "step": 6804000 }, { "epoch": 19.7, "learning_rate": 4.0155458329473886e-05, "loss": 2.1838, "step": 6804500 }, { "epoch": 19.7, "learning_rate": 4.015473468182661e-05, "loss": 2.2199, "step": 6805000 }, { "epoch": 19.7, "learning_rate": 4.015401103417933e-05, "loss": 2.2231, "step": 6805500 }, { "epoch": 19.7, "learning_rate": 4.015328738653205e-05, "loss": 2.2181, "step": 6806000 }, { "epoch": 19.7, "learning_rate": 4.0152563738884774e-05, "loss": 2.1993, "step": 6806500 }, { "epoch": 19.7, "learning_rate": 4.01518400912375e-05, "loss": 2.1912, "step": 6807000 }, { "epoch": 19.7, "learning_rate": 4.015111789088551e-05, "loss": 2.2173, "step": 6807500 }, { "epoch": 19.71, "learning_rate": 4.0150394243238235e-05, "loss": 2.2026, "step": 6808000 }, { "epoch": 19.71, "learning_rate": 4.0149670595590964e-05, "loss": 2.2089, "step": 6808500 }, { "epoch": 19.71, "learning_rate": 4.0148946947943686e-05, "loss": 2.1945, "step": 6809000 }, { "epoch": 19.71, "learning_rate": 4.014822330029641e-05, "loss": 2.1993, "step": 6809500 }, { "epoch": 19.71, "learning_rate": 4.014749965264913e-05, "loss": 2.1954, "step": 6810000 }, { "epoch": 19.71, "learning_rate": 4.014677600500186e-05, "loss": 2.1989, "step": 6810500 }, { "epoch": 19.72, "learning_rate": 4.014605235735458e-05, "loss": 2.2043, "step": 6811000 }, { "epoch": 19.72, "learning_rate": 4.01453301570026e-05, "loss": 2.1943, "step": 6811500 }, { "epoch": 19.72, "learning_rate": 4.014460795665061e-05, "loss": 2.2147, "step": 6812000 }, { "epoch": 19.72, "learning_rate": 4.0143884309003335e-05, "loss": 2.2199, "step": 6812500 }, { "epoch": 19.72, "learning_rate": 4.0143160661356064e-05, "loss": 2.2133, "step": 6813000 }, { "epoch": 19.72, "learning_rate": 4.0142437013708786e-05, "loss": 2.211, "step": 6813500 }, { "epoch": 19.72, "learning_rate": 4.014171336606151e-05, "loss": 2.2018, "step": 6814000 }, { "epoch": 19.73, "learning_rate": 4.014098971841423e-05, "loss": 2.2019, "step": 6814500 }, { "epoch": 19.73, "learning_rate": 4.014026607076695e-05, "loss": 2.1805, "step": 6815000 }, { "epoch": 19.73, "learning_rate": 4.0139542423119675e-05, "loss": 2.2068, "step": 6815500 }, { "epoch": 19.73, "learning_rate": 4.01388187754724e-05, "loss": 2.2421, "step": 6816000 }, { "epoch": 19.73, "learning_rate": 4.013809512782512e-05, "loss": 2.1797, "step": 6816500 }, { "epoch": 19.73, "learning_rate": 4.013737148017784e-05, "loss": 2.1602, "step": 6817000 }, { "epoch": 19.73, "learning_rate": 4.0136647832530564e-05, "loss": 2.1954, "step": 6817500 }, { "epoch": 19.74, "learning_rate": 4.0135925632178586e-05, "loss": 2.1853, "step": 6818000 }, { "epoch": 19.74, "learning_rate": 4.0135201984531315e-05, "loss": 2.2073, "step": 6818500 }, { "epoch": 19.74, "learning_rate": 4.013447833688404e-05, "loss": 2.1894, "step": 6819000 }, { "epoch": 19.74, "learning_rate": 4.013375468923676e-05, "loss": 2.207, "step": 6819500 }, { "epoch": 19.74, "learning_rate": 4.013303393618007e-05, "loss": 2.196, "step": 6820000 }, { "epoch": 19.74, "learning_rate": 4.013231028853279e-05, "loss": 2.1892, "step": 6820500 }, { "epoch": 19.74, "learning_rate": 4.013158664088551e-05, "loss": 2.1806, "step": 6821000 }, { "epoch": 19.75, "learning_rate": 4.0130862993238235e-05, "loss": 2.199, "step": 6821500 }, { "epoch": 19.75, "learning_rate": 4.0130139345590964e-05, "loss": 2.2203, "step": 6822000 }, { "epoch": 19.75, "learning_rate": 4.0129415697943687e-05, "loss": 2.1911, "step": 6822500 }, { "epoch": 19.75, "learning_rate": 4.01286934975917e-05, "loss": 2.1741, "step": 6823000 }, { "epoch": 19.75, "learning_rate": 4.012797129723972e-05, "loss": 2.2012, "step": 6823500 }, { "epoch": 19.75, "learning_rate": 4.012724764959244e-05, "loss": 2.2076, "step": 6824000 }, { "epoch": 19.75, "learning_rate": 4.012652400194516e-05, "loss": 2.2091, "step": 6824500 }, { "epoch": 19.76, "learning_rate": 4.0125800354297884e-05, "loss": 2.204, "step": 6825000 }, { "epoch": 19.76, "learning_rate": 4.012507670665061e-05, "loss": 2.2232, "step": 6825500 }, { "epoch": 19.76, "learning_rate": 4.0124353059003336e-05, "loss": 2.2057, "step": 6826000 }, { "epoch": 19.76, "learning_rate": 4.0123629411356065e-05, "loss": 2.1927, "step": 6826500 }, { "epoch": 19.76, "learning_rate": 4.012290576370879e-05, "loss": 2.2027, "step": 6827000 }, { "epoch": 19.76, "learning_rate": 4.0122185010652096e-05, "loss": 2.1895, "step": 6827500 }, { "epoch": 19.76, "learning_rate": 4.012146136300482e-05, "loss": 2.1875, "step": 6828000 }, { "epoch": 19.77, "learning_rate": 4.012073771535754e-05, "loss": 2.1964, "step": 6828500 }, { "epoch": 19.77, "learning_rate": 4.012001406771026e-05, "loss": 2.2251, "step": 6829000 }, { "epoch": 19.77, "learning_rate": 4.011929042006299e-05, "loss": 2.2008, "step": 6829500 }, { "epoch": 19.77, "learning_rate": 4.0118566772415714e-05, "loss": 2.2016, "step": 6830000 }, { "epoch": 19.77, "learning_rate": 4.011784457206373e-05, "loss": 2.1897, "step": 6830500 }, { "epoch": 19.77, "learning_rate": 4.011712092441645e-05, "loss": 2.2225, "step": 6831000 }, { "epoch": 19.77, "learning_rate": 4.0116397276769174e-05, "loss": 2.2002, "step": 6831500 }, { "epoch": 19.78, "learning_rate": 4.0115673629121896e-05, "loss": 2.2016, "step": 6832000 }, { "epoch": 19.78, "learning_rate": 4.011494998147462e-05, "loss": 2.1609, "step": 6832500 }, { "epoch": 19.78, "learning_rate": 4.011422633382734e-05, "loss": 2.1917, "step": 6833000 }, { "epoch": 19.78, "learning_rate": 4.011350413347536e-05, "loss": 2.1826, "step": 6833500 }, { "epoch": 19.78, "learning_rate": 4.011278048582809e-05, "loss": 2.1872, "step": 6834000 }, { "epoch": 19.78, "learning_rate": 4.0112056838180814e-05, "loss": 2.1748, "step": 6834500 }, { "epoch": 19.78, "learning_rate": 4.0111333190533536e-05, "loss": 2.2007, "step": 6835000 }, { "epoch": 19.79, "learning_rate": 4.011061099018155e-05, "loss": 2.2029, "step": 6835500 }, { "epoch": 19.79, "learning_rate": 4.0109887342534274e-05, "loss": 2.2043, "step": 6836000 }, { "epoch": 19.79, "learning_rate": 4.0109163694886996e-05, "loss": 2.1884, "step": 6836500 }, { "epoch": 19.79, "learning_rate": 4.010844004723972e-05, "loss": 2.1946, "step": 6837000 }, { "epoch": 19.79, "learning_rate": 4.010771639959244e-05, "loss": 2.2101, "step": 6837500 }, { "epoch": 19.79, "learning_rate": 4.010699275194516e-05, "loss": 2.2165, "step": 6838000 }, { "epoch": 19.79, "learning_rate": 4.010626910429789e-05, "loss": 2.2138, "step": 6838500 }, { "epoch": 19.8, "learning_rate": 4.0105545456650614e-05, "loss": 2.2036, "step": 6839000 }, { "epoch": 19.8, "learning_rate": 4.010482325629863e-05, "loss": 2.2116, "step": 6839500 }, { "epoch": 19.8, "learning_rate": 4.010409960865135e-05, "loss": 2.1896, "step": 6840000 }, { "epoch": 19.8, "learning_rate": 4.0103375961004074e-05, "loss": 2.1874, "step": 6840500 }, { "epoch": 19.8, "learning_rate": 4.01026523133568e-05, "loss": 2.1939, "step": 6841000 }, { "epoch": 19.8, "learning_rate": 4.0101928665709525e-05, "loss": 2.2027, "step": 6841500 }, { "epoch": 19.8, "learning_rate": 4.010120501806225e-05, "loss": 2.2109, "step": 6842000 }, { "epoch": 19.81, "learning_rate": 4.010048137041497e-05, "loss": 2.1994, "step": 6842500 }, { "epoch": 19.81, "learning_rate": 4.009975772276769e-05, "loss": 2.1692, "step": 6843000 }, { "epoch": 19.81, "learning_rate": 4.0099035522415714e-05, "loss": 2.1784, "step": 6843500 }, { "epoch": 19.81, "learning_rate": 4.009831187476844e-05, "loss": 2.1942, "step": 6844000 }, { "epoch": 19.81, "learning_rate": 4.009758822712116e-05, "loss": 2.2209, "step": 6844500 }, { "epoch": 19.81, "learning_rate": 4.009686457947388e-05, "loss": 2.2059, "step": 6845000 }, { "epoch": 19.81, "learning_rate": 4.0096140931826603e-05, "loss": 2.1967, "step": 6845500 }, { "epoch": 19.82, "learning_rate": 4.0095417284179326e-05, "loss": 2.2082, "step": 6846000 }, { "epoch": 19.82, "learning_rate": 4.009469363653205e-05, "loss": 2.1688, "step": 6846500 }, { "epoch": 19.82, "learning_rate": 4.009396998888477e-05, "loss": 2.2202, "step": 6847000 }, { "epoch": 19.82, "learning_rate": 4.009324778853279e-05, "loss": 2.1967, "step": 6847500 }, { "epoch": 19.82, "learning_rate": 4.009252558818081e-05, "loss": 2.1909, "step": 6848000 }, { "epoch": 19.82, "learning_rate": 4.009180194053354e-05, "loss": 2.1748, "step": 6848500 }, { "epoch": 19.83, "learning_rate": 4.009107974018155e-05, "loss": 2.1747, "step": 6849000 }, { "epoch": 19.83, "learning_rate": 4.0090356092534275e-05, "loss": 2.187, "step": 6849500 }, { "epoch": 19.83, "learning_rate": 4.0089632444887e-05, "loss": 2.2181, "step": 6850000 }, { "epoch": 19.83, "learning_rate": 4.008890879723972e-05, "loss": 2.203, "step": 6850500 }, { "epoch": 19.83, "learning_rate": 4.008818659688774e-05, "loss": 2.1954, "step": 6851000 }, { "epoch": 19.83, "learning_rate": 4.0087462949240464e-05, "loss": 2.2017, "step": 6851500 }, { "epoch": 19.83, "learning_rate": 4.0086739301593186e-05, "loss": 2.2075, "step": 6852000 }, { "epoch": 19.84, "learning_rate": 4.008601565394591e-05, "loss": 2.2293, "step": 6852500 }, { "epoch": 19.84, "learning_rate": 4.008529200629863e-05, "loss": 2.2148, "step": 6853000 }, { "epoch": 19.84, "learning_rate": 4.008456835865135e-05, "loss": 2.1706, "step": 6853500 }, { "epoch": 19.84, "learning_rate": 4.0083844711004075e-05, "loss": 2.1996, "step": 6854000 }, { "epoch": 19.84, "learning_rate": 4.00831210633568e-05, "loss": 2.2068, "step": 6854500 }, { "epoch": 19.84, "learning_rate": 4.008239741570952e-05, "loss": 2.1957, "step": 6855000 }, { "epoch": 19.84, "learning_rate": 4.008167376806224e-05, "loss": 2.2038, "step": 6855500 }, { "epoch": 19.85, "learning_rate": 4.008095012041497e-05, "loss": 2.1784, "step": 6856000 }, { "epoch": 19.85, "learning_rate": 4.008022792006299e-05, "loss": 2.1926, "step": 6856500 }, { "epoch": 19.85, "learning_rate": 4.0079504272415715e-05, "loss": 2.2158, "step": 6857000 }, { "epoch": 19.85, "learning_rate": 4.007878062476844e-05, "loss": 2.1827, "step": 6857500 }, { "epoch": 19.85, "learning_rate": 4.007805697712116e-05, "loss": 2.2085, "step": 6858000 }, { "epoch": 19.85, "learning_rate": 4.007733332947388e-05, "loss": 2.1999, "step": 6858500 }, { "epoch": 19.85, "learning_rate": 4.0076609681826604e-05, "loss": 2.2057, "step": 6859000 }, { "epoch": 19.86, "learning_rate": 4.0075886034179326e-05, "loss": 2.1739, "step": 6859500 }, { "epoch": 19.86, "learning_rate": 4.007516238653205e-05, "loss": 2.2106, "step": 6860000 }, { "epoch": 19.86, "learning_rate": 4.007444018618007e-05, "loss": 2.1874, "step": 6860500 }, { "epoch": 19.86, "learning_rate": 4.007371653853279e-05, "loss": 2.2094, "step": 6861000 }, { "epoch": 19.86, "learning_rate": 4.0072992890885515e-05, "loss": 2.1859, "step": 6861500 }, { "epoch": 19.86, "learning_rate": 4.007226924323824e-05, "loss": 2.2109, "step": 6862000 }, { "epoch": 19.86, "learning_rate": 4.007154559559096e-05, "loss": 2.1857, "step": 6862500 }, { "epoch": 19.87, "learning_rate": 4.007082194794369e-05, "loss": 2.2074, "step": 6863000 }, { "epoch": 19.87, "learning_rate": 4.007009830029641e-05, "loss": 2.193, "step": 6863500 }, { "epoch": 19.87, "learning_rate": 4.0069374652649133e-05, "loss": 2.2222, "step": 6864000 }, { "epoch": 19.87, "learning_rate": 4.0068651005001856e-05, "loss": 2.1954, "step": 6864500 }, { "epoch": 19.87, "learning_rate": 4.006792735735458e-05, "loss": 2.2066, "step": 6865000 }, { "epoch": 19.87, "learning_rate": 4.00672037097073e-05, "loss": 2.1944, "step": 6865500 }, { "epoch": 19.87, "learning_rate": 4.006648150935532e-05, "loss": 2.2019, "step": 6866000 }, { "epoch": 19.88, "learning_rate": 4.0065757861708045e-05, "loss": 2.1703, "step": 6866500 }, { "epoch": 19.88, "learning_rate": 4.006503421406077e-05, "loss": 2.198, "step": 6867000 }, { "epoch": 19.88, "learning_rate": 4.006431056641349e-05, "loss": 2.1998, "step": 6867500 }, { "epoch": 19.88, "learning_rate": 4.006358691876621e-05, "loss": 2.1927, "step": 6868000 }, { "epoch": 19.88, "learning_rate": 4.0062863271118934e-05, "loss": 2.2021, "step": 6868500 }, { "epoch": 19.88, "learning_rate": 4.0062139623471656e-05, "loss": 2.205, "step": 6869000 }, { "epoch": 19.88, "learning_rate": 4.006141597582438e-05, "loss": 2.1982, "step": 6869500 }, { "epoch": 19.89, "learning_rate": 4.0060693775472394e-05, "loss": 2.226, "step": 6870000 }, { "epoch": 19.89, "learning_rate": 4.005997012782512e-05, "loss": 2.1944, "step": 6870500 }, { "epoch": 19.89, "learning_rate": 4.0059247927473145e-05, "loss": 2.1788, "step": 6871000 }, { "epoch": 19.89, "learning_rate": 4.005852427982587e-05, "loss": 2.1867, "step": 6871500 }, { "epoch": 19.89, "learning_rate": 4.005780207947388e-05, "loss": 2.1796, "step": 6872000 }, { "epoch": 19.89, "learning_rate": 4.0057078431826605e-05, "loss": 2.2026, "step": 6872500 }, { "epoch": 19.89, "learning_rate": 4.005635478417933e-05, "loss": 2.213, "step": 6873000 }, { "epoch": 19.9, "learning_rate": 4.005563113653205e-05, "loss": 2.1899, "step": 6873500 }, { "epoch": 19.9, "learning_rate": 4.005490748888477e-05, "loss": 2.1841, "step": 6874000 }, { "epoch": 19.9, "learning_rate": 4.0054183841237494e-05, "loss": 2.2014, "step": 6874500 }, { "epoch": 19.9, "learning_rate": 4.005346019359022e-05, "loss": 2.1983, "step": 6875000 }, { "epoch": 19.9, "learning_rate": 4.0052736545942945e-05, "loss": 2.1877, "step": 6875500 }, { "epoch": 19.9, "learning_rate": 4.005201289829567e-05, "loss": 2.2286, "step": 6876000 }, { "epoch": 19.9, "learning_rate": 4.005128925064839e-05, "loss": 2.2294, "step": 6876500 }, { "epoch": 19.91, "learning_rate": 4.00505684975917e-05, "loss": 2.1813, "step": 6877000 }, { "epoch": 19.91, "learning_rate": 4.004984484994442e-05, "loss": 2.1948, "step": 6877500 }, { "epoch": 19.91, "learning_rate": 4.004912120229714e-05, "loss": 2.1977, "step": 6878000 }, { "epoch": 19.91, "learning_rate": 4.004839755464987e-05, "loss": 2.2074, "step": 6878500 }, { "epoch": 19.91, "learning_rate": 4.0047675354297894e-05, "loss": 2.209, "step": 6879000 }, { "epoch": 19.91, "learning_rate": 4.0046951706650617e-05, "loss": 2.1737, "step": 6879500 }, { "epoch": 19.91, "learning_rate": 4.004622805900334e-05, "loss": 2.1881, "step": 6880000 }, { "epoch": 19.92, "learning_rate": 4.004550441135606e-05, "loss": 2.2033, "step": 6880500 }, { "epoch": 19.92, "learning_rate": 4.004478076370878e-05, "loss": 2.2023, "step": 6881000 }, { "epoch": 19.92, "learning_rate": 4.0044057116061505e-05, "loss": 2.1895, "step": 6881500 }, { "epoch": 19.92, "learning_rate": 4.004333346841423e-05, "loss": 2.2082, "step": 6882000 }, { "epoch": 19.92, "learning_rate": 4.004261126806225e-05, "loss": 2.2031, "step": 6882500 }, { "epoch": 19.92, "learning_rate": 4.004188762041497e-05, "loss": 2.1948, "step": 6883000 }, { "epoch": 19.92, "learning_rate": 4.0041163972767695e-05, "loss": 2.2036, "step": 6883500 }, { "epoch": 19.93, "learning_rate": 4.004044032512042e-05, "loss": 2.225, "step": 6884000 }, { "epoch": 19.93, "learning_rate": 4.003971667747314e-05, "loss": 2.1969, "step": 6884500 }, { "epoch": 19.93, "learning_rate": 4.0038994477121155e-05, "loss": 2.204, "step": 6885000 }, { "epoch": 19.93, "learning_rate": 4.003827082947388e-05, "loss": 2.212, "step": 6885500 }, { "epoch": 19.93, "learning_rate": 4.0037547181826606e-05, "loss": 2.1954, "step": 6886000 }, { "epoch": 19.93, "learning_rate": 4.003682353417933e-05, "loss": 2.187, "step": 6886500 }, { "epoch": 19.94, "learning_rate": 4.003609988653205e-05, "loss": 2.2084, "step": 6887000 }, { "epoch": 19.94, "learning_rate": 4.003537623888477e-05, "loss": 2.2074, "step": 6887500 }, { "epoch": 19.94, "learning_rate": 4.00346525912375e-05, "loss": 2.1716, "step": 6888000 }, { "epoch": 19.94, "learning_rate": 4.003393039088552e-05, "loss": 2.2132, "step": 6888500 }, { "epoch": 19.94, "learning_rate": 4.003320674323824e-05, "loss": 2.2078, "step": 6889000 }, { "epoch": 19.94, "learning_rate": 4.003248309559096e-05, "loss": 2.2157, "step": 6889500 }, { "epoch": 19.94, "learning_rate": 4.0031759447943684e-05, "loss": 2.1805, "step": 6890000 }, { "epoch": 19.95, "learning_rate": 4.0031035800296406e-05, "loss": 2.1971, "step": 6890500 }, { "epoch": 19.95, "learning_rate": 4.003031215264913e-05, "loss": 2.2131, "step": 6891000 }, { "epoch": 19.95, "learning_rate": 4.002958850500185e-05, "loss": 2.2248, "step": 6891500 }, { "epoch": 19.95, "learning_rate": 4.002886485735457e-05, "loss": 2.185, "step": 6892000 }, { "epoch": 19.95, "learning_rate": 4.0028141209707295e-05, "loss": 2.1968, "step": 6892500 }, { "epoch": 19.95, "learning_rate": 4.0027417562060024e-05, "loss": 2.2095, "step": 6893000 }, { "epoch": 19.95, "learning_rate": 4.0026693914412746e-05, "loss": 2.2032, "step": 6893500 }, { "epoch": 19.96, "learning_rate": 4.0025970266765475e-05, "loss": 2.2116, "step": 6894000 }, { "epoch": 19.96, "learning_rate": 4.00252466191182e-05, "loss": 2.2343, "step": 6894500 }, { "epoch": 19.96, "learning_rate": 4.002452297147092e-05, "loss": 2.1686, "step": 6895000 }, { "epoch": 19.96, "learning_rate": 4.0023800771118935e-05, "loss": 2.2043, "step": 6895500 }, { "epoch": 19.96, "learning_rate": 4.002307712347166e-05, "loss": 2.1747, "step": 6896000 }, { "epoch": 19.96, "learning_rate": 4.002235347582438e-05, "loss": 2.1926, "step": 6896500 }, { "epoch": 19.96, "learning_rate": 4.00216298281771e-05, "loss": 2.1883, "step": 6897000 }, { "epoch": 19.97, "learning_rate": 4.0020907627825124e-05, "loss": 2.1899, "step": 6897500 }, { "epoch": 19.97, "learning_rate": 4.0020183980177846e-05, "loss": 2.205, "step": 6898000 }, { "epoch": 19.97, "learning_rate": 4.001946033253057e-05, "loss": 2.1912, "step": 6898500 }, { "epoch": 19.97, "learning_rate": 4.0018738132178584e-05, "loss": 2.1971, "step": 6899000 }, { "epoch": 19.97, "learning_rate": 4.0018014484531306e-05, "loss": 2.1907, "step": 6899500 }, { "epoch": 19.97, "learning_rate": 4.001729083688403e-05, "loss": 2.2183, "step": 6900000 }, { "epoch": 19.97, "learning_rate": 4.001656718923676e-05, "loss": 2.1959, "step": 6900500 }, { "epoch": 19.98, "learning_rate": 4.001584354158948e-05, "loss": 2.2013, "step": 6901000 }, { "epoch": 19.98, "learning_rate": 4.00151213412375e-05, "loss": 2.2053, "step": 6901500 }, { "epoch": 19.98, "learning_rate": 4.0014397693590225e-05, "loss": 2.2024, "step": 6902000 }, { "epoch": 19.98, "learning_rate": 4.001367549323824e-05, "loss": 2.1931, "step": 6902500 }, { "epoch": 19.98, "learning_rate": 4.001295184559096e-05, "loss": 2.2021, "step": 6903000 }, { "epoch": 19.98, "learning_rate": 4.0012228197943685e-05, "loss": 2.224, "step": 6903500 }, { "epoch": 19.98, "learning_rate": 4.001150455029641e-05, "loss": 2.2483, "step": 6904000 }, { "epoch": 19.99, "learning_rate": 4.001078090264913e-05, "loss": 2.2025, "step": 6904500 }, { "epoch": 19.99, "learning_rate": 4.001005725500185e-05, "loss": 2.2075, "step": 6905000 }, { "epoch": 19.99, "learning_rate": 4.0009333607354573e-05, "loss": 2.2215, "step": 6905500 }, { "epoch": 19.99, "learning_rate": 4.00086099597073e-05, "loss": 2.1861, "step": 6906000 }, { "epoch": 19.99, "learning_rate": 4.0007886312060025e-05, "loss": 2.1994, "step": 6906500 }, { "epoch": 19.99, "learning_rate": 4.000716411170804e-05, "loss": 2.2068, "step": 6907000 }, { "epoch": 19.99, "learning_rate": 4.000644046406076e-05, "loss": 2.1917, "step": 6907500 }, { "epoch": 20.0, "learning_rate": 4.000571681641349e-05, "loss": 2.2223, "step": 6908000 }, { "epoch": 20.0, "learning_rate": 4.0004993168766214e-05, "loss": 2.1749, "step": 6908500 }, { "epoch": 20.0, "learning_rate": 4.000427096841423e-05, "loss": 2.1973, "step": 6909000 }, { "epoch": 20.0, "eval_accuracy": 0.6594410792436429, "eval_accuracy_mlm": 0.6233413656263951, "eval_accuracy_nsp": 0.8532675200395979, "eval_loss": 2.231724500656128, "eval_runtime": 330.3097, "eval_samples_per_second": 1321.142, "eval_steps_per_second": 55.048, "step": 6909440 } ], "max_steps": 34547200, "num_train_epochs": 100, "total_flos": 9.466493397162928e+18, "trial_name": null, "trial_params": null }