diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 20.0, - "global_step": 6909440, + "epoch": 30.0, + "global_step": 10364160, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -83134,11 +83134,41581 @@ "eval_samples_per_second": 1321.142, "eval_steps_per_second": 55.048, "step": 6909440 + }, + { + "epoch": 20.0, + "learning_rate": 4.000354732076695e-05, + "loss": 2.1754, + "step": 6909500 + }, + { + "epoch": 20.0, + "learning_rate": 4.0002823673119674e-05, + "loss": 2.1769, + "step": 6910000 + }, + { + "epoch": 20.0, + "learning_rate": 4.00021000254724e-05, + "loss": 2.175, + "step": 6910500 + }, + { + "epoch": 20.0, + "learning_rate": 4.0001376377825125e-05, + "loss": 2.1955, + "step": 6911000 + }, + { + "epoch": 20.01, + "learning_rate": 4.000065273017785e-05, + "loss": 2.1907, + "step": 6911500 + }, + { + "epoch": 20.01, + "learning_rate": 3.999992908253057e-05, + "loss": 2.2082, + "step": 6912000 + }, + { + "epoch": 20.01, + "learning_rate": 3.999920543488329e-05, + "loss": 2.1514, + "step": 6912500 + }, + { + "epoch": 20.01, + "learning_rate": 3.9998481787236014e-05, + "loss": 2.2034, + "step": 6913000 + }, + { + "epoch": 20.01, + "learning_rate": 3.999775958688403e-05, + "loss": 2.1719, + "step": 6913500 + }, + { + "epoch": 20.01, + "learning_rate": 3.999703593923675e-05, + "loss": 2.1982, + "step": 6914000 + }, + { + "epoch": 20.01, + "learning_rate": 3.9996312291589474e-05, + "loss": 2.1762, + "step": 6914500 + }, + { + "epoch": 20.02, + "learning_rate": 3.99955886439422e-05, + "loss": 2.1888, + "step": 6915000 + }, + { + "epoch": 20.02, + "learning_rate": 3.9994866443590225e-05, + "loss": 2.1714, + "step": 6915500 + }, + { + "epoch": 20.02, + "learning_rate": 3.9994145690533534e-05, + "loss": 2.1863, + "step": 6916000 + }, + { + "epoch": 20.02, + "learning_rate": 3.9993422042886256e-05, + "loss": 2.1737, + "step": 6916500 + }, + { + "epoch": 20.02, + "learning_rate": 3.999269839523898e-05, + "loss": 2.1762, + "step": 6917000 + }, + { + "epoch": 20.02, + "learning_rate": 3.99919747475917e-05, + "loss": 2.1594, + "step": 6917500 + }, + { + "epoch": 20.02, + "learning_rate": 3.999125109994443e-05, + "loss": 2.1686, + "step": 6918000 + }, + { + "epoch": 20.03, + "learning_rate": 3.999052745229715e-05, + "loss": 2.1785, + "step": 6918500 + }, + { + "epoch": 20.03, + "learning_rate": 3.9989803804649874e-05, + "loss": 2.195, + "step": 6919000 + }, + { + "epoch": 20.03, + "learning_rate": 3.99890801570026e-05, + "loss": 2.1416, + "step": 6919500 + }, + { + "epoch": 20.03, + "learning_rate": 3.998835795665061e-05, + "loss": 2.1669, + "step": 6920000 + }, + { + "epoch": 20.03, + "learning_rate": 3.9987634309003334e-05, + "loss": 2.202, + "step": 6920500 + }, + { + "epoch": 20.03, + "learning_rate": 3.998691066135606e-05, + "loss": 2.199, + "step": 6921000 + }, + { + "epoch": 20.03, + "learning_rate": 3.998618701370878e-05, + "loss": 2.159, + "step": 6921500 + }, + { + "epoch": 20.04, + "learning_rate": 3.99854633660615e-05, + "loss": 2.1523, + "step": 6922000 + }, + { + "epoch": 20.04, + "learning_rate": 3.9984741165709523e-05, + "loss": 2.1608, + "step": 6922500 + }, + { + "epoch": 20.04, + "learning_rate": 3.9984017518062246e-05, + "loss": 2.1697, + "step": 6923000 + }, + { + "epoch": 20.04, + "learning_rate": 3.9983293870414975e-05, + "loss": 2.2001, + "step": 6923500 + }, + { + "epoch": 20.04, + "learning_rate": 3.99825702227677e-05, + "loss": 2.2104, + "step": 6924000 + }, + { + "epoch": 20.04, + "learning_rate": 3.998184802241571e-05, + "loss": 2.1931, + "step": 6924500 + }, + { + "epoch": 20.05, + "learning_rate": 3.9981124374768435e-05, + "loss": 2.191, + "step": 6925000 + }, + { + "epoch": 20.05, + "learning_rate": 3.998040072712116e-05, + "loss": 2.2201, + "step": 6925500 + }, + { + "epoch": 20.05, + "learning_rate": 3.997967707947388e-05, + "loss": 2.2081, + "step": 6926000 + }, + { + "epoch": 20.05, + "learning_rate": 3.99789534318266e-05, + "loss": 2.2025, + "step": 6926500 + }, + { + "epoch": 20.05, + "learning_rate": 3.997822978417933e-05, + "loss": 2.1786, + "step": 6927000 + }, + { + "epoch": 20.05, + "learning_rate": 3.997750613653205e-05, + "loss": 2.1673, + "step": 6927500 + }, + { + "epoch": 20.05, + "learning_rate": 3.9976782488884775e-05, + "loss": 2.1964, + "step": 6928000 + }, + { + "epoch": 20.06, + "learning_rate": 3.997606028853279e-05, + "loss": 2.1654, + "step": 6928500 + }, + { + "epoch": 20.06, + "learning_rate": 3.997533664088551e-05, + "loss": 2.1721, + "step": 6929000 + }, + { + "epoch": 20.06, + "learning_rate": 3.9974612993238235e-05, + "loss": 2.1773, + "step": 6929500 + }, + { + "epoch": 20.06, + "learning_rate": 3.997388934559096e-05, + "loss": 2.1785, + "step": 6930000 + }, + { + "epoch": 20.06, + "learning_rate": 3.9973165697943686e-05, + "loss": 2.1914, + "step": 6930500 + }, + { + "epoch": 20.06, + "learning_rate": 3.997244205029641e-05, + "loss": 2.1868, + "step": 6931000 + }, + { + "epoch": 20.06, + "learning_rate": 3.997171840264913e-05, + "loss": 2.1811, + "step": 6931500 + }, + { + "epoch": 20.07, + "learning_rate": 3.997099620229715e-05, + "loss": 2.1854, + "step": 6932000 + }, + { + "epoch": 20.07, + "learning_rate": 3.9970272554649875e-05, + "loss": 2.1944, + "step": 6932500 + }, + { + "epoch": 20.07, + "learning_rate": 3.99695489070026e-05, + "loss": 2.1912, + "step": 6933000 + }, + { + "epoch": 20.07, + "learning_rate": 3.996882525935532e-05, + "loss": 2.1516, + "step": 6933500 + }, + { + "epoch": 20.07, + "learning_rate": 3.996810161170804e-05, + "loss": 2.2039, + "step": 6934000 + }, + { + "epoch": 20.07, + "learning_rate": 3.9967377964060764e-05, + "loss": 2.1754, + "step": 6934500 + }, + { + "epoch": 20.07, + "learning_rate": 3.9966654316413486e-05, + "loss": 2.182, + "step": 6935000 + }, + { + "epoch": 20.08, + "learning_rate": 3.996593066876621e-05, + "loss": 2.1789, + "step": 6935500 + }, + { + "epoch": 20.08, + "learning_rate": 3.996520702111893e-05, + "loss": 2.1669, + "step": 6936000 + }, + { + "epoch": 20.08, + "learning_rate": 3.996448482076695e-05, + "loss": 2.1872, + "step": 6936500 + }, + { + "epoch": 20.08, + "learning_rate": 3.9963761173119675e-05, + "loss": 2.1969, + "step": 6937000 + }, + { + "epoch": 20.08, + "learning_rate": 3.99630375254724e-05, + "loss": 2.1933, + "step": 6937500 + }, + { + "epoch": 20.08, + "learning_rate": 3.996231532512042e-05, + "loss": 2.1991, + "step": 6938000 + }, + { + "epoch": 20.08, + "learning_rate": 3.996159167747314e-05, + "loss": 2.1975, + "step": 6938500 + }, + { + "epoch": 20.09, + "learning_rate": 3.9960868029825864e-05, + "loss": 2.1958, + "step": 6939000 + }, + { + "epoch": 20.09, + "learning_rate": 3.996014438217859e-05, + "loss": 2.1826, + "step": 6939500 + }, + { + "epoch": 20.09, + "learning_rate": 3.995942073453131e-05, + "loss": 2.1876, + "step": 6940000 + }, + { + "epoch": 20.09, + "learning_rate": 3.995869708688403e-05, + "loss": 2.197, + "step": 6940500 + }, + { + "epoch": 20.09, + "learning_rate": 3.995797343923675e-05, + "loss": 2.173, + "step": 6941000 + }, + { + "epoch": 20.09, + "learning_rate": 3.995724979158948e-05, + "loss": 2.1933, + "step": 6941500 + }, + { + "epoch": 20.09, + "learning_rate": 3.9956526143942205e-05, + "loss": 2.1779, + "step": 6942000 + }, + { + "epoch": 20.1, + "learning_rate": 3.995580249629493e-05, + "loss": 2.1874, + "step": 6942500 + }, + { + "epoch": 20.1, + "learning_rate": 3.995507884864765e-05, + "loss": 2.1934, + "step": 6943000 + }, + { + "epoch": 20.1, + "learning_rate": 3.995435520100037e-05, + "loss": 2.2024, + "step": 6943500 + }, + { + "epoch": 20.1, + "learning_rate": 3.995363300064839e-05, + "loss": 2.1691, + "step": 6944000 + }, + { + "epoch": 20.1, + "learning_rate": 3.995290935300111e-05, + "loss": 2.2027, + "step": 6944500 + }, + { + "epoch": 20.1, + "learning_rate": 3.995218570535383e-05, + "loss": 2.1865, + "step": 6945000 + }, + { + "epoch": 20.1, + "learning_rate": 3.995146205770656e-05, + "loss": 2.1712, + "step": 6945500 + }, + { + "epoch": 20.11, + "learning_rate": 3.995073841005928e-05, + "loss": 2.1801, + "step": 6946000 + }, + { + "epoch": 20.11, + "learning_rate": 3.9950016209707305e-05, + "loss": 2.1931, + "step": 6946500 + }, + { + "epoch": 20.11, + "learning_rate": 3.994929256206003e-05, + "loss": 2.1908, + "step": 6947000 + }, + { + "epoch": 20.11, + "learning_rate": 3.994856891441275e-05, + "loss": 2.2075, + "step": 6947500 + }, + { + "epoch": 20.11, + "learning_rate": 3.994784526676547e-05, + "loss": 2.1888, + "step": 6948000 + }, + { + "epoch": 20.11, + "learning_rate": 3.994712306641349e-05, + "loss": 2.185, + "step": 6948500 + }, + { + "epoch": 20.11, + "learning_rate": 3.994640086606151e-05, + "loss": 2.1887, + "step": 6949000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994567721841423e-05, + "loss": 2.162, + "step": 6949500 + }, + { + "epoch": 20.12, + "learning_rate": 3.9944953570766954e-05, + "loss": 2.1662, + "step": 6950000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994423281771026e-05, + "loss": 2.1872, + "step": 6950500 + }, + { + "epoch": 20.12, + "learning_rate": 3.9943509170062985e-05, + "loss": 2.1945, + "step": 6951000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994278552241571e-05, + "loss": 2.1905, + "step": 6951500 + }, + { + "epoch": 20.12, + "learning_rate": 3.994206187476843e-05, + "loss": 2.1907, + "step": 6952000 + }, + { + "epoch": 20.12, + "learning_rate": 3.994133822712116e-05, + "loss": 2.1891, + "step": 6952500 + }, + { + "epoch": 20.13, + "learning_rate": 3.994061457947388e-05, + "loss": 2.165, + "step": 6953000 + }, + { + "epoch": 20.13, + "learning_rate": 3.99398923791219e-05, + "loss": 2.1748, + "step": 6953500 + }, + { + "epoch": 20.13, + "learning_rate": 3.993917017876992e-05, + "loss": 2.1902, + "step": 6954000 + }, + { + "epoch": 20.13, + "learning_rate": 3.993844653112264e-05, + "loss": 2.1953, + "step": 6954500 + }, + { + "epoch": 20.13, + "learning_rate": 3.993772288347536e-05, + "loss": 2.1898, + "step": 6955000 + }, + { + "epoch": 20.13, + "learning_rate": 3.9936999235828085e-05, + "loss": 2.1984, + "step": 6955500 + }, + { + "epoch": 20.13, + "learning_rate": 3.993627558818081e-05, + "loss": 2.1697, + "step": 6956000 + }, + { + "epoch": 20.14, + "learning_rate": 3.9935551940533537e-05, + "loss": 2.1823, + "step": 6956500 + }, + { + "epoch": 20.14, + "learning_rate": 3.993482829288626e-05, + "loss": 2.207, + "step": 6957000 + }, + { + "epoch": 20.14, + "learning_rate": 3.993410464523898e-05, + "loss": 2.1829, + "step": 6957500 + }, + { + "epoch": 20.14, + "learning_rate": 3.99333809975917e-05, + "loss": 2.1809, + "step": 6958000 + }, + { + "epoch": 20.14, + "learning_rate": 3.993265879723972e-05, + "loss": 2.213, + "step": 6958500 + }, + { + "epoch": 20.14, + "learning_rate": 3.993193514959244e-05, + "loss": 2.1971, + "step": 6959000 + }, + { + "epoch": 20.14, + "learning_rate": 3.993121150194516e-05, + "loss": 2.1764, + "step": 6959500 + }, + { + "epoch": 20.15, + "learning_rate": 3.9930487854297886e-05, + "loss": 2.1732, + "step": 6960000 + }, + { + "epoch": 20.15, + "learning_rate": 3.992976420665061e-05, + "loss": 2.2331, + "step": 6960500 + }, + { + "epoch": 20.15, + "learning_rate": 3.992904200629864e-05, + "loss": 2.1961, + "step": 6961000 + }, + { + "epoch": 20.15, + "learning_rate": 3.992831835865136e-05, + "loss": 2.1823, + "step": 6961500 + }, + { + "epoch": 20.15, + "learning_rate": 3.992759471100408e-05, + "loss": 2.1727, + "step": 6962000 + }, + { + "epoch": 20.15, + "learning_rate": 3.9926871063356804e-05, + "loss": 2.1899, + "step": 6962500 + }, + { + "epoch": 20.16, + "learning_rate": 3.9926147415709526e-05, + "loss": 2.1896, + "step": 6963000 + }, + { + "epoch": 20.16, + "learning_rate": 3.992542376806225e-05, + "loss": 2.1848, + "step": 6963500 + }, + { + "epoch": 20.16, + "learning_rate": 3.992470012041497e-05, + "loss": 2.1608, + "step": 6964000 + }, + { + "epoch": 20.16, + "learning_rate": 3.992397647276769e-05, + "loss": 2.1977, + "step": 6964500 + }, + { + "epoch": 20.16, + "learning_rate": 3.9923252825120415e-05, + "loss": 2.2078, + "step": 6965000 + }, + { + "epoch": 20.16, + "learning_rate": 3.992252917747314e-05, + "loss": 2.1759, + "step": 6965500 + }, + { + "epoch": 20.16, + "learning_rate": 3.992180552982586e-05, + "loss": 2.1903, + "step": 6966000 + }, + { + "epoch": 20.17, + "learning_rate": 3.992108188217858e-05, + "loss": 2.1703, + "step": 6966500 + }, + { + "epoch": 20.17, + "learning_rate": 3.992035823453131e-05, + "loss": 2.1843, + "step": 6967000 + }, + { + "epoch": 20.17, + "learning_rate": 3.9919636034179326e-05, + "loss": 2.1936, + "step": 6967500 + }, + { + "epoch": 20.17, + "learning_rate": 3.9918912386532055e-05, + "loss": 2.185, + "step": 6968000 + }, + { + "epoch": 20.17, + "learning_rate": 3.991818873888478e-05, + "loss": 2.1762, + "step": 6968500 + }, + { + "epoch": 20.17, + "learning_rate": 3.99174650912375e-05, + "loss": 2.1699, + "step": 6969000 + }, + { + "epoch": 20.17, + "learning_rate": 3.991674144359022e-05, + "loss": 2.1831, + "step": 6969500 + }, + { + "epoch": 20.18, + "learning_rate": 3.9916017795942944e-05, + "loss": 2.1411, + "step": 6970000 + }, + { + "epoch": 20.18, + "learning_rate": 3.9915294148295666e-05, + "loss": 2.1941, + "step": 6970500 + }, + { + "epoch": 20.18, + "learning_rate": 3.991457050064839e-05, + "loss": 2.1852, + "step": 6971000 + }, + { + "epoch": 20.18, + "learning_rate": 3.991384830029641e-05, + "loss": 2.1898, + "step": 6971500 + }, + { + "epoch": 20.18, + "learning_rate": 3.991312465264913e-05, + "loss": 2.1809, + "step": 6972000 + }, + { + "epoch": 20.18, + "learning_rate": 3.9912401005001855e-05, + "loss": 2.1772, + "step": 6972500 + }, + { + "epoch": 20.18, + "learning_rate": 3.991167880464987e-05, + "loss": 2.179, + "step": 6973000 + }, + { + "epoch": 20.19, + "learning_rate": 3.9910956604297886e-05, + "loss": 2.1684, + "step": 6973500 + }, + { + "epoch": 20.19, + "learning_rate": 3.991023295665061e-05, + "loss": 2.2027, + "step": 6974000 + }, + { + "epoch": 20.19, + "learning_rate": 3.990950930900334e-05, + "loss": 2.1633, + "step": 6974500 + }, + { + "epoch": 20.19, + "learning_rate": 3.990878566135606e-05, + "loss": 2.1725, + "step": 6975000 + }, + { + "epoch": 20.19, + "learning_rate": 3.9908063461004075e-05, + "loss": 2.1788, + "step": 6975500 + }, + { + "epoch": 20.19, + "learning_rate": 3.9907339813356804e-05, + "loss": 2.1985, + "step": 6976000 + }, + { + "epoch": 20.19, + "learning_rate": 3.990661616570953e-05, + "loss": 2.1897, + "step": 6976500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990589251806225e-05, + "loss": 2.1774, + "step": 6977000 + }, + { + "epoch": 20.2, + "learning_rate": 3.990516887041497e-05, + "loss": 2.166, + "step": 6977500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990444522276769e-05, + "loss": 2.2045, + "step": 6978000 + }, + { + "epoch": 20.2, + "learning_rate": 3.9903721575120416e-05, + "loss": 2.202, + "step": 6978500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990299937476844e-05, + "loss": 2.1984, + "step": 6979000 + }, + { + "epoch": 20.2, + "learning_rate": 3.990227572712116e-05, + "loss": 2.191, + "step": 6979500 + }, + { + "epoch": 20.2, + "learning_rate": 3.990155207947388e-05, + "loss": 2.1859, + "step": 6980000 + }, + { + "epoch": 20.21, + "learning_rate": 3.9900828431826605e-05, + "loss": 2.1798, + "step": 6980500 + }, + { + "epoch": 20.21, + "learning_rate": 3.990010478417933e-05, + "loss": 2.1933, + "step": 6981000 + }, + { + "epoch": 20.21, + "learning_rate": 3.989938113653205e-05, + "loss": 2.1866, + "step": 6981500 + }, + { + "epoch": 20.21, + "learning_rate": 3.989865748888477e-05, + "loss": 2.1655, + "step": 6982000 + }, + { + "epoch": 20.21, + "learning_rate": 3.9897933841237494e-05, + "loss": 2.1898, + "step": 6982500 + }, + { + "epoch": 20.21, + "learning_rate": 3.989721164088551e-05, + "loss": 2.1785, + "step": 6983000 + }, + { + "epoch": 20.21, + "learning_rate": 3.989648799323824e-05, + "loss": 2.1879, + "step": 6983500 + }, + { + "epoch": 20.22, + "learning_rate": 3.989576434559096e-05, + "loss": 2.1776, + "step": 6984000 + }, + { + "epoch": 20.22, + "learning_rate": 3.989504069794369e-05, + "loss": 2.1721, + "step": 6984500 + }, + { + "epoch": 20.22, + "learning_rate": 3.989431705029641e-05, + "loss": 2.1749, + "step": 6985000 + }, + { + "epoch": 20.22, + "learning_rate": 3.9893593402649134e-05, + "loss": 2.1844, + "step": 6985500 + }, + { + "epoch": 20.22, + "learning_rate": 3.9892869755001856e-05, + "loss": 2.1808, + "step": 6986000 + }, + { + "epoch": 20.22, + "learning_rate": 3.989214610735458e-05, + "loss": 2.1985, + "step": 6986500 + }, + { + "epoch": 20.22, + "learning_rate": 3.9891423907002594e-05, + "loss": 2.1776, + "step": 6987000 + }, + { + "epoch": 20.23, + "learning_rate": 3.9890700259355316e-05, + "loss": 2.2041, + "step": 6987500 + }, + { + "epoch": 20.23, + "learning_rate": 3.988997661170804e-05, + "loss": 2.1811, + "step": 6988000 + }, + { + "epoch": 20.23, + "learning_rate": 3.988925296406076e-05, + "loss": 2.1988, + "step": 6988500 + }, + { + "epoch": 20.23, + "learning_rate": 3.988852931641349e-05, + "loss": 2.218, + "step": 6989000 + }, + { + "epoch": 20.23, + "learning_rate": 3.9887807116061505e-05, + "loss": 2.1924, + "step": 6989500 + }, + { + "epoch": 20.23, + "learning_rate": 3.988708346841423e-05, + "loss": 2.1868, + "step": 6990000 + }, + { + "epoch": 20.23, + "learning_rate": 3.9886359820766956e-05, + "loss": 2.1678, + "step": 6990500 + }, + { + "epoch": 20.24, + "learning_rate": 3.988563617311968e-05, + "loss": 2.1913, + "step": 6991000 + }, + { + "epoch": 20.24, + "learning_rate": 3.98849125254724e-05, + "loss": 2.2153, + "step": 6991500 + }, + { + "epoch": 20.24, + "learning_rate": 3.988418887782512e-05, + "loss": 2.1932, + "step": 6992000 + }, + { + "epoch": 20.24, + "learning_rate": 3.988346667747314e-05, + "loss": 2.1721, + "step": 6992500 + }, + { + "epoch": 20.24, + "learning_rate": 3.988274302982586e-05, + "loss": 2.1697, + "step": 6993000 + }, + { + "epoch": 20.24, + "learning_rate": 3.988201938217859e-05, + "loss": 2.1609, + "step": 6993500 + }, + { + "epoch": 20.24, + "learning_rate": 3.988129573453131e-05, + "loss": 2.1694, + "step": 6994000 + }, + { + "epoch": 20.25, + "learning_rate": 3.9880572086884034e-05, + "loss": 2.1845, + "step": 6994500 + }, + { + "epoch": 20.25, + "learning_rate": 3.9879848439236757e-05, + "loss": 2.1812, + "step": 6995000 + }, + { + "epoch": 20.25, + "learning_rate": 3.987912479158948e-05, + "loss": 2.1769, + "step": 6995500 + }, + { + "epoch": 20.25, + "learning_rate": 3.98784011439422e-05, + "loss": 2.207, + "step": 6996000 + }, + { + "epoch": 20.25, + "learning_rate": 3.987767749629492e-05, + "loss": 2.1728, + "step": 6996500 + }, + { + "epoch": 20.25, + "learning_rate": 3.9876953848647645e-05, + "loss": 2.2241, + "step": 6997000 + }, + { + "epoch": 20.25, + "learning_rate": 3.987623164829566e-05, + "loss": 2.209, + "step": 6997500 + }, + { + "epoch": 20.26, + "learning_rate": 3.987550944794369e-05, + "loss": 2.185, + "step": 6998000 + }, + { + "epoch": 20.26, + "learning_rate": 3.9874787247591706e-05, + "loss": 2.175, + "step": 6998500 + }, + { + "epoch": 20.26, + "learning_rate": 3.987406359994443e-05, + "loss": 2.195, + "step": 6999000 + }, + { + "epoch": 20.26, + "learning_rate": 3.987333995229715e-05, + "loss": 2.1832, + "step": 6999500 + }, + { + "epoch": 20.26, + "learning_rate": 3.987261630464987e-05, + "loss": 2.1897, + "step": 7000000 + }, + { + "epoch": 20.26, + "learning_rate": 3.9871892657002595e-05, + "loss": 2.1876, + "step": 7000500 + }, + { + "epoch": 20.27, + "learning_rate": 3.987117045665062e-05, + "loss": 2.202, + "step": 7001000 + }, + { + "epoch": 20.27, + "learning_rate": 3.987044680900334e-05, + "loss": 2.1758, + "step": 7001500 + }, + { + "epoch": 20.27, + "learning_rate": 3.9869724608651355e-05, + "loss": 2.1852, + "step": 7002000 + }, + { + "epoch": 20.27, + "learning_rate": 3.986900096100408e-05, + "loss": 2.1671, + "step": 7002500 + }, + { + "epoch": 20.27, + "learning_rate": 3.98682773133568e-05, + "loss": 2.1718, + "step": 7003000 + }, + { + "epoch": 20.27, + "learning_rate": 3.986755366570952e-05, + "loss": 2.1998, + "step": 7003500 + }, + { + "epoch": 20.27, + "learning_rate": 3.9866830018062244e-05, + "loss": 2.1956, + "step": 7004000 + }, + { + "epoch": 20.28, + "learning_rate": 3.9866106370414966e-05, + "loss": 2.2042, + "step": 7004500 + }, + { + "epoch": 20.28, + "learning_rate": 3.986538272276769e-05, + "loss": 2.1867, + "step": 7005000 + }, + { + "epoch": 20.28, + "learning_rate": 3.986465907512042e-05, + "loss": 2.1871, + "step": 7005500 + }, + { + "epoch": 20.28, + "learning_rate": 3.986393542747314e-05, + "loss": 2.1908, + "step": 7006000 + }, + { + "epoch": 20.28, + "learning_rate": 3.986321177982587e-05, + "loss": 2.1811, + "step": 7006500 + }, + { + "epoch": 20.28, + "learning_rate": 3.986248813217859e-05, + "loss": 2.1708, + "step": 7007000 + }, + { + "epoch": 20.28, + "learning_rate": 3.9861765931826606e-05, + "loss": 2.1658, + "step": 7007500 + }, + { + "epoch": 20.29, + "learning_rate": 3.986104228417933e-05, + "loss": 2.2123, + "step": 7008000 + }, + { + "epoch": 20.29, + "learning_rate": 3.986031863653205e-05, + "loss": 2.1934, + "step": 7008500 + }, + { + "epoch": 20.29, + "learning_rate": 3.985959498888477e-05, + "loss": 2.1881, + "step": 7009000 + }, + { + "epoch": 20.29, + "learning_rate": 3.9858871341237495e-05, + "loss": 2.1759, + "step": 7009500 + }, + { + "epoch": 20.29, + "learning_rate": 3.985814769359022e-05, + "loss": 2.1932, + "step": 7010000 + }, + { + "epoch": 20.29, + "learning_rate": 3.985742404594294e-05, + "loss": 2.1948, + "step": 7010500 + }, + { + "epoch": 20.29, + "learning_rate": 3.985670184559096e-05, + "loss": 2.1971, + "step": 7011000 + }, + { + "epoch": 20.3, + "learning_rate": 3.9855978197943684e-05, + "loss": 2.2012, + "step": 7011500 + }, + { + "epoch": 20.3, + "learning_rate": 3.98552559975917e-05, + "loss": 2.1917, + "step": 7012000 + }, + { + "epoch": 20.3, + "learning_rate": 3.985453234994442e-05, + "loss": 2.1855, + "step": 7012500 + }, + { + "epoch": 20.3, + "learning_rate": 3.9853808702297144e-05, + "loss": 2.1852, + "step": 7013000 + }, + { + "epoch": 20.3, + "learning_rate": 3.985308505464987e-05, + "loss": 2.2068, + "step": 7013500 + }, + { + "epoch": 20.3, + "learning_rate": 3.9852361407002595e-05, + "loss": 2.2028, + "step": 7014000 + }, + { + "epoch": 20.3, + "learning_rate": 3.985163775935532e-05, + "loss": 2.1731, + "step": 7014500 + }, + { + "epoch": 20.31, + "learning_rate": 3.985091411170804e-05, + "loss": 2.18, + "step": 7015000 + }, + { + "epoch": 20.31, + "learning_rate": 3.985019046406077e-05, + "loss": 2.1947, + "step": 7015500 + }, + { + "epoch": 20.31, + "learning_rate": 3.9849468263708784e-05, + "loss": 2.2185, + "step": 7016000 + }, + { + "epoch": 20.31, + "learning_rate": 3.984874461606151e-05, + "loss": 2.1597, + "step": 7016500 + }, + { + "epoch": 20.31, + "learning_rate": 3.984802096841423e-05, + "loss": 2.1912, + "step": 7017000 + }, + { + "epoch": 20.31, + "learning_rate": 3.984729732076695e-05, + "loss": 2.1821, + "step": 7017500 + }, + { + "epoch": 20.31, + "learning_rate": 3.984657367311967e-05, + "loss": 2.1817, + "step": 7018000 + }, + { + "epoch": 20.32, + "learning_rate": 3.9845850025472396e-05, + "loss": 2.1752, + "step": 7018500 + }, + { + "epoch": 20.32, + "learning_rate": 3.984512782512042e-05, + "loss": 2.2003, + "step": 7019000 + }, + { + "epoch": 20.32, + "learning_rate": 3.984440417747314e-05, + "loss": 2.1833, + "step": 7019500 + }, + { + "epoch": 20.32, + "learning_rate": 3.984368052982586e-05, + "loss": 2.1761, + "step": 7020000 + }, + { + "epoch": 20.32, + "learning_rate": 3.984295688217859e-05, + "loss": 2.1692, + "step": 7020500 + }, + { + "epoch": 20.32, + "learning_rate": 3.9842233234531314e-05, + "loss": 2.1949, + "step": 7021000 + }, + { + "epoch": 20.32, + "learning_rate": 3.9841509586884036e-05, + "loss": 2.1685, + "step": 7021500 + }, + { + "epoch": 20.33, + "learning_rate": 3.984078593923676e-05, + "loss": 2.1944, + "step": 7022000 + }, + { + "epoch": 20.33, + "learning_rate": 3.984006229158948e-05, + "loss": 2.1733, + "step": 7022500 + }, + { + "epoch": 20.33, + "learning_rate": 3.98393386439422e-05, + "loss": 2.1836, + "step": 7023000 + }, + { + "epoch": 20.33, + "learning_rate": 3.983861644359022e-05, + "loss": 2.1937, + "step": 7023500 + }, + { + "epoch": 20.33, + "learning_rate": 3.983789279594295e-05, + "loss": 2.2019, + "step": 7024000 + }, + { + "epoch": 20.33, + "learning_rate": 3.983716914829567e-05, + "loss": 2.185, + "step": 7024500 + }, + { + "epoch": 20.33, + "learning_rate": 3.983644550064839e-05, + "loss": 2.2017, + "step": 7025000 + }, + { + "epoch": 20.34, + "learning_rate": 3.9835721853001114e-05, + "loss": 2.1772, + "step": 7025500 + }, + { + "epoch": 20.34, + "learning_rate": 3.9834998205353836e-05, + "loss": 2.1999, + "step": 7026000 + }, + { + "epoch": 20.34, + "learning_rate": 3.983427600500185e-05, + "loss": 2.1927, + "step": 7026500 + }, + { + "epoch": 20.34, + "learning_rate": 3.9833552357354574e-05, + "loss": 2.1623, + "step": 7027000 + }, + { + "epoch": 20.34, + "learning_rate": 3.9832830157002596e-05, + "loss": 2.1911, + "step": 7027500 + }, + { + "epoch": 20.34, + "learning_rate": 3.983210650935532e-05, + "loss": 2.1765, + "step": 7028000 + }, + { + "epoch": 20.34, + "learning_rate": 3.983138286170805e-05, + "loss": 2.1804, + "step": 7028500 + }, + { + "epoch": 20.35, + "learning_rate": 3.983065921406077e-05, + "loss": 2.1886, + "step": 7029000 + }, + { + "epoch": 20.35, + "learning_rate": 3.9829937013708785e-05, + "loss": 2.1703, + "step": 7029500 + }, + { + "epoch": 20.35, + "learning_rate": 3.98292148133568e-05, + "loss": 2.1797, + "step": 7030000 + }, + { + "epoch": 20.35, + "learning_rate": 3.982849116570952e-05, + "loss": 2.2094, + "step": 7030500 + }, + { + "epoch": 20.35, + "learning_rate": 3.9827767518062245e-05, + "loss": 2.191, + "step": 7031000 + }, + { + "epoch": 20.35, + "learning_rate": 3.982704387041497e-05, + "loss": 2.1692, + "step": 7031500 + }, + { + "epoch": 20.35, + "learning_rate": 3.9826320222767697e-05, + "loss": 2.2012, + "step": 7032000 + }, + { + "epoch": 20.36, + "learning_rate": 3.982559657512042e-05, + "loss": 2.1667, + "step": 7032500 + }, + { + "epoch": 20.36, + "learning_rate": 3.982487292747314e-05, + "loss": 2.1905, + "step": 7033000 + }, + { + "epoch": 20.36, + "learning_rate": 3.982414927982586e-05, + "loss": 2.1896, + "step": 7033500 + }, + { + "epoch": 20.36, + "learning_rate": 3.9823425632178585e-05, + "loss": 2.1945, + "step": 7034000 + }, + { + "epoch": 20.36, + "learning_rate": 3.982270198453131e-05, + "loss": 2.1896, + "step": 7034500 + }, + { + "epoch": 20.36, + "learning_rate": 3.982197833688403e-05, + "loss": 2.2126, + "step": 7035000 + }, + { + "epoch": 20.36, + "learning_rate": 3.982125468923676e-05, + "loss": 2.2072, + "step": 7035500 + }, + { + "epoch": 20.37, + "learning_rate": 3.982053104158948e-05, + "loss": 2.1824, + "step": 7036000 + }, + { + "epoch": 20.37, + "learning_rate": 3.98198073939422e-05, + "loss": 2.1857, + "step": 7036500 + }, + { + "epoch": 20.37, + "learning_rate": 3.981908519359022e-05, + "loss": 2.1857, + "step": 7037000 + }, + { + "epoch": 20.37, + "learning_rate": 3.981836154594295e-05, + "loss": 2.1818, + "step": 7037500 + }, + { + "epoch": 20.37, + "learning_rate": 3.981763789829567e-05, + "loss": 2.174, + "step": 7038000 + }, + { + "epoch": 20.37, + "learning_rate": 3.981691425064839e-05, + "loss": 2.154, + "step": 7038500 + }, + { + "epoch": 20.38, + "learning_rate": 3.9816190603001115e-05, + "loss": 2.1931, + "step": 7039000 + }, + { + "epoch": 20.38, + "learning_rate": 3.981546695535384e-05, + "loss": 2.1898, + "step": 7039500 + }, + { + "epoch": 20.38, + "learning_rate": 3.981474330770656e-05, + "loss": 2.1961, + "step": 7040000 + }, + { + "epoch": 20.38, + "learning_rate": 3.981401966005928e-05, + "loss": 2.1729, + "step": 7040500 + }, + { + "epoch": 20.38, + "learning_rate": 3.9813296012412004e-05, + "loss": 2.1951, + "step": 7041000 + }, + { + "epoch": 20.38, + "learning_rate": 3.9812572364764726e-05, + "loss": 2.1889, + "step": 7041500 + }, + { + "epoch": 20.38, + "learning_rate": 3.981184871711745e-05, + "loss": 2.1871, + "step": 7042000 + }, + { + "epoch": 20.39, + "learning_rate": 3.981112651676547e-05, + "loss": 2.2107, + "step": 7042500 + }, + { + "epoch": 20.39, + "learning_rate": 3.98104028691182e-05, + "loss": 2.1899, + "step": 7043000 + }, + { + "epoch": 20.39, + "learning_rate": 3.9809680668766215e-05, + "loss": 2.1718, + "step": 7043500 + }, + { + "epoch": 20.39, + "learning_rate": 3.980895702111894e-05, + "loss": 2.1903, + "step": 7044000 + }, + { + "epoch": 20.39, + "learning_rate": 3.980823337347166e-05, + "loss": 2.1603, + "step": 7044500 + }, + { + "epoch": 20.39, + "learning_rate": 3.980750972582438e-05, + "loss": 2.1761, + "step": 7045000 + }, + { + "epoch": 20.39, + "learning_rate": 3.9806786078177104e-05, + "loss": 2.1583, + "step": 7045500 + }, + { + "epoch": 20.4, + "learning_rate": 3.9806062430529826e-05, + "loss": 2.1942, + "step": 7046000 + }, + { + "epoch": 20.4, + "learning_rate": 3.980534023017785e-05, + "loss": 2.1957, + "step": 7046500 + }, + { + "epoch": 20.4, + "learning_rate": 3.980461658253057e-05, + "loss": 2.194, + "step": 7047000 + }, + { + "epoch": 20.4, + "learning_rate": 3.980389293488329e-05, + "loss": 2.1757, + "step": 7047500 + }, + { + "epoch": 20.4, + "learning_rate": 3.9803169287236015e-05, + "loss": 2.1898, + "step": 7048000 + }, + { + "epoch": 20.4, + "learning_rate": 3.980244563958874e-05, + "loss": 2.2045, + "step": 7048500 + }, + { + "epoch": 20.4, + "learning_rate": 3.980172343923675e-05, + "loss": 2.1692, + "step": 7049000 + }, + { + "epoch": 20.41, + "learning_rate": 3.9800999791589475e-05, + "loss": 2.1962, + "step": 7049500 + }, + { + "epoch": 20.41, + "learning_rate": 3.98002761439422e-05, + "loss": 2.1654, + "step": 7050000 + }, + { + "epoch": 20.41, + "learning_rate": 3.9799552496294926e-05, + "loss": 2.1826, + "step": 7050500 + }, + { + "epoch": 20.41, + "learning_rate": 3.979882884864765e-05, + "loss": 2.189, + "step": 7051000 + }, + { + "epoch": 20.41, + "learning_rate": 3.979810520100037e-05, + "loss": 2.203, + "step": 7051500 + }, + { + "epoch": 20.41, + "learning_rate": 3.97973815533531e-05, + "loss": 2.1781, + "step": 7052000 + }, + { + "epoch": 20.41, + "learning_rate": 3.979665790570582e-05, + "loss": 2.1805, + "step": 7052500 + }, + { + "epoch": 20.42, + "learning_rate": 3.9795934258058544e-05, + "loss": 2.1746, + "step": 7053000 + }, + { + "epoch": 20.42, + "learning_rate": 3.979521205770656e-05, + "loss": 2.1918, + "step": 7053500 + }, + { + "epoch": 20.42, + "learning_rate": 3.979448841005928e-05, + "loss": 2.2041, + "step": 7054000 + }, + { + "epoch": 20.42, + "learning_rate": 3.9793764762412004e-05, + "loss": 2.2015, + "step": 7054500 + }, + { + "epoch": 20.42, + "learning_rate": 3.9793041114764727e-05, + "loss": 2.1931, + "step": 7055000 + }, + { + "epoch": 20.42, + "learning_rate": 3.979231746711745e-05, + "loss": 2.1616, + "step": 7055500 + }, + { + "epoch": 20.42, + "learning_rate": 3.979159381947017e-05, + "loss": 2.1781, + "step": 7056000 + }, + { + "epoch": 20.43, + "learning_rate": 3.9790871619118193e-05, + "loss": 2.2021, + "step": 7056500 + }, + { + "epoch": 20.43, + "learning_rate": 3.9790147971470916e-05, + "loss": 2.2019, + "step": 7057000 + }, + { + "epoch": 20.43, + "learning_rate": 3.9789424323823645e-05, + "loss": 2.1837, + "step": 7057500 + }, + { + "epoch": 20.43, + "learning_rate": 3.978870067617637e-05, + "loss": 2.1752, + "step": 7058000 + }, + { + "epoch": 20.43, + "learning_rate": 3.978797702852909e-05, + "loss": 2.1906, + "step": 7058500 + }, + { + "epoch": 20.43, + "learning_rate": 3.9787254828177105e-05, + "loss": 2.2004, + "step": 7059000 + }, + { + "epoch": 20.43, + "learning_rate": 3.978653118052983e-05, + "loss": 2.1731, + "step": 7059500 + }, + { + "epoch": 20.44, + "learning_rate": 3.978580753288255e-05, + "loss": 2.1845, + "step": 7060000 + }, + { + "epoch": 20.44, + "learning_rate": 3.978508388523527e-05, + "loss": 2.1708, + "step": 7060500 + }, + { + "epoch": 20.44, + "learning_rate": 3.9784361684883294e-05, + "loss": 2.2081, + "step": 7061000 + }, + { + "epoch": 20.44, + "learning_rate": 3.978363948453131e-05, + "loss": 2.1721, + "step": 7061500 + }, + { + "epoch": 20.44, + "learning_rate": 3.9782917284179325e-05, + "loss": 2.2073, + "step": 7062000 + }, + { + "epoch": 20.44, + "learning_rate": 3.978219363653205e-05, + "loss": 2.1906, + "step": 7062500 + }, + { + "epoch": 20.44, + "learning_rate": 3.9781469988884776e-05, + "loss": 2.1897, + "step": 7063000 + }, + { + "epoch": 20.45, + "learning_rate": 3.97807463412375e-05, + "loss": 2.203, + "step": 7063500 + }, + { + "epoch": 20.45, + "learning_rate": 3.978002269359022e-05, + "loss": 2.1499, + "step": 7064000 + }, + { + "epoch": 20.45, + "learning_rate": 3.977929904594294e-05, + "loss": 2.2014, + "step": 7064500 + }, + { + "epoch": 20.45, + "learning_rate": 3.9778575398295665e-05, + "loss": 2.1797, + "step": 7065000 + }, + { + "epoch": 20.45, + "learning_rate": 3.9777851750648394e-05, + "loss": 2.1962, + "step": 7065500 + }, + { + "epoch": 20.45, + "learning_rate": 3.9777128103001116e-05, + "loss": 2.1929, + "step": 7066000 + }, + { + "epoch": 20.45, + "learning_rate": 3.977640590264913e-05, + "loss": 2.1796, + "step": 7066500 + }, + { + "epoch": 20.46, + "learning_rate": 3.9775682255001854e-05, + "loss": 2.1689, + "step": 7067000 + }, + { + "epoch": 20.46, + "learning_rate": 3.9774958607354576e-05, + "loss": 2.2087, + "step": 7067500 + }, + { + "epoch": 20.46, + "learning_rate": 3.97742349597073e-05, + "loss": 2.2148, + "step": 7068000 + }, + { + "epoch": 20.46, + "learning_rate": 3.977351131206003e-05, + "loss": 2.1938, + "step": 7068500 + }, + { + "epoch": 20.46, + "learning_rate": 3.977278766441275e-05, + "loss": 2.2049, + "step": 7069000 + }, + { + "epoch": 20.46, + "learning_rate": 3.977206401676547e-05, + "loss": 2.192, + "step": 7069500 + }, + { + "epoch": 20.46, + "learning_rate": 3.977134181641349e-05, + "loss": 2.1895, + "step": 7070000 + }, + { + "epoch": 20.47, + "learning_rate": 3.977061816876621e-05, + "loss": 2.1864, + "step": 7070500 + }, + { + "epoch": 20.47, + "learning_rate": 3.976989452111893e-05, + "loss": 2.1879, + "step": 7071000 + }, + { + "epoch": 20.47, + "learning_rate": 3.9769170873471654e-05, + "loss": 2.2107, + "step": 7071500 + }, + { + "epoch": 20.47, + "learning_rate": 3.9768448673119677e-05, + "loss": 2.2035, + "step": 7072000 + }, + { + "epoch": 20.47, + "learning_rate": 3.97677250254724e-05, + "loss": 2.1805, + "step": 7072500 + }, + { + "epoch": 20.47, + "learning_rate": 3.976700137782513e-05, + "loss": 2.1792, + "step": 7073000 + }, + { + "epoch": 20.47, + "learning_rate": 3.976627773017785e-05, + "loss": 2.1931, + "step": 7073500 + }, + { + "epoch": 20.48, + "learning_rate": 3.9765555529825866e-05, + "loss": 2.2143, + "step": 7074000 + }, + { + "epoch": 20.48, + "learning_rate": 3.976483188217859e-05, + "loss": 2.195, + "step": 7074500 + }, + { + "epoch": 20.48, + "learning_rate": 3.976410823453131e-05, + "loss": 2.187, + "step": 7075000 + }, + { + "epoch": 20.48, + "learning_rate": 3.976338458688403e-05, + "loss": 2.2038, + "step": 7075500 + }, + { + "epoch": 20.48, + "learning_rate": 3.9762660939236755e-05, + "loss": 2.1757, + "step": 7076000 + }, + { + "epoch": 20.48, + "learning_rate": 3.976193729158948e-05, + "loss": 2.2237, + "step": 7076500 + }, + { + "epoch": 20.49, + "learning_rate": 3.97612136439422e-05, + "loss": 2.1807, + "step": 7077000 + }, + { + "epoch": 20.49, + "learning_rate": 3.976048999629493e-05, + "loss": 2.1902, + "step": 7077500 + }, + { + "epoch": 20.49, + "learning_rate": 3.975976634864765e-05, + "loss": 2.2015, + "step": 7078000 + }, + { + "epoch": 20.49, + "learning_rate": 3.975904270100037e-05, + "loss": 2.1906, + "step": 7078500 + }, + { + "epoch": 20.49, + "learning_rate": 3.975832050064839e-05, + "loss": 2.2108, + "step": 7079000 + }, + { + "epoch": 20.49, + "learning_rate": 3.975759685300111e-05, + "loss": 2.2003, + "step": 7079500 + }, + { + "epoch": 20.49, + "learning_rate": 3.975687320535383e-05, + "loss": 2.2145, + "step": 7080000 + }, + { + "epoch": 20.5, + "learning_rate": 3.975614955770656e-05, + "loss": 2.2078, + "step": 7080500 + }, + { + "epoch": 20.5, + "learning_rate": 3.9755425910059284e-05, + "loss": 2.2111, + "step": 7081000 + }, + { + "epoch": 20.5, + "learning_rate": 3.9754703709707306e-05, + "loss": 2.1868, + "step": 7081500 + }, + { + "epoch": 20.5, + "learning_rate": 3.975398006206003e-05, + "loss": 2.1746, + "step": 7082000 + }, + { + "epoch": 20.5, + "learning_rate": 3.975325641441275e-05, + "loss": 2.1731, + "step": 7082500 + }, + { + "epoch": 20.5, + "learning_rate": 3.975253276676547e-05, + "loss": 2.1799, + "step": 7083000 + }, + { + "epoch": 20.5, + "learning_rate": 3.975181056641349e-05, + "loss": 2.1802, + "step": 7083500 + }, + { + "epoch": 20.51, + "learning_rate": 3.975108691876621e-05, + "loss": 2.192, + "step": 7084000 + }, + { + "epoch": 20.51, + "learning_rate": 3.975036327111893e-05, + "loss": 2.2028, + "step": 7084500 + }, + { + "epoch": 20.51, + "learning_rate": 3.9749639623471655e-05, + "loss": 2.1753, + "step": 7085000 + }, + { + "epoch": 20.51, + "learning_rate": 3.974891597582438e-05, + "loss": 2.1644, + "step": 7085500 + }, + { + "epoch": 20.51, + "learning_rate": 3.9748192328177106e-05, + "loss": 2.1746, + "step": 7086000 + }, + { + "epoch": 20.51, + "learning_rate": 3.974746868052983e-05, + "loss": 2.1841, + "step": 7086500 + }, + { + "epoch": 20.51, + "learning_rate": 3.974674792747314e-05, + "loss": 2.2011, + "step": 7087000 + }, + { + "epoch": 20.52, + "learning_rate": 3.974602427982586e-05, + "loss": 2.1821, + "step": 7087500 + }, + { + "epoch": 20.52, + "learning_rate": 3.974530063217859e-05, + "loss": 2.1987, + "step": 7088000 + }, + { + "epoch": 20.52, + "learning_rate": 3.974457698453131e-05, + "loss": 2.1808, + "step": 7088500 + }, + { + "epoch": 20.52, + "learning_rate": 3.974385333688403e-05, + "loss": 2.2231, + "step": 7089000 + }, + { + "epoch": 20.52, + "learning_rate": 3.9743129689236755e-05, + "loss": 2.2048, + "step": 7089500 + }, + { + "epoch": 20.52, + "learning_rate": 3.974240604158948e-05, + "loss": 2.1878, + "step": 7090000 + }, + { + "epoch": 20.52, + "learning_rate": 3.9741682393942207e-05, + "loss": 2.1903, + "step": 7090500 + }, + { + "epoch": 20.53, + "learning_rate": 3.974096019359022e-05, + "loss": 2.2219, + "step": 7091000 + }, + { + "epoch": 20.53, + "learning_rate": 3.9740236545942944e-05, + "loss": 2.1865, + "step": 7091500 + }, + { + "epoch": 20.53, + "learning_rate": 3.9739512898295667e-05, + "loss": 2.1993, + "step": 7092000 + }, + { + "epoch": 20.53, + "learning_rate": 3.973879069794368e-05, + "loss": 2.1897, + "step": 7092500 + }, + { + "epoch": 20.53, + "learning_rate": 3.9738067050296404e-05, + "loss": 2.1883, + "step": 7093000 + }, + { + "epoch": 20.53, + "learning_rate": 3.9737343402649127e-05, + "loss": 2.1794, + "step": 7093500 + }, + { + "epoch": 20.53, + "learning_rate": 3.9736619755001856e-05, + "loss": 2.1779, + "step": 7094000 + }, + { + "epoch": 20.54, + "learning_rate": 3.973589610735458e-05, + "loss": 2.1929, + "step": 7094500 + }, + { + "epoch": 20.54, + "learning_rate": 3.97351724597073e-05, + "loss": 2.1846, + "step": 7095000 + }, + { + "epoch": 20.54, + "learning_rate": 3.973444881206003e-05, + "loss": 2.1924, + "step": 7095500 + }, + { + "epoch": 20.54, + "learning_rate": 3.9733726611708045e-05, + "loss": 2.1688, + "step": 7096000 + }, + { + "epoch": 20.54, + "learning_rate": 3.973300296406077e-05, + "loss": 2.2002, + "step": 7096500 + }, + { + "epoch": 20.54, + "learning_rate": 3.973227931641349e-05, + "loss": 2.1993, + "step": 7097000 + }, + { + "epoch": 20.54, + "learning_rate": 3.973155566876621e-05, + "loss": 2.1994, + "step": 7097500 + }, + { + "epoch": 20.55, + "learning_rate": 3.9730832021118934e-05, + "loss": 2.1679, + "step": 7098000 + }, + { + "epoch": 20.55, + "learning_rate": 3.9730108373471656e-05, + "loss": 2.1774, + "step": 7098500 + }, + { + "epoch": 20.55, + "learning_rate": 3.972938472582438e-05, + "loss": 2.1987, + "step": 7099000 + }, + { + "epoch": 20.55, + "learning_rate": 3.972866107817711e-05, + "loss": 2.1917, + "step": 7099500 + }, + { + "epoch": 20.55, + "learning_rate": 3.972793743052983e-05, + "loss": 2.2247, + "step": 7100000 + }, + { + "epoch": 20.55, + "learning_rate": 3.972721378288255e-05, + "loss": 2.1916, + "step": 7100500 + }, + { + "epoch": 20.55, + "learning_rate": 3.9726490135235274e-05, + "loss": 2.2065, + "step": 7101000 + }, + { + "epoch": 20.56, + "learning_rate": 3.9725766487587996e-05, + "loss": 2.1739, + "step": 7101500 + }, + { + "epoch": 20.56, + "learning_rate": 3.972504283994072e-05, + "loss": 2.1685, + "step": 7102000 + }, + { + "epoch": 20.56, + "learning_rate": 3.972431919229345e-05, + "loss": 2.2143, + "step": 7102500 + }, + { + "epoch": 20.56, + "learning_rate": 3.972359699194146e-05, + "loss": 2.1757, + "step": 7103000 + }, + { + "epoch": 20.56, + "learning_rate": 3.9722873344294185e-05, + "loss": 2.2041, + "step": 7103500 + }, + { + "epoch": 20.56, + "learning_rate": 3.972214969664691e-05, + "loss": 2.1868, + "step": 7104000 + }, + { + "epoch": 20.56, + "learning_rate": 3.972142604899963e-05, + "loss": 2.1786, + "step": 7104500 + }, + { + "epoch": 20.57, + "learning_rate": 3.972070240135236e-05, + "loss": 2.1694, + "step": 7105000 + }, + { + "epoch": 20.57, + "learning_rate": 3.971997875370508e-05, + "loss": 2.1881, + "step": 7105500 + }, + { + "epoch": 20.57, + "learning_rate": 3.97192551060578e-05, + "loss": 2.1967, + "step": 7106000 + }, + { + "epoch": 20.57, + "learning_rate": 3.971853290570582e-05, + "loss": 2.1878, + "step": 7106500 + }, + { + "epoch": 20.57, + "learning_rate": 3.9717810705353834e-05, + "loss": 2.1783, + "step": 7107000 + }, + { + "epoch": 20.57, + "learning_rate": 3.9717087057706556e-05, + "loss": 2.1804, + "step": 7107500 + }, + { + "epoch": 20.57, + "learning_rate": 3.971636341005928e-05, + "loss": 2.1845, + "step": 7108000 + }, + { + "epoch": 20.58, + "learning_rate": 3.971563976241201e-05, + "loss": 2.179, + "step": 7108500 + }, + { + "epoch": 20.58, + "learning_rate": 3.971491611476473e-05, + "loss": 2.178, + "step": 7109000 + }, + { + "epoch": 20.58, + "learning_rate": 3.971419246711745e-05, + "loss": 2.1881, + "step": 7109500 + }, + { + "epoch": 20.58, + "learning_rate": 3.971346881947018e-05, + "loss": 2.1806, + "step": 7110000 + }, + { + "epoch": 20.58, + "learning_rate": 3.97127451718229e-05, + "loss": 2.1826, + "step": 7110500 + }, + { + "epoch": 20.58, + "learning_rate": 3.9712021524175626e-05, + "loss": 2.1917, + "step": 7111000 + }, + { + "epoch": 20.58, + "learning_rate": 3.971129787652835e-05, + "loss": 2.1799, + "step": 7111500 + }, + { + "epoch": 20.59, + "learning_rate": 3.971057567617636e-05, + "loss": 2.2012, + "step": 7112000 + }, + { + "epoch": 20.59, + "learning_rate": 3.9709853475824386e-05, + "loss": 2.1971, + "step": 7112500 + }, + { + "epoch": 20.59, + "learning_rate": 3.970912982817711e-05, + "loss": 2.1988, + "step": 7113000 + }, + { + "epoch": 20.59, + "learning_rate": 3.970840618052983e-05, + "loss": 2.1677, + "step": 7113500 + }, + { + "epoch": 20.59, + "learning_rate": 3.970768253288255e-05, + "loss": 2.2067, + "step": 7114000 + }, + { + "epoch": 20.59, + "learning_rate": 3.9706958885235275e-05, + "loss": 2.1956, + "step": 7114500 + }, + { + "epoch": 20.6, + "learning_rate": 3.9706235237588e-05, + "loss": 2.1921, + "step": 7115000 + }, + { + "epoch": 20.6, + "learning_rate": 3.970551158994072e-05, + "loss": 2.1682, + "step": 7115500 + }, + { + "epoch": 20.6, + "learning_rate": 3.9704789389588735e-05, + "loss": 2.1757, + "step": 7116000 + }, + { + "epoch": 20.6, + "learning_rate": 3.970406574194146e-05, + "loss": 2.2008, + "step": 7116500 + }, + { + "epoch": 20.6, + "learning_rate": 3.9703342094294186e-05, + "loss": 2.2106, + "step": 7117000 + }, + { + "epoch": 20.6, + "learning_rate": 3.970261844664691e-05, + "loss": 2.1789, + "step": 7117500 + }, + { + "epoch": 20.6, + "learning_rate": 3.970189479899964e-05, + "loss": 2.2042, + "step": 7118000 + }, + { + "epoch": 20.61, + "learning_rate": 3.970117115135236e-05, + "loss": 2.1878, + "step": 7118500 + }, + { + "epoch": 20.61, + "learning_rate": 3.970044750370508e-05, + "loss": 2.1809, + "step": 7119000 + }, + { + "epoch": 20.61, + "learning_rate": 3.9699723856057804e-05, + "loss": 2.1625, + "step": 7119500 + }, + { + "epoch": 20.61, + "learning_rate": 3.969900165570582e-05, + "loss": 2.1908, + "step": 7120000 + }, + { + "epoch": 20.61, + "learning_rate": 3.969827800805854e-05, + "loss": 2.209, + "step": 7120500 + }, + { + "epoch": 20.61, + "learning_rate": 3.9697554360411264e-05, + "loss": 2.192, + "step": 7121000 + }, + { + "epoch": 20.61, + "learning_rate": 3.9696830712763986e-05, + "loss": 2.187, + "step": 7121500 + }, + { + "epoch": 20.62, + "learning_rate": 3.969610706511671e-05, + "loss": 2.2004, + "step": 7122000 + }, + { + "epoch": 20.62, + "learning_rate": 3.969538486476473e-05, + "loss": 2.1871, + "step": 7122500 + }, + { + "epoch": 20.62, + "learning_rate": 3.9694662664412746e-05, + "loss": 2.1944, + "step": 7123000 + }, + { + "epoch": 20.62, + "learning_rate": 3.969393901676547e-05, + "loss": 2.1808, + "step": 7123500 + }, + { + "epoch": 20.62, + "learning_rate": 3.9693216816413484e-05, + "loss": 2.207, + "step": 7124000 + }, + { + "epoch": 20.62, + "learning_rate": 3.9692493168766206e-05, + "loss": 2.1933, + "step": 7124500 + }, + { + "epoch": 20.62, + "learning_rate": 3.9691769521118935e-05, + "loss": 2.2003, + "step": 7125000 + }, + { + "epoch": 20.63, + "learning_rate": 3.969104587347166e-05, + "loss": 2.1714, + "step": 7125500 + }, + { + "epoch": 20.63, + "learning_rate": 3.9690322225824386e-05, + "loss": 2.1927, + "step": 7126000 + }, + { + "epoch": 20.63, + "learning_rate": 3.968959857817711e-05, + "loss": 2.1798, + "step": 7126500 + }, + { + "epoch": 20.63, + "learning_rate": 3.968887493052983e-05, + "loss": 2.1889, + "step": 7127000 + }, + { + "epoch": 20.63, + "learning_rate": 3.968815128288255e-05, + "loss": 2.1951, + "step": 7127500 + }, + { + "epoch": 20.63, + "learning_rate": 3.9687427635235275e-05, + "loss": 2.1823, + "step": 7128000 + }, + { + "epoch": 20.63, + "learning_rate": 3.9686703987588e-05, + "loss": 2.1805, + "step": 7128500 + }, + { + "epoch": 20.64, + "learning_rate": 3.968598033994072e-05, + "loss": 2.2102, + "step": 7129000 + }, + { + "epoch": 20.64, + "learning_rate": 3.968525669229344e-05, + "loss": 2.1742, + "step": 7129500 + }, + { + "epoch": 20.64, + "learning_rate": 3.9684533044646164e-05, + "loss": 2.1756, + "step": 7130000 + }, + { + "epoch": 20.64, + "learning_rate": 3.9683809396998887e-05, + "loss": 2.1813, + "step": 7130500 + }, + { + "epoch": 20.64, + "learning_rate": 3.968308574935161e-05, + "loss": 2.1899, + "step": 7131000 + }, + { + "epoch": 20.64, + "learning_rate": 3.968236210170434e-05, + "loss": 2.1827, + "step": 7131500 + }, + { + "epoch": 20.64, + "learning_rate": 3.968163845405706e-05, + "loss": 2.1851, + "step": 7132000 + }, + { + "epoch": 20.65, + "learning_rate": 3.968091480640979e-05, + "loss": 2.1713, + "step": 7132500 + }, + { + "epoch": 20.65, + "learning_rate": 3.9680192606057805e-05, + "loss": 2.2002, + "step": 7133000 + }, + { + "epoch": 20.65, + "learning_rate": 3.967947040570582e-05, + "loss": 2.1963, + "step": 7133500 + }, + { + "epoch": 20.65, + "learning_rate": 3.967874675805854e-05, + "loss": 2.1959, + "step": 7134000 + }, + { + "epoch": 20.65, + "learning_rate": 3.9678023110411265e-05, + "loss": 2.1962, + "step": 7134500 + }, + { + "epoch": 20.65, + "learning_rate": 3.967729946276399e-05, + "loss": 2.1916, + "step": 7135000 + }, + { + "epoch": 20.65, + "learning_rate": 3.967657581511671e-05, + "loss": 2.1807, + "step": 7135500 + }, + { + "epoch": 20.66, + "learning_rate": 3.967585216746944e-05, + "loss": 2.2127, + "step": 7136000 + }, + { + "epoch": 20.66, + "learning_rate": 3.967512851982216e-05, + "loss": 2.1899, + "step": 7136500 + }, + { + "epoch": 20.66, + "learning_rate": 3.967440487217488e-05, + "loss": 2.1724, + "step": 7137000 + }, + { + "epoch": 20.66, + "learning_rate": 3.9673681224527605e-05, + "loss": 2.2133, + "step": 7137500 + }, + { + "epoch": 20.66, + "learning_rate": 3.967295757688033e-05, + "loss": 2.1579, + "step": 7138000 + }, + { + "epoch": 20.66, + "learning_rate": 3.967223392923305e-05, + "loss": 2.1966, + "step": 7138500 + }, + { + "epoch": 20.66, + "learning_rate": 3.967151028158577e-05, + "loss": 2.1854, + "step": 7139000 + }, + { + "epoch": 20.67, + "learning_rate": 3.96707866339385e-05, + "loss": 2.2129, + "step": 7139500 + }, + { + "epoch": 20.67, + "learning_rate": 3.9670064433586516e-05, + "loss": 2.1787, + "step": 7140000 + }, + { + "epoch": 20.67, + "learning_rate": 3.966934078593924e-05, + "loss": 2.1755, + "step": 7140500 + }, + { + "epoch": 20.67, + "learning_rate": 3.966861713829196e-05, + "loss": 2.1842, + "step": 7141000 + }, + { + "epoch": 20.67, + "learning_rate": 3.966789493793998e-05, + "loss": 2.1688, + "step": 7141500 + }, + { + "epoch": 20.67, + "learning_rate": 3.9667171290292705e-05, + "loss": 2.1846, + "step": 7142000 + }, + { + "epoch": 20.67, + "learning_rate": 3.966644764264543e-05, + "loss": 2.1878, + "step": 7142500 + }, + { + "epoch": 20.68, + "learning_rate": 3.966572399499815e-05, + "loss": 2.2005, + "step": 7143000 + }, + { + "epoch": 20.68, + "learning_rate": 3.966500034735087e-05, + "loss": 2.1735, + "step": 7143500 + }, + { + "epoch": 20.68, + "learning_rate": 3.9664276699703594e-05, + "loss": 2.1811, + "step": 7144000 + }, + { + "epoch": 20.68, + "learning_rate": 3.9663553052056316e-05, + "loss": 2.1703, + "step": 7144500 + }, + { + "epoch": 20.68, + "learning_rate": 3.966282940440904e-05, + "loss": 2.2097, + "step": 7145000 + }, + { + "epoch": 20.68, + "learning_rate": 3.966210575676176e-05, + "loss": 2.1886, + "step": 7145500 + }, + { + "epoch": 20.68, + "learning_rate": 3.966138210911449e-05, + "loss": 2.1999, + "step": 7146000 + }, + { + "epoch": 20.69, + "learning_rate": 3.9660659908762505e-05, + "loss": 2.1974, + "step": 7146500 + }, + { + "epoch": 20.69, + "learning_rate": 3.9659936261115234e-05, + "loss": 2.1876, + "step": 7147000 + }, + { + "epoch": 20.69, + "learning_rate": 3.965921406076325e-05, + "loss": 2.1884, + "step": 7147500 + }, + { + "epoch": 20.69, + "learning_rate": 3.965849041311597e-05, + "loss": 2.1904, + "step": 7148000 + }, + { + "epoch": 20.69, + "learning_rate": 3.965776821276399e-05, + "loss": 2.1854, + "step": 7148500 + }, + { + "epoch": 20.69, + "learning_rate": 3.965704456511672e-05, + "loss": 2.189, + "step": 7149000 + }, + { + "epoch": 20.69, + "learning_rate": 3.965632091746944e-05, + "loss": 2.1912, + "step": 7149500 + }, + { + "epoch": 20.7, + "learning_rate": 3.965559726982216e-05, + "loss": 2.1867, + "step": 7150000 + }, + { + "epoch": 20.7, + "learning_rate": 3.965487362217488e-05, + "loss": 2.1754, + "step": 7150500 + }, + { + "epoch": 20.7, + "learning_rate": 3.9654149974527606e-05, + "loss": 2.1954, + "step": 7151000 + }, + { + "epoch": 20.7, + "learning_rate": 3.965342632688033e-05, + "loss": 2.1747, + "step": 7151500 + }, + { + "epoch": 20.7, + "learning_rate": 3.965270267923305e-05, + "loss": 2.1966, + "step": 7152000 + }, + { + "epoch": 20.7, + "learning_rate": 3.965197903158577e-05, + "loss": 2.2262, + "step": 7152500 + }, + { + "epoch": 20.71, + "learning_rate": 3.9651255383938494e-05, + "loss": 2.1811, + "step": 7153000 + }, + { + "epoch": 20.71, + "learning_rate": 3.965053173629122e-05, + "loss": 2.2038, + "step": 7153500 + }, + { + "epoch": 20.71, + "learning_rate": 3.964980808864394e-05, + "loss": 2.1834, + "step": 7154000 + }, + { + "epoch": 20.71, + "learning_rate": 3.964908444099667e-05, + "loss": 2.1938, + "step": 7154500 + }, + { + "epoch": 20.71, + "learning_rate": 3.964836079334939e-05, + "loss": 2.1805, + "step": 7155000 + }, + { + "epoch": 20.71, + "learning_rate": 3.964763714570211e-05, + "loss": 2.1713, + "step": 7155500 + }, + { + "epoch": 20.71, + "learning_rate": 3.9646914945350135e-05, + "loss": 2.2008, + "step": 7156000 + }, + { + "epoch": 20.72, + "learning_rate": 3.964619274499815e-05, + "loss": 2.2036, + "step": 7156500 + }, + { + "epoch": 20.72, + "learning_rate": 3.964546909735087e-05, + "loss": 2.2091, + "step": 7157000 + }, + { + "epoch": 20.72, + "learning_rate": 3.964474689699889e-05, + "loss": 2.2039, + "step": 7157500 + }, + { + "epoch": 20.72, + "learning_rate": 3.964402324935162e-05, + "loss": 2.2009, + "step": 7158000 + }, + { + "epoch": 20.72, + "learning_rate": 3.964329960170434e-05, + "loss": 2.1991, + "step": 7158500 + }, + { + "epoch": 20.72, + "learning_rate": 3.964257595405706e-05, + "loss": 2.1926, + "step": 7159000 + }, + { + "epoch": 20.72, + "learning_rate": 3.9641852306409784e-05, + "loss": 2.1953, + "step": 7159500 + }, + { + "epoch": 20.73, + "learning_rate": 3.9641128658762506e-05, + "loss": 2.1746, + "step": 7160000 + }, + { + "epoch": 20.73, + "learning_rate": 3.964040501111523e-05, + "loss": 2.1994, + "step": 7160500 + }, + { + "epoch": 20.73, + "learning_rate": 3.963968136346795e-05, + "loss": 2.183, + "step": 7161000 + }, + { + "epoch": 20.73, + "learning_rate": 3.9638959163115966e-05, + "loss": 2.1718, + "step": 7161500 + }, + { + "epoch": 20.73, + "learning_rate": 3.963823551546869e-05, + "loss": 2.2027, + "step": 7162000 + }, + { + "epoch": 20.73, + "learning_rate": 3.963751186782142e-05, + "loss": 2.2022, + "step": 7162500 + }, + { + "epoch": 20.73, + "learning_rate": 3.963678822017414e-05, + "loss": 2.1776, + "step": 7163000 + }, + { + "epoch": 20.74, + "learning_rate": 3.963606457252687e-05, + "loss": 2.1903, + "step": 7163500 + }, + { + "epoch": 20.74, + "learning_rate": 3.9635342372174884e-05, + "loss": 2.193, + "step": 7164000 + }, + { + "epoch": 20.74, + "learning_rate": 3.9634618724527606e-05, + "loss": 2.2214, + "step": 7164500 + }, + { + "epoch": 20.74, + "learning_rate": 3.963389507688033e-05, + "loss": 2.1937, + "step": 7165000 + }, + { + "epoch": 20.74, + "learning_rate": 3.963317142923305e-05, + "loss": 2.1901, + "step": 7165500 + }, + { + "epoch": 20.74, + "learning_rate": 3.963244778158577e-05, + "loss": 2.1934, + "step": 7166000 + }, + { + "epoch": 20.74, + "learning_rate": 3.963172558123379e-05, + "loss": 2.1893, + "step": 7166500 + }, + { + "epoch": 20.75, + "learning_rate": 3.963100193358652e-05, + "loss": 2.1762, + "step": 7167000 + }, + { + "epoch": 20.75, + "learning_rate": 3.963027828593924e-05, + "loss": 2.1888, + "step": 7167500 + }, + { + "epoch": 20.75, + "learning_rate": 3.962955463829196e-05, + "loss": 2.2067, + "step": 7168000 + }, + { + "epoch": 20.75, + "learning_rate": 3.9628830990644684e-05, + "loss": 2.199, + "step": 7168500 + }, + { + "epoch": 20.75, + "learning_rate": 3.96281087902927e-05, + "loss": 2.2017, + "step": 7169000 + }, + { + "epoch": 20.75, + "learning_rate": 3.962738514264542e-05, + "loss": 2.1802, + "step": 7169500 + }, + { + "epoch": 20.75, + "learning_rate": 3.962666149499815e-05, + "loss": 2.2185, + "step": 7170000 + }, + { + "epoch": 20.76, + "learning_rate": 3.962593784735087e-05, + "loss": 2.185, + "step": 7170500 + }, + { + "epoch": 20.76, + "learning_rate": 3.9625214199703596e-05, + "loss": 2.1653, + "step": 7171000 + }, + { + "epoch": 20.76, + "learning_rate": 3.962449055205632e-05, + "loss": 2.2023, + "step": 7171500 + }, + { + "epoch": 20.76, + "learning_rate": 3.962376690440904e-05, + "loss": 2.1924, + "step": 7172000 + }, + { + "epoch": 20.76, + "learning_rate": 3.962304325676177e-05, + "loss": 2.2058, + "step": 7172500 + }, + { + "epoch": 20.76, + "learning_rate": 3.962231960911449e-05, + "loss": 2.1963, + "step": 7173000 + }, + { + "epoch": 20.76, + "learning_rate": 3.9621595961467214e-05, + "loss": 2.1966, + "step": 7173500 + }, + { + "epoch": 20.77, + "learning_rate": 3.9620872313819936e-05, + "loss": 2.2057, + "step": 7174000 + }, + { + "epoch": 20.77, + "learning_rate": 3.962014866617266e-05, + "loss": 2.1787, + "step": 7174500 + }, + { + "epoch": 20.77, + "learning_rate": 3.9619426465820674e-05, + "loss": 2.198, + "step": 7175000 + }, + { + "epoch": 20.77, + "learning_rate": 3.961870426546869e-05, + "loss": 2.1983, + "step": 7175500 + }, + { + "epoch": 20.77, + "learning_rate": 3.961798206511671e-05, + "loss": 2.1938, + "step": 7176000 + }, + { + "epoch": 20.77, + "learning_rate": 3.9617258417469434e-05, + "loss": 2.2027, + "step": 7176500 + }, + { + "epoch": 20.77, + "learning_rate": 3.9616534769822156e-05, + "loss": 2.1954, + "step": 7177000 + }, + { + "epoch": 20.78, + "learning_rate": 3.9615811122174885e-05, + "loss": 2.2239, + "step": 7177500 + }, + { + "epoch": 20.78, + "learning_rate": 3.961508747452761e-05, + "loss": 2.1961, + "step": 7178000 + }, + { + "epoch": 20.78, + "learning_rate": 3.961436382688033e-05, + "loss": 2.1883, + "step": 7178500 + }, + { + "epoch": 20.78, + "learning_rate": 3.9613641626528345e-05, + "loss": 2.1904, + "step": 7179000 + }, + { + "epoch": 20.78, + "learning_rate": 3.961291797888107e-05, + "loss": 2.2041, + "step": 7179500 + }, + { + "epoch": 20.78, + "learning_rate": 3.9612194331233796e-05, + "loss": 2.1883, + "step": 7180000 + }, + { + "epoch": 20.78, + "learning_rate": 3.961147068358652e-05, + "loss": 2.1977, + "step": 7180500 + }, + { + "epoch": 20.79, + "learning_rate": 3.961074703593924e-05, + "loss": 2.194, + "step": 7181000 + }, + { + "epoch": 20.79, + "learning_rate": 3.961002338829196e-05, + "loss": 2.1984, + "step": 7181500 + }, + { + "epoch": 20.79, + "learning_rate": 3.9609299740644685e-05, + "loss": 2.1997, + "step": 7182000 + }, + { + "epoch": 20.79, + "learning_rate": 3.96085775402927e-05, + "loss": 2.207, + "step": 7182500 + }, + { + "epoch": 20.79, + "learning_rate": 3.960785389264542e-05, + "loss": 2.1786, + "step": 7183000 + }, + { + "epoch": 20.79, + "learning_rate": 3.9607130244998145e-05, + "loss": 2.2072, + "step": 7183500 + }, + { + "epoch": 20.79, + "learning_rate": 3.960640659735087e-05, + "loss": 2.1652, + "step": 7184000 + }, + { + "epoch": 20.8, + "learning_rate": 3.960568439699889e-05, + "loss": 2.1916, + "step": 7184500 + }, + { + "epoch": 20.8, + "learning_rate": 3.960496074935162e-05, + "loss": 2.1847, + "step": 7185000 + }, + { + "epoch": 20.8, + "learning_rate": 3.960423710170434e-05, + "loss": 2.2219, + "step": 7185500 + }, + { + "epoch": 20.8, + "learning_rate": 3.960351345405706e-05, + "loss": 2.1979, + "step": 7186000 + }, + { + "epoch": 20.8, + "learning_rate": 3.9602789806409785e-05, + "loss": 2.1909, + "step": 7186500 + }, + { + "epoch": 20.8, + "learning_rate": 3.960206615876251e-05, + "loss": 2.1851, + "step": 7187000 + }, + { + "epoch": 20.8, + "learning_rate": 3.960134251111523e-05, + "loss": 2.2095, + "step": 7187500 + }, + { + "epoch": 20.81, + "learning_rate": 3.960061886346795e-05, + "loss": 2.1729, + "step": 7188000 + }, + { + "epoch": 20.81, + "learning_rate": 3.959989811041127e-05, + "loss": 2.2107, + "step": 7188500 + }, + { + "epoch": 20.81, + "learning_rate": 3.959917446276399e-05, + "loss": 2.1775, + "step": 7189000 + }, + { + "epoch": 20.81, + "learning_rate": 3.959845081511671e-05, + "loss": 2.1794, + "step": 7189500 + }, + { + "epoch": 20.81, + "learning_rate": 3.9597727167469434e-05, + "loss": 2.1903, + "step": 7190000 + }, + { + "epoch": 20.81, + "learning_rate": 3.959700351982216e-05, + "loss": 2.1969, + "step": 7190500 + }, + { + "epoch": 20.82, + "learning_rate": 3.959627987217488e-05, + "loss": 2.2004, + "step": 7191000 + }, + { + "epoch": 20.82, + "learning_rate": 3.95955562245276e-05, + "loss": 2.1858, + "step": 7191500 + }, + { + "epoch": 20.82, + "learning_rate": 3.9594832576880323e-05, + "loss": 2.1923, + "step": 7192000 + }, + { + "epoch": 20.82, + "learning_rate": 3.959410892923305e-05, + "loss": 2.1947, + "step": 7192500 + }, + { + "epoch": 20.82, + "learning_rate": 3.9593385281585775e-05, + "loss": 2.1651, + "step": 7193000 + }, + { + "epoch": 20.82, + "learning_rate": 3.95926616339385e-05, + "loss": 2.1864, + "step": 7193500 + }, + { + "epoch": 20.82, + "learning_rate": 3.959193943358652e-05, + "loss": 2.1864, + "step": 7194000 + }, + { + "epoch": 20.83, + "learning_rate": 3.959121578593924e-05, + "loss": 2.1666, + "step": 7194500 + }, + { + "epoch": 20.83, + "learning_rate": 3.9590492138291964e-05, + "loss": 2.1699, + "step": 7195000 + }, + { + "epoch": 20.83, + "learning_rate": 3.9589768490644686e-05, + "loss": 2.1882, + "step": 7195500 + }, + { + "epoch": 20.83, + "learning_rate": 3.958904484299741e-05, + "loss": 2.1871, + "step": 7196000 + }, + { + "epoch": 20.83, + "learning_rate": 3.9588322642645424e-05, + "loss": 2.1892, + "step": 7196500 + }, + { + "epoch": 20.83, + "learning_rate": 3.9587598994998146e-05, + "loss": 2.2012, + "step": 7197000 + }, + { + "epoch": 20.83, + "learning_rate": 3.958687534735087e-05, + "loss": 2.1832, + "step": 7197500 + }, + { + "epoch": 20.84, + "learning_rate": 3.9586154594294184e-05, + "loss": 2.1717, + "step": 7198000 + }, + { + "epoch": 20.84, + "learning_rate": 3.9585430946646906e-05, + "loss": 2.1965, + "step": 7198500 + }, + { + "epoch": 20.84, + "learning_rate": 3.958470729899963e-05, + "loss": 2.2071, + "step": 7199000 + }, + { + "epoch": 20.84, + "learning_rate": 3.958398365135235e-05, + "loss": 2.1903, + "step": 7199500 + }, + { + "epoch": 20.84, + "learning_rate": 3.958326000370508e-05, + "loss": 2.1991, + "step": 7200000 + }, + { + "epoch": 20.84, + "learning_rate": 3.95825363560578e-05, + "loss": 2.1824, + "step": 7200500 + }, + { + "epoch": 20.84, + "learning_rate": 3.9581812708410524e-05, + "loss": 2.2161, + "step": 7201000 + }, + { + "epoch": 20.85, + "learning_rate": 3.9581089060763246e-05, + "loss": 2.1802, + "step": 7201500 + }, + { + "epoch": 20.85, + "learning_rate": 3.958036541311597e-05, + "loss": 2.2092, + "step": 7202000 + }, + { + "epoch": 20.85, + "learning_rate": 3.95796417654687e-05, + "loss": 2.2037, + "step": 7202500 + }, + { + "epoch": 20.85, + "learning_rate": 3.957891956511671e-05, + "loss": 2.2011, + "step": 7203000 + }, + { + "epoch": 20.85, + "learning_rate": 3.9578195917469435e-05, + "loss": 2.2149, + "step": 7203500 + }, + { + "epoch": 20.85, + "learning_rate": 3.957747371711745e-05, + "loss": 2.2025, + "step": 7204000 + }, + { + "epoch": 20.85, + "learning_rate": 3.957675006947017e-05, + "loss": 2.1809, + "step": 7204500 + }, + { + "epoch": 20.86, + "learning_rate": 3.9576027869118195e-05, + "loss": 2.1722, + "step": 7205000 + }, + { + "epoch": 20.86, + "learning_rate": 3.957530422147092e-05, + "loss": 2.1973, + "step": 7205500 + }, + { + "epoch": 20.86, + "learning_rate": 3.957458057382364e-05, + "loss": 2.1982, + "step": 7206000 + }, + { + "epoch": 20.86, + "learning_rate": 3.957385692617636e-05, + "loss": 2.2047, + "step": 7206500 + }, + { + "epoch": 20.86, + "learning_rate": 3.9573133278529084e-05, + "loss": 2.1824, + "step": 7207000 + }, + { + "epoch": 20.86, + "learning_rate": 3.957240963088181e-05, + "loss": 2.1721, + "step": 7207500 + }, + { + "epoch": 20.86, + "learning_rate": 3.9571685983234536e-05, + "loss": 2.1989, + "step": 7208000 + }, + { + "epoch": 20.87, + "learning_rate": 3.957096233558726e-05, + "loss": 2.1662, + "step": 7208500 + }, + { + "epoch": 20.87, + "learning_rate": 3.957023868793998e-05, + "loss": 2.1649, + "step": 7209000 + }, + { + "epoch": 20.87, + "learning_rate": 3.95695150402927e-05, + "loss": 2.1563, + "step": 7209500 + }, + { + "epoch": 20.87, + "learning_rate": 3.9568791392645424e-05, + "loss": 2.1625, + "step": 7210000 + }, + { + "epoch": 20.87, + "learning_rate": 3.956806774499815e-05, + "loss": 2.1936, + "step": 7210500 + }, + { + "epoch": 20.87, + "learning_rate": 3.956734554464617e-05, + "loss": 2.1889, + "step": 7211000 + }, + { + "epoch": 20.87, + "learning_rate": 3.956662189699889e-05, + "loss": 2.2026, + "step": 7211500 + }, + { + "epoch": 20.88, + "learning_rate": 3.9565898249351614e-05, + "loss": 2.1658, + "step": 7212000 + }, + { + "epoch": 20.88, + "learning_rate": 3.9565174601704336e-05, + "loss": 2.181, + "step": 7212500 + }, + { + "epoch": 20.88, + "learning_rate": 3.956445095405706e-05, + "loss": 2.2005, + "step": 7213000 + }, + { + "epoch": 20.88, + "learning_rate": 3.956372730640978e-05, + "loss": 2.1768, + "step": 7213500 + }, + { + "epoch": 20.88, + "learning_rate": 3.95630036587625e-05, + "loss": 2.1973, + "step": 7214000 + }, + { + "epoch": 20.88, + "learning_rate": 3.9562281458410525e-05, + "loss": 2.1814, + "step": 7214500 + }, + { + "epoch": 20.88, + "learning_rate": 3.956155781076325e-05, + "loss": 2.1893, + "step": 7215000 + }, + { + "epoch": 20.89, + "learning_rate": 3.9560834163115976e-05, + "loss": 2.2131, + "step": 7215500 + }, + { + "epoch": 20.89, + "learning_rate": 3.95601105154687e-05, + "loss": 2.2068, + "step": 7216000 + }, + { + "epoch": 20.89, + "learning_rate": 3.955938686782142e-05, + "loss": 2.1922, + "step": 7216500 + }, + { + "epoch": 20.89, + "learning_rate": 3.955866322017414e-05, + "loss": 2.1701, + "step": 7217000 + }, + { + "epoch": 20.89, + "learning_rate": 3.9557939572526865e-05, + "loss": 2.1862, + "step": 7217500 + }, + { + "epoch": 20.89, + "learning_rate": 3.955721592487959e-05, + "loss": 2.187, + "step": 7218000 + }, + { + "epoch": 20.89, + "learning_rate": 3.955649227723231e-05, + "loss": 2.1803, + "step": 7218500 + }, + { + "epoch": 20.9, + "learning_rate": 3.9555770076880325e-05, + "loss": 2.1939, + "step": 7219000 + }, + { + "epoch": 20.9, + "learning_rate": 3.955504642923305e-05, + "loss": 2.1774, + "step": 7219500 + }, + { + "epoch": 20.9, + "learning_rate": 3.955432422888107e-05, + "loss": 2.1759, + "step": 7220000 + }, + { + "epoch": 20.9, + "learning_rate": 3.955360058123379e-05, + "loss": 2.2085, + "step": 7220500 + }, + { + "epoch": 20.9, + "learning_rate": 3.9552876933586514e-05, + "loss": 2.1986, + "step": 7221000 + }, + { + "epoch": 20.9, + "learning_rate": 3.9552153285939236e-05, + "loss": 2.2012, + "step": 7221500 + }, + { + "epoch": 20.9, + "learning_rate": 3.955142963829196e-05, + "loss": 2.1802, + "step": 7222000 + }, + { + "epoch": 20.91, + "learning_rate": 3.955070599064469e-05, + "loss": 2.2077, + "step": 7222500 + }, + { + "epoch": 20.91, + "learning_rate": 3.95499837902927e-05, + "loss": 2.1825, + "step": 7223000 + }, + { + "epoch": 20.91, + "learning_rate": 3.9549260142645425e-05, + "loss": 2.1874, + "step": 7223500 + }, + { + "epoch": 20.91, + "learning_rate": 3.954853649499815e-05, + "loss": 2.1706, + "step": 7224000 + }, + { + "epoch": 20.91, + "learning_rate": 3.9547812847350877e-05, + "loss": 2.1958, + "step": 7224500 + }, + { + "epoch": 20.91, + "learning_rate": 3.95470891997036e-05, + "loss": 2.1795, + "step": 7225000 + }, + { + "epoch": 20.91, + "learning_rate": 3.954636555205632e-05, + "loss": 2.1917, + "step": 7225500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954564190440904e-05, + "loss": 2.1903, + "step": 7226000 + }, + { + "epoch": 20.92, + "learning_rate": 3.9544918256761765e-05, + "loss": 2.2002, + "step": 7226500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954419460911449e-05, + "loss": 2.1824, + "step": 7227000 + }, + { + "epoch": 20.92, + "learning_rate": 3.954347096146721e-05, + "loss": 2.1861, + "step": 7227500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954274731381993e-05, + "loss": 2.1993, + "step": 7228000 + }, + { + "epoch": 20.92, + "learning_rate": 3.9542023666172654e-05, + "loss": 2.2133, + "step": 7228500 + }, + { + "epoch": 20.92, + "learning_rate": 3.954130001852538e-05, + "loss": 2.1809, + "step": 7229000 + }, + { + "epoch": 20.93, + "learning_rate": 3.9540576370878106e-05, + "loss": 2.1779, + "step": 7229500 + }, + { + "epoch": 20.93, + "learning_rate": 3.953985417052613e-05, + "loss": 2.1869, + "step": 7230000 + }, + { + "epoch": 20.93, + "learning_rate": 3.953913052287885e-05, + "loss": 2.1685, + "step": 7230500 + }, + { + "epoch": 20.93, + "learning_rate": 3.9538408322526866e-05, + "loss": 2.1714, + "step": 7231000 + }, + { + "epoch": 20.93, + "learning_rate": 3.953768612217488e-05, + "loss": 2.1683, + "step": 7231500 + }, + { + "epoch": 20.93, + "learning_rate": 3.9536962474527604e-05, + "loss": 2.1793, + "step": 7232000 + }, + { + "epoch": 20.94, + "learning_rate": 3.9536238826880326e-05, + "loss": 2.1777, + "step": 7232500 + }, + { + "epoch": 20.94, + "learning_rate": 3.953551517923305e-05, + "loss": 2.1841, + "step": 7233000 + }, + { + "epoch": 20.94, + "learning_rate": 3.953479153158578e-05, + "loss": 2.2017, + "step": 7233500 + }, + { + "epoch": 20.94, + "learning_rate": 3.953406933123379e-05, + "loss": 2.1756, + "step": 7234000 + }, + { + "epoch": 20.94, + "learning_rate": 3.9533345683586515e-05, + "loss": 2.2047, + "step": 7234500 + }, + { + "epoch": 20.94, + "learning_rate": 3.953262203593924e-05, + "loss": 2.1831, + "step": 7235000 + }, + { + "epoch": 20.94, + "learning_rate": 3.953189838829196e-05, + "loss": 2.1749, + "step": 7235500 + }, + { + "epoch": 20.95, + "learning_rate": 3.953117474064468e-05, + "loss": 2.194, + "step": 7236000 + }, + { + "epoch": 20.95, + "learning_rate": 3.9530451092997404e-05, + "loss": 2.2104, + "step": 7236500 + }, + { + "epoch": 20.95, + "learning_rate": 3.952972744535013e-05, + "loss": 2.2044, + "step": 7237000 + }, + { + "epoch": 20.95, + "learning_rate": 3.9529003797702855e-05, + "loss": 2.1808, + "step": 7237500 + }, + { + "epoch": 20.95, + "learning_rate": 3.952828159735088e-05, + "loss": 2.2064, + "step": 7238000 + }, + { + "epoch": 20.95, + "learning_rate": 3.952755939699889e-05, + "loss": 2.1714, + "step": 7238500 + }, + { + "epoch": 20.95, + "learning_rate": 3.9526835749351615e-05, + "loss": 2.1923, + "step": 7239000 + }, + { + "epoch": 20.96, + "learning_rate": 3.952611210170434e-05, + "loss": 2.1771, + "step": 7239500 + }, + { + "epoch": 20.96, + "learning_rate": 3.952538845405706e-05, + "loss": 2.191, + "step": 7240000 + }, + { + "epoch": 20.96, + "learning_rate": 3.952466480640978e-05, + "loss": 2.1641, + "step": 7240500 + }, + { + "epoch": 20.96, + "learning_rate": 3.9523941158762504e-05, + "loss": 2.1744, + "step": 7241000 + }, + { + "epoch": 20.96, + "learning_rate": 3.9523217511115226e-05, + "loss": 2.2073, + "step": 7241500 + }, + { + "epoch": 20.96, + "learning_rate": 3.9522493863467955e-05, + "loss": 2.1801, + "step": 7242000 + }, + { + "epoch": 20.96, + "learning_rate": 3.952177021582068e-05, + "loss": 2.1941, + "step": 7242500 + }, + { + "epoch": 20.97, + "learning_rate": 3.95210465681734e-05, + "loss": 2.186, + "step": 7243000 + }, + { + "epoch": 20.97, + "learning_rate": 3.952032292052612e-05, + "loss": 2.178, + "step": 7243500 + }, + { + "epoch": 20.97, + "learning_rate": 3.951960072017414e-05, + "loss": 2.1826, + "step": 7244000 + }, + { + "epoch": 20.97, + "learning_rate": 3.9518877072526867e-05, + "loss": 2.1881, + "step": 7244500 + }, + { + "epoch": 20.97, + "learning_rate": 3.951815342487959e-05, + "loss": 2.1954, + "step": 7245000 + }, + { + "epoch": 20.97, + "learning_rate": 3.951742977723231e-05, + "loss": 2.1761, + "step": 7245500 + }, + { + "epoch": 20.97, + "learning_rate": 3.951670612958503e-05, + "loss": 2.1957, + "step": 7246000 + }, + { + "epoch": 20.98, + "learning_rate": 3.9515982481937755e-05, + "loss": 2.2072, + "step": 7246500 + }, + { + "epoch": 20.98, + "learning_rate": 3.951526028158578e-05, + "loss": 2.1965, + "step": 7247000 + }, + { + "epoch": 20.98, + "learning_rate": 3.95145366339385e-05, + "loss": 2.1911, + "step": 7247500 + }, + { + "epoch": 20.98, + "learning_rate": 3.951381298629122e-05, + "loss": 2.2166, + "step": 7248000 + }, + { + "epoch": 20.98, + "learning_rate": 3.9513089338643945e-05, + "loss": 2.2259, + "step": 7248500 + }, + { + "epoch": 20.98, + "learning_rate": 3.951236569099667e-05, + "loss": 2.1975, + "step": 7249000 + }, + { + "epoch": 20.98, + "learning_rate": 3.951164204334939e-05, + "loss": 2.188, + "step": 7249500 + }, + { + "epoch": 20.99, + "learning_rate": 3.951091839570211e-05, + "loss": 2.2196, + "step": 7250000 + }, + { + "epoch": 20.99, + "learning_rate": 3.9510194748054833e-05, + "loss": 2.2185, + "step": 7250500 + }, + { + "epoch": 20.99, + "learning_rate": 3.950947399499815e-05, + "loss": 2.1839, + "step": 7251000 + }, + { + "epoch": 20.99, + "learning_rate": 3.950875034735087e-05, + "loss": 2.168, + "step": 7251500 + }, + { + "epoch": 20.99, + "learning_rate": 3.9508026699703594e-05, + "loss": 2.1618, + "step": 7252000 + }, + { + "epoch": 20.99, + "learning_rate": 3.950730305205632e-05, + "loss": 2.1876, + "step": 7252500 + }, + { + "epoch": 20.99, + "learning_rate": 3.950658085170434e-05, + "loss": 2.2013, + "step": 7253000 + }, + { + "epoch": 21.0, + "learning_rate": 3.950585720405706e-05, + "loss": 2.2074, + "step": 7253500 + }, + { + "epoch": 21.0, + "learning_rate": 3.950513355640978e-05, + "loss": 2.1825, + "step": 7254000 + }, + { + "epoch": 21.0, + "learning_rate": 3.9504409908762505e-05, + "loss": 2.1969, + "step": 7254500 + }, + { + "epoch": 21.0, + "eval_accuracy": 0.6605211557582317, + "eval_accuracy_mlm": 0.6243918605421886, + "eval_accuracy_nsp": 0.8541291425481111, + "eval_loss": 2.224362373352051, + "eval_runtime": 331.3457, + "eval_samples_per_second": 1317.011, + "eval_steps_per_second": 54.876, + "step": 7254912 + }, + { + "epoch": 21.0, + "learning_rate": 3.950368626111523e-05, + "loss": 2.1811, + "step": 7255000 + }, + { + "epoch": 21.0, + "learning_rate": 3.9502962613467956e-05, + "loss": 2.1592, + "step": 7255500 + }, + { + "epoch": 21.0, + "learning_rate": 3.950223896582068e-05, + "loss": 2.1583, + "step": 7256000 + }, + { + "epoch": 21.0, + "learning_rate": 3.95015153181734e-05, + "loss": 2.1849, + "step": 7256500 + }, + { + "epoch": 21.01, + "learning_rate": 3.950079167052612e-05, + "loss": 2.1725, + "step": 7257000 + }, + { + "epoch": 21.01, + "learning_rate": 3.950006947017414e-05, + "loss": 2.1677, + "step": 7257500 + }, + { + "epoch": 21.01, + "learning_rate": 3.9499347269822154e-05, + "loss": 2.167, + "step": 7258000 + }, + { + "epoch": 21.01, + "learning_rate": 3.949862362217488e-05, + "loss": 2.1521, + "step": 7258500 + }, + { + "epoch": 21.01, + "learning_rate": 3.9497899974527605e-05, + "loss": 2.1557, + "step": 7259000 + }, + { + "epoch": 21.01, + "learning_rate": 3.949717632688033e-05, + "loss": 2.1681, + "step": 7259500 + }, + { + "epoch": 21.01, + "learning_rate": 3.9496452679233056e-05, + "loss": 2.1724, + "step": 7260000 + }, + { + "epoch": 21.02, + "learning_rate": 3.949572903158578e-05, + "loss": 2.1526, + "step": 7260500 + }, + { + "epoch": 21.02, + "learning_rate": 3.94950053839385e-05, + "loss": 2.1771, + "step": 7261000 + }, + { + "epoch": 21.02, + "learning_rate": 3.949428173629122e-05, + "loss": 2.1626, + "step": 7261500 + }, + { + "epoch": 21.02, + "learning_rate": 3.9493558088643945e-05, + "loss": 2.1695, + "step": 7262000 + }, + { + "epoch": 21.02, + "learning_rate": 3.949283444099667e-05, + "loss": 2.1735, + "step": 7262500 + }, + { + "epoch": 21.02, + "learning_rate": 3.949211079334939e-05, + "loss": 2.1681, + "step": 7263000 + }, + { + "epoch": 21.02, + "learning_rate": 3.949138714570211e-05, + "loss": 2.17, + "step": 7263500 + }, + { + "epoch": 21.03, + "learning_rate": 3.949066494535013e-05, + "loss": 2.1688, + "step": 7264000 + }, + { + "epoch": 21.03, + "learning_rate": 3.9489941297702857e-05, + "loss": 2.1476, + "step": 7264500 + }, + { + "epoch": 21.03, + "learning_rate": 3.948921765005558e-05, + "loss": 2.1606, + "step": 7265000 + }, + { + "epoch": 21.03, + "learning_rate": 3.94884940024083e-05, + "loss": 2.1699, + "step": 7265500 + }, + { + "epoch": 21.03, + "learning_rate": 3.948777180205632e-05, + "loss": 2.1625, + "step": 7266000 + }, + { + "epoch": 21.03, + "learning_rate": 3.948704815440904e-05, + "loss": 2.1701, + "step": 7266500 + }, + { + "epoch": 21.03, + "learning_rate": 3.9486325954057054e-05, + "loss": 2.155, + "step": 7267000 + }, + { + "epoch": 21.04, + "learning_rate": 3.9485603753705084e-05, + "loss": 2.1811, + "step": 7267500 + }, + { + "epoch": 21.04, + "learning_rate": 3.9484880106057806e-05, + "loss": 2.1702, + "step": 7268000 + }, + { + "epoch": 21.04, + "learning_rate": 3.948415790570582e-05, + "loss": 2.1634, + "step": 7268500 + }, + { + "epoch": 21.04, + "learning_rate": 3.9483434258058544e-05, + "loss": 2.1607, + "step": 7269000 + }, + { + "epoch": 21.04, + "learning_rate": 3.9482710610411266e-05, + "loss": 2.164, + "step": 7269500 + }, + { + "epoch": 21.04, + "learning_rate": 3.948198696276399e-05, + "loss": 2.1654, + "step": 7270000 + }, + { + "epoch": 21.05, + "learning_rate": 3.948126331511671e-05, + "loss": 2.1881, + "step": 7270500 + }, + { + "epoch": 21.05, + "learning_rate": 3.948053966746943e-05, + "loss": 2.1687, + "step": 7271000 + }, + { + "epoch": 21.05, + "learning_rate": 3.9479816019822155e-05, + "loss": 2.1866, + "step": 7271500 + }, + { + "epoch": 21.05, + "learning_rate": 3.9479092372174884e-05, + "loss": 2.1686, + "step": 7272000 + }, + { + "epoch": 21.05, + "learning_rate": 3.9478368724527606e-05, + "loss": 2.1756, + "step": 7272500 + }, + { + "epoch": 21.05, + "learning_rate": 3.947764507688033e-05, + "loss": 2.1684, + "step": 7273000 + }, + { + "epoch": 21.05, + "learning_rate": 3.9476922876528344e-05, + "loss": 2.155, + "step": 7273500 + }, + { + "epoch": 21.06, + "learning_rate": 3.9476199228881066e-05, + "loss": 2.1698, + "step": 7274000 + }, + { + "epoch": 21.06, + "learning_rate": 3.947547558123379e-05, + "loss": 2.1788, + "step": 7274500 + }, + { + "epoch": 21.06, + "learning_rate": 3.947475193358652e-05, + "loss": 2.1493, + "step": 7275000 + }, + { + "epoch": 21.06, + "learning_rate": 3.947402828593924e-05, + "loss": 2.1658, + "step": 7275500 + }, + { + "epoch": 21.06, + "learning_rate": 3.947330463829196e-05, + "loss": 2.1779, + "step": 7276000 + }, + { + "epoch": 21.06, + "learning_rate": 3.9472580990644684e-05, + "loss": 2.1535, + "step": 7276500 + }, + { + "epoch": 21.06, + "learning_rate": 3.9471857342997406e-05, + "loss": 2.1848, + "step": 7277000 + }, + { + "epoch": 21.07, + "learning_rate": 3.9471133695350135e-05, + "loss": 2.1536, + "step": 7277500 + }, + { + "epoch": 21.07, + "learning_rate": 3.947041149499815e-05, + "loss": 2.164, + "step": 7278000 + }, + { + "epoch": 21.07, + "learning_rate": 3.946968784735087e-05, + "loss": 2.1742, + "step": 7278500 + }, + { + "epoch": 21.07, + "learning_rate": 3.9468964199703595e-05, + "loss": 2.1703, + "step": 7279000 + }, + { + "epoch": 21.07, + "learning_rate": 3.946824199935161e-05, + "loss": 2.1969, + "step": 7279500 + }, + { + "epoch": 21.07, + "learning_rate": 3.946751835170433e-05, + "loss": 2.1698, + "step": 7280000 + }, + { + "epoch": 21.07, + "learning_rate": 3.9466794704057055e-05, + "loss": 2.1673, + "step": 7280500 + }, + { + "epoch": 21.08, + "learning_rate": 3.9466071056409784e-05, + "loss": 2.1703, + "step": 7281000 + }, + { + "epoch": 21.08, + "learning_rate": 3.9465347408762506e-05, + "loss": 2.1922, + "step": 7281500 + }, + { + "epoch": 21.08, + "learning_rate": 3.946462520841052e-05, + "loss": 2.1833, + "step": 7282000 + }, + { + "epoch": 21.08, + "learning_rate": 3.946390156076325e-05, + "loss": 2.1768, + "step": 7282500 + }, + { + "epoch": 21.08, + "learning_rate": 3.9463179360411267e-05, + "loss": 2.1654, + "step": 7283000 + }, + { + "epoch": 21.08, + "learning_rate": 3.946245571276399e-05, + "loss": 2.1596, + "step": 7283500 + }, + { + "epoch": 21.08, + "learning_rate": 3.946173206511671e-05, + "loss": 2.1583, + "step": 7284000 + }, + { + "epoch": 21.09, + "learning_rate": 3.946100841746943e-05, + "loss": 2.1923, + "step": 7284500 + }, + { + "epoch": 21.09, + "learning_rate": 3.946028476982216e-05, + "loss": 2.191, + "step": 7285000 + }, + { + "epoch": 21.09, + "learning_rate": 3.9459561122174885e-05, + "loss": 2.1561, + "step": 7285500 + }, + { + "epoch": 21.09, + "learning_rate": 3.94588389218229e-05, + "loss": 2.193, + "step": 7286000 + }, + { + "epoch": 21.09, + "learning_rate": 3.945811527417562e-05, + "loss": 2.1985, + "step": 7286500 + }, + { + "epoch": 21.09, + "learning_rate": 3.9457391626528345e-05, + "loss": 2.1653, + "step": 7287000 + }, + { + "epoch": 21.09, + "learning_rate": 3.945666797888107e-05, + "loss": 2.162, + "step": 7287500 + }, + { + "epoch": 21.1, + "learning_rate": 3.945594433123379e-05, + "loss": 2.1915, + "step": 7288000 + }, + { + "epoch": 21.1, + "learning_rate": 3.945522068358651e-05, + "loss": 2.182, + "step": 7288500 + }, + { + "epoch": 21.1, + "learning_rate": 3.9454497035939233e-05, + "loss": 2.1659, + "step": 7289000 + }, + { + "epoch": 21.1, + "learning_rate": 3.945377338829196e-05, + "loss": 2.1941, + "step": 7289500 + }, + { + "epoch": 21.1, + "learning_rate": 3.9453049740644685e-05, + "loss": 2.1575, + "step": 7290000 + }, + { + "epoch": 21.1, + "learning_rate": 3.9452326092997414e-05, + "loss": 2.1747, + "step": 7290500 + }, + { + "epoch": 21.1, + "learning_rate": 3.9451602445350136e-05, + "loss": 2.1626, + "step": 7291000 + }, + { + "epoch": 21.11, + "learning_rate": 3.945087879770286e-05, + "loss": 2.1849, + "step": 7291500 + }, + { + "epoch": 21.11, + "learning_rate": 3.945015515005558e-05, + "loss": 2.1592, + "step": 7292000 + }, + { + "epoch": 21.11, + "learning_rate": 3.9449432949703596e-05, + "loss": 2.1604, + "step": 7292500 + }, + { + "epoch": 21.11, + "learning_rate": 3.944870930205632e-05, + "loss": 2.1631, + "step": 7293000 + }, + { + "epoch": 21.11, + "learning_rate": 3.944798565440904e-05, + "loss": 2.1726, + "step": 7293500 + }, + { + "epoch": 21.11, + "learning_rate": 3.944726200676176e-05, + "loss": 2.1853, + "step": 7294000 + }, + { + "epoch": 21.11, + "learning_rate": 3.9446539806409785e-05, + "loss": 2.1589, + "step": 7294500 + }, + { + "epoch": 21.12, + "learning_rate": 3.944581615876251e-05, + "loss": 2.1762, + "step": 7295000 + }, + { + "epoch": 21.12, + "learning_rate": 3.944509251111523e-05, + "loss": 2.1781, + "step": 7295500 + }, + { + "epoch": 21.12, + "learning_rate": 3.944436886346795e-05, + "loss": 2.1752, + "step": 7296000 + }, + { + "epoch": 21.12, + "learning_rate": 3.9443645215820674e-05, + "loss": 2.183, + "step": 7296500 + }, + { + "epoch": 21.12, + "learning_rate": 3.94429215681734e-05, + "loss": 2.1667, + "step": 7297000 + }, + { + "epoch": 21.12, + "learning_rate": 3.9442197920526125e-05, + "loss": 2.1706, + "step": 7297500 + }, + { + "epoch": 21.12, + "learning_rate": 3.944147572017414e-05, + "loss": 2.1725, + "step": 7298000 + }, + { + "epoch": 21.13, + "learning_rate": 3.944075207252686e-05, + "loss": 2.1753, + "step": 7298500 + }, + { + "epoch": 21.13, + "learning_rate": 3.9440028424879585e-05, + "loss": 2.1702, + "step": 7299000 + }, + { + "epoch": 21.13, + "learning_rate": 3.9439304777232314e-05, + "loss": 2.1848, + "step": 7299500 + }, + { + "epoch": 21.13, + "learning_rate": 3.943858257688033e-05, + "loss": 2.1862, + "step": 7300000 + }, + { + "epoch": 21.13, + "learning_rate": 3.943785892923305e-05, + "loss": 2.19, + "step": 7300500 + }, + { + "epoch": 21.13, + "learning_rate": 3.943713672888107e-05, + "loss": 2.1662, + "step": 7301000 + }, + { + "epoch": 21.13, + "learning_rate": 3.943641308123379e-05, + "loss": 2.1805, + "step": 7301500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943568943358651e-05, + "loss": 2.1605, + "step": 7302000 + }, + { + "epoch": 21.14, + "learning_rate": 3.9434965785939234e-05, + "loss": 2.1541, + "step": 7302500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943424213829196e-05, + "loss": 2.1838, + "step": 7303000 + }, + { + "epoch": 21.14, + "learning_rate": 3.9433518490644686e-05, + "loss": 2.1809, + "step": 7303500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943279484299741e-05, + "loss": 2.2009, + "step": 7304000 + }, + { + "epoch": 21.14, + "learning_rate": 3.943207119535014e-05, + "loss": 2.1774, + "step": 7304500 + }, + { + "epoch": 21.14, + "learning_rate": 3.943134754770286e-05, + "loss": 2.1718, + "step": 7305000 + }, + { + "epoch": 21.15, + "learning_rate": 3.943062390005558e-05, + "loss": 2.1606, + "step": 7305500 + }, + { + "epoch": 21.15, + "learning_rate": 3.9429900252408303e-05, + "loss": 2.1617, + "step": 7306000 + }, + { + "epoch": 21.15, + "learning_rate": 3.9429176604761026e-05, + "loss": 2.15, + "step": 7306500 + }, + { + "epoch": 21.15, + "learning_rate": 3.942845295711375e-05, + "loss": 2.1653, + "step": 7307000 + }, + { + "epoch": 21.15, + "learning_rate": 3.942772930946647e-05, + "loss": 2.1616, + "step": 7307500 + }, + { + "epoch": 21.15, + "learning_rate": 3.942700566181919e-05, + "loss": 2.1741, + "step": 7308000 + }, + { + "epoch": 21.16, + "learning_rate": 3.9426283461467215e-05, + "loss": 2.1527, + "step": 7308500 + }, + { + "epoch": 21.16, + "learning_rate": 3.942555981381994e-05, + "loss": 2.1781, + "step": 7309000 + }, + { + "epoch": 21.16, + "learning_rate": 3.942483616617266e-05, + "loss": 2.1946, + "step": 7309500 + }, + { + "epoch": 21.16, + "learning_rate": 3.942411251852538e-05, + "loss": 2.1558, + "step": 7310000 + }, + { + "epoch": 21.16, + "learning_rate": 3.9423388870878104e-05, + "loss": 2.1607, + "step": 7310500 + }, + { + "epoch": 21.16, + "learning_rate": 3.9422665223230826e-05, + "loss": 2.1964, + "step": 7311000 + }, + { + "epoch": 21.16, + "learning_rate": 3.942194157558355e-05, + "loss": 2.1828, + "step": 7311500 + }, + { + "epoch": 21.17, + "learning_rate": 3.942121792793628e-05, + "loss": 2.1622, + "step": 7312000 + }, + { + "epoch": 21.17, + "learning_rate": 3.9420494280289e-05, + "loss": 2.1892, + "step": 7312500 + }, + { + "epoch": 21.17, + "learning_rate": 3.9419772079937015e-05, + "loss": 2.1681, + "step": 7313000 + }, + { + "epoch": 21.17, + "learning_rate": 3.941904843228974e-05, + "loss": 2.1733, + "step": 7313500 + }, + { + "epoch": 21.17, + "learning_rate": 3.9418324784642466e-05, + "loss": 2.1789, + "step": 7314000 + }, + { + "epoch": 21.17, + "learning_rate": 3.941760113699519e-05, + "loss": 2.1653, + "step": 7314500 + }, + { + "epoch": 21.17, + "learning_rate": 3.9416878936643204e-05, + "loss": 2.1745, + "step": 7315000 + }, + { + "epoch": 21.18, + "learning_rate": 3.9416155288995926e-05, + "loss": 2.1559, + "step": 7315500 + }, + { + "epoch": 21.18, + "learning_rate": 3.941543164134865e-05, + "loss": 2.1852, + "step": 7316000 + }, + { + "epoch": 21.18, + "learning_rate": 3.9414709440996664e-05, + "loss": 2.1647, + "step": 7316500 + }, + { + "epoch": 21.18, + "learning_rate": 3.9413985793349386e-05, + "loss": 2.1863, + "step": 7317000 + }, + { + "epoch": 21.18, + "learning_rate": 3.941326359299741e-05, + "loss": 2.1804, + "step": 7317500 + }, + { + "epoch": 21.18, + "learning_rate": 3.941253994535013e-05, + "loss": 2.1875, + "step": 7318000 + }, + { + "epoch": 21.18, + "learning_rate": 3.941181629770285e-05, + "loss": 2.1557, + "step": 7318500 + }, + { + "epoch": 21.19, + "learning_rate": 3.9411092650055575e-05, + "loss": 2.1733, + "step": 7319000 + }, + { + "epoch": 21.19, + "learning_rate": 3.9410369002408304e-05, + "loss": 2.1816, + "step": 7319500 + }, + { + "epoch": 21.19, + "learning_rate": 3.9409645354761026e-05, + "loss": 2.1725, + "step": 7320000 + }, + { + "epoch": 21.19, + "learning_rate": 3.940892170711375e-05, + "loss": 2.137, + "step": 7320500 + }, + { + "epoch": 21.19, + "learning_rate": 3.940819805946647e-05, + "loss": 2.1726, + "step": 7321000 + }, + { + "epoch": 21.19, + "learning_rate": 3.940747441181919e-05, + "loss": 2.1622, + "step": 7321500 + }, + { + "epoch": 21.19, + "learning_rate": 3.9406750764171915e-05, + "loss": 2.1717, + "step": 7322000 + }, + { + "epoch": 21.2, + "learning_rate": 3.940602711652464e-05, + "loss": 2.1578, + "step": 7322500 + }, + { + "epoch": 21.2, + "learning_rate": 3.940530346887737e-05, + "loss": 2.1802, + "step": 7323000 + }, + { + "epoch": 21.2, + "learning_rate": 3.940457982123009e-05, + "loss": 2.1847, + "step": 7323500 + }, + { + "epoch": 21.2, + "learning_rate": 3.940385617358281e-05, + "loss": 2.1939, + "step": 7324000 + }, + { + "epoch": 21.2, + "learning_rate": 3.940313252593553e-05, + "loss": 2.1643, + "step": 7324500 + }, + { + "epoch": 21.2, + "learning_rate": 3.9402408878288256e-05, + "loss": 2.1956, + "step": 7325000 + }, + { + "epoch": 21.2, + "learning_rate": 3.9401688125231564e-05, + "loss": 2.1687, + "step": 7325500 + }, + { + "epoch": 21.21, + "learning_rate": 3.9400964477584293e-05, + "loss": 2.1542, + "step": 7326000 + }, + { + "epoch": 21.21, + "learning_rate": 3.9400240829937016e-05, + "loss": 2.1785, + "step": 7326500 + }, + { + "epoch": 21.21, + "learning_rate": 3.939951718228974e-05, + "loss": 2.1795, + "step": 7327000 + }, + { + "epoch": 21.21, + "learning_rate": 3.939879353464247e-05, + "loss": 2.1856, + "step": 7327500 + }, + { + "epoch": 21.21, + "learning_rate": 3.939806988699519e-05, + "loss": 2.1624, + "step": 7328000 + }, + { + "epoch": 21.21, + "learning_rate": 3.939734623934791e-05, + "loss": 2.1646, + "step": 7328500 + }, + { + "epoch": 21.21, + "learning_rate": 3.9396622591700634e-05, + "loss": 2.1746, + "step": 7329000 + }, + { + "epoch": 21.22, + "learning_rate": 3.9395898944053356e-05, + "loss": 2.1867, + "step": 7329500 + }, + { + "epoch": 21.22, + "learning_rate": 3.939517529640608e-05, + "loss": 2.1711, + "step": 7330000 + }, + { + "epoch": 21.22, + "learning_rate": 3.93944516487588e-05, + "loss": 2.1838, + "step": 7330500 + }, + { + "epoch": 21.22, + "learning_rate": 3.939372800111152e-05, + "loss": 2.169, + "step": 7331000 + }, + { + "epoch": 21.22, + "learning_rate": 3.9393004353464245e-05, + "loss": 2.1612, + "step": 7331500 + }, + { + "epoch": 21.22, + "learning_rate": 3.939228360040756e-05, + "loss": 2.1424, + "step": 7332000 + }, + { + "epoch": 21.22, + "learning_rate": 3.939155995276028e-05, + "loss": 2.1659, + "step": 7332500 + }, + { + "epoch": 21.23, + "learning_rate": 3.9390836305113005e-05, + "loss": 2.1778, + "step": 7333000 + }, + { + "epoch": 21.23, + "learning_rate": 3.939011410476102e-05, + "loss": 2.1708, + "step": 7333500 + }, + { + "epoch": 21.23, + "learning_rate": 3.938939045711374e-05, + "loss": 2.1804, + "step": 7334000 + }, + { + "epoch": 21.23, + "learning_rate": 3.938866680946647e-05, + "loss": 2.2183, + "step": 7334500 + }, + { + "epoch": 21.23, + "learning_rate": 3.9387943161819194e-05, + "loss": 2.1728, + "step": 7335000 + }, + { + "epoch": 21.23, + "learning_rate": 3.9387219514171916e-05, + "loss": 2.1556, + "step": 7335500 + }, + { + "epoch": 21.23, + "learning_rate": 3.9386495866524645e-05, + "loss": 2.1651, + "step": 7336000 + }, + { + "epoch": 21.24, + "learning_rate": 3.938577221887737e-05, + "loss": 2.1995, + "step": 7336500 + }, + { + "epoch": 21.24, + "learning_rate": 3.938505001852538e-05, + "loss": 2.1532, + "step": 7337000 + }, + { + "epoch": 21.24, + "learning_rate": 3.9384326370878105e-05, + "loss": 2.177, + "step": 7337500 + }, + { + "epoch": 21.24, + "learning_rate": 3.938360272323083e-05, + "loss": 2.1766, + "step": 7338000 + }, + { + "epoch": 21.24, + "learning_rate": 3.938287907558355e-05, + "loss": 2.1945, + "step": 7338500 + }, + { + "epoch": 21.24, + "learning_rate": 3.938215542793627e-05, + "loss": 2.1851, + "step": 7339000 + }, + { + "epoch": 21.24, + "learning_rate": 3.9381431780288994e-05, + "loss": 2.177, + "step": 7339500 + }, + { + "epoch": 21.25, + "learning_rate": 3.9380709579937017e-05, + "loss": 2.1821, + "step": 7340000 + }, + { + "epoch": 21.25, + "learning_rate": 3.937998737958503e-05, + "loss": 2.1676, + "step": 7340500 + }, + { + "epoch": 21.25, + "learning_rate": 3.9379263731937754e-05, + "loss": 2.1636, + "step": 7341000 + }, + { + "epoch": 21.25, + "learning_rate": 3.9378540084290477e-05, + "loss": 2.1845, + "step": 7341500 + }, + { + "epoch": 21.25, + "learning_rate": 3.9377816436643206e-05, + "loss": 2.1416, + "step": 7342000 + }, + { + "epoch": 21.25, + "learning_rate": 3.937709278899593e-05, + "loss": 2.1879, + "step": 7342500 + }, + { + "epoch": 21.25, + "learning_rate": 3.937636914134865e-05, + "loss": 2.1821, + "step": 7343000 + }, + { + "epoch": 21.26, + "learning_rate": 3.937564549370137e-05, + "loss": 2.1761, + "step": 7343500 + }, + { + "epoch": 21.26, + "learning_rate": 3.9374923293349395e-05, + "loss": 2.1915, + "step": 7344000 + }, + { + "epoch": 21.26, + "learning_rate": 3.937419964570212e-05, + "loss": 2.1481, + "step": 7344500 + }, + { + "epoch": 21.26, + "learning_rate": 3.937347599805484e-05, + "loss": 2.1783, + "step": 7345000 + }, + { + "epoch": 21.26, + "learning_rate": 3.937275235040756e-05, + "loss": 2.1802, + "step": 7345500 + }, + { + "epoch": 21.26, + "learning_rate": 3.9372028702760284e-05, + "loss": 2.1797, + "step": 7346000 + }, + { + "epoch": 21.27, + "learning_rate": 3.9371305055113006e-05, + "loss": 2.1631, + "step": 7346500 + }, + { + "epoch": 21.27, + "learning_rate": 3.937058140746573e-05, + "loss": 2.1934, + "step": 7347000 + }, + { + "epoch": 21.27, + "learning_rate": 3.936985775981845e-05, + "loss": 2.1966, + "step": 7347500 + }, + { + "epoch": 21.27, + "learning_rate": 3.9369135559466466e-05, + "loss": 2.1479, + "step": 7348000 + }, + { + "epoch": 21.27, + "learning_rate": 3.9368411911819195e-05, + "loss": 2.1707, + "step": 7348500 + }, + { + "epoch": 21.27, + "learning_rate": 3.936768826417192e-05, + "loss": 2.1543, + "step": 7349000 + }, + { + "epoch": 21.27, + "learning_rate": 3.9366964616524646e-05, + "loss": 2.1768, + "step": 7349500 + }, + { + "epoch": 21.28, + "learning_rate": 3.936624096887737e-05, + "loss": 2.2039, + "step": 7350000 + }, + { + "epoch": 21.28, + "learning_rate": 3.936551732123009e-05, + "loss": 2.1764, + "step": 7350500 + }, + { + "epoch": 21.28, + "learning_rate": 3.936479367358281e-05, + "loss": 2.2059, + "step": 7351000 + }, + { + "epoch": 21.28, + "learning_rate": 3.936407147323083e-05, + "loss": 2.1905, + "step": 7351500 + }, + { + "epoch": 21.28, + "learning_rate": 3.936334782558355e-05, + "loss": 2.2004, + "step": 7352000 + }, + { + "epoch": 21.28, + "learning_rate": 3.936262417793627e-05, + "loss": 2.19, + "step": 7352500 + }, + { + "epoch": 21.28, + "learning_rate": 3.9361900530288995e-05, + "loss": 2.1772, + "step": 7353000 + }, + { + "epoch": 21.29, + "learning_rate": 3.936117688264172e-05, + "loss": 2.1938, + "step": 7353500 + }, + { + "epoch": 21.29, + "learning_rate": 3.9360453234994446e-05, + "loss": 2.1745, + "step": 7354000 + }, + { + "epoch": 21.29, + "learning_rate": 3.935972958734717e-05, + "loss": 2.1877, + "step": 7354500 + }, + { + "epoch": 21.29, + "learning_rate": 3.935900593969989e-05, + "loss": 2.1442, + "step": 7355000 + }, + { + "epoch": 21.29, + "learning_rate": 3.9358283739347906e-05, + "loss": 2.1824, + "step": 7355500 + }, + { + "epoch": 21.29, + "learning_rate": 3.935756009170063e-05, + "loss": 2.1821, + "step": 7356000 + }, + { + "epoch": 21.29, + "learning_rate": 3.935683644405336e-05, + "loss": 2.1921, + "step": 7356500 + }, + { + "epoch": 21.3, + "learning_rate": 3.935611279640608e-05, + "loss": 2.1854, + "step": 7357000 + }, + { + "epoch": 21.3, + "learning_rate": 3.9355390596054095e-05, + "loss": 2.1697, + "step": 7357500 + }, + { + "epoch": 21.3, + "learning_rate": 3.9354666948406824e-05, + "loss": 2.1799, + "step": 7358000 + }, + { + "epoch": 21.3, + "learning_rate": 3.9353943300759547e-05, + "loss": 2.1927, + "step": 7358500 + }, + { + "epoch": 21.3, + "learning_rate": 3.935321965311227e-05, + "loss": 2.1768, + "step": 7359000 + }, + { + "epoch": 21.3, + "learning_rate": 3.935249890005558e-05, + "loss": 2.1827, + "step": 7359500 + }, + { + "epoch": 21.3, + "learning_rate": 3.935177669970359e-05, + "loss": 2.1646, + "step": 7360000 + }, + { + "epoch": 21.31, + "learning_rate": 3.935105305205632e-05, + "loss": 2.1761, + "step": 7360500 + }, + { + "epoch": 21.31, + "learning_rate": 3.9350329404409044e-05, + "loss": 2.1737, + "step": 7361000 + }, + { + "epoch": 21.31, + "learning_rate": 3.934960575676177e-05, + "loss": 2.1669, + "step": 7361500 + }, + { + "epoch": 21.31, + "learning_rate": 3.934888210911449e-05, + "loss": 2.1957, + "step": 7362000 + }, + { + "epoch": 21.31, + "learning_rate": 3.9348159908762504e-05, + "loss": 2.2002, + "step": 7362500 + }, + { + "epoch": 21.31, + "learning_rate": 3.934743626111523e-05, + "loss": 2.2162, + "step": 7363000 + }, + { + "epoch": 21.31, + "learning_rate": 3.934671261346795e-05, + "loss": 2.2243, + "step": 7363500 + }, + { + "epoch": 21.32, + "learning_rate": 3.934598896582067e-05, + "loss": 2.2011, + "step": 7364000 + }, + { + "epoch": 21.32, + "learning_rate": 3.93452653181734e-05, + "loss": 2.1808, + "step": 7364500 + }, + { + "epoch": 21.32, + "learning_rate": 3.934454167052612e-05, + "loss": 2.1931, + "step": 7365000 + }, + { + "epoch": 21.32, + "learning_rate": 3.9343818022878845e-05, + "loss": 2.2043, + "step": 7365500 + }, + { + "epoch": 21.32, + "learning_rate": 3.9343094375231574e-05, + "loss": 2.1645, + "step": 7366000 + }, + { + "epoch": 21.32, + "learning_rate": 3.934237217487959e-05, + "loss": 2.1788, + "step": 7366500 + }, + { + "epoch": 21.32, + "learning_rate": 3.934164852723231e-05, + "loss": 2.1872, + "step": 7367000 + }, + { + "epoch": 21.33, + "learning_rate": 3.9340924879585034e-05, + "loss": 2.1893, + "step": 7367500 + }, + { + "epoch": 21.33, + "learning_rate": 3.9340201231937756e-05, + "loss": 2.154, + "step": 7368000 + }, + { + "epoch": 21.33, + "learning_rate": 3.933947903158577e-05, + "loss": 2.1713, + "step": 7368500 + }, + { + "epoch": 21.33, + "learning_rate": 3.9338755383938494e-05, + "loss": 2.19, + "step": 7369000 + }, + { + "epoch": 21.33, + "learning_rate": 3.933803173629122e-05, + "loss": 2.1574, + "step": 7369500 + }, + { + "epoch": 21.33, + "learning_rate": 3.933730953593924e-05, + "loss": 2.167, + "step": 7370000 + }, + { + "epoch": 21.33, + "learning_rate": 3.933658588829196e-05, + "loss": 2.189, + "step": 7370500 + }, + { + "epoch": 21.34, + "learning_rate": 3.933586224064468e-05, + "loss": 2.1435, + "step": 7371000 + }, + { + "epoch": 21.34, + "learning_rate": 3.9335138592997405e-05, + "loss": 2.21, + "step": 7371500 + }, + { + "epoch": 21.34, + "learning_rate": 3.9334414945350134e-05, + "loss": 2.1827, + "step": 7372000 + }, + { + "epoch": 21.34, + "learning_rate": 3.9333691297702856e-05, + "loss": 2.1837, + "step": 7372500 + }, + { + "epoch": 21.34, + "learning_rate": 3.933296765005558e-05, + "loss": 2.1621, + "step": 7373000 + }, + { + "epoch": 21.34, + "learning_rate": 3.93322440024083e-05, + "loss": 2.1642, + "step": 7373500 + }, + { + "epoch": 21.34, + "learning_rate": 3.933152035476102e-05, + "loss": 2.1759, + "step": 7374000 + }, + { + "epoch": 21.35, + "learning_rate": 3.9330796707113745e-05, + "loss": 2.1893, + "step": 7374500 + }, + { + "epoch": 21.35, + "learning_rate": 3.9330073059466474e-05, + "loss": 2.1808, + "step": 7375000 + }, + { + "epoch": 21.35, + "learning_rate": 3.9329349411819196e-05, + "loss": 2.1829, + "step": 7375500 + }, + { + "epoch": 21.35, + "learning_rate": 3.932862576417192e-05, + "loss": 2.1685, + "step": 7376000 + }, + { + "epoch": 21.35, + "learning_rate": 3.9327903563819934e-05, + "loss": 2.1903, + "step": 7376500 + }, + { + "epoch": 21.35, + "learning_rate": 3.9327179916172656e-05, + "loss": 2.1887, + "step": 7377000 + }, + { + "epoch": 21.35, + "learning_rate": 3.932645626852538e-05, + "loss": 2.1607, + "step": 7377500 + }, + { + "epoch": 21.36, + "learning_rate": 3.93257326208781e-05, + "loss": 2.179, + "step": 7378000 + }, + { + "epoch": 21.36, + "learning_rate": 3.932501042052612e-05, + "loss": 2.1683, + "step": 7378500 + }, + { + "epoch": 21.36, + "learning_rate": 3.9324286772878845e-05, + "loss": 2.1657, + "step": 7379000 + }, + { + "epoch": 21.36, + "learning_rate": 3.9323563125231574e-05, + "loss": 2.1847, + "step": 7379500 + }, + { + "epoch": 21.36, + "learning_rate": 3.93228394775843e-05, + "loss": 2.1889, + "step": 7380000 + }, + { + "epoch": 21.36, + "learning_rate": 3.932211582993702e-05, + "loss": 2.1722, + "step": 7380500 + }, + { + "epoch": 21.36, + "learning_rate": 3.932139218228974e-05, + "loss": 2.164, + "step": 7381000 + }, + { + "epoch": 21.37, + "learning_rate": 3.932066853464246e-05, + "loss": 2.1855, + "step": 7381500 + }, + { + "epoch": 21.37, + "learning_rate": 3.9319944886995186e-05, + "loss": 2.1724, + "step": 7382000 + }, + { + "epoch": 21.37, + "learning_rate": 3.931922123934791e-05, + "loss": 2.1618, + "step": 7382500 + }, + { + "epoch": 21.37, + "learning_rate": 3.931849759170063e-05, + "loss": 2.1968, + "step": 7383000 + }, + { + "epoch": 21.37, + "learning_rate": 3.931777539134865e-05, + "loss": 2.1908, + "step": 7383500 + }, + { + "epoch": 21.37, + "learning_rate": 3.931705463829196e-05, + "loss": 2.1698, + "step": 7384000 + }, + { + "epoch": 21.38, + "learning_rate": 3.9316330990644684e-05, + "loss": 2.1771, + "step": 7384500 + }, + { + "epoch": 21.38, + "learning_rate": 3.9315607342997406e-05, + "loss": 2.1714, + "step": 7385000 + }, + { + "epoch": 21.38, + "learning_rate": 3.931488369535013e-05, + "loss": 2.1799, + "step": 7385500 + }, + { + "epoch": 21.38, + "learning_rate": 3.931416004770285e-05, + "loss": 2.166, + "step": 7386000 + }, + { + "epoch": 21.38, + "learning_rate": 3.931343640005557e-05, + "loss": 2.1757, + "step": 7386500 + }, + { + "epoch": 21.38, + "learning_rate": 3.93127127524083e-05, + "loss": 2.1987, + "step": 7387000 + }, + { + "epoch": 21.38, + "learning_rate": 3.9311989104761024e-05, + "loss": 2.1875, + "step": 7387500 + }, + { + "epoch": 21.39, + "learning_rate": 3.931126545711375e-05, + "loss": 2.1956, + "step": 7388000 + }, + { + "epoch": 21.39, + "learning_rate": 3.9310541809466475e-05, + "loss": 2.1745, + "step": 7388500 + }, + { + "epoch": 21.39, + "learning_rate": 3.93098181618192e-05, + "loss": 2.1945, + "step": 7389000 + }, + { + "epoch": 21.39, + "learning_rate": 3.930909451417192e-05, + "loss": 2.1807, + "step": 7389500 + }, + { + "epoch": 21.39, + "learning_rate": 3.930837086652464e-05, + "loss": 2.1674, + "step": 7390000 + }, + { + "epoch": 21.39, + "learning_rate": 3.930764866617266e-05, + "loss": 2.1812, + "step": 7390500 + }, + { + "epoch": 21.39, + "learning_rate": 3.930692501852538e-05, + "loss": 2.1649, + "step": 7391000 + }, + { + "epoch": 21.4, + "learning_rate": 3.93062028181734e-05, + "loss": 2.1796, + "step": 7391500 + }, + { + "epoch": 21.4, + "learning_rate": 3.9305479170526124e-05, + "loss": 2.1525, + "step": 7392000 + }, + { + "epoch": 21.4, + "learning_rate": 3.9304755522878846e-05, + "loss": 2.1747, + "step": 7392500 + }, + { + "epoch": 21.4, + "learning_rate": 3.930403187523157e-05, + "loss": 2.1768, + "step": 7393000 + }, + { + "epoch": 21.4, + "learning_rate": 3.930330822758429e-05, + "loss": 2.1695, + "step": 7393500 + }, + { + "epoch": 21.4, + "learning_rate": 3.930258457993701e-05, + "loss": 2.1586, + "step": 7394000 + }, + { + "epoch": 21.4, + "learning_rate": 3.930186093228974e-05, + "loss": 2.1701, + "step": 7394500 + }, + { + "epoch": 21.41, + "learning_rate": 3.9301137284642464e-05, + "loss": 2.1704, + "step": 7395000 + }, + { + "epoch": 21.41, + "learning_rate": 3.9300413636995186e-05, + "loss": 2.1689, + "step": 7395500 + }, + { + "epoch": 21.41, + "learning_rate": 3.929968998934791e-05, + "loss": 2.1827, + "step": 7396000 + }, + { + "epoch": 21.41, + "learning_rate": 3.929896634170063e-05, + "loss": 2.1671, + "step": 7396500 + }, + { + "epoch": 21.41, + "learning_rate": 3.929824269405335e-05, + "loss": 2.1846, + "step": 7397000 + }, + { + "epoch": 21.41, + "learning_rate": 3.9297519046406075e-05, + "loss": 2.183, + "step": 7397500 + }, + { + "epoch": 21.41, + "learning_rate": 3.9296795398758804e-05, + "loss": 2.1863, + "step": 7398000 + }, + { + "epoch": 21.42, + "learning_rate": 3.9296071751111527e-05, + "loss": 2.1783, + "step": 7398500 + }, + { + "epoch": 21.42, + "learning_rate": 3.929534810346425e-05, + "loss": 2.181, + "step": 7399000 + }, + { + "epoch": 21.42, + "learning_rate": 3.929462445581697e-05, + "loss": 2.1829, + "step": 7399500 + }, + { + "epoch": 21.42, + "learning_rate": 3.929390080816969e-05, + "loss": 2.1929, + "step": 7400000 + }, + { + "epoch": 21.42, + "learning_rate": 3.9293177160522415e-05, + "loss": 2.1762, + "step": 7400500 + }, + { + "epoch": 21.42, + "learning_rate": 3.929245351287514e-05, + "loss": 2.1899, + "step": 7401000 + }, + { + "epoch": 21.42, + "learning_rate": 3.929173131252316e-05, + "loss": 2.1811, + "step": 7401500 + }, + { + "epoch": 21.43, + "learning_rate": 3.929100766487588e-05, + "loss": 2.1795, + "step": 7402000 + }, + { + "epoch": 21.43, + "learning_rate": 3.9290285464523905e-05, + "loss": 2.1811, + "step": 7402500 + }, + { + "epoch": 21.43, + "learning_rate": 3.928956181687663e-05, + "loss": 2.1707, + "step": 7403000 + }, + { + "epoch": 21.43, + "learning_rate": 3.928883816922935e-05, + "loss": 2.2064, + "step": 7403500 + }, + { + "epoch": 21.43, + "learning_rate": 3.928811452158207e-05, + "loss": 2.1536, + "step": 7404000 + }, + { + "epoch": 21.43, + "learning_rate": 3.9287390873934794e-05, + "loss": 2.2146, + "step": 7404500 + }, + { + "epoch": 21.43, + "learning_rate": 3.92866701208781e-05, + "loss": 2.1962, + "step": 7405000 + }, + { + "epoch": 21.44, + "learning_rate": 3.9285946473230825e-05, + "loss": 2.1564, + "step": 7405500 + }, + { + "epoch": 21.44, + "learning_rate": 3.9285222825583554e-05, + "loss": 2.1938, + "step": 7406000 + }, + { + "epoch": 21.44, + "learning_rate": 3.9284499177936276e-05, + "loss": 2.1974, + "step": 7406500 + }, + { + "epoch": 21.44, + "learning_rate": 3.9283775530289e-05, + "loss": 2.1517, + "step": 7407000 + }, + { + "epoch": 21.44, + "learning_rate": 3.928305188264172e-05, + "loss": 2.2067, + "step": 7407500 + }, + { + "epoch": 21.44, + "learning_rate": 3.928232823499444e-05, + "loss": 2.1722, + "step": 7408000 + }, + { + "epoch": 21.44, + "learning_rate": 3.9281604587347165e-05, + "loss": 2.1874, + "step": 7408500 + }, + { + "epoch": 21.45, + "learning_rate": 3.9280880939699894e-05, + "loss": 2.1932, + "step": 7409000 + }, + { + "epoch": 21.45, + "learning_rate": 3.9280157292052616e-05, + "loss": 2.1972, + "step": 7409500 + }, + { + "epoch": 21.45, + "learning_rate": 3.927943364440534e-05, + "loss": 2.172, + "step": 7410000 + }, + { + "epoch": 21.45, + "learning_rate": 3.9278711444053354e-05, + "loss": 2.1714, + "step": 7410500 + }, + { + "epoch": 21.45, + "learning_rate": 3.9277987796406076e-05, + "loss": 2.1979, + "step": 7411000 + }, + { + "epoch": 21.45, + "learning_rate": 3.9277264148758805e-05, + "loss": 2.2076, + "step": 7411500 + }, + { + "epoch": 21.45, + "learning_rate": 3.927654050111153e-05, + "loss": 2.1689, + "step": 7412000 + }, + { + "epoch": 21.46, + "learning_rate": 3.927581685346425e-05, + "loss": 2.1666, + "step": 7412500 + }, + { + "epoch": 21.46, + "learning_rate": 3.9275094653112265e-05, + "loss": 2.1898, + "step": 7413000 + }, + { + "epoch": 21.46, + "learning_rate": 3.927437100546499e-05, + "loss": 2.1852, + "step": 7413500 + }, + { + "epoch": 21.46, + "learning_rate": 3.927364735781771e-05, + "loss": 2.1855, + "step": 7414000 + }, + { + "epoch": 21.46, + "learning_rate": 3.927292371017043e-05, + "loss": 2.171, + "step": 7414500 + }, + { + "epoch": 21.46, + "learning_rate": 3.9272200062523154e-05, + "loss": 2.1626, + "step": 7415000 + }, + { + "epoch": 21.46, + "learning_rate": 3.9271476414875876e-05, + "loss": 2.1961, + "step": 7415500 + }, + { + "epoch": 21.47, + "learning_rate": 3.92707542145239e-05, + "loss": 2.1506, + "step": 7416000 + }, + { + "epoch": 21.47, + "learning_rate": 3.927003056687663e-05, + "loss": 2.2037, + "step": 7416500 + }, + { + "epoch": 21.47, + "learning_rate": 3.926930691922935e-05, + "loss": 2.1585, + "step": 7417000 + }, + { + "epoch": 21.47, + "learning_rate": 3.9268584718877365e-05, + "loss": 2.1756, + "step": 7417500 + }, + { + "epoch": 21.47, + "learning_rate": 3.926786107123009e-05, + "loss": 2.1634, + "step": 7418000 + }, + { + "epoch": 21.47, + "learning_rate": 3.926713742358281e-05, + "loss": 2.1433, + "step": 7418500 + }, + { + "epoch": 21.47, + "learning_rate": 3.926641377593553e-05, + "loss": 2.1699, + "step": 7419000 + }, + { + "epoch": 21.48, + "learning_rate": 3.9265691575583554e-05, + "loss": 2.168, + "step": 7419500 + }, + { + "epoch": 21.48, + "learning_rate": 3.926496792793628e-05, + "loss": 2.2164, + "step": 7420000 + }, + { + "epoch": 21.48, + "learning_rate": 3.9264244280289e-05, + "loss": 2.1976, + "step": 7420500 + }, + { + "epoch": 21.48, + "learning_rate": 3.926352063264172e-05, + "loss": 2.1949, + "step": 7421000 + }, + { + "epoch": 21.48, + "learning_rate": 3.926279843228974e-05, + "loss": 2.1881, + "step": 7421500 + }, + { + "epoch": 21.48, + "learning_rate": 3.926207478464246e-05, + "loss": 2.183, + "step": 7422000 + }, + { + "epoch": 21.49, + "learning_rate": 3.926135113699518e-05, + "loss": 2.21, + "step": 7422500 + }, + { + "epoch": 21.49, + "learning_rate": 3.9260627489347903e-05, + "loss": 2.1824, + "step": 7423000 + }, + { + "epoch": 21.49, + "learning_rate": 3.925990384170063e-05, + "loss": 2.1955, + "step": 7423500 + }, + { + "epoch": 21.49, + "learning_rate": 3.9259180194053355e-05, + "loss": 2.1647, + "step": 7424000 + }, + { + "epoch": 21.49, + "learning_rate": 3.9258456546406084e-05, + "loss": 2.173, + "step": 7424500 + }, + { + "epoch": 21.49, + "learning_rate": 3.92577343460541e-05, + "loss": 2.197, + "step": 7425000 + }, + { + "epoch": 21.49, + "learning_rate": 3.9257012145702115e-05, + "loss": 2.1935, + "step": 7425500 + }, + { + "epoch": 21.5, + "learning_rate": 3.925628849805484e-05, + "loss": 2.1915, + "step": 7426000 + }, + { + "epoch": 21.5, + "learning_rate": 3.925556485040756e-05, + "loss": 2.1668, + "step": 7426500 + }, + { + "epoch": 21.5, + "learning_rate": 3.925484120276028e-05, + "loss": 2.1476, + "step": 7427000 + }, + { + "epoch": 21.5, + "learning_rate": 3.9254117555113004e-05, + "loss": 2.2069, + "step": 7427500 + }, + { + "epoch": 21.5, + "learning_rate": 3.925339390746573e-05, + "loss": 2.2022, + "step": 7428000 + }, + { + "epoch": 21.5, + "learning_rate": 3.9252670259818455e-05, + "loss": 2.1494, + "step": 7428500 + }, + { + "epoch": 21.5, + "learning_rate": 3.925194661217118e-05, + "loss": 2.1633, + "step": 7429000 + }, + { + "epoch": 21.51, + "learning_rate": 3.92512229645239e-05, + "loss": 2.1751, + "step": 7429500 + }, + { + "epoch": 21.51, + "learning_rate": 3.925049931687662e-05, + "loss": 2.1869, + "step": 7430000 + }, + { + "epoch": 21.51, + "learning_rate": 3.9249775669229344e-05, + "loss": 2.1915, + "step": 7430500 + }, + { + "epoch": 21.51, + "learning_rate": 3.9249052021582066e-05, + "loss": 2.1894, + "step": 7431000 + }, + { + "epoch": 21.51, + "learning_rate": 3.9248328373934795e-05, + "loss": 2.1581, + "step": 7431500 + }, + { + "epoch": 21.51, + "learning_rate": 3.924760472628752e-05, + "loss": 2.1725, + "step": 7432000 + }, + { + "epoch": 21.51, + "learning_rate": 3.924688107864024e-05, + "loss": 2.2109, + "step": 7432500 + }, + { + "epoch": 21.52, + "learning_rate": 3.924615743099296e-05, + "loss": 2.2039, + "step": 7433000 + }, + { + "epoch": 21.52, + "learning_rate": 3.9245433783345684e-05, + "loss": 2.1859, + "step": 7433500 + }, + { + "epoch": 21.52, + "learning_rate": 3.9244710135698406e-05, + "loss": 2.1771, + "step": 7434000 + }, + { + "epoch": 21.52, + "learning_rate": 3.9243986488051135e-05, + "loss": 2.2055, + "step": 7434500 + }, + { + "epoch": 21.52, + "learning_rate": 3.924326284040386e-05, + "loss": 2.1934, + "step": 7435000 + }, + { + "epoch": 21.52, + "learning_rate": 3.924253919275658e-05, + "loss": 2.1834, + "step": 7435500 + }, + { + "epoch": 21.52, + "learning_rate": 3.92418155451093e-05, + "loss": 2.1704, + "step": 7436000 + }, + { + "epoch": 21.53, + "learning_rate": 3.924109334475732e-05, + "loss": 2.1682, + "step": 7436500 + }, + { + "epoch": 21.53, + "learning_rate": 3.924036969711004e-05, + "loss": 2.1772, + "step": 7437000 + }, + { + "epoch": 21.53, + "learning_rate": 3.923964604946276e-05, + "loss": 2.1624, + "step": 7437500 + }, + { + "epoch": 21.53, + "learning_rate": 3.9238922401815484e-05, + "loss": 2.1864, + "step": 7438000 + }, + { + "epoch": 21.53, + "learning_rate": 3.923820020146351e-05, + "loss": 2.179, + "step": 7438500 + }, + { + "epoch": 21.53, + "learning_rate": 3.9237476553816236e-05, + "loss": 2.1987, + "step": 7439000 + }, + { + "epoch": 21.53, + "learning_rate": 3.923675290616896e-05, + "loss": 2.2051, + "step": 7439500 + }, + { + "epoch": 21.54, + "learning_rate": 3.923602925852168e-05, + "loss": 2.1821, + "step": 7440000 + }, + { + "epoch": 21.54, + "learning_rate": 3.92353056108744e-05, + "loss": 2.1852, + "step": 7440500 + }, + { + "epoch": 21.54, + "learning_rate": 3.9234581963227125e-05, + "loss": 2.1968, + "step": 7441000 + }, + { + "epoch": 21.54, + "learning_rate": 3.923385831557985e-05, + "loss": 2.1977, + "step": 7441500 + }, + { + "epoch": 21.54, + "learning_rate": 3.923313466793257e-05, + "loss": 2.2089, + "step": 7442000 + }, + { + "epoch": 21.54, + "learning_rate": 3.923241102028529e-05, + "loss": 2.1745, + "step": 7442500 + }, + { + "epoch": 21.54, + "learning_rate": 3.923168881993331e-05, + "loss": 2.178, + "step": 7443000 + }, + { + "epoch": 21.55, + "learning_rate": 3.9230965172286036e-05, + "loss": 2.1747, + "step": 7443500 + }, + { + "epoch": 21.55, + "learning_rate": 3.923024152463876e-05, + "loss": 2.1972, + "step": 7444000 + }, + { + "epoch": 21.55, + "learning_rate": 3.922951787699148e-05, + "loss": 2.1925, + "step": 7444500 + }, + { + "epoch": 21.55, + "learning_rate": 3.9228795676639496e-05, + "loss": 2.1751, + "step": 7445000 + }, + { + "epoch": 21.55, + "learning_rate": 3.922807202899222e-05, + "loss": 2.1494, + "step": 7445500 + }, + { + "epoch": 21.55, + "learning_rate": 3.922734838134495e-05, + "loss": 2.1624, + "step": 7446000 + }, + { + "epoch": 21.55, + "learning_rate": 3.922662762828826e-05, + "loss": 2.163, + "step": 7446500 + }, + { + "epoch": 21.56, + "learning_rate": 3.9225903980640985e-05, + "loss": 2.179, + "step": 7447000 + }, + { + "epoch": 21.56, + "learning_rate": 3.9225181780289e-05, + "loss": 2.2142, + "step": 7447500 + }, + { + "epoch": 21.56, + "learning_rate": 3.922445813264172e-05, + "loss": 2.1795, + "step": 7448000 + }, + { + "epoch": 21.56, + "learning_rate": 3.9223734484994445e-05, + "loss": 2.1742, + "step": 7448500 + }, + { + "epoch": 21.56, + "learning_rate": 3.922301083734717e-05, + "loss": 2.1664, + "step": 7449000 + }, + { + "epoch": 21.56, + "learning_rate": 3.922228718969989e-05, + "loss": 2.1848, + "step": 7449500 + }, + { + "epoch": 21.56, + "learning_rate": 3.922156354205261e-05, + "loss": 2.1794, + "step": 7450000 + }, + { + "epoch": 21.57, + "learning_rate": 3.9220841341700634e-05, + "loss": 2.199, + "step": 7450500 + }, + { + "epoch": 21.57, + "learning_rate": 3.9220117694053356e-05, + "loss": 2.1618, + "step": 7451000 + }, + { + "epoch": 21.57, + "learning_rate": 3.921939404640608e-05, + "loss": 2.1731, + "step": 7451500 + }, + { + "epoch": 21.57, + "learning_rate": 3.92186703987588e-05, + "loss": 2.1871, + "step": 7452000 + }, + { + "epoch": 21.57, + "learning_rate": 3.921794675111152e-05, + "loss": 2.1632, + "step": 7452500 + }, + { + "epoch": 21.57, + "learning_rate": 3.9217223103464245e-05, + "loss": 2.1524, + "step": 7453000 + }, + { + "epoch": 21.57, + "learning_rate": 3.921649945581697e-05, + "loss": 2.1799, + "step": 7453500 + }, + { + "epoch": 21.58, + "learning_rate": 3.9215775808169696e-05, + "loss": 2.2059, + "step": 7454000 + }, + { + "epoch": 21.58, + "learning_rate": 3.921505216052242e-05, + "loss": 2.178, + "step": 7454500 + }, + { + "epoch": 21.58, + "learning_rate": 3.921432851287514e-05, + "loss": 2.1574, + "step": 7455000 + }, + { + "epoch": 21.58, + "learning_rate": 3.921360486522786e-05, + "loss": 2.1919, + "step": 7455500 + }, + { + "epoch": 21.58, + "learning_rate": 3.9212881217580585e-05, + "loss": 2.1674, + "step": 7456000 + }, + { + "epoch": 21.58, + "learning_rate": 3.9212157569933314e-05, + "loss": 2.188, + "step": 7456500 + }, + { + "epoch": 21.58, + "learning_rate": 3.921143392228604e-05, + "loss": 2.191, + "step": 7457000 + }, + { + "epoch": 21.59, + "learning_rate": 3.921071027463876e-05, + "loss": 2.1843, + "step": 7457500 + }, + { + "epoch": 21.59, + "learning_rate": 3.920998662699148e-05, + "loss": 2.1864, + "step": 7458000 + }, + { + "epoch": 21.59, + "learning_rate": 3.92092629793442e-05, + "loss": 2.2013, + "step": 7458500 + }, + { + "epoch": 21.59, + "learning_rate": 3.920854222628751e-05, + "loss": 2.2088, + "step": 7459000 + }, + { + "epoch": 21.59, + "learning_rate": 3.9207820025935535e-05, + "loss": 2.182, + "step": 7459500 + }, + { + "epoch": 21.59, + "learning_rate": 3.920709637828826e-05, + "loss": 2.1812, + "step": 7460000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920637273064098e-05, + "loss": 2.2006, + "step": 7460500 + }, + { + "epoch": 21.6, + "learning_rate": 3.92056490829937e-05, + "loss": 2.1763, + "step": 7461000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920492543534643e-05, + "loss": 2.1878, + "step": 7461500 + }, + { + "epoch": 21.6, + "learning_rate": 3.9204203234994446e-05, + "loss": 2.1814, + "step": 7462000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920347958734717e-05, + "loss": 2.1871, + "step": 7462500 + }, + { + "epoch": 21.6, + "learning_rate": 3.920275593969989e-05, + "loss": 2.1993, + "step": 7463000 + }, + { + "epoch": 21.6, + "learning_rate": 3.920203229205261e-05, + "loss": 2.2042, + "step": 7463500 + }, + { + "epoch": 21.61, + "learning_rate": 3.9201308644405335e-05, + "loss": 2.1742, + "step": 7464000 + }, + { + "epoch": 21.61, + "learning_rate": 3.9200584996758064e-05, + "loss": 2.1776, + "step": 7464500 + }, + { + "epoch": 21.61, + "learning_rate": 3.9199861349110786e-05, + "loss": 2.1785, + "step": 7465000 + }, + { + "epoch": 21.61, + "learning_rate": 3.919913770146351e-05, + "loss": 2.1707, + "step": 7465500 + }, + { + "epoch": 21.61, + "learning_rate": 3.919841405381623e-05, + "loss": 2.1471, + "step": 7466000 + }, + { + "epoch": 21.61, + "learning_rate": 3.919769040616895e-05, + "loss": 2.1765, + "step": 7466500 + }, + { + "epoch": 21.61, + "learning_rate": 3.9196966758521675e-05, + "loss": 2.1801, + "step": 7467000 + }, + { + "epoch": 21.62, + "learning_rate": 3.91962431108744e-05, + "loss": 2.1702, + "step": 7467500 + }, + { + "epoch": 21.62, + "learning_rate": 3.919551946322712e-05, + "loss": 2.1616, + "step": 7468000 + }, + { + "epoch": 21.62, + "learning_rate": 3.919479581557985e-05, + "loss": 2.1438, + "step": 7468500 + }, + { + "epoch": 21.62, + "learning_rate": 3.9194073615227864e-05, + "loss": 2.179, + "step": 7469000 + }, + { + "epoch": 21.62, + "learning_rate": 3.9193349967580586e-05, + "loss": 2.171, + "step": 7469500 + }, + { + "epoch": 21.62, + "learning_rate": 3.9192626319933315e-05, + "loss": 2.1588, + "step": 7470000 + }, + { + "epoch": 21.62, + "learning_rate": 3.919190411958133e-05, + "loss": 2.1717, + "step": 7470500 + }, + { + "epoch": 21.63, + "learning_rate": 3.919118047193405e-05, + "loss": 2.1756, + "step": 7471000 + }, + { + "epoch": 21.63, + "learning_rate": 3.9190456824286775e-05, + "loss": 2.1669, + "step": 7471500 + }, + { + "epoch": 21.63, + "learning_rate": 3.918973462393479e-05, + "loss": 2.156, + "step": 7472000 + }, + { + "epoch": 21.63, + "learning_rate": 3.918901097628751e-05, + "loss": 2.1952, + "step": 7472500 + }, + { + "epoch": 21.63, + "learning_rate": 3.9188288775935535e-05, + "loss": 2.1767, + "step": 7473000 + }, + { + "epoch": 21.63, + "learning_rate": 3.918756512828826e-05, + "loss": 2.1598, + "step": 7473500 + }, + { + "epoch": 21.63, + "learning_rate": 3.918684148064098e-05, + "loss": 2.1992, + "step": 7474000 + }, + { + "epoch": 21.64, + "learning_rate": 3.91861178329937e-05, + "loss": 2.1774, + "step": 7474500 + }, + { + "epoch": 21.64, + "learning_rate": 3.9185394185346424e-05, + "loss": 2.164, + "step": 7475000 + }, + { + "epoch": 21.64, + "learning_rate": 3.9184670537699146e-05, + "loss": 2.1937, + "step": 7475500 + }, + { + "epoch": 21.64, + "learning_rate": 3.918394689005187e-05, + "loss": 2.172, + "step": 7476000 + }, + { + "epoch": 21.64, + "learning_rate": 3.91832232424046e-05, + "loss": 2.1713, + "step": 7476500 + }, + { + "epoch": 21.64, + "learning_rate": 3.918249959475732e-05, + "loss": 2.1821, + "step": 7477000 + }, + { + "epoch": 21.64, + "learning_rate": 3.918177594711004e-05, + "loss": 2.1878, + "step": 7477500 + }, + { + "epoch": 21.65, + "learning_rate": 3.9181052299462764e-05, + "loss": 2.1677, + "step": 7478000 + }, + { + "epoch": 21.65, + "learning_rate": 3.918032865181549e-05, + "loss": 2.1788, + "step": 7478500 + }, + { + "epoch": 21.65, + "learning_rate": 3.9179605004168216e-05, + "loss": 2.173, + "step": 7479000 + }, + { + "epoch": 21.65, + "learning_rate": 3.917888135652094e-05, + "loss": 2.158, + "step": 7479500 + }, + { + "epoch": 21.65, + "learning_rate": 3.917815770887366e-05, + "loss": 2.1874, + "step": 7480000 + }, + { + "epoch": 21.65, + "learning_rate": 3.917743406122638e-05, + "loss": 2.162, + "step": 7480500 + }, + { + "epoch": 21.65, + "learning_rate": 3.9176710413579105e-05, + "loss": 2.1589, + "step": 7481000 + }, + { + "epoch": 21.66, + "learning_rate": 3.917598821322712e-05, + "loss": 2.1896, + "step": 7481500 + }, + { + "epoch": 21.66, + "learning_rate": 3.917526456557984e-05, + "loss": 2.1629, + "step": 7482000 + }, + { + "epoch": 21.66, + "learning_rate": 3.9174540917932565e-05, + "loss": 2.1803, + "step": 7482500 + }, + { + "epoch": 21.66, + "learning_rate": 3.917381727028529e-05, + "loss": 2.1863, + "step": 7483000 + }, + { + "epoch": 21.66, + "learning_rate": 3.9173095069933316e-05, + "loss": 2.1846, + "step": 7483500 + }, + { + "epoch": 21.66, + "learning_rate": 3.917237142228604e-05, + "loss": 2.1606, + "step": 7484000 + }, + { + "epoch": 21.66, + "learning_rate": 3.917164777463876e-05, + "loss": 2.2064, + "step": 7484500 + }, + { + "epoch": 21.67, + "learning_rate": 3.917092412699148e-05, + "loss": 2.1954, + "step": 7485000 + }, + { + "epoch": 21.67, + "learning_rate": 3.9170200479344205e-05, + "loss": 2.2058, + "step": 7485500 + }, + { + "epoch": 21.67, + "learning_rate": 3.916947683169693e-05, + "loss": 2.1758, + "step": 7486000 + }, + { + "epoch": 21.67, + "learning_rate": 3.916875318404965e-05, + "loss": 2.1934, + "step": 7486500 + }, + { + "epoch": 21.67, + "learning_rate": 3.916802953640237e-05, + "loss": 2.1885, + "step": 7487000 + }, + { + "epoch": 21.67, + "learning_rate": 3.9167307336050394e-05, + "loss": 2.1658, + "step": 7487500 + }, + { + "epoch": 21.67, + "learning_rate": 3.916658513569841e-05, + "loss": 2.1712, + "step": 7488000 + }, + { + "epoch": 21.68, + "learning_rate": 3.916586148805113e-05, + "loss": 2.1918, + "step": 7488500 + }, + { + "epoch": 21.68, + "learning_rate": 3.9165137840403854e-05, + "loss": 2.1972, + "step": 7489000 + }, + { + "epoch": 21.68, + "learning_rate": 3.916441564005187e-05, + "loss": 2.1542, + "step": 7489500 + }, + { + "epoch": 21.68, + "learning_rate": 3.916369199240459e-05, + "loss": 2.1748, + "step": 7490000 + }, + { + "epoch": 21.68, + "learning_rate": 3.9162969792052614e-05, + "loss": 2.1849, + "step": 7490500 + }, + { + "epoch": 21.68, + "learning_rate": 3.9162246144405336e-05, + "loss": 2.1759, + "step": 7491000 + }, + { + "epoch": 21.68, + "learning_rate": 3.9161522496758065e-05, + "loss": 2.2153, + "step": 7491500 + }, + { + "epoch": 21.69, + "learning_rate": 3.916079884911079e-05, + "loss": 2.1853, + "step": 7492000 + }, + { + "epoch": 21.69, + "learning_rate": 3.916007520146351e-05, + "loss": 2.1965, + "step": 7492500 + }, + { + "epoch": 21.69, + "learning_rate": 3.915935155381623e-05, + "loss": 2.1753, + "step": 7493000 + }, + { + "epoch": 21.69, + "learning_rate": 3.9158627906168954e-05, + "loss": 2.1816, + "step": 7493500 + }, + { + "epoch": 21.69, + "learning_rate": 3.9157904258521677e-05, + "loss": 2.1816, + "step": 7494000 + }, + { + "epoch": 21.69, + "learning_rate": 3.91571806108744e-05, + "loss": 2.2008, + "step": 7494500 + }, + { + "epoch": 21.69, + "learning_rate": 3.915645696322712e-05, + "loss": 2.1804, + "step": 7495000 + }, + { + "epoch": 21.7, + "learning_rate": 3.915573331557984e-05, + "loss": 2.1724, + "step": 7495500 + }, + { + "epoch": 21.7, + "learning_rate": 3.9155011115227866e-05, + "loss": 2.1961, + "step": 7496000 + }, + { + "epoch": 21.7, + "learning_rate": 3.915428746758059e-05, + "loss": 2.1982, + "step": 7496500 + }, + { + "epoch": 21.7, + "learning_rate": 3.915356381993331e-05, + "loss": 2.1869, + "step": 7497000 + }, + { + "epoch": 21.7, + "learning_rate": 3.915284017228603e-05, + "loss": 2.1745, + "step": 7497500 + }, + { + "epoch": 21.7, + "learning_rate": 3.9152116524638754e-05, + "loss": 2.1755, + "step": 7498000 + }, + { + "epoch": 21.71, + "learning_rate": 3.9151392876991483e-05, + "loss": 2.1728, + "step": 7498500 + }, + { + "epoch": 21.71, + "learning_rate": 3.9150669229344206e-05, + "loss": 2.1969, + "step": 7499000 + }, + { + "epoch": 21.71, + "learning_rate": 3.914994558169693e-05, + "loss": 2.1751, + "step": 7499500 + }, + { + "epoch": 21.71, + "learning_rate": 3.914922193404965e-05, + "loss": 2.1822, + "step": 7500000 + }, + { + "epoch": 21.71, + "learning_rate": 3.914849828640237e-05, + "loss": 2.1737, + "step": 7500500 + }, + { + "epoch": 21.71, + "learning_rate": 3.9147774638755095e-05, + "loss": 2.1687, + "step": 7501000 + }, + { + "epoch": 21.71, + "learning_rate": 3.914705099110782e-05, + "loss": 2.1637, + "step": 7501500 + }, + { + "epoch": 21.72, + "learning_rate": 3.9146327343460546e-05, + "loss": 2.1725, + "step": 7502000 + }, + { + "epoch": 21.72, + "learning_rate": 3.914560369581327e-05, + "loss": 2.1874, + "step": 7502500 + }, + { + "epoch": 21.72, + "learning_rate": 3.914488004816599e-05, + "loss": 2.1632, + "step": 7503000 + }, + { + "epoch": 21.72, + "learning_rate": 3.9144157847814006e-05, + "loss": 2.1687, + "step": 7503500 + }, + { + "epoch": 21.72, + "learning_rate": 3.914343564746202e-05, + "loss": 2.1903, + "step": 7504000 + }, + { + "epoch": 21.72, + "learning_rate": 3.9142711999814744e-05, + "loss": 2.1855, + "step": 7504500 + }, + { + "epoch": 21.72, + "learning_rate": 3.9141989799462766e-05, + "loss": 2.1708, + "step": 7505000 + }, + { + "epoch": 21.73, + "learning_rate": 3.914126615181549e-05, + "loss": 2.1862, + "step": 7505500 + }, + { + "epoch": 21.73, + "learning_rate": 3.914054250416822e-05, + "loss": 2.1881, + "step": 7506000 + }, + { + "epoch": 21.73, + "learning_rate": 3.913981885652094e-05, + "loss": 2.1584, + "step": 7506500 + }, + { + "epoch": 21.73, + "learning_rate": 3.913909520887366e-05, + "loss": 2.186, + "step": 7507000 + }, + { + "epoch": 21.73, + "learning_rate": 3.9138371561226384e-05, + "loss": 2.1854, + "step": 7507500 + }, + { + "epoch": 21.73, + "learning_rate": 3.9137647913579106e-05, + "loss": 2.2044, + "step": 7508000 + }, + { + "epoch": 21.73, + "learning_rate": 3.913692426593183e-05, + "loss": 2.1793, + "step": 7508500 + }, + { + "epoch": 21.74, + "learning_rate": 3.913620061828455e-05, + "loss": 2.1838, + "step": 7509000 + }, + { + "epoch": 21.74, + "learning_rate": 3.913547697063727e-05, + "loss": 2.1703, + "step": 7509500 + }, + { + "epoch": 21.74, + "learning_rate": 3.9134753322989995e-05, + "loss": 2.1703, + "step": 7510000 + }, + { + "epoch": 21.74, + "learning_rate": 3.913402967534272e-05, + "loss": 2.189, + "step": 7510500 + }, + { + "epoch": 21.74, + "learning_rate": 3.9133306027695446e-05, + "loss": 2.1984, + "step": 7511000 + }, + { + "epoch": 21.74, + "learning_rate": 3.913258238004817e-05, + "loss": 2.1858, + "step": 7511500 + }, + { + "epoch": 21.74, + "learning_rate": 3.9131860179696184e-05, + "loss": 2.1745, + "step": 7512000 + }, + { + "epoch": 21.75, + "learning_rate": 3.9131136532048906e-05, + "loss": 2.1422, + "step": 7512500 + }, + { + "epoch": 21.75, + "learning_rate": 3.913041288440163e-05, + "loss": 2.1975, + "step": 7513000 + }, + { + "epoch": 21.75, + "learning_rate": 3.912968923675436e-05, + "loss": 2.1808, + "step": 7513500 + }, + { + "epoch": 21.75, + "learning_rate": 3.912896703640237e-05, + "loss": 2.1855, + "step": 7514000 + }, + { + "epoch": 21.75, + "learning_rate": 3.9128243388755095e-05, + "loss": 2.18, + "step": 7514500 + }, + { + "epoch": 21.75, + "learning_rate": 3.912751974110782e-05, + "loss": 2.1831, + "step": 7515000 + }, + { + "epoch": 21.75, + "learning_rate": 3.912679609346055e-05, + "loss": 2.198, + "step": 7515500 + }, + { + "epoch": 21.76, + "learning_rate": 3.912607244581327e-05, + "loss": 2.1494, + "step": 7516000 + }, + { + "epoch": 21.76, + "learning_rate": 3.9125350245461284e-05, + "loss": 2.1882, + "step": 7516500 + }, + { + "epoch": 21.76, + "learning_rate": 3.912462659781401e-05, + "loss": 2.1999, + "step": 7517000 + }, + { + "epoch": 21.76, + "learning_rate": 3.912390439746202e-05, + "loss": 2.1839, + "step": 7517500 + }, + { + "epoch": 21.76, + "learning_rate": 3.9123180749814744e-05, + "loss": 2.178, + "step": 7518000 + }, + { + "epoch": 21.76, + "learning_rate": 3.9122457102167474e-05, + "loss": 2.1761, + "step": 7518500 + }, + { + "epoch": 21.76, + "learning_rate": 3.9121733454520196e-05, + "loss": 2.1891, + "step": 7519000 + }, + { + "epoch": 21.77, + "learning_rate": 3.912100980687292e-05, + "loss": 2.213, + "step": 7519500 + }, + { + "epoch": 21.77, + "learning_rate": 3.912028615922564e-05, + "loss": 2.1675, + "step": 7520000 + }, + { + "epoch": 21.77, + "learning_rate": 3.911956251157836e-05, + "loss": 2.1744, + "step": 7520500 + }, + { + "epoch": 21.77, + "learning_rate": 3.9118840311226385e-05, + "loss": 2.1674, + "step": 7521000 + }, + { + "epoch": 21.77, + "learning_rate": 3.911811666357911e-05, + "loss": 2.1704, + "step": 7521500 + }, + { + "epoch": 21.77, + "learning_rate": 3.911739301593183e-05, + "loss": 2.1946, + "step": 7522000 + }, + { + "epoch": 21.77, + "learning_rate": 3.911666936828455e-05, + "loss": 2.194, + "step": 7522500 + }, + { + "epoch": 21.78, + "learning_rate": 3.9115945720637274e-05, + "loss": 2.1863, + "step": 7523000 + }, + { + "epoch": 21.78, + "learning_rate": 3.9115222072989996e-05, + "loss": 2.1545, + "step": 7523500 + }, + { + "epoch": 21.78, + "learning_rate": 3.9114498425342725e-05, + "loss": 2.1872, + "step": 7524000 + }, + { + "epoch": 21.78, + "learning_rate": 3.911377477769545e-05, + "loss": 2.1743, + "step": 7524500 + }, + { + "epoch": 21.78, + "learning_rate": 3.9113054024638756e-05, + "loss": 2.1915, + "step": 7525000 + }, + { + "epoch": 21.78, + "learning_rate": 3.911233037699148e-05, + "loss": 2.207, + "step": 7525500 + }, + { + "epoch": 21.78, + "learning_rate": 3.91116067293442e-05, + "loss": 2.1635, + "step": 7526000 + }, + { + "epoch": 21.79, + "learning_rate": 3.911088308169692e-05, + "loss": 2.1887, + "step": 7526500 + }, + { + "epoch": 21.79, + "learning_rate": 3.9110160881344945e-05, + "loss": 2.1717, + "step": 7527000 + }, + { + "epoch": 21.79, + "learning_rate": 3.910943723369767e-05, + "loss": 2.1795, + "step": 7527500 + }, + { + "epoch": 21.79, + "learning_rate": 3.910871358605039e-05, + "loss": 2.1882, + "step": 7528000 + }, + { + "epoch": 21.79, + "learning_rate": 3.910798993840312e-05, + "loss": 2.1758, + "step": 7528500 + }, + { + "epoch": 21.79, + "learning_rate": 3.910726629075584e-05, + "loss": 2.1844, + "step": 7529000 + }, + { + "epoch": 21.79, + "learning_rate": 3.910654264310856e-05, + "loss": 2.1769, + "step": 7529500 + }, + { + "epoch": 21.8, + "learning_rate": 3.9105818995461285e-05, + "loss": 2.1757, + "step": 7530000 + }, + { + "epoch": 21.8, + "learning_rate": 3.910509534781401e-05, + "loss": 2.166, + "step": 7530500 + }, + { + "epoch": 21.8, + "learning_rate": 3.910437170016673e-05, + "loss": 2.1631, + "step": 7531000 + }, + { + "epoch": 21.8, + "learning_rate": 3.910364805251945e-05, + "loss": 2.1754, + "step": 7531500 + }, + { + "epoch": 21.8, + "learning_rate": 3.9102924404872174e-05, + "loss": 2.1829, + "step": 7532000 + }, + { + "epoch": 21.8, + "learning_rate": 3.9102200757224896e-05, + "loss": 2.1779, + "step": 7532500 + }, + { + "epoch": 21.8, + "learning_rate": 3.9101477109577625e-05, + "loss": 2.2018, + "step": 7533000 + }, + { + "epoch": 21.81, + "learning_rate": 3.910075346193035e-05, + "loss": 2.1823, + "step": 7533500 + }, + { + "epoch": 21.81, + "learning_rate": 3.910002981428307e-05, + "loss": 2.1655, + "step": 7534000 + }, + { + "epoch": 21.81, + "learning_rate": 3.909930616663579e-05, + "loss": 2.1836, + "step": 7534500 + }, + { + "epoch": 21.81, + "learning_rate": 3.9098582518988514e-05, + "loss": 2.1751, + "step": 7535000 + }, + { + "epoch": 21.81, + "learning_rate": 3.9097858871341243e-05, + "loss": 2.1929, + "step": 7535500 + }, + { + "epoch": 21.81, + "learning_rate": 3.909713667098926e-05, + "loss": 2.182, + "step": 7536000 + }, + { + "epoch": 21.82, + "learning_rate": 3.909641302334198e-05, + "loss": 2.1838, + "step": 7536500 + }, + { + "epoch": 21.82, + "learning_rate": 3.909569082299e-05, + "loss": 2.1738, + "step": 7537000 + }, + { + "epoch": 21.82, + "learning_rate": 3.9094967175342726e-05, + "loss": 2.1727, + "step": 7537500 + }, + { + "epoch": 21.82, + "learning_rate": 3.909424352769545e-05, + "loss": 2.1715, + "step": 7538000 + }, + { + "epoch": 21.82, + "learning_rate": 3.909351988004817e-05, + "loss": 2.168, + "step": 7538500 + }, + { + "epoch": 21.82, + "learning_rate": 3.909279623240089e-05, + "loss": 2.1789, + "step": 7539000 + }, + { + "epoch": 21.82, + "learning_rate": 3.9092072584753615e-05, + "loss": 2.1871, + "step": 7539500 + }, + { + "epoch": 21.83, + "learning_rate": 3.909134893710634e-05, + "loss": 2.1779, + "step": 7540000 + }, + { + "epoch": 21.83, + "learning_rate": 3.909062528945906e-05, + "loss": 2.1981, + "step": 7540500 + }, + { + "epoch": 21.83, + "learning_rate": 3.908990164181178e-05, + "loss": 2.1884, + "step": 7541000 + }, + { + "epoch": 21.83, + "learning_rate": 3.90891794414598e-05, + "loss": 2.1672, + "step": 7541500 + }, + { + "epoch": 21.83, + "learning_rate": 3.9088455793812526e-05, + "loss": 2.1553, + "step": 7542000 + }, + { + "epoch": 21.83, + "learning_rate": 3.908773214616525e-05, + "loss": 2.1748, + "step": 7542500 + }, + { + "epoch": 21.83, + "learning_rate": 3.908700849851798e-05, + "loss": 2.1748, + "step": 7543000 + }, + { + "epoch": 21.84, + "learning_rate": 3.90862848508707e-05, + "loss": 2.1851, + "step": 7543500 + }, + { + "epoch": 21.84, + "learning_rate": 3.908556120322342e-05, + "loss": 2.2001, + "step": 7544000 + }, + { + "epoch": 21.84, + "learning_rate": 3.908483900287144e-05, + "loss": 2.1648, + "step": 7544500 + }, + { + "epoch": 21.84, + "learning_rate": 3.908411535522416e-05, + "loss": 2.1833, + "step": 7545000 + }, + { + "epoch": 21.84, + "learning_rate": 3.908339170757688e-05, + "loss": 2.1634, + "step": 7545500 + }, + { + "epoch": 21.84, + "learning_rate": 3.9082668059929604e-05, + "loss": 2.2034, + "step": 7546000 + }, + { + "epoch": 21.84, + "learning_rate": 3.9081944412282326e-05, + "loss": 2.1534, + "step": 7546500 + }, + { + "epoch": 21.85, + "learning_rate": 3.908122076463505e-05, + "loss": 2.2166, + "step": 7547000 + }, + { + "epoch": 21.85, + "learning_rate": 3.908049711698778e-05, + "loss": 2.1725, + "step": 7547500 + }, + { + "epoch": 21.85, + "learning_rate": 3.907977491663579e-05, + "loss": 2.1905, + "step": 7548000 + }, + { + "epoch": 21.85, + "learning_rate": 3.9079051268988515e-05, + "loss": 2.2052, + "step": 7548500 + }, + { + "epoch": 21.85, + "learning_rate": 3.907832762134124e-05, + "loss": 2.2163, + "step": 7549000 + }, + { + "epoch": 21.85, + "learning_rate": 3.907760397369396e-05, + "loss": 2.1789, + "step": 7549500 + }, + { + "epoch": 21.85, + "learning_rate": 3.9076881773341975e-05, + "loss": 2.1577, + "step": 7550000 + }, + { + "epoch": 21.86, + "learning_rate": 3.907615957299e-05, + "loss": 2.196, + "step": 7550500 + }, + { + "epoch": 21.86, + "learning_rate": 3.9075435925342727e-05, + "loss": 2.173, + "step": 7551000 + }, + { + "epoch": 21.86, + "learning_rate": 3.907471227769545e-05, + "loss": 2.193, + "step": 7551500 + }, + { + "epoch": 21.86, + "learning_rate": 3.9073990077343464e-05, + "loss": 2.1713, + "step": 7552000 + }, + { + "epoch": 21.86, + "learning_rate": 3.9073266429696187e-05, + "loss": 2.1857, + "step": 7552500 + }, + { + "epoch": 21.86, + "learning_rate": 3.907254278204891e-05, + "loss": 2.1811, + "step": 7553000 + }, + { + "epoch": 21.86, + "learning_rate": 3.907181913440163e-05, + "loss": 2.1852, + "step": 7553500 + }, + { + "epoch": 21.87, + "learning_rate": 3.907109548675435e-05, + "loss": 2.1536, + "step": 7554000 + }, + { + "epoch": 21.87, + "learning_rate": 3.9070371839107075e-05, + "loss": 2.1752, + "step": 7554500 + }, + { + "epoch": 21.87, + "learning_rate": 3.9069648191459805e-05, + "loss": 2.1734, + "step": 7555000 + }, + { + "epoch": 21.87, + "learning_rate": 3.906892454381253e-05, + "loss": 2.2085, + "step": 7555500 + }, + { + "epoch": 21.87, + "learning_rate": 3.906820234346054e-05, + "loss": 2.2047, + "step": 7556000 + }, + { + "epoch": 21.87, + "learning_rate": 3.9067478695813265e-05, + "loss": 2.1934, + "step": 7556500 + }, + { + "epoch": 21.87, + "learning_rate": 3.906675504816599e-05, + "loss": 2.2059, + "step": 7557000 + }, + { + "epoch": 21.88, + "learning_rate": 3.9066032847814e-05, + "loss": 2.1902, + "step": 7557500 + }, + { + "epoch": 21.88, + "learning_rate": 3.9065309200166725e-05, + "loss": 2.1782, + "step": 7558000 + }, + { + "epoch": 21.88, + "learning_rate": 3.9064585552519454e-05, + "loss": 2.1751, + "step": 7558500 + }, + { + "epoch": 21.88, + "learning_rate": 3.9063861904872176e-05, + "loss": 2.1642, + "step": 7559000 + }, + { + "epoch": 21.88, + "learning_rate": 3.9063138257224905e-05, + "loss": 2.1571, + "step": 7559500 + }, + { + "epoch": 21.88, + "learning_rate": 3.906241460957763e-05, + "loss": 2.1717, + "step": 7560000 + }, + { + "epoch": 21.88, + "learning_rate": 3.906169096193035e-05, + "loss": 2.1746, + "step": 7560500 + }, + { + "epoch": 21.89, + "learning_rate": 3.906096731428307e-05, + "loss": 2.1788, + "step": 7561000 + }, + { + "epoch": 21.89, + "learning_rate": 3.9060243666635794e-05, + "loss": 2.1843, + "step": 7561500 + }, + { + "epoch": 21.89, + "learning_rate": 3.9059520018988516e-05, + "loss": 2.1483, + "step": 7562000 + }, + { + "epoch": 21.89, + "learning_rate": 3.905879637134124e-05, + "loss": 2.1761, + "step": 7562500 + }, + { + "epoch": 21.89, + "learning_rate": 3.905807272369396e-05, + "loss": 2.1484, + "step": 7563000 + }, + { + "epoch": 21.89, + "learning_rate": 3.905734907604668e-05, + "loss": 2.1707, + "step": 7563500 + }, + { + "epoch": 21.89, + "learning_rate": 3.9056626875694705e-05, + "loss": 2.1508, + "step": 7564000 + }, + { + "epoch": 21.9, + "learning_rate": 3.905590322804743e-05, + "loss": 2.2057, + "step": 7564500 + }, + { + "epoch": 21.9, + "learning_rate": 3.905517958040015e-05, + "loss": 2.1926, + "step": 7565000 + }, + { + "epoch": 21.9, + "learning_rate": 3.905445593275288e-05, + "loss": 2.1812, + "step": 7565500 + }, + { + "epoch": 21.9, + "learning_rate": 3.90537322851056e-05, + "loss": 2.2041, + "step": 7566000 + }, + { + "epoch": 21.9, + "learning_rate": 3.9053010084753616e-05, + "loss": 2.1778, + "step": 7566500 + }, + { + "epoch": 21.9, + "learning_rate": 3.905228643710634e-05, + "loss": 2.1626, + "step": 7567000 + }, + { + "epoch": 21.9, + "learning_rate": 3.905156278945906e-05, + "loss": 2.1675, + "step": 7567500 + }, + { + "epoch": 21.91, + "learning_rate": 3.905083914181178e-05, + "loss": 2.1802, + "step": 7568000 + }, + { + "epoch": 21.91, + "learning_rate": 3.9050115494164505e-05, + "loss": 2.1899, + "step": 7568500 + }, + { + "epoch": 21.91, + "learning_rate": 3.904939184651723e-05, + "loss": 2.1737, + "step": 7569000 + }, + { + "epoch": 21.91, + "learning_rate": 3.9048668198869956e-05, + "loss": 2.1874, + "step": 7569500 + }, + { + "epoch": 21.91, + "learning_rate": 3.904794455122268e-05, + "loss": 2.1981, + "step": 7570000 + }, + { + "epoch": 21.91, + "learning_rate": 3.90472209035754e-05, + "loss": 2.1752, + "step": 7570500 + }, + { + "epoch": 21.91, + "learning_rate": 3.9046498703223416e-05, + "loss": 2.2074, + "step": 7571000 + }, + { + "epoch": 21.92, + "learning_rate": 3.904577650287143e-05, + "loss": 2.1879, + "step": 7571500 + }, + { + "epoch": 21.92, + "learning_rate": 3.9045052855224154e-05, + "loss": 2.18, + "step": 7572000 + }, + { + "epoch": 21.92, + "learning_rate": 3.9044329207576876e-05, + "loss": 2.1933, + "step": 7572500 + }, + { + "epoch": 21.92, + "learning_rate": 3.9043605559929606e-05, + "loss": 2.1751, + "step": 7573000 + }, + { + "epoch": 21.92, + "learning_rate": 3.904288191228233e-05, + "loss": 2.1637, + "step": 7573500 + }, + { + "epoch": 21.92, + "learning_rate": 3.904215826463506e-05, + "loss": 2.1561, + "step": 7574000 + }, + { + "epoch": 21.93, + "learning_rate": 3.904143461698778e-05, + "loss": 2.1432, + "step": 7574500 + }, + { + "epoch": 21.93, + "learning_rate": 3.90407109693405e-05, + "loss": 2.1915, + "step": 7575000 + }, + { + "epoch": 21.93, + "learning_rate": 3.9039987321693223e-05, + "loss": 2.2253, + "step": 7575500 + }, + { + "epoch": 21.93, + "learning_rate": 3.903926512134124e-05, + "loss": 2.1731, + "step": 7576000 + }, + { + "epoch": 21.93, + "learning_rate": 3.903854147369396e-05, + "loss": 2.1778, + "step": 7576500 + }, + { + "epoch": 21.93, + "learning_rate": 3.9037817826046683e-05, + "loss": 2.17, + "step": 7577000 + }, + { + "epoch": 21.93, + "learning_rate": 3.9037094178399406e-05, + "loss": 2.2105, + "step": 7577500 + }, + { + "epoch": 21.94, + "learning_rate": 3.903637053075213e-05, + "loss": 2.1666, + "step": 7578000 + }, + { + "epoch": 21.94, + "learning_rate": 3.903564688310486e-05, + "loss": 2.1828, + "step": 7578500 + }, + { + "epoch": 21.94, + "learning_rate": 3.903492323545758e-05, + "loss": 2.192, + "step": 7579000 + }, + { + "epoch": 21.94, + "learning_rate": 3.90341995878103e-05, + "loss": 2.1726, + "step": 7579500 + }, + { + "epoch": 21.94, + "learning_rate": 3.903347594016303e-05, + "loss": 2.1655, + "step": 7580000 + }, + { + "epoch": 21.94, + "learning_rate": 3.9032753739811046e-05, + "loss": 2.1777, + "step": 7580500 + }, + { + "epoch": 21.94, + "learning_rate": 3.903203009216377e-05, + "loss": 2.1563, + "step": 7581000 + }, + { + "epoch": 21.95, + "learning_rate": 3.903130644451649e-05, + "loss": 2.1907, + "step": 7581500 + }, + { + "epoch": 21.95, + "learning_rate": 3.9030584244164506e-05, + "loss": 2.1966, + "step": 7582000 + }, + { + "epoch": 21.95, + "learning_rate": 3.902986059651723e-05, + "loss": 2.1691, + "step": 7582500 + }, + { + "epoch": 21.95, + "learning_rate": 3.902913694886996e-05, + "loss": 2.1687, + "step": 7583000 + }, + { + "epoch": 21.95, + "learning_rate": 3.902841330122268e-05, + "loss": 2.1792, + "step": 7583500 + }, + { + "epoch": 21.95, + "learning_rate": 3.9027691100870695e-05, + "loss": 2.1698, + "step": 7584000 + }, + { + "epoch": 21.95, + "learning_rate": 3.902696745322342e-05, + "loss": 2.1715, + "step": 7584500 + }, + { + "epoch": 21.96, + "learning_rate": 3.902624380557614e-05, + "loss": 2.1834, + "step": 7585000 + }, + { + "epoch": 21.96, + "learning_rate": 3.902552015792886e-05, + "loss": 2.1836, + "step": 7585500 + }, + { + "epoch": 21.96, + "learning_rate": 3.9024797957576884e-05, + "loss": 2.1816, + "step": 7586000 + }, + { + "epoch": 21.96, + "learning_rate": 3.9024074309929606e-05, + "loss": 2.1679, + "step": 7586500 + }, + { + "epoch": 21.96, + "learning_rate": 3.902335210957762e-05, + "loss": 2.1657, + "step": 7587000 + }, + { + "epoch": 21.96, + "learning_rate": 3.9022628461930344e-05, + "loss": 2.1966, + "step": 7587500 + }, + { + "epoch": 21.96, + "learning_rate": 3.902190481428307e-05, + "loss": 2.1623, + "step": 7588000 + }, + { + "epoch": 21.97, + "learning_rate": 3.9021181166635795e-05, + "loss": 2.1577, + "step": 7588500 + }, + { + "epoch": 21.97, + "learning_rate": 3.902045896628381e-05, + "loss": 2.2023, + "step": 7589000 + }, + { + "epoch": 21.97, + "learning_rate": 3.901973531863653e-05, + "loss": 2.1655, + "step": 7589500 + }, + { + "epoch": 21.97, + "learning_rate": 3.9019011670989255e-05, + "loss": 2.1756, + "step": 7590000 + }, + { + "epoch": 21.97, + "learning_rate": 3.901828947063728e-05, + "loss": 2.1583, + "step": 7590500 + }, + { + "epoch": 21.97, + "learning_rate": 3.901756582299e-05, + "loss": 2.1931, + "step": 7591000 + }, + { + "epoch": 21.97, + "learning_rate": 3.901684217534272e-05, + "loss": 2.1851, + "step": 7591500 + }, + { + "epoch": 21.98, + "learning_rate": 3.9016118527695444e-05, + "loss": 2.1946, + "step": 7592000 + }, + { + "epoch": 21.98, + "learning_rate": 3.901539488004817e-05, + "loss": 2.1678, + "step": 7592500 + }, + { + "epoch": 21.98, + "learning_rate": 3.901467123240089e-05, + "loss": 2.1697, + "step": 7593000 + }, + { + "epoch": 21.98, + "learning_rate": 3.901394758475361e-05, + "loss": 2.1563, + "step": 7593500 + }, + { + "epoch": 21.98, + "learning_rate": 3.9013225384401633e-05, + "loss": 2.1698, + "step": 7594000 + }, + { + "epoch": 21.98, + "learning_rate": 3.9012501736754356e-05, + "loss": 2.1727, + "step": 7594500 + }, + { + "epoch": 21.98, + "learning_rate": 3.901177808910708e-05, + "loss": 2.1725, + "step": 7595000 + }, + { + "epoch": 21.99, + "learning_rate": 3.901105444145981e-05, + "loss": 2.171, + "step": 7595500 + }, + { + "epoch": 21.99, + "learning_rate": 3.901033079381253e-05, + "loss": 2.1822, + "step": 7596000 + }, + { + "epoch": 21.99, + "learning_rate": 3.900960714616525e-05, + "loss": 2.1791, + "step": 7596500 + }, + { + "epoch": 21.99, + "learning_rate": 3.9008883498517974e-05, + "loss": 2.1904, + "step": 7597000 + }, + { + "epoch": 21.99, + "learning_rate": 3.9008159850870696e-05, + "loss": 2.1842, + "step": 7597500 + }, + { + "epoch": 21.99, + "learning_rate": 3.900743620322342e-05, + "loss": 2.1409, + "step": 7598000 + }, + { + "epoch": 21.99, + "learning_rate": 3.9006714002871434e-05, + "loss": 2.1635, + "step": 7598500 + }, + { + "epoch": 22.0, + "learning_rate": 3.9005990355224156e-05, + "loss": 2.1908, + "step": 7599000 + }, + { + "epoch": 22.0, + "learning_rate": 3.9005266707576885e-05, + "loss": 2.179, + "step": 7599500 + }, + { + "epoch": 22.0, + "learning_rate": 3.900454305992961e-05, + "loss": 2.1825, + "step": 7600000 + }, + { + "epoch": 22.0, + "eval_accuracy": 0.6618738376038161, + "eval_accuracy_mlm": 0.6256257501839781, + "eval_accuracy_nsp": 0.8561273734721095, + "eval_loss": 2.2176835536956787, + "eval_runtime": 331.6809, + "eval_samples_per_second": 1315.68, + "eval_steps_per_second": 54.821, + "step": 7600384 + }, + { + "epoch": 22.0, + "learning_rate": 3.900381941228233e-05, + "loss": 2.1843, + "step": 7600500 + }, + { + "epoch": 22.0, + "learning_rate": 3.900309576463505e-05, + "loss": 2.1525, + "step": 7601000 + }, + { + "epoch": 22.0, + "learning_rate": 3.900237356428307e-05, + "loss": 2.1665, + "step": 7601500 + }, + { + "epoch": 22.0, + "learning_rate": 3.900164991663579e-05, + "loss": 2.1588, + "step": 7602000 + }, + { + "epoch": 22.01, + "learning_rate": 3.900092626898851e-05, + "loss": 2.1481, + "step": 7602500 + }, + { + "epoch": 22.01, + "learning_rate": 3.900020262134124e-05, + "loss": 2.1454, + "step": 7603000 + }, + { + "epoch": 22.01, + "learning_rate": 3.899947897369396e-05, + "loss": 2.1571, + "step": 7603500 + }, + { + "epoch": 22.01, + "learning_rate": 3.8998755326046685e-05, + "loss": 2.1507, + "step": 7604000 + }, + { + "epoch": 22.01, + "learning_rate": 3.899803312569471e-05, + "loss": 2.1619, + "step": 7604500 + }, + { + "epoch": 22.01, + "learning_rate": 3.899730947804743e-05, + "loss": 2.1628, + "step": 7605000 + }, + { + "epoch": 22.01, + "learning_rate": 3.899658583040015e-05, + "loss": 2.1625, + "step": 7605500 + }, + { + "epoch": 22.02, + "learning_rate": 3.8995862182752874e-05, + "loss": 2.1462, + "step": 7606000 + }, + { + "epoch": 22.02, + "learning_rate": 3.899513998240089e-05, + "loss": 2.1624, + "step": 7606500 + }, + { + "epoch": 22.02, + "learning_rate": 3.899441778204891e-05, + "loss": 2.1682, + "step": 7607000 + }, + { + "epoch": 22.02, + "learning_rate": 3.8993694134401634e-05, + "loss": 2.1578, + "step": 7607500 + }, + { + "epoch": 22.02, + "learning_rate": 3.8992970486754356e-05, + "loss": 2.1436, + "step": 7608000 + }, + { + "epoch": 22.02, + "learning_rate": 3.899224683910708e-05, + "loss": 2.1512, + "step": 7608500 + }, + { + "epoch": 22.02, + "learning_rate": 3.89915231914598e-05, + "loss": 2.1432, + "step": 7609000 + }, + { + "epoch": 22.03, + "learning_rate": 3.899079954381252e-05, + "loss": 2.1692, + "step": 7609500 + }, + { + "epoch": 22.03, + "learning_rate": 3.8990075896165245e-05, + "loss": 2.1634, + "step": 7610000 + }, + { + "epoch": 22.03, + "learning_rate": 3.8989352248517974e-05, + "loss": 2.1887, + "step": 7610500 + }, + { + "epoch": 22.03, + "learning_rate": 3.898863004816599e-05, + "loss": 2.1697, + "step": 7611000 + }, + { + "epoch": 22.03, + "learning_rate": 3.898790640051871e-05, + "loss": 2.1452, + "step": 7611500 + }, + { + "epoch": 22.03, + "learning_rate": 3.8987182752871434e-05, + "loss": 2.1169, + "step": 7612000 + }, + { + "epoch": 22.04, + "learning_rate": 3.8986459105224163e-05, + "loss": 2.1549, + "step": 7612500 + }, + { + "epoch": 22.04, + "learning_rate": 3.898573690487218e-05, + "loss": 2.1482, + "step": 7613000 + }, + { + "epoch": 22.04, + "learning_rate": 3.89850132572249e-05, + "loss": 2.1528, + "step": 7613500 + }, + { + "epoch": 22.04, + "learning_rate": 3.8984289609577623e-05, + "loss": 2.1622, + "step": 7614000 + }, + { + "epoch": 22.04, + "learning_rate": 3.8983565961930346e-05, + "loss": 2.1683, + "step": 7614500 + }, + { + "epoch": 22.04, + "learning_rate": 3.898284231428307e-05, + "loss": 2.1819, + "step": 7615000 + }, + { + "epoch": 22.04, + "learning_rate": 3.898211866663579e-05, + "loss": 2.1598, + "step": 7615500 + }, + { + "epoch": 22.05, + "learning_rate": 3.898139501898851e-05, + "loss": 2.1426, + "step": 7616000 + }, + { + "epoch": 22.05, + "learning_rate": 3.8980671371341235e-05, + "loss": 2.1521, + "step": 7616500 + }, + { + "epoch": 22.05, + "learning_rate": 3.8979947723693964e-05, + "loss": 2.1691, + "step": 7617000 + }, + { + "epoch": 22.05, + "learning_rate": 3.897922552334198e-05, + "loss": 2.1551, + "step": 7617500 + }, + { + "epoch": 22.05, + "learning_rate": 3.897850187569471e-05, + "loss": 2.156, + "step": 7618000 + }, + { + "epoch": 22.05, + "learning_rate": 3.8977779675342724e-05, + "loss": 2.1732, + "step": 7618500 + }, + { + "epoch": 22.05, + "learning_rate": 3.8977056027695446e-05, + "loss": 2.1632, + "step": 7619000 + }, + { + "epoch": 22.06, + "learning_rate": 3.897633238004817e-05, + "loss": 2.1695, + "step": 7619500 + }, + { + "epoch": 22.06, + "learning_rate": 3.897560873240089e-05, + "loss": 2.1729, + "step": 7620000 + }, + { + "epoch": 22.06, + "learning_rate": 3.8974887979344206e-05, + "loss": 2.1893, + "step": 7620500 + }, + { + "epoch": 22.06, + "learning_rate": 3.897416433169693e-05, + "loss": 2.1657, + "step": 7621000 + }, + { + "epoch": 22.06, + "learning_rate": 3.897344068404965e-05, + "loss": 2.1659, + "step": 7621500 + }, + { + "epoch": 22.06, + "learning_rate": 3.897271703640237e-05, + "loss": 2.1542, + "step": 7622000 + }, + { + "epoch": 22.06, + "learning_rate": 3.8971993388755095e-05, + "loss": 2.1733, + "step": 7622500 + }, + { + "epoch": 22.07, + "learning_rate": 3.897127118840311e-05, + "loss": 2.1531, + "step": 7623000 + }, + { + "epoch": 22.07, + "learning_rate": 3.897054754075584e-05, + "loss": 2.155, + "step": 7623500 + }, + { + "epoch": 22.07, + "learning_rate": 3.896982389310856e-05, + "loss": 2.1917, + "step": 7624000 + }, + { + "epoch": 22.07, + "learning_rate": 3.8969100245461284e-05, + "loss": 2.1622, + "step": 7624500 + }, + { + "epoch": 22.07, + "learning_rate": 3.8968376597814006e-05, + "loss": 2.1771, + "step": 7625000 + }, + { + "epoch": 22.07, + "learning_rate": 3.896765295016673e-05, + "loss": 2.1242, + "step": 7625500 + }, + { + "epoch": 22.07, + "learning_rate": 3.896692930251946e-05, + "loss": 2.1531, + "step": 7626000 + }, + { + "epoch": 22.08, + "learning_rate": 3.896620565487218e-05, + "loss": 2.1461, + "step": 7626500 + }, + { + "epoch": 22.08, + "learning_rate": 3.89654820072249e-05, + "loss": 2.1461, + "step": 7627000 + }, + { + "epoch": 22.08, + "learning_rate": 3.896475980687292e-05, + "loss": 2.1567, + "step": 7627500 + }, + { + "epoch": 22.08, + "learning_rate": 3.896403615922564e-05, + "loss": 2.157, + "step": 7628000 + }, + { + "epoch": 22.08, + "learning_rate": 3.896331395887366e-05, + "loss": 2.1644, + "step": 7628500 + }, + { + "epoch": 22.08, + "learning_rate": 3.8962590311226384e-05, + "loss": 2.1523, + "step": 7629000 + }, + { + "epoch": 22.08, + "learning_rate": 3.89618681108744e-05, + "loss": 2.1733, + "step": 7629500 + }, + { + "epoch": 22.09, + "learning_rate": 3.896114446322712e-05, + "loss": 2.1623, + "step": 7630000 + }, + { + "epoch": 22.09, + "learning_rate": 3.8960420815579844e-05, + "loss": 2.164, + "step": 7630500 + }, + { + "epoch": 22.09, + "learning_rate": 3.895969716793257e-05, + "loss": 2.1664, + "step": 7631000 + }, + { + "epoch": 22.09, + "learning_rate": 3.895897352028529e-05, + "loss": 2.1459, + "step": 7631500 + }, + { + "epoch": 22.09, + "learning_rate": 3.895824987263801e-05, + "loss": 2.157, + "step": 7632000 + }, + { + "epoch": 22.09, + "learning_rate": 3.895752622499074e-05, + "loss": 2.1557, + "step": 7632500 + }, + { + "epoch": 22.09, + "learning_rate": 3.895680257734346e-05, + "loss": 2.1615, + "step": 7633000 + }, + { + "epoch": 22.1, + "learning_rate": 3.895607892969619e-05, + "loss": 2.1445, + "step": 7633500 + }, + { + "epoch": 22.1, + "learning_rate": 3.8955355282048914e-05, + "loss": 2.1793, + "step": 7634000 + }, + { + "epoch": 22.1, + "learning_rate": 3.8954631634401636e-05, + "loss": 2.1503, + "step": 7634500 + }, + { + "epoch": 22.1, + "learning_rate": 3.895390798675436e-05, + "loss": 2.1457, + "step": 7635000 + }, + { + "epoch": 22.1, + "learning_rate": 3.895318433910708e-05, + "loss": 2.1628, + "step": 7635500 + }, + { + "epoch": 22.1, + "learning_rate": 3.8952462138755096e-05, + "loss": 2.1451, + "step": 7636000 + }, + { + "epoch": 22.1, + "learning_rate": 3.895173849110782e-05, + "loss": 2.1431, + "step": 7636500 + }, + { + "epoch": 22.11, + "learning_rate": 3.895101484346054e-05, + "loss": 2.1634, + "step": 7637000 + }, + { + "epoch": 22.11, + "learning_rate": 3.895029119581326e-05, + "loss": 2.1614, + "step": 7637500 + }, + { + "epoch": 22.11, + "learning_rate": 3.894956754816599e-05, + "loss": 2.163, + "step": 7638000 + }, + { + "epoch": 22.11, + "learning_rate": 3.89488467951093e-05, + "loss": 2.1657, + "step": 7638500 + }, + { + "epoch": 22.11, + "learning_rate": 3.894812314746202e-05, + "loss": 2.1856, + "step": 7639000 + }, + { + "epoch": 22.11, + "learning_rate": 3.8947399499814745e-05, + "loss": 2.1445, + "step": 7639500 + }, + { + "epoch": 22.11, + "learning_rate": 3.894667585216747e-05, + "loss": 2.192, + "step": 7640000 + }, + { + "epoch": 22.12, + "learning_rate": 3.894595220452019e-05, + "loss": 2.1555, + "step": 7640500 + }, + { + "epoch": 22.12, + "learning_rate": 3.894522855687292e-05, + "loss": 2.1486, + "step": 7641000 + }, + { + "epoch": 22.12, + "learning_rate": 3.894450490922564e-05, + "loss": 2.1745, + "step": 7641500 + }, + { + "epoch": 22.12, + "learning_rate": 3.894378126157836e-05, + "loss": 2.1469, + "step": 7642000 + }, + { + "epoch": 22.12, + "learning_rate": 3.8943059061226385e-05, + "loss": 2.1672, + "step": 7642500 + }, + { + "epoch": 22.12, + "learning_rate": 3.89423368608744e-05, + "loss": 2.1663, + "step": 7643000 + }, + { + "epoch": 22.12, + "learning_rate": 3.894161321322712e-05, + "loss": 2.1638, + "step": 7643500 + }, + { + "epoch": 22.13, + "learning_rate": 3.8940889565579845e-05, + "loss": 2.1588, + "step": 7644000 + }, + { + "epoch": 22.13, + "learning_rate": 3.894016591793257e-05, + "loss": 2.163, + "step": 7644500 + }, + { + "epoch": 22.13, + "learning_rate": 3.893944227028529e-05, + "loss": 2.1767, + "step": 7645000 + }, + { + "epoch": 22.13, + "learning_rate": 3.893871862263801e-05, + "loss": 2.171, + "step": 7645500 + }, + { + "epoch": 22.13, + "learning_rate": 3.893799497499074e-05, + "loss": 2.1679, + "step": 7646000 + }, + { + "epoch": 22.13, + "learning_rate": 3.893727132734346e-05, + "loss": 2.1847, + "step": 7646500 + }, + { + "epoch": 22.13, + "learning_rate": 3.8936547679696185e-05, + "loss": 2.1575, + "step": 7647000 + }, + { + "epoch": 22.14, + "learning_rate": 3.893582403204891e-05, + "loss": 2.1725, + "step": 7647500 + }, + { + "epoch": 22.14, + "learning_rate": 3.893510038440163e-05, + "loss": 2.1721, + "step": 7648000 + }, + { + "epoch": 22.14, + "learning_rate": 3.893437673675436e-05, + "loss": 2.1598, + "step": 7648500 + }, + { + "epoch": 22.14, + "learning_rate": 3.893365308910708e-05, + "loss": 2.1628, + "step": 7649000 + }, + { + "epoch": 22.14, + "learning_rate": 3.89329308887551e-05, + "loss": 2.1704, + "step": 7649500 + }, + { + "epoch": 22.14, + "learning_rate": 3.893220724110782e-05, + "loss": 2.1719, + "step": 7650000 + }, + { + "epoch": 22.15, + "learning_rate": 3.893148359346054e-05, + "loss": 2.1402, + "step": 7650500 + }, + { + "epoch": 22.15, + "learning_rate": 3.893075994581326e-05, + "loss": 2.1626, + "step": 7651000 + }, + { + "epoch": 22.15, + "learning_rate": 3.893003629816599e-05, + "loss": 2.1506, + "step": 7651500 + }, + { + "epoch": 22.15, + "learning_rate": 3.8929312650518715e-05, + "loss": 2.1639, + "step": 7652000 + }, + { + "epoch": 22.15, + "learning_rate": 3.892858900287144e-05, + "loss": 2.1733, + "step": 7652500 + }, + { + "epoch": 22.15, + "learning_rate": 3.892786535522416e-05, + "loss": 2.1566, + "step": 7653000 + }, + { + "epoch": 22.15, + "learning_rate": 3.8927143154872175e-05, + "loss": 2.1792, + "step": 7653500 + }, + { + "epoch": 22.16, + "learning_rate": 3.89264195072249e-05, + "loss": 2.1474, + "step": 7654000 + }, + { + "epoch": 22.16, + "learning_rate": 3.892569585957762e-05, + "loss": 2.1741, + "step": 7654500 + }, + { + "epoch": 22.16, + "learning_rate": 3.892497221193034e-05, + "loss": 2.1564, + "step": 7655000 + }, + { + "epoch": 22.16, + "learning_rate": 3.892424856428307e-05, + "loss": 2.1549, + "step": 7655500 + }, + { + "epoch": 22.16, + "learning_rate": 3.8923527811226386e-05, + "loss": 2.1689, + "step": 7656000 + }, + { + "epoch": 22.16, + "learning_rate": 3.892280416357911e-05, + "loss": 2.1525, + "step": 7656500 + }, + { + "epoch": 22.16, + "learning_rate": 3.892208051593183e-05, + "loss": 2.1423, + "step": 7657000 + }, + { + "epoch": 22.17, + "learning_rate": 3.892135686828455e-05, + "loss": 2.1789, + "step": 7657500 + }, + { + "epoch": 22.17, + "learning_rate": 3.8920633220637275e-05, + "loss": 2.1807, + "step": 7658000 + }, + { + "epoch": 22.17, + "learning_rate": 3.891990957299e-05, + "loss": 2.1792, + "step": 7658500 + }, + { + "epoch": 22.17, + "learning_rate": 3.891918592534272e-05, + "loss": 2.1756, + "step": 7659000 + }, + { + "epoch": 22.17, + "learning_rate": 3.891846227769544e-05, + "loss": 2.1662, + "step": 7659500 + }, + { + "epoch": 22.17, + "learning_rate": 3.891773863004817e-05, + "loss": 2.1724, + "step": 7660000 + }, + { + "epoch": 22.17, + "learning_rate": 3.891701498240089e-05, + "loss": 2.147, + "step": 7660500 + }, + { + "epoch": 22.18, + "learning_rate": 3.8916291334753615e-05, + "loss": 2.1614, + "step": 7661000 + }, + { + "epoch": 22.18, + "learning_rate": 3.891556913440163e-05, + "loss": 2.1831, + "step": 7661500 + }, + { + "epoch": 22.18, + "learning_rate": 3.891484548675435e-05, + "loss": 2.1466, + "step": 7662000 + }, + { + "epoch": 22.18, + "learning_rate": 3.8914121839107075e-05, + "loss": 2.1531, + "step": 7662500 + }, + { + "epoch": 22.18, + "learning_rate": 3.8913398191459804e-05, + "loss": 2.1472, + "step": 7663000 + }, + { + "epoch": 22.18, + "learning_rate": 3.8912674543812526e-05, + "loss": 2.1663, + "step": 7663500 + }, + { + "epoch": 22.18, + "learning_rate": 3.891195089616525e-05, + "loss": 2.1634, + "step": 7664000 + }, + { + "epoch": 22.19, + "learning_rate": 3.891122869581327e-05, + "loss": 2.1659, + "step": 7664500 + }, + { + "epoch": 22.19, + "learning_rate": 3.891050504816599e-05, + "loss": 2.164, + "step": 7665000 + }, + { + "epoch": 22.19, + "learning_rate": 3.8909781400518715e-05, + "loss": 2.1755, + "step": 7665500 + }, + { + "epoch": 22.19, + "learning_rate": 3.890905775287144e-05, + "loss": 2.1684, + "step": 7666000 + }, + { + "epoch": 22.19, + "learning_rate": 3.890833555251945e-05, + "loss": 2.1736, + "step": 7666500 + }, + { + "epoch": 22.19, + "learning_rate": 3.8907611904872175e-05, + "loss": 2.1987, + "step": 7667000 + }, + { + "epoch": 22.19, + "learning_rate": 3.89068882572249e-05, + "loss": 2.1778, + "step": 7667500 + }, + { + "epoch": 22.2, + "learning_rate": 3.890616460957762e-05, + "loss": 2.1562, + "step": 7668000 + }, + { + "epoch": 22.2, + "learning_rate": 3.890544096193034e-05, + "loss": 2.1766, + "step": 7668500 + }, + { + "epoch": 22.2, + "learning_rate": 3.890471731428307e-05, + "loss": 2.1803, + "step": 7669000 + }, + { + "epoch": 22.2, + "learning_rate": 3.890399366663579e-05, + "loss": 2.1537, + "step": 7669500 + }, + { + "epoch": 22.2, + "learning_rate": 3.8903270018988516e-05, + "loss": 2.1523, + "step": 7670000 + }, + { + "epoch": 22.2, + "learning_rate": 3.8902546371341245e-05, + "loss": 2.1493, + "step": 7670500 + }, + { + "epoch": 22.2, + "learning_rate": 3.8901825618284553e-05, + "loss": 2.1617, + "step": 7671000 + }, + { + "epoch": 22.21, + "learning_rate": 3.8901101970637276e-05, + "loss": 2.1887, + "step": 7671500 + }, + { + "epoch": 22.21, + "learning_rate": 3.890037832299e-05, + "loss": 2.1946, + "step": 7672000 + }, + { + "epoch": 22.21, + "learning_rate": 3.889965467534272e-05, + "loss": 2.1536, + "step": 7672500 + }, + { + "epoch": 22.21, + "learning_rate": 3.889893102769544e-05, + "loss": 2.1721, + "step": 7673000 + }, + { + "epoch": 22.21, + "learning_rate": 3.889820738004817e-05, + "loss": 2.1846, + "step": 7673500 + }, + { + "epoch": 22.21, + "learning_rate": 3.889748517969619e-05, + "loss": 2.1547, + "step": 7674000 + }, + { + "epoch": 22.21, + "learning_rate": 3.889676153204891e-05, + "loss": 2.1656, + "step": 7674500 + }, + { + "epoch": 22.22, + "learning_rate": 3.889603788440163e-05, + "loss": 2.1508, + "step": 7675000 + }, + { + "epoch": 22.22, + "learning_rate": 3.889531568404965e-05, + "loss": 2.182, + "step": 7675500 + }, + { + "epoch": 22.22, + "learning_rate": 3.889459203640237e-05, + "loss": 2.1695, + "step": 7676000 + }, + { + "epoch": 22.22, + "learning_rate": 3.889386838875509e-05, + "loss": 2.1854, + "step": 7676500 + }, + { + "epoch": 22.22, + "learning_rate": 3.889314474110782e-05, + "loss": 2.1706, + "step": 7677000 + }, + { + "epoch": 22.22, + "learning_rate": 3.889242109346054e-05, + "loss": 2.1534, + "step": 7677500 + }, + { + "epoch": 22.22, + "learning_rate": 3.889169744581327e-05, + "loss": 2.175, + "step": 7678000 + }, + { + "epoch": 22.23, + "learning_rate": 3.8890973798165994e-05, + "loss": 2.1651, + "step": 7678500 + }, + { + "epoch": 22.23, + "learning_rate": 3.8890250150518716e-05, + "loss": 2.1777, + "step": 7679000 + }, + { + "epoch": 22.23, + "learning_rate": 3.888952650287144e-05, + "loss": 2.1544, + "step": 7679500 + }, + { + "epoch": 22.23, + "learning_rate": 3.888880285522416e-05, + "loss": 2.1813, + "step": 7680000 + }, + { + "epoch": 22.23, + "learning_rate": 3.888807920757688e-05, + "loss": 2.1685, + "step": 7680500 + }, + { + "epoch": 22.23, + "learning_rate": 3.8887355559929605e-05, + "loss": 2.1689, + "step": 7681000 + }, + { + "epoch": 22.23, + "learning_rate": 3.888663335957762e-05, + "loss": 2.1667, + "step": 7681500 + }, + { + "epoch": 22.24, + "learning_rate": 3.888591115922564e-05, + "loss": 2.157, + "step": 7682000 + }, + { + "epoch": 22.24, + "learning_rate": 3.8885187511578365e-05, + "loss": 2.1786, + "step": 7682500 + }, + { + "epoch": 22.24, + "learning_rate": 3.888446386393109e-05, + "loss": 2.1723, + "step": 7683000 + }, + { + "epoch": 22.24, + "learning_rate": 3.888374021628381e-05, + "loss": 2.1667, + "step": 7683500 + }, + { + "epoch": 22.24, + "learning_rate": 3.888301656863653e-05, + "loss": 2.1395, + "step": 7684000 + }, + { + "epoch": 22.24, + "learning_rate": 3.8882292920989254e-05, + "loss": 2.1671, + "step": 7684500 + }, + { + "epoch": 22.24, + "learning_rate": 3.8881569273341976e-05, + "loss": 2.1683, + "step": 7685000 + }, + { + "epoch": 22.25, + "learning_rate": 3.8880845625694705e-05, + "loss": 2.1823, + "step": 7685500 + }, + { + "epoch": 22.25, + "learning_rate": 3.888012197804743e-05, + "loss": 2.179, + "step": 7686000 + }, + { + "epoch": 22.25, + "learning_rate": 3.887939833040015e-05, + "loss": 2.1606, + "step": 7686500 + }, + { + "epoch": 22.25, + "learning_rate": 3.887867613004817e-05, + "loss": 2.1783, + "step": 7687000 + }, + { + "epoch": 22.25, + "learning_rate": 3.887795392969619e-05, + "loss": 2.1652, + "step": 7687500 + }, + { + "epoch": 22.25, + "learning_rate": 3.887723028204891e-05, + "loss": 2.1588, + "step": 7688000 + }, + { + "epoch": 22.26, + "learning_rate": 3.887650663440163e-05, + "loss": 2.1628, + "step": 7688500 + }, + { + "epoch": 22.26, + "learning_rate": 3.8875782986754354e-05, + "loss": 2.171, + "step": 7689000 + }, + { + "epoch": 22.26, + "learning_rate": 3.887505933910708e-05, + "loss": 2.168, + "step": 7689500 + }, + { + "epoch": 22.26, + "learning_rate": 3.88743356914598e-05, + "loss": 2.1617, + "step": 7690000 + }, + { + "epoch": 22.26, + "learning_rate": 3.887361204381252e-05, + "loss": 2.1307, + "step": 7690500 + }, + { + "epoch": 22.26, + "learning_rate": 3.887288839616525e-05, + "loss": 2.1626, + "step": 7691000 + }, + { + "epoch": 22.26, + "learning_rate": 3.887216474851797e-05, + "loss": 2.149, + "step": 7691500 + }, + { + "epoch": 22.27, + "learning_rate": 3.887144254816599e-05, + "loss": 2.1757, + "step": 7692000 + }, + { + "epoch": 22.27, + "learning_rate": 3.887071890051871e-05, + "loss": 2.1794, + "step": 7692500 + }, + { + "epoch": 22.27, + "learning_rate": 3.886999525287144e-05, + "loss": 2.1467, + "step": 7693000 + }, + { + "epoch": 22.27, + "learning_rate": 3.886927160522416e-05, + "loss": 2.1599, + "step": 7693500 + }, + { + "epoch": 22.27, + "learning_rate": 3.8868547957576884e-05, + "loss": 2.157, + "step": 7694000 + }, + { + "epoch": 22.27, + "learning_rate": 3.88678257572249e-05, + "loss": 2.1619, + "step": 7694500 + }, + { + "epoch": 22.27, + "learning_rate": 3.886710210957762e-05, + "loss": 2.1628, + "step": 7695000 + }, + { + "epoch": 22.28, + "learning_rate": 3.886637846193035e-05, + "loss": 2.1698, + "step": 7695500 + }, + { + "epoch": 22.28, + "learning_rate": 3.886565481428307e-05, + "loss": 2.161, + "step": 7696000 + }, + { + "epoch": 22.28, + "learning_rate": 3.8864931166635795e-05, + "loss": 2.1825, + "step": 7696500 + }, + { + "epoch": 22.28, + "learning_rate": 3.886420896628381e-05, + "loss": 2.1718, + "step": 7697000 + }, + { + "epoch": 22.28, + "learning_rate": 3.886348531863653e-05, + "loss": 2.1692, + "step": 7697500 + }, + { + "epoch": 22.28, + "learning_rate": 3.8862761670989255e-05, + "loss": 2.148, + "step": 7698000 + }, + { + "epoch": 22.28, + "learning_rate": 3.886203802334198e-05, + "loss": 2.1812, + "step": 7698500 + }, + { + "epoch": 22.29, + "learning_rate": 3.886131582299e-05, + "loss": 2.1719, + "step": 7699000 + }, + { + "epoch": 22.29, + "learning_rate": 3.886059217534272e-05, + "loss": 2.1468, + "step": 7699500 + }, + { + "epoch": 22.29, + "learning_rate": 3.885986997499074e-05, + "loss": 2.1779, + "step": 7700000 + }, + { + "epoch": 22.29, + "learning_rate": 3.885914632734346e-05, + "loss": 2.1632, + "step": 7700500 + }, + { + "epoch": 22.29, + "learning_rate": 3.885842267969619e-05, + "loss": 2.1364, + "step": 7701000 + }, + { + "epoch": 22.29, + "learning_rate": 3.885769903204891e-05, + "loss": 2.1426, + "step": 7701500 + }, + { + "epoch": 22.29, + "learning_rate": 3.885697538440163e-05, + "loss": 2.1761, + "step": 7702000 + }, + { + "epoch": 22.3, + "learning_rate": 3.8856251736754355e-05, + "loss": 2.1409, + "step": 7702500 + }, + { + "epoch": 22.3, + "learning_rate": 3.885552808910708e-05, + "loss": 2.1685, + "step": 7703000 + }, + { + "epoch": 22.3, + "learning_rate": 3.88548044414598e-05, + "loss": 2.1555, + "step": 7703500 + }, + { + "epoch": 22.3, + "learning_rate": 3.885408079381252e-05, + "loss": 2.1595, + "step": 7704000 + }, + { + "epoch": 22.3, + "learning_rate": 3.885335714616525e-05, + "loss": 2.1716, + "step": 7704500 + }, + { + "epoch": 22.3, + "learning_rate": 3.8852634945813267e-05, + "loss": 2.1818, + "step": 7705000 + }, + { + "epoch": 22.3, + "learning_rate": 3.885191129816599e-05, + "loss": 2.1685, + "step": 7705500 + }, + { + "epoch": 22.31, + "learning_rate": 3.885118765051871e-05, + "loss": 2.1947, + "step": 7706000 + }, + { + "epoch": 22.31, + "learning_rate": 3.885046400287143e-05, + "loss": 2.1836, + "step": 7706500 + }, + { + "epoch": 22.31, + "learning_rate": 3.8849740355224155e-05, + "loss": 2.1672, + "step": 7707000 + }, + { + "epoch": 22.31, + "learning_rate": 3.884901670757688e-05, + "loss": 2.1584, + "step": 7707500 + }, + { + "epoch": 22.31, + "learning_rate": 3.884829305992961e-05, + "loss": 2.1617, + "step": 7708000 + }, + { + "epoch": 22.31, + "learning_rate": 3.884756941228233e-05, + "loss": 2.1605, + "step": 7708500 + }, + { + "epoch": 22.31, + "learning_rate": 3.884684576463505e-05, + "loss": 2.1945, + "step": 7709000 + }, + { + "epoch": 22.32, + "learning_rate": 3.8846123564283073e-05, + "loss": 2.1833, + "step": 7709500 + }, + { + "epoch": 22.32, + "learning_rate": 3.8845399916635796e-05, + "loss": 2.1624, + "step": 7710000 + }, + { + "epoch": 22.32, + "learning_rate": 3.884467626898852e-05, + "loss": 2.1593, + "step": 7710500 + }, + { + "epoch": 22.32, + "learning_rate": 3.884395262134124e-05, + "loss": 2.1691, + "step": 7711000 + }, + { + "epoch": 22.32, + "learning_rate": 3.884322897369396e-05, + "loss": 2.1955, + "step": 7711500 + }, + { + "epoch": 22.32, + "learning_rate": 3.8842505326046685e-05, + "loss": 2.1726, + "step": 7712000 + }, + { + "epoch": 22.32, + "learning_rate": 3.884178167839941e-05, + "loss": 2.1731, + "step": 7712500 + }, + { + "epoch": 22.33, + "learning_rate": 3.884105803075213e-05, + "loss": 2.1569, + "step": 7713000 + }, + { + "epoch": 22.33, + "learning_rate": 3.884033583040015e-05, + "loss": 2.1857, + "step": 7713500 + }, + { + "epoch": 22.33, + "learning_rate": 3.8839612182752874e-05, + "loss": 2.144, + "step": 7714000 + }, + { + "epoch": 22.33, + "learning_rate": 3.883888998240089e-05, + "loss": 2.1481, + "step": 7714500 + }, + { + "epoch": 22.33, + "learning_rate": 3.883816633475361e-05, + "loss": 2.1781, + "step": 7715000 + }, + { + "epoch": 22.33, + "learning_rate": 3.883744268710634e-05, + "loss": 2.1697, + "step": 7715500 + }, + { + "epoch": 22.33, + "learning_rate": 3.8836720486754356e-05, + "loss": 2.1551, + "step": 7716000 + }, + { + "epoch": 22.34, + "learning_rate": 3.883599828640238e-05, + "loss": 2.1846, + "step": 7716500 + }, + { + "epoch": 22.34, + "learning_rate": 3.88352746387551e-05, + "loss": 2.1514, + "step": 7717000 + }, + { + "epoch": 22.34, + "learning_rate": 3.883455099110782e-05, + "loss": 2.1658, + "step": 7717500 + }, + { + "epoch": 22.34, + "learning_rate": 3.8833827343460545e-05, + "loss": 2.1655, + "step": 7718000 + }, + { + "epoch": 22.34, + "learning_rate": 3.883310369581327e-05, + "loss": 2.1625, + "step": 7718500 + }, + { + "epoch": 22.34, + "learning_rate": 3.883238004816599e-05, + "loss": 2.174, + "step": 7719000 + }, + { + "epoch": 22.34, + "learning_rate": 3.883165640051871e-05, + "loss": 2.1712, + "step": 7719500 + }, + { + "epoch": 22.35, + "learning_rate": 3.883093420016673e-05, + "loss": 2.1678, + "step": 7720000 + }, + { + "epoch": 22.35, + "learning_rate": 3.883021055251945e-05, + "loss": 2.1753, + "step": 7720500 + }, + { + "epoch": 22.35, + "learning_rate": 3.882948690487218e-05, + "loss": 2.18, + "step": 7721000 + }, + { + "epoch": 22.35, + "learning_rate": 3.88287632572249e-05, + "loss": 2.1806, + "step": 7721500 + }, + { + "epoch": 22.35, + "learning_rate": 3.882803960957762e-05, + "loss": 2.1847, + "step": 7722000 + }, + { + "epoch": 22.35, + "learning_rate": 3.8827315961930345e-05, + "loss": 2.1838, + "step": 7722500 + }, + { + "epoch": 22.35, + "learning_rate": 3.882659376157836e-05, + "loss": 2.1834, + "step": 7723000 + }, + { + "epoch": 22.36, + "learning_rate": 3.882587156122638e-05, + "loss": 2.1826, + "step": 7723500 + }, + { + "epoch": 22.36, + "learning_rate": 3.8825147913579105e-05, + "loss": 2.1659, + "step": 7724000 + }, + { + "epoch": 22.36, + "learning_rate": 3.882442426593183e-05, + "loss": 2.1518, + "step": 7724500 + }, + { + "epoch": 22.36, + "learning_rate": 3.882370061828455e-05, + "loss": 2.1595, + "step": 7725000 + }, + { + "epoch": 22.36, + "learning_rate": 3.882297697063728e-05, + "loss": 2.1806, + "step": 7725500 + }, + { + "epoch": 22.36, + "learning_rate": 3.882225332299e-05, + "loss": 2.19, + "step": 7726000 + }, + { + "epoch": 22.37, + "learning_rate": 3.882152967534272e-05, + "loss": 2.159, + "step": 7726500 + }, + { + "epoch": 22.37, + "learning_rate": 3.8820806027695446e-05, + "loss": 2.153, + "step": 7727000 + }, + { + "epoch": 22.37, + "learning_rate": 3.882008238004817e-05, + "loss": 2.1653, + "step": 7727500 + }, + { + "epoch": 22.37, + "learning_rate": 3.881936017969618e-05, + "loss": 2.1531, + "step": 7728000 + }, + { + "epoch": 22.37, + "learning_rate": 3.8818636532048906e-05, + "loss": 2.1792, + "step": 7728500 + }, + { + "epoch": 22.37, + "learning_rate": 3.881791288440163e-05, + "loss": 2.1612, + "step": 7729000 + }, + { + "epoch": 22.37, + "learning_rate": 3.881718923675435e-05, + "loss": 2.1576, + "step": 7729500 + }, + { + "epoch": 22.38, + "learning_rate": 3.881646558910708e-05, + "loss": 2.1608, + "step": 7730000 + }, + { + "epoch": 22.38, + "learning_rate": 3.88157419414598e-05, + "loss": 2.165, + "step": 7730500 + }, + { + "epoch": 22.38, + "learning_rate": 3.881501829381253e-05, + "loss": 2.1592, + "step": 7731000 + }, + { + "epoch": 22.38, + "learning_rate": 3.8814296093460546e-05, + "loss": 2.1864, + "step": 7731500 + }, + { + "epoch": 22.38, + "learning_rate": 3.881357244581327e-05, + "loss": 2.1649, + "step": 7732000 + }, + { + "epoch": 22.38, + "learning_rate": 3.881284879816599e-05, + "loss": 2.1827, + "step": 7732500 + }, + { + "epoch": 22.38, + "learning_rate": 3.881212515051871e-05, + "loss": 2.1716, + "step": 7733000 + }, + { + "epoch": 22.39, + "learning_rate": 3.8811401502871435e-05, + "loss": 2.195, + "step": 7733500 + }, + { + "epoch": 22.39, + "learning_rate": 3.881067930251945e-05, + "loss": 2.1707, + "step": 7734000 + }, + { + "epoch": 22.39, + "learning_rate": 3.880995565487218e-05, + "loss": 2.1745, + "step": 7734500 + }, + { + "epoch": 22.39, + "learning_rate": 3.88092320072249e-05, + "loss": 2.1814, + "step": 7735000 + }, + { + "epoch": 22.39, + "learning_rate": 3.8808508359577624e-05, + "loss": 2.1654, + "step": 7735500 + }, + { + "epoch": 22.39, + "learning_rate": 3.8807784711930346e-05, + "loss": 2.1683, + "step": 7736000 + }, + { + "epoch": 22.39, + "learning_rate": 3.880706106428307e-05, + "loss": 2.1475, + "step": 7736500 + }, + { + "epoch": 22.4, + "learning_rate": 3.8806338863931084e-05, + "loss": 2.1745, + "step": 7737000 + }, + { + "epoch": 22.4, + "learning_rate": 3.8805615216283806e-05, + "loss": 2.1819, + "step": 7737500 + }, + { + "epoch": 22.4, + "learning_rate": 3.8804891568636535e-05, + "loss": 2.1592, + "step": 7738000 + }, + { + "epoch": 22.4, + "learning_rate": 3.880416792098926e-05, + "loss": 2.1607, + "step": 7738500 + }, + { + "epoch": 22.4, + "learning_rate": 3.880344427334198e-05, + "loss": 2.1712, + "step": 7739000 + }, + { + "epoch": 22.4, + "learning_rate": 3.88027206256947e-05, + "loss": 2.1652, + "step": 7739500 + }, + { + "epoch": 22.4, + "learning_rate": 3.880199697804743e-05, + "loss": 2.1734, + "step": 7740000 + }, + { + "epoch": 22.41, + "learning_rate": 3.880127333040015e-05, + "loss": 2.1768, + "step": 7740500 + }, + { + "epoch": 22.41, + "learning_rate": 3.8800549682752875e-05, + "loss": 2.1662, + "step": 7741000 + }, + { + "epoch": 22.41, + "learning_rate": 3.87998260351056e-05, + "loss": 2.1706, + "step": 7741500 + }, + { + "epoch": 22.41, + "learning_rate": 3.879910238745832e-05, + "loss": 2.1749, + "step": 7742000 + }, + { + "epoch": 22.41, + "learning_rate": 3.879837873981104e-05, + "loss": 2.1996, + "step": 7742500 + }, + { + "epoch": 22.41, + "learning_rate": 3.8797655092163764e-05, + "loss": 2.1773, + "step": 7743000 + }, + { + "epoch": 22.41, + "learning_rate": 3.8796931444516486e-05, + "loss": 2.1481, + "step": 7743500 + }, + { + "epoch": 22.42, + "learning_rate": 3.879620779686921e-05, + "loss": 2.1503, + "step": 7744000 + }, + { + "epoch": 22.42, + "learning_rate": 3.879548414922193e-05, + "loss": 2.1451, + "step": 7744500 + }, + { + "epoch": 22.42, + "learning_rate": 3.879476050157466e-05, + "loss": 2.1784, + "step": 7745000 + }, + { + "epoch": 22.42, + "learning_rate": 3.879403830122268e-05, + "loss": 2.1722, + "step": 7745500 + }, + { + "epoch": 22.42, + "learning_rate": 3.87933161008707e-05, + "loss": 2.158, + "step": 7746000 + }, + { + "epoch": 22.42, + "learning_rate": 3.879259245322342e-05, + "loss": 2.1522, + "step": 7746500 + }, + { + "epoch": 22.42, + "learning_rate": 3.879186880557614e-05, + "loss": 2.166, + "step": 7747000 + }, + { + "epoch": 22.43, + "learning_rate": 3.879114660522416e-05, + "loss": 2.1847, + "step": 7747500 + }, + { + "epoch": 22.43, + "learning_rate": 3.879042295757688e-05, + "loss": 2.1777, + "step": 7748000 + }, + { + "epoch": 22.43, + "learning_rate": 3.878969930992961e-05, + "loss": 2.18, + "step": 7748500 + }, + { + "epoch": 22.43, + "learning_rate": 3.878897566228233e-05, + "loss": 2.1532, + "step": 7749000 + }, + { + "epoch": 22.43, + "learning_rate": 3.878825346193035e-05, + "loss": 2.1702, + "step": 7749500 + }, + { + "epoch": 22.43, + "learning_rate": 3.878752981428307e-05, + "loss": 2.1611, + "step": 7750000 + }, + { + "epoch": 22.43, + "learning_rate": 3.878680616663579e-05, + "loss": 2.1769, + "step": 7750500 + }, + { + "epoch": 22.44, + "learning_rate": 3.8786082518988514e-05, + "loss": 2.157, + "step": 7751000 + }, + { + "epoch": 22.44, + "learning_rate": 3.878536031863653e-05, + "loss": 2.1711, + "step": 7751500 + }, + { + "epoch": 22.44, + "learning_rate": 3.878463667098926e-05, + "loss": 2.1618, + "step": 7752000 + }, + { + "epoch": 22.44, + "learning_rate": 3.878391302334198e-05, + "loss": 2.1628, + "step": 7752500 + }, + { + "epoch": 22.44, + "learning_rate": 3.878318937569471e-05, + "loss": 2.1816, + "step": 7753000 + }, + { + "epoch": 22.44, + "learning_rate": 3.878246572804743e-05, + "loss": 2.1909, + "step": 7753500 + }, + { + "epoch": 22.44, + "learning_rate": 3.8781742080400154e-05, + "loss": 2.1519, + "step": 7754000 + }, + { + "epoch": 22.45, + "learning_rate": 3.8781018432752876e-05, + "loss": 2.1684, + "step": 7754500 + }, + { + "epoch": 22.45, + "learning_rate": 3.87802947851056e-05, + "loss": 2.1574, + "step": 7755000 + }, + { + "epoch": 22.45, + "learning_rate": 3.877957113745832e-05, + "loss": 2.1792, + "step": 7755500 + }, + { + "epoch": 22.45, + "learning_rate": 3.8778848937106336e-05, + "loss": 2.1617, + "step": 7756000 + }, + { + "epoch": 22.45, + "learning_rate": 3.877812673675436e-05, + "loss": 2.1458, + "step": 7756500 + }, + { + "epoch": 22.45, + "learning_rate": 3.877740308910708e-05, + "loss": 2.1746, + "step": 7757000 + }, + { + "epoch": 22.45, + "learning_rate": 3.87766794414598e-05, + "loss": 2.1571, + "step": 7757500 + }, + { + "epoch": 22.46, + "learning_rate": 3.8775955793812525e-05, + "loss": 2.1687, + "step": 7758000 + }, + { + "epoch": 22.46, + "learning_rate": 3.877523214616525e-05, + "loss": 2.1448, + "step": 7758500 + }, + { + "epoch": 22.46, + "learning_rate": 3.877450849851797e-05, + "loss": 2.1768, + "step": 7759000 + }, + { + "epoch": 22.46, + "learning_rate": 3.877378485087069e-05, + "loss": 2.18, + "step": 7759500 + }, + { + "epoch": 22.46, + "learning_rate": 3.877306265051871e-05, + "loss": 2.1709, + "step": 7760000 + }, + { + "epoch": 22.46, + "learning_rate": 3.8772339002871436e-05, + "loss": 2.1592, + "step": 7760500 + }, + { + "epoch": 22.46, + "learning_rate": 3.877161535522416e-05, + "loss": 2.1725, + "step": 7761000 + }, + { + "epoch": 22.47, + "learning_rate": 3.877089170757688e-05, + "loss": 2.1799, + "step": 7761500 + }, + { + "epoch": 22.47, + "learning_rate": 3.877016805992961e-05, + "loss": 2.167, + "step": 7762000 + }, + { + "epoch": 22.47, + "learning_rate": 3.8769445859577625e-05, + "loss": 2.168, + "step": 7762500 + }, + { + "epoch": 22.47, + "learning_rate": 3.876872221193035e-05, + "loss": 2.1792, + "step": 7763000 + }, + { + "epoch": 22.47, + "learning_rate": 3.876799856428307e-05, + "loss": 2.1752, + "step": 7763500 + }, + { + "epoch": 22.47, + "learning_rate": 3.876727491663579e-05, + "loss": 2.1552, + "step": 7764000 + }, + { + "epoch": 22.48, + "learning_rate": 3.8766551268988514e-05, + "loss": 2.1467, + "step": 7764500 + }, + { + "epoch": 22.48, + "learning_rate": 3.876582906863653e-05, + "loss": 2.1835, + "step": 7765000 + }, + { + "epoch": 22.48, + "learning_rate": 3.876510542098926e-05, + "loss": 2.1845, + "step": 7765500 + }, + { + "epoch": 22.48, + "learning_rate": 3.876438177334198e-05, + "loss": 2.1669, + "step": 7766000 + }, + { + "epoch": 22.48, + "learning_rate": 3.8763658125694703e-05, + "loss": 2.1936, + "step": 7766500 + }, + { + "epoch": 22.48, + "learning_rate": 3.876293592534272e-05, + "loss": 2.1729, + "step": 7767000 + }, + { + "epoch": 22.48, + "learning_rate": 3.876221227769544e-05, + "loss": 2.1918, + "step": 7767500 + }, + { + "epoch": 22.49, + "learning_rate": 3.876148863004817e-05, + "loss": 2.1522, + "step": 7768000 + }, + { + "epoch": 22.49, + "learning_rate": 3.876076498240089e-05, + "loss": 2.175, + "step": 7768500 + }, + { + "epoch": 22.49, + "learning_rate": 3.8760041334753615e-05, + "loss": 2.1644, + "step": 7769000 + }, + { + "epoch": 22.49, + "learning_rate": 3.875931913440164e-05, + "loss": 2.1704, + "step": 7769500 + }, + { + "epoch": 22.49, + "learning_rate": 3.875859548675436e-05, + "loss": 2.1685, + "step": 7770000 + }, + { + "epoch": 22.49, + "learning_rate": 3.875787183910708e-05, + "loss": 2.1617, + "step": 7770500 + }, + { + "epoch": 22.49, + "learning_rate": 3.8757148191459804e-05, + "loss": 2.1854, + "step": 7771000 + }, + { + "epoch": 22.5, + "learning_rate": 3.8756424543812526e-05, + "loss": 2.1846, + "step": 7771500 + }, + { + "epoch": 22.5, + "learning_rate": 3.875570089616525e-05, + "loss": 2.1777, + "step": 7772000 + }, + { + "epoch": 22.5, + "learning_rate": 3.875497724851797e-05, + "loss": 2.1667, + "step": 7772500 + }, + { + "epoch": 22.5, + "learning_rate": 3.8754256495461286e-05, + "loss": 2.1533, + "step": 7773000 + }, + { + "epoch": 22.5, + "learning_rate": 3.875353284781401e-05, + "loss": 2.1757, + "step": 7773500 + }, + { + "epoch": 22.5, + "learning_rate": 3.875280920016673e-05, + "loss": 2.1791, + "step": 7774000 + }, + { + "epoch": 22.5, + "learning_rate": 3.875208555251945e-05, + "loss": 2.1627, + "step": 7774500 + }, + { + "epoch": 22.51, + "learning_rate": 3.8751361904872175e-05, + "loss": 2.1564, + "step": 7775000 + }, + { + "epoch": 22.51, + "learning_rate": 3.8750638257224904e-05, + "loss": 2.2073, + "step": 7775500 + }, + { + "epoch": 22.51, + "learning_rate": 3.8749914609577626e-05, + "loss": 2.1602, + "step": 7776000 + }, + { + "epoch": 22.51, + "learning_rate": 3.874919240922564e-05, + "loss": 2.2062, + "step": 7776500 + }, + { + "epoch": 22.51, + "learning_rate": 3.8748468761578364e-05, + "loss": 2.1559, + "step": 7777000 + }, + { + "epoch": 22.51, + "learning_rate": 3.8747745113931086e-05, + "loss": 2.162, + "step": 7777500 + }, + { + "epoch": 22.51, + "learning_rate": 3.874702146628381e-05, + "loss": 2.1691, + "step": 7778000 + }, + { + "epoch": 22.52, + "learning_rate": 3.874629781863654e-05, + "loss": 2.1844, + "step": 7778500 + }, + { + "epoch": 22.52, + "learning_rate": 3.874557417098926e-05, + "loss": 2.1498, + "step": 7779000 + }, + { + "epoch": 22.52, + "learning_rate": 3.874485052334198e-05, + "loss": 2.1582, + "step": 7779500 + }, + { + "epoch": 22.52, + "learning_rate": 3.8744126875694704e-05, + "loss": 2.188, + "step": 7780000 + }, + { + "epoch": 22.52, + "learning_rate": 3.8743403228047426e-05, + "loss": 2.176, + "step": 7780500 + }, + { + "epoch": 22.52, + "learning_rate": 3.874267958040015e-05, + "loss": 2.1601, + "step": 7781000 + }, + { + "epoch": 22.52, + "learning_rate": 3.874195593275287e-05, + "loss": 2.1624, + "step": 7781500 + }, + { + "epoch": 22.53, + "learning_rate": 3.874123228510559e-05, + "loss": 2.1615, + "step": 7782000 + }, + { + "epoch": 22.53, + "learning_rate": 3.8740508637458315e-05, + "loss": 2.1485, + "step": 7782500 + }, + { + "epoch": 22.53, + "learning_rate": 3.873978643710634e-05, + "loss": 2.1623, + "step": 7783000 + }, + { + "epoch": 22.53, + "learning_rate": 3.873906278945906e-05, + "loss": 2.1976, + "step": 7783500 + }, + { + "epoch": 22.53, + "learning_rate": 3.873834058910708e-05, + "loss": 2.1814, + "step": 7784000 + }, + { + "epoch": 22.53, + "learning_rate": 3.8737616941459804e-05, + "loss": 2.1771, + "step": 7784500 + }, + { + "epoch": 22.53, + "learning_rate": 3.873689329381253e-05, + "loss": 2.1463, + "step": 7785000 + }, + { + "epoch": 22.54, + "learning_rate": 3.873616964616525e-05, + "loss": 2.1693, + "step": 7785500 + }, + { + "epoch": 22.54, + "learning_rate": 3.873544599851797e-05, + "loss": 2.189, + "step": 7786000 + }, + { + "epoch": 22.54, + "learning_rate": 3.873472379816599e-05, + "loss": 2.1608, + "step": 7786500 + }, + { + "epoch": 22.54, + "learning_rate": 3.873400015051871e-05, + "loss": 2.167, + "step": 7787000 + }, + { + "epoch": 22.54, + "learning_rate": 3.873327650287144e-05, + "loss": 2.1885, + "step": 7787500 + }, + { + "epoch": 22.54, + "learning_rate": 3.873255285522416e-05, + "loss": 2.1841, + "step": 7788000 + }, + { + "epoch": 22.54, + "learning_rate": 3.873182920757688e-05, + "loss": 2.1374, + "step": 7788500 + }, + { + "epoch": 22.55, + "learning_rate": 3.8731105559929605e-05, + "loss": 2.1823, + "step": 7789000 + }, + { + "epoch": 22.55, + "learning_rate": 3.873038191228233e-05, + "loss": 2.186, + "step": 7789500 + }, + { + "epoch": 22.55, + "learning_rate": 3.872965826463505e-05, + "loss": 2.1766, + "step": 7790000 + }, + { + "epoch": 22.55, + "learning_rate": 3.872893461698778e-05, + "loss": 2.1684, + "step": 7790500 + }, + { + "epoch": 22.55, + "learning_rate": 3.8728212416635794e-05, + "loss": 2.1664, + "step": 7791000 + }, + { + "epoch": 22.55, + "learning_rate": 3.8727488768988516e-05, + "loss": 2.1762, + "step": 7791500 + }, + { + "epoch": 22.55, + "learning_rate": 3.872676512134124e-05, + "loss": 2.1775, + "step": 7792000 + }, + { + "epoch": 22.56, + "learning_rate": 3.872604292098926e-05, + "loss": 2.1771, + "step": 7792500 + }, + { + "epoch": 22.56, + "learning_rate": 3.872531927334198e-05, + "loss": 2.1838, + "step": 7793000 + }, + { + "epoch": 22.56, + "learning_rate": 3.8724595625694705e-05, + "loss": 2.1716, + "step": 7793500 + }, + { + "epoch": 22.56, + "learning_rate": 3.872387342534272e-05, + "loss": 2.1574, + "step": 7794000 + }, + { + "epoch": 22.56, + "learning_rate": 3.872314977769544e-05, + "loss": 2.1901, + "step": 7794500 + }, + { + "epoch": 22.56, + "learning_rate": 3.8722426130048165e-05, + "loss": 2.1842, + "step": 7795000 + }, + { + "epoch": 22.56, + "learning_rate": 3.872170248240089e-05, + "loss": 2.1366, + "step": 7795500 + }, + { + "epoch": 22.57, + "learning_rate": 3.8720978834753616e-05, + "loss": 2.1581, + "step": 7796000 + }, + { + "epoch": 22.57, + "learning_rate": 3.872025663440163e-05, + "loss": 2.1783, + "step": 7796500 + }, + { + "epoch": 22.57, + "learning_rate": 3.8719532986754354e-05, + "loss": 2.1952, + "step": 7797000 + }, + { + "epoch": 22.57, + "learning_rate": 3.871881078640237e-05, + "loss": 2.1839, + "step": 7797500 + }, + { + "epoch": 22.57, + "learning_rate": 3.871808713875509e-05, + "loss": 2.1815, + "step": 7798000 + }, + { + "epoch": 22.57, + "learning_rate": 3.871736349110782e-05, + "loss": 2.1613, + "step": 7798500 + }, + { + "epoch": 22.57, + "learning_rate": 3.871663984346054e-05, + "loss": 2.1996, + "step": 7799000 + }, + { + "epoch": 22.58, + "learning_rate": 3.8715916195813265e-05, + "loss": 2.1669, + "step": 7799500 + }, + { + "epoch": 22.58, + "learning_rate": 3.871519254816599e-05, + "loss": 2.2006, + "step": 7800000 + }, + { + "epoch": 22.58, + "learning_rate": 3.8714468900518717e-05, + "loss": 2.152, + "step": 7800500 + }, + { + "epoch": 22.58, + "learning_rate": 3.871374525287144e-05, + "loss": 2.1787, + "step": 7801000 + }, + { + "epoch": 22.58, + "learning_rate": 3.871302160522416e-05, + "loss": 2.1958, + "step": 7801500 + }, + { + "epoch": 22.58, + "learning_rate": 3.871229795757688e-05, + "loss": 2.1455, + "step": 7802000 + }, + { + "epoch": 22.59, + "learning_rate": 3.8711574309929605e-05, + "loss": 2.1723, + "step": 7802500 + }, + { + "epoch": 22.59, + "learning_rate": 3.871085066228233e-05, + "loss": 2.1608, + "step": 7803000 + }, + { + "epoch": 22.59, + "learning_rate": 3.871012701463505e-05, + "loss": 2.1673, + "step": 7803500 + }, + { + "epoch": 22.59, + "learning_rate": 3.8709404814283066e-05, + "loss": 2.141, + "step": 7804000 + }, + { + "epoch": 22.59, + "learning_rate": 3.870868116663579e-05, + "loss": 2.1554, + "step": 7804500 + }, + { + "epoch": 22.59, + "learning_rate": 3.870795751898852e-05, + "loss": 2.1863, + "step": 7805000 + }, + { + "epoch": 22.59, + "learning_rate": 3.870723387134124e-05, + "loss": 2.1736, + "step": 7805500 + }, + { + "epoch": 22.6, + "learning_rate": 3.870651022369397e-05, + "loss": 2.1445, + "step": 7806000 + }, + { + "epoch": 22.6, + "learning_rate": 3.8705788023341984e-05, + "loss": 2.1581, + "step": 7806500 + }, + { + "epoch": 22.6, + "learning_rate": 3.8705064375694706e-05, + "loss": 2.1994, + "step": 7807000 + }, + { + "epoch": 22.6, + "learning_rate": 3.870434072804743e-05, + "loss": 2.1474, + "step": 7807500 + }, + { + "epoch": 22.6, + "learning_rate": 3.870361708040015e-05, + "loss": 2.1766, + "step": 7808000 + }, + { + "epoch": 22.6, + "learning_rate": 3.870289343275287e-05, + "loss": 2.1509, + "step": 7808500 + }, + { + "epoch": 22.6, + "learning_rate": 3.8702169785105595e-05, + "loss": 2.1494, + "step": 7809000 + }, + { + "epoch": 22.61, + "learning_rate": 3.870144613745832e-05, + "loss": 2.181, + "step": 7809500 + }, + { + "epoch": 22.61, + "learning_rate": 3.870072248981104e-05, + "loss": 2.1766, + "step": 7810000 + }, + { + "epoch": 22.61, + "learning_rate": 3.869999884216377e-05, + "loss": 2.1602, + "step": 7810500 + }, + { + "epoch": 22.61, + "learning_rate": 3.869927519451649e-05, + "loss": 2.1538, + "step": 7811000 + }, + { + "epoch": 22.61, + "learning_rate": 3.869855154686921e-05, + "loss": 2.1746, + "step": 7811500 + }, + { + "epoch": 22.61, + "learning_rate": 3.8697833688403115e-05, + "loss": 2.1944, + "step": 7812000 + }, + { + "epoch": 22.61, + "learning_rate": 3.869711004075584e-05, + "loss": 2.1946, + "step": 7812500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869638639310856e-05, + "loss": 2.1922, + "step": 7813000 + }, + { + "epoch": 22.62, + "learning_rate": 3.869566274546129e-05, + "loss": 2.1595, + "step": 7813500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869493909781401e-05, + "loss": 2.1679, + "step": 7814000 + }, + { + "epoch": 22.62, + "learning_rate": 3.8694216897462026e-05, + "loss": 2.201, + "step": 7814500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869349324981475e-05, + "loss": 2.1789, + "step": 7815000 + }, + { + "epoch": 22.62, + "learning_rate": 3.869276960216747e-05, + "loss": 2.1825, + "step": 7815500 + }, + { + "epoch": 22.62, + "learning_rate": 3.869204595452019e-05, + "loss": 2.17, + "step": 7816000 + }, + { + "epoch": 22.63, + "learning_rate": 3.8691322306872915e-05, + "loss": 2.1745, + "step": 7816500 + }, + { + "epoch": 22.63, + "learning_rate": 3.8690598659225644e-05, + "loss": 2.1779, + "step": 7817000 + }, + { + "epoch": 22.63, + "learning_rate": 3.8689875011578366e-05, + "loss": 2.1683, + "step": 7817500 + }, + { + "epoch": 22.63, + "learning_rate": 3.868915136393109e-05, + "loss": 2.1643, + "step": 7818000 + }, + { + "epoch": 22.63, + "learning_rate": 3.868842771628381e-05, + "loss": 2.1589, + "step": 7818500 + }, + { + "epoch": 22.63, + "learning_rate": 3.868770406863653e-05, + "loss": 2.1922, + "step": 7819000 + }, + { + "epoch": 22.63, + "learning_rate": 3.8686980420989255e-05, + "loss": 2.1725, + "step": 7819500 + }, + { + "epoch": 22.64, + "learning_rate": 3.868625677334198e-05, + "loss": 2.1735, + "step": 7820000 + }, + { + "epoch": 22.64, + "learning_rate": 3.8685533125694707e-05, + "loss": 2.1706, + "step": 7820500 + }, + { + "epoch": 22.64, + "learning_rate": 3.868480947804743e-05, + "loss": 2.1985, + "step": 7821000 + }, + { + "epoch": 22.64, + "learning_rate": 3.8684087277695444e-05, + "loss": 2.1585, + "step": 7821500 + }, + { + "epoch": 22.64, + "learning_rate": 3.8683363630048167e-05, + "loss": 2.1734, + "step": 7822000 + }, + { + "epoch": 22.64, + "learning_rate": 3.868264142969619e-05, + "loss": 2.179, + "step": 7822500 + }, + { + "epoch": 22.64, + "learning_rate": 3.868191778204891e-05, + "loss": 2.184, + "step": 7823000 + }, + { + "epoch": 22.65, + "learning_rate": 3.8681194134401633e-05, + "loss": 2.1694, + "step": 7823500 + }, + { + "epoch": 22.65, + "learning_rate": 3.8680470486754356e-05, + "loss": 2.1732, + "step": 7824000 + }, + { + "epoch": 22.65, + "learning_rate": 3.867974683910708e-05, + "loss": 2.1599, + "step": 7824500 + }, + { + "epoch": 22.65, + "learning_rate": 3.86790231914598e-05, + "loss": 2.1651, + "step": 7825000 + }, + { + "epoch": 22.65, + "learning_rate": 3.867829954381252e-05, + "loss": 2.1613, + "step": 7825500 + }, + { + "epoch": 22.65, + "learning_rate": 3.867757879075584e-05, + "loss": 2.1681, + "step": 7826000 + }, + { + "epoch": 22.65, + "learning_rate": 3.867685514310856e-05, + "loss": 2.1918, + "step": 7826500 + }, + { + "epoch": 22.66, + "learning_rate": 3.867613149546128e-05, + "loss": 2.1695, + "step": 7827000 + }, + { + "epoch": 22.66, + "learning_rate": 3.8675407847814005e-05, + "loss": 2.1918, + "step": 7827500 + }, + { + "epoch": 22.66, + "learning_rate": 3.867468420016673e-05, + "loss": 2.1921, + "step": 7828000 + }, + { + "epoch": 22.66, + "learning_rate": 3.8673960552519456e-05, + "loss": 2.18, + "step": 7828500 + }, + { + "epoch": 22.66, + "learning_rate": 3.867323690487218e-05, + "loss": 2.1813, + "step": 7829000 + }, + { + "epoch": 22.66, + "learning_rate": 3.8672514704520194e-05, + "loss": 2.1916, + "step": 7829500 + }, + { + "epoch": 22.66, + "learning_rate": 3.8671791056872916e-05, + "loss": 2.1613, + "step": 7830000 + }, + { + "epoch": 22.67, + "learning_rate": 3.8671067409225645e-05, + "loss": 2.1668, + "step": 7830500 + }, + { + "epoch": 22.67, + "learning_rate": 3.867034376157837e-05, + "loss": 2.185, + "step": 7831000 + }, + { + "epoch": 22.67, + "learning_rate": 3.866962011393109e-05, + "loss": 2.1922, + "step": 7831500 + }, + { + "epoch": 22.67, + "learning_rate": 3.866889646628381e-05, + "loss": 2.1818, + "step": 7832000 + }, + { + "epoch": 22.67, + "learning_rate": 3.8668172818636534e-05, + "loss": 2.1637, + "step": 7832500 + }, + { + "epoch": 22.67, + "learning_rate": 3.8667449170989256e-05, + "loss": 2.1558, + "step": 7833000 + }, + { + "epoch": 22.67, + "learning_rate": 3.866672552334198e-05, + "loss": 2.1697, + "step": 7833500 + }, + { + "epoch": 22.68, + "learning_rate": 3.86660018756947e-05, + "loss": 2.155, + "step": 7834000 + }, + { + "epoch": 22.68, + "learning_rate": 3.866527822804742e-05, + "loss": 2.2049, + "step": 7834500 + }, + { + "epoch": 22.68, + "learning_rate": 3.8664554580400145e-05, + "loss": 2.1896, + "step": 7835000 + }, + { + "epoch": 22.68, + "learning_rate": 3.8663830932752874e-05, + "loss": 2.1517, + "step": 7835500 + }, + { + "epoch": 22.68, + "learning_rate": 3.8663107285105596e-05, + "loss": 2.1678, + "step": 7836000 + }, + { + "epoch": 22.68, + "learning_rate": 3.866238508475362e-05, + "loss": 2.1557, + "step": 7836500 + }, + { + "epoch": 22.68, + "learning_rate": 3.8661662884401634e-05, + "loss": 2.1618, + "step": 7837000 + }, + { + "epoch": 22.69, + "learning_rate": 3.8660939236754356e-05, + "loss": 2.1806, + "step": 7837500 + }, + { + "epoch": 22.69, + "learning_rate": 3.866021558910708e-05, + "loss": 2.1697, + "step": 7838000 + }, + { + "epoch": 22.69, + "learning_rate": 3.86594919414598e-05, + "loss": 2.1788, + "step": 7838500 + }, + { + "epoch": 22.69, + "learning_rate": 3.865876829381252e-05, + "loss": 2.1632, + "step": 7839000 + }, + { + "epoch": 22.69, + "learning_rate": 3.8658044646165245e-05, + "loss": 2.1906, + "step": 7839500 + }, + { + "epoch": 22.69, + "learning_rate": 3.865732099851797e-05, + "loss": 2.1871, + "step": 7840000 + }, + { + "epoch": 22.7, + "learning_rate": 3.8656597350870697e-05, + "loss": 2.1827, + "step": 7840500 + }, + { + "epoch": 22.7, + "learning_rate": 3.865587370322342e-05, + "loss": 2.1924, + "step": 7841000 + }, + { + "epoch": 22.7, + "learning_rate": 3.865515005557614e-05, + "loss": 2.1582, + "step": 7841500 + }, + { + "epoch": 22.7, + "learning_rate": 3.865442640792886e-05, + "loss": 2.1586, + "step": 7842000 + }, + { + "epoch": 22.7, + "learning_rate": 3.865370420757688e-05, + "loss": 2.1745, + "step": 7842500 + }, + { + "epoch": 22.7, + "learning_rate": 3.865298055992961e-05, + "loss": 2.1928, + "step": 7843000 + }, + { + "epoch": 22.7, + "learning_rate": 3.865225691228233e-05, + "loss": 2.1668, + "step": 7843500 + }, + { + "epoch": 22.71, + "learning_rate": 3.865153326463505e-05, + "loss": 2.1708, + "step": 7844000 + }, + { + "epoch": 22.71, + "learning_rate": 3.8650809616987775e-05, + "loss": 2.155, + "step": 7844500 + }, + { + "epoch": 22.71, + "learning_rate": 3.86500874166358e-05, + "loss": 2.1662, + "step": 7845000 + }, + { + "epoch": 22.71, + "learning_rate": 3.864936376898852e-05, + "loss": 2.1554, + "step": 7845500 + }, + { + "epoch": 22.71, + "learning_rate": 3.864864012134124e-05, + "loss": 2.1818, + "step": 7846000 + }, + { + "epoch": 22.71, + "learning_rate": 3.8647916473693964e-05, + "loss": 2.1443, + "step": 7846500 + }, + { + "epoch": 22.71, + "learning_rate": 3.8647192826046686e-05, + "loss": 2.1567, + "step": 7847000 + }, + { + "epoch": 22.72, + "learning_rate": 3.864646917839941e-05, + "loss": 2.1653, + "step": 7847500 + }, + { + "epoch": 22.72, + "learning_rate": 3.8645746978047424e-05, + "loss": 2.1748, + "step": 7848000 + }, + { + "epoch": 22.72, + "learning_rate": 3.8645023330400146e-05, + "loss": 2.1683, + "step": 7848500 + }, + { + "epoch": 22.72, + "learning_rate": 3.864429968275287e-05, + "loss": 2.1805, + "step": 7849000 + }, + { + "epoch": 22.72, + "learning_rate": 3.86435760351056e-05, + "loss": 2.1885, + "step": 7849500 + }, + { + "epoch": 22.72, + "learning_rate": 3.864285238745832e-05, + "loss": 2.1443, + "step": 7850000 + }, + { + "epoch": 22.72, + "learning_rate": 3.864212873981105e-05, + "loss": 2.171, + "step": 7850500 + }, + { + "epoch": 22.73, + "learning_rate": 3.864140509216377e-05, + "loss": 2.1731, + "step": 7851000 + }, + { + "epoch": 22.73, + "learning_rate": 3.864068144451649e-05, + "loss": 2.1554, + "step": 7851500 + }, + { + "epoch": 22.73, + "learning_rate": 3.8639957796869215e-05, + "loss": 2.16, + "step": 7852000 + }, + { + "epoch": 22.73, + "learning_rate": 3.863923559651723e-05, + "loss": 2.1844, + "step": 7852500 + }, + { + "epoch": 22.73, + "learning_rate": 3.863851194886995e-05, + "loss": 2.1833, + "step": 7853000 + }, + { + "epoch": 22.73, + "learning_rate": 3.8637788301222675e-05, + "loss": 2.1734, + "step": 7853500 + }, + { + "epoch": 22.73, + "learning_rate": 3.86370646535754e-05, + "loss": 2.1769, + "step": 7854000 + }, + { + "epoch": 22.74, + "learning_rate": 3.863634390051871e-05, + "loss": 2.1706, + "step": 7854500 + }, + { + "epoch": 22.74, + "learning_rate": 3.8635620252871435e-05, + "loss": 2.1889, + "step": 7855000 + }, + { + "epoch": 22.74, + "learning_rate": 3.863489660522416e-05, + "loss": 2.1743, + "step": 7855500 + }, + { + "epoch": 22.74, + "learning_rate": 3.863417295757688e-05, + "loss": 2.1509, + "step": 7856000 + }, + { + "epoch": 22.74, + "learning_rate": 3.86334493099296e-05, + "loss": 2.1788, + "step": 7856500 + }, + { + "epoch": 22.74, + "learning_rate": 3.8632725662282324e-05, + "loss": 2.1895, + "step": 7857000 + }, + { + "epoch": 22.74, + "learning_rate": 3.8632002014635046e-05, + "loss": 2.1767, + "step": 7857500 + }, + { + "epoch": 22.75, + "learning_rate": 3.8631278366987775e-05, + "loss": 2.1783, + "step": 7858000 + }, + { + "epoch": 22.75, + "learning_rate": 3.86305561666358e-05, + "loss": 2.167, + "step": 7858500 + }, + { + "epoch": 22.75, + "learning_rate": 3.862983251898852e-05, + "loss": 2.1495, + "step": 7859000 + }, + { + "epoch": 22.75, + "learning_rate": 3.862910887134124e-05, + "loss": 2.1717, + "step": 7859500 + }, + { + "epoch": 22.75, + "learning_rate": 3.862838667098926e-05, + "loss": 2.177, + "step": 7860000 + }, + { + "epoch": 22.75, + "learning_rate": 3.862766302334198e-05, + "loss": 2.1671, + "step": 7860500 + }, + { + "epoch": 22.75, + "learning_rate": 3.86269393756947e-05, + "loss": 2.1805, + "step": 7861000 + }, + { + "epoch": 22.76, + "learning_rate": 3.8626215728047424e-05, + "loss": 2.171, + "step": 7861500 + }, + { + "epoch": 22.76, + "learning_rate": 3.862549208040015e-05, + "loss": 2.1875, + "step": 7862000 + }, + { + "epoch": 22.76, + "learning_rate": 3.862476988004817e-05, + "loss": 2.1924, + "step": 7862500 + }, + { + "epoch": 22.76, + "learning_rate": 3.862404623240089e-05, + "loss": 2.1711, + "step": 7863000 + }, + { + "epoch": 22.76, + "learning_rate": 3.8623322584753613e-05, + "loss": 2.172, + "step": 7863500 + }, + { + "epoch": 22.76, + "learning_rate": 3.8622598937106336e-05, + "loss": 2.1415, + "step": 7864000 + }, + { + "epoch": 22.76, + "learning_rate": 3.862187528945906e-05, + "loss": 2.1526, + "step": 7864500 + }, + { + "epoch": 22.77, + "learning_rate": 3.862115164181178e-05, + "loss": 2.1722, + "step": 7865000 + }, + { + "epoch": 22.77, + "learning_rate": 3.862042799416451e-05, + "loss": 2.174, + "step": 7865500 + }, + { + "epoch": 22.77, + "learning_rate": 3.861970434651723e-05, + "loss": 2.1651, + "step": 7866000 + }, + { + "epoch": 22.77, + "learning_rate": 3.8618980698869954e-05, + "loss": 2.1486, + "step": 7866500 + }, + { + "epoch": 22.77, + "learning_rate": 3.8618257051222676e-05, + "loss": 2.1296, + "step": 7867000 + }, + { + "epoch": 22.77, + "learning_rate": 3.86175334035754e-05, + "loss": 2.1898, + "step": 7867500 + }, + { + "epoch": 22.77, + "learning_rate": 3.861680975592813e-05, + "loss": 2.1796, + "step": 7868000 + }, + { + "epoch": 22.78, + "learning_rate": 3.861608755557614e-05, + "loss": 2.1765, + "step": 7868500 + }, + { + "epoch": 22.78, + "learning_rate": 3.8615363907928865e-05, + "loss": 2.1769, + "step": 7869000 + }, + { + "epoch": 22.78, + "learning_rate": 3.861464026028159e-05, + "loss": 2.1778, + "step": 7869500 + }, + { + "epoch": 22.78, + "learning_rate": 3.861391661263431e-05, + "loss": 2.1837, + "step": 7870000 + }, + { + "epoch": 22.78, + "learning_rate": 3.861319296498703e-05, + "loss": 2.1686, + "step": 7870500 + }, + { + "epoch": 22.78, + "learning_rate": 3.8612469317339754e-05, + "loss": 2.1737, + "step": 7871000 + }, + { + "epoch": 22.78, + "learning_rate": 3.8611745669692476e-05, + "loss": 2.1786, + "step": 7871500 + }, + { + "epoch": 22.79, + "learning_rate": 3.86110220220452e-05, + "loss": 2.1693, + "step": 7872000 + }, + { + "epoch": 22.79, + "learning_rate": 3.861029982169322e-05, + "loss": 2.1827, + "step": 7872500 + }, + { + "epoch": 22.79, + "learning_rate": 3.860957617404595e-05, + "loss": 2.165, + "step": 7873000 + }, + { + "epoch": 22.79, + "learning_rate": 3.860885252639867e-05, + "loss": 2.1688, + "step": 7873500 + }, + { + "epoch": 22.79, + "learning_rate": 3.860813032604669e-05, + "loss": 2.1755, + "step": 7874000 + }, + { + "epoch": 22.79, + "learning_rate": 3.860740667839941e-05, + "loss": 2.1641, + "step": 7874500 + }, + { + "epoch": 22.79, + "learning_rate": 3.860668303075213e-05, + "loss": 2.1942, + "step": 7875000 + }, + { + "epoch": 22.8, + "learning_rate": 3.8605959383104854e-05, + "loss": 2.1957, + "step": 7875500 + }, + { + "epoch": 22.8, + "learning_rate": 3.8605235735457576e-05, + "loss": 2.1693, + "step": 7876000 + }, + { + "epoch": 22.8, + "learning_rate": 3.86045135351056e-05, + "loss": 2.1746, + "step": 7876500 + }, + { + "epoch": 22.8, + "learning_rate": 3.8603791334753614e-05, + "loss": 2.147, + "step": 7877000 + }, + { + "epoch": 22.8, + "learning_rate": 3.8603067687106336e-05, + "loss": 2.1774, + "step": 7877500 + }, + { + "epoch": 22.8, + "learning_rate": 3.860234403945906e-05, + "loss": 2.175, + "step": 7878000 + }, + { + "epoch": 22.81, + "learning_rate": 3.860162039181178e-05, + "loss": 2.1627, + "step": 7878500 + }, + { + "epoch": 22.81, + "learning_rate": 3.86008967441645e-05, + "loss": 2.1579, + "step": 7879000 + }, + { + "epoch": 22.81, + "learning_rate": 3.8600174543812526e-05, + "loss": 2.1922, + "step": 7879500 + }, + { + "epoch": 22.81, + "learning_rate": 3.859945089616525e-05, + "loss": 2.1699, + "step": 7880000 + }, + { + "epoch": 22.81, + "learning_rate": 3.859872724851798e-05, + "loss": 2.1505, + "step": 7880500 + }, + { + "epoch": 22.81, + "learning_rate": 3.85980036008707e-05, + "loss": 2.1709, + "step": 7881000 + }, + { + "epoch": 22.81, + "learning_rate": 3.859727995322342e-05, + "loss": 2.1527, + "step": 7881500 + }, + { + "epoch": 22.82, + "learning_rate": 3.8596556305576143e-05, + "loss": 2.1695, + "step": 7882000 + }, + { + "epoch": 22.82, + "learning_rate": 3.8595832657928866e-05, + "loss": 2.1839, + "step": 7882500 + }, + { + "epoch": 22.82, + "learning_rate": 3.859510901028159e-05, + "loss": 2.1994, + "step": 7883000 + }, + { + "epoch": 22.82, + "learning_rate": 3.859438536263431e-05, + "loss": 2.1735, + "step": 7883500 + }, + { + "epoch": 22.82, + "learning_rate": 3.8593664609577626e-05, + "loss": 2.1946, + "step": 7884000 + }, + { + "epoch": 22.82, + "learning_rate": 3.859294096193035e-05, + "loss": 2.1678, + "step": 7884500 + }, + { + "epoch": 22.82, + "learning_rate": 3.859221731428307e-05, + "loss": 2.1744, + "step": 7885000 + }, + { + "epoch": 22.83, + "learning_rate": 3.8591495113931086e-05, + "loss": 2.1577, + "step": 7885500 + }, + { + "epoch": 22.83, + "learning_rate": 3.859077146628381e-05, + "loss": 2.1584, + "step": 7886000 + }, + { + "epoch": 22.83, + "learning_rate": 3.859004781863653e-05, + "loss": 2.1772, + "step": 7886500 + }, + { + "epoch": 22.83, + "learning_rate": 3.858932417098925e-05, + "loss": 2.1732, + "step": 7887000 + }, + { + "epoch": 22.83, + "learning_rate": 3.8588600523341975e-05, + "loss": 2.1525, + "step": 7887500 + }, + { + "epoch": 22.83, + "learning_rate": 3.8587876875694704e-05, + "loss": 2.1585, + "step": 7888000 + }, + { + "epoch": 22.83, + "learning_rate": 3.8587153228047426e-05, + "loss": 2.1552, + "step": 7888500 + }, + { + "epoch": 22.84, + "learning_rate": 3.8586429580400155e-05, + "loss": 2.1765, + "step": 7889000 + }, + { + "epoch": 22.84, + "learning_rate": 3.858570593275288e-05, + "loss": 2.1544, + "step": 7889500 + }, + { + "epoch": 22.84, + "learning_rate": 3.85849822851056e-05, + "loss": 2.1748, + "step": 7890000 + }, + { + "epoch": 22.84, + "learning_rate": 3.858425863745832e-05, + "loss": 2.1783, + "step": 7890500 + }, + { + "epoch": 22.84, + "learning_rate": 3.8583534989811044e-05, + "loss": 2.1664, + "step": 7891000 + }, + { + "epoch": 22.84, + "learning_rate": 3.858281278945906e-05, + "loss": 2.1422, + "step": 7891500 + }, + { + "epoch": 22.84, + "learning_rate": 3.858208914181178e-05, + "loss": 2.1638, + "step": 7892000 + }, + { + "epoch": 22.85, + "learning_rate": 3.8581366941459804e-05, + "loss": 2.1768, + "step": 7892500 + }, + { + "epoch": 22.85, + "learning_rate": 3.8580643293812526e-05, + "loss": 2.1856, + "step": 7893000 + }, + { + "epoch": 22.85, + "learning_rate": 3.857991964616525e-05, + "loss": 2.1817, + "step": 7893500 + }, + { + "epoch": 22.85, + "learning_rate": 3.857919599851797e-05, + "loss": 2.188, + "step": 7894000 + }, + { + "epoch": 22.85, + "learning_rate": 3.857847235087069e-05, + "loss": 2.1709, + "step": 7894500 + }, + { + "epoch": 22.85, + "learning_rate": 3.8577748703223415e-05, + "loss": 2.1758, + "step": 7895000 + }, + { + "epoch": 22.85, + "learning_rate": 3.8577025055576144e-05, + "loss": 2.1729, + "step": 7895500 + }, + { + "epoch": 22.86, + "learning_rate": 3.8576301407928866e-05, + "loss": 2.1412, + "step": 7896000 + }, + { + "epoch": 22.86, + "learning_rate": 3.857557776028159e-05, + "loss": 2.1444, + "step": 7896500 + }, + { + "epoch": 22.86, + "learning_rate": 3.857485411263431e-05, + "loss": 2.1683, + "step": 7897000 + }, + { + "epoch": 22.86, + "learning_rate": 3.8574131912282327e-05, + "loss": 2.1503, + "step": 7897500 + }, + { + "epoch": 22.86, + "learning_rate": 3.8573408264635056e-05, + "loss": 2.1566, + "step": 7898000 + }, + { + "epoch": 22.86, + "learning_rate": 3.857268461698778e-05, + "loss": 2.2027, + "step": 7898500 + }, + { + "epoch": 22.86, + "learning_rate": 3.85719609693405e-05, + "loss": 2.1658, + "step": 7899000 + }, + { + "epoch": 22.87, + "learning_rate": 3.8571238768988516e-05, + "loss": 2.1976, + "step": 7899500 + }, + { + "epoch": 22.87, + "learning_rate": 3.857051512134124e-05, + "loss": 2.1542, + "step": 7900000 + }, + { + "epoch": 22.87, + "learning_rate": 3.856979147369396e-05, + "loss": 2.1515, + "step": 7900500 + }, + { + "epoch": 22.87, + "learning_rate": 3.856906782604668e-05, + "loss": 2.1669, + "step": 7901000 + }, + { + "epoch": 22.87, + "learning_rate": 3.8568345625694705e-05, + "loss": 2.196, + "step": 7901500 + }, + { + "epoch": 22.87, + "learning_rate": 3.856762197804743e-05, + "loss": 2.1748, + "step": 7902000 + }, + { + "epoch": 22.87, + "learning_rate": 3.856689833040015e-05, + "loss": 2.1874, + "step": 7902500 + }, + { + "epoch": 22.88, + "learning_rate": 3.856617613004817e-05, + "loss": 2.1558, + "step": 7903000 + }, + { + "epoch": 22.88, + "learning_rate": 3.8565452482400894e-05, + "loss": 2.166, + "step": 7903500 + }, + { + "epoch": 22.88, + "learning_rate": 3.8564728834753616e-05, + "loss": 2.1717, + "step": 7904000 + }, + { + "epoch": 22.88, + "learning_rate": 3.856400518710634e-05, + "loss": 2.1853, + "step": 7904500 + }, + { + "epoch": 22.88, + "learning_rate": 3.856328153945906e-05, + "loss": 2.1431, + "step": 7905000 + }, + { + "epoch": 22.88, + "learning_rate": 3.856255789181178e-05, + "loss": 2.1779, + "step": 7905500 + }, + { + "epoch": 22.88, + "learning_rate": 3.8561834244164505e-05, + "loss": 2.1609, + "step": 7906000 + }, + { + "epoch": 22.89, + "learning_rate": 3.856111059651723e-05, + "loss": 2.1711, + "step": 7906500 + }, + { + "epoch": 22.89, + "learning_rate": 3.856038839616525e-05, + "loss": 2.1644, + "step": 7907000 + }, + { + "epoch": 22.89, + "learning_rate": 3.855966474851797e-05, + "loss": 2.1475, + "step": 7907500 + }, + { + "epoch": 22.89, + "learning_rate": 3.855894254816599e-05, + "loss": 2.188, + "step": 7908000 + }, + { + "epoch": 22.89, + "learning_rate": 3.855821890051871e-05, + "loss": 2.1722, + "step": 7908500 + }, + { + "epoch": 22.89, + "learning_rate": 3.855749525287143e-05, + "loss": 2.1819, + "step": 7909000 + }, + { + "epoch": 22.89, + "learning_rate": 3.8556771605224154e-05, + "loss": 2.1751, + "step": 7909500 + }, + { + "epoch": 22.9, + "learning_rate": 3.855604795757688e-05, + "loss": 2.1745, + "step": 7910000 + }, + { + "epoch": 22.9, + "learning_rate": 3.8555325757224905e-05, + "loss": 2.1765, + "step": 7910500 + }, + { + "epoch": 22.9, + "learning_rate": 3.855460210957763e-05, + "loss": 2.173, + "step": 7911000 + }, + { + "epoch": 22.9, + "learning_rate": 3.855387846193035e-05, + "loss": 2.1691, + "step": 7911500 + }, + { + "epoch": 22.9, + "learning_rate": 3.855315481428307e-05, + "loss": 2.1403, + "step": 7912000 + }, + { + "epoch": 22.9, + "learning_rate": 3.8552431166635794e-05, + "loss": 2.1962, + "step": 7912500 + }, + { + "epoch": 22.9, + "learning_rate": 3.8551707518988516e-05, + "loss": 2.1799, + "step": 7913000 + }, + { + "epoch": 22.91, + "learning_rate": 3.855098387134124e-05, + "loss": 2.1699, + "step": 7913500 + }, + { + "epoch": 22.91, + "learning_rate": 3.855026022369396e-05, + "loss": 2.1623, + "step": 7914000 + }, + { + "epoch": 22.91, + "learning_rate": 3.854953657604668e-05, + "loss": 2.1735, + "step": 7914500 + }, + { + "epoch": 22.91, + "learning_rate": 3.854881582299e-05, + "loss": 2.1808, + "step": 7915000 + }, + { + "epoch": 22.91, + "learning_rate": 3.854809217534272e-05, + "loss": 2.1754, + "step": 7915500 + }, + { + "epoch": 22.91, + "learning_rate": 3.854736852769544e-05, + "loss": 2.1588, + "step": 7916000 + }, + { + "epoch": 22.92, + "learning_rate": 3.854664632734346e-05, + "loss": 2.1595, + "step": 7916500 + }, + { + "epoch": 22.92, + "learning_rate": 3.854592267969618e-05, + "loss": 2.156, + "step": 7917000 + }, + { + "epoch": 22.92, + "learning_rate": 3.85451990320489e-05, + "loss": 2.1631, + "step": 7917500 + }, + { + "epoch": 22.92, + "learning_rate": 3.854447538440163e-05, + "loss": 2.1753, + "step": 7918000 + }, + { + "epoch": 22.92, + "learning_rate": 3.8543751736754354e-05, + "loss": 2.2077, + "step": 7918500 + }, + { + "epoch": 22.92, + "learning_rate": 3.8543028089107083e-05, + "loss": 2.1626, + "step": 7919000 + }, + { + "epoch": 22.92, + "learning_rate": 3.8542304441459806e-05, + "loss": 2.1937, + "step": 7919500 + }, + { + "epoch": 22.93, + "learning_rate": 3.854158224110782e-05, + "loss": 2.159, + "step": 7920000 + }, + { + "epoch": 22.93, + "learning_rate": 3.8540858593460543e-05, + "loss": 2.1952, + "step": 7920500 + }, + { + "epoch": 22.93, + "learning_rate": 3.8540134945813266e-05, + "loss": 2.1522, + "step": 7921000 + }, + { + "epoch": 22.93, + "learning_rate": 3.853941129816599e-05, + "loss": 2.1482, + "step": 7921500 + }, + { + "epoch": 22.93, + "learning_rate": 3.853868765051871e-05, + "loss": 2.2005, + "step": 7922000 + }, + { + "epoch": 22.93, + "learning_rate": 3.853796400287143e-05, + "loss": 2.1829, + "step": 7922500 + }, + { + "epoch": 22.93, + "learning_rate": 3.8537240355224155e-05, + "loss": 2.1448, + "step": 7923000 + }, + { + "epoch": 22.94, + "learning_rate": 3.8536516707576884e-05, + "loss": 2.176, + "step": 7923500 + }, + { + "epoch": 22.94, + "learning_rate": 3.8535793059929606e-05, + "loss": 2.1648, + "step": 7924000 + }, + { + "epoch": 22.94, + "learning_rate": 3.853506941228233e-05, + "loss": 2.1883, + "step": 7924500 + }, + { + "epoch": 22.94, + "learning_rate": 3.853434576463505e-05, + "loss": 2.1918, + "step": 7925000 + }, + { + "epoch": 22.94, + "learning_rate": 3.853362211698778e-05, + "loss": 2.1574, + "step": 7925500 + }, + { + "epoch": 22.94, + "learning_rate": 3.85328984693405e-05, + "loss": 2.1816, + "step": 7926000 + }, + { + "epoch": 22.94, + "learning_rate": 3.8532174821693224e-05, + "loss": 2.1578, + "step": 7926500 + }, + { + "epoch": 22.95, + "learning_rate": 3.8531451174045946e-05, + "loss": 2.1797, + "step": 7927000 + }, + { + "epoch": 22.95, + "learning_rate": 3.853072752639867e-05, + "loss": 2.1987, + "step": 7927500 + }, + { + "epoch": 22.95, + "learning_rate": 3.853000387875139e-05, + "loss": 2.1994, + "step": 7928000 + }, + { + "epoch": 22.95, + "learning_rate": 3.8529281678399406e-05, + "loss": 2.1749, + "step": 7928500 + }, + { + "epoch": 22.95, + "learning_rate": 3.852855947804743e-05, + "loss": 2.1965, + "step": 7929000 + }, + { + "epoch": 22.95, + "learning_rate": 3.852783583040015e-05, + "loss": 2.1691, + "step": 7929500 + }, + { + "epoch": 22.95, + "learning_rate": 3.852711218275287e-05, + "loss": 2.1685, + "step": 7930000 + }, + { + "epoch": 22.96, + "learning_rate": 3.8526388535105595e-05, + "loss": 2.1688, + "step": 7930500 + }, + { + "epoch": 22.96, + "learning_rate": 3.852566488745832e-05, + "loss": 2.176, + "step": 7931000 + }, + { + "epoch": 22.96, + "learning_rate": 3.852494123981104e-05, + "loss": 2.1398, + "step": 7931500 + }, + { + "epoch": 22.96, + "learning_rate": 3.852421759216376e-05, + "loss": 2.1552, + "step": 7932000 + }, + { + "epoch": 22.96, + "learning_rate": 3.8523495391811784e-05, + "loss": 2.1962, + "step": 7932500 + }, + { + "epoch": 22.96, + "learning_rate": 3.8522771744164506e-05, + "loss": 2.1589, + "step": 7933000 + }, + { + "epoch": 22.96, + "learning_rate": 3.8522048096517235e-05, + "loss": 2.128, + "step": 7933500 + }, + { + "epoch": 22.97, + "learning_rate": 3.852132444886996e-05, + "loss": 2.1852, + "step": 7934000 + }, + { + "epoch": 22.97, + "learning_rate": 3.852060224851797e-05, + "loss": 2.178, + "step": 7934500 + }, + { + "epoch": 22.97, + "learning_rate": 3.8519878600870695e-05, + "loss": 2.178, + "step": 7935000 + }, + { + "epoch": 22.97, + "learning_rate": 3.851915495322342e-05, + "loss": 2.1823, + "step": 7935500 + }, + { + "epoch": 22.97, + "learning_rate": 3.851843275287143e-05, + "loss": 2.1736, + "step": 7936000 + }, + { + "epoch": 22.97, + "learning_rate": 3.851770910522416e-05, + "loss": 2.163, + "step": 7936500 + }, + { + "epoch": 22.97, + "learning_rate": 3.8516985457576884e-05, + "loss": 2.1661, + "step": 7937000 + }, + { + "epoch": 22.98, + "learning_rate": 3.85162632572249e-05, + "loss": 2.1854, + "step": 7937500 + }, + { + "epoch": 22.98, + "learning_rate": 3.851553960957762e-05, + "loss": 2.167, + "step": 7938000 + }, + { + "epoch": 22.98, + "learning_rate": 3.8514815961930344e-05, + "loss": 2.156, + "step": 7938500 + }, + { + "epoch": 22.98, + "learning_rate": 3.851409231428307e-05, + "loss": 2.203, + "step": 7939000 + }, + { + "epoch": 22.98, + "learning_rate": 3.851336866663579e-05, + "loss": 2.1684, + "step": 7939500 + }, + { + "epoch": 22.98, + "learning_rate": 3.851264501898851e-05, + "loss": 2.1749, + "step": 7940000 + }, + { + "epoch": 22.98, + "learning_rate": 3.851192137134124e-05, + "loss": 2.1663, + "step": 7940500 + }, + { + "epoch": 22.99, + "learning_rate": 3.851119772369396e-05, + "loss": 2.1829, + "step": 7941000 + }, + { + "epoch": 22.99, + "learning_rate": 3.8510474076046685e-05, + "loss": 2.1789, + "step": 7941500 + }, + { + "epoch": 22.99, + "learning_rate": 3.8509750428399414e-05, + "loss": 2.1861, + "step": 7942000 + }, + { + "epoch": 22.99, + "learning_rate": 3.8509026780752136e-05, + "loss": 2.1714, + "step": 7942500 + }, + { + "epoch": 22.99, + "learning_rate": 3.850830313310486e-05, + "loss": 2.1863, + "step": 7943000 + }, + { + "epoch": 22.99, + "learning_rate": 3.850757948545758e-05, + "loss": 2.1632, + "step": 7943500 + }, + { + "epoch": 22.99, + "learning_rate": 3.85068558378103e-05, + "loss": 2.1801, + "step": 7944000 + }, + { + "epoch": 23.0, + "learning_rate": 3.8506132190163025e-05, + "loss": 2.2011, + "step": 7944500 + }, + { + "epoch": 23.0, + "learning_rate": 3.850540854251575e-05, + "loss": 2.1393, + "step": 7945000 + }, + { + "epoch": 23.0, + "learning_rate": 3.850468634216376e-05, + "loss": 2.1625, + "step": 7945500 + }, + { + "epoch": 23.0, + "eval_accuracy": 0.6629433126284148, + "eval_accuracy_mlm": 0.6268137506413546, + "eval_accuracy_nsp": 0.8565788086693891, + "eval_loss": 2.2072818279266357, + "eval_runtime": 331.6784, + "eval_samples_per_second": 1315.69, + "eval_steps_per_second": 54.821, + "step": 7945856 + }, + { + "epoch": 23.0, + "learning_rate": 3.8503962694516485e-05, + "loss": 2.1442, + "step": 7946000 + }, + { + "epoch": 23.0, + "learning_rate": 3.850324049416451e-05, + "loss": 2.1303, + "step": 7946500 + }, + { + "epoch": 23.0, + "learning_rate": 3.850251684651723e-05, + "loss": 2.1414, + "step": 7947000 + }, + { + "epoch": 23.0, + "learning_rate": 3.8501794646165245e-05, + "loss": 2.1315, + "step": 7947500 + }, + { + "epoch": 23.01, + "learning_rate": 3.8501070998517974e-05, + "loss": 2.127, + "step": 7948000 + }, + { + "epoch": 23.01, + "learning_rate": 3.8500347350870696e-05, + "loss": 2.1598, + "step": 7948500 + }, + { + "epoch": 23.01, + "learning_rate": 3.849962370322342e-05, + "loss": 2.1488, + "step": 7949000 + }, + { + "epoch": 23.01, + "learning_rate": 3.849890005557614e-05, + "loss": 2.1516, + "step": 7949500 + }, + { + "epoch": 23.01, + "learning_rate": 3.849817640792886e-05, + "loss": 2.1613, + "step": 7950000 + }, + { + "epoch": 23.01, + "learning_rate": 3.8497454207576885e-05, + "loss": 2.1363, + "step": 7950500 + }, + { + "epoch": 23.01, + "learning_rate": 3.849673055992961e-05, + "loss": 2.1572, + "step": 7951000 + }, + { + "epoch": 23.02, + "learning_rate": 3.849600691228233e-05, + "loss": 2.181, + "step": 7951500 + }, + { + "epoch": 23.02, + "learning_rate": 3.849528326463505e-05, + "loss": 2.1382, + "step": 7952000 + }, + { + "epoch": 23.02, + "learning_rate": 3.8494559616987774e-05, + "loss": 2.1549, + "step": 7952500 + }, + { + "epoch": 23.02, + "learning_rate": 3.8493835969340496e-05, + "loss": 2.1579, + "step": 7953000 + }, + { + "epoch": 23.02, + "learning_rate": 3.849311232169322e-05, + "loss": 2.1404, + "step": 7953500 + }, + { + "epoch": 23.02, + "learning_rate": 3.849238867404594e-05, + "loss": 2.1772, + "step": 7954000 + }, + { + "epoch": 23.03, + "learning_rate": 3.849166647369396e-05, + "loss": 2.1351, + "step": 7954500 + }, + { + "epoch": 23.03, + "learning_rate": 3.8490942826046685e-05, + "loss": 2.1548, + "step": 7955000 + }, + { + "epoch": 23.03, + "learning_rate": 3.849022062569471e-05, + "loss": 2.1422, + "step": 7955500 + }, + { + "epoch": 23.03, + "learning_rate": 3.848949697804743e-05, + "loss": 2.1416, + "step": 7956000 + }, + { + "epoch": 23.03, + "learning_rate": 3.848877333040015e-05, + "loss": 2.1436, + "step": 7956500 + }, + { + "epoch": 23.03, + "learning_rate": 3.8488049682752874e-05, + "loss": 2.1563, + "step": 7957000 + }, + { + "epoch": 23.03, + "learning_rate": 3.84873260351056e-05, + "loss": 2.1466, + "step": 7957500 + }, + { + "epoch": 23.04, + "learning_rate": 3.848660238745832e-05, + "loss": 2.168, + "step": 7958000 + }, + { + "epoch": 23.04, + "learning_rate": 3.848587873981104e-05, + "loss": 2.1611, + "step": 7958500 + }, + { + "epoch": 23.04, + "learning_rate": 3.848515509216376e-05, + "loss": 2.1472, + "step": 7959000 + }, + { + "epoch": 23.04, + "learning_rate": 3.8484431444516486e-05, + "loss": 2.1322, + "step": 7959500 + }, + { + "epoch": 23.04, + "learning_rate": 3.848370924416451e-05, + "loss": 2.154, + "step": 7960000 + }, + { + "epoch": 23.04, + "learning_rate": 3.848298559651723e-05, + "loss": 2.1513, + "step": 7960500 + }, + { + "epoch": 23.04, + "learning_rate": 3.8482263396165246e-05, + "loss": 2.1473, + "step": 7961000 + }, + { + "epoch": 23.05, + "learning_rate": 3.848153974851797e-05, + "loss": 2.1557, + "step": 7961500 + }, + { + "epoch": 23.05, + "learning_rate": 3.848081610087069e-05, + "loss": 2.1321, + "step": 7962000 + }, + { + "epoch": 23.05, + "learning_rate": 3.848009245322341e-05, + "loss": 2.1495, + "step": 7962500 + }, + { + "epoch": 23.05, + "learning_rate": 3.847936880557614e-05, + "loss": 2.1675, + "step": 7963000 + }, + { + "epoch": 23.05, + "learning_rate": 3.8478645157928864e-05, + "loss": 2.1607, + "step": 7963500 + }, + { + "epoch": 23.05, + "learning_rate": 3.847792151028159e-05, + "loss": 2.1607, + "step": 7964000 + }, + { + "epoch": 23.05, + "learning_rate": 3.8477197862634315e-05, + "loss": 2.1329, + "step": 7964500 + }, + { + "epoch": 23.06, + "learning_rate": 3.847647421498704e-05, + "loss": 2.1613, + "step": 7965000 + }, + { + "epoch": 23.06, + "learning_rate": 3.847575056733976e-05, + "loss": 2.1416, + "step": 7965500 + }, + { + "epoch": 23.06, + "learning_rate": 3.847502691969248e-05, + "loss": 2.1727, + "step": 7966000 + }, + { + "epoch": 23.06, + "learning_rate": 3.8474303272045204e-05, + "loss": 2.1365, + "step": 7966500 + }, + { + "epoch": 23.06, + "learning_rate": 3.847358107169322e-05, + "loss": 2.1673, + "step": 7967000 + }, + { + "epoch": 23.06, + "learning_rate": 3.847285742404594e-05, + "loss": 2.1507, + "step": 7967500 + }, + { + "epoch": 23.06, + "learning_rate": 3.8472135223693964e-05, + "loss": 2.1445, + "step": 7968000 + }, + { + "epoch": 23.07, + "learning_rate": 3.8471411576046686e-05, + "loss": 2.1591, + "step": 7968500 + }, + { + "epoch": 23.07, + "learning_rate": 3.847068792839941e-05, + "loss": 2.1565, + "step": 7969000 + }, + { + "epoch": 23.07, + "learning_rate": 3.846996428075213e-05, + "loss": 2.1592, + "step": 7969500 + }, + { + "epoch": 23.07, + "learning_rate": 3.8469242080400146e-05, + "loss": 2.1612, + "step": 7970000 + }, + { + "epoch": 23.07, + "learning_rate": 3.846851988004817e-05, + "loss": 2.1491, + "step": 7970500 + }, + { + "epoch": 23.07, + "learning_rate": 3.846779623240089e-05, + "loss": 2.1527, + "step": 7971000 + }, + { + "epoch": 23.07, + "learning_rate": 3.846707258475361e-05, + "loss": 2.1514, + "step": 7971500 + }, + { + "epoch": 23.08, + "learning_rate": 3.846634893710634e-05, + "loss": 2.1395, + "step": 7972000 + }, + { + "epoch": 23.08, + "learning_rate": 3.8465625289459064e-05, + "loss": 2.166, + "step": 7972500 + }, + { + "epoch": 23.08, + "learning_rate": 3.8464901641811787e-05, + "loss": 2.1407, + "step": 7973000 + }, + { + "epoch": 23.08, + "learning_rate": 3.846417799416451e-05, + "loss": 2.153, + "step": 7973500 + }, + { + "epoch": 23.08, + "learning_rate": 3.8463455793812524e-05, + "loss": 2.1546, + "step": 7974000 + }, + { + "epoch": 23.08, + "learning_rate": 3.8462732146165247e-05, + "loss": 2.1605, + "step": 7974500 + }, + { + "epoch": 23.08, + "learning_rate": 3.846200849851797e-05, + "loss": 2.1606, + "step": 7975000 + }, + { + "epoch": 23.09, + "learning_rate": 3.846128485087069e-05, + "loss": 2.1628, + "step": 7975500 + }, + { + "epoch": 23.09, + "learning_rate": 3.846056120322341e-05, + "loss": 2.1589, + "step": 7976000 + }, + { + "epoch": 23.09, + "learning_rate": 3.845983755557614e-05, + "loss": 2.1561, + "step": 7976500 + }, + { + "epoch": 23.09, + "learning_rate": 3.8459113907928864e-05, + "loss": 2.1478, + "step": 7977000 + }, + { + "epoch": 23.09, + "learning_rate": 3.845839026028159e-05, + "loss": 2.1129, + "step": 7977500 + }, + { + "epoch": 23.09, + "learning_rate": 3.8457666612634316e-05, + "loss": 2.1418, + "step": 7978000 + }, + { + "epoch": 23.09, + "learning_rate": 3.845694296498704e-05, + "loss": 2.1404, + "step": 7978500 + }, + { + "epoch": 23.1, + "learning_rate": 3.845621931733976e-05, + "loss": 2.1566, + "step": 7979000 + }, + { + "epoch": 23.1, + "learning_rate": 3.845549566969248e-05, + "loss": 2.164, + "step": 7979500 + }, + { + "epoch": 23.1, + "learning_rate": 3.8454772022045205e-05, + "loss": 2.1501, + "step": 7980000 + }, + { + "epoch": 23.1, + "learning_rate": 3.845404982169322e-05, + "loss": 2.1566, + "step": 7980500 + }, + { + "epoch": 23.1, + "learning_rate": 3.845332617404594e-05, + "loss": 2.1451, + "step": 7981000 + }, + { + "epoch": 23.1, + "learning_rate": 3.8452602526398665e-05, + "loss": 2.1506, + "step": 7981500 + }, + { + "epoch": 23.1, + "learning_rate": 3.8451878878751394e-05, + "loss": 2.1511, + "step": 7982000 + }, + { + "epoch": 23.11, + "learning_rate": 3.8451155231104116e-05, + "loss": 2.1718, + "step": 7982500 + }, + { + "epoch": 23.11, + "learning_rate": 3.845043158345684e-05, + "loss": 2.1625, + "step": 7983000 + }, + { + "epoch": 23.11, + "learning_rate": 3.844970793580956e-05, + "loss": 2.1542, + "step": 7983500 + }, + { + "epoch": 23.11, + "learning_rate": 3.8448985735457576e-05, + "loss": 2.1534, + "step": 7984000 + }, + { + "epoch": 23.11, + "learning_rate": 3.84482620878103e-05, + "loss": 2.1445, + "step": 7984500 + }, + { + "epoch": 23.11, + "learning_rate": 3.844753844016303e-05, + "loss": 2.1608, + "step": 7985000 + }, + { + "epoch": 23.11, + "learning_rate": 3.844681623981104e-05, + "loss": 2.1556, + "step": 7985500 + }, + { + "epoch": 23.12, + "learning_rate": 3.8446094039459065e-05, + "loss": 2.1433, + "step": 7986000 + }, + { + "epoch": 23.12, + "learning_rate": 3.844537039181179e-05, + "loss": 2.1676, + "step": 7986500 + }, + { + "epoch": 23.12, + "learning_rate": 3.844464674416451e-05, + "loss": 2.1478, + "step": 7987000 + }, + { + "epoch": 23.12, + "learning_rate": 3.844392309651723e-05, + "loss": 2.1672, + "step": 7987500 + }, + { + "epoch": 23.12, + "learning_rate": 3.8443199448869954e-05, + "loss": 2.1471, + "step": 7988000 + }, + { + "epoch": 23.12, + "learning_rate": 3.8442475801222676e-05, + "loss": 2.1514, + "step": 7988500 + }, + { + "epoch": 23.12, + "learning_rate": 3.84417521535754e-05, + "loss": 2.1411, + "step": 7989000 + }, + { + "epoch": 23.13, + "learning_rate": 3.844102995322342e-05, + "loss": 2.1434, + "step": 7989500 + }, + { + "epoch": 23.13, + "learning_rate": 3.844030630557614e-05, + "loss": 2.1495, + "step": 7990000 + }, + { + "epoch": 23.13, + "learning_rate": 3.8439582657928865e-05, + "loss": 2.1476, + "step": 7990500 + }, + { + "epoch": 23.13, + "learning_rate": 3.843885901028159e-05, + "loss": 2.1709, + "step": 7991000 + }, + { + "epoch": 23.13, + "learning_rate": 3.84381368099296e-05, + "loss": 2.1532, + "step": 7991500 + }, + { + "epoch": 23.13, + "learning_rate": 3.8437413162282325e-05, + "loss": 2.1552, + "step": 7992000 + }, + { + "epoch": 23.14, + "learning_rate": 3.843668951463505e-05, + "loss": 2.1778, + "step": 7992500 + }, + { + "epoch": 23.14, + "learning_rate": 3.8435965866987777e-05, + "loss": 2.1377, + "step": 7993000 + }, + { + "epoch": 23.14, + "learning_rate": 3.84352422193405e-05, + "loss": 2.1502, + "step": 7993500 + }, + { + "epoch": 23.14, + "learning_rate": 3.843451857169322e-05, + "loss": 2.1729, + "step": 7994000 + }, + { + "epoch": 23.14, + "learning_rate": 3.843379637134124e-05, + "loss": 2.1515, + "step": 7994500 + }, + { + "epoch": 23.14, + "learning_rate": 3.843307417098926e-05, + "loss": 2.1511, + "step": 7995000 + }, + { + "epoch": 23.14, + "learning_rate": 3.843235052334198e-05, + "loss": 2.1653, + "step": 7995500 + }, + { + "epoch": 23.15, + "learning_rate": 3.84316268756947e-05, + "loss": 2.1508, + "step": 7996000 + }, + { + "epoch": 23.15, + "learning_rate": 3.8430903228047426e-05, + "loss": 2.1203, + "step": 7996500 + }, + { + "epoch": 23.15, + "learning_rate": 3.843017958040015e-05, + "loss": 2.155, + "step": 7997000 + }, + { + "epoch": 23.15, + "learning_rate": 3.842945593275287e-05, + "loss": 2.1523, + "step": 7997500 + }, + { + "epoch": 23.15, + "learning_rate": 3.842873228510559e-05, + "loss": 2.1477, + "step": 7998000 + }, + { + "epoch": 23.15, + "learning_rate": 3.842800863745832e-05, + "loss": 2.1658, + "step": 7998500 + }, + { + "epoch": 23.15, + "learning_rate": 3.8427284989811044e-05, + "loss": 2.1433, + "step": 7999000 + }, + { + "epoch": 23.16, + "learning_rate": 3.8426561342163766e-05, + "loss": 2.1297, + "step": 7999500 + }, + { + "epoch": 23.16, + "learning_rate": 3.8425837694516495e-05, + "loss": 2.1559, + "step": 8000000 + }, + { + "epoch": 23.16, + "learning_rate": 3.842511404686922e-05, + "loss": 2.1479, + "step": 8000500 + }, + { + "epoch": 23.16, + "learning_rate": 3.842439039922194e-05, + "loss": 2.1705, + "step": 8001000 + }, + { + "epoch": 23.16, + "learning_rate": 3.842366675157466e-05, + "loss": 2.163, + "step": 8001500 + }, + { + "epoch": 23.16, + "learning_rate": 3.8422943103927384e-05, + "loss": 2.1501, + "step": 8002000 + }, + { + "epoch": 23.16, + "learning_rate": 3.8422219456280106e-05, + "loss": 2.1433, + "step": 8002500 + }, + { + "epoch": 23.17, + "learning_rate": 3.842149580863283e-05, + "loss": 2.1643, + "step": 8003000 + }, + { + "epoch": 23.17, + "learning_rate": 3.842077216098555e-05, + "loss": 2.1548, + "step": 8003500 + }, + { + "epoch": 23.17, + "learning_rate": 3.842004851333827e-05, + "loss": 2.1574, + "step": 8004000 + }, + { + "epoch": 23.17, + "learning_rate": 3.8419324865690995e-05, + "loss": 2.1732, + "step": 8004500 + }, + { + "epoch": 23.17, + "learning_rate": 3.841860121804372e-05, + "loss": 2.159, + "step": 8005000 + }, + { + "epoch": 23.17, + "learning_rate": 3.841787901769174e-05, + "loss": 2.1464, + "step": 8005500 + }, + { + "epoch": 23.17, + "learning_rate": 3.841715537004446e-05, + "loss": 2.1482, + "step": 8006000 + }, + { + "epoch": 23.18, + "learning_rate": 3.8416431722397184e-05, + "loss": 2.1453, + "step": 8006500 + }, + { + "epoch": 23.18, + "learning_rate": 3.8415708074749906e-05, + "loss": 2.1489, + "step": 8007000 + }, + { + "epoch": 23.18, + "learning_rate": 3.8414984427102635e-05, + "loss": 2.1436, + "step": 8007500 + }, + { + "epoch": 23.18, + "learning_rate": 3.841426222675065e-05, + "loss": 2.1502, + "step": 8008000 + }, + { + "epoch": 23.18, + "learning_rate": 3.841353857910337e-05, + "loss": 2.1435, + "step": 8008500 + }, + { + "epoch": 23.18, + "learning_rate": 3.8412814931456095e-05, + "loss": 2.1633, + "step": 8009000 + }, + { + "epoch": 23.18, + "learning_rate": 3.8412091283808824e-05, + "loss": 2.1467, + "step": 8009500 + }, + { + "epoch": 23.19, + "learning_rate": 3.8411367636161546e-05, + "loss": 2.1665, + "step": 8010000 + }, + { + "epoch": 23.19, + "learning_rate": 3.841064543580956e-05, + "loss": 2.1544, + "step": 8010500 + }, + { + "epoch": 23.19, + "learning_rate": 3.840992323545758e-05, + "loss": 2.1397, + "step": 8011000 + }, + { + "epoch": 23.19, + "learning_rate": 3.84091995878103e-05, + "loss": 2.1601, + "step": 8011500 + }, + { + "epoch": 23.19, + "learning_rate": 3.840847594016302e-05, + "loss": 2.1606, + "step": 8012000 + }, + { + "epoch": 23.19, + "learning_rate": 3.8407752292515744e-05, + "loss": 2.188, + "step": 8012500 + }, + { + "epoch": 23.19, + "learning_rate": 3.840702864486847e-05, + "loss": 2.1765, + "step": 8013000 + }, + { + "epoch": 23.2, + "learning_rate": 3.8406304997221195e-05, + "loss": 2.1394, + "step": 8013500 + }, + { + "epoch": 23.2, + "learning_rate": 3.8405584244164504e-05, + "loss": 2.1532, + "step": 8014000 + }, + { + "epoch": 23.2, + "learning_rate": 3.8404860596517227e-05, + "loss": 2.1509, + "step": 8014500 + }, + { + "epoch": 23.2, + "learning_rate": 3.840413694886995e-05, + "loss": 2.1659, + "step": 8015000 + }, + { + "epoch": 23.2, + "learning_rate": 3.840341330122268e-05, + "loss": 2.1431, + "step": 8015500 + }, + { + "epoch": 23.2, + "learning_rate": 3.84026896535754e-05, + "loss": 2.1528, + "step": 8016000 + }, + { + "epoch": 23.2, + "learning_rate": 3.840196600592812e-05, + "loss": 2.1248, + "step": 8016500 + }, + { + "epoch": 23.21, + "learning_rate": 3.8401242358280845e-05, + "loss": 2.1558, + "step": 8017000 + }, + { + "epoch": 23.21, + "learning_rate": 3.8400518710633574e-05, + "loss": 2.1714, + "step": 8017500 + }, + { + "epoch": 23.21, + "learning_rate": 3.839979651028159e-05, + "loss": 2.1743, + "step": 8018000 + }, + { + "epoch": 23.21, + "learning_rate": 3.83990757572249e-05, + "loss": 2.1425, + "step": 8018500 + }, + { + "epoch": 23.21, + "learning_rate": 3.839835210957762e-05, + "loss": 2.1629, + "step": 8019000 + }, + { + "epoch": 23.21, + "learning_rate": 3.839762846193035e-05, + "loss": 2.1936, + "step": 8019500 + }, + { + "epoch": 23.21, + "learning_rate": 3.8396906261578365e-05, + "loss": 2.1583, + "step": 8020000 + }, + { + "epoch": 23.22, + "learning_rate": 3.839618261393109e-05, + "loss": 2.1329, + "step": 8020500 + }, + { + "epoch": 23.22, + "learning_rate": 3.839545896628381e-05, + "loss": 2.1771, + "step": 8021000 + }, + { + "epoch": 23.22, + "learning_rate": 3.839473531863653e-05, + "loss": 2.133, + "step": 8021500 + }, + { + "epoch": 23.22, + "learning_rate": 3.8394011670989254e-05, + "loss": 2.1578, + "step": 8022000 + }, + { + "epoch": 23.22, + "learning_rate": 3.8393288023341976e-05, + "loss": 2.177, + "step": 8022500 + }, + { + "epoch": 23.22, + "learning_rate": 3.8392564375694705e-05, + "loss": 2.1474, + "step": 8023000 + }, + { + "epoch": 23.22, + "learning_rate": 3.839184072804743e-05, + "loss": 2.1756, + "step": 8023500 + }, + { + "epoch": 23.23, + "learning_rate": 3.839111708040015e-05, + "loss": 2.1576, + "step": 8024000 + }, + { + "epoch": 23.23, + "learning_rate": 3.839039488004817e-05, + "loss": 2.1588, + "step": 8024500 + }, + { + "epoch": 23.23, + "learning_rate": 3.8389671232400894e-05, + "loss": 2.1762, + "step": 8025000 + }, + { + "epoch": 23.23, + "learning_rate": 3.8388947584753616e-05, + "loss": 2.1525, + "step": 8025500 + }, + { + "epoch": 23.23, + "learning_rate": 3.838822538440163e-05, + "loss": 2.1526, + "step": 8026000 + }, + { + "epoch": 23.23, + "learning_rate": 3.838750318404965e-05, + "loss": 2.1667, + "step": 8026500 + }, + { + "epoch": 23.23, + "learning_rate": 3.8386779536402376e-05, + "loss": 2.1483, + "step": 8027000 + }, + { + "epoch": 23.24, + "learning_rate": 3.83860558887551e-05, + "loss": 2.1566, + "step": 8027500 + }, + { + "epoch": 23.24, + "learning_rate": 3.838533224110782e-05, + "loss": 2.1473, + "step": 8028000 + }, + { + "epoch": 23.24, + "learning_rate": 3.838460859346054e-05, + "loss": 2.1534, + "step": 8028500 + }, + { + "epoch": 23.24, + "learning_rate": 3.8383884945813265e-05, + "loss": 2.1436, + "step": 8029000 + }, + { + "epoch": 23.24, + "learning_rate": 3.838316129816599e-05, + "loss": 2.1343, + "step": 8029500 + }, + { + "epoch": 23.24, + "learning_rate": 3.838243765051871e-05, + "loss": 2.1352, + "step": 8030000 + }, + { + "epoch": 23.25, + "learning_rate": 3.838171400287144e-05, + "loss": 2.1471, + "step": 8030500 + }, + { + "epoch": 23.25, + "learning_rate": 3.838099035522416e-05, + "loss": 2.1695, + "step": 8031000 + }, + { + "epoch": 23.25, + "learning_rate": 3.838026670757688e-05, + "loss": 2.1491, + "step": 8031500 + }, + { + "epoch": 23.25, + "learning_rate": 3.8379543059929605e-05, + "loss": 2.1922, + "step": 8032000 + }, + { + "epoch": 23.25, + "learning_rate": 3.837881941228233e-05, + "loss": 2.1257, + "step": 8032500 + }, + { + "epoch": 23.25, + "learning_rate": 3.837809576463505e-05, + "loss": 2.1526, + "step": 8033000 + }, + { + "epoch": 23.25, + "learning_rate": 3.837737211698777e-05, + "loss": 2.1614, + "step": 8033500 + }, + { + "epoch": 23.26, + "learning_rate": 3.83766484693405e-05, + "loss": 2.1471, + "step": 8034000 + }, + { + "epoch": 23.26, + "learning_rate": 3.8375924821693223e-05, + "loss": 2.1583, + "step": 8034500 + }, + { + "epoch": 23.26, + "learning_rate": 3.8375201174045946e-05, + "loss": 2.1541, + "step": 8035000 + }, + { + "epoch": 23.26, + "learning_rate": 3.837447752639867e-05, + "loss": 2.1617, + "step": 8035500 + }, + { + "epoch": 23.26, + "learning_rate": 3.837375387875139e-05, + "loss": 2.1525, + "step": 8036000 + }, + { + "epoch": 23.26, + "learning_rate": 3.837303023110411e-05, + "loss": 2.1786, + "step": 8036500 + }, + { + "epoch": 23.26, + "learning_rate": 3.8372306583456835e-05, + "loss": 2.1409, + "step": 8037000 + }, + { + "epoch": 23.27, + "learning_rate": 3.837158438310486e-05, + "loss": 2.1678, + "step": 8037500 + }, + { + "epoch": 23.27, + "learning_rate": 3.837086073545758e-05, + "loss": 2.175, + "step": 8038000 + }, + { + "epoch": 23.27, + "learning_rate": 3.83701370878103e-05, + "loss": 2.1549, + "step": 8038500 + }, + { + "epoch": 23.27, + "learning_rate": 3.8369414887458324e-05, + "loss": 2.1561, + "step": 8039000 + }, + { + "epoch": 23.27, + "learning_rate": 3.8368691239811046e-05, + "loss": 2.1541, + "step": 8039500 + }, + { + "epoch": 23.27, + "learning_rate": 3.836796759216377e-05, + "loss": 2.1452, + "step": 8040000 + }, + { + "epoch": 23.27, + "learning_rate": 3.836724394451649e-05, + "loss": 2.1547, + "step": 8040500 + }, + { + "epoch": 23.28, + "learning_rate": 3.836652029686921e-05, + "loss": 2.159, + "step": 8041000 + }, + { + "epoch": 23.28, + "learning_rate": 3.8365796649221935e-05, + "loss": 2.162, + "step": 8041500 + }, + { + "epoch": 23.28, + "learning_rate": 3.836507300157466e-05, + "loss": 2.1683, + "step": 8042000 + }, + { + "epoch": 23.28, + "learning_rate": 3.836434935392738e-05, + "loss": 2.1655, + "step": 8042500 + }, + { + "epoch": 23.28, + "learning_rate": 3.83636257062801e-05, + "loss": 2.1738, + "step": 8043000 + }, + { + "epoch": 23.28, + "learning_rate": 3.8362903505928124e-05, + "loss": 2.1574, + "step": 8043500 + }, + { + "epoch": 23.28, + "learning_rate": 3.836218130557614e-05, + "loss": 2.1842, + "step": 8044000 + }, + { + "epoch": 23.29, + "learning_rate": 3.836145765792886e-05, + "loss": 2.156, + "step": 8044500 + }, + { + "epoch": 23.29, + "learning_rate": 3.8360734010281584e-05, + "loss": 2.1711, + "step": 8045000 + }, + { + "epoch": 23.29, + "learning_rate": 3.836001036263431e-05, + "loss": 2.1611, + "step": 8045500 + }, + { + "epoch": 23.29, + "learning_rate": 3.8359286714987035e-05, + "loss": 2.1576, + "step": 8046000 + }, + { + "epoch": 23.29, + "learning_rate": 3.835856306733976e-05, + "loss": 2.1464, + "step": 8046500 + }, + { + "epoch": 23.29, + "learning_rate": 3.835783941969248e-05, + "loss": 2.1397, + "step": 8047000 + }, + { + "epoch": 23.29, + "learning_rate": 3.83571172193405e-05, + "loss": 2.1666, + "step": 8047500 + }, + { + "epoch": 23.3, + "learning_rate": 3.8356393571693224e-05, + "loss": 2.1435, + "step": 8048000 + }, + { + "epoch": 23.3, + "learning_rate": 3.8355669924045946e-05, + "loss": 2.1609, + "step": 8048500 + }, + { + "epoch": 23.3, + "learning_rate": 3.835494627639867e-05, + "loss": 2.1652, + "step": 8049000 + }, + { + "epoch": 23.3, + "learning_rate": 3.835422262875139e-05, + "loss": 2.1409, + "step": 8049500 + }, + { + "epoch": 23.3, + "learning_rate": 3.835349898110411e-05, + "loss": 2.1627, + "step": 8050000 + }, + { + "epoch": 23.3, + "learning_rate": 3.8352775333456835e-05, + "loss": 2.1641, + "step": 8050500 + }, + { + "epoch": 23.3, + "learning_rate": 3.835205168580956e-05, + "loss": 2.1255, + "step": 8051000 + }, + { + "epoch": 23.31, + "learning_rate": 3.835132948545758e-05, + "loss": 2.1319, + "step": 8051500 + }, + { + "epoch": 23.31, + "learning_rate": 3.83506058378103e-05, + "loss": 2.1567, + "step": 8052000 + }, + { + "epoch": 23.31, + "learning_rate": 3.834988219016303e-05, + "loss": 2.1554, + "step": 8052500 + }, + { + "epoch": 23.31, + "learning_rate": 3.8349158542515753e-05, + "loss": 2.16, + "step": 8053000 + }, + { + "epoch": 23.31, + "learning_rate": 3.834843634216377e-05, + "loss": 2.1413, + "step": 8053500 + }, + { + "epoch": 23.31, + "learning_rate": 3.834771269451649e-05, + "loss": 2.1649, + "step": 8054000 + }, + { + "epoch": 23.31, + "learning_rate": 3.8346989046869213e-05, + "loss": 2.1811, + "step": 8054500 + }, + { + "epoch": 23.32, + "learning_rate": 3.8346265399221936e-05, + "loss": 2.1672, + "step": 8055000 + }, + { + "epoch": 23.32, + "learning_rate": 3.834554175157466e-05, + "loss": 2.1631, + "step": 8055500 + }, + { + "epoch": 23.32, + "learning_rate": 3.834481810392738e-05, + "loss": 2.152, + "step": 8056000 + }, + { + "epoch": 23.32, + "learning_rate": 3.83440959035754e-05, + "loss": 2.1567, + "step": 8056500 + }, + { + "epoch": 23.32, + "learning_rate": 3.8343372255928125e-05, + "loss": 2.1679, + "step": 8057000 + }, + { + "epoch": 23.32, + "learning_rate": 3.834264860828085e-05, + "loss": 2.193, + "step": 8057500 + }, + { + "epoch": 23.32, + "learning_rate": 3.834192496063357e-05, + "loss": 2.1605, + "step": 8058000 + }, + { + "epoch": 23.33, + "learning_rate": 3.834120131298629e-05, + "loss": 2.1514, + "step": 8058500 + }, + { + "epoch": 23.33, + "learning_rate": 3.83404805599296e-05, + "loss": 2.1341, + "step": 8059000 + }, + { + "epoch": 23.33, + "learning_rate": 3.833975691228233e-05, + "loss": 2.1524, + "step": 8059500 + }, + { + "epoch": 23.33, + "learning_rate": 3.833903326463505e-05, + "loss": 2.1455, + "step": 8060000 + }, + { + "epoch": 23.33, + "learning_rate": 3.833830961698778e-05, + "loss": 2.1356, + "step": 8060500 + }, + { + "epoch": 23.33, + "learning_rate": 3.83375859693405e-05, + "loss": 2.142, + "step": 8061000 + }, + { + "epoch": 23.33, + "learning_rate": 3.8336862321693225e-05, + "loss": 2.1608, + "step": 8061500 + }, + { + "epoch": 23.34, + "learning_rate": 3.833613867404595e-05, + "loss": 2.1584, + "step": 8062000 + }, + { + "epoch": 23.34, + "learning_rate": 3.833541502639867e-05, + "loss": 2.1618, + "step": 8062500 + }, + { + "epoch": 23.34, + "learning_rate": 3.833469137875139e-05, + "loss": 2.152, + "step": 8063000 + }, + { + "epoch": 23.34, + "learning_rate": 3.8333967731104114e-05, + "loss": 2.1822, + "step": 8063500 + }, + { + "epoch": 23.34, + "learning_rate": 3.833324553075213e-05, + "loss": 2.1364, + "step": 8064000 + }, + { + "epoch": 23.34, + "learning_rate": 3.833252188310485e-05, + "loss": 2.1742, + "step": 8064500 + }, + { + "epoch": 23.34, + "learning_rate": 3.833179823545758e-05, + "loss": 2.1271, + "step": 8065000 + }, + { + "epoch": 23.35, + "learning_rate": 3.8331076035105596e-05, + "loss": 2.1706, + "step": 8065500 + }, + { + "epoch": 23.35, + "learning_rate": 3.833035238745832e-05, + "loss": 2.1534, + "step": 8066000 + }, + { + "epoch": 23.35, + "learning_rate": 3.832962873981104e-05, + "loss": 2.188, + "step": 8066500 + }, + { + "epoch": 23.35, + "learning_rate": 3.832890509216376e-05, + "loss": 2.143, + "step": 8067000 + }, + { + "epoch": 23.35, + "learning_rate": 3.832818144451649e-05, + "loss": 2.1687, + "step": 8067500 + }, + { + "epoch": 23.35, + "learning_rate": 3.8327457796869214e-05, + "loss": 2.1594, + "step": 8068000 + }, + { + "epoch": 23.36, + "learning_rate": 3.8326734149221936e-05, + "loss": 2.1665, + "step": 8068500 + }, + { + "epoch": 23.36, + "learning_rate": 3.832601050157466e-05, + "loss": 2.1698, + "step": 8069000 + }, + { + "epoch": 23.36, + "learning_rate": 3.832528685392738e-05, + "loss": 2.146, + "step": 8069500 + }, + { + "epoch": 23.36, + "learning_rate": 3.83245646535754e-05, + "loss": 2.1528, + "step": 8070000 + }, + { + "epoch": 23.36, + "learning_rate": 3.8323841005928125e-05, + "loss": 2.1544, + "step": 8070500 + }, + { + "epoch": 23.36, + "learning_rate": 3.832311735828085e-05, + "loss": 2.1589, + "step": 8071000 + }, + { + "epoch": 23.36, + "learning_rate": 3.832239515792886e-05, + "loss": 2.1878, + "step": 8071500 + }, + { + "epoch": 23.37, + "learning_rate": 3.8321671510281586e-05, + "loss": 2.1357, + "step": 8072000 + }, + { + "epoch": 23.37, + "learning_rate": 3.832094786263431e-05, + "loss": 2.1545, + "step": 8072500 + }, + { + "epoch": 23.37, + "learning_rate": 3.832022421498703e-05, + "loss": 2.156, + "step": 8073000 + }, + { + "epoch": 23.37, + "learning_rate": 3.831950056733975e-05, + "loss": 2.1636, + "step": 8073500 + }, + { + "epoch": 23.37, + "learning_rate": 3.831877691969248e-05, + "loss": 2.1689, + "step": 8074000 + }, + { + "epoch": 23.37, + "learning_rate": 3.831805616663579e-05, + "loss": 2.1667, + "step": 8074500 + }, + { + "epoch": 23.37, + "learning_rate": 3.831733251898851e-05, + "loss": 2.1807, + "step": 8075000 + }, + { + "epoch": 23.38, + "learning_rate": 3.831660887134124e-05, + "loss": 2.1491, + "step": 8075500 + }, + { + "epoch": 23.38, + "learning_rate": 3.8315885223693964e-05, + "loss": 2.1855, + "step": 8076000 + }, + { + "epoch": 23.38, + "learning_rate": 3.8315161576046686e-05, + "loss": 2.1419, + "step": 8076500 + }, + { + "epoch": 23.38, + "learning_rate": 3.831443792839941e-05, + "loss": 2.1618, + "step": 8077000 + }, + { + "epoch": 23.38, + "learning_rate": 3.831371428075213e-05, + "loss": 2.1637, + "step": 8077500 + }, + { + "epoch": 23.38, + "learning_rate": 3.831299063310486e-05, + "loss": 2.1749, + "step": 8078000 + }, + { + "epoch": 23.38, + "learning_rate": 3.831226698545758e-05, + "loss": 2.1335, + "step": 8078500 + }, + { + "epoch": 23.39, + "learning_rate": 3.8311543337810304e-05, + "loss": 2.1451, + "step": 8079000 + }, + { + "epoch": 23.39, + "learning_rate": 3.8310819690163026e-05, + "loss": 2.1549, + "step": 8079500 + }, + { + "epoch": 23.39, + "learning_rate": 3.831009604251575e-05, + "loss": 2.131, + "step": 8080000 + }, + { + "epoch": 23.39, + "learning_rate": 3.830937239486847e-05, + "loss": 2.1704, + "step": 8080500 + }, + { + "epoch": 23.39, + "learning_rate": 3.830864874722119e-05, + "loss": 2.1779, + "step": 8081000 + }, + { + "epoch": 23.39, + "learning_rate": 3.8307925099573915e-05, + "loss": 2.1493, + "step": 8081500 + }, + { + "epoch": 23.39, + "learning_rate": 3.830720145192664e-05, + "loss": 2.1648, + "step": 8082000 + }, + { + "epoch": 23.4, + "learning_rate": 3.8306477804279366e-05, + "loss": 2.1504, + "step": 8082500 + }, + { + "epoch": 23.4, + "learning_rate": 3.830575415663209e-05, + "loss": 2.1686, + "step": 8083000 + }, + { + "epoch": 23.4, + "learning_rate": 3.830503050898481e-05, + "loss": 2.1385, + "step": 8083500 + }, + { + "epoch": 23.4, + "learning_rate": 3.830430686133753e-05, + "loss": 2.1396, + "step": 8084000 + }, + { + "epoch": 23.4, + "learning_rate": 3.830358610828085e-05, + "loss": 2.1813, + "step": 8084500 + }, + { + "epoch": 23.4, + "learning_rate": 3.830286246063357e-05, + "loss": 2.1622, + "step": 8085000 + }, + { + "epoch": 23.4, + "learning_rate": 3.830213881298629e-05, + "loss": 2.1518, + "step": 8085500 + }, + { + "epoch": 23.41, + "learning_rate": 3.8301415165339015e-05, + "loss": 2.164, + "step": 8086000 + }, + { + "epoch": 23.41, + "learning_rate": 3.830069151769174e-05, + "loss": 2.1583, + "step": 8086500 + }, + { + "epoch": 23.41, + "learning_rate": 3.829996787004446e-05, + "loss": 2.1553, + "step": 8087000 + }, + { + "epoch": 23.41, + "learning_rate": 3.829924422239718e-05, + "loss": 2.1529, + "step": 8087500 + }, + { + "epoch": 23.41, + "learning_rate": 3.829852057474991e-05, + "loss": 2.1819, + "step": 8088000 + }, + { + "epoch": 23.41, + "learning_rate": 3.829779692710263e-05, + "loss": 2.1666, + "step": 8088500 + }, + { + "epoch": 23.41, + "learning_rate": 3.8297073279455355e-05, + "loss": 2.1452, + "step": 8089000 + }, + { + "epoch": 23.42, + "learning_rate": 3.8296349631808084e-05, + "loss": 2.1529, + "step": 8089500 + }, + { + "epoch": 23.42, + "learning_rate": 3.829562598416081e-05, + "loss": 2.178, + "step": 8090000 + }, + { + "epoch": 23.42, + "learning_rate": 3.829490233651353e-05, + "loss": 2.1613, + "step": 8090500 + }, + { + "epoch": 23.42, + "learning_rate": 3.8294180136161544e-05, + "loss": 2.1622, + "step": 8091000 + }, + { + "epoch": 23.42, + "learning_rate": 3.829345648851427e-05, + "loss": 2.1375, + "step": 8091500 + }, + { + "epoch": 23.42, + "learning_rate": 3.829273284086699e-05, + "loss": 2.1878, + "step": 8092000 + }, + { + "epoch": 23.42, + "learning_rate": 3.829201064051501e-05, + "loss": 2.1726, + "step": 8092500 + }, + { + "epoch": 23.43, + "learning_rate": 3.8291286992867733e-05, + "loss": 2.1887, + "step": 8093000 + }, + { + "epoch": 23.43, + "learning_rate": 3.829056479251575e-05, + "loss": 2.1584, + "step": 8093500 + }, + { + "epoch": 23.43, + "learning_rate": 3.828984114486847e-05, + "loss": 2.1612, + "step": 8094000 + }, + { + "epoch": 23.43, + "learning_rate": 3.8289117497221193e-05, + "loss": 2.156, + "step": 8094500 + }, + { + "epoch": 23.43, + "learning_rate": 3.8288393849573916e-05, + "loss": 2.1661, + "step": 8095000 + }, + { + "epoch": 23.43, + "learning_rate": 3.828767020192664e-05, + "loss": 2.1471, + "step": 8095500 + }, + { + "epoch": 23.43, + "learning_rate": 3.828694655427936e-05, + "loss": 2.1523, + "step": 8096000 + }, + { + "epoch": 23.44, + "learning_rate": 3.828622290663208e-05, + "loss": 2.1495, + "step": 8096500 + }, + { + "epoch": 23.44, + "learning_rate": 3.8285500706280105e-05, + "loss": 2.1502, + "step": 8097000 + }, + { + "epoch": 23.44, + "learning_rate": 3.8284777058632834e-05, + "loss": 2.1731, + "step": 8097500 + }, + { + "epoch": 23.44, + "learning_rate": 3.8284053410985556e-05, + "loss": 2.1594, + "step": 8098000 + }, + { + "epoch": 23.44, + "learning_rate": 3.828332976333828e-05, + "loss": 2.1675, + "step": 8098500 + }, + { + "epoch": 23.44, + "learning_rate": 3.8282606115691e-05, + "loss": 2.1627, + "step": 8099000 + }, + { + "epoch": 23.44, + "learning_rate": 3.828188246804372e-05, + "loss": 2.1564, + "step": 8099500 + }, + { + "epoch": 23.45, + "learning_rate": 3.8281158820396445e-05, + "loss": 2.1466, + "step": 8100000 + }, + { + "epoch": 23.45, + "learning_rate": 3.828043806733976e-05, + "loss": 2.1667, + "step": 8100500 + }, + { + "epoch": 23.45, + "learning_rate": 3.827971441969248e-05, + "loss": 2.1631, + "step": 8101000 + }, + { + "epoch": 23.45, + "learning_rate": 3.8278990772045205e-05, + "loss": 2.1652, + "step": 8101500 + }, + { + "epoch": 23.45, + "learning_rate": 3.827826712439793e-05, + "loss": 2.1661, + "step": 8102000 + }, + { + "epoch": 23.45, + "learning_rate": 3.827754347675065e-05, + "loss": 2.1744, + "step": 8102500 + }, + { + "epoch": 23.45, + "learning_rate": 3.827681982910337e-05, + "loss": 2.1489, + "step": 8103000 + }, + { + "epoch": 23.46, + "learning_rate": 3.8276096181456094e-05, + "loss": 2.1685, + "step": 8103500 + }, + { + "epoch": 23.46, + "learning_rate": 3.8275372533808816e-05, + "loss": 2.166, + "step": 8104000 + }, + { + "epoch": 23.46, + "learning_rate": 3.827464888616154e-05, + "loss": 2.1722, + "step": 8104500 + }, + { + "epoch": 23.46, + "learning_rate": 3.827392523851427e-05, + "loss": 2.1875, + "step": 8105000 + }, + { + "epoch": 23.46, + "learning_rate": 3.827320303816228e-05, + "loss": 2.1437, + "step": 8105500 + }, + { + "epoch": 23.46, + "learning_rate": 3.827247939051501e-05, + "loss": 2.1674, + "step": 8106000 + }, + { + "epoch": 23.46, + "learning_rate": 3.8271755742867734e-05, + "loss": 2.1638, + "step": 8106500 + }, + { + "epoch": 23.47, + "learning_rate": 3.8271032095220456e-05, + "loss": 2.1623, + "step": 8107000 + }, + { + "epoch": 23.47, + "learning_rate": 3.827030844757318e-05, + "loss": 2.1803, + "step": 8107500 + }, + { + "epoch": 23.47, + "learning_rate": 3.82695847999259e-05, + "loss": 2.1605, + "step": 8108000 + }, + { + "epoch": 23.47, + "learning_rate": 3.8268862599573917e-05, + "loss": 2.1829, + "step": 8108500 + }, + { + "epoch": 23.47, + "learning_rate": 3.826813895192664e-05, + "loss": 2.1731, + "step": 8109000 + }, + { + "epoch": 23.47, + "learning_rate": 3.826741530427936e-05, + "loss": 2.1725, + "step": 8109500 + }, + { + "epoch": 23.48, + "learning_rate": 3.826669165663208e-05, + "loss": 2.1808, + "step": 8110000 + }, + { + "epoch": 23.48, + "learning_rate": 3.826596800898481e-05, + "loss": 2.1528, + "step": 8110500 + }, + { + "epoch": 23.48, + "learning_rate": 3.8265244361337534e-05, + "loss": 2.1356, + "step": 8111000 + }, + { + "epoch": 23.48, + "learning_rate": 3.826452071369026e-05, + "loss": 2.1422, + "step": 8111500 + }, + { + "epoch": 23.48, + "learning_rate": 3.8263797066042986e-05, + "loss": 2.1799, + "step": 8112000 + }, + { + "epoch": 23.48, + "learning_rate": 3.8263074865691e-05, + "loss": 2.1856, + "step": 8112500 + }, + { + "epoch": 23.48, + "learning_rate": 3.8262351218043723e-05, + "loss": 2.1643, + "step": 8113000 + }, + { + "epoch": 23.49, + "learning_rate": 3.8261627570396446e-05, + "loss": 2.1486, + "step": 8113500 + }, + { + "epoch": 23.49, + "learning_rate": 3.826090392274917e-05, + "loss": 2.1454, + "step": 8114000 + }, + { + "epoch": 23.49, + "learning_rate": 3.826018027510189e-05, + "loss": 2.1712, + "step": 8114500 + }, + { + "epoch": 23.49, + "learning_rate": 3.825945662745461e-05, + "loss": 2.1691, + "step": 8115000 + }, + { + "epoch": 23.49, + "learning_rate": 3.8258732979807335e-05, + "loss": 2.1571, + "step": 8115500 + }, + { + "epoch": 23.49, + "learning_rate": 3.825801077945536e-05, + "loss": 2.1648, + "step": 8116000 + }, + { + "epoch": 23.49, + "learning_rate": 3.825728713180808e-05, + "loss": 2.1826, + "step": 8116500 + }, + { + "epoch": 23.5, + "learning_rate": 3.82565634841608e-05, + "loss": 2.1806, + "step": 8117000 + }, + { + "epoch": 23.5, + "learning_rate": 3.8255839836513524e-05, + "loss": 2.1382, + "step": 8117500 + }, + { + "epoch": 23.5, + "learning_rate": 3.8255116188866246e-05, + "loss": 2.1702, + "step": 8118000 + }, + { + "epoch": 23.5, + "learning_rate": 3.825439254121897e-05, + "loss": 2.1434, + "step": 8118500 + }, + { + "epoch": 23.5, + "learning_rate": 3.825367034086699e-05, + "loss": 2.1758, + "step": 8119000 + }, + { + "epoch": 23.5, + "learning_rate": 3.825294669321971e-05, + "loss": 2.1716, + "step": 8119500 + }, + { + "epoch": 23.5, + "learning_rate": 3.825222304557244e-05, + "loss": 2.1926, + "step": 8120000 + }, + { + "epoch": 23.51, + "learning_rate": 3.8251499397925164e-05, + "loss": 2.1539, + "step": 8120500 + }, + { + "epoch": 23.51, + "learning_rate": 3.8250775750277886e-05, + "loss": 2.1306, + "step": 8121000 + }, + { + "epoch": 23.51, + "learning_rate": 3.825005210263061e-05, + "loss": 2.1608, + "step": 8121500 + }, + { + "epoch": 23.51, + "learning_rate": 3.8249329902278624e-05, + "loss": 2.1581, + "step": 8122000 + }, + { + "epoch": 23.51, + "learning_rate": 3.824860770192664e-05, + "loss": 2.1733, + "step": 8122500 + }, + { + "epoch": 23.51, + "learning_rate": 3.824788405427936e-05, + "loss": 2.1464, + "step": 8123000 + }, + { + "epoch": 23.51, + "learning_rate": 3.824716040663209e-05, + "loss": 2.169, + "step": 8123500 + }, + { + "epoch": 23.52, + "learning_rate": 3.824643675898481e-05, + "loss": 2.1347, + "step": 8124000 + }, + { + "epoch": 23.52, + "learning_rate": 3.8245713111337535e-05, + "loss": 2.1534, + "step": 8124500 + }, + { + "epoch": 23.52, + "learning_rate": 3.824499091098555e-05, + "loss": 2.1684, + "step": 8125000 + }, + { + "epoch": 23.52, + "learning_rate": 3.824426726333827e-05, + "loss": 2.1778, + "step": 8125500 + }, + { + "epoch": 23.52, + "learning_rate": 3.8243543615690995e-05, + "loss": 2.1803, + "step": 8126000 + }, + { + "epoch": 23.52, + "learning_rate": 3.824281996804372e-05, + "loss": 2.1998, + "step": 8126500 + }, + { + "epoch": 23.52, + "learning_rate": 3.824209632039644e-05, + "loss": 2.1521, + "step": 8127000 + }, + { + "epoch": 23.53, + "learning_rate": 3.824137267274917e-05, + "loss": 2.1643, + "step": 8127500 + }, + { + "epoch": 23.53, + "learning_rate": 3.824064902510189e-05, + "loss": 2.1551, + "step": 8128000 + }, + { + "epoch": 23.53, + "learning_rate": 3.823992537745461e-05, + "loss": 2.1777, + "step": 8128500 + }, + { + "epoch": 23.53, + "learning_rate": 3.8239203177102636e-05, + "loss": 2.1739, + "step": 8129000 + }, + { + "epoch": 23.53, + "learning_rate": 3.823847952945536e-05, + "loss": 2.1587, + "step": 8129500 + }, + { + "epoch": 23.53, + "learning_rate": 3.823775732910337e-05, + "loss": 2.1541, + "step": 8130000 + }, + { + "epoch": 23.53, + "learning_rate": 3.8237033681456096e-05, + "loss": 2.1632, + "step": 8130500 + }, + { + "epoch": 23.54, + "learning_rate": 3.823631003380882e-05, + "loss": 2.1414, + "step": 8131000 + }, + { + "epoch": 23.54, + "learning_rate": 3.823558638616154e-05, + "loss": 2.1897, + "step": 8131500 + }, + { + "epoch": 23.54, + "learning_rate": 3.823486273851426e-05, + "loss": 2.15, + "step": 8132000 + }, + { + "epoch": 23.54, + "learning_rate": 3.823413909086699e-05, + "loss": 2.1688, + "step": 8132500 + }, + { + "epoch": 23.54, + "learning_rate": 3.8233415443219714e-05, + "loss": 2.1456, + "step": 8133000 + }, + { + "epoch": 23.54, + "learning_rate": 3.8232691795572436e-05, + "loss": 2.1744, + "step": 8133500 + }, + { + "epoch": 23.54, + "learning_rate": 3.823196814792516e-05, + "loss": 2.1384, + "step": 8134000 + }, + { + "epoch": 23.55, + "learning_rate": 3.823124450027789e-05, + "loss": 2.1718, + "step": 8134500 + }, + { + "epoch": 23.55, + "learning_rate": 3.823052085263061e-05, + "loss": 2.1671, + "step": 8135000 + }, + { + "epoch": 23.55, + "learning_rate": 3.822979720498333e-05, + "loss": 2.1511, + "step": 8135500 + }, + { + "epoch": 23.55, + "learning_rate": 3.8229073557336054e-05, + "loss": 2.1734, + "step": 8136000 + }, + { + "epoch": 23.55, + "learning_rate": 3.822835135698407e-05, + "loss": 2.1548, + "step": 8136500 + }, + { + "epoch": 23.55, + "learning_rate": 3.822762770933679e-05, + "loss": 2.1718, + "step": 8137000 + }, + { + "epoch": 23.55, + "learning_rate": 3.8226904061689514e-05, + "loss": 2.1741, + "step": 8137500 + }, + { + "epoch": 23.56, + "learning_rate": 3.822618041404224e-05, + "loss": 2.1555, + "step": 8138000 + }, + { + "epoch": 23.56, + "learning_rate": 3.8225456766394965e-05, + "loss": 2.1442, + "step": 8138500 + }, + { + "epoch": 23.56, + "learning_rate": 3.822473311874769e-05, + "loss": 2.1682, + "step": 8139000 + }, + { + "epoch": 23.56, + "learning_rate": 3.822400947110041e-05, + "loss": 2.1603, + "step": 8139500 + }, + { + "epoch": 23.56, + "learning_rate": 3.8223287270748425e-05, + "loss": 2.146, + "step": 8140000 + }, + { + "epoch": 23.56, + "learning_rate": 3.822256362310115e-05, + "loss": 2.1843, + "step": 8140500 + }, + { + "epoch": 23.56, + "learning_rate": 3.822183997545387e-05, + "loss": 2.1576, + "step": 8141000 + }, + { + "epoch": 23.57, + "learning_rate": 3.822111632780659e-05, + "loss": 2.1579, + "step": 8141500 + }, + { + "epoch": 23.57, + "learning_rate": 3.8220394127454614e-05, + "loss": 2.1499, + "step": 8142000 + }, + { + "epoch": 23.57, + "learning_rate": 3.821967047980734e-05, + "loss": 2.1718, + "step": 8142500 + }, + { + "epoch": 23.57, + "learning_rate": 3.8218946832160065e-05, + "loss": 2.1596, + "step": 8143000 + }, + { + "epoch": 23.57, + "learning_rate": 3.821822318451279e-05, + "loss": 2.1691, + "step": 8143500 + }, + { + "epoch": 23.57, + "learning_rate": 3.821749953686551e-05, + "loss": 2.1697, + "step": 8144000 + }, + { + "epoch": 23.57, + "learning_rate": 3.821677588921823e-05, + "loss": 2.1672, + "step": 8144500 + }, + { + "epoch": 23.58, + "learning_rate": 3.821605368886625e-05, + "loss": 2.1731, + "step": 8145000 + }, + { + "epoch": 23.58, + "learning_rate": 3.821533004121897e-05, + "loss": 2.167, + "step": 8145500 + }, + { + "epoch": 23.58, + "learning_rate": 3.821460639357169e-05, + "loss": 2.1822, + "step": 8146000 + }, + { + "epoch": 23.58, + "learning_rate": 3.8213882745924414e-05, + "loss": 2.1531, + "step": 8146500 + }, + { + "epoch": 23.58, + "learning_rate": 3.821315909827714e-05, + "loss": 2.1819, + "step": 8147000 + }, + { + "epoch": 23.58, + "learning_rate": 3.8212435450629865e-05, + "loss": 2.1773, + "step": 8147500 + }, + { + "epoch": 23.59, + "learning_rate": 3.821171325027788e-05, + "loss": 2.1347, + "step": 8148000 + }, + { + "epoch": 23.59, + "learning_rate": 3.82109896026306e-05, + "loss": 2.1964, + "step": 8148500 + }, + { + "epoch": 23.59, + "learning_rate": 3.8210265954983325e-05, + "loss": 2.1456, + "step": 8149000 + }, + { + "epoch": 23.59, + "learning_rate": 3.8209542307336054e-05, + "loss": 2.166, + "step": 8149500 + }, + { + "epoch": 23.59, + "learning_rate": 3.820881865968878e-05, + "loss": 2.1605, + "step": 8150000 + }, + { + "epoch": 23.59, + "learning_rate": 3.820809645933679e-05, + "loss": 2.1586, + "step": 8150500 + }, + { + "epoch": 23.59, + "learning_rate": 3.8207374258984815e-05, + "loss": 2.1788, + "step": 8151000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820665061133754e-05, + "loss": 2.1458, + "step": 8151500 + }, + { + "epoch": 23.6, + "learning_rate": 3.820592696369026e-05, + "loss": 2.1648, + "step": 8152000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820520331604298e-05, + "loss": 2.1648, + "step": 8152500 + }, + { + "epoch": 23.6, + "learning_rate": 3.8204479668395704e-05, + "loss": 2.181, + "step": 8153000 + }, + { + "epoch": 23.6, + "learning_rate": 3.8203756020748426e-05, + "loss": 2.1581, + "step": 8153500 + }, + { + "epoch": 23.6, + "learning_rate": 3.820303237310115e-05, + "loss": 2.1623, + "step": 8154000 + }, + { + "epoch": 23.6, + "learning_rate": 3.820230872545387e-05, + "loss": 2.1895, + "step": 8154500 + }, + { + "epoch": 23.61, + "learning_rate": 3.820158507780659e-05, + "loss": 2.1604, + "step": 8155000 + }, + { + "epoch": 23.61, + "learning_rate": 3.820086143015932e-05, + "loss": 2.1569, + "step": 8155500 + }, + { + "epoch": 23.61, + "learning_rate": 3.8200137782512044e-05, + "loss": 2.1513, + "step": 8156000 + }, + { + "epoch": 23.61, + "learning_rate": 3.819941558216006e-05, + "loss": 2.1729, + "step": 8156500 + }, + { + "epoch": 23.61, + "learning_rate": 3.819869193451279e-05, + "loss": 2.1657, + "step": 8157000 + }, + { + "epoch": 23.61, + "learning_rate": 3.819796828686551e-05, + "loss": 2.1643, + "step": 8157500 + }, + { + "epoch": 23.61, + "learning_rate": 3.819724463921823e-05, + "loss": 2.1793, + "step": 8158000 + }, + { + "epoch": 23.62, + "learning_rate": 3.8196520991570955e-05, + "loss": 2.1531, + "step": 8158500 + }, + { + "epoch": 23.62, + "learning_rate": 3.819579879121897e-05, + "loss": 2.1506, + "step": 8159000 + }, + { + "epoch": 23.62, + "learning_rate": 3.819507514357169e-05, + "loss": 2.1548, + "step": 8159500 + }, + { + "epoch": 23.62, + "learning_rate": 3.819435149592442e-05, + "loss": 2.1676, + "step": 8160000 + }, + { + "epoch": 23.62, + "learning_rate": 3.8193627848277144e-05, + "loss": 2.1442, + "step": 8160500 + }, + { + "epoch": 23.62, + "learning_rate": 3.8192904200629866e-05, + "loss": 2.1657, + "step": 8161000 + }, + { + "epoch": 23.62, + "learning_rate": 3.819218055298259e-05, + "loss": 2.1817, + "step": 8161500 + }, + { + "epoch": 23.63, + "learning_rate": 3.819145690533531e-05, + "loss": 2.1683, + "step": 8162000 + }, + { + "epoch": 23.63, + "learning_rate": 3.819073325768803e-05, + "loss": 2.1698, + "step": 8162500 + }, + { + "epoch": 23.63, + "learning_rate": 3.8190009610040755e-05, + "loss": 2.1682, + "step": 8163000 + }, + { + "epoch": 23.63, + "learning_rate": 3.818928596239348e-05, + "loss": 2.1602, + "step": 8163500 + }, + { + "epoch": 23.63, + "learning_rate": 3.818856376204149e-05, + "loss": 2.1651, + "step": 8164000 + }, + { + "epoch": 23.63, + "learning_rate": 3.818784011439422e-05, + "loss": 2.1676, + "step": 8164500 + }, + { + "epoch": 23.63, + "learning_rate": 3.8187116466746944e-05, + "loss": 2.1715, + "step": 8165000 + }, + { + "epoch": 23.64, + "learning_rate": 3.818639281909967e-05, + "loss": 2.1686, + "step": 8165500 + }, + { + "epoch": 23.64, + "learning_rate": 3.8185669171452395e-05, + "loss": 2.1636, + "step": 8166000 + }, + { + "epoch": 23.64, + "learning_rate": 3.818494552380512e-05, + "loss": 2.1742, + "step": 8166500 + }, + { + "epoch": 23.64, + "learning_rate": 3.818422332345313e-05, + "loss": 2.1632, + "step": 8167000 + }, + { + "epoch": 23.64, + "learning_rate": 3.8183499675805855e-05, + "loss": 2.1405, + "step": 8167500 + }, + { + "epoch": 23.64, + "learning_rate": 3.818277602815858e-05, + "loss": 2.1556, + "step": 8168000 + }, + { + "epoch": 23.64, + "learning_rate": 3.81820523805113e-05, + "loss": 2.1711, + "step": 8168500 + }, + { + "epoch": 23.65, + "learning_rate": 3.818133018015932e-05, + "loss": 2.1692, + "step": 8169000 + }, + { + "epoch": 23.65, + "learning_rate": 3.8180606532512045e-05, + "loss": 2.1519, + "step": 8169500 + }, + { + "epoch": 23.65, + "learning_rate": 3.817988288486477e-05, + "loss": 2.1584, + "step": 8170000 + }, + { + "epoch": 23.65, + "learning_rate": 3.817915923721749e-05, + "loss": 2.1754, + "step": 8170500 + }, + { + "epoch": 23.65, + "learning_rate": 3.817843558957021e-05, + "loss": 2.1513, + "step": 8171000 + }, + { + "epoch": 23.65, + "learning_rate": 3.817771194192294e-05, + "loss": 2.1734, + "step": 8171500 + }, + { + "epoch": 23.65, + "learning_rate": 3.8176989741570956e-05, + "loss": 2.1654, + "step": 8172000 + }, + { + "epoch": 23.66, + "learning_rate": 3.817626609392368e-05, + "loss": 2.1565, + "step": 8172500 + }, + { + "epoch": 23.66, + "learning_rate": 3.81755424462764e-05, + "loss": 2.1423, + "step": 8173000 + }, + { + "epoch": 23.66, + "learning_rate": 3.817481879862912e-05, + "loss": 2.1542, + "step": 8173500 + }, + { + "epoch": 23.66, + "learning_rate": 3.8174096598277145e-05, + "loss": 2.1716, + "step": 8174000 + }, + { + "epoch": 23.66, + "learning_rate": 3.817337295062987e-05, + "loss": 2.1389, + "step": 8174500 + }, + { + "epoch": 23.66, + "learning_rate": 3.817265075027788e-05, + "loss": 2.1575, + "step": 8175000 + }, + { + "epoch": 23.66, + "learning_rate": 3.8171927102630605e-05, + "loss": 2.1508, + "step": 8175500 + }, + { + "epoch": 23.67, + "learning_rate": 3.817120345498333e-05, + "loss": 2.176, + "step": 8176000 + }, + { + "epoch": 23.67, + "learning_rate": 3.817047980733605e-05, + "loss": 2.1682, + "step": 8176500 + }, + { + "epoch": 23.67, + "learning_rate": 3.816975615968877e-05, + "loss": 2.1711, + "step": 8177000 + }, + { + "epoch": 23.67, + "learning_rate": 3.8169033959336794e-05, + "loss": 2.1749, + "step": 8177500 + }, + { + "epoch": 23.67, + "learning_rate": 3.816831175898481e-05, + "loss": 2.1356, + "step": 8178000 + }, + { + "epoch": 23.67, + "learning_rate": 3.816758811133753e-05, + "loss": 2.1597, + "step": 8178500 + }, + { + "epoch": 23.67, + "learning_rate": 3.8166864463690254e-05, + "loss": 2.1596, + "step": 8179000 + }, + { + "epoch": 23.68, + "learning_rate": 3.816614081604298e-05, + "loss": 2.1462, + "step": 8179500 + }, + { + "epoch": 23.68, + "learning_rate": 3.8165417168395705e-05, + "loss": 2.183, + "step": 8180000 + }, + { + "epoch": 23.68, + "learning_rate": 3.816469352074843e-05, + "loss": 2.1572, + "step": 8180500 + }, + { + "epoch": 23.68, + "learning_rate": 3.816396987310115e-05, + "loss": 2.1823, + "step": 8181000 + }, + { + "epoch": 23.68, + "learning_rate": 3.816324622545387e-05, + "loss": 2.165, + "step": 8181500 + }, + { + "epoch": 23.68, + "learning_rate": 3.81625225778066e-05, + "loss": 2.1467, + "step": 8182000 + }, + { + "epoch": 23.68, + "learning_rate": 3.816179893015932e-05, + "loss": 2.1667, + "step": 8182500 + }, + { + "epoch": 23.69, + "learning_rate": 3.8161075282512045e-05, + "loss": 2.1691, + "step": 8183000 + }, + { + "epoch": 23.69, + "learning_rate": 3.816035163486477e-05, + "loss": 2.1658, + "step": 8183500 + }, + { + "epoch": 23.69, + "learning_rate": 3.815962798721749e-05, + "loss": 2.1463, + "step": 8184000 + }, + { + "epoch": 23.69, + "learning_rate": 3.815890433957021e-05, + "loss": 2.1441, + "step": 8184500 + }, + { + "epoch": 23.69, + "learning_rate": 3.8158180691922934e-05, + "loss": 2.1675, + "step": 8185000 + }, + { + "epoch": 23.69, + "learning_rate": 3.8157457044275656e-05, + "loss": 2.1554, + "step": 8185500 + }, + { + "epoch": 23.7, + "learning_rate": 3.815673484392367e-05, + "loss": 2.1299, + "step": 8186000 + }, + { + "epoch": 23.7, + "learning_rate": 3.81560111962764e-05, + "loss": 2.1299, + "step": 8186500 + }, + { + "epoch": 23.7, + "learning_rate": 3.815528754862912e-05, + "loss": 2.1451, + "step": 8187000 + }, + { + "epoch": 23.7, + "learning_rate": 3.815456390098185e-05, + "loss": 2.1558, + "step": 8187500 + }, + { + "epoch": 23.7, + "learning_rate": 3.8153840253334575e-05, + "loss": 2.1581, + "step": 8188000 + }, + { + "epoch": 23.7, + "learning_rate": 3.81531166056873e-05, + "loss": 2.1481, + "step": 8188500 + }, + { + "epoch": 23.7, + "learning_rate": 3.815239440533531e-05, + "loss": 2.1685, + "step": 8189000 + }, + { + "epoch": 23.71, + "learning_rate": 3.8151670757688035e-05, + "loss": 2.165, + "step": 8189500 + }, + { + "epoch": 23.71, + "learning_rate": 3.815094855733605e-05, + "loss": 2.1563, + "step": 8190000 + }, + { + "epoch": 23.71, + "learning_rate": 3.815022490968877e-05, + "loss": 2.1851, + "step": 8190500 + }, + { + "epoch": 23.71, + "learning_rate": 3.81495012620415e-05, + "loss": 2.1733, + "step": 8191000 + }, + { + "epoch": 23.71, + "learning_rate": 3.8148777614394224e-05, + "loss": 2.153, + "step": 8191500 + }, + { + "epoch": 23.71, + "learning_rate": 3.8148053966746946e-05, + "loss": 2.1626, + "step": 8192000 + }, + { + "epoch": 23.71, + "learning_rate": 3.814733176639496e-05, + "loss": 2.1456, + "step": 8192500 + }, + { + "epoch": 23.72, + "learning_rate": 3.814660956604298e-05, + "loss": 2.1472, + "step": 8193000 + }, + { + "epoch": 23.72, + "learning_rate": 3.81458859183957e-05, + "loss": 2.1673, + "step": 8193500 + }, + { + "epoch": 23.72, + "learning_rate": 3.814516371804372e-05, + "loss": 2.1704, + "step": 8194000 + }, + { + "epoch": 23.72, + "learning_rate": 3.8144440070396444e-05, + "loss": 2.1626, + "step": 8194500 + }, + { + "epoch": 23.72, + "learning_rate": 3.8143717870044466e-05, + "loss": 2.1555, + "step": 8195000 + }, + { + "epoch": 23.72, + "learning_rate": 3.814299422239719e-05, + "loss": 2.1598, + "step": 8195500 + }, + { + "epoch": 23.72, + "learning_rate": 3.814227057474991e-05, + "loss": 2.1894, + "step": 8196000 + }, + { + "epoch": 23.73, + "learning_rate": 3.814154692710263e-05, + "loss": 2.1539, + "step": 8196500 + }, + { + "epoch": 23.73, + "learning_rate": 3.8140823279455355e-05, + "loss": 2.1522, + "step": 8197000 + }, + { + "epoch": 23.73, + "learning_rate": 3.814009963180808e-05, + "loss": 2.1872, + "step": 8197500 + }, + { + "epoch": 23.73, + "learning_rate": 3.81393759841608e-05, + "loss": 2.1452, + "step": 8198000 + }, + { + "epoch": 23.73, + "learning_rate": 3.813865233651353e-05, + "loss": 2.1418, + "step": 8198500 + }, + { + "epoch": 23.73, + "learning_rate": 3.813792868886625e-05, + "loss": 2.1533, + "step": 8199000 + }, + { + "epoch": 23.73, + "learning_rate": 3.813720504121897e-05, + "loss": 2.1705, + "step": 8199500 + }, + { + "epoch": 23.74, + "learning_rate": 3.8136481393571695e-05, + "loss": 2.1527, + "step": 8200000 + }, + { + "epoch": 23.74, + "learning_rate": 3.813575774592442e-05, + "loss": 2.1689, + "step": 8200500 + }, + { + "epoch": 23.74, + "learning_rate": 3.813503409827714e-05, + "loss": 2.1685, + "step": 8201000 + }, + { + "epoch": 23.74, + "learning_rate": 3.813431045062986e-05, + "loss": 2.1583, + "step": 8201500 + }, + { + "epoch": 23.74, + "learning_rate": 3.813358680298259e-05, + "loss": 2.1638, + "step": 8202000 + }, + { + "epoch": 23.74, + "learning_rate": 3.813286315533531e-05, + "loss": 2.1887, + "step": 8202500 + }, + { + "epoch": 23.74, + "learning_rate": 3.8132139507688035e-05, + "loss": 2.1681, + "step": 8203000 + }, + { + "epoch": 23.75, + "learning_rate": 3.813141586004076e-05, + "loss": 2.1706, + "step": 8203500 + }, + { + "epoch": 23.75, + "learning_rate": 3.813069221239348e-05, + "loss": 2.1541, + "step": 8204000 + }, + { + "epoch": 23.75, + "learning_rate": 3.81299700120415e-05, + "loss": 2.1435, + "step": 8204500 + }, + { + "epoch": 23.75, + "learning_rate": 3.8129246364394224e-05, + "loss": 2.17, + "step": 8205000 + }, + { + "epoch": 23.75, + "learning_rate": 3.8128522716746947e-05, + "loss": 2.1855, + "step": 8205500 + }, + { + "epoch": 23.75, + "learning_rate": 3.812779906909967e-05, + "loss": 2.1642, + "step": 8206000 + }, + { + "epoch": 23.75, + "learning_rate": 3.812707542145239e-05, + "loss": 2.1594, + "step": 8206500 + }, + { + "epoch": 23.76, + "learning_rate": 3.812635177380511e-05, + "loss": 2.1621, + "step": 8207000 + }, + { + "epoch": 23.76, + "learning_rate": 3.8125628126157836e-05, + "loss": 2.1387, + "step": 8207500 + }, + { + "epoch": 23.76, + "learning_rate": 3.812490447851056e-05, + "loss": 2.1518, + "step": 8208000 + }, + { + "epoch": 23.76, + "learning_rate": 3.812418083086328e-05, + "loss": 2.1803, + "step": 8208500 + }, + { + "epoch": 23.76, + "learning_rate": 3.812345718321601e-05, + "loss": 2.1733, + "step": 8209000 + }, + { + "epoch": 23.76, + "learning_rate": 3.8122734982864025e-05, + "loss": 2.1558, + "step": 8209500 + }, + { + "epoch": 23.76, + "learning_rate": 3.812201278251205e-05, + "loss": 2.1262, + "step": 8210000 + }, + { + "epoch": 23.77, + "learning_rate": 3.812128913486477e-05, + "loss": 2.1506, + "step": 8210500 + }, + { + "epoch": 23.77, + "learning_rate": 3.812056548721749e-05, + "loss": 2.1664, + "step": 8211000 + }, + { + "epoch": 23.77, + "learning_rate": 3.8119841839570214e-05, + "loss": 2.1344, + "step": 8211500 + }, + { + "epoch": 23.77, + "learning_rate": 3.8119118191922936e-05, + "loss": 2.1596, + "step": 8212000 + }, + { + "epoch": 23.77, + "learning_rate": 3.811839454427566e-05, + "loss": 2.1568, + "step": 8212500 + }, + { + "epoch": 23.77, + "learning_rate": 3.811767089662838e-05, + "loss": 2.1704, + "step": 8213000 + }, + { + "epoch": 23.77, + "learning_rate": 3.8116950143571696e-05, + "loss": 2.1406, + "step": 8213500 + }, + { + "epoch": 23.78, + "learning_rate": 3.811622649592442e-05, + "loss": 2.1838, + "step": 8214000 + }, + { + "epoch": 23.78, + "learning_rate": 3.811550284827714e-05, + "loss": 2.1785, + "step": 8214500 + }, + { + "epoch": 23.78, + "learning_rate": 3.811477920062986e-05, + "loss": 2.1736, + "step": 8215000 + }, + { + "epoch": 23.78, + "learning_rate": 3.8114055552982585e-05, + "loss": 2.1585, + "step": 8215500 + }, + { + "epoch": 23.78, + "learning_rate": 3.811333190533531e-05, + "loss": 2.1485, + "step": 8216000 + }, + { + "epoch": 23.78, + "learning_rate": 3.811260825768803e-05, + "loss": 2.1771, + "step": 8216500 + }, + { + "epoch": 23.78, + "learning_rate": 3.811188461004076e-05, + "loss": 2.1565, + "step": 8217000 + }, + { + "epoch": 23.79, + "learning_rate": 3.811116096239348e-05, + "loss": 2.1305, + "step": 8217500 + }, + { + "epoch": 23.79, + "learning_rate": 3.81104373147462e-05, + "loss": 2.1484, + "step": 8218000 + }, + { + "epoch": 23.79, + "learning_rate": 3.810971366709893e-05, + "loss": 2.1589, + "step": 8218500 + }, + { + "epoch": 23.79, + "learning_rate": 3.8108990019451654e-05, + "loss": 2.1727, + "step": 8219000 + }, + { + "epoch": 23.79, + "learning_rate": 3.810826926639496e-05, + "loss": 2.1719, + "step": 8219500 + }, + { + "epoch": 23.79, + "learning_rate": 3.8107545618747685e-05, + "loss": 2.1549, + "step": 8220000 + }, + { + "epoch": 23.79, + "learning_rate": 3.810682197110041e-05, + "loss": 2.1434, + "step": 8220500 + }, + { + "epoch": 23.8, + "learning_rate": 3.810609832345313e-05, + "loss": 2.1574, + "step": 8221000 + }, + { + "epoch": 23.8, + "learning_rate": 3.810537467580585e-05, + "loss": 2.1528, + "step": 8221500 + }, + { + "epoch": 23.8, + "learning_rate": 3.810465102815858e-05, + "loss": 2.163, + "step": 8222000 + }, + { + "epoch": 23.8, + "learning_rate": 3.81039273805113e-05, + "loss": 2.1647, + "step": 8222500 + }, + { + "epoch": 23.8, + "learning_rate": 3.8103203732864025e-05, + "loss": 2.1577, + "step": 8223000 + }, + { + "epoch": 23.8, + "learning_rate": 3.810248008521675e-05, + "loss": 2.1901, + "step": 8223500 + }, + { + "epoch": 23.81, + "learning_rate": 3.8101756437569477e-05, + "loss": 2.1336, + "step": 8224000 + }, + { + "epoch": 23.81, + "learning_rate": 3.81010327899222e-05, + "loss": 2.1838, + "step": 8224500 + }, + { + "epoch": 23.81, + "learning_rate": 3.810030914227492e-05, + "loss": 2.1539, + "step": 8225000 + }, + { + "epoch": 23.81, + "learning_rate": 3.809958694192294e-05, + "loss": 2.1486, + "step": 8225500 + }, + { + "epoch": 23.81, + "learning_rate": 3.809886329427566e-05, + "loss": 2.1652, + "step": 8226000 + }, + { + "epoch": 23.81, + "learning_rate": 3.809813964662838e-05, + "loss": 2.1783, + "step": 8226500 + }, + { + "epoch": 23.81, + "learning_rate": 3.80974159989811e-05, + "loss": 2.1726, + "step": 8227000 + }, + { + "epoch": 23.82, + "learning_rate": 3.809669235133383e-05, + "loss": 2.1881, + "step": 8227500 + }, + { + "epoch": 23.82, + "learning_rate": 3.8095968703686555e-05, + "loss": 2.1718, + "step": 8228000 + }, + { + "epoch": 23.82, + "learning_rate": 3.809524505603928e-05, + "loss": 2.1634, + "step": 8228500 + }, + { + "epoch": 23.82, + "learning_rate": 3.8094521408392e-05, + "loss": 2.1803, + "step": 8229000 + }, + { + "epoch": 23.82, + "learning_rate": 3.809379776074472e-05, + "loss": 2.1676, + "step": 8229500 + }, + { + "epoch": 23.82, + "learning_rate": 3.8093074113097444e-05, + "loss": 2.1476, + "step": 8230000 + }, + { + "epoch": 23.82, + "learning_rate": 3.8092350465450166e-05, + "loss": 2.1511, + "step": 8230500 + }, + { + "epoch": 23.83, + "learning_rate": 3.809162826509818e-05, + "loss": 2.152, + "step": 8231000 + }, + { + "epoch": 23.83, + "learning_rate": 3.809090461745091e-05, + "loss": 2.151, + "step": 8231500 + }, + { + "epoch": 23.83, + "learning_rate": 3.809018096980363e-05, + "loss": 2.1798, + "step": 8232000 + }, + { + "epoch": 23.83, + "learning_rate": 3.8089457322156355e-05, + "loss": 2.154, + "step": 8232500 + }, + { + "epoch": 23.83, + "learning_rate": 3.8088733674509084e-05, + "loss": 2.176, + "step": 8233000 + }, + { + "epoch": 23.83, + "learning_rate": 3.80880114741571e-05, + "loss": 2.1372, + "step": 8233500 + }, + { + "epoch": 23.83, + "learning_rate": 3.808728782650982e-05, + "loss": 2.1523, + "step": 8234000 + }, + { + "epoch": 23.84, + "learning_rate": 3.8086564178862544e-05, + "loss": 2.1458, + "step": 8234500 + }, + { + "epoch": 23.84, + "learning_rate": 3.8085840531215266e-05, + "loss": 2.1608, + "step": 8235000 + }, + { + "epoch": 23.84, + "learning_rate": 3.808511688356799e-05, + "loss": 2.1817, + "step": 8235500 + }, + { + "epoch": 23.84, + "learning_rate": 3.808439323592071e-05, + "loss": 2.1701, + "step": 8236000 + }, + { + "epoch": 23.84, + "learning_rate": 3.808367103556873e-05, + "loss": 2.1682, + "step": 8236500 + }, + { + "epoch": 23.84, + "learning_rate": 3.808294883521675e-05, + "loss": 2.1494, + "step": 8237000 + }, + { + "epoch": 23.84, + "learning_rate": 3.8082226634864764e-05, + "loss": 2.1437, + "step": 8237500 + }, + { + "epoch": 23.85, + "learning_rate": 3.8081502987217486e-05, + "loss": 2.1672, + "step": 8238000 + }, + { + "epoch": 23.85, + "learning_rate": 3.808077933957021e-05, + "loss": 2.1494, + "step": 8238500 + }, + { + "epoch": 23.85, + "learning_rate": 3.808005569192294e-05, + "loss": 2.1635, + "step": 8239000 + }, + { + "epoch": 23.85, + "learning_rate": 3.807933349157096e-05, + "loss": 2.1551, + "step": 8239500 + }, + { + "epoch": 23.85, + "learning_rate": 3.807860984392368e-05, + "loss": 2.1856, + "step": 8240000 + }, + { + "epoch": 23.85, + "learning_rate": 3.8077886196276404e-05, + "loss": 2.1547, + "step": 8240500 + }, + { + "epoch": 23.85, + "learning_rate": 3.8077162548629126e-05, + "loss": 2.1465, + "step": 8241000 + }, + { + "epoch": 23.86, + "learning_rate": 3.807643890098185e-05, + "loss": 2.1627, + "step": 8241500 + }, + { + "epoch": 23.86, + "learning_rate": 3.807571525333457e-05, + "loss": 2.1772, + "step": 8242000 + }, + { + "epoch": 23.86, + "learning_rate": 3.807499160568729e-05, + "loss": 2.1372, + "step": 8242500 + }, + { + "epoch": 23.86, + "learning_rate": 3.8074267958040015e-05, + "loss": 2.1626, + "step": 8243000 + }, + { + "epoch": 23.86, + "learning_rate": 3.807354431039274e-05, + "loss": 2.1571, + "step": 8243500 + }, + { + "epoch": 23.86, + "learning_rate": 3.807282066274546e-05, + "loss": 2.1608, + "step": 8244000 + }, + { + "epoch": 23.86, + "learning_rate": 3.807209701509818e-05, + "loss": 2.1878, + "step": 8244500 + }, + { + "epoch": 23.87, + "learning_rate": 3.8071373367450904e-05, + "loss": 2.1578, + "step": 8245000 + }, + { + "epoch": 23.87, + "learning_rate": 3.807065116709893e-05, + "loss": 2.1823, + "step": 8245500 + }, + { + "epoch": 23.87, + "learning_rate": 3.806992896674694e-05, + "loss": 2.1539, + "step": 8246000 + }, + { + "epoch": 23.87, + "learning_rate": 3.806920531909967e-05, + "loss": 2.181, + "step": 8246500 + }, + { + "epoch": 23.87, + "learning_rate": 3.8068481671452393e-05, + "loss": 2.1546, + "step": 8247000 + }, + { + "epoch": 23.87, + "learning_rate": 3.806775947110041e-05, + "loss": 2.1418, + "step": 8247500 + }, + { + "epoch": 23.87, + "learning_rate": 3.806703582345313e-05, + "loss": 2.1687, + "step": 8248000 + }, + { + "epoch": 23.88, + "learning_rate": 3.806631217580586e-05, + "loss": 2.1731, + "step": 8248500 + }, + { + "epoch": 23.88, + "learning_rate": 3.806558852815858e-05, + "loss": 2.18, + "step": 8249000 + }, + { + "epoch": 23.88, + "learning_rate": 3.8064864880511305e-05, + "loss": 2.1846, + "step": 8249500 + }, + { + "epoch": 23.88, + "learning_rate": 3.806414123286403e-05, + "loss": 2.1556, + "step": 8250000 + }, + { + "epoch": 23.88, + "learning_rate": 3.806341758521675e-05, + "loss": 2.1636, + "step": 8250500 + }, + { + "epoch": 23.88, + "learning_rate": 3.806269393756947e-05, + "loss": 2.1484, + "step": 8251000 + }, + { + "epoch": 23.88, + "learning_rate": 3.8061970289922194e-05, + "loss": 2.141, + "step": 8251500 + }, + { + "epoch": 23.89, + "learning_rate": 3.8061246642274916e-05, + "loss": 2.1538, + "step": 8252000 + }, + { + "epoch": 23.89, + "learning_rate": 3.806052299462764e-05, + "loss": 2.1404, + "step": 8252500 + }, + { + "epoch": 23.89, + "learning_rate": 3.805980079427566e-05, + "loss": 2.1597, + "step": 8253000 + }, + { + "epoch": 23.89, + "learning_rate": 3.805907714662838e-05, + "loss": 2.1495, + "step": 8253500 + }, + { + "epoch": 23.89, + "learning_rate": 3.805835349898111e-05, + "loss": 2.1631, + "step": 8254000 + }, + { + "epoch": 23.89, + "learning_rate": 3.8057629851333834e-05, + "loss": 2.161, + "step": 8254500 + }, + { + "epoch": 23.89, + "learning_rate": 3.8056906203686556e-05, + "loss": 2.1557, + "step": 8255000 + }, + { + "epoch": 23.9, + "learning_rate": 3.805618255603928e-05, + "loss": 2.1608, + "step": 8255500 + }, + { + "epoch": 23.9, + "learning_rate": 3.8055458908392e-05, + "loss": 2.1414, + "step": 8256000 + }, + { + "epoch": 23.9, + "learning_rate": 3.805473526074472e-05, + "loss": 2.156, + "step": 8256500 + }, + { + "epoch": 23.9, + "learning_rate": 3.8054011613097445e-05, + "loss": 2.1595, + "step": 8257000 + }, + { + "epoch": 23.9, + "learning_rate": 3.805328796545017e-05, + "loss": 2.1569, + "step": 8257500 + }, + { + "epoch": 23.9, + "learning_rate": 3.805256431780289e-05, + "loss": 2.1546, + "step": 8258000 + }, + { + "epoch": 23.9, + "learning_rate": 3.805184067015561e-05, + "loss": 2.1463, + "step": 8258500 + }, + { + "epoch": 23.91, + "learning_rate": 3.8051118469803634e-05, + "loss": 2.1783, + "step": 8259000 + }, + { + "epoch": 23.91, + "learning_rate": 3.8050394822156356e-05, + "loss": 2.1647, + "step": 8259500 + }, + { + "epoch": 23.91, + "learning_rate": 3.804967262180437e-05, + "loss": 2.1527, + "step": 8260000 + }, + { + "epoch": 23.91, + "learning_rate": 3.804895042145239e-05, + "loss": 2.1926, + "step": 8260500 + }, + { + "epoch": 23.91, + "learning_rate": 3.804822677380511e-05, + "loss": 2.1554, + "step": 8261000 + }, + { + "epoch": 23.91, + "learning_rate": 3.804750312615784e-05, + "loss": 2.1503, + "step": 8261500 + }, + { + "epoch": 23.92, + "learning_rate": 3.804677947851056e-05, + "loss": 2.1311, + "step": 8262000 + }, + { + "epoch": 23.92, + "learning_rate": 3.804605583086328e-05, + "loss": 2.1653, + "step": 8262500 + }, + { + "epoch": 23.92, + "learning_rate": 3.804533218321601e-05, + "loss": 2.1615, + "step": 8263000 + }, + { + "epoch": 23.92, + "learning_rate": 3.8044608535568734e-05, + "loss": 2.1605, + "step": 8263500 + }, + { + "epoch": 23.92, + "learning_rate": 3.804388488792146e-05, + "loss": 2.1875, + "step": 8264000 + }, + { + "epoch": 23.92, + "learning_rate": 3.804316124027418e-05, + "loss": 2.1689, + "step": 8264500 + }, + { + "epoch": 23.92, + "learning_rate": 3.80424375926269e-05, + "loss": 2.1572, + "step": 8265000 + }, + { + "epoch": 23.93, + "learning_rate": 3.804171539227492e-05, + "loss": 2.1518, + "step": 8265500 + }, + { + "epoch": 23.93, + "learning_rate": 3.804099174462764e-05, + "loss": 2.1638, + "step": 8266000 + }, + { + "epoch": 23.93, + "learning_rate": 3.804026809698036e-05, + "loss": 2.1563, + "step": 8266500 + }, + { + "epoch": 23.93, + "learning_rate": 3.8039545896628383e-05, + "loss": 2.191, + "step": 8267000 + }, + { + "epoch": 23.93, + "learning_rate": 3.8038822248981106e-05, + "loss": 2.15, + "step": 8267500 + }, + { + "epoch": 23.93, + "learning_rate": 3.803809860133383e-05, + "loss": 2.1866, + "step": 8268000 + }, + { + "epoch": 23.93, + "learning_rate": 3.803737495368655e-05, + "loss": 2.164, + "step": 8268500 + }, + { + "epoch": 23.94, + "learning_rate": 3.803665130603928e-05, + "loss": 2.1533, + "step": 8269000 + }, + { + "epoch": 23.94, + "learning_rate": 3.8035927658392e-05, + "loss": 2.1681, + "step": 8269500 + }, + { + "epoch": 23.94, + "learning_rate": 3.8035204010744724e-05, + "loss": 2.1631, + "step": 8270000 + }, + { + "epoch": 23.94, + "learning_rate": 3.803448181039274e-05, + "loss": 2.1604, + "step": 8270500 + }, + { + "epoch": 23.94, + "learning_rate": 3.803375816274546e-05, + "loss": 2.172, + "step": 8271000 + }, + { + "epoch": 23.94, + "learning_rate": 3.8033034515098184e-05, + "loss": 2.1431, + "step": 8271500 + }, + { + "epoch": 23.94, + "learning_rate": 3.803231086745091e-05, + "loss": 2.1359, + "step": 8272000 + }, + { + "epoch": 23.95, + "learning_rate": 3.803158866709893e-05, + "loss": 2.1798, + "step": 8272500 + }, + { + "epoch": 23.95, + "learning_rate": 3.803086501945165e-05, + "loss": 2.1435, + "step": 8273000 + }, + { + "epoch": 23.95, + "learning_rate": 3.803014137180437e-05, + "loss": 2.1544, + "step": 8273500 + }, + { + "epoch": 23.95, + "learning_rate": 3.8029417724157095e-05, + "loss": 2.1764, + "step": 8274000 + }, + { + "epoch": 23.95, + "learning_rate": 3.802869407650982e-05, + "loss": 2.1387, + "step": 8274500 + }, + { + "epoch": 23.95, + "learning_rate": 3.802797042886254e-05, + "loss": 2.1784, + "step": 8275000 + }, + { + "epoch": 23.95, + "learning_rate": 3.802724678121526e-05, + "loss": 2.1562, + "step": 8275500 + }, + { + "epoch": 23.96, + "learning_rate": 3.8026523133567984e-05, + "loss": 2.1731, + "step": 8276000 + }, + { + "epoch": 23.96, + "learning_rate": 3.802579948592071e-05, + "loss": 2.1758, + "step": 8276500 + }, + { + "epoch": 23.96, + "learning_rate": 3.8025077285568735e-05, + "loss": 2.1705, + "step": 8277000 + }, + { + "epoch": 23.96, + "learning_rate": 3.802435363792146e-05, + "loss": 2.1576, + "step": 8277500 + }, + { + "epoch": 23.96, + "learning_rate": 3.802362999027418e-05, + "loss": 2.1841, + "step": 8278000 + }, + { + "epoch": 23.96, + "learning_rate": 3.80229063426269e-05, + "loss": 2.1704, + "step": 8278500 + }, + { + "epoch": 23.96, + "learning_rate": 3.802218414227492e-05, + "loss": 2.1693, + "step": 8279000 + }, + { + "epoch": 23.97, + "learning_rate": 3.802146049462764e-05, + "loss": 2.1698, + "step": 8279500 + }, + { + "epoch": 23.97, + "learning_rate": 3.802073684698036e-05, + "loss": 2.1376, + "step": 8280000 + }, + { + "epoch": 23.97, + "learning_rate": 3.802001319933309e-05, + "loss": 2.1496, + "step": 8280500 + }, + { + "epoch": 23.97, + "learning_rate": 3.801928955168581e-05, + "loss": 2.1751, + "step": 8281000 + }, + { + "epoch": 23.97, + "learning_rate": 3.801856735133383e-05, + "loss": 2.1772, + "step": 8281500 + }, + { + "epoch": 23.97, + "learning_rate": 3.801784370368655e-05, + "loss": 2.1202, + "step": 8282000 + }, + { + "epoch": 23.97, + "learning_rate": 3.801712005603927e-05, + "loss": 2.1796, + "step": 8282500 + }, + { + "epoch": 23.98, + "learning_rate": 3.8016396408391995e-05, + "loss": 2.1609, + "step": 8283000 + }, + { + "epoch": 23.98, + "learning_rate": 3.801567276074472e-05, + "loss": 2.1416, + "step": 8283500 + }, + { + "epoch": 23.98, + "learning_rate": 3.801495056039274e-05, + "loss": 2.1584, + "step": 8284000 + }, + { + "epoch": 23.98, + "learning_rate": 3.801422836004076e-05, + "loss": 2.1732, + "step": 8284500 + }, + { + "epoch": 23.98, + "learning_rate": 3.8013504712393485e-05, + "loss": 2.1624, + "step": 8285000 + }, + { + "epoch": 23.98, + "learning_rate": 3.801278106474621e-05, + "loss": 2.1438, + "step": 8285500 + }, + { + "epoch": 23.98, + "learning_rate": 3.801205741709893e-05, + "loss": 2.1604, + "step": 8286000 + }, + { + "epoch": 23.99, + "learning_rate": 3.801133376945165e-05, + "loss": 2.1592, + "step": 8286500 + }, + { + "epoch": 23.99, + "learning_rate": 3.8010610121804374e-05, + "loss": 2.1515, + "step": 8287000 + }, + { + "epoch": 23.99, + "learning_rate": 3.8009886474157096e-05, + "loss": 2.1628, + "step": 8287500 + }, + { + "epoch": 23.99, + "learning_rate": 3.800916282650982e-05, + "loss": 2.1588, + "step": 8288000 + }, + { + "epoch": 23.99, + "learning_rate": 3.800843917886254e-05, + "loss": 2.1775, + "step": 8288500 + }, + { + "epoch": 23.99, + "learning_rate": 3.800771553121526e-05, + "loss": 2.1663, + "step": 8289000 + }, + { + "epoch": 23.99, + "learning_rate": 3.800699188356799e-05, + "loss": 2.1489, + "step": 8289500 + }, + { + "epoch": 24.0, + "learning_rate": 3.8006268235920714e-05, + "loss": 2.1821, + "step": 8290000 + }, + { + "epoch": 24.0, + "learning_rate": 3.8005544588273436e-05, + "loss": 2.15, + "step": 8290500 + }, + { + "epoch": 24.0, + "learning_rate": 3.8004820940626165e-05, + "loss": 2.1397, + "step": 8291000 + }, + { + "epoch": 24.0, + "eval_accuracy": 0.6629079772621248, + "eval_accuracy_mlm": 0.6270114574392218, + "eval_accuracy_nsp": 0.8555338622228944, + "eval_loss": 2.2102925777435303, + "eval_runtime": 331.9145, + "eval_samples_per_second": 1314.754, + "eval_steps_per_second": 54.782, + "step": 8291328 + }, + { + "epoch": 24.0, + "learning_rate": 3.800409874027418e-05, + "loss": 2.1363, + "step": 8291500 + }, + { + "epoch": 24.0, + "learning_rate": 3.80033750926269e-05, + "loss": 2.1397, + "step": 8292000 + }, + { + "epoch": 24.0, + "learning_rate": 3.8002651444979625e-05, + "loss": 2.1261, + "step": 8292500 + }, + { + "epoch": 24.0, + "learning_rate": 3.800192779733235e-05, + "loss": 2.1153, + "step": 8293000 + }, + { + "epoch": 24.01, + "learning_rate": 3.800120559698036e-05, + "loss": 2.1465, + "step": 8293500 + }, + { + "epoch": 24.01, + "learning_rate": 3.800048194933309e-05, + "loss": 2.1581, + "step": 8294000 + }, + { + "epoch": 24.01, + "learning_rate": 3.7999758301685814e-05, + "loss": 2.1546, + "step": 8294500 + }, + { + "epoch": 24.01, + "learning_rate": 3.7999034654038536e-05, + "loss": 2.1238, + "step": 8295000 + }, + { + "epoch": 24.01, + "learning_rate": 3.799831245368655e-05, + "loss": 2.1491, + "step": 8295500 + }, + { + "epoch": 24.01, + "learning_rate": 3.7997588806039274e-05, + "loss": 2.1534, + "step": 8296000 + }, + { + "epoch": 24.01, + "learning_rate": 3.7996865158391996e-05, + "loss": 2.1569, + "step": 8296500 + }, + { + "epoch": 24.02, + "learning_rate": 3.799614151074472e-05, + "loss": 2.162, + "step": 8297000 + }, + { + "epoch": 24.02, + "learning_rate": 3.799541786309744e-05, + "loss": 2.1316, + "step": 8297500 + }, + { + "epoch": 24.02, + "learning_rate": 3.799469566274546e-05, + "loss": 2.1299, + "step": 8298000 + }, + { + "epoch": 24.02, + "learning_rate": 3.7993972015098185e-05, + "loss": 2.1527, + "step": 8298500 + }, + { + "epoch": 24.02, + "learning_rate": 3.799324981474621e-05, + "loss": 2.1406, + "step": 8299000 + }, + { + "epoch": 24.02, + "learning_rate": 3.799252616709893e-05, + "loss": 2.1478, + "step": 8299500 + }, + { + "epoch": 24.03, + "learning_rate": 3.799180251945165e-05, + "loss": 2.1649, + "step": 8300000 + }, + { + "epoch": 24.03, + "learning_rate": 3.7991078871804374e-05, + "loss": 2.127, + "step": 8300500 + }, + { + "epoch": 24.03, + "learning_rate": 3.7990355224157097e-05, + "loss": 2.1331, + "step": 8301000 + }, + { + "epoch": 24.03, + "learning_rate": 3.798963157650982e-05, + "loss": 2.1607, + "step": 8301500 + }, + { + "epoch": 24.03, + "learning_rate": 3.798890792886254e-05, + "loss": 2.1315, + "step": 8302000 + }, + { + "epoch": 24.03, + "learning_rate": 3.798818428121526e-05, + "loss": 2.1581, + "step": 8302500 + }, + { + "epoch": 24.03, + "learning_rate": 3.798746063356799e-05, + "loss": 2.1367, + "step": 8303000 + }, + { + "epoch": 24.04, + "learning_rate": 3.798673843321601e-05, + "loss": 2.1365, + "step": 8303500 + }, + { + "epoch": 24.04, + "learning_rate": 3.798601478556873e-05, + "loss": 2.1485, + "step": 8304000 + }, + { + "epoch": 24.04, + "learning_rate": 3.798529113792145e-05, + "loss": 2.1408, + "step": 8304500 + }, + { + "epoch": 24.04, + "learning_rate": 3.7984567490274175e-05, + "loss": 2.1714, + "step": 8305000 + }, + { + "epoch": 24.04, + "learning_rate": 3.79838438426269e-05, + "loss": 2.1472, + "step": 8305500 + }, + { + "epoch": 24.04, + "learning_rate": 3.798312019497962e-05, + "loss": 2.1618, + "step": 8306000 + }, + { + "epoch": 24.04, + "learning_rate": 3.798239654733235e-05, + "loss": 2.1577, + "step": 8306500 + }, + { + "epoch": 24.05, + "learning_rate": 3.798167289968507e-05, + "loss": 2.1283, + "step": 8307000 + }, + { + "epoch": 24.05, + "learning_rate": 3.798094925203779e-05, + "loss": 2.1504, + "step": 8307500 + }, + { + "epoch": 24.05, + "learning_rate": 3.798022849898111e-05, + "loss": 2.1256, + "step": 8308000 + }, + { + "epoch": 24.05, + "learning_rate": 3.7979506298629124e-05, + "loss": 2.1087, + "step": 8308500 + }, + { + "epoch": 24.05, + "learning_rate": 3.7978782650981846e-05, + "loss": 2.1414, + "step": 8309000 + }, + { + "epoch": 24.05, + "learning_rate": 3.797805900333457e-05, + "loss": 2.1258, + "step": 8309500 + }, + { + "epoch": 24.05, + "learning_rate": 3.797733535568729e-05, + "loss": 2.1683, + "step": 8310000 + }, + { + "epoch": 24.06, + "learning_rate": 3.797661170804002e-05, + "loss": 2.1434, + "step": 8310500 + }, + { + "epoch": 24.06, + "learning_rate": 3.797588806039274e-05, + "loss": 2.1418, + "step": 8311000 + }, + { + "epoch": 24.06, + "learning_rate": 3.7975164412745464e-05, + "loss": 2.1152, + "step": 8311500 + }, + { + "epoch": 24.06, + "learning_rate": 3.7974440765098186e-05, + "loss": 2.1548, + "step": 8312000 + }, + { + "epoch": 24.06, + "learning_rate": 3.79737185647462e-05, + "loss": 2.1485, + "step": 8312500 + }, + { + "epoch": 24.06, + "learning_rate": 3.7972994917098924e-05, + "loss": 2.141, + "step": 8313000 + }, + { + "epoch": 24.06, + "learning_rate": 3.7972271269451646e-05, + "loss": 2.1313, + "step": 8313500 + }, + { + "epoch": 24.07, + "learning_rate": 3.7971547621804375e-05, + "loss": 2.1218, + "step": 8314000 + }, + { + "epoch": 24.07, + "learning_rate": 3.797082542145239e-05, + "loss": 2.156, + "step": 8314500 + }, + { + "epoch": 24.07, + "learning_rate": 3.797010177380512e-05, + "loss": 2.1518, + "step": 8315000 + }, + { + "epoch": 24.07, + "learning_rate": 3.796937812615784e-05, + "loss": 2.1754, + "step": 8315500 + }, + { + "epoch": 24.07, + "learning_rate": 3.7968654478510564e-05, + "loss": 2.162, + "step": 8316000 + }, + { + "epoch": 24.07, + "learning_rate": 3.7967930830863286e-05, + "loss": 2.1538, + "step": 8316500 + }, + { + "epoch": 24.07, + "learning_rate": 3.796720718321601e-05, + "loss": 2.133, + "step": 8317000 + }, + { + "epoch": 24.08, + "learning_rate": 3.7966484982864024e-05, + "loss": 2.1546, + "step": 8317500 + }, + { + "epoch": 24.08, + "learning_rate": 3.7965761335216746e-05, + "loss": 2.1505, + "step": 8318000 + }, + { + "epoch": 24.08, + "learning_rate": 3.796503768756947e-05, + "loss": 2.1261, + "step": 8318500 + }, + { + "epoch": 24.08, + "learning_rate": 3.796431403992219e-05, + "loss": 2.1514, + "step": 8319000 + }, + { + "epoch": 24.08, + "learning_rate": 3.796359039227492e-05, + "loss": 2.1407, + "step": 8319500 + }, + { + "epoch": 24.08, + "learning_rate": 3.7962868191922935e-05, + "loss": 2.1468, + "step": 8320000 + }, + { + "epoch": 24.08, + "learning_rate": 3.796214454427566e-05, + "loss": 2.1241, + "step": 8320500 + }, + { + "epoch": 24.09, + "learning_rate": 3.796142089662838e-05, + "loss": 2.1218, + "step": 8321000 + }, + { + "epoch": 24.09, + "learning_rate": 3.796069724898111e-05, + "loss": 2.1301, + "step": 8321500 + }, + { + "epoch": 24.09, + "learning_rate": 3.795997360133383e-05, + "loss": 2.1362, + "step": 8322000 + }, + { + "epoch": 24.09, + "learning_rate": 3.795925140098185e-05, + "loss": 2.161, + "step": 8322500 + }, + { + "epoch": 24.09, + "learning_rate": 3.795852775333457e-05, + "loss": 2.154, + "step": 8323000 + }, + { + "epoch": 24.09, + "learning_rate": 3.795780555298259e-05, + "loss": 2.1502, + "step": 8323500 + }, + { + "epoch": 24.09, + "learning_rate": 3.7957081905335313e-05, + "loss": 2.1301, + "step": 8324000 + }, + { + "epoch": 24.1, + "learning_rate": 3.7956358257688036e-05, + "loss": 2.1366, + "step": 8324500 + }, + { + "epoch": 24.1, + "learning_rate": 3.795563461004076e-05, + "loss": 2.1506, + "step": 8325000 + }, + { + "epoch": 24.1, + "learning_rate": 3.795491096239348e-05, + "loss": 2.1507, + "step": 8325500 + }, + { + "epoch": 24.1, + "learning_rate": 3.79541873147462e-05, + "loss": 2.1491, + "step": 8326000 + }, + { + "epoch": 24.1, + "learning_rate": 3.7953463667098925e-05, + "loss": 2.1503, + "step": 8326500 + }, + { + "epoch": 24.1, + "learning_rate": 3.795274001945165e-05, + "loss": 2.1587, + "step": 8327000 + }, + { + "epoch": 24.1, + "learning_rate": 3.795201637180437e-05, + "loss": 2.1578, + "step": 8327500 + }, + { + "epoch": 24.11, + "learning_rate": 3.795129417145239e-05, + "loss": 2.1488, + "step": 8328000 + }, + { + "epoch": 24.11, + "learning_rate": 3.7950570523805114e-05, + "loss": 2.1581, + "step": 8328500 + }, + { + "epoch": 24.11, + "learning_rate": 3.794984687615784e-05, + "loss": 2.1502, + "step": 8329000 + }, + { + "epoch": 24.11, + "learning_rate": 3.7949123228510565e-05, + "loss": 2.1713, + "step": 8329500 + }, + { + "epoch": 24.11, + "learning_rate": 3.794840102815858e-05, + "loss": 2.1542, + "step": 8330000 + }, + { + "epoch": 24.11, + "learning_rate": 3.79476773805113e-05, + "loss": 2.138, + "step": 8330500 + }, + { + "epoch": 24.11, + "learning_rate": 3.7946953732864025e-05, + "loss": 2.1514, + "step": 8331000 + }, + { + "epoch": 24.12, + "learning_rate": 3.794623008521675e-05, + "loss": 2.1401, + "step": 8331500 + }, + { + "epoch": 24.12, + "learning_rate": 3.794550643756947e-05, + "loss": 2.1389, + "step": 8332000 + }, + { + "epoch": 24.12, + "learning_rate": 3.79447827899222e-05, + "loss": 2.1453, + "step": 8332500 + }, + { + "epoch": 24.12, + "learning_rate": 3.794405914227492e-05, + "loss": 2.1255, + "step": 8333000 + }, + { + "epoch": 24.12, + "learning_rate": 3.794333549462764e-05, + "loss": 2.1395, + "step": 8333500 + }, + { + "epoch": 24.12, + "learning_rate": 3.7942611846980365e-05, + "loss": 2.1536, + "step": 8334000 + }, + { + "epoch": 24.12, + "learning_rate": 3.794188819933309e-05, + "loss": 2.1509, + "step": 8334500 + }, + { + "epoch": 24.13, + "learning_rate": 3.7941167446276396e-05, + "loss": 2.145, + "step": 8335000 + }, + { + "epoch": 24.13, + "learning_rate": 3.794044379862912e-05, + "loss": 2.1287, + "step": 8335500 + }, + { + "epoch": 24.13, + "learning_rate": 3.793972159827714e-05, + "loss": 2.1377, + "step": 8336000 + }, + { + "epoch": 24.13, + "learning_rate": 3.793899795062986e-05, + "loss": 2.1452, + "step": 8336500 + }, + { + "epoch": 24.13, + "learning_rate": 3.793827430298259e-05, + "loss": 2.1314, + "step": 8337000 + }, + { + "epoch": 24.13, + "learning_rate": 3.7937550655335314e-05, + "loss": 2.1252, + "step": 8337500 + }, + { + "epoch": 24.14, + "learning_rate": 3.7936827007688037e-05, + "loss": 2.18, + "step": 8338000 + }, + { + "epoch": 24.14, + "learning_rate": 3.793610336004076e-05, + "loss": 2.145, + "step": 8338500 + }, + { + "epoch": 24.14, + "learning_rate": 3.793537971239348e-05, + "loss": 2.1529, + "step": 8339000 + }, + { + "epoch": 24.14, + "learning_rate": 3.79346560647462e-05, + "loss": 2.1396, + "step": 8339500 + }, + { + "epoch": 24.14, + "learning_rate": 3.7933932417098925e-05, + "loss": 2.1374, + "step": 8340000 + }, + { + "epoch": 24.14, + "learning_rate": 3.793320876945165e-05, + "loss": 2.1499, + "step": 8340500 + }, + { + "epoch": 24.14, + "learning_rate": 3.793248656909967e-05, + "loss": 2.1372, + "step": 8341000 + }, + { + "epoch": 24.15, + "learning_rate": 3.793176292145239e-05, + "loss": 2.1557, + "step": 8341500 + }, + { + "epoch": 24.15, + "learning_rate": 3.7931039273805114e-05, + "loss": 2.1511, + "step": 8342000 + }, + { + "epoch": 24.15, + "learning_rate": 3.793031562615784e-05, + "loss": 2.1651, + "step": 8342500 + }, + { + "epoch": 24.15, + "learning_rate": 3.792959342580585e-05, + "loss": 2.1475, + "step": 8343000 + }, + { + "epoch": 24.15, + "learning_rate": 3.7928869778158575e-05, + "loss": 2.1299, + "step": 8343500 + }, + { + "epoch": 24.15, + "learning_rate": 3.7928146130511304e-05, + "loss": 2.1563, + "step": 8344000 + }, + { + "epoch": 24.15, + "learning_rate": 3.7927422482864026e-05, + "loss": 2.1483, + "step": 8344500 + }, + { + "epoch": 24.16, + "learning_rate": 3.792669883521675e-05, + "loss": 2.154, + "step": 8345000 + }, + { + "epoch": 24.16, + "learning_rate": 3.792597518756947e-05, + "loss": 2.1513, + "step": 8345500 + }, + { + "epoch": 24.16, + "learning_rate": 3.79252515399222e-05, + "loss": 2.131, + "step": 8346000 + }, + { + "epoch": 24.16, + "learning_rate": 3.792452789227492e-05, + "loss": 2.136, + "step": 8346500 + }, + { + "epoch": 24.16, + "learning_rate": 3.792380569192294e-05, + "loss": 2.1469, + "step": 8347000 + }, + { + "epoch": 24.16, + "learning_rate": 3.792308204427566e-05, + "loss": 2.1459, + "step": 8347500 + }, + { + "epoch": 24.16, + "learning_rate": 3.792235839662838e-05, + "loss": 2.1374, + "step": 8348000 + }, + { + "epoch": 24.17, + "learning_rate": 3.7921634748981104e-05, + "loss": 2.1338, + "step": 8348500 + }, + { + "epoch": 24.17, + "learning_rate": 3.7920911101333826e-05, + "loss": 2.1399, + "step": 8349000 + }, + { + "epoch": 24.17, + "learning_rate": 3.792018745368655e-05, + "loss": 2.1486, + "step": 8349500 + }, + { + "epoch": 24.17, + "learning_rate": 3.791946380603927e-05, + "loss": 2.1324, + "step": 8350000 + }, + { + "epoch": 24.17, + "learning_rate": 3.7918740158392e-05, + "loss": 2.1359, + "step": 8350500 + }, + { + "epoch": 24.17, + "learning_rate": 3.791801651074472e-05, + "loss": 2.1622, + "step": 8351000 + }, + { + "epoch": 24.17, + "learning_rate": 3.7917294310392744e-05, + "loss": 2.1389, + "step": 8351500 + }, + { + "epoch": 24.18, + "learning_rate": 3.7916570662745466e-05, + "loss": 2.147, + "step": 8352000 + }, + { + "epoch": 24.18, + "learning_rate": 3.791584701509819e-05, + "loss": 2.1553, + "step": 8352500 + }, + { + "epoch": 24.18, + "learning_rate": 3.791512336745091e-05, + "loss": 2.1577, + "step": 8353000 + }, + { + "epoch": 24.18, + "learning_rate": 3.791439971980363e-05, + "loss": 2.151, + "step": 8353500 + }, + { + "epoch": 24.18, + "learning_rate": 3.7913676072156355e-05, + "loss": 2.1455, + "step": 8354000 + }, + { + "epoch": 24.18, + "learning_rate": 3.791295242450908e-05, + "loss": 2.1295, + "step": 8354500 + }, + { + "epoch": 24.18, + "learning_rate": 3.79122287768618e-05, + "loss": 2.1686, + "step": 8355000 + }, + { + "epoch": 24.19, + "learning_rate": 3.791150512921452e-05, + "loss": 2.1363, + "step": 8355500 + }, + { + "epoch": 24.19, + "learning_rate": 3.791078148156725e-05, + "loss": 2.1717, + "step": 8356000 + }, + { + "epoch": 24.19, + "learning_rate": 3.791005783391997e-05, + "loss": 2.1328, + "step": 8356500 + }, + { + "epoch": 24.19, + "learning_rate": 3.790933708086328e-05, + "loss": 2.1363, + "step": 8357000 + }, + { + "epoch": 24.19, + "learning_rate": 3.7908613433216004e-05, + "loss": 2.1395, + "step": 8357500 + }, + { + "epoch": 24.19, + "learning_rate": 3.7907889785568726e-05, + "loss": 2.1482, + "step": 8358000 + }, + { + "epoch": 24.19, + "learning_rate": 3.790716613792145e-05, + "loss": 2.1789, + "step": 8358500 + }, + { + "epoch": 24.2, + "learning_rate": 3.790644249027418e-05, + "loss": 2.1602, + "step": 8359000 + }, + { + "epoch": 24.2, + "learning_rate": 3.79057188426269e-05, + "loss": 2.1632, + "step": 8359500 + }, + { + "epoch": 24.2, + "learning_rate": 3.790499519497963e-05, + "loss": 2.1464, + "step": 8360000 + }, + { + "epoch": 24.2, + "learning_rate": 3.790427154733235e-05, + "loss": 2.144, + "step": 8360500 + }, + { + "epoch": 24.2, + "learning_rate": 3.7903547899685073e-05, + "loss": 2.1492, + "step": 8361000 + }, + { + "epoch": 24.2, + "learning_rate": 3.790282569933309e-05, + "loss": 2.1499, + "step": 8361500 + }, + { + "epoch": 24.2, + "learning_rate": 3.790210205168581e-05, + "loss": 2.152, + "step": 8362000 + }, + { + "epoch": 24.21, + "learning_rate": 3.7901378404038533e-05, + "loss": 2.1433, + "step": 8362500 + }, + { + "epoch": 24.21, + "learning_rate": 3.7900654756391256e-05, + "loss": 2.1359, + "step": 8363000 + }, + { + "epoch": 24.21, + "learning_rate": 3.789993110874398e-05, + "loss": 2.1379, + "step": 8363500 + }, + { + "epoch": 24.21, + "learning_rate": 3.7899208908392e-05, + "loss": 2.1399, + "step": 8364000 + }, + { + "epoch": 24.21, + "learning_rate": 3.789848526074472e-05, + "loss": 2.1406, + "step": 8364500 + }, + { + "epoch": 24.21, + "learning_rate": 3.7897761613097445e-05, + "loss": 2.1426, + "step": 8365000 + }, + { + "epoch": 24.21, + "learning_rate": 3.7897040860040754e-05, + "loss": 2.125, + "step": 8365500 + }, + { + "epoch": 24.22, + "learning_rate": 3.7896317212393476e-05, + "loss": 2.1415, + "step": 8366000 + }, + { + "epoch": 24.22, + "learning_rate": 3.7895593564746205e-05, + "loss": 2.143, + "step": 8366500 + }, + { + "epoch": 24.22, + "learning_rate": 3.789486991709893e-05, + "loss": 2.1408, + "step": 8367000 + }, + { + "epoch": 24.22, + "learning_rate": 3.789414626945165e-05, + "loss": 2.1394, + "step": 8367500 + }, + { + "epoch": 24.22, + "learning_rate": 3.789342262180438e-05, + "loss": 2.1476, + "step": 8368000 + }, + { + "epoch": 24.22, + "learning_rate": 3.78926989741571e-05, + "loss": 2.1441, + "step": 8368500 + }, + { + "epoch": 24.22, + "learning_rate": 3.789197532650982e-05, + "loss": 2.153, + "step": 8369000 + }, + { + "epoch": 24.23, + "learning_rate": 3.7891251678862545e-05, + "loss": 2.1368, + "step": 8369500 + }, + { + "epoch": 24.23, + "learning_rate": 3.789052803121527e-05, + "loss": 2.1512, + "step": 8370000 + }, + { + "epoch": 24.23, + "learning_rate": 3.788980438356799e-05, + "loss": 2.1435, + "step": 8370500 + }, + { + "epoch": 24.23, + "learning_rate": 3.788908073592071e-05, + "loss": 2.1428, + "step": 8371000 + }, + { + "epoch": 24.23, + "learning_rate": 3.7888357088273434e-05, + "loss": 2.1456, + "step": 8371500 + }, + { + "epoch": 24.23, + "learning_rate": 3.7887633440626156e-05, + "loss": 2.1354, + "step": 8372000 + }, + { + "epoch": 24.23, + "learning_rate": 3.788690979297888e-05, + "loss": 2.1351, + "step": 8372500 + }, + { + "epoch": 24.24, + "learning_rate": 3.78861861453316e-05, + "loss": 2.1369, + "step": 8373000 + }, + { + "epoch": 24.24, + "learning_rate": 3.788546249768433e-05, + "loss": 2.1479, + "step": 8373500 + }, + { + "epoch": 24.24, + "learning_rate": 3.788474029733235e-05, + "loss": 2.1492, + "step": 8374000 + }, + { + "epoch": 24.24, + "learning_rate": 3.7884016649685074e-05, + "loss": 2.1351, + "step": 8374500 + }, + { + "epoch": 24.24, + "learning_rate": 3.7883293002037796e-05, + "loss": 2.1281, + "step": 8375000 + }, + { + "epoch": 24.24, + "learning_rate": 3.788256935439052e-05, + "loss": 2.1383, + "step": 8375500 + }, + { + "epoch": 24.25, + "learning_rate": 3.788184570674324e-05, + "loss": 2.1312, + "step": 8376000 + }, + { + "epoch": 24.25, + "learning_rate": 3.7881123506391256e-05, + "loss": 2.1453, + "step": 8376500 + }, + { + "epoch": 24.25, + "learning_rate": 3.788039985874398e-05, + "loss": 2.1364, + "step": 8377000 + }, + { + "epoch": 24.25, + "learning_rate": 3.78796762110967e-05, + "loss": 2.1765, + "step": 8377500 + }, + { + "epoch": 24.25, + "learning_rate": 3.787895256344943e-05, + "loss": 2.1426, + "step": 8378000 + }, + { + "epoch": 24.25, + "learning_rate": 3.7878230363097445e-05, + "loss": 2.1264, + "step": 8378500 + }, + { + "epoch": 24.25, + "learning_rate": 3.787750816274546e-05, + "loss": 2.1494, + "step": 8379000 + }, + { + "epoch": 24.26, + "learning_rate": 3.7876785962393477e-05, + "loss": 2.145, + "step": 8379500 + }, + { + "epoch": 24.26, + "learning_rate": 3.7876062314746206e-05, + "loss": 2.13, + "step": 8380000 + }, + { + "epoch": 24.26, + "learning_rate": 3.787533866709893e-05, + "loss": 2.1509, + "step": 8380500 + }, + { + "epoch": 24.26, + "learning_rate": 3.787461501945165e-05, + "loss": 2.1585, + "step": 8381000 + }, + { + "epoch": 24.26, + "learning_rate": 3.787389137180438e-05, + "loss": 2.146, + "step": 8381500 + }, + { + "epoch": 24.26, + "learning_rate": 3.78731677241571e-05, + "loss": 2.1405, + "step": 8382000 + }, + { + "epoch": 24.26, + "learning_rate": 3.787244552380512e-05, + "loss": 2.1364, + "step": 8382500 + }, + { + "epoch": 24.27, + "learning_rate": 3.787172187615784e-05, + "loss": 2.1505, + "step": 8383000 + }, + { + "epoch": 24.27, + "learning_rate": 3.787099822851056e-05, + "loss": 2.1586, + "step": 8383500 + }, + { + "epoch": 24.27, + "learning_rate": 3.7870274580863284e-05, + "loss": 2.1387, + "step": 8384000 + }, + { + "epoch": 24.27, + "learning_rate": 3.7869550933216006e-05, + "loss": 2.1475, + "step": 8384500 + }, + { + "epoch": 24.27, + "learning_rate": 3.786882728556873e-05, + "loss": 2.1466, + "step": 8385000 + }, + { + "epoch": 24.27, + "learning_rate": 3.786810363792146e-05, + "loss": 2.1278, + "step": 8385500 + }, + { + "epoch": 24.27, + "learning_rate": 3.786737999027418e-05, + "loss": 2.1429, + "step": 8386000 + }, + { + "epoch": 24.28, + "learning_rate": 3.78666563426269e-05, + "loss": 2.1506, + "step": 8386500 + }, + { + "epoch": 24.28, + "learning_rate": 3.7865932694979624e-05, + "loss": 2.1569, + "step": 8387000 + }, + { + "epoch": 24.28, + "learning_rate": 3.7865209047332346e-05, + "loss": 2.1404, + "step": 8387500 + }, + { + "epoch": 24.28, + "learning_rate": 3.786448539968507e-05, + "loss": 2.1653, + "step": 8388000 + }, + { + "epoch": 24.28, + "learning_rate": 3.786376464662838e-05, + "loss": 2.1593, + "step": 8388500 + }, + { + "epoch": 24.28, + "learning_rate": 3.7863040998981106e-05, + "loss": 2.1451, + "step": 8389000 + }, + { + "epoch": 24.28, + "learning_rate": 3.786231735133383e-05, + "loss": 2.1534, + "step": 8389500 + }, + { + "epoch": 24.29, + "learning_rate": 3.786159370368656e-05, + "loss": 2.1465, + "step": 8390000 + }, + { + "epoch": 24.29, + "learning_rate": 3.786087005603928e-05, + "loss": 2.1331, + "step": 8390500 + }, + { + "epoch": 24.29, + "learning_rate": 3.7860146408392e-05, + "loss": 2.1543, + "step": 8391000 + }, + { + "epoch": 24.29, + "learning_rate": 3.7859422760744724e-05, + "loss": 2.1694, + "step": 8391500 + }, + { + "epoch": 24.29, + "learning_rate": 3.7858699113097446e-05, + "loss": 2.1353, + "step": 8392000 + }, + { + "epoch": 24.29, + "learning_rate": 3.785797546545017e-05, + "loss": 2.1492, + "step": 8392500 + }, + { + "epoch": 24.29, + "learning_rate": 3.785725181780289e-05, + "loss": 2.1494, + "step": 8393000 + }, + { + "epoch": 24.3, + "learning_rate": 3.785652817015561e-05, + "loss": 2.1425, + "step": 8393500 + }, + { + "epoch": 24.3, + "learning_rate": 3.7855804522508335e-05, + "loss": 2.1467, + "step": 8394000 + }, + { + "epoch": 24.3, + "learning_rate": 3.785508232215636e-05, + "loss": 2.1427, + "step": 8394500 + }, + { + "epoch": 24.3, + "learning_rate": 3.785435867450908e-05, + "loss": 2.1347, + "step": 8395000 + }, + { + "epoch": 24.3, + "learning_rate": 3.78536350268618e-05, + "loss": 2.1628, + "step": 8395500 + }, + { + "epoch": 24.3, + "learning_rate": 3.785291282650982e-05, + "loss": 2.1373, + "step": 8396000 + }, + { + "epoch": 24.3, + "learning_rate": 3.7852189178862547e-05, + "loss": 2.1618, + "step": 8396500 + }, + { + "epoch": 24.31, + "learning_rate": 3.785146697851056e-05, + "loss": 2.1584, + "step": 8397000 + }, + { + "epoch": 24.31, + "learning_rate": 3.7850743330863284e-05, + "loss": 2.1398, + "step": 8397500 + }, + { + "epoch": 24.31, + "learning_rate": 3.7850019683216007e-05, + "loss": 2.1548, + "step": 8398000 + }, + { + "epoch": 24.31, + "learning_rate": 3.784929603556873e-05, + "loss": 2.1873, + "step": 8398500 + }, + { + "epoch": 24.31, + "learning_rate": 3.784857238792146e-05, + "loss": 2.141, + "step": 8399000 + }, + { + "epoch": 24.31, + "learning_rate": 3.784784874027418e-05, + "loss": 2.1438, + "step": 8399500 + }, + { + "epoch": 24.31, + "learning_rate": 3.78471250926269e-05, + "loss": 2.156, + "step": 8400000 + }, + { + "epoch": 24.32, + "learning_rate": 3.7846401444979625e-05, + "loss": 2.1435, + "step": 8400500 + }, + { + "epoch": 24.32, + "learning_rate": 3.784567779733235e-05, + "loss": 2.1652, + "step": 8401000 + }, + { + "epoch": 24.32, + "learning_rate": 3.784495414968507e-05, + "loss": 2.1485, + "step": 8401500 + }, + { + "epoch": 24.32, + "learning_rate": 3.7844231949333085e-05, + "loss": 2.1684, + "step": 8402000 + }, + { + "epoch": 24.32, + "learning_rate": 3.784350830168581e-05, + "loss": 2.1509, + "step": 8402500 + }, + { + "epoch": 24.32, + "learning_rate": 3.784278465403853e-05, + "loss": 2.141, + "step": 8403000 + }, + { + "epoch": 24.32, + "learning_rate": 3.784206100639126e-05, + "loss": 2.1532, + "step": 8403500 + }, + { + "epoch": 24.33, + "learning_rate": 3.784133735874398e-05, + "loss": 2.1508, + "step": 8404000 + }, + { + "epoch": 24.33, + "learning_rate": 3.784061371109671e-05, + "loss": 2.1503, + "step": 8404500 + }, + { + "epoch": 24.33, + "learning_rate": 3.783989006344943e-05, + "loss": 2.1654, + "step": 8405000 + }, + { + "epoch": 24.33, + "learning_rate": 3.7839166415802154e-05, + "loss": 2.1598, + "step": 8405500 + }, + { + "epoch": 24.33, + "learning_rate": 3.7838442768154876e-05, + "loss": 2.1418, + "step": 8406000 + }, + { + "epoch": 24.33, + "learning_rate": 3.78377191205076e-05, + "loss": 2.1587, + "step": 8406500 + }, + { + "epoch": 24.33, + "learning_rate": 3.783699547286032e-05, + "loss": 2.1497, + "step": 8407000 + }, + { + "epoch": 24.34, + "learning_rate": 3.7836273272508336e-05, + "loss": 2.1368, + "step": 8407500 + }, + { + "epoch": 24.34, + "learning_rate": 3.783554962486106e-05, + "loss": 2.1602, + "step": 8408000 + }, + { + "epoch": 24.34, + "learning_rate": 3.783482742450908e-05, + "loss": 2.1556, + "step": 8408500 + }, + { + "epoch": 24.34, + "learning_rate": 3.78341037768618e-05, + "loss": 2.1496, + "step": 8409000 + }, + { + "epoch": 24.34, + "learning_rate": 3.783338157650982e-05, + "loss": 2.1522, + "step": 8409500 + }, + { + "epoch": 24.34, + "learning_rate": 3.783265792886254e-05, + "loss": 2.1293, + "step": 8410000 + }, + { + "epoch": 24.34, + "learning_rate": 3.783193428121526e-05, + "loss": 2.1526, + "step": 8410500 + }, + { + "epoch": 24.35, + "learning_rate": 3.7831210633567985e-05, + "loss": 2.1673, + "step": 8411000 + }, + { + "epoch": 24.35, + "learning_rate": 3.7830486985920714e-05, + "loss": 2.1437, + "step": 8411500 + }, + { + "epoch": 24.35, + "learning_rate": 3.7829763338273436e-05, + "loss": 2.15, + "step": 8412000 + }, + { + "epoch": 24.35, + "learning_rate": 3.782903969062616e-05, + "loss": 2.1523, + "step": 8412500 + }, + { + "epoch": 24.35, + "learning_rate": 3.782831749027418e-05, + "loss": 2.1173, + "step": 8413000 + }, + { + "epoch": 24.35, + "learning_rate": 3.78275938426269e-05, + "loss": 2.1669, + "step": 8413500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782687164227492e-05, + "loss": 2.1469, + "step": 8414000 + }, + { + "epoch": 24.36, + "learning_rate": 3.782614799462764e-05, + "loss": 2.1393, + "step": 8414500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782542434698036e-05, + "loss": 2.1285, + "step": 8415000 + }, + { + "epoch": 24.36, + "learning_rate": 3.7824700699333085e-05, + "loss": 2.1708, + "step": 8415500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782397705168581e-05, + "loss": 2.1573, + "step": 8416000 + }, + { + "epoch": 24.36, + "learning_rate": 3.782325485133383e-05, + "loss": 2.1815, + "step": 8416500 + }, + { + "epoch": 24.36, + "learning_rate": 3.782253120368655e-05, + "loss": 2.1518, + "step": 8417000 + }, + { + "epoch": 24.37, + "learning_rate": 3.7821807556039274e-05, + "loss": 2.1402, + "step": 8417500 + }, + { + "epoch": 24.37, + "learning_rate": 3.7821083908392e-05, + "loss": 2.1504, + "step": 8418000 + }, + { + "epoch": 24.37, + "learning_rate": 3.782036026074472e-05, + "loss": 2.1609, + "step": 8418500 + }, + { + "epoch": 24.37, + "learning_rate": 3.781963661309745e-05, + "loss": 2.1662, + "step": 8419000 + }, + { + "epoch": 24.37, + "learning_rate": 3.781891296545017e-05, + "loss": 2.1435, + "step": 8419500 + }, + { + "epoch": 24.37, + "learning_rate": 3.781818931780289e-05, + "loss": 2.1426, + "step": 8420000 + }, + { + "epoch": 24.37, + "learning_rate": 3.7817465670155615e-05, + "loss": 2.1394, + "step": 8420500 + }, + { + "epoch": 24.38, + "learning_rate": 3.781674202250834e-05, + "loss": 2.1541, + "step": 8421000 + }, + { + "epoch": 24.38, + "learning_rate": 3.781601837486106e-05, + "loss": 2.1589, + "step": 8421500 + }, + { + "epoch": 24.38, + "learning_rate": 3.781529472721379e-05, + "loss": 2.1702, + "step": 8422000 + }, + { + "epoch": 24.38, + "learning_rate": 3.781457107956651e-05, + "loss": 2.1515, + "step": 8422500 + }, + { + "epoch": 24.38, + "learning_rate": 3.781384743191923e-05, + "loss": 2.1466, + "step": 8423000 + }, + { + "epoch": 24.38, + "learning_rate": 3.7813123784271955e-05, + "loss": 2.1413, + "step": 8423500 + }, + { + "epoch": 24.38, + "learning_rate": 3.781240013662468e-05, + "loss": 2.1335, + "step": 8424000 + }, + { + "epoch": 24.39, + "learning_rate": 3.78116764889774e-05, + "loss": 2.1279, + "step": 8424500 + }, + { + "epoch": 24.39, + "learning_rate": 3.781095284133012e-05, + "loss": 2.1441, + "step": 8425000 + }, + { + "epoch": 24.39, + "learning_rate": 3.7810229193682844e-05, + "loss": 2.1484, + "step": 8425500 + }, + { + "epoch": 24.39, + "learning_rate": 3.7809506993330866e-05, + "loss": 2.152, + "step": 8426000 + }, + { + "epoch": 24.39, + "learning_rate": 3.780878334568359e-05, + "loss": 2.1665, + "step": 8426500 + }, + { + "epoch": 24.39, + "learning_rate": 3.780805969803631e-05, + "loss": 2.1696, + "step": 8427000 + }, + { + "epoch": 24.39, + "learning_rate": 3.780733749768433e-05, + "loss": 2.1409, + "step": 8427500 + }, + { + "epoch": 24.4, + "learning_rate": 3.780661529733235e-05, + "loss": 2.1578, + "step": 8428000 + }, + { + "epoch": 24.4, + "learning_rate": 3.780589164968507e-05, + "loss": 2.163, + "step": 8428500 + }, + { + "epoch": 24.4, + "learning_rate": 3.780516800203779e-05, + "loss": 2.1503, + "step": 8429000 + }, + { + "epoch": 24.4, + "learning_rate": 3.7804444354390515e-05, + "loss": 2.1647, + "step": 8429500 + }, + { + "epoch": 24.4, + "learning_rate": 3.780372070674324e-05, + "loss": 2.1648, + "step": 8430000 + }, + { + "epoch": 24.4, + "learning_rate": 3.780299705909596e-05, + "loss": 2.1327, + "step": 8430500 + }, + { + "epoch": 24.4, + "learning_rate": 3.780227341144869e-05, + "loss": 2.1431, + "step": 8431000 + }, + { + "epoch": 24.41, + "learning_rate": 3.780154976380141e-05, + "loss": 2.1654, + "step": 8431500 + }, + { + "epoch": 24.41, + "learning_rate": 3.780082611615413e-05, + "loss": 2.153, + "step": 8432000 + }, + { + "epoch": 24.41, + "learning_rate": 3.7800102468506855e-05, + "loss": 2.1527, + "step": 8432500 + }, + { + "epoch": 24.41, + "learning_rate": 3.779937882085958e-05, + "loss": 2.147, + "step": 8433000 + }, + { + "epoch": 24.41, + "learning_rate": 3.77986566205076e-05, + "loss": 2.1484, + "step": 8433500 + }, + { + "epoch": 24.41, + "learning_rate": 3.779793297286032e-05, + "loss": 2.1595, + "step": 8434000 + }, + { + "epoch": 24.41, + "learning_rate": 3.7797209325213044e-05, + "loss": 2.1551, + "step": 8434500 + }, + { + "epoch": 24.42, + "learning_rate": 3.7796485677565767e-05, + "loss": 2.1628, + "step": 8435000 + }, + { + "epoch": 24.42, + "learning_rate": 3.779576202991849e-05, + "loss": 2.1486, + "step": 8435500 + }, + { + "epoch": 24.42, + "learning_rate": 3.779503838227121e-05, + "loss": 2.1471, + "step": 8436000 + }, + { + "epoch": 24.42, + "learning_rate": 3.779431473462394e-05, + "loss": 2.1442, + "step": 8436500 + }, + { + "epoch": 24.42, + "learning_rate": 3.779359108697666e-05, + "loss": 2.1657, + "step": 8437000 + }, + { + "epoch": 24.42, + "learning_rate": 3.7792867439329384e-05, + "loss": 2.1852, + "step": 8437500 + }, + { + "epoch": 24.42, + "learning_rate": 3.779214379168211e-05, + "loss": 2.1666, + "step": 8438000 + }, + { + "epoch": 24.43, + "learning_rate": 3.779142014403483e-05, + "loss": 2.1416, + "step": 8438500 + }, + { + "epoch": 24.43, + "learning_rate": 3.779069649638755e-05, + "loss": 2.157, + "step": 8439000 + }, + { + "epoch": 24.43, + "learning_rate": 3.7789972848740273e-05, + "loss": 2.1641, + "step": 8439500 + }, + { + "epoch": 24.43, + "learning_rate": 3.778925209568359e-05, + "loss": 2.1303, + "step": 8440000 + }, + { + "epoch": 24.43, + "learning_rate": 3.778852844803631e-05, + "loss": 2.1406, + "step": 8440500 + }, + { + "epoch": 24.43, + "learning_rate": 3.778780480038904e-05, + "loss": 2.1593, + "step": 8441000 + }, + { + "epoch": 24.43, + "learning_rate": 3.778708115274176e-05, + "loss": 2.1471, + "step": 8441500 + }, + { + "epoch": 24.44, + "learning_rate": 3.7786357505094485e-05, + "loss": 2.1387, + "step": 8442000 + }, + { + "epoch": 24.44, + "learning_rate": 3.778563385744721e-05, + "loss": 2.1386, + "step": 8442500 + }, + { + "epoch": 24.44, + "learning_rate": 3.778491165709522e-05, + "loss": 2.148, + "step": 8443000 + }, + { + "epoch": 24.44, + "learning_rate": 3.7784188009447945e-05, + "loss": 2.1408, + "step": 8443500 + }, + { + "epoch": 24.44, + "learning_rate": 3.778346436180067e-05, + "loss": 2.1449, + "step": 8444000 + }, + { + "epoch": 24.44, + "learning_rate": 3.778274071415339e-05, + "loss": 2.1464, + "step": 8444500 + }, + { + "epoch": 24.44, + "learning_rate": 3.778201851380141e-05, + "loss": 2.1662, + "step": 8445000 + }, + { + "epoch": 24.45, + "learning_rate": 3.7781294866154134e-05, + "loss": 2.1528, + "step": 8445500 + }, + { + "epoch": 24.45, + "learning_rate": 3.7780571218506856e-05, + "loss": 2.1194, + "step": 8446000 + }, + { + "epoch": 24.45, + "learning_rate": 3.777984901815487e-05, + "loss": 2.1515, + "step": 8446500 + }, + { + "epoch": 24.45, + "learning_rate": 3.7779125370507594e-05, + "loss": 2.1565, + "step": 8447000 + }, + { + "epoch": 24.45, + "learning_rate": 3.7778401722860316e-05, + "loss": 2.1509, + "step": 8447500 + }, + { + "epoch": 24.45, + "learning_rate": 3.777767807521304e-05, + "loss": 2.157, + "step": 8448000 + }, + { + "epoch": 24.45, + "learning_rate": 3.777695442756577e-05, + "loss": 2.1513, + "step": 8448500 + }, + { + "epoch": 24.46, + "learning_rate": 3.777623077991849e-05, + "loss": 2.1479, + "step": 8449000 + }, + { + "epoch": 24.46, + "learning_rate": 3.777550713227121e-05, + "loss": 2.159, + "step": 8449500 + }, + { + "epoch": 24.46, + "learning_rate": 3.777478348462394e-05, + "loss": 2.1536, + "step": 8450000 + }, + { + "epoch": 24.46, + "learning_rate": 3.7774061284271956e-05, + "loss": 2.1424, + "step": 8450500 + }, + { + "epoch": 24.46, + "learning_rate": 3.777333763662468e-05, + "loss": 2.1505, + "step": 8451000 + }, + { + "epoch": 24.46, + "learning_rate": 3.77726139889774e-05, + "loss": 2.1396, + "step": 8451500 + }, + { + "epoch": 24.47, + "learning_rate": 3.7771891788625416e-05, + "loss": 2.1605, + "step": 8452000 + }, + { + "epoch": 24.47, + "learning_rate": 3.777116814097814e-05, + "loss": 2.1354, + "step": 8452500 + }, + { + "epoch": 24.47, + "learning_rate": 3.777044449333087e-05, + "loss": 2.1648, + "step": 8453000 + }, + { + "epoch": 24.47, + "learning_rate": 3.776972084568359e-05, + "loss": 2.1515, + "step": 8453500 + }, + { + "epoch": 24.47, + "learning_rate": 3.776899719803631e-05, + "loss": 2.1708, + "step": 8454000 + }, + { + "epoch": 24.47, + "learning_rate": 3.7768273550389034e-05, + "loss": 2.1387, + "step": 8454500 + }, + { + "epoch": 24.47, + "learning_rate": 3.7767549902741757e-05, + "loss": 2.1645, + "step": 8455000 + }, + { + "epoch": 24.48, + "learning_rate": 3.7766826255094486e-05, + "loss": 2.1645, + "step": 8455500 + }, + { + "epoch": 24.48, + "learning_rate": 3.776610260744721e-05, + "loss": 2.1446, + "step": 8456000 + }, + { + "epoch": 24.48, + "learning_rate": 3.776537895979993e-05, + "loss": 2.1566, + "step": 8456500 + }, + { + "epoch": 24.48, + "learning_rate": 3.776465531215265e-05, + "loss": 2.1508, + "step": 8457000 + }, + { + "epoch": 24.48, + "learning_rate": 3.7763931664505374e-05, + "loss": 2.1704, + "step": 8457500 + }, + { + "epoch": 24.48, + "learning_rate": 3.77632080168581e-05, + "loss": 2.1594, + "step": 8458000 + }, + { + "epoch": 24.48, + "learning_rate": 3.776248436921082e-05, + "loss": 2.1443, + "step": 8458500 + }, + { + "epoch": 24.49, + "learning_rate": 3.776176072156354e-05, + "loss": 2.1675, + "step": 8459000 + }, + { + "epoch": 24.49, + "learning_rate": 3.7761037073916263e-05, + "loss": 2.159, + "step": 8459500 + }, + { + "epoch": 24.49, + "learning_rate": 3.776031342626899e-05, + "loss": 2.1595, + "step": 8460000 + }, + { + "epoch": 24.49, + "learning_rate": 3.7759589778621715e-05, + "loss": 2.1358, + "step": 8460500 + }, + { + "epoch": 24.49, + "learning_rate": 3.775886613097444e-05, + "loss": 2.1686, + "step": 8461000 + }, + { + "epoch": 24.49, + "learning_rate": 3.775814248332716e-05, + "loss": 2.1126, + "step": 8461500 + }, + { + "epoch": 24.49, + "learning_rate": 3.775741883567988e-05, + "loss": 2.126, + "step": 8462000 + }, + { + "epoch": 24.5, + "learning_rate": 3.775669808262319e-05, + "loss": 2.1548, + "step": 8462500 + }, + { + "epoch": 24.5, + "learning_rate": 3.775597443497592e-05, + "loss": 2.1439, + "step": 8463000 + }, + { + "epoch": 24.5, + "learning_rate": 3.775525078732864e-05, + "loss": 2.1254, + "step": 8463500 + }, + { + "epoch": 24.5, + "learning_rate": 3.775452713968137e-05, + "loss": 2.1541, + "step": 8464000 + }, + { + "epoch": 24.5, + "learning_rate": 3.775380349203409e-05, + "loss": 2.1311, + "step": 8464500 + }, + { + "epoch": 24.5, + "learning_rate": 3.7753079844386815e-05, + "loss": 2.1514, + "step": 8465000 + }, + { + "epoch": 24.5, + "learning_rate": 3.775235764403483e-05, + "loss": 2.1432, + "step": 8465500 + }, + { + "epoch": 24.51, + "learning_rate": 3.775163399638755e-05, + "loss": 2.1862, + "step": 8466000 + }, + { + "epoch": 24.51, + "learning_rate": 3.7750910348740275e-05, + "loss": 2.1487, + "step": 8466500 + }, + { + "epoch": 24.51, + "learning_rate": 3.7750186701093e-05, + "loss": 2.1608, + "step": 8467000 + }, + { + "epoch": 24.51, + "learning_rate": 3.774946305344572e-05, + "loss": 2.1695, + "step": 8467500 + }, + { + "epoch": 24.51, + "learning_rate": 3.774874085309374e-05, + "loss": 2.168, + "step": 8468000 + }, + { + "epoch": 24.51, + "learning_rate": 3.7748017205446464e-05, + "loss": 2.1523, + "step": 8468500 + }, + { + "epoch": 24.51, + "learning_rate": 3.774729500509448e-05, + "loss": 2.14, + "step": 8469000 + }, + { + "epoch": 24.52, + "learning_rate": 3.77465713574472e-05, + "loss": 2.1614, + "step": 8469500 + }, + { + "epoch": 24.52, + "learning_rate": 3.7745847709799924e-05, + "loss": 2.1773, + "step": 8470000 + }, + { + "epoch": 24.52, + "learning_rate": 3.774512406215265e-05, + "loss": 2.165, + "step": 8470500 + }, + { + "epoch": 24.52, + "learning_rate": 3.774440186180067e-05, + "loss": 2.1444, + "step": 8471000 + }, + { + "epoch": 24.52, + "learning_rate": 3.774367821415339e-05, + "loss": 2.1514, + "step": 8471500 + }, + { + "epoch": 24.52, + "learning_rate": 3.774295456650612e-05, + "loss": 2.1656, + "step": 8472000 + }, + { + "epoch": 24.52, + "learning_rate": 3.774223091885884e-05, + "loss": 2.1391, + "step": 8472500 + }, + { + "epoch": 24.53, + "learning_rate": 3.774150871850686e-05, + "loss": 2.1551, + "step": 8473000 + }, + { + "epoch": 24.53, + "learning_rate": 3.774078507085958e-05, + "loss": 2.1434, + "step": 8473500 + }, + { + "epoch": 24.53, + "learning_rate": 3.77400614232123e-05, + "loss": 2.1786, + "step": 8474000 + }, + { + "epoch": 24.53, + "learning_rate": 3.7739337775565024e-05, + "loss": 2.1434, + "step": 8474500 + }, + { + "epoch": 24.53, + "learning_rate": 3.7738614127917747e-05, + "loss": 2.1644, + "step": 8475000 + }, + { + "epoch": 24.53, + "learning_rate": 3.773789048027047e-05, + "loss": 2.1711, + "step": 8475500 + }, + { + "epoch": 24.53, + "learning_rate": 3.773716683262319e-05, + "loss": 2.1603, + "step": 8476000 + }, + { + "epoch": 24.54, + "learning_rate": 3.773644463227121e-05, + "loss": 2.1337, + "step": 8476500 + }, + { + "epoch": 24.54, + "learning_rate": 3.7735720984623936e-05, + "loss": 2.1638, + "step": 8477000 + }, + { + "epoch": 24.54, + "learning_rate": 3.773499733697666e-05, + "loss": 2.1511, + "step": 8477500 + }, + { + "epoch": 24.54, + "learning_rate": 3.773427368932939e-05, + "loss": 2.1592, + "step": 8478000 + }, + { + "epoch": 24.54, + "learning_rate": 3.773355004168211e-05, + "loss": 2.156, + "step": 8478500 + }, + { + "epoch": 24.54, + "learning_rate": 3.773282639403483e-05, + "loss": 2.1665, + "step": 8479000 + }, + { + "epoch": 24.54, + "learning_rate": 3.7732102746387554e-05, + "loss": 2.1623, + "step": 8479500 + }, + { + "epoch": 24.55, + "learning_rate": 3.773138054603557e-05, + "loss": 2.1508, + "step": 8480000 + }, + { + "epoch": 24.55, + "learning_rate": 3.773065689838829e-05, + "loss": 2.1446, + "step": 8480500 + }, + { + "epoch": 24.55, + "learning_rate": 3.772993325074102e-05, + "loss": 2.1516, + "step": 8481000 + }, + { + "epoch": 24.55, + "learning_rate": 3.772920960309374e-05, + "loss": 2.174, + "step": 8481500 + }, + { + "epoch": 24.55, + "learning_rate": 3.772848740274176e-05, + "loss": 2.1404, + "step": 8482000 + }, + { + "epoch": 24.55, + "learning_rate": 3.772776375509448e-05, + "loss": 2.1727, + "step": 8482500 + }, + { + "epoch": 24.55, + "learning_rate": 3.77270401074472e-05, + "loss": 2.1517, + "step": 8483000 + }, + { + "epoch": 24.56, + "learning_rate": 3.7726316459799925e-05, + "loss": 2.1576, + "step": 8483500 + }, + { + "epoch": 24.56, + "learning_rate": 3.772559281215265e-05, + "loss": 2.1743, + "step": 8484000 + }, + { + "epoch": 24.56, + "learning_rate": 3.772486916450537e-05, + "loss": 2.1554, + "step": 8484500 + }, + { + "epoch": 24.56, + "learning_rate": 3.772414696415339e-05, + "loss": 2.1434, + "step": 8485000 + }, + { + "epoch": 24.56, + "learning_rate": 3.772342331650612e-05, + "loss": 2.1564, + "step": 8485500 + }, + { + "epoch": 24.56, + "learning_rate": 3.772269966885884e-05, + "loss": 2.1354, + "step": 8486000 + }, + { + "epoch": 24.56, + "learning_rate": 3.772197746850686e-05, + "loss": 2.1493, + "step": 8486500 + }, + { + "epoch": 24.57, + "learning_rate": 3.772125382085958e-05, + "loss": 2.1307, + "step": 8487000 + }, + { + "epoch": 24.57, + "learning_rate": 3.77205301732123e-05, + "loss": 2.1443, + "step": 8487500 + }, + { + "epoch": 24.57, + "learning_rate": 3.7719806525565025e-05, + "loss": 2.142, + "step": 8488000 + }, + { + "epoch": 24.57, + "learning_rate": 3.771908287791775e-05, + "loss": 2.1597, + "step": 8488500 + }, + { + "epoch": 24.57, + "learning_rate": 3.771835923027047e-05, + "loss": 2.1447, + "step": 8489000 + }, + { + "epoch": 24.57, + "learning_rate": 3.77176355826232e-05, + "loss": 2.1432, + "step": 8489500 + }, + { + "epoch": 24.58, + "learning_rate": 3.771691193497592e-05, + "loss": 2.1555, + "step": 8490000 + }, + { + "epoch": 24.58, + "learning_rate": 3.771618828732864e-05, + "loss": 2.1501, + "step": 8490500 + }, + { + "epoch": 24.58, + "learning_rate": 3.7715464639681365e-05, + "loss": 2.1475, + "step": 8491000 + }, + { + "epoch": 24.58, + "learning_rate": 3.771474099203409e-05, + "loss": 2.1381, + "step": 8491500 + }, + { + "epoch": 24.58, + "learning_rate": 3.771401734438681e-05, + "loss": 2.1692, + "step": 8492000 + }, + { + "epoch": 24.58, + "learning_rate": 3.771329369673953e-05, + "loss": 2.1397, + "step": 8492500 + }, + { + "epoch": 24.58, + "learning_rate": 3.7712571496387554e-05, + "loss": 2.1638, + "step": 8493000 + }, + { + "epoch": 24.59, + "learning_rate": 3.7711847848740277e-05, + "loss": 2.1483, + "step": 8493500 + }, + { + "epoch": 24.59, + "learning_rate": 3.7711124201093e-05, + "loss": 2.1694, + "step": 8494000 + }, + { + "epoch": 24.59, + "learning_rate": 3.771040055344572e-05, + "loss": 2.1654, + "step": 8494500 + }, + { + "epoch": 24.59, + "learning_rate": 3.770967835309374e-05, + "loss": 2.151, + "step": 8495000 + }, + { + "epoch": 24.59, + "learning_rate": 3.7708954705446466e-05, + "loss": 2.1478, + "step": 8495500 + }, + { + "epoch": 24.59, + "learning_rate": 3.770823105779919e-05, + "loss": 2.1552, + "step": 8496000 + }, + { + "epoch": 24.59, + "learning_rate": 3.770750741015191e-05, + "loss": 2.1453, + "step": 8496500 + }, + { + "epoch": 24.6, + "learning_rate": 3.770678376250463e-05, + "loss": 2.1508, + "step": 8497000 + }, + { + "epoch": 24.6, + "learning_rate": 3.770606156215265e-05, + "loss": 2.1658, + "step": 8497500 + }, + { + "epoch": 24.6, + "learning_rate": 3.770533791450537e-05, + "loss": 2.1358, + "step": 8498000 + }, + { + "epoch": 24.6, + "learning_rate": 3.770461571415339e-05, + "loss": 2.1402, + "step": 8498500 + }, + { + "epoch": 24.6, + "learning_rate": 3.770389351380141e-05, + "loss": 2.1474, + "step": 8499000 + }, + { + "epoch": 24.6, + "learning_rate": 3.770316986615413e-05, + "loss": 2.1489, + "step": 8499500 + }, + { + "epoch": 24.6, + "learning_rate": 3.770244621850685e-05, + "loss": 2.1612, + "step": 8500000 + }, + { + "epoch": 24.61, + "learning_rate": 3.7701722570859575e-05, + "loss": 2.1618, + "step": 8500500 + }, + { + "epoch": 24.61, + "learning_rate": 3.7700998923212304e-05, + "loss": 2.1287, + "step": 8501000 + }, + { + "epoch": 24.61, + "learning_rate": 3.7700275275565026e-05, + "loss": 2.1393, + "step": 8501500 + }, + { + "epoch": 24.61, + "learning_rate": 3.769955162791775e-05, + "loss": 2.1764, + "step": 8502000 + }, + { + "epoch": 24.61, + "learning_rate": 3.769882798027047e-05, + "loss": 2.1439, + "step": 8502500 + }, + { + "epoch": 24.61, + "learning_rate": 3.76981043326232e-05, + "loss": 2.1417, + "step": 8503000 + }, + { + "epoch": 24.61, + "learning_rate": 3.7697382132271215e-05, + "loss": 2.1384, + "step": 8503500 + }, + { + "epoch": 24.62, + "learning_rate": 3.769665848462394e-05, + "loss": 2.1684, + "step": 8504000 + }, + { + "epoch": 24.62, + "learning_rate": 3.769593483697666e-05, + "loss": 2.1463, + "step": 8504500 + }, + { + "epoch": 24.62, + "learning_rate": 3.769521118932938e-05, + "loss": 2.152, + "step": 8505000 + }, + { + "epoch": 24.62, + "learning_rate": 3.76944889889774e-05, + "loss": 2.1421, + "step": 8505500 + }, + { + "epoch": 24.62, + "learning_rate": 3.769376534133012e-05, + "loss": 2.153, + "step": 8506000 + }, + { + "epoch": 24.62, + "learning_rate": 3.769304169368285e-05, + "loss": 2.168, + "step": 8506500 + }, + { + "epoch": 24.62, + "learning_rate": 3.769231804603557e-05, + "loss": 2.1493, + "step": 8507000 + }, + { + "epoch": 24.63, + "learning_rate": 3.769159439838829e-05, + "loss": 2.1357, + "step": 8507500 + }, + { + "epoch": 24.63, + "learning_rate": 3.769087075074102e-05, + "loss": 2.1444, + "step": 8508000 + }, + { + "epoch": 24.63, + "learning_rate": 3.7690147103093744e-05, + "loss": 2.1287, + "step": 8508500 + }, + { + "epoch": 24.63, + "learning_rate": 3.7689423455446466e-05, + "loss": 2.1488, + "step": 8509000 + }, + { + "epoch": 24.63, + "learning_rate": 3.768869980779919e-05, + "loss": 2.1404, + "step": 8509500 + }, + { + "epoch": 24.63, + "learning_rate": 3.768797616015191e-05, + "loss": 2.146, + "step": 8510000 + }, + { + "epoch": 24.63, + "learning_rate": 3.768725251250463e-05, + "loss": 2.147, + "step": 8510500 + }, + { + "epoch": 24.64, + "learning_rate": 3.768653031215265e-05, + "loss": 2.1665, + "step": 8511000 + }, + { + "epoch": 24.64, + "learning_rate": 3.768580666450537e-05, + "loss": 2.1381, + "step": 8511500 + }, + { + "epoch": 24.64, + "learning_rate": 3.76850830168581e-05, + "loss": 2.1553, + "step": 8512000 + }, + { + "epoch": 24.64, + "learning_rate": 3.768435936921082e-05, + "loss": 2.1499, + "step": 8512500 + }, + { + "epoch": 24.64, + "learning_rate": 3.7683635721563544e-05, + "loss": 2.1427, + "step": 8513000 + }, + { + "epoch": 24.64, + "learning_rate": 3.7682912073916267e-05, + "loss": 2.1481, + "step": 8513500 + }, + { + "epoch": 24.64, + "learning_rate": 3.768218842626899e-05, + "loss": 2.1519, + "step": 8514000 + }, + { + "epoch": 24.65, + "learning_rate": 3.768146477862171e-05, + "loss": 2.1449, + "step": 8514500 + }, + { + "epoch": 24.65, + "learning_rate": 3.768074113097443e-05, + "loss": 2.1476, + "step": 8515000 + }, + { + "epoch": 24.65, + "learning_rate": 3.7680018930622456e-05, + "loss": 2.1553, + "step": 8515500 + }, + { + "epoch": 24.65, + "learning_rate": 3.767929528297518e-05, + "loss": 2.1688, + "step": 8516000 + }, + { + "epoch": 24.65, + "learning_rate": 3.76785716353279e-05, + "loss": 2.1618, + "step": 8516500 + }, + { + "epoch": 24.65, + "learning_rate": 3.767784798768062e-05, + "loss": 2.1477, + "step": 8517000 + }, + { + "epoch": 24.65, + "learning_rate": 3.767712434003335e-05, + "loss": 2.1426, + "step": 8517500 + }, + { + "epoch": 24.66, + "learning_rate": 3.767640213968137e-05, + "loss": 2.1434, + "step": 8518000 + }, + { + "epoch": 24.66, + "learning_rate": 3.767567849203409e-05, + "loss": 2.1435, + "step": 8518500 + }, + { + "epoch": 24.66, + "learning_rate": 3.767495484438681e-05, + "loss": 2.1395, + "step": 8519000 + }, + { + "epoch": 24.66, + "learning_rate": 3.7674231196739534e-05, + "loss": 2.1539, + "step": 8519500 + }, + { + "epoch": 24.66, + "learning_rate": 3.7673507549092256e-05, + "loss": 2.1318, + "step": 8520000 + }, + { + "epoch": 24.66, + "learning_rate": 3.767278534874028e-05, + "loss": 2.1463, + "step": 8520500 + }, + { + "epoch": 24.66, + "learning_rate": 3.7672061701093e-05, + "loss": 2.1514, + "step": 8521000 + }, + { + "epoch": 24.67, + "learning_rate": 3.767133805344572e-05, + "loss": 2.135, + "step": 8521500 + }, + { + "epoch": 24.67, + "learning_rate": 3.7670614405798445e-05, + "loss": 2.1493, + "step": 8522000 + }, + { + "epoch": 24.67, + "learning_rate": 3.766989075815117e-05, + "loss": 2.1363, + "step": 8522500 + }, + { + "epoch": 24.67, + "learning_rate": 3.766916855779919e-05, + "loss": 2.1642, + "step": 8523000 + }, + { + "epoch": 24.67, + "learning_rate": 3.766844491015191e-05, + "loss": 2.1185, + "step": 8523500 + }, + { + "epoch": 24.67, + "learning_rate": 3.7667721262504634e-05, + "loss": 2.1613, + "step": 8524000 + }, + { + "epoch": 24.67, + "learning_rate": 3.7666997614857356e-05, + "loss": 2.1347, + "step": 8524500 + }, + { + "epoch": 24.68, + "learning_rate": 3.766627541450538e-05, + "loss": 2.1434, + "step": 8525000 + }, + { + "epoch": 24.68, + "learning_rate": 3.76655517668581e-05, + "loss": 2.1502, + "step": 8525500 + }, + { + "epoch": 24.68, + "learning_rate": 3.766482811921082e-05, + "loss": 2.1379, + "step": 8526000 + }, + { + "epoch": 24.68, + "learning_rate": 3.7664104471563545e-05, + "loss": 2.1669, + "step": 8526500 + }, + { + "epoch": 24.68, + "learning_rate": 3.766338082391627e-05, + "loss": 2.1514, + "step": 8527000 + }, + { + "epoch": 24.68, + "learning_rate": 3.766265862356428e-05, + "loss": 2.1491, + "step": 8527500 + }, + { + "epoch": 24.69, + "learning_rate": 3.76619364232123e-05, + "loss": 2.1686, + "step": 8528000 + }, + { + "epoch": 24.69, + "learning_rate": 3.766121277556503e-05, + "loss": 2.1302, + "step": 8528500 + }, + { + "epoch": 24.69, + "learning_rate": 3.766048912791775e-05, + "loss": 2.1454, + "step": 8529000 + }, + { + "epoch": 24.69, + "learning_rate": 3.765976548027047e-05, + "loss": 2.1506, + "step": 8529500 + }, + { + "epoch": 24.69, + "learning_rate": 3.7659041832623194e-05, + "loss": 2.1558, + "step": 8530000 + }, + { + "epoch": 24.69, + "learning_rate": 3.765831818497592e-05, + "loss": 2.1551, + "step": 8530500 + }, + { + "epoch": 24.69, + "learning_rate": 3.7657594537328645e-05, + "loss": 2.1655, + "step": 8531000 + }, + { + "epoch": 24.7, + "learning_rate": 3.765687088968137e-05, + "loss": 2.137, + "step": 8531500 + }, + { + "epoch": 24.7, + "learning_rate": 3.765614724203409e-05, + "loss": 2.1513, + "step": 8532000 + }, + { + "epoch": 24.7, + "learning_rate": 3.7655425041682105e-05, + "loss": 2.1449, + "step": 8532500 + }, + { + "epoch": 24.7, + "learning_rate": 3.765470139403483e-05, + "loss": 2.1463, + "step": 8533000 + }, + { + "epoch": 24.7, + "learning_rate": 3.765398064097814e-05, + "loss": 2.1611, + "step": 8533500 + }, + { + "epoch": 24.7, + "learning_rate": 3.7653256993330866e-05, + "loss": 2.1431, + "step": 8534000 + }, + { + "epoch": 24.7, + "learning_rate": 3.765253334568359e-05, + "loss": 2.1596, + "step": 8534500 + }, + { + "epoch": 24.71, + "learning_rate": 3.765180969803631e-05, + "loss": 2.1665, + "step": 8535000 + }, + { + "epoch": 24.71, + "learning_rate": 3.765108605038903e-05, + "loss": 2.1813, + "step": 8535500 + }, + { + "epoch": 24.71, + "learning_rate": 3.7650362402741755e-05, + "loss": 2.1393, + "step": 8536000 + }, + { + "epoch": 24.71, + "learning_rate": 3.764963875509448e-05, + "loss": 2.1481, + "step": 8536500 + }, + { + "epoch": 24.71, + "learning_rate": 3.764891800203779e-05, + "loss": 2.1378, + "step": 8537000 + }, + { + "epoch": 24.71, + "learning_rate": 3.7648194354390515e-05, + "loss": 2.1382, + "step": 8537500 + }, + { + "epoch": 24.71, + "learning_rate": 3.764747070674324e-05, + "loss": 2.1477, + "step": 8538000 + }, + { + "epoch": 24.72, + "learning_rate": 3.7646747059095966e-05, + "loss": 2.1378, + "step": 8538500 + }, + { + "epoch": 24.72, + "learning_rate": 3.764602341144869e-05, + "loss": 2.1487, + "step": 8539000 + }, + { + "epoch": 24.72, + "learning_rate": 3.764529976380141e-05, + "loss": 2.1462, + "step": 8539500 + }, + { + "epoch": 24.72, + "learning_rate": 3.764457611615413e-05, + "loss": 2.1527, + "step": 8540000 + }, + { + "epoch": 24.72, + "learning_rate": 3.7643852468506855e-05, + "loss": 2.1642, + "step": 8540500 + }, + { + "epoch": 24.72, + "learning_rate": 3.764312882085958e-05, + "loss": 2.1475, + "step": 8541000 + }, + { + "epoch": 24.72, + "learning_rate": 3.7642405173212306e-05, + "loss": 2.1504, + "step": 8541500 + }, + { + "epoch": 24.73, + "learning_rate": 3.764168152556503e-05, + "loss": 2.1524, + "step": 8542000 + }, + { + "epoch": 24.73, + "learning_rate": 3.764095787791775e-05, + "loss": 2.1634, + "step": 8542500 + }, + { + "epoch": 24.73, + "learning_rate": 3.764023423027047e-05, + "loss": 2.1607, + "step": 8543000 + }, + { + "epoch": 24.73, + "learning_rate": 3.7639510582623195e-05, + "loss": 2.1791, + "step": 8543500 + }, + { + "epoch": 24.73, + "learning_rate": 3.763878693497592e-05, + "loss": 2.1314, + "step": 8544000 + }, + { + "epoch": 24.73, + "learning_rate": 3.763806328732864e-05, + "loss": 2.1775, + "step": 8544500 + }, + { + "epoch": 24.73, + "learning_rate": 3.763733963968136e-05, + "loss": 2.1709, + "step": 8545000 + }, + { + "epoch": 24.74, + "learning_rate": 3.763661599203409e-05, + "loss": 2.1546, + "step": 8545500 + }, + { + "epoch": 24.74, + "learning_rate": 3.7635893791682106e-05, + "loss": 2.1463, + "step": 8546000 + }, + { + "epoch": 24.74, + "learning_rate": 3.763517014403483e-05, + "loss": 2.1397, + "step": 8546500 + }, + { + "epoch": 24.74, + "learning_rate": 3.763444649638756e-05, + "loss": 2.1586, + "step": 8547000 + }, + { + "epoch": 24.74, + "learning_rate": 3.763372284874028e-05, + "loss": 2.1403, + "step": 8547500 + }, + { + "epoch": 24.74, + "learning_rate": 3.7632999201093e-05, + "loss": 2.1604, + "step": 8548000 + }, + { + "epoch": 24.74, + "learning_rate": 3.7632275553445724e-05, + "loss": 2.1639, + "step": 8548500 + }, + { + "epoch": 24.75, + "learning_rate": 3.7631551905798446e-05, + "loss": 2.1615, + "step": 8549000 + }, + { + "epoch": 24.75, + "learning_rate": 3.763082970544646e-05, + "loss": 2.1237, + "step": 8549500 + }, + { + "epoch": 24.75, + "learning_rate": 3.7630106057799184e-05, + "loss": 2.1689, + "step": 8550000 + }, + { + "epoch": 24.75, + "learning_rate": 3.7629382410151906e-05, + "loss": 2.1586, + "step": 8550500 + }, + { + "epoch": 24.75, + "learning_rate": 3.762866020979993e-05, + "loss": 2.1449, + "step": 8551000 + }, + { + "epoch": 24.75, + "learning_rate": 3.7627938009447944e-05, + "loss": 2.1342, + "step": 8551500 + }, + { + "epoch": 24.75, + "learning_rate": 3.7627214361800667e-05, + "loss": 2.1597, + "step": 8552000 + }, + { + "epoch": 24.76, + "learning_rate": 3.762649071415339e-05, + "loss": 2.1683, + "step": 8552500 + }, + { + "epoch": 24.76, + "learning_rate": 3.762576706650612e-05, + "loss": 2.1685, + "step": 8553000 + }, + { + "epoch": 24.76, + "learning_rate": 3.762504341885884e-05, + "loss": 2.1642, + "step": 8553500 + }, + { + "epoch": 24.76, + "learning_rate": 3.762431977121156e-05, + "loss": 2.1676, + "step": 8554000 + }, + { + "epoch": 24.76, + "learning_rate": 3.7623596123564285e-05, + "loss": 2.1489, + "step": 8554500 + }, + { + "epoch": 24.76, + "learning_rate": 3.762287247591701e-05, + "loss": 2.167, + "step": 8555000 + }, + { + "epoch": 24.76, + "learning_rate": 3.762214882826973e-05, + "loss": 2.1369, + "step": 8555500 + }, + { + "epoch": 24.77, + "learning_rate": 3.762142518062246e-05, + "loss": 2.133, + "step": 8556000 + }, + { + "epoch": 24.77, + "learning_rate": 3.7620702980270474e-05, + "loss": 2.1603, + "step": 8556500 + }, + { + "epoch": 24.77, + "learning_rate": 3.7619979332623196e-05, + "loss": 2.1641, + "step": 8557000 + }, + { + "epoch": 24.77, + "learning_rate": 3.761925568497592e-05, + "loss": 2.1751, + "step": 8557500 + }, + { + "epoch": 24.77, + "learning_rate": 3.761853203732864e-05, + "loss": 2.1658, + "step": 8558000 + }, + { + "epoch": 24.77, + "learning_rate": 3.7617809836976656e-05, + "loss": 2.1656, + "step": 8558500 + }, + { + "epoch": 24.77, + "learning_rate": 3.761708618932938e-05, + "loss": 2.1233, + "step": 8559000 + }, + { + "epoch": 24.78, + "learning_rate": 3.761636254168211e-05, + "loss": 2.1824, + "step": 8559500 + }, + { + "epoch": 24.78, + "learning_rate": 3.761563889403483e-05, + "loss": 2.1388, + "step": 8560000 + }, + { + "epoch": 24.78, + "learning_rate": 3.761491524638756e-05, + "loss": 2.1483, + "step": 8560500 + }, + { + "epoch": 24.78, + "learning_rate": 3.761419159874028e-05, + "loss": 2.1481, + "step": 8561000 + }, + { + "epoch": 24.78, + "learning_rate": 3.7613467951093e-05, + "loss": 2.1261, + "step": 8561500 + }, + { + "epoch": 24.78, + "learning_rate": 3.7612744303445725e-05, + "loss": 2.162, + "step": 8562000 + }, + { + "epoch": 24.78, + "learning_rate": 3.761202065579845e-05, + "loss": 2.1458, + "step": 8562500 + }, + { + "epoch": 24.79, + "learning_rate": 3.761129700815117e-05, + "loss": 2.1722, + "step": 8563000 + }, + { + "epoch": 24.79, + "learning_rate": 3.761057336050389e-05, + "loss": 2.1785, + "step": 8563500 + }, + { + "epoch": 24.79, + "learning_rate": 3.7609849712856614e-05, + "loss": 2.144, + "step": 8564000 + }, + { + "epoch": 24.79, + "learning_rate": 3.7609126065209336e-05, + "loss": 2.143, + "step": 8564500 + }, + { + "epoch": 24.79, + "learning_rate": 3.760840386485736e-05, + "loss": 2.1617, + "step": 8565000 + }, + { + "epoch": 24.79, + "learning_rate": 3.760768021721008e-05, + "loss": 2.1707, + "step": 8565500 + }, + { + "epoch": 24.8, + "learning_rate": 3.7606958016858096e-05, + "loss": 2.1646, + "step": 8566000 + }, + { + "epoch": 24.8, + "learning_rate": 3.760623436921082e-05, + "loss": 2.1984, + "step": 8566500 + }, + { + "epoch": 24.8, + "learning_rate": 3.760551072156354e-05, + "loss": 2.1484, + "step": 8567000 + }, + { + "epoch": 24.8, + "learning_rate": 3.760478707391626e-05, + "loss": 2.1529, + "step": 8567500 + }, + { + "epoch": 24.8, + "learning_rate": 3.760406342626899e-05, + "loss": 2.1374, + "step": 8568000 + }, + { + "epoch": 24.8, + "learning_rate": 3.7603339778621714e-05, + "loss": 2.1576, + "step": 8568500 + }, + { + "epoch": 24.8, + "learning_rate": 3.7602616130974436e-05, + "loss": 2.1527, + "step": 8569000 + }, + { + "epoch": 24.81, + "learning_rate": 3.760189248332716e-05, + "loss": 2.1506, + "step": 8569500 + }, + { + "epoch": 24.81, + "learning_rate": 3.760116883567988e-05, + "loss": 2.1429, + "step": 8570000 + }, + { + "epoch": 24.81, + "learning_rate": 3.760044518803261e-05, + "loss": 2.1583, + "step": 8570500 + }, + { + "epoch": 24.81, + "learning_rate": 3.7599722987680626e-05, + "loss": 2.1784, + "step": 8571000 + }, + { + "epoch": 24.81, + "learning_rate": 3.759899934003335e-05, + "loss": 2.1536, + "step": 8571500 + }, + { + "epoch": 24.81, + "learning_rate": 3.759827569238607e-05, + "loss": 2.1325, + "step": 8572000 + }, + { + "epoch": 24.81, + "learning_rate": 3.759755204473879e-05, + "loss": 2.164, + "step": 8572500 + }, + { + "epoch": 24.82, + "learning_rate": 3.7596828397091514e-05, + "loss": 2.1706, + "step": 8573000 + }, + { + "epoch": 24.82, + "learning_rate": 3.759610474944424e-05, + "loss": 2.1727, + "step": 8573500 + }, + { + "epoch": 24.82, + "learning_rate": 3.759538110179696e-05, + "loss": 2.1567, + "step": 8574000 + }, + { + "epoch": 24.82, + "learning_rate": 3.759465745414968e-05, + "loss": 2.1439, + "step": 8574500 + }, + { + "epoch": 24.82, + "learning_rate": 3.759393525379771e-05, + "loss": 2.1419, + "step": 8575000 + }, + { + "epoch": 24.82, + "learning_rate": 3.759321160615043e-05, + "loss": 2.1888, + "step": 8575500 + }, + { + "epoch": 24.82, + "learning_rate": 3.759248940579845e-05, + "loss": 2.1594, + "step": 8576000 + }, + { + "epoch": 24.83, + "learning_rate": 3.7591767205446464e-05, + "loss": 2.1384, + "step": 8576500 + }, + { + "epoch": 24.83, + "learning_rate": 3.7591043557799186e-05, + "loss": 2.1556, + "step": 8577000 + }, + { + "epoch": 24.83, + "learning_rate": 3.759031991015191e-05, + "loss": 2.1577, + "step": 8577500 + }, + { + "epoch": 24.83, + "learning_rate": 3.758959626250464e-05, + "loss": 2.1828, + "step": 8578000 + }, + { + "epoch": 24.83, + "learning_rate": 3.758887406215265e-05, + "loss": 2.1814, + "step": 8578500 + }, + { + "epoch": 24.83, + "learning_rate": 3.7588150414505375e-05, + "loss": 2.1376, + "step": 8579000 + }, + { + "epoch": 24.83, + "learning_rate": 3.75874267668581e-05, + "loss": 2.1499, + "step": 8579500 + }, + { + "epoch": 24.84, + "learning_rate": 3.758670311921082e-05, + "loss": 2.1818, + "step": 8580000 + }, + { + "epoch": 24.84, + "learning_rate": 3.758597947156354e-05, + "loss": 2.1828, + "step": 8580500 + }, + { + "epoch": 24.84, + "learning_rate": 3.7585255823916264e-05, + "loss": 2.1494, + "step": 8581000 + }, + { + "epoch": 24.84, + "learning_rate": 3.7584532176268986e-05, + "loss": 2.165, + "step": 8581500 + }, + { + "epoch": 24.84, + "learning_rate": 3.758380852862171e-05, + "loss": 2.1683, + "step": 8582000 + }, + { + "epoch": 24.84, + "learning_rate": 3.758308488097444e-05, + "loss": 2.1592, + "step": 8582500 + }, + { + "epoch": 24.84, + "learning_rate": 3.758236123332716e-05, + "loss": 2.1569, + "step": 8583000 + }, + { + "epoch": 24.85, + "learning_rate": 3.758163903297518e-05, + "loss": 2.1516, + "step": 8583500 + }, + { + "epoch": 24.85, + "learning_rate": 3.7580915385327904e-05, + "loss": 2.1631, + "step": 8584000 + }, + { + "epoch": 24.85, + "learning_rate": 3.7580191737680626e-05, + "loss": 2.1678, + "step": 8584500 + }, + { + "epoch": 24.85, + "learning_rate": 3.757946809003335e-05, + "loss": 2.1427, + "step": 8585000 + }, + { + "epoch": 24.85, + "learning_rate": 3.757874444238607e-05, + "loss": 2.1494, + "step": 8585500 + }, + { + "epoch": 24.85, + "learning_rate": 3.757802079473879e-05, + "loss": 2.1409, + "step": 8586000 + }, + { + "epoch": 24.85, + "learning_rate": 3.7577297147091515e-05, + "loss": 2.1849, + "step": 8586500 + }, + { + "epoch": 24.86, + "learning_rate": 3.757657494673954e-05, + "loss": 2.1366, + "step": 8587000 + }, + { + "epoch": 24.86, + "learning_rate": 3.757585274638755e-05, + "loss": 2.1416, + "step": 8587500 + }, + { + "epoch": 24.86, + "learning_rate": 3.757513054603557e-05, + "loss": 2.1363, + "step": 8588000 + }, + { + "epoch": 24.86, + "learning_rate": 3.757440689838829e-05, + "loss": 2.143, + "step": 8588500 + }, + { + "epoch": 24.86, + "learning_rate": 3.757368325074101e-05, + "loss": 2.1744, + "step": 8589000 + }, + { + "epoch": 24.86, + "learning_rate": 3.7572959603093735e-05, + "loss": 2.1523, + "step": 8589500 + }, + { + "epoch": 24.86, + "learning_rate": 3.757223595544646e-05, + "loss": 2.1579, + "step": 8590000 + }, + { + "epoch": 24.87, + "learning_rate": 3.757151230779919e-05, + "loss": 2.1664, + "step": 8590500 + }, + { + "epoch": 24.87, + "learning_rate": 3.757078866015191e-05, + "loss": 2.1494, + "step": 8591000 + }, + { + "epoch": 24.87, + "learning_rate": 3.757006501250464e-05, + "loss": 2.1543, + "step": 8591500 + }, + { + "epoch": 24.87, + "learning_rate": 3.756934136485736e-05, + "loss": 2.1459, + "step": 8592000 + }, + { + "epoch": 24.87, + "learning_rate": 3.756861771721008e-05, + "loss": 2.1324, + "step": 8592500 + }, + { + "epoch": 24.87, + "learning_rate": 3.7567894069562805e-05, + "loss": 2.1464, + "step": 8593000 + }, + { + "epoch": 24.87, + "learning_rate": 3.756717042191553e-05, + "loss": 2.1465, + "step": 8593500 + }, + { + "epoch": 24.88, + "learning_rate": 3.756644677426825e-05, + "loss": 2.1682, + "step": 8594000 + }, + { + "epoch": 24.88, + "learning_rate": 3.756572312662097e-05, + "loss": 2.156, + "step": 8594500 + }, + { + "epoch": 24.88, + "learning_rate": 3.7564999478973694e-05, + "loss": 2.1662, + "step": 8595000 + }, + { + "epoch": 24.88, + "learning_rate": 3.7564275831326416e-05, + "loss": 2.1568, + "step": 8595500 + }, + { + "epoch": 24.88, + "learning_rate": 3.756355363097444e-05, + "loss": 2.1578, + "step": 8596000 + }, + { + "epoch": 24.88, + "learning_rate": 3.7562831430622454e-05, + "loss": 2.1709, + "step": 8596500 + }, + { + "epoch": 24.88, + "learning_rate": 3.7562107782975176e-05, + "loss": 2.1561, + "step": 8597000 + }, + { + "epoch": 24.89, + "learning_rate": 3.75613841353279e-05, + "loss": 2.1699, + "step": 8597500 + }, + { + "epoch": 24.89, + "learning_rate": 3.756066048768063e-05, + "loss": 2.1833, + "step": 8598000 + }, + { + "epoch": 24.89, + "learning_rate": 3.755993684003335e-05, + "loss": 2.1327, + "step": 8598500 + }, + { + "epoch": 24.89, + "learning_rate": 3.755921319238607e-05, + "loss": 2.1584, + "step": 8599000 + }, + { + "epoch": 24.89, + "learning_rate": 3.7558489544738794e-05, + "loss": 2.1395, + "step": 8599500 + }, + { + "epoch": 24.89, + "learning_rate": 3.7557767344386816e-05, + "loss": 2.1771, + "step": 8600000 + }, + { + "epoch": 24.89, + "learning_rate": 3.755704369673954e-05, + "loss": 2.1429, + "step": 8600500 + }, + { + "epoch": 24.9, + "learning_rate": 3.7556321496387554e-05, + "loss": 2.1833, + "step": 8601000 + }, + { + "epoch": 24.9, + "learning_rate": 3.755559929603557e-05, + "loss": 2.18, + "step": 8601500 + }, + { + "epoch": 24.9, + "learning_rate": 3.755487564838829e-05, + "loss": 2.1448, + "step": 8602000 + }, + { + "epoch": 24.9, + "learning_rate": 3.7554152000741014e-05, + "loss": 2.1571, + "step": 8602500 + }, + { + "epoch": 24.9, + "learning_rate": 3.7553428353093736e-05, + "loss": 2.1701, + "step": 8603000 + }, + { + "epoch": 24.9, + "learning_rate": 3.7552704705446465e-05, + "loss": 2.17, + "step": 8603500 + }, + { + "epoch": 24.91, + "learning_rate": 3.755198250509448e-05, + "loss": 2.1698, + "step": 8604000 + }, + { + "epoch": 24.91, + "learning_rate": 3.75512588574472e-05, + "loss": 2.1715, + "step": 8604500 + }, + { + "epoch": 24.91, + "learning_rate": 3.7550535209799925e-05, + "loss": 2.1627, + "step": 8605000 + }, + { + "epoch": 24.91, + "learning_rate": 3.7549811562152654e-05, + "loss": 2.1445, + "step": 8605500 + }, + { + "epoch": 24.91, + "learning_rate": 3.7549087914505376e-05, + "loss": 2.1485, + "step": 8606000 + }, + { + "epoch": 24.91, + "learning_rate": 3.75483642668581e-05, + "loss": 2.1535, + "step": 8606500 + }, + { + "epoch": 24.91, + "learning_rate": 3.754764061921082e-05, + "loss": 2.165, + "step": 8607000 + }, + { + "epoch": 24.92, + "learning_rate": 3.754691697156354e-05, + "loss": 2.1476, + "step": 8607500 + }, + { + "epoch": 24.92, + "learning_rate": 3.7546193323916265e-05, + "loss": 2.159, + "step": 8608000 + }, + { + "epoch": 24.92, + "learning_rate": 3.754546967626899e-05, + "loss": 2.1687, + "step": 8608500 + }, + { + "epoch": 24.92, + "learning_rate": 3.754474602862172e-05, + "loss": 2.1579, + "step": 8609000 + }, + { + "epoch": 24.92, + "learning_rate": 3.754402238097444e-05, + "loss": 2.1337, + "step": 8609500 + }, + { + "epoch": 24.92, + "learning_rate": 3.754329873332716e-05, + "loss": 2.1646, + "step": 8610000 + }, + { + "epoch": 24.92, + "learning_rate": 3.754257508567988e-05, + "loss": 2.175, + "step": 8610500 + }, + { + "epoch": 24.93, + "learning_rate": 3.7541851438032606e-05, + "loss": 2.1481, + "step": 8611000 + }, + { + "epoch": 24.93, + "learning_rate": 3.754112779038533e-05, + "loss": 2.1402, + "step": 8611500 + }, + { + "epoch": 24.93, + "learning_rate": 3.754040414273805e-05, + "loss": 2.1396, + "step": 8612000 + }, + { + "epoch": 24.93, + "learning_rate": 3.7539683389681366e-05, + "loss": 2.1379, + "step": 8612500 + }, + { + "epoch": 24.93, + "learning_rate": 3.753895974203409e-05, + "loss": 2.127, + "step": 8613000 + }, + { + "epoch": 24.93, + "learning_rate": 3.753823609438682e-05, + "loss": 2.1571, + "step": 8613500 + }, + { + "epoch": 24.93, + "learning_rate": 3.753751244673954e-05, + "loss": 2.16, + "step": 8614000 + }, + { + "epoch": 24.94, + "learning_rate": 3.753678879909226e-05, + "loss": 2.1639, + "step": 8614500 + }, + { + "epoch": 24.94, + "learning_rate": 3.7536065151444984e-05, + "loss": 2.1724, + "step": 8615000 + }, + { + "epoch": 24.94, + "learning_rate": 3.7535342951093e-05, + "loss": 2.1632, + "step": 8615500 + }, + { + "epoch": 24.94, + "learning_rate": 3.753461930344572e-05, + "loss": 2.1744, + "step": 8616000 + }, + { + "epoch": 24.94, + "learning_rate": 3.753389710309374e-05, + "loss": 2.1434, + "step": 8616500 + }, + { + "epoch": 24.94, + "learning_rate": 3.7533173455446466e-05, + "loss": 2.156, + "step": 8617000 + }, + { + "epoch": 24.94, + "learning_rate": 3.753244980779919e-05, + "loss": 2.1484, + "step": 8617500 + }, + { + "epoch": 24.95, + "learning_rate": 3.753172616015191e-05, + "loss": 2.1475, + "step": 8618000 + }, + { + "epoch": 24.95, + "learning_rate": 3.753100251250463e-05, + "loss": 2.1588, + "step": 8618500 + }, + { + "epoch": 24.95, + "learning_rate": 3.7530278864857355e-05, + "loss": 2.1377, + "step": 8619000 + }, + { + "epoch": 24.95, + "learning_rate": 3.752955521721008e-05, + "loss": 2.1535, + "step": 8619500 + }, + { + "epoch": 24.95, + "learning_rate": 3.75288315695628e-05, + "loss": 2.1499, + "step": 8620000 + }, + { + "epoch": 24.95, + "learning_rate": 3.752810792191553e-05, + "loss": 2.1498, + "step": 8620500 + }, + { + "epoch": 24.95, + "learning_rate": 3.752738427426825e-05, + "loss": 2.1387, + "step": 8621000 + }, + { + "epoch": 24.96, + "learning_rate": 3.752666062662097e-05, + "loss": 2.1541, + "step": 8621500 + }, + { + "epoch": 24.96, + "learning_rate": 3.7525936978973695e-05, + "loss": 2.1232, + "step": 8622000 + }, + { + "epoch": 24.96, + "learning_rate": 3.752521477862172e-05, + "loss": 2.1425, + "step": 8622500 + }, + { + "epoch": 24.96, + "learning_rate": 3.752449113097444e-05, + "loss": 2.1524, + "step": 8623000 + }, + { + "epoch": 24.96, + "learning_rate": 3.7523768930622455e-05, + "loss": 2.1705, + "step": 8623500 + }, + { + "epoch": 24.96, + "learning_rate": 3.752304528297518e-05, + "loss": 2.1598, + "step": 8624000 + }, + { + "epoch": 24.96, + "learning_rate": 3.75223216353279e-05, + "loss": 2.1421, + "step": 8624500 + }, + { + "epoch": 24.97, + "learning_rate": 3.752159798768062e-05, + "loss": 2.1577, + "step": 8625000 + }, + { + "epoch": 24.97, + "learning_rate": 3.7520874340033344e-05, + "loss": 2.1655, + "step": 8625500 + }, + { + "epoch": 24.97, + "learning_rate": 3.7520150692386066e-05, + "loss": 2.1316, + "step": 8626000 + }, + { + "epoch": 24.97, + "learning_rate": 3.751942704473879e-05, + "loss": 2.1615, + "step": 8626500 + }, + { + "epoch": 24.97, + "learning_rate": 3.751870339709152e-05, + "loss": 2.1835, + "step": 8627000 + }, + { + "epoch": 24.97, + "learning_rate": 3.751798119673953e-05, + "loss": 2.1476, + "step": 8627500 + }, + { + "epoch": 24.97, + "learning_rate": 3.751725754909226e-05, + "loss": 2.1477, + "step": 8628000 + }, + { + "epoch": 24.98, + "learning_rate": 3.751653534874028e-05, + "loss": 2.1672, + "step": 8628500 + }, + { + "epoch": 24.98, + "learning_rate": 3.7515811701093e-05, + "loss": 2.1265, + "step": 8629000 + }, + { + "epoch": 24.98, + "learning_rate": 3.751508805344572e-05, + "loss": 2.1589, + "step": 8629500 + }, + { + "epoch": 24.98, + "learning_rate": 3.7514364405798444e-05, + "loss": 2.153, + "step": 8630000 + }, + { + "epoch": 24.98, + "learning_rate": 3.751364075815117e-05, + "loss": 2.1568, + "step": 8630500 + }, + { + "epoch": 24.98, + "learning_rate": 3.751291855779919e-05, + "loss": 2.1553, + "step": 8631000 + }, + { + "epoch": 24.98, + "learning_rate": 3.751219491015191e-05, + "loss": 2.1537, + "step": 8631500 + }, + { + "epoch": 24.99, + "learning_rate": 3.7511471262504633e-05, + "loss": 2.1553, + "step": 8632000 + }, + { + "epoch": 24.99, + "learning_rate": 3.7510747614857356e-05, + "loss": 2.1513, + "step": 8632500 + }, + { + "epoch": 24.99, + "learning_rate": 3.751002396721008e-05, + "loss": 2.1253, + "step": 8633000 + }, + { + "epoch": 24.99, + "learning_rate": 3.75093003195628e-05, + "loss": 2.1523, + "step": 8633500 + }, + { + "epoch": 24.99, + "learning_rate": 3.7508578119210816e-05, + "loss": 2.1388, + "step": 8634000 + }, + { + "epoch": 24.99, + "learning_rate": 3.7507854471563545e-05, + "loss": 2.1643, + "step": 8634500 + }, + { + "epoch": 24.99, + "learning_rate": 3.750713082391627e-05, + "loss": 2.1456, + "step": 8635000 + }, + { + "epoch": 25.0, + "learning_rate": 3.7506407176268996e-05, + "loss": 2.1356, + "step": 8635500 + }, + { + "epoch": 25.0, + "learning_rate": 3.750568352862172e-05, + "loss": 2.151, + "step": 8636000 + }, + { + "epoch": 25.0, + "learning_rate": 3.750495988097444e-05, + "loss": 2.1853, + "step": 8636500 + }, + { + "epoch": 25.0, + "eval_accuracy": 0.6643911972760643, + "eval_accuracy_mlm": 0.6288164705108058, + "eval_accuracy_nsp": 0.855142007305459, + "eval_loss": 2.1985487937927246, + "eval_runtime": 331.5793, + "eval_samples_per_second": 1316.084, + "eval_steps_per_second": 54.838, + "step": 8636800 + }, + { + "epoch": 25.0, + "learning_rate": 3.750423623332716e-05, + "loss": 2.1611, + "step": 8637000 + }, + { + "epoch": 25.0, + "learning_rate": 3.7503512585679885e-05, + "loss": 2.1248, + "step": 8637500 + }, + { + "epoch": 25.0, + "learning_rate": 3.750278893803261e-05, + "loss": 2.116, + "step": 8638000 + }, + { + "epoch": 25.0, + "learning_rate": 3.750206529038533e-05, + "loss": 2.1289, + "step": 8638500 + }, + { + "epoch": 25.01, + "learning_rate": 3.7501343090033345e-05, + "loss": 2.1352, + "step": 8639000 + }, + { + "epoch": 25.01, + "learning_rate": 3.750061944238607e-05, + "loss": 2.1219, + "step": 8639500 + }, + { + "epoch": 25.01, + "learning_rate": 3.7499895794738796e-05, + "loss": 2.1537, + "step": 8640000 + }, + { + "epoch": 25.01, + "learning_rate": 3.749917214709152e-05, + "loss": 2.1422, + "step": 8640500 + }, + { + "epoch": 25.01, + "learning_rate": 3.7498449946739534e-05, + "loss": 2.1252, + "step": 8641000 + }, + { + "epoch": 25.01, + "learning_rate": 3.7497726299092256e-05, + "loss": 2.1215, + "step": 8641500 + }, + { + "epoch": 25.02, + "learning_rate": 3.749700265144498e-05, + "loss": 2.109, + "step": 8642000 + }, + { + "epoch": 25.02, + "learning_rate": 3.7496280451092994e-05, + "loss": 2.1269, + "step": 8642500 + }, + { + "epoch": 25.02, + "learning_rate": 3.749555680344572e-05, + "loss": 2.113, + "step": 8643000 + }, + { + "epoch": 25.02, + "learning_rate": 3.7494833155798445e-05, + "loss": 2.1451, + "step": 8643500 + }, + { + "epoch": 25.02, + "learning_rate": 3.749410950815117e-05, + "loss": 2.1177, + "step": 8644000 + }, + { + "epoch": 25.02, + "learning_rate": 3.749338730779919e-05, + "loss": 2.1566, + "step": 8644500 + }, + { + "epoch": 25.02, + "learning_rate": 3.749266366015191e-05, + "loss": 2.1421, + "step": 8645000 + }, + { + "epoch": 25.03, + "learning_rate": 3.7491940012504634e-05, + "loss": 2.1486, + "step": 8645500 + }, + { + "epoch": 25.03, + "learning_rate": 3.7491216364857357e-05, + "loss": 2.1403, + "step": 8646000 + }, + { + "epoch": 25.03, + "learning_rate": 3.749049271721008e-05, + "loss": 2.1392, + "step": 8646500 + }, + { + "epoch": 25.03, + "learning_rate": 3.74897690695628e-05, + "loss": 2.1235, + "step": 8647000 + }, + { + "epoch": 25.03, + "learning_rate": 3.748904542191552e-05, + "loss": 2.1229, + "step": 8647500 + }, + { + "epoch": 25.03, + "learning_rate": 3.7488321774268245e-05, + "loss": 2.1268, + "step": 8648000 + }, + { + "epoch": 25.03, + "learning_rate": 3.748759812662097e-05, + "loss": 2.1283, + "step": 8648500 + }, + { + "epoch": 25.04, + "learning_rate": 3.748687592626899e-05, + "loss": 2.1199, + "step": 8649000 + }, + { + "epoch": 25.04, + "learning_rate": 3.748615227862171e-05, + "loss": 2.1399, + "step": 8649500 + }, + { + "epoch": 25.04, + "learning_rate": 3.748543007826973e-05, + "loss": 2.1184, + "step": 8650000 + }, + { + "epoch": 25.04, + "learning_rate": 3.748470643062246e-05, + "loss": 2.1636, + "step": 8650500 + }, + { + "epoch": 25.04, + "learning_rate": 3.748398278297518e-05, + "loss": 2.1227, + "step": 8651000 + }, + { + "epoch": 25.04, + "learning_rate": 3.74832591353279e-05, + "loss": 2.1397, + "step": 8651500 + }, + { + "epoch": 25.04, + "learning_rate": 3.7482535487680624e-05, + "loss": 2.1339, + "step": 8652000 + }, + { + "epoch": 25.05, + "learning_rate": 3.7481811840033346e-05, + "loss": 2.1513, + "step": 8652500 + }, + { + "epoch": 25.05, + "learning_rate": 3.748108819238607e-05, + "loss": 2.1268, + "step": 8653000 + }, + { + "epoch": 25.05, + "learning_rate": 3.748036599203409e-05, + "loss": 2.1305, + "step": 8653500 + }, + { + "epoch": 25.05, + "learning_rate": 3.747964234438681e-05, + "loss": 2.1514, + "step": 8654000 + }, + { + "epoch": 25.05, + "learning_rate": 3.7478918696739535e-05, + "loss": 2.1455, + "step": 8654500 + }, + { + "epoch": 25.05, + "learning_rate": 3.747819504909226e-05, + "loss": 2.1103, + "step": 8655000 + }, + { + "epoch": 25.05, + "learning_rate": 3.747747284874027e-05, + "loss": 2.1387, + "step": 8655500 + }, + { + "epoch": 25.06, + "learning_rate": 3.7476749201092995e-05, + "loss": 2.1374, + "step": 8656000 + }, + { + "epoch": 25.06, + "learning_rate": 3.7476025553445724e-05, + "loss": 2.1478, + "step": 8656500 + }, + { + "epoch": 25.06, + "learning_rate": 3.7475301905798446e-05, + "loss": 2.1449, + "step": 8657000 + }, + { + "epoch": 25.06, + "learning_rate": 3.747457825815117e-05, + "loss": 2.165, + "step": 8657500 + }, + { + "epoch": 25.06, + "learning_rate": 3.74738546105039e-05, + "loss": 2.1407, + "step": 8658000 + }, + { + "epoch": 25.06, + "learning_rate": 3.747313096285662e-05, + "loss": 2.1436, + "step": 8658500 + }, + { + "epoch": 25.06, + "learning_rate": 3.747240731520934e-05, + "loss": 2.1387, + "step": 8659000 + }, + { + "epoch": 25.07, + "learning_rate": 3.7471683667562064e-05, + "loss": 2.1306, + "step": 8659500 + }, + { + "epoch": 25.07, + "learning_rate": 3.7470960019914786e-05, + "loss": 2.133, + "step": 8660000 + }, + { + "epoch": 25.07, + "learning_rate": 3.747023637226751e-05, + "loss": 2.146, + "step": 8660500 + }, + { + "epoch": 25.07, + "learning_rate": 3.746951272462023e-05, + "loss": 2.132, + "step": 8661000 + }, + { + "epoch": 25.07, + "learning_rate": 3.746878907697295e-05, + "loss": 2.1471, + "step": 8661500 + }, + { + "epoch": 25.07, + "learning_rate": 3.7468065429325675e-05, + "loss": 2.1235, + "step": 8662000 + }, + { + "epoch": 25.07, + "learning_rate": 3.74673432289737e-05, + "loss": 2.1359, + "step": 8662500 + }, + { + "epoch": 25.08, + "learning_rate": 3.746661958132642e-05, + "loss": 2.1529, + "step": 8663000 + }, + { + "epoch": 25.08, + "learning_rate": 3.7465897380974435e-05, + "loss": 2.1217, + "step": 8663500 + }, + { + "epoch": 25.08, + "learning_rate": 3.746517518062245e-05, + "loss": 2.1262, + "step": 8664000 + }, + { + "epoch": 25.08, + "learning_rate": 3.746445153297517e-05, + "loss": 2.1333, + "step": 8664500 + }, + { + "epoch": 25.08, + "learning_rate": 3.7463727885327895e-05, + "loss": 2.1118, + "step": 8665000 + }, + { + "epoch": 25.08, + "learning_rate": 3.7463004237680624e-05, + "loss": 2.1291, + "step": 8665500 + }, + { + "epoch": 25.08, + "learning_rate": 3.7462280590033347e-05, + "loss": 2.1271, + "step": 8666000 + }, + { + "epoch": 25.09, + "learning_rate": 3.7461556942386076e-05, + "loss": 2.1451, + "step": 8666500 + }, + { + "epoch": 25.09, + "learning_rate": 3.74608332947388e-05, + "loss": 2.1524, + "step": 8667000 + }, + { + "epoch": 25.09, + "learning_rate": 3.746010964709152e-05, + "loss": 2.1012, + "step": 8667500 + }, + { + "epoch": 25.09, + "learning_rate": 3.745938599944424e-05, + "loss": 2.1293, + "step": 8668000 + }, + { + "epoch": 25.09, + "learning_rate": 3.7458662351796964e-05, + "loss": 2.1278, + "step": 8668500 + }, + { + "epoch": 25.09, + "learning_rate": 3.745793870414969e-05, + "loss": 2.1313, + "step": 8669000 + }, + { + "epoch": 25.09, + "learning_rate": 3.74572165037977e-05, + "loss": 2.1278, + "step": 8669500 + }, + { + "epoch": 25.1, + "learning_rate": 3.7456492856150425e-05, + "loss": 2.1348, + "step": 8670000 + }, + { + "epoch": 25.1, + "learning_rate": 3.745576920850315e-05, + "loss": 2.1306, + "step": 8670500 + }, + { + "epoch": 25.1, + "learning_rate": 3.7455045560855876e-05, + "loss": 2.1573, + "step": 8671000 + }, + { + "epoch": 25.1, + "learning_rate": 3.74543219132086e-05, + "loss": 2.1213, + "step": 8671500 + }, + { + "epoch": 25.1, + "learning_rate": 3.745359826556132e-05, + "loss": 2.1283, + "step": 8672000 + }, + { + "epoch": 25.1, + "learning_rate": 3.745287461791405e-05, + "loss": 2.1506, + "step": 8672500 + }, + { + "epoch": 25.1, + "learning_rate": 3.745215097026677e-05, + "loss": 2.155, + "step": 8673000 + }, + { + "epoch": 25.11, + "learning_rate": 3.7451427322619494e-05, + "loss": 2.1521, + "step": 8673500 + }, + { + "epoch": 25.11, + "learning_rate": 3.745070512226751e-05, + "loss": 2.1436, + "step": 8674000 + }, + { + "epoch": 25.11, + "learning_rate": 3.744998147462023e-05, + "loss": 2.1416, + "step": 8674500 + }, + { + "epoch": 25.11, + "learning_rate": 3.7449257826972954e-05, + "loss": 2.1242, + "step": 8675000 + }, + { + "epoch": 25.11, + "learning_rate": 3.7448534179325676e-05, + "loss": 2.1427, + "step": 8675500 + }, + { + "epoch": 25.11, + "learning_rate": 3.74478105316784e-05, + "loss": 2.164, + "step": 8676000 + }, + { + "epoch": 25.11, + "learning_rate": 3.744708833132642e-05, + "loss": 2.1568, + "step": 8676500 + }, + { + "epoch": 25.12, + "learning_rate": 3.744636468367914e-05, + "loss": 2.1389, + "step": 8677000 + }, + { + "epoch": 25.12, + "learning_rate": 3.744564248332716e-05, + "loss": 2.1179, + "step": 8677500 + }, + { + "epoch": 25.12, + "learning_rate": 3.744491883567988e-05, + "loss": 2.1256, + "step": 8678000 + }, + { + "epoch": 25.12, + "learning_rate": 3.74441951880326e-05, + "loss": 2.1189, + "step": 8678500 + }, + { + "epoch": 25.12, + "learning_rate": 3.7443471540385325e-05, + "loss": 2.1361, + "step": 8679000 + }, + { + "epoch": 25.12, + "learning_rate": 3.744274789273805e-05, + "loss": 2.1518, + "step": 8679500 + }, + { + "epoch": 25.13, + "learning_rate": 3.7442024245090776e-05, + "loss": 2.157, + "step": 8680000 + }, + { + "epoch": 25.13, + "learning_rate": 3.74413005974435e-05, + "loss": 2.1375, + "step": 8680500 + }, + { + "epoch": 25.13, + "learning_rate": 3.7440579844386814e-05, + "loss": 2.1262, + "step": 8681000 + }, + { + "epoch": 25.13, + "learning_rate": 3.7439856196739536e-05, + "loss": 2.1367, + "step": 8681500 + }, + { + "epoch": 25.13, + "learning_rate": 3.743913254909226e-05, + "loss": 2.1424, + "step": 8682000 + }, + { + "epoch": 25.13, + "learning_rate": 3.743840890144498e-05, + "loss": 2.1308, + "step": 8682500 + }, + { + "epoch": 25.13, + "learning_rate": 3.74376852537977e-05, + "loss": 2.1413, + "step": 8683000 + }, + { + "epoch": 25.14, + "learning_rate": 3.7436961606150425e-05, + "loss": 2.1377, + "step": 8683500 + }, + { + "epoch": 25.14, + "learning_rate": 3.743623795850315e-05, + "loss": 2.1568, + "step": 8684000 + }, + { + "epoch": 25.14, + "learning_rate": 3.7435514310855877e-05, + "loss": 2.1175, + "step": 8684500 + }, + { + "epoch": 25.14, + "learning_rate": 3.74347906632086e-05, + "loss": 2.1596, + "step": 8685000 + }, + { + "epoch": 25.14, + "learning_rate": 3.743406701556132e-05, + "loss": 2.1602, + "step": 8685500 + }, + { + "epoch": 25.14, + "learning_rate": 3.743334336791404e-05, + "loss": 2.1425, + "step": 8686000 + }, + { + "epoch": 25.14, + "learning_rate": 3.7432619720266765e-05, + "loss": 2.1432, + "step": 8686500 + }, + { + "epoch": 25.15, + "learning_rate": 3.743189607261949e-05, + "loss": 2.1264, + "step": 8687000 + }, + { + "epoch": 25.15, + "learning_rate": 3.743117242497222e-05, + "loss": 2.1438, + "step": 8687500 + }, + { + "epoch": 25.15, + "learning_rate": 3.743044877732494e-05, + "loss": 2.1249, + "step": 8688000 + }, + { + "epoch": 25.15, + "learning_rate": 3.742972512967766e-05, + "loss": 2.1357, + "step": 8688500 + }, + { + "epoch": 25.15, + "learning_rate": 3.7429001482030383e-05, + "loss": 2.1598, + "step": 8689000 + }, + { + "epoch": 25.15, + "learning_rate": 3.7428277834383106e-05, + "loss": 2.1356, + "step": 8689500 + }, + { + "epoch": 25.15, + "learning_rate": 3.742755418673583e-05, + "loss": 2.1433, + "step": 8690000 + }, + { + "epoch": 25.16, + "learning_rate": 3.742683053908855e-05, + "loss": 2.1297, + "step": 8690500 + }, + { + "epoch": 25.16, + "learning_rate": 3.742610833873657e-05, + "loss": 2.1297, + "step": 8691000 + }, + { + "epoch": 25.16, + "learning_rate": 3.742538613838459e-05, + "loss": 2.1294, + "step": 8691500 + }, + { + "epoch": 25.16, + "learning_rate": 3.742466249073731e-05, + "loss": 2.1544, + "step": 8692000 + }, + { + "epoch": 25.16, + "learning_rate": 3.742393884309003e-05, + "loss": 2.1588, + "step": 8692500 + }, + { + "epoch": 25.16, + "learning_rate": 3.7423215195442755e-05, + "loss": 2.1517, + "step": 8693000 + }, + { + "epoch": 25.16, + "learning_rate": 3.742249299509078e-05, + "loss": 2.1401, + "step": 8693500 + }, + { + "epoch": 25.17, + "learning_rate": 3.742177079473879e-05, + "loss": 2.1348, + "step": 8694000 + }, + { + "epoch": 25.17, + "learning_rate": 3.7421047147091515e-05, + "loss": 2.119, + "step": 8694500 + }, + { + "epoch": 25.17, + "learning_rate": 3.7420323499444244e-05, + "loss": 2.1308, + "step": 8695000 + }, + { + "epoch": 25.17, + "learning_rate": 3.7419599851796966e-05, + "loss": 2.1231, + "step": 8695500 + }, + { + "epoch": 25.17, + "learning_rate": 3.741887620414969e-05, + "loss": 2.1295, + "step": 8696000 + }, + { + "epoch": 25.17, + "learning_rate": 3.741815255650241e-05, + "loss": 2.1423, + "step": 8696500 + }, + { + "epoch": 25.17, + "learning_rate": 3.7417430356150426e-05, + "loss": 2.1238, + "step": 8697000 + }, + { + "epoch": 25.18, + "learning_rate": 3.7416706708503155e-05, + "loss": 2.136, + "step": 8697500 + }, + { + "epoch": 25.18, + "learning_rate": 3.741598306085588e-05, + "loss": 2.1424, + "step": 8698000 + }, + { + "epoch": 25.18, + "learning_rate": 3.74152594132086e-05, + "loss": 2.1348, + "step": 8698500 + }, + { + "epoch": 25.18, + "learning_rate": 3.741453576556132e-05, + "loss": 2.1387, + "step": 8699000 + }, + { + "epoch": 25.18, + "learning_rate": 3.741381356520934e-05, + "loss": 2.1476, + "step": 8699500 + }, + { + "epoch": 25.18, + "learning_rate": 3.741308991756206e-05, + "loss": 2.1415, + "step": 8700000 + }, + { + "epoch": 25.18, + "learning_rate": 3.741236626991478e-05, + "loss": 2.1634, + "step": 8700500 + }, + { + "epoch": 25.19, + "learning_rate": 3.7411642622267504e-05, + "loss": 2.1488, + "step": 8701000 + }, + { + "epoch": 25.19, + "learning_rate": 3.7410918974620226e-05, + "loss": 2.1551, + "step": 8701500 + }, + { + "epoch": 25.19, + "learning_rate": 3.7410195326972955e-05, + "loss": 2.1446, + "step": 8702000 + }, + { + "epoch": 25.19, + "learning_rate": 3.740947167932568e-05, + "loss": 2.144, + "step": 8702500 + }, + { + "epoch": 25.19, + "learning_rate": 3.7408748031678407e-05, + "loss": 2.1337, + "step": 8703000 + }, + { + "epoch": 25.19, + "learning_rate": 3.740802583132642e-05, + "loss": 2.1356, + "step": 8703500 + }, + { + "epoch": 25.19, + "learning_rate": 3.740730363097444e-05, + "loss": 2.1515, + "step": 8704000 + }, + { + "epoch": 25.2, + "learning_rate": 3.740657998332716e-05, + "loss": 2.1283, + "step": 8704500 + }, + { + "epoch": 25.2, + "learning_rate": 3.740585633567988e-05, + "loss": 2.1364, + "step": 8705000 + }, + { + "epoch": 25.2, + "learning_rate": 3.7405132688032604e-05, + "loss": 2.1344, + "step": 8705500 + }, + { + "epoch": 25.2, + "learning_rate": 3.740441048768063e-05, + "loss": 2.1311, + "step": 8706000 + }, + { + "epoch": 25.2, + "learning_rate": 3.740368684003335e-05, + "loss": 2.1441, + "step": 8706500 + }, + { + "epoch": 25.2, + "learning_rate": 3.7402964639681364e-05, + "loss": 2.1606, + "step": 8707000 + }, + { + "epoch": 25.2, + "learning_rate": 3.740224099203409e-05, + "loss": 2.1437, + "step": 8707500 + }, + { + "epoch": 25.21, + "learning_rate": 3.740151734438681e-05, + "loss": 2.1245, + "step": 8708000 + }, + { + "epoch": 25.21, + "learning_rate": 3.740079369673953e-05, + "loss": 2.1367, + "step": 8708500 + }, + { + "epoch": 25.21, + "learning_rate": 3.7400070049092253e-05, + "loss": 2.1456, + "step": 8709000 + }, + { + "epoch": 25.21, + "learning_rate": 3.7399346401444976e-05, + "loss": 2.1287, + "step": 8709500 + }, + { + "epoch": 25.21, + "learning_rate": 3.7398622753797705e-05, + "loss": 2.1102, + "step": 8710000 + }, + { + "epoch": 25.21, + "learning_rate": 3.739789910615043e-05, + "loss": 2.1369, + "step": 8710500 + }, + { + "epoch": 25.21, + "learning_rate": 3.7397175458503156e-05, + "loss": 2.1073, + "step": 8711000 + }, + { + "epoch": 25.22, + "learning_rate": 3.739645181085588e-05, + "loss": 2.1512, + "step": 8711500 + }, + { + "epoch": 25.22, + "learning_rate": 3.7395729610503894e-05, + "loss": 2.1504, + "step": 8712000 + }, + { + "epoch": 25.22, + "learning_rate": 3.7395005962856616e-05, + "loss": 2.1513, + "step": 8712500 + }, + { + "epoch": 25.22, + "learning_rate": 3.739428231520934e-05, + "loss": 2.1373, + "step": 8713000 + }, + { + "epoch": 25.22, + "learning_rate": 3.739355866756206e-05, + "loss": 2.1274, + "step": 8713500 + }, + { + "epoch": 25.22, + "learning_rate": 3.739283501991478e-05, + "loss": 2.1575, + "step": 8714000 + }, + { + "epoch": 25.22, + "learning_rate": 3.7392112819562805e-05, + "loss": 2.1315, + "step": 8714500 + }, + { + "epoch": 25.23, + "learning_rate": 3.739138917191553e-05, + "loss": 2.1499, + "step": 8715000 + }, + { + "epoch": 25.23, + "learning_rate": 3.739066552426825e-05, + "loss": 2.1287, + "step": 8715500 + }, + { + "epoch": 25.23, + "learning_rate": 3.738994187662097e-05, + "loss": 2.1483, + "step": 8716000 + }, + { + "epoch": 25.23, + "learning_rate": 3.7389218228973694e-05, + "loss": 2.1349, + "step": 8716500 + }, + { + "epoch": 25.23, + "learning_rate": 3.7388494581326416e-05, + "loss": 2.1438, + "step": 8717000 + }, + { + "epoch": 25.23, + "learning_rate": 3.7387770933679145e-05, + "loss": 2.1411, + "step": 8717500 + }, + { + "epoch": 25.24, + "learning_rate": 3.738704728603187e-05, + "loss": 2.1614, + "step": 8718000 + }, + { + "epoch": 25.24, + "learning_rate": 3.738632363838459e-05, + "loss": 2.135, + "step": 8718500 + }, + { + "epoch": 25.24, + "learning_rate": 3.738559999073731e-05, + "loss": 2.1449, + "step": 8719000 + }, + { + "epoch": 25.24, + "learning_rate": 3.7384876343090034e-05, + "loss": 2.1572, + "step": 8719500 + }, + { + "epoch": 25.24, + "learning_rate": 3.7384152695442756e-05, + "loss": 2.1376, + "step": 8720000 + }, + { + "epoch": 25.24, + "learning_rate": 3.738343049509078e-05, + "loss": 2.1297, + "step": 8720500 + }, + { + "epoch": 25.24, + "learning_rate": 3.73827068474435e-05, + "loss": 2.1333, + "step": 8721000 + }, + { + "epoch": 25.25, + "learning_rate": 3.738198319979622e-05, + "loss": 2.1251, + "step": 8721500 + }, + { + "epoch": 25.25, + "learning_rate": 3.738126099944424e-05, + "loss": 2.1384, + "step": 8722000 + }, + { + "epoch": 25.25, + "learning_rate": 3.738053735179696e-05, + "loss": 2.13, + "step": 8722500 + }, + { + "epoch": 25.25, + "learning_rate": 3.737981370414968e-05, + "loss": 2.157, + "step": 8723000 + }, + { + "epoch": 25.25, + "learning_rate": 3.7379090056502405e-05, + "loss": 2.1382, + "step": 8723500 + }, + { + "epoch": 25.25, + "learning_rate": 3.737836785615043e-05, + "loss": 2.1392, + "step": 8724000 + }, + { + "epoch": 25.25, + "learning_rate": 3.737764420850315e-05, + "loss": 2.12, + "step": 8724500 + }, + { + "epoch": 25.26, + "learning_rate": 3.737692056085588e-05, + "loss": 2.1294, + "step": 8725000 + }, + { + "epoch": 25.26, + "learning_rate": 3.73761969132086e-05, + "loss": 2.1359, + "step": 8725500 + }, + { + "epoch": 25.26, + "learning_rate": 3.7375473265561323e-05, + "loss": 2.1526, + "step": 8726000 + }, + { + "epoch": 25.26, + "learning_rate": 3.7374749617914046e-05, + "loss": 2.1395, + "step": 8726500 + }, + { + "epoch": 25.26, + "learning_rate": 3.737402597026677e-05, + "loss": 2.1501, + "step": 8727000 + }, + { + "epoch": 25.26, + "learning_rate": 3.7373303769914783e-05, + "loss": 2.1576, + "step": 8727500 + }, + { + "epoch": 25.26, + "learning_rate": 3.7372580122267506e-05, + "loss": 2.1446, + "step": 8728000 + }, + { + "epoch": 25.27, + "learning_rate": 3.7371856474620235e-05, + "loss": 2.1384, + "step": 8728500 + }, + { + "epoch": 25.27, + "learning_rate": 3.737113282697296e-05, + "loss": 2.1047, + "step": 8729000 + }, + { + "epoch": 25.27, + "learning_rate": 3.737041062662097e-05, + "loss": 2.1382, + "step": 8729500 + }, + { + "epoch": 25.27, + "learning_rate": 3.7369686978973695e-05, + "loss": 2.1428, + "step": 8730000 + }, + { + "epoch": 25.27, + "learning_rate": 3.736896333132642e-05, + "loss": 2.1387, + "step": 8730500 + }, + { + "epoch": 25.27, + "learning_rate": 3.736823968367914e-05, + "loss": 2.1367, + "step": 8731000 + }, + { + "epoch": 25.27, + "learning_rate": 3.736751603603186e-05, + "loss": 2.122, + "step": 8731500 + }, + { + "epoch": 25.28, + "learning_rate": 3.7366792388384584e-05, + "loss": 2.1418, + "step": 8732000 + }, + { + "epoch": 25.28, + "learning_rate": 3.736606874073731e-05, + "loss": 2.117, + "step": 8732500 + }, + { + "epoch": 25.28, + "learning_rate": 3.7365346540385335e-05, + "loss": 2.1505, + "step": 8733000 + }, + { + "epoch": 25.28, + "learning_rate": 3.736462289273806e-05, + "loss": 2.1168, + "step": 8733500 + }, + { + "epoch": 25.28, + "learning_rate": 3.736389924509078e-05, + "loss": 2.1482, + "step": 8734000 + }, + { + "epoch": 25.28, + "learning_rate": 3.73631755974435e-05, + "loss": 2.1437, + "step": 8734500 + }, + { + "epoch": 25.28, + "learning_rate": 3.7362451949796224e-05, + "loss": 2.1474, + "step": 8735000 + }, + { + "epoch": 25.29, + "learning_rate": 3.7361728302148946e-05, + "loss": 2.1335, + "step": 8735500 + }, + { + "epoch": 25.29, + "learning_rate": 3.736100465450167e-05, + "loss": 2.1469, + "step": 8736000 + }, + { + "epoch": 25.29, + "learning_rate": 3.736028100685439e-05, + "loss": 2.1337, + "step": 8736500 + }, + { + "epoch": 25.29, + "learning_rate": 3.735955735920711e-05, + "loss": 2.142, + "step": 8737000 + }, + { + "epoch": 25.29, + "learning_rate": 3.7358833711559835e-05, + "loss": 2.1372, + "step": 8737500 + }, + { + "epoch": 25.29, + "learning_rate": 3.735811006391256e-05, + "loss": 2.1352, + "step": 8738000 + }, + { + "epoch": 25.29, + "learning_rate": 3.7357386416265286e-05, + "loss": 2.1173, + "step": 8738500 + }, + { + "epoch": 25.3, + "learning_rate": 3.7356665663208595e-05, + "loss": 2.1318, + "step": 8739000 + }, + { + "epoch": 25.3, + "learning_rate": 3.735594201556132e-05, + "loss": 2.1281, + "step": 8739500 + }, + { + "epoch": 25.3, + "learning_rate": 3.7355218367914046e-05, + "loss": 2.1388, + "step": 8740000 + }, + { + "epoch": 25.3, + "learning_rate": 3.735449472026677e-05, + "loss": 2.158, + "step": 8740500 + }, + { + "epoch": 25.3, + "learning_rate": 3.7353772519914784e-05, + "loss": 2.1679, + "step": 8741000 + }, + { + "epoch": 25.3, + "learning_rate": 3.7353048872267506e-05, + "loss": 2.1372, + "step": 8741500 + }, + { + "epoch": 25.3, + "learning_rate": 3.7352325224620235e-05, + "loss": 2.1399, + "step": 8742000 + }, + { + "epoch": 25.31, + "learning_rate": 3.735160157697296e-05, + "loss": 2.1442, + "step": 8742500 + }, + { + "epoch": 25.31, + "learning_rate": 3.735087792932568e-05, + "loss": 2.143, + "step": 8743000 + }, + { + "epoch": 25.31, + "learning_rate": 3.7350155728973695e-05, + "loss": 2.1479, + "step": 8743500 + }, + { + "epoch": 25.31, + "learning_rate": 3.734943208132642e-05, + "loss": 2.1425, + "step": 8744000 + }, + { + "epoch": 25.31, + "learning_rate": 3.734870843367914e-05, + "loss": 2.1431, + "step": 8744500 + }, + { + "epoch": 25.31, + "learning_rate": 3.734798478603186e-05, + "loss": 2.1581, + "step": 8745000 + }, + { + "epoch": 25.31, + "learning_rate": 3.7347261138384584e-05, + "loss": 2.1435, + "step": 8745500 + }, + { + "epoch": 25.32, + "learning_rate": 3.734653749073731e-05, + "loss": 2.1275, + "step": 8746000 + }, + { + "epoch": 25.32, + "learning_rate": 3.7345813843090036e-05, + "loss": 2.1488, + "step": 8746500 + }, + { + "epoch": 25.32, + "learning_rate": 3.734509019544276e-05, + "loss": 2.1501, + "step": 8747000 + }, + { + "epoch": 25.32, + "learning_rate": 3.734436654779549e-05, + "loss": 2.1434, + "step": 8747500 + }, + { + "epoch": 25.32, + "learning_rate": 3.734364290014821e-05, + "loss": 2.1363, + "step": 8748000 + }, + { + "epoch": 25.32, + "learning_rate": 3.7342920699796225e-05, + "loss": 2.1485, + "step": 8748500 + }, + { + "epoch": 25.32, + "learning_rate": 3.734219849944424e-05, + "loss": 2.1238, + "step": 8749000 + }, + { + "epoch": 25.33, + "learning_rate": 3.734147629909226e-05, + "loss": 2.1669, + "step": 8749500 + }, + { + "epoch": 25.33, + "learning_rate": 3.7340752651444985e-05, + "loss": 2.1388, + "step": 8750000 + }, + { + "epoch": 25.33, + "learning_rate": 3.734002900379771e-05, + "loss": 2.1322, + "step": 8750500 + }, + { + "epoch": 25.33, + "learning_rate": 3.733930535615043e-05, + "loss": 2.1292, + "step": 8751000 + }, + { + "epoch": 25.33, + "learning_rate": 3.733858170850315e-05, + "loss": 2.133, + "step": 8751500 + }, + { + "epoch": 25.33, + "learning_rate": 3.733785950815117e-05, + "loss": 2.1313, + "step": 8752000 + }, + { + "epoch": 25.33, + "learning_rate": 3.733713586050389e-05, + "loss": 2.1622, + "step": 8752500 + }, + { + "epoch": 25.34, + "learning_rate": 3.733641221285661e-05, + "loss": 2.1408, + "step": 8753000 + }, + { + "epoch": 25.34, + "learning_rate": 3.7335688565209334e-05, + "loss": 2.1379, + "step": 8753500 + }, + { + "epoch": 25.34, + "learning_rate": 3.733496491756206e-05, + "loss": 2.1283, + "step": 8754000 + }, + { + "epoch": 25.34, + "learning_rate": 3.733424271721008e-05, + "loss": 2.1493, + "step": 8754500 + }, + { + "epoch": 25.34, + "learning_rate": 3.73335190695628e-05, + "loss": 2.1383, + "step": 8755000 + }, + { + "epoch": 25.34, + "learning_rate": 3.733279542191553e-05, + "loss": 2.131, + "step": 8755500 + }, + { + "epoch": 25.35, + "learning_rate": 3.733207177426825e-05, + "loss": 2.1597, + "step": 8756000 + }, + { + "epoch": 25.35, + "learning_rate": 3.7331348126620974e-05, + "loss": 2.1396, + "step": 8756500 + }, + { + "epoch": 25.35, + "learning_rate": 3.7330624478973696e-05, + "loss": 2.1267, + "step": 8757000 + }, + { + "epoch": 25.35, + "learning_rate": 3.732990227862171e-05, + "loss": 2.151, + "step": 8757500 + }, + { + "epoch": 25.35, + "learning_rate": 3.7329178630974434e-05, + "loss": 2.143, + "step": 8758000 + }, + { + "epoch": 25.35, + "learning_rate": 3.732845498332716e-05, + "loss": 2.1423, + "step": 8758500 + }, + { + "epoch": 25.35, + "learning_rate": 3.732773278297518e-05, + "loss": 2.1291, + "step": 8759000 + }, + { + "epoch": 25.36, + "learning_rate": 3.73270091353279e-05, + "loss": 2.1456, + "step": 8759500 + }, + { + "epoch": 25.36, + "learning_rate": 3.732628548768062e-05, + "loss": 2.1502, + "step": 8760000 + }, + { + "epoch": 25.36, + "learning_rate": 3.7325561840033345e-05, + "loss": 2.1525, + "step": 8760500 + }, + { + "epoch": 25.36, + "learning_rate": 3.732483819238607e-05, + "loss": 2.1447, + "step": 8761000 + }, + { + "epoch": 25.36, + "learning_rate": 3.732411454473879e-05, + "loss": 2.1554, + "step": 8761500 + }, + { + "epoch": 25.36, + "learning_rate": 3.732339089709151e-05, + "loss": 2.1539, + "step": 8762000 + }, + { + "epoch": 25.36, + "learning_rate": 3.732266724944424e-05, + "loss": 2.1576, + "step": 8762500 + }, + { + "epoch": 25.37, + "learning_rate": 3.732194360179696e-05, + "loss": 2.1255, + "step": 8763000 + }, + { + "epoch": 25.37, + "learning_rate": 3.7321219954149686e-05, + "loss": 2.1441, + "step": 8763500 + }, + { + "epoch": 25.37, + "learning_rate": 3.7320496306502415e-05, + "loss": 2.1539, + "step": 8764000 + }, + { + "epoch": 25.37, + "learning_rate": 3.731977410615043e-05, + "loss": 2.1425, + "step": 8764500 + }, + { + "epoch": 25.37, + "learning_rate": 3.731905045850315e-05, + "loss": 2.1528, + "step": 8765000 + }, + { + "epoch": 25.37, + "learning_rate": 3.7318326810855875e-05, + "loss": 2.1503, + "step": 8765500 + }, + { + "epoch": 25.37, + "learning_rate": 3.73176031632086e-05, + "loss": 2.1622, + "step": 8766000 + }, + { + "epoch": 25.38, + "learning_rate": 3.731687951556132e-05, + "loss": 2.145, + "step": 8766500 + }, + { + "epoch": 25.38, + "learning_rate": 3.731615586791404e-05, + "loss": 2.1288, + "step": 8767000 + }, + { + "epoch": 25.38, + "learning_rate": 3.7315432220266763e-05, + "loss": 2.1321, + "step": 8767500 + }, + { + "epoch": 25.38, + "learning_rate": 3.7314708572619486e-05, + "loss": 2.1285, + "step": 8768000 + }, + { + "epoch": 25.38, + "learning_rate": 3.7313984924972215e-05, + "loss": 2.1213, + "step": 8768500 + }, + { + "epoch": 25.38, + "learning_rate": 3.731326127732494e-05, + "loss": 2.1506, + "step": 8769000 + }, + { + "epoch": 25.38, + "learning_rate": 3.731253907697295e-05, + "loss": 2.1513, + "step": 8769500 + }, + { + "epoch": 25.39, + "learning_rate": 3.731181542932568e-05, + "loss": 2.1257, + "step": 8770000 + }, + { + "epoch": 25.39, + "learning_rate": 3.7311091781678404e-05, + "loss": 2.1353, + "step": 8770500 + }, + { + "epoch": 25.39, + "learning_rate": 3.7310368134031126e-05, + "loss": 2.1617, + "step": 8771000 + }, + { + "epoch": 25.39, + "learning_rate": 3.730964593367914e-05, + "loss": 2.1375, + "step": 8771500 + }, + { + "epoch": 25.39, + "learning_rate": 3.7308922286031864e-05, + "loss": 2.1609, + "step": 8772000 + }, + { + "epoch": 25.39, + "learning_rate": 3.7308198638384586e-05, + "loss": 2.1546, + "step": 8772500 + }, + { + "epoch": 25.39, + "learning_rate": 3.730747643803261e-05, + "loss": 2.1633, + "step": 8773000 + }, + { + "epoch": 25.4, + "learning_rate": 3.730675279038533e-05, + "loss": 2.1565, + "step": 8773500 + }, + { + "epoch": 25.4, + "learning_rate": 3.730602914273805e-05, + "loss": 2.1306, + "step": 8774000 + }, + { + "epoch": 25.4, + "learning_rate": 3.7305305495090775e-05, + "loss": 2.1513, + "step": 8774500 + }, + { + "epoch": 25.4, + "learning_rate": 3.73045818474435e-05, + "loss": 2.1453, + "step": 8775000 + }, + { + "epoch": 25.4, + "learning_rate": 3.730385819979622e-05, + "loss": 2.1375, + "step": 8775500 + }, + { + "epoch": 25.4, + "learning_rate": 3.730313455214894e-05, + "loss": 2.1445, + "step": 8776000 + }, + { + "epoch": 25.4, + "learning_rate": 3.7302410904501664e-05, + "loss": 2.1442, + "step": 8776500 + }, + { + "epoch": 25.41, + "learning_rate": 3.7301688704149686e-05, + "loss": 2.1414, + "step": 8777000 + }, + { + "epoch": 25.41, + "learning_rate": 3.7300965056502415e-05, + "loss": 2.1414, + "step": 8777500 + }, + { + "epoch": 25.41, + "learning_rate": 3.730024140885514e-05, + "loss": 2.1443, + "step": 8778000 + }, + { + "epoch": 25.41, + "learning_rate": 3.729951776120786e-05, + "loss": 2.1391, + "step": 8778500 + }, + { + "epoch": 25.41, + "learning_rate": 3.729879411356058e-05, + "loss": 2.1352, + "step": 8779000 + }, + { + "epoch": 25.41, + "learning_rate": 3.7298070465913304e-05, + "loss": 2.1166, + "step": 8779500 + }, + { + "epoch": 25.41, + "learning_rate": 3.7297346818266026e-05, + "loss": 2.146, + "step": 8780000 + }, + { + "epoch": 25.42, + "learning_rate": 3.729662317061875e-05, + "loss": 2.1499, + "step": 8780500 + }, + { + "epoch": 25.42, + "learning_rate": 3.729589952297147e-05, + "loss": 2.1289, + "step": 8781000 + }, + { + "epoch": 25.42, + "learning_rate": 3.729517732261949e-05, + "loss": 2.1581, + "step": 8781500 + }, + { + "epoch": 25.42, + "learning_rate": 3.7294453674972216e-05, + "loss": 2.1548, + "step": 8782000 + }, + { + "epoch": 25.42, + "learning_rate": 3.729373002732494e-05, + "loss": 2.152, + "step": 8782500 + }, + { + "epoch": 25.42, + "learning_rate": 3.729300637967766e-05, + "loss": 2.1315, + "step": 8783000 + }, + { + "epoch": 25.42, + "learning_rate": 3.7292284179325676e-05, + "loss": 2.1271, + "step": 8783500 + }, + { + "epoch": 25.43, + "learning_rate": 3.72915605316784e-05, + "loss": 2.1267, + "step": 8784000 + }, + { + "epoch": 25.43, + "learning_rate": 3.729083688403112e-05, + "loss": 2.1561, + "step": 8784500 + }, + { + "epoch": 25.43, + "learning_rate": 3.729011323638385e-05, + "loss": 2.1385, + "step": 8785000 + }, + { + "epoch": 25.43, + "learning_rate": 3.7289391036031865e-05, + "loss": 2.1532, + "step": 8785500 + }, + { + "epoch": 25.43, + "learning_rate": 3.7288667388384594e-05, + "loss": 2.1355, + "step": 8786000 + }, + { + "epoch": 25.43, + "learning_rate": 3.728794518803261e-05, + "loss": 2.1468, + "step": 8786500 + }, + { + "epoch": 25.43, + "learning_rate": 3.728722154038533e-05, + "loss": 2.1671, + "step": 8787000 + }, + { + "epoch": 25.44, + "learning_rate": 3.7286497892738054e-05, + "loss": 2.1503, + "step": 8787500 + }, + { + "epoch": 25.44, + "learning_rate": 3.728577569238607e-05, + "loss": 2.1667, + "step": 8788000 + }, + { + "epoch": 25.44, + "learning_rate": 3.728505204473879e-05, + "loss": 2.1319, + "step": 8788500 + }, + { + "epoch": 25.44, + "learning_rate": 3.7284328397091514e-05, + "loss": 2.1254, + "step": 8789000 + }, + { + "epoch": 25.44, + "learning_rate": 3.728360474944424e-05, + "loss": 2.1396, + "step": 8789500 + }, + { + "epoch": 25.44, + "learning_rate": 3.7282881101796965e-05, + "loss": 2.1369, + "step": 8790000 + }, + { + "epoch": 25.44, + "learning_rate": 3.728215745414969e-05, + "loss": 2.1298, + "step": 8790500 + }, + { + "epoch": 25.45, + "learning_rate": 3.728143380650241e-05, + "loss": 2.1316, + "step": 8791000 + }, + { + "epoch": 25.45, + "learning_rate": 3.728071015885513e-05, + "loss": 2.1597, + "step": 8791500 + }, + { + "epoch": 25.45, + "learning_rate": 3.7279986511207854e-05, + "loss": 2.1239, + "step": 8792000 + }, + { + "epoch": 25.45, + "learning_rate": 3.727926286356058e-05, + "loss": 2.1307, + "step": 8792500 + }, + { + "epoch": 25.45, + "learning_rate": 3.7278539215913305e-05, + "loss": 2.1287, + "step": 8793000 + }, + { + "epoch": 25.45, + "learning_rate": 3.727781556826603e-05, + "loss": 2.1383, + "step": 8793500 + }, + { + "epoch": 25.46, + "learning_rate": 3.727709192061875e-05, + "loss": 2.1386, + "step": 8794000 + }, + { + "epoch": 25.46, + "learning_rate": 3.727636827297147e-05, + "loss": 2.1388, + "step": 8794500 + }, + { + "epoch": 25.46, + "learning_rate": 3.7275644625324194e-05, + "loss": 2.1361, + "step": 8795000 + }, + { + "epoch": 25.46, + "learning_rate": 3.7274920977676916e-05, + "loss": 2.1362, + "step": 8795500 + }, + { + "epoch": 25.46, + "learning_rate": 3.7274197330029645e-05, + "loss": 2.1527, + "step": 8796000 + }, + { + "epoch": 25.46, + "learning_rate": 3.727347368238237e-05, + "loss": 2.1298, + "step": 8796500 + }, + { + "epoch": 25.46, + "learning_rate": 3.727275003473509e-05, + "loss": 2.1528, + "step": 8797000 + }, + { + "epoch": 25.47, + "learning_rate": 3.727202638708781e-05, + "loss": 2.1531, + "step": 8797500 + }, + { + "epoch": 25.47, + "learning_rate": 3.727130418673583e-05, + "loss": 2.1447, + "step": 8798000 + }, + { + "epoch": 25.47, + "learning_rate": 3.727058343367914e-05, + "loss": 2.1628, + "step": 8798500 + }, + { + "epoch": 25.47, + "learning_rate": 3.7269859786031865e-05, + "loss": 2.1406, + "step": 8799000 + }, + { + "epoch": 25.47, + "learning_rate": 3.726913613838459e-05, + "loss": 2.1397, + "step": 8799500 + }, + { + "epoch": 25.47, + "learning_rate": 3.726841249073732e-05, + "loss": 2.1713, + "step": 8800000 + }, + { + "epoch": 25.47, + "learning_rate": 3.726769029038533e-05, + "loss": 2.126, + "step": 8800500 + }, + { + "epoch": 25.48, + "learning_rate": 3.7266966642738054e-05, + "loss": 2.133, + "step": 8801000 + }, + { + "epoch": 25.48, + "learning_rate": 3.726624299509078e-05, + "loss": 2.1465, + "step": 8801500 + }, + { + "epoch": 25.48, + "learning_rate": 3.72655193474435e-05, + "loss": 2.1265, + "step": 8802000 + }, + { + "epoch": 25.48, + "learning_rate": 3.726479714709152e-05, + "loss": 2.1425, + "step": 8802500 + }, + { + "epoch": 25.48, + "learning_rate": 3.7264073499444243e-05, + "loss": 2.1311, + "step": 8803000 + }, + { + "epoch": 25.48, + "learning_rate": 3.7263349851796966e-05, + "loss": 2.1497, + "step": 8803500 + }, + { + "epoch": 25.48, + "learning_rate": 3.726262620414969e-05, + "loss": 2.1495, + "step": 8804000 + }, + { + "epoch": 25.49, + "learning_rate": 3.726190255650241e-05, + "loss": 2.149, + "step": 8804500 + }, + { + "epoch": 25.49, + "learning_rate": 3.726117890885513e-05, + "loss": 2.1636, + "step": 8805000 + }, + { + "epoch": 25.49, + "learning_rate": 3.7260455261207855e-05, + "loss": 2.1421, + "step": 8805500 + }, + { + "epoch": 25.49, + "learning_rate": 3.725973161356058e-05, + "loss": 2.1386, + "step": 8806000 + }, + { + "epoch": 25.49, + "learning_rate": 3.72590079659133e-05, + "loss": 2.1569, + "step": 8806500 + }, + { + "epoch": 25.49, + "learning_rate": 3.725828431826602e-05, + "loss": 2.1697, + "step": 8807000 + }, + { + "epoch": 25.49, + "learning_rate": 3.725756067061875e-05, + "loss": 2.1843, + "step": 8807500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725683702297147e-05, + "loss": 2.1585, + "step": 8808000 + }, + { + "epoch": 25.5, + "learning_rate": 3.7256113375324195e-05, + "loss": 2.1302, + "step": 8808500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725538972767692e-05, + "loss": 2.122, + "step": 8809000 + }, + { + "epoch": 25.5, + "learning_rate": 3.7254666080029646e-05, + "loss": 2.1358, + "step": 8809500 + }, + { + "epoch": 25.5, + "learning_rate": 3.725394243238237e-05, + "loss": 2.1437, + "step": 8810000 + }, + { + "epoch": 25.5, + "learning_rate": 3.725321878473509e-05, + "loss": 2.138, + "step": 8810500 + }, + { + "epoch": 25.5, + "learning_rate": 3.7252496584383106e-05, + "loss": 2.152, + "step": 8811000 + }, + { + "epoch": 25.51, + "learning_rate": 3.725177438403112e-05, + "loss": 2.1411, + "step": 8811500 + }, + { + "epoch": 25.51, + "learning_rate": 3.7251050736383844e-05, + "loss": 2.1417, + "step": 8812000 + }, + { + "epoch": 25.51, + "learning_rate": 3.725032708873657e-05, + "loss": 2.1396, + "step": 8812500 + }, + { + "epoch": 25.51, + "learning_rate": 3.7249603441089295e-05, + "loss": 2.127, + "step": 8813000 + }, + { + "epoch": 25.51, + "learning_rate": 3.724887979344202e-05, + "loss": 2.1191, + "step": 8813500 + }, + { + "epoch": 25.51, + "learning_rate": 3.724815759309003e-05, + "loss": 2.1389, + "step": 8814000 + }, + { + "epoch": 25.51, + "learning_rate": 3.7247433945442755e-05, + "loss": 2.1538, + "step": 8814500 + }, + { + "epoch": 25.52, + "learning_rate": 3.7246710297795484e-05, + "loss": 2.1251, + "step": 8815000 + }, + { + "epoch": 25.52, + "learning_rate": 3.7245986650148206e-05, + "loss": 2.1593, + "step": 8815500 + }, + { + "epoch": 25.52, + "learning_rate": 3.724526589709152e-05, + "loss": 2.1379, + "step": 8816000 + }, + { + "epoch": 25.52, + "learning_rate": 3.724454369673954e-05, + "loss": 2.1711, + "step": 8816500 + }, + { + "epoch": 25.52, + "learning_rate": 3.724382004909226e-05, + "loss": 2.1667, + "step": 8817000 + }, + { + "epoch": 25.52, + "learning_rate": 3.724309640144498e-05, + "loss": 2.111, + "step": 8817500 + }, + { + "epoch": 25.52, + "learning_rate": 3.7242372753797704e-05, + "loss": 2.1671, + "step": 8818000 + }, + { + "epoch": 25.53, + "learning_rate": 3.7241649106150426e-05, + "loss": 2.136, + "step": 8818500 + }, + { + "epoch": 25.53, + "learning_rate": 3.724092545850315e-05, + "loss": 2.1636, + "step": 8819000 + }, + { + "epoch": 25.53, + "learning_rate": 3.724020181085587e-05, + "loss": 2.1358, + "step": 8819500 + }, + { + "epoch": 25.53, + "learning_rate": 3.723947816320859e-05, + "loss": 2.1514, + "step": 8820000 + }, + { + "epoch": 25.53, + "learning_rate": 3.723875451556132e-05, + "loss": 2.1552, + "step": 8820500 + }, + { + "epoch": 25.53, + "learning_rate": 3.7238030867914044e-05, + "loss": 2.1541, + "step": 8821000 + }, + { + "epoch": 25.53, + "learning_rate": 3.723730866756206e-05, + "loss": 2.1513, + "step": 8821500 + }, + { + "epoch": 25.54, + "learning_rate": 3.723658501991478e-05, + "loss": 2.1404, + "step": 8822000 + }, + { + "epoch": 25.54, + "learning_rate": 3.723586137226751e-05, + "loss": 2.1584, + "step": 8822500 + }, + { + "epoch": 25.54, + "learning_rate": 3.723513917191553e-05, + "loss": 2.1296, + "step": 8823000 + }, + { + "epoch": 25.54, + "learning_rate": 3.723441552426825e-05, + "loss": 2.1703, + "step": 8823500 + }, + { + "epoch": 25.54, + "learning_rate": 3.723369187662097e-05, + "loss": 2.1471, + "step": 8824000 + }, + { + "epoch": 25.54, + "learning_rate": 3.72329682289737e-05, + "loss": 2.1183, + "step": 8824500 + }, + { + "epoch": 25.54, + "learning_rate": 3.7232246028621716e-05, + "loss": 2.1427, + "step": 8825000 + }, + { + "epoch": 25.55, + "learning_rate": 3.723152238097444e-05, + "loss": 2.1526, + "step": 8825500 + }, + { + "epoch": 25.55, + "learning_rate": 3.723079873332716e-05, + "loss": 2.1572, + "step": 8826000 + }, + { + "epoch": 25.55, + "learning_rate": 3.723007508567988e-05, + "loss": 2.1235, + "step": 8826500 + }, + { + "epoch": 25.55, + "learning_rate": 3.7229351438032605e-05, + "loss": 2.1413, + "step": 8827000 + }, + { + "epoch": 25.55, + "learning_rate": 3.722862779038533e-05, + "loss": 2.1589, + "step": 8827500 + }, + { + "epoch": 25.55, + "learning_rate": 3.722790414273805e-05, + "loss": 2.1673, + "step": 8828000 + }, + { + "epoch": 25.55, + "learning_rate": 3.722718049509077e-05, + "loss": 2.1736, + "step": 8828500 + }, + { + "epoch": 25.56, + "learning_rate": 3.72264568474435e-05, + "loss": 2.1289, + "step": 8829000 + }, + { + "epoch": 25.56, + "learning_rate": 3.7225734647091516e-05, + "loss": 2.1274, + "step": 8829500 + }, + { + "epoch": 25.56, + "learning_rate": 3.7225010999444245e-05, + "loss": 2.1674, + "step": 8830000 + }, + { + "epoch": 25.56, + "learning_rate": 3.722428735179697e-05, + "loss": 2.1582, + "step": 8830500 + }, + { + "epoch": 25.56, + "learning_rate": 3.722356370414969e-05, + "loss": 2.1191, + "step": 8831000 + }, + { + "epoch": 25.56, + "learning_rate": 3.722284005650241e-05, + "loss": 2.1365, + "step": 8831500 + }, + { + "epoch": 25.57, + "learning_rate": 3.7222116408855134e-05, + "loss": 2.1457, + "step": 8832000 + }, + { + "epoch": 25.57, + "learning_rate": 3.7221392761207856e-05, + "loss": 2.1431, + "step": 8832500 + }, + { + "epoch": 25.57, + "learning_rate": 3.722066911356058e-05, + "loss": 2.134, + "step": 8833000 + }, + { + "epoch": 25.57, + "learning_rate": 3.72199469132086e-05, + "loss": 2.1304, + "step": 8833500 + }, + { + "epoch": 25.57, + "learning_rate": 3.721922326556132e-05, + "loss": 2.1316, + "step": 8834000 + }, + { + "epoch": 25.57, + "learning_rate": 3.7218499617914045e-05, + "loss": 2.1543, + "step": 8834500 + }, + { + "epoch": 25.57, + "learning_rate": 3.721777597026677e-05, + "loss": 2.1429, + "step": 8835000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721705376991478e-05, + "loss": 2.1336, + "step": 8835500 + }, + { + "epoch": 25.58, + "learning_rate": 3.7216330122267505e-05, + "loss": 2.1074, + "step": 8836000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721560647462023e-05, + "loss": 2.1454, + "step": 8836500 + }, + { + "epoch": 25.58, + "learning_rate": 3.721488427426825e-05, + "loss": 2.1506, + "step": 8837000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721416062662097e-05, + "loss": 2.1578, + "step": 8837500 + }, + { + "epoch": 25.58, + "learning_rate": 3.72134369789737e-05, + "loss": 2.1417, + "step": 8838000 + }, + { + "epoch": 25.58, + "learning_rate": 3.721271333132642e-05, + "loss": 2.163, + "step": 8838500 + }, + { + "epoch": 25.59, + "learning_rate": 3.7211989683679146e-05, + "loss": 2.1459, + "step": 8839000 + }, + { + "epoch": 25.59, + "learning_rate": 3.721126603603187e-05, + "loss": 2.1443, + "step": 8839500 + }, + { + "epoch": 25.59, + "learning_rate": 3.721054238838459e-05, + "loss": 2.1338, + "step": 8840000 + }, + { + "epoch": 25.59, + "learning_rate": 3.720981874073731e-05, + "loss": 2.1389, + "step": 8840500 + }, + { + "epoch": 25.59, + "learning_rate": 3.7209095093090034e-05, + "loss": 2.158, + "step": 8841000 + }, + { + "epoch": 25.59, + "learning_rate": 3.720837144544276e-05, + "loss": 2.1521, + "step": 8841500 + }, + { + "epoch": 25.59, + "learning_rate": 3.720764779779548e-05, + "loss": 2.133, + "step": 8842000 + }, + { + "epoch": 25.6, + "learning_rate": 3.72069241501482e-05, + "loss": 2.1157, + "step": 8842500 + }, + { + "epoch": 25.6, + "learning_rate": 3.7206201949796223e-05, + "loss": 2.1605, + "step": 8843000 + }, + { + "epoch": 25.6, + "learning_rate": 3.7205478302148946e-05, + "loss": 2.1536, + "step": 8843500 + }, + { + "epoch": 25.6, + "learning_rate": 3.720475465450167e-05, + "loss": 2.1313, + "step": 8844000 + }, + { + "epoch": 25.6, + "learning_rate": 3.720403100685439e-05, + "loss": 2.1387, + "step": 8844500 + }, + { + "epoch": 25.6, + "learning_rate": 3.720330735920712e-05, + "loss": 2.1446, + "step": 8845000 + }, + { + "epoch": 25.6, + "learning_rate": 3.720258371155984e-05, + "loss": 2.134, + "step": 8845500 + }, + { + "epoch": 25.61, + "learning_rate": 3.7201860063912564e-05, + "loss": 2.1326, + "step": 8846000 + }, + { + "epoch": 25.61, + "learning_rate": 3.7201136416265286e-05, + "loss": 2.1532, + "step": 8846500 + }, + { + "epoch": 25.61, + "learning_rate": 3.72004142159133e-05, + "loss": 2.1624, + "step": 8847000 + }, + { + "epoch": 25.61, + "learning_rate": 3.7199692015561324e-05, + "loss": 2.136, + "step": 8847500 + }, + { + "epoch": 25.61, + "learning_rate": 3.7198968367914046e-05, + "loss": 2.1259, + "step": 8848000 + }, + { + "epoch": 25.61, + "learning_rate": 3.719824472026677e-05, + "loss": 2.1583, + "step": 8848500 + }, + { + "epoch": 25.61, + "learning_rate": 3.719752107261949e-05, + "loss": 2.1116, + "step": 8849000 + }, + { + "epoch": 25.62, + "learning_rate": 3.719679742497221e-05, + "loss": 2.1055, + "step": 8849500 + }, + { + "epoch": 25.62, + "learning_rate": 3.7196073777324935e-05, + "loss": 2.1308, + "step": 8850000 + }, + { + "epoch": 25.62, + "learning_rate": 3.719535012967766e-05, + "loss": 2.1281, + "step": 8850500 + }, + { + "epoch": 25.62, + "learning_rate": 3.719462648203038e-05, + "loss": 2.1534, + "step": 8851000 + }, + { + "epoch": 25.62, + "learning_rate": 3.71939042816784e-05, + "loss": 2.151, + "step": 8851500 + }, + { + "epoch": 25.62, + "learning_rate": 3.719318208132642e-05, + "loss": 2.1499, + "step": 8852000 + }, + { + "epoch": 25.62, + "learning_rate": 3.7192458433679146e-05, + "loss": 2.1329, + "step": 8852500 + }, + { + "epoch": 25.63, + "learning_rate": 3.719173623332716e-05, + "loss": 2.1712, + "step": 8853000 + }, + { + "epoch": 25.63, + "learning_rate": 3.7191012585679884e-05, + "loss": 2.1569, + "step": 8853500 + }, + { + "epoch": 25.63, + "learning_rate": 3.7190288938032606e-05, + "loss": 2.1365, + "step": 8854000 + }, + { + "epoch": 25.63, + "learning_rate": 3.718956673768063e-05, + "loss": 2.154, + "step": 8854500 + }, + { + "epoch": 25.63, + "learning_rate": 3.718884309003335e-05, + "loss": 2.1352, + "step": 8855000 + }, + { + "epoch": 25.63, + "learning_rate": 3.718811944238607e-05, + "loss": 2.1265, + "step": 8855500 + }, + { + "epoch": 25.63, + "learning_rate": 3.7187395794738795e-05, + "loss": 2.1489, + "step": 8856000 + }, + { + "epoch": 25.64, + "learning_rate": 3.718667214709152e-05, + "loss": 2.1435, + "step": 8856500 + }, + { + "epoch": 25.64, + "learning_rate": 3.718594849944424e-05, + "loss": 2.157, + "step": 8857000 + }, + { + "epoch": 25.64, + "learning_rate": 3.718522485179696e-05, + "loss": 2.1586, + "step": 8857500 + }, + { + "epoch": 25.64, + "learning_rate": 3.7184501204149684e-05, + "loss": 2.1255, + "step": 8858000 + }, + { + "epoch": 25.64, + "learning_rate": 3.7183777556502407e-05, + "loss": 2.1367, + "step": 8858500 + }, + { + "epoch": 25.64, + "learning_rate": 3.718305390885513e-05, + "loss": 2.1572, + "step": 8859000 + }, + { + "epoch": 25.64, + "learning_rate": 3.718233026120785e-05, + "loss": 2.1621, + "step": 8859500 + }, + { + "epoch": 25.65, + "learning_rate": 3.718160806085588e-05, + "loss": 2.1704, + "step": 8860000 + }, + { + "epoch": 25.65, + "learning_rate": 3.71808844132086e-05, + "loss": 2.1306, + "step": 8860500 + }, + { + "epoch": 25.65, + "learning_rate": 3.7180160765561325e-05, + "loss": 2.1422, + "step": 8861000 + }, + { + "epoch": 25.65, + "learning_rate": 3.717943856520934e-05, + "loss": 2.1629, + "step": 8861500 + }, + { + "epoch": 25.65, + "learning_rate": 3.717871491756206e-05, + "loss": 2.1433, + "step": 8862000 + }, + { + "epoch": 25.65, + "learning_rate": 3.7177991269914785e-05, + "loss": 2.1427, + "step": 8862500 + }, + { + "epoch": 25.65, + "learning_rate": 3.71772690695628e-05, + "loss": 2.1365, + "step": 8863000 + }, + { + "epoch": 25.66, + "learning_rate": 3.717654542191553e-05, + "loss": 2.1534, + "step": 8863500 + }, + { + "epoch": 25.66, + "learning_rate": 3.717582177426825e-05, + "loss": 2.113, + "step": 8864000 + }, + { + "epoch": 25.66, + "learning_rate": 3.7175098126620974e-05, + "loss": 2.1388, + "step": 8864500 + }, + { + "epoch": 25.66, + "learning_rate": 3.7174374478973696e-05, + "loss": 2.162, + "step": 8865000 + }, + { + "epoch": 25.66, + "learning_rate": 3.717365083132642e-05, + "loss": 2.134, + "step": 8865500 + }, + { + "epoch": 25.66, + "learning_rate": 3.717292718367914e-05, + "loss": 2.1473, + "step": 8866000 + }, + { + "epoch": 25.66, + "learning_rate": 3.717220353603186e-05, + "loss": 2.1371, + "step": 8866500 + }, + { + "epoch": 25.67, + "learning_rate": 3.7171479888384585e-05, + "loss": 2.1553, + "step": 8867000 + }, + { + "epoch": 25.67, + "learning_rate": 3.7170756240737314e-05, + "loss": 2.1349, + "step": 8867500 + }, + { + "epoch": 25.67, + "learning_rate": 3.7170032593090036e-05, + "loss": 2.1482, + "step": 8868000 + }, + { + "epoch": 25.67, + "learning_rate": 3.716931184003335e-05, + "loss": 2.1318, + "step": 8868500 + }, + { + "epoch": 25.67, + "learning_rate": 3.7168588192386074e-05, + "loss": 2.1187, + "step": 8869000 + }, + { + "epoch": 25.67, + "learning_rate": 3.7167864544738796e-05, + "loss": 2.133, + "step": 8869500 + }, + { + "epoch": 25.68, + "learning_rate": 3.716714089709152e-05, + "loss": 2.152, + "step": 8870000 + }, + { + "epoch": 25.68, + "learning_rate": 3.716641724944424e-05, + "loss": 2.1549, + "step": 8870500 + }, + { + "epoch": 25.68, + "learning_rate": 3.716569360179696e-05, + "loss": 2.152, + "step": 8871000 + }, + { + "epoch": 25.68, + "learning_rate": 3.7164969954149685e-05, + "loss": 2.1777, + "step": 8871500 + }, + { + "epoch": 25.68, + "learning_rate": 3.716424630650241e-05, + "loss": 2.1192, + "step": 8872000 + }, + { + "epoch": 25.68, + "learning_rate": 3.716352265885513e-05, + "loss": 2.1401, + "step": 8872500 + }, + { + "epoch": 25.68, + "learning_rate": 3.716279901120785e-05, + "loss": 2.1459, + "step": 8873000 + }, + { + "epoch": 25.69, + "learning_rate": 3.716207536356058e-05, + "loss": 2.1714, + "step": 8873500 + }, + { + "epoch": 25.69, + "learning_rate": 3.71613517159133e-05, + "loss": 2.1425, + "step": 8874000 + }, + { + "epoch": 25.69, + "learning_rate": 3.7160628068266025e-05, + "loss": 2.1509, + "step": 8874500 + }, + { + "epoch": 25.69, + "learning_rate": 3.715990586791405e-05, + "loss": 2.1224, + "step": 8875000 + }, + { + "epoch": 25.69, + "learning_rate": 3.715918222026677e-05, + "loss": 2.1457, + "step": 8875500 + }, + { + "epoch": 25.69, + "learning_rate": 3.715845857261949e-05, + "loss": 2.172, + "step": 8876000 + }, + { + "epoch": 25.69, + "learning_rate": 3.7157734924972214e-05, + "loss": 2.1284, + "step": 8876500 + }, + { + "epoch": 25.7, + "learning_rate": 3.7157011277324937e-05, + "loss": 2.1368, + "step": 8877000 + }, + { + "epoch": 25.7, + "learning_rate": 3.715628762967766e-05, + "loss": 2.1268, + "step": 8877500 + }, + { + "epoch": 25.7, + "learning_rate": 3.715556398203038e-05, + "loss": 2.1363, + "step": 8878000 + }, + { + "epoch": 25.7, + "learning_rate": 3.71548403343831e-05, + "loss": 2.1381, + "step": 8878500 + }, + { + "epoch": 25.7, + "learning_rate": 3.715411668673583e-05, + "loss": 2.1409, + "step": 8879000 + }, + { + "epoch": 25.7, + "learning_rate": 3.7153393039088554e-05, + "loss": 2.1436, + "step": 8879500 + }, + { + "epoch": 25.7, + "learning_rate": 3.715267083873657e-05, + "loss": 2.1651, + "step": 8880000 + }, + { + "epoch": 25.71, + "learning_rate": 3.715194719108929e-05, + "loss": 2.165, + "step": 8880500 + }, + { + "epoch": 25.71, + "learning_rate": 3.715122499073731e-05, + "loss": 2.1547, + "step": 8881000 + }, + { + "epoch": 25.71, + "learning_rate": 3.715050134309003e-05, + "loss": 2.1316, + "step": 8881500 + }, + { + "epoch": 25.71, + "learning_rate": 3.714977769544275e-05, + "loss": 2.1693, + "step": 8882000 + }, + { + "epoch": 25.71, + "learning_rate": 3.714905404779548e-05, + "loss": 2.1481, + "step": 8882500 + }, + { + "epoch": 25.71, + "learning_rate": 3.7148330400148204e-05, + "loss": 2.1481, + "step": 8883000 + }, + { + "epoch": 25.71, + "learning_rate": 3.7147608199796226e-05, + "loss": 2.1496, + "step": 8883500 + }, + { + "epoch": 25.72, + "learning_rate": 3.714688599944424e-05, + "loss": 2.1588, + "step": 8884000 + }, + { + "epoch": 25.72, + "learning_rate": 3.7146162351796964e-05, + "loss": 2.1363, + "step": 8884500 + }, + { + "epoch": 25.72, + "learning_rate": 3.7145438704149686e-05, + "loss": 2.1599, + "step": 8885000 + }, + { + "epoch": 25.72, + "learning_rate": 3.714471505650241e-05, + "loss": 2.1307, + "step": 8885500 + }, + { + "epoch": 25.72, + "learning_rate": 3.714399140885513e-05, + "loss": 2.1512, + "step": 8886000 + }, + { + "epoch": 25.72, + "learning_rate": 3.714326776120786e-05, + "loss": 2.1565, + "step": 8886500 + }, + { + "epoch": 25.72, + "learning_rate": 3.7142545560855875e-05, + "loss": 2.1398, + "step": 8887000 + }, + { + "epoch": 25.73, + "learning_rate": 3.71418219132086e-05, + "loss": 2.1457, + "step": 8887500 + }, + { + "epoch": 25.73, + "learning_rate": 3.714109826556132e-05, + "loss": 2.127, + "step": 8888000 + }, + { + "epoch": 25.73, + "learning_rate": 3.714037461791404e-05, + "loss": 2.1493, + "step": 8888500 + }, + { + "epoch": 25.73, + "learning_rate": 3.7139650970266764e-05, + "loss": 2.1459, + "step": 8889000 + }, + { + "epoch": 25.73, + "learning_rate": 3.7138927322619486e-05, + "loss": 2.1224, + "step": 8889500 + }, + { + "epoch": 25.73, + "learning_rate": 3.7138203674972215e-05, + "loss": 2.1247, + "step": 8890000 + }, + { + "epoch": 25.73, + "learning_rate": 3.713748292191553e-05, + "loss": 2.1521, + "step": 8890500 + }, + { + "epoch": 25.74, + "learning_rate": 3.7136760721563546e-05, + "loss": 2.1532, + "step": 8891000 + }, + { + "epoch": 25.74, + "learning_rate": 3.713603707391627e-05, + "loss": 2.159, + "step": 8891500 + }, + { + "epoch": 25.74, + "learning_rate": 3.713531342626899e-05, + "loss": 2.1714, + "step": 8892000 + }, + { + "epoch": 25.74, + "learning_rate": 3.713458977862171e-05, + "loss": 2.1552, + "step": 8892500 + }, + { + "epoch": 25.74, + "learning_rate": 3.7133866130974435e-05, + "loss": 2.1712, + "step": 8893000 + }, + { + "epoch": 25.74, + "learning_rate": 3.713314248332716e-05, + "loss": 2.133, + "step": 8893500 + }, + { + "epoch": 25.74, + "learning_rate": 3.713241883567988e-05, + "loss": 2.1477, + "step": 8894000 + }, + { + "epoch": 25.75, + "learning_rate": 3.713169518803261e-05, + "loss": 2.1126, + "step": 8894500 + }, + { + "epoch": 25.75, + "learning_rate": 3.713097154038533e-05, + "loss": 2.158, + "step": 8895000 + }, + { + "epoch": 25.75, + "learning_rate": 3.713024789273805e-05, + "loss": 2.1387, + "step": 8895500 + }, + { + "epoch": 25.75, + "learning_rate": 3.7129524245090775e-05, + "loss": 2.1262, + "step": 8896000 + }, + { + "epoch": 25.75, + "learning_rate": 3.71288005974435e-05, + "loss": 2.1486, + "step": 8896500 + }, + { + "epoch": 25.75, + "learning_rate": 3.712807694979622e-05, + "loss": 2.127, + "step": 8897000 + }, + { + "epoch": 25.75, + "learning_rate": 3.712735330214895e-05, + "loss": 2.1321, + "step": 8897500 + }, + { + "epoch": 25.76, + "learning_rate": 3.712662965450167e-05, + "loss": 2.1612, + "step": 8898000 + }, + { + "epoch": 25.76, + "learning_rate": 3.712590745414969e-05, + "loss": 2.1608, + "step": 8898500 + }, + { + "epoch": 25.76, + "learning_rate": 3.712518380650241e-05, + "loss": 2.1371, + "step": 8899000 + }, + { + "epoch": 25.76, + "learning_rate": 3.712446015885513e-05, + "loss": 2.1416, + "step": 8899500 + }, + { + "epoch": 25.76, + "learning_rate": 3.712373651120786e-05, + "loss": 2.1373, + "step": 8900000 + }, + { + "epoch": 25.76, + "learning_rate": 3.712301286356058e-05, + "loss": 2.1311, + "step": 8900500 + }, + { + "epoch": 25.76, + "learning_rate": 3.7122289215913305e-05, + "loss": 2.1498, + "step": 8901000 + }, + { + "epoch": 25.77, + "learning_rate": 3.712156556826603e-05, + "loss": 2.1789, + "step": 8901500 + }, + { + "epoch": 25.77, + "learning_rate": 3.712084192061875e-05, + "loss": 2.1502, + "step": 8902000 + }, + { + "epoch": 25.77, + "learning_rate": 3.712011827297147e-05, + "loss": 2.1536, + "step": 8902500 + }, + { + "epoch": 25.77, + "learning_rate": 3.7119394625324194e-05, + "loss": 2.1377, + "step": 8903000 + }, + { + "epoch": 25.77, + "learning_rate": 3.7118670977676916e-05, + "loss": 2.1456, + "step": 8903500 + }, + { + "epoch": 25.77, + "learning_rate": 3.711794733002964e-05, + "loss": 2.1476, + "step": 8904000 + }, + { + "epoch": 25.77, + "learning_rate": 3.711722368238237e-05, + "loss": 2.1421, + "step": 8904500 + }, + { + "epoch": 25.78, + "learning_rate": 3.711650003473509e-05, + "loss": 2.1247, + "step": 8905000 + }, + { + "epoch": 25.78, + "learning_rate": 3.711577638708781e-05, + "loss": 2.1566, + "step": 8905500 + }, + { + "epoch": 25.78, + "learning_rate": 3.7115052739440534e-05, + "loss": 2.1444, + "step": 8906000 + }, + { + "epoch": 25.78, + "learning_rate": 3.711432909179326e-05, + "loss": 2.1558, + "step": 8906500 + }, + { + "epoch": 25.78, + "learning_rate": 3.711360689144128e-05, + "loss": 2.1424, + "step": 8907000 + }, + { + "epoch": 25.78, + "learning_rate": 3.7112883243794e-05, + "loss": 2.1381, + "step": 8907500 + }, + { + "epoch": 25.79, + "learning_rate": 3.711215959614672e-05, + "loss": 2.1233, + "step": 8908000 + }, + { + "epoch": 25.79, + "learning_rate": 3.711143739579474e-05, + "loss": 2.136, + "step": 8908500 + }, + { + "epoch": 25.79, + "learning_rate": 3.711071374814746e-05, + "loss": 2.137, + "step": 8909000 + }, + { + "epoch": 25.79, + "learning_rate": 3.710999010050018e-05, + "loss": 2.1273, + "step": 8909500 + }, + { + "epoch": 25.79, + "learning_rate": 3.710926645285291e-05, + "loss": 2.1615, + "step": 8910000 + }, + { + "epoch": 25.79, + "learning_rate": 3.7108542805205634e-05, + "loss": 2.1267, + "step": 8910500 + }, + { + "epoch": 25.79, + "learning_rate": 3.7107819157558356e-05, + "loss": 2.1354, + "step": 8911000 + }, + { + "epoch": 25.8, + "learning_rate": 3.710709550991108e-05, + "loss": 2.1571, + "step": 8911500 + }, + { + "epoch": 25.8, + "learning_rate": 3.710637186226381e-05, + "loss": 2.1581, + "step": 8912000 + }, + { + "epoch": 25.8, + "learning_rate": 3.710564821461653e-05, + "loss": 2.1469, + "step": 8912500 + }, + { + "epoch": 25.8, + "learning_rate": 3.710492456696925e-05, + "loss": 2.1563, + "step": 8913000 + }, + { + "epoch": 25.8, + "learning_rate": 3.7104200919321974e-05, + "loss": 2.1527, + "step": 8913500 + }, + { + "epoch": 25.8, + "learning_rate": 3.710347871896999e-05, + "loss": 2.1488, + "step": 8914000 + }, + { + "epoch": 25.8, + "learning_rate": 3.710275651861801e-05, + "loss": 2.1524, + "step": 8914500 + }, + { + "epoch": 25.81, + "learning_rate": 3.710203431826603e-05, + "loss": 2.1635, + "step": 8915000 + }, + { + "epoch": 25.81, + "learning_rate": 3.710131067061875e-05, + "loss": 2.1471, + "step": 8915500 + }, + { + "epoch": 25.81, + "learning_rate": 3.710058702297147e-05, + "loss": 2.1429, + "step": 8916000 + }, + { + "epoch": 25.81, + "learning_rate": 3.709986482261949e-05, + "loss": 2.1334, + "step": 8916500 + }, + { + "epoch": 25.81, + "learning_rate": 3.709914117497221e-05, + "loss": 2.1579, + "step": 8917000 + }, + { + "epoch": 25.81, + "learning_rate": 3.709841752732494e-05, + "loss": 2.1411, + "step": 8917500 + }, + { + "epoch": 25.81, + "learning_rate": 3.7097695326972954e-05, + "loss": 2.1445, + "step": 8918000 + }, + { + "epoch": 25.82, + "learning_rate": 3.709697167932568e-05, + "loss": 2.1207, + "step": 8918500 + }, + { + "epoch": 25.82, + "learning_rate": 3.70962480316784e-05, + "loss": 2.1472, + "step": 8919000 + }, + { + "epoch": 25.82, + "learning_rate": 3.709552438403112e-05, + "loss": 2.1468, + "step": 8919500 + }, + { + "epoch": 25.82, + "learning_rate": 3.709480073638385e-05, + "loss": 2.1532, + "step": 8920000 + }, + { + "epoch": 25.82, + "learning_rate": 3.709407708873657e-05, + "loss": 2.1575, + "step": 8920500 + }, + { + "epoch": 25.82, + "learning_rate": 3.7093353441089295e-05, + "loss": 2.1434, + "step": 8921000 + }, + { + "epoch": 25.82, + "learning_rate": 3.709262979344202e-05, + "loss": 2.1485, + "step": 8921500 + }, + { + "epoch": 25.83, + "learning_rate": 3.709190614579474e-05, + "loss": 2.167, + "step": 8922000 + }, + { + "epoch": 25.83, + "learning_rate": 3.709118394544276e-05, + "loss": 2.1712, + "step": 8922500 + }, + { + "epoch": 25.83, + "learning_rate": 3.7090460297795484e-05, + "loss": 2.1275, + "step": 8923000 + }, + { + "epoch": 25.83, + "learning_rate": 3.7089736650148206e-05, + "loss": 2.1571, + "step": 8923500 + }, + { + "epoch": 25.83, + "learning_rate": 3.708901300250093e-05, + "loss": 2.1436, + "step": 8924000 + }, + { + "epoch": 25.83, + "learning_rate": 3.708828935485365e-05, + "loss": 2.157, + "step": 8924500 + }, + { + "epoch": 25.83, + "learning_rate": 3.708756570720637e-05, + "loss": 2.141, + "step": 8925000 + }, + { + "epoch": 25.84, + "learning_rate": 3.7086842059559095e-05, + "loss": 2.1461, + "step": 8925500 + }, + { + "epoch": 25.84, + "learning_rate": 3.708611841191182e-05, + "loss": 2.1437, + "step": 8926000 + }, + { + "epoch": 25.84, + "learning_rate": 3.708539476426454e-05, + "loss": 2.1508, + "step": 8926500 + }, + { + "epoch": 25.84, + "learning_rate": 3.708467111661727e-05, + "loss": 2.1196, + "step": 8927000 + }, + { + "epoch": 25.84, + "learning_rate": 3.708394891626529e-05, + "loss": 2.1668, + "step": 8927500 + }, + { + "epoch": 25.84, + "learning_rate": 3.708322526861801e-05, + "loss": 2.1362, + "step": 8928000 + }, + { + "epoch": 25.84, + "learning_rate": 3.7082501620970735e-05, + "loss": 2.133, + "step": 8928500 + }, + { + "epoch": 25.85, + "learning_rate": 3.708177797332346e-05, + "loss": 2.1493, + "step": 8929000 + }, + { + "epoch": 25.85, + "learning_rate": 3.708105432567618e-05, + "loss": 2.1433, + "step": 8929500 + }, + { + "epoch": 25.85, + "learning_rate": 3.70803306780289e-05, + "loss": 2.1386, + "step": 8930000 + }, + { + "epoch": 25.85, + "learning_rate": 3.7079607030381624e-05, + "loss": 2.128, + "step": 8930500 + }, + { + "epoch": 25.85, + "learning_rate": 3.7078883382734346e-05, + "loss": 2.1333, + "step": 8931000 + }, + { + "epoch": 25.85, + "learning_rate": 3.707815973508707e-05, + "loss": 2.1296, + "step": 8931500 + }, + { + "epoch": 25.85, + "learning_rate": 3.707743608743979e-05, + "loss": 2.1245, + "step": 8932000 + }, + { + "epoch": 25.86, + "learning_rate": 3.707671243979251e-05, + "loss": 2.1415, + "step": 8932500 + }, + { + "epoch": 25.86, + "learning_rate": 3.7075990239440535e-05, + "loss": 2.1393, + "step": 8933000 + }, + { + "epoch": 25.86, + "learning_rate": 3.707526803908855e-05, + "loss": 2.1125, + "step": 8933500 + }, + { + "epoch": 25.86, + "learning_rate": 3.7074545838736566e-05, + "loss": 2.1401, + "step": 8934000 + }, + { + "epoch": 25.86, + "learning_rate": 3.707382363838459e-05, + "loss": 2.1541, + "step": 8934500 + }, + { + "epoch": 25.86, + "learning_rate": 3.707309999073732e-05, + "loss": 2.1542, + "step": 8935000 + }, + { + "epoch": 25.86, + "learning_rate": 3.707237634309004e-05, + "loss": 2.1553, + "step": 8935500 + }, + { + "epoch": 25.87, + "learning_rate": 3.707165269544276e-05, + "loss": 2.1308, + "step": 8936000 + }, + { + "epoch": 25.87, + "learning_rate": 3.7070929047795484e-05, + "loss": 2.1555, + "step": 8936500 + }, + { + "epoch": 25.87, + "learning_rate": 3.707020540014821e-05, + "loss": 2.1467, + "step": 8937000 + }, + { + "epoch": 25.87, + "learning_rate": 3.706948175250093e-05, + "loss": 2.122, + "step": 8937500 + }, + { + "epoch": 25.87, + "learning_rate": 3.706875810485365e-05, + "loss": 2.145, + "step": 8938000 + }, + { + "epoch": 25.87, + "learning_rate": 3.7068034457206373e-05, + "loss": 2.1369, + "step": 8938500 + }, + { + "epoch": 25.87, + "learning_rate": 3.7067310809559096e-05, + "loss": 2.158, + "step": 8939000 + }, + { + "epoch": 25.88, + "learning_rate": 3.706658716191182e-05, + "loss": 2.157, + "step": 8939500 + }, + { + "epoch": 25.88, + "learning_rate": 3.706586351426454e-05, + "loss": 2.1417, + "step": 8940000 + }, + { + "epoch": 25.88, + "learning_rate": 3.706513986661726e-05, + "loss": 2.1469, + "step": 8940500 + }, + { + "epoch": 25.88, + "learning_rate": 3.7064417666265285e-05, + "loss": 2.1528, + "step": 8941000 + }, + { + "epoch": 25.88, + "learning_rate": 3.706369401861801e-05, + "loss": 2.1294, + "step": 8941500 + }, + { + "epoch": 25.88, + "learning_rate": 3.7062970370970736e-05, + "loss": 2.1534, + "step": 8942000 + }, + { + "epoch": 25.88, + "learning_rate": 3.706224672332346e-05, + "loss": 2.1097, + "step": 8942500 + }, + { + "epoch": 25.89, + "learning_rate": 3.706152307567618e-05, + "loss": 2.1597, + "step": 8943000 + }, + { + "epoch": 25.89, + "learning_rate": 3.70607994280289e-05, + "loss": 2.1183, + "step": 8943500 + }, + { + "epoch": 25.89, + "learning_rate": 3.7060075780381625e-05, + "loss": 2.1356, + "step": 8944000 + }, + { + "epoch": 25.89, + "learning_rate": 3.705935213273435e-05, + "loss": 2.1415, + "step": 8944500 + }, + { + "epoch": 25.89, + "learning_rate": 3.705862848508707e-05, + "loss": 2.1458, + "step": 8945000 + }, + { + "epoch": 25.89, + "learning_rate": 3.705790483743979e-05, + "loss": 2.1247, + "step": 8945500 + }, + { + "epoch": 25.9, + "learning_rate": 3.7057182637087814e-05, + "loss": 2.1485, + "step": 8946000 + }, + { + "epoch": 25.9, + "learning_rate": 3.7056458989440536e-05, + "loss": 2.1638, + "step": 8946500 + }, + { + "epoch": 25.9, + "learning_rate": 3.705573534179326e-05, + "loss": 2.133, + "step": 8947000 + }, + { + "epoch": 25.9, + "learning_rate": 3.705501169414598e-05, + "loss": 2.1538, + "step": 8947500 + }, + { + "epoch": 25.9, + "learning_rate": 3.70542880464987e-05, + "loss": 2.1604, + "step": 8948000 + }, + { + "epoch": 25.9, + "learning_rate": 3.7053564398851425e-05, + "loss": 2.1223, + "step": 8948500 + }, + { + "epoch": 25.9, + "learning_rate": 3.7052840751204154e-05, + "loss": 2.1594, + "step": 8949000 + }, + { + "epoch": 25.91, + "learning_rate": 3.7052117103556876e-05, + "loss": 2.1586, + "step": 8949500 + }, + { + "epoch": 25.91, + "learning_rate": 3.70513934559096e-05, + "loss": 2.1465, + "step": 8950000 + }, + { + "epoch": 25.91, + "learning_rate": 3.7050671255557614e-05, + "loss": 2.1271, + "step": 8950500 + }, + { + "epoch": 25.91, + "learning_rate": 3.704994760791034e-05, + "loss": 2.1594, + "step": 8951000 + }, + { + "epoch": 25.91, + "learning_rate": 3.7049223960263065e-05, + "loss": 2.1451, + "step": 8951500 + }, + { + "epoch": 25.91, + "learning_rate": 3.704850031261579e-05, + "loss": 2.1443, + "step": 8952000 + }, + { + "epoch": 25.91, + "learning_rate": 3.704777666496851e-05, + "loss": 2.1507, + "step": 8952500 + }, + { + "epoch": 25.92, + "learning_rate": 3.704705301732123e-05, + "loss": 2.156, + "step": 8953000 + }, + { + "epoch": 25.92, + "learning_rate": 3.704633226426454e-05, + "loss": 2.1362, + "step": 8953500 + }, + { + "epoch": 25.92, + "learning_rate": 3.704560861661727e-05, + "loss": 2.1435, + "step": 8954000 + }, + { + "epoch": 25.92, + "learning_rate": 3.704488496896999e-05, + "loss": 2.142, + "step": 8954500 + }, + { + "epoch": 25.92, + "learning_rate": 3.7044161321322714e-05, + "loss": 2.1775, + "step": 8955000 + }, + { + "epoch": 25.92, + "learning_rate": 3.704343767367544e-05, + "loss": 2.1431, + "step": 8955500 + }, + { + "epoch": 25.92, + "learning_rate": 3.704271402602816e-05, + "loss": 2.125, + "step": 8956000 + }, + { + "epoch": 25.93, + "learning_rate": 3.704199037838088e-05, + "loss": 2.1473, + "step": 8956500 + }, + { + "epoch": 25.93, + "learning_rate": 3.7041268178028903e-05, + "loss": 2.1452, + "step": 8957000 + }, + { + "epoch": 25.93, + "learning_rate": 3.7040544530381626e-05, + "loss": 2.1516, + "step": 8957500 + }, + { + "epoch": 25.93, + "learning_rate": 3.703982088273435e-05, + "loss": 2.1366, + "step": 8958000 + }, + { + "epoch": 25.93, + "learning_rate": 3.703909868238237e-05, + "loss": 2.1659, + "step": 8958500 + }, + { + "epoch": 25.93, + "learning_rate": 3.703837503473509e-05, + "loss": 2.1461, + "step": 8959000 + }, + { + "epoch": 25.93, + "learning_rate": 3.7037651387087815e-05, + "loss": 2.1582, + "step": 8959500 + }, + { + "epoch": 25.94, + "learning_rate": 3.703692773944054e-05, + "loss": 2.1329, + "step": 8960000 + }, + { + "epoch": 25.94, + "learning_rate": 3.703620409179326e-05, + "loss": 2.1535, + "step": 8960500 + }, + { + "epoch": 25.94, + "learning_rate": 3.703548044414598e-05, + "loss": 2.1443, + "step": 8961000 + }, + { + "epoch": 25.94, + "learning_rate": 3.7034756796498704e-05, + "loss": 2.1299, + "step": 8961500 + }, + { + "epoch": 25.94, + "learning_rate": 3.7034033148851426e-05, + "loss": 2.1598, + "step": 8962000 + }, + { + "epoch": 25.94, + "learning_rate": 3.703330950120415e-05, + "loss": 2.1196, + "step": 8962500 + }, + { + "epoch": 25.94, + "learning_rate": 3.703258585355687e-05, + "loss": 2.1332, + "step": 8963000 + }, + { + "epoch": 25.95, + "learning_rate": 3.703186220590959e-05, + "loss": 2.1577, + "step": 8963500 + }, + { + "epoch": 25.95, + "learning_rate": 3.703113855826232e-05, + "loss": 2.1334, + "step": 8964000 + }, + { + "epoch": 25.95, + "learning_rate": 3.7030416357910344e-05, + "loss": 2.1257, + "step": 8964500 + }, + { + "epoch": 25.95, + "learning_rate": 3.7029692710263066e-05, + "loss": 2.143, + "step": 8965000 + }, + { + "epoch": 25.95, + "learning_rate": 3.702897050991108e-05, + "loss": 2.152, + "step": 8965500 + }, + { + "epoch": 25.95, + "learning_rate": 3.7028246862263804e-05, + "loss": 2.1524, + "step": 8966000 + }, + { + "epoch": 25.95, + "learning_rate": 3.7027523214616526e-05, + "loss": 2.1357, + "step": 8966500 + }, + { + "epoch": 25.96, + "learning_rate": 3.702679956696925e-05, + "loss": 2.1274, + "step": 8967000 + }, + { + "epoch": 25.96, + "learning_rate": 3.702607591932197e-05, + "loss": 2.1353, + "step": 8967500 + }, + { + "epoch": 25.96, + "learning_rate": 3.702535371896999e-05, + "loss": 2.1502, + "step": 8968000 + }, + { + "epoch": 25.96, + "learning_rate": 3.7024630071322715e-05, + "loss": 2.1368, + "step": 8968500 + }, + { + "epoch": 25.96, + "learning_rate": 3.702390642367544e-05, + "loss": 2.1393, + "step": 8969000 + }, + { + "epoch": 25.96, + "learning_rate": 3.702318277602816e-05, + "loss": 2.1497, + "step": 8969500 + }, + { + "epoch": 25.96, + "learning_rate": 3.702245912838088e-05, + "loss": 2.1291, + "step": 8970000 + }, + { + "epoch": 25.97, + "learning_rate": 3.7021735480733604e-05, + "loss": 2.163, + "step": 8970500 + }, + { + "epoch": 25.97, + "learning_rate": 3.702101328038162e-05, + "loss": 2.1246, + "step": 8971000 + }, + { + "epoch": 25.97, + "learning_rate": 3.702029108002964e-05, + "loss": 2.1488, + "step": 8971500 + }, + { + "epoch": 25.97, + "learning_rate": 3.701956743238237e-05, + "loss": 2.1501, + "step": 8972000 + }, + { + "epoch": 25.97, + "learning_rate": 3.701884378473509e-05, + "loss": 2.1442, + "step": 8972500 + }, + { + "epoch": 25.97, + "learning_rate": 3.7018120137087815e-05, + "loss": 2.137, + "step": 8973000 + }, + { + "epoch": 25.97, + "learning_rate": 3.701739648944054e-05, + "loss": 2.1569, + "step": 8973500 + }, + { + "epoch": 25.98, + "learning_rate": 3.701667284179326e-05, + "loss": 2.1253, + "step": 8974000 + }, + { + "epoch": 25.98, + "learning_rate": 3.701594919414598e-05, + "loss": 2.131, + "step": 8974500 + }, + { + "epoch": 25.98, + "learning_rate": 3.7015225546498704e-05, + "loss": 2.1437, + "step": 8975000 + }, + { + "epoch": 25.98, + "learning_rate": 3.701450189885143e-05, + "loss": 2.1385, + "step": 8975500 + }, + { + "epoch": 25.98, + "learning_rate": 3.701377969849944e-05, + "loss": 2.1423, + "step": 8976000 + }, + { + "epoch": 25.98, + "learning_rate": 3.7013057498147465e-05, + "loss": 2.1507, + "step": 8976500 + }, + { + "epoch": 25.98, + "learning_rate": 3.701233385050019e-05, + "loss": 2.1345, + "step": 8977000 + }, + { + "epoch": 25.99, + "learning_rate": 3.701161020285291e-05, + "loss": 2.1249, + "step": 8977500 + }, + { + "epoch": 25.99, + "learning_rate": 3.701088655520563e-05, + "loss": 2.1573, + "step": 8978000 + }, + { + "epoch": 25.99, + "learning_rate": 3.701016435485365e-05, + "loss": 2.1351, + "step": 8978500 + }, + { + "epoch": 25.99, + "learning_rate": 3.700944070720637e-05, + "loss": 2.1653, + "step": 8979000 + }, + { + "epoch": 25.99, + "learning_rate": 3.70087170595591e-05, + "loss": 2.1664, + "step": 8979500 + }, + { + "epoch": 25.99, + "learning_rate": 3.700799341191182e-05, + "loss": 2.1596, + "step": 8980000 + }, + { + "epoch": 25.99, + "learning_rate": 3.700726976426455e-05, + "loss": 2.1665, + "step": 8980500 + }, + { + "epoch": 26.0, + "learning_rate": 3.700654611661727e-05, + "loss": 2.1455, + "step": 8981000 + }, + { + "epoch": 26.0, + "learning_rate": 3.7005822468969994e-05, + "loss": 2.1675, + "step": 8981500 + }, + { + "epoch": 26.0, + "learning_rate": 3.7005098821322716e-05, + "loss": 2.1458, + "step": 8982000 + }, + { + "epoch": 26.0, + "eval_accuracy": 0.6646410012811101, + "eval_accuracy_mlm": 0.6288016755078222, + "eval_accuracy_nsp": 0.8570210776697694, + "eval_loss": 2.196295738220215, + "eval_runtime": 331.5653, + "eval_samples_per_second": 1316.139, + "eval_steps_per_second": 54.84, + "step": 8982272 + }, + { + "epoch": 26.0, + "learning_rate": 3.700437517367544e-05, + "loss": 2.115, + "step": 8982500 + }, + { + "epoch": 26.0, + "learning_rate": 3.700365152602816e-05, + "loss": 2.1196, + "step": 8983000 + }, + { + "epoch": 26.0, + "learning_rate": 3.700292787838088e-05, + "loss": 2.1178, + "step": 8983500 + }, + { + "epoch": 26.01, + "learning_rate": 3.70022056780289e-05, + "loss": 2.1168, + "step": 8984000 + }, + { + "epoch": 26.01, + "learning_rate": 3.700148203038162e-05, + "loss": 2.1063, + "step": 8984500 + }, + { + "epoch": 26.01, + "learning_rate": 3.700075838273435e-05, + "loss": 2.1416, + "step": 8985000 + }, + { + "epoch": 26.01, + "learning_rate": 3.700003473508707e-05, + "loss": 2.1263, + "step": 8985500 + }, + { + "epoch": 26.01, + "learning_rate": 3.6999311087439794e-05, + "loss": 2.1203, + "step": 8986000 + }, + { + "epoch": 26.01, + "learning_rate": 3.699858888708781e-05, + "loss": 2.1251, + "step": 8986500 + }, + { + "epoch": 26.01, + "learning_rate": 3.699786523944054e-05, + "loss": 2.1247, + "step": 8987000 + }, + { + "epoch": 26.02, + "learning_rate": 3.699714159179326e-05, + "loss": 2.1123, + "step": 8987500 + }, + { + "epoch": 26.02, + "learning_rate": 3.699641794414598e-05, + "loss": 2.1132, + "step": 8988000 + }, + { + "epoch": 26.02, + "learning_rate": 3.6995694296498705e-05, + "loss": 2.1001, + "step": 8988500 + }, + { + "epoch": 26.02, + "learning_rate": 3.699497064885143e-05, + "loss": 2.1507, + "step": 8989000 + }, + { + "epoch": 26.02, + "learning_rate": 3.699424844849945e-05, + "loss": 2.1417, + "step": 8989500 + }, + { + "epoch": 26.02, + "learning_rate": 3.6993526248147465e-05, + "loss": 2.1089, + "step": 8990000 + }, + { + "epoch": 26.02, + "learning_rate": 3.699280260050019e-05, + "loss": 2.117, + "step": 8990500 + }, + { + "epoch": 26.03, + "learning_rate": 3.699207895285291e-05, + "loss": 2.1257, + "step": 8991000 + }, + { + "epoch": 26.03, + "learning_rate": 3.699135530520563e-05, + "loss": 2.1017, + "step": 8991500 + }, + { + "epoch": 26.03, + "learning_rate": 3.6990631657558354e-05, + "loss": 2.1304, + "step": 8992000 + }, + { + "epoch": 26.03, + "learning_rate": 3.6989908009911077e-05, + "loss": 2.1561, + "step": 8992500 + }, + { + "epoch": 26.03, + "learning_rate": 3.69891858095591e-05, + "loss": 2.1537, + "step": 8993000 + }, + { + "epoch": 26.03, + "learning_rate": 3.698846216191182e-05, + "loss": 2.1263, + "step": 8993500 + }, + { + "epoch": 26.03, + "learning_rate": 3.698774140885513e-05, + "loss": 2.1262, + "step": 8994000 + }, + { + "epoch": 26.04, + "learning_rate": 3.698701776120785e-05, + "loss": 2.1102, + "step": 8994500 + }, + { + "epoch": 26.04, + "learning_rate": 3.698629411356058e-05, + "loss": 2.1301, + "step": 8995000 + }, + { + "epoch": 26.04, + "learning_rate": 3.6985570465913303e-05, + "loss": 2.1059, + "step": 8995500 + }, + { + "epoch": 26.04, + "learning_rate": 3.6984846818266026e-05, + "loss": 2.1171, + "step": 8996000 + }, + { + "epoch": 26.04, + "learning_rate": 3.698412317061875e-05, + "loss": 2.1184, + "step": 8996500 + }, + { + "epoch": 26.04, + "learning_rate": 3.698339952297148e-05, + "loss": 2.1239, + "step": 8997000 + }, + { + "epoch": 26.04, + "learning_rate": 3.69826758753242e-05, + "loss": 2.1337, + "step": 8997500 + }, + { + "epoch": 26.05, + "learning_rate": 3.698195222767692e-05, + "loss": 2.1337, + "step": 8998000 + }, + { + "epoch": 26.05, + "learning_rate": 3.6981228580029644e-05, + "loss": 2.1263, + "step": 8998500 + }, + { + "epoch": 26.05, + "learning_rate": 3.6980504932382366e-05, + "loss": 2.1372, + "step": 8999000 + }, + { + "epoch": 26.05, + "learning_rate": 3.697978128473509e-05, + "loss": 2.1075, + "step": 8999500 + }, + { + "epoch": 26.05, + "learning_rate": 3.697905763708781e-05, + "loss": 2.1373, + "step": 9000000 + }, + { + "epoch": 26.05, + "learning_rate": 3.697833398944053e-05, + "loss": 2.1132, + "step": 9000500 + }, + { + "epoch": 26.05, + "learning_rate": 3.6977610341793255e-05, + "loss": 2.1306, + "step": 9001000 + }, + { + "epoch": 26.06, + "learning_rate": 3.697688669414598e-05, + "loss": 2.1407, + "step": 9001500 + }, + { + "epoch": 26.06, + "learning_rate": 3.6976164493794e-05, + "loss": 2.162, + "step": 9002000 + }, + { + "epoch": 26.06, + "learning_rate": 3.697544229344202e-05, + "loss": 2.1394, + "step": 9002500 + }, + { + "epoch": 26.06, + "learning_rate": 3.6974718645794744e-05, + "loss": 2.1344, + "step": 9003000 + }, + { + "epoch": 26.06, + "learning_rate": 3.6973994998147466e-05, + "loss": 2.1266, + "step": 9003500 + }, + { + "epoch": 26.06, + "learning_rate": 3.697327135050019e-05, + "loss": 2.1385, + "step": 9004000 + }, + { + "epoch": 26.06, + "learning_rate": 3.697254770285291e-05, + "loss": 2.152, + "step": 9004500 + }, + { + "epoch": 26.07, + "learning_rate": 3.697182405520563e-05, + "loss": 2.137, + "step": 9005000 + }, + { + "epoch": 26.07, + "learning_rate": 3.6971100407558355e-05, + "loss": 2.1074, + "step": 9005500 + }, + { + "epoch": 26.07, + "learning_rate": 3.697037675991108e-05, + "loss": 2.1188, + "step": 9006000 + }, + { + "epoch": 26.07, + "learning_rate": 3.69696531122638e-05, + "loss": 2.1222, + "step": 9006500 + }, + { + "epoch": 26.07, + "learning_rate": 3.696893091191182e-05, + "loss": 2.1264, + "step": 9007000 + }, + { + "epoch": 26.07, + "learning_rate": 3.696820871155984e-05, + "loss": 2.1113, + "step": 9007500 + }, + { + "epoch": 26.07, + "learning_rate": 3.696748506391256e-05, + "loss": 2.1249, + "step": 9008000 + }, + { + "epoch": 26.08, + "learning_rate": 3.696676141626528e-05, + "loss": 2.1115, + "step": 9008500 + }, + { + "epoch": 26.08, + "learning_rate": 3.6966037768618004e-05, + "loss": 2.1572, + "step": 9009000 + }, + { + "epoch": 26.08, + "learning_rate": 3.696531412097073e-05, + "loss": 2.121, + "step": 9009500 + }, + { + "epoch": 26.08, + "learning_rate": 3.696459192061875e-05, + "loss": 2.1272, + "step": 9010000 + }, + { + "epoch": 26.08, + "learning_rate": 3.696386827297148e-05, + "loss": 2.1275, + "step": 9010500 + }, + { + "epoch": 26.08, + "learning_rate": 3.69631446253242e-05, + "loss": 2.1338, + "step": 9011000 + }, + { + "epoch": 26.08, + "learning_rate": 3.696242097767692e-05, + "loss": 2.1312, + "step": 9011500 + }, + { + "epoch": 26.09, + "learning_rate": 3.6961697330029644e-05, + "loss": 2.1388, + "step": 9012000 + }, + { + "epoch": 26.09, + "learning_rate": 3.696097368238237e-05, + "loss": 2.1167, + "step": 9012500 + }, + { + "epoch": 26.09, + "learning_rate": 3.696025003473509e-05, + "loss": 2.1326, + "step": 9013000 + }, + { + "epoch": 26.09, + "learning_rate": 3.695952638708781e-05, + "loss": 2.1263, + "step": 9013500 + }, + { + "epoch": 26.09, + "learning_rate": 3.695880418673583e-05, + "loss": 2.1322, + "step": 9014000 + }, + { + "epoch": 26.09, + "learning_rate": 3.695808053908855e-05, + "loss": 2.1408, + "step": 9014500 + }, + { + "epoch": 26.09, + "learning_rate": 3.695735689144128e-05, + "loss": 2.1419, + "step": 9015000 + }, + { + "epoch": 26.1, + "learning_rate": 3.6956634691089293e-05, + "loss": 2.1355, + "step": 9015500 + }, + { + "epoch": 26.1, + "learning_rate": 3.6955911043442016e-05, + "loss": 2.1144, + "step": 9016000 + }, + { + "epoch": 26.1, + "learning_rate": 3.695518739579474e-05, + "loss": 2.1168, + "step": 9016500 + }, + { + "epoch": 26.1, + "learning_rate": 3.6954465195442753e-05, + "loss": 2.1385, + "step": 9017000 + }, + { + "epoch": 26.1, + "learning_rate": 3.695374154779548e-05, + "loss": 2.1045, + "step": 9017500 + }, + { + "epoch": 26.1, + "learning_rate": 3.6953017900148205e-05, + "loss": 2.1059, + "step": 9018000 + }, + { + "epoch": 26.1, + "learning_rate": 3.695229425250093e-05, + "loss": 2.1003, + "step": 9018500 + }, + { + "epoch": 26.11, + "learning_rate": 3.695157060485365e-05, + "loss": 2.1116, + "step": 9019000 + }, + { + "epoch": 26.11, + "learning_rate": 3.695084695720638e-05, + "loss": 2.1263, + "step": 9019500 + }, + { + "epoch": 26.11, + "learning_rate": 3.69501233095591e-05, + "loss": 2.1269, + "step": 9020000 + }, + { + "epoch": 26.11, + "learning_rate": 3.694939966191182e-05, + "loss": 2.1252, + "step": 9020500 + }, + { + "epoch": 26.11, + "learning_rate": 3.6948676014264545e-05, + "loss": 2.132, + "step": 9021000 + }, + { + "epoch": 26.11, + "learning_rate": 3.694795236661727e-05, + "loss": 2.1331, + "step": 9021500 + }, + { + "epoch": 26.11, + "learning_rate": 3.694723016626528e-05, + "loss": 2.1381, + "step": 9022000 + }, + { + "epoch": 26.12, + "learning_rate": 3.6946506518618005e-05, + "loss": 2.118, + "step": 9022500 + }, + { + "epoch": 26.12, + "learning_rate": 3.694578287097073e-05, + "loss": 2.1239, + "step": 9023000 + }, + { + "epoch": 26.12, + "learning_rate": 3.694505922332345e-05, + "loss": 2.1418, + "step": 9023500 + }, + { + "epoch": 26.12, + "learning_rate": 3.694433557567618e-05, + "loss": 2.1505, + "step": 9024000 + }, + { + "epoch": 26.12, + "learning_rate": 3.69436119280289e-05, + "loss": 2.1188, + "step": 9024500 + }, + { + "epoch": 26.12, + "learning_rate": 3.694288972767692e-05, + "loss": 2.1366, + "step": 9025000 + }, + { + "epoch": 26.13, + "learning_rate": 3.6942166080029645e-05, + "loss": 2.1231, + "step": 9025500 + }, + { + "epoch": 26.13, + "learning_rate": 3.694144243238237e-05, + "loss": 2.1257, + "step": 9026000 + }, + { + "epoch": 26.13, + "learning_rate": 3.694071878473509e-05, + "loss": 2.1105, + "step": 9026500 + }, + { + "epoch": 26.13, + "learning_rate": 3.693999513708781e-05, + "loss": 2.1502, + "step": 9027000 + }, + { + "epoch": 26.13, + "learning_rate": 3.6939271489440534e-05, + "loss": 2.1258, + "step": 9027500 + }, + { + "epoch": 26.13, + "learning_rate": 3.6938549289088556e-05, + "loss": 2.134, + "step": 9028000 + }, + { + "epoch": 26.13, + "learning_rate": 3.693782564144128e-05, + "loss": 2.1355, + "step": 9028500 + }, + { + "epoch": 26.14, + "learning_rate": 3.6937101993794e-05, + "loss": 2.1275, + "step": 9029000 + }, + { + "epoch": 26.14, + "learning_rate": 3.693637834614672e-05, + "loss": 2.1374, + "step": 9029500 + }, + { + "epoch": 26.14, + "learning_rate": 3.6935654698499445e-05, + "loss": 2.1248, + "step": 9030000 + }, + { + "epoch": 26.14, + "learning_rate": 3.693493105085217e-05, + "loss": 2.1246, + "step": 9030500 + }, + { + "epoch": 26.14, + "learning_rate": 3.693420740320489e-05, + "loss": 2.1231, + "step": 9031000 + }, + { + "epoch": 26.14, + "learning_rate": 3.693348375555761e-05, + "loss": 2.1273, + "step": 9031500 + }, + { + "epoch": 26.14, + "learning_rate": 3.6932761555205634e-05, + "loss": 2.1419, + "step": 9032000 + }, + { + "epoch": 26.15, + "learning_rate": 3.693203790755836e-05, + "loss": 2.1537, + "step": 9032500 + }, + { + "epoch": 26.15, + "learning_rate": 3.693131425991108e-05, + "loss": 2.1194, + "step": 9033000 + }, + { + "epoch": 26.15, + "learning_rate": 3.693059061226381e-05, + "loss": 2.1415, + "step": 9033500 + }, + { + "epoch": 26.15, + "learning_rate": 3.692986696461653e-05, + "loss": 2.119, + "step": 9034000 + }, + { + "epoch": 26.15, + "learning_rate": 3.692914331696925e-05, + "loss": 2.1363, + "step": 9034500 + }, + { + "epoch": 26.15, + "learning_rate": 3.6928419669321975e-05, + "loss": 2.1507, + "step": 9035000 + }, + { + "epoch": 26.15, + "learning_rate": 3.69276960216747e-05, + "loss": 2.1318, + "step": 9035500 + }, + { + "epoch": 26.16, + "learning_rate": 3.692697237402742e-05, + "loss": 2.1336, + "step": 9036000 + }, + { + "epoch": 26.16, + "learning_rate": 3.6926250173675435e-05, + "loss": 2.1171, + "step": 9036500 + }, + { + "epoch": 26.16, + "learning_rate": 3.692552652602816e-05, + "loss": 2.1344, + "step": 9037000 + }, + { + "epoch": 26.16, + "learning_rate": 3.692480432567618e-05, + "loss": 2.1271, + "step": 9037500 + }, + { + "epoch": 26.16, + "learning_rate": 3.69240806780289e-05, + "loss": 2.1277, + "step": 9038000 + }, + { + "epoch": 26.16, + "learning_rate": 3.6923357030381624e-05, + "loss": 2.1138, + "step": 9038500 + }, + { + "epoch": 26.16, + "learning_rate": 3.6922633382734346e-05, + "loss": 2.1341, + "step": 9039000 + }, + { + "epoch": 26.17, + "learning_rate": 3.6921909735087075e-05, + "loss": 2.1369, + "step": 9039500 + }, + { + "epoch": 26.17, + "learning_rate": 3.69211860874398e-05, + "loss": 2.1049, + "step": 9040000 + }, + { + "epoch": 26.17, + "learning_rate": 3.692046243979252e-05, + "loss": 2.133, + "step": 9040500 + }, + { + "epoch": 26.17, + "learning_rate": 3.6919740239440535e-05, + "loss": 2.1326, + "step": 9041000 + }, + { + "epoch": 26.17, + "learning_rate": 3.691901659179326e-05, + "loss": 2.1246, + "step": 9041500 + }, + { + "epoch": 26.17, + "learning_rate": 3.691829294414598e-05, + "loss": 2.1609, + "step": 9042000 + }, + { + "epoch": 26.17, + "learning_rate": 3.691756929649871e-05, + "loss": 2.135, + "step": 9042500 + }, + { + "epoch": 26.18, + "learning_rate": 3.691684564885143e-05, + "loss": 2.1292, + "step": 9043000 + }, + { + "epoch": 26.18, + "learning_rate": 3.6916123448499446e-05, + "loss": 2.1372, + "step": 9043500 + }, + { + "epoch": 26.18, + "learning_rate": 3.691539980085217e-05, + "loss": 2.1319, + "step": 9044000 + }, + { + "epoch": 26.18, + "learning_rate": 3.691467615320489e-05, + "loss": 2.1338, + "step": 9044500 + }, + { + "epoch": 26.18, + "learning_rate": 3.691395250555761e-05, + "loss": 2.1113, + "step": 9045000 + }, + { + "epoch": 26.18, + "learning_rate": 3.6913228857910335e-05, + "loss": 2.1211, + "step": 9045500 + }, + { + "epoch": 26.18, + "learning_rate": 3.691250665755836e-05, + "loss": 2.1096, + "step": 9046000 + }, + { + "epoch": 26.19, + "learning_rate": 3.691178300991108e-05, + "loss": 2.1257, + "step": 9046500 + }, + { + "epoch": 26.19, + "learning_rate": 3.691105936226381e-05, + "loss": 2.1303, + "step": 9047000 + }, + { + "epoch": 26.19, + "learning_rate": 3.691033571461653e-05, + "loss": 2.1123, + "step": 9047500 + }, + { + "epoch": 26.19, + "learning_rate": 3.6909613514264546e-05, + "loss": 2.1369, + "step": 9048000 + }, + { + "epoch": 26.19, + "learning_rate": 3.690888986661727e-05, + "loss": 2.1272, + "step": 9048500 + }, + { + "epoch": 26.19, + "learning_rate": 3.690816621896999e-05, + "loss": 2.1053, + "step": 9049000 + }, + { + "epoch": 26.19, + "learning_rate": 3.690744257132271e-05, + "loss": 2.1362, + "step": 9049500 + }, + { + "epoch": 26.2, + "learning_rate": 3.6906718923675435e-05, + "loss": 2.1486, + "step": 9050000 + }, + { + "epoch": 26.2, + "learning_rate": 3.690599527602816e-05, + "loss": 2.1572, + "step": 9050500 + }, + { + "epoch": 26.2, + "learning_rate": 3.690527162838088e-05, + "loss": 2.1528, + "step": 9051000 + }, + { + "epoch": 26.2, + "learning_rate": 3.690454798073361e-05, + "loss": 2.1333, + "step": 9051500 + }, + { + "epoch": 26.2, + "learning_rate": 3.690382433308633e-05, + "loss": 2.1269, + "step": 9052000 + }, + { + "epoch": 26.2, + "learning_rate": 3.690310068543905e-05, + "loss": 2.1424, + "step": 9052500 + }, + { + "epoch": 26.2, + "learning_rate": 3.6902377037791776e-05, + "loss": 2.1244, + "step": 9053000 + }, + { + "epoch": 26.21, + "learning_rate": 3.69016533901445e-05, + "loss": 2.1345, + "step": 9053500 + }, + { + "epoch": 26.21, + "learning_rate": 3.690092974249723e-05, + "loss": 2.1417, + "step": 9054000 + }, + { + "epoch": 26.21, + "learning_rate": 3.690020609484995e-05, + "loss": 2.1199, + "step": 9054500 + }, + { + "epoch": 26.21, + "learning_rate": 3.689948244720267e-05, + "loss": 2.1327, + "step": 9055000 + }, + { + "epoch": 26.21, + "learning_rate": 3.6898758799555394e-05, + "loss": 2.1441, + "step": 9055500 + }, + { + "epoch": 26.21, + "learning_rate": 3.689803804649871e-05, + "loss": 2.1296, + "step": 9056000 + }, + { + "epoch": 26.21, + "learning_rate": 3.689731439885143e-05, + "loss": 2.1436, + "step": 9056500 + }, + { + "epoch": 26.22, + "learning_rate": 3.689659219849945e-05, + "loss": 2.1082, + "step": 9057000 + }, + { + "epoch": 26.22, + "learning_rate": 3.689586855085217e-05, + "loss": 2.1487, + "step": 9057500 + }, + { + "epoch": 26.22, + "learning_rate": 3.689514490320489e-05, + "loss": 2.1502, + "step": 9058000 + }, + { + "epoch": 26.22, + "learning_rate": 3.6894421255557614e-05, + "loss": 2.1373, + "step": 9058500 + }, + { + "epoch": 26.22, + "learning_rate": 3.6893697607910336e-05, + "loss": 2.1291, + "step": 9059000 + }, + { + "epoch": 26.22, + "learning_rate": 3.689297396026306e-05, + "loss": 2.1238, + "step": 9059500 + }, + { + "epoch": 26.22, + "learning_rate": 3.689225031261578e-05, + "loss": 2.1491, + "step": 9060000 + }, + { + "epoch": 26.23, + "learning_rate": 3.689152666496851e-05, + "loss": 2.1268, + "step": 9060500 + }, + { + "epoch": 26.23, + "learning_rate": 3.689080301732123e-05, + "loss": 2.1219, + "step": 9061000 + }, + { + "epoch": 26.23, + "learning_rate": 3.689007936967396e-05, + "loss": 2.134, + "step": 9061500 + }, + { + "epoch": 26.23, + "learning_rate": 3.6889357169321976e-05, + "loss": 2.1225, + "step": 9062000 + }, + { + "epoch": 26.23, + "learning_rate": 3.68886335216747e-05, + "loss": 2.1288, + "step": 9062500 + }, + { + "epoch": 26.23, + "learning_rate": 3.688790987402742e-05, + "loss": 2.1168, + "step": 9063000 + }, + { + "epoch": 26.24, + "learning_rate": 3.688718622638014e-05, + "loss": 2.1528, + "step": 9063500 + }, + { + "epoch": 26.24, + "learning_rate": 3.6886462578732865e-05, + "loss": 2.1139, + "step": 9064000 + }, + { + "epoch": 26.24, + "learning_rate": 3.688574037838089e-05, + "loss": 2.1238, + "step": 9064500 + }, + { + "epoch": 26.24, + "learning_rate": 3.688501673073361e-05, + "loss": 2.1232, + "step": 9065000 + }, + { + "epoch": 26.24, + "learning_rate": 3.6884294530381625e-05, + "loss": 2.1337, + "step": 9065500 + }, + { + "epoch": 26.24, + "learning_rate": 3.688357233002964e-05, + "loss": 2.1341, + "step": 9066000 + }, + { + "epoch": 26.24, + "learning_rate": 3.688284868238236e-05, + "loss": 2.1463, + "step": 9066500 + }, + { + "epoch": 26.25, + "learning_rate": 3.6882125034735085e-05, + "loss": 2.1204, + "step": 9067000 + }, + { + "epoch": 26.25, + "learning_rate": 3.688140138708781e-05, + "loss": 2.1451, + "step": 9067500 + }, + { + "epoch": 26.25, + "learning_rate": 3.6880677739440537e-05, + "loss": 2.1229, + "step": 9068000 + }, + { + "epoch": 26.25, + "learning_rate": 3.687995409179326e-05, + "loss": 2.1478, + "step": 9068500 + }, + { + "epoch": 26.25, + "learning_rate": 3.687923044414598e-05, + "loss": 2.131, + "step": 9069000 + }, + { + "epoch": 26.25, + "learning_rate": 3.687850679649871e-05, + "loss": 2.1204, + "step": 9069500 + }, + { + "epoch": 26.25, + "learning_rate": 3.6877784596146726e-05, + "loss": 2.1131, + "step": 9070000 + }, + { + "epoch": 26.26, + "learning_rate": 3.687706094849945e-05, + "loss": 2.1423, + "step": 9070500 + }, + { + "epoch": 26.26, + "learning_rate": 3.687633874814746e-05, + "loss": 2.1051, + "step": 9071000 + }, + { + "epoch": 26.26, + "learning_rate": 3.6875616547795486e-05, + "loss": 2.133, + "step": 9071500 + }, + { + "epoch": 26.26, + "learning_rate": 3.687489290014821e-05, + "loss": 2.1328, + "step": 9072000 + }, + { + "epoch": 26.26, + "learning_rate": 3.687416925250093e-05, + "loss": 2.144, + "step": 9072500 + }, + { + "epoch": 26.26, + "learning_rate": 3.687344560485365e-05, + "loss": 2.1069, + "step": 9073000 + }, + { + "epoch": 26.26, + "learning_rate": 3.6872721957206375e-05, + "loss": 2.1227, + "step": 9073500 + }, + { + "epoch": 26.27, + "learning_rate": 3.68719983095591e-05, + "loss": 2.1207, + "step": 9074000 + }, + { + "epoch": 26.27, + "learning_rate": 3.687127466191182e-05, + "loss": 2.1541, + "step": 9074500 + }, + { + "epoch": 26.27, + "learning_rate": 3.687055101426454e-05, + "loss": 2.1103, + "step": 9075000 + }, + { + "epoch": 26.27, + "learning_rate": 3.6869827366617264e-05, + "loss": 2.1335, + "step": 9075500 + }, + { + "epoch": 26.27, + "learning_rate": 3.6869103718969986e-05, + "loss": 2.1551, + "step": 9076000 + }, + { + "epoch": 26.27, + "learning_rate": 3.686838007132271e-05, + "loss": 2.1337, + "step": 9076500 + }, + { + "epoch": 26.27, + "learning_rate": 3.686765642367544e-05, + "loss": 2.1507, + "step": 9077000 + }, + { + "epoch": 26.28, + "learning_rate": 3.686693277602816e-05, + "loss": 2.1285, + "step": 9077500 + }, + { + "epoch": 26.28, + "learning_rate": 3.686620912838089e-05, + "loss": 2.1198, + "step": 9078000 + }, + { + "epoch": 26.28, + "learning_rate": 3.686548548073361e-05, + "loss": 2.1481, + "step": 9078500 + }, + { + "epoch": 26.28, + "learning_rate": 3.6864763280381626e-05, + "loss": 2.1511, + "step": 9079000 + }, + { + "epoch": 26.28, + "learning_rate": 3.686403963273435e-05, + "loss": 2.1319, + "step": 9079500 + }, + { + "epoch": 26.28, + "learning_rate": 3.686331598508707e-05, + "loss": 2.1197, + "step": 9080000 + }, + { + "epoch": 26.28, + "learning_rate": 3.686259233743979e-05, + "loss": 2.1341, + "step": 9080500 + }, + { + "epoch": 26.29, + "learning_rate": 3.6861868689792515e-05, + "loss": 2.1118, + "step": 9081000 + }, + { + "epoch": 26.29, + "learning_rate": 3.686114504214524e-05, + "loss": 2.0959, + "step": 9081500 + }, + { + "epoch": 26.29, + "learning_rate": 3.686042139449796e-05, + "loss": 2.1312, + "step": 9082000 + }, + { + "epoch": 26.29, + "learning_rate": 3.685969774685069e-05, + "loss": 2.1285, + "step": 9082500 + }, + { + "epoch": 26.29, + "learning_rate": 3.685897409920341e-05, + "loss": 2.1444, + "step": 9083000 + }, + { + "epoch": 26.29, + "learning_rate": 3.685825045155613e-05, + "loss": 2.1235, + "step": 9083500 + }, + { + "epoch": 26.29, + "learning_rate": 3.685752680390886e-05, + "loss": 2.1134, + "step": 9084000 + }, + { + "epoch": 26.3, + "learning_rate": 3.6856803156261584e-05, + "loss": 2.1482, + "step": 9084500 + }, + { + "epoch": 26.3, + "learning_rate": 3.68560809559096e-05, + "loss": 2.1447, + "step": 9085000 + }, + { + "epoch": 26.3, + "learning_rate": 3.685535730826232e-05, + "loss": 2.1316, + "step": 9085500 + }, + { + "epoch": 26.3, + "learning_rate": 3.6854633660615044e-05, + "loss": 2.1416, + "step": 9086000 + }, + { + "epoch": 26.3, + "learning_rate": 3.6853910012967766e-05, + "loss": 2.123, + "step": 9086500 + }, + { + "epoch": 26.3, + "learning_rate": 3.685318636532049e-05, + "loss": 2.1418, + "step": 9087000 + }, + { + "epoch": 26.3, + "learning_rate": 3.685246271767321e-05, + "loss": 2.1396, + "step": 9087500 + }, + { + "epoch": 26.31, + "learning_rate": 3.685173907002594e-05, + "loss": 2.1622, + "step": 9088000 + }, + { + "epoch": 26.31, + "learning_rate": 3.6851016869673955e-05, + "loss": 2.1469, + "step": 9088500 + }, + { + "epoch": 26.31, + "learning_rate": 3.685029322202668e-05, + "loss": 2.1323, + "step": 9089000 + }, + { + "epoch": 26.31, + "learning_rate": 3.68495695743794e-05, + "loss": 2.1256, + "step": 9089500 + }, + { + "epoch": 26.31, + "learning_rate": 3.684884592673212e-05, + "loss": 2.1258, + "step": 9090000 + }, + { + "epoch": 26.31, + "learning_rate": 3.6848122279084844e-05, + "loss": 2.1376, + "step": 9090500 + }, + { + "epoch": 26.31, + "learning_rate": 3.684739863143757e-05, + "loss": 2.1762, + "step": 9091000 + }, + { + "epoch": 26.32, + "learning_rate": 3.684667643108559e-05, + "loss": 2.1337, + "step": 9091500 + }, + { + "epoch": 26.32, + "learning_rate": 3.684595278343831e-05, + "loss": 2.1196, + "step": 9092000 + }, + { + "epoch": 26.32, + "learning_rate": 3.6845230583086334e-05, + "loss": 2.1242, + "step": 9092500 + }, + { + "epoch": 26.32, + "learning_rate": 3.684450838273435e-05, + "loss": 2.1347, + "step": 9093000 + }, + { + "epoch": 26.32, + "learning_rate": 3.6843786182382365e-05, + "loss": 2.1126, + "step": 9093500 + }, + { + "epoch": 26.32, + "learning_rate": 3.684306253473509e-05, + "loss": 2.153, + "step": 9094000 + }, + { + "epoch": 26.32, + "learning_rate": 3.6842338887087816e-05, + "loss": 2.1585, + "step": 9094500 + }, + { + "epoch": 26.33, + "learning_rate": 3.684161523944054e-05, + "loss": 2.1367, + "step": 9095000 + }, + { + "epoch": 26.33, + "learning_rate": 3.684089159179326e-05, + "loss": 2.1191, + "step": 9095500 + }, + { + "epoch": 26.33, + "learning_rate": 3.684016794414598e-05, + "loss": 2.1624, + "step": 9096000 + }, + { + "epoch": 26.33, + "learning_rate": 3.6839445743794e-05, + "loss": 2.1598, + "step": 9096500 + }, + { + "epoch": 26.33, + "learning_rate": 3.683872209614672e-05, + "loss": 2.1327, + "step": 9097000 + }, + { + "epoch": 26.33, + "learning_rate": 3.683799844849944e-05, + "loss": 2.1274, + "step": 9097500 + }, + { + "epoch": 26.33, + "learning_rate": 3.6837274800852165e-05, + "loss": 2.1354, + "step": 9098000 + }, + { + "epoch": 26.34, + "learning_rate": 3.683655115320489e-05, + "loss": 2.1448, + "step": 9098500 + }, + { + "epoch": 26.34, + "learning_rate": 3.6835827505557616e-05, + "loss": 2.1467, + "step": 9099000 + }, + { + "epoch": 26.34, + "learning_rate": 3.683510385791034e-05, + "loss": 2.1147, + "step": 9099500 + }, + { + "epoch": 26.34, + "learning_rate": 3.683438021026307e-05, + "loss": 2.152, + "step": 9100000 + }, + { + "epoch": 26.34, + "learning_rate": 3.683365656261579e-05, + "loss": 2.137, + "step": 9100500 + }, + { + "epoch": 26.34, + "learning_rate": 3.683293291496851e-05, + "loss": 2.1129, + "step": 9101000 + }, + { + "epoch": 26.35, + "learning_rate": 3.6832209267321234e-05, + "loss": 2.1538, + "step": 9101500 + }, + { + "epoch": 26.35, + "learning_rate": 3.6831485619673956e-05, + "loss": 2.1512, + "step": 9102000 + }, + { + "epoch": 26.35, + "learning_rate": 3.683076197202668e-05, + "loss": 2.1229, + "step": 9102500 + }, + { + "epoch": 26.35, + "learning_rate": 3.68300383243794e-05, + "loss": 2.1289, + "step": 9103000 + }, + { + "epoch": 26.35, + "learning_rate": 3.682931467673212e-05, + "loss": 2.1404, + "step": 9103500 + }, + { + "epoch": 26.35, + "learning_rate": 3.682859247638014e-05, + "loss": 2.1519, + "step": 9104000 + }, + { + "epoch": 26.35, + "learning_rate": 3.682787027602816e-05, + "loss": 2.125, + "step": 9104500 + }, + { + "epoch": 26.36, + "learning_rate": 3.682714662838088e-05, + "loss": 2.1236, + "step": 9105000 + }, + { + "epoch": 26.36, + "learning_rate": 3.6826422980733605e-05, + "loss": 2.1367, + "step": 9105500 + }, + { + "epoch": 26.36, + "learning_rate": 3.682569933308633e-05, + "loss": 2.1129, + "step": 9106000 + }, + { + "epoch": 26.36, + "learning_rate": 3.6824975685439057e-05, + "loss": 2.136, + "step": 9106500 + }, + { + "epoch": 26.36, + "learning_rate": 3.682425203779178e-05, + "loss": 2.1213, + "step": 9107000 + }, + { + "epoch": 26.36, + "learning_rate": 3.68235283901445e-05, + "loss": 2.1441, + "step": 9107500 + }, + { + "epoch": 26.36, + "learning_rate": 3.682280474249722e-05, + "loss": 2.1304, + "step": 9108000 + }, + { + "epoch": 26.37, + "learning_rate": 3.6822081094849945e-05, + "loss": 2.1307, + "step": 9108500 + }, + { + "epoch": 26.37, + "learning_rate": 3.682135889449797e-05, + "loss": 2.1587, + "step": 9109000 + }, + { + "epoch": 26.37, + "learning_rate": 3.682063669414598e-05, + "loss": 2.1129, + "step": 9109500 + }, + { + "epoch": 26.37, + "learning_rate": 3.6819913046498706e-05, + "loss": 2.1541, + "step": 9110000 + }, + { + "epoch": 26.37, + "learning_rate": 3.681918939885143e-05, + "loss": 2.1313, + "step": 9110500 + }, + { + "epoch": 26.37, + "learning_rate": 3.681846575120415e-05, + "loss": 2.1533, + "step": 9111000 + }, + { + "epoch": 26.37, + "learning_rate": 3.681774210355687e-05, + "loss": 2.1137, + "step": 9111500 + }, + { + "epoch": 26.38, + "learning_rate": 3.6817018455909595e-05, + "loss": 2.1638, + "step": 9112000 + }, + { + "epoch": 26.38, + "learning_rate": 3.681629480826232e-05, + "loss": 2.1494, + "step": 9112500 + }, + { + "epoch": 26.38, + "learning_rate": 3.681557116061504e-05, + "loss": 2.1222, + "step": 9113000 + }, + { + "epoch": 26.38, + "learning_rate": 3.681484751296777e-05, + "loss": 2.1166, + "step": 9113500 + }, + { + "epoch": 26.38, + "learning_rate": 3.681412386532049e-05, + "loss": 2.1556, + "step": 9114000 + }, + { + "epoch": 26.38, + "learning_rate": 3.681340021767322e-05, + "loss": 2.1594, + "step": 9114500 + }, + { + "epoch": 26.38, + "learning_rate": 3.681267657002594e-05, + "loss": 2.1309, + "step": 9115000 + }, + { + "epoch": 26.39, + "learning_rate": 3.6811952922378664e-05, + "loss": 2.1399, + "step": 9115500 + }, + { + "epoch": 26.39, + "learning_rate": 3.681123072202668e-05, + "loss": 2.1268, + "step": 9116000 + }, + { + "epoch": 26.39, + "learning_rate": 3.68105070743794e-05, + "loss": 2.1392, + "step": 9116500 + }, + { + "epoch": 26.39, + "learning_rate": 3.6809783426732124e-05, + "loss": 2.1116, + "step": 9117000 + }, + { + "epoch": 26.39, + "learning_rate": 3.6809059779084846e-05, + "loss": 2.1291, + "step": 9117500 + }, + { + "epoch": 26.39, + "learning_rate": 3.680833613143757e-05, + "loss": 2.1357, + "step": 9118000 + }, + { + "epoch": 26.39, + "learning_rate": 3.680761248379029e-05, + "loss": 2.1135, + "step": 9118500 + }, + { + "epoch": 26.4, + "learning_rate": 3.680689028343831e-05, + "loss": 2.1408, + "step": 9119000 + }, + { + "epoch": 26.4, + "learning_rate": 3.680616808308633e-05, + "loss": 2.1234, + "step": 9119500 + }, + { + "epoch": 26.4, + "learning_rate": 3.680544443543905e-05, + "loss": 2.114, + "step": 9120000 + }, + { + "epoch": 26.4, + "learning_rate": 3.680472078779177e-05, + "loss": 2.1281, + "step": 9120500 + }, + { + "epoch": 26.4, + "learning_rate": 3.6803997140144495e-05, + "loss": 2.128, + "step": 9121000 + }, + { + "epoch": 26.4, + "learning_rate": 3.6803273492497224e-05, + "loss": 2.1517, + "step": 9121500 + }, + { + "epoch": 26.4, + "learning_rate": 3.6802549844849946e-05, + "loss": 2.1459, + "step": 9122000 + }, + { + "epoch": 26.41, + "learning_rate": 3.680182619720267e-05, + "loss": 2.1253, + "step": 9122500 + }, + { + "epoch": 26.41, + "learning_rate": 3.680110254955539e-05, + "loss": 2.1332, + "step": 9123000 + }, + { + "epoch": 26.41, + "learning_rate": 3.680037890190812e-05, + "loss": 2.1396, + "step": 9123500 + }, + { + "epoch": 26.41, + "learning_rate": 3.6799656701556135e-05, + "loss": 2.1593, + "step": 9124000 + }, + { + "epoch": 26.41, + "learning_rate": 3.679893450120415e-05, + "loss": 2.1489, + "step": 9124500 + }, + { + "epoch": 26.41, + "learning_rate": 3.679821085355687e-05, + "loss": 2.1372, + "step": 9125000 + }, + { + "epoch": 26.41, + "learning_rate": 3.6797487205909595e-05, + "loss": 2.1567, + "step": 9125500 + }, + { + "epoch": 26.42, + "learning_rate": 3.679676355826232e-05, + "loss": 2.1535, + "step": 9126000 + }, + { + "epoch": 26.42, + "learning_rate": 3.679604135791034e-05, + "loss": 2.1742, + "step": 9126500 + }, + { + "epoch": 26.42, + "learning_rate": 3.6795319157558355e-05, + "loss": 2.1456, + "step": 9127000 + }, + { + "epoch": 26.42, + "learning_rate": 3.679459550991108e-05, + "loss": 2.1333, + "step": 9127500 + }, + { + "epoch": 26.42, + "learning_rate": 3.67938718622638e-05, + "loss": 2.151, + "step": 9128000 + }, + { + "epoch": 26.42, + "learning_rate": 3.679314821461652e-05, + "loss": 2.1298, + "step": 9128500 + }, + { + "epoch": 26.42, + "learning_rate": 3.6792424566969244e-05, + "loss": 2.1186, + "step": 9129000 + }, + { + "epoch": 26.43, + "learning_rate": 3.6791700919321973e-05, + "loss": 2.1293, + "step": 9129500 + }, + { + "epoch": 26.43, + "learning_rate": 3.6790977271674696e-05, + "loss": 2.1047, + "step": 9130000 + }, + { + "epoch": 26.43, + "learning_rate": 3.679025362402742e-05, + "loss": 2.1281, + "step": 9130500 + }, + { + "epoch": 26.43, + "learning_rate": 3.678953142367544e-05, + "loss": 2.1327, + "step": 9131000 + }, + { + "epoch": 26.43, + "learning_rate": 3.678880777602816e-05, + "loss": 2.1541, + "step": 9131500 + }, + { + "epoch": 26.43, + "learning_rate": 3.678808557567618e-05, + "loss": 2.1098, + "step": 9132000 + }, + { + "epoch": 26.43, + "learning_rate": 3.67873619280289e-05, + "loss": 2.1419, + "step": 9132500 + }, + { + "epoch": 26.44, + "learning_rate": 3.678663828038162e-05, + "loss": 2.1375, + "step": 9133000 + }, + { + "epoch": 26.44, + "learning_rate": 3.6785914632734345e-05, + "loss": 2.1292, + "step": 9133500 + }, + { + "epoch": 26.44, + "learning_rate": 3.678519098508707e-05, + "loss": 2.128, + "step": 9134000 + }, + { + "epoch": 26.44, + "learning_rate": 3.6784467337439796e-05, + "loss": 2.1289, + "step": 9134500 + }, + { + "epoch": 26.44, + "learning_rate": 3.678374368979252e-05, + "loss": 2.1182, + "step": 9135000 + }, + { + "epoch": 26.44, + "learning_rate": 3.678302004214524e-05, + "loss": 2.1254, + "step": 9135500 + }, + { + "epoch": 26.44, + "learning_rate": 3.678229639449796e-05, + "loss": 2.1327, + "step": 9136000 + }, + { + "epoch": 26.45, + "learning_rate": 3.678157564144127e-05, + "loss": 2.1531, + "step": 9136500 + }, + { + "epoch": 26.45, + "learning_rate": 3.6780851993794e-05, + "loss": 2.1163, + "step": 9137000 + }, + { + "epoch": 26.45, + "learning_rate": 3.678012834614672e-05, + "loss": 2.1233, + "step": 9137500 + }, + { + "epoch": 26.45, + "learning_rate": 3.6779404698499445e-05, + "loss": 2.1228, + "step": 9138000 + }, + { + "epoch": 26.45, + "learning_rate": 3.6778681050852174e-05, + "loss": 2.133, + "step": 9138500 + }, + { + "epoch": 26.45, + "learning_rate": 3.6777957403204896e-05, + "loss": 2.1509, + "step": 9139000 + }, + { + "epoch": 26.46, + "learning_rate": 3.677723520285291e-05, + "loss": 2.1352, + "step": 9139500 + }, + { + "epoch": 26.46, + "learning_rate": 3.6776511555205634e-05, + "loss": 2.1335, + "step": 9140000 + }, + { + "epoch": 26.46, + "learning_rate": 3.6775787907558356e-05, + "loss": 2.1556, + "step": 9140500 + }, + { + "epoch": 26.46, + "learning_rate": 3.677506570720637e-05, + "loss": 2.167, + "step": 9141000 + }, + { + "epoch": 26.46, + "learning_rate": 3.6774343506854394e-05, + "loss": 2.1393, + "step": 9141500 + }, + { + "epoch": 26.46, + "learning_rate": 3.6773619859207116e-05, + "loss": 2.1128, + "step": 9142000 + }, + { + "epoch": 26.46, + "learning_rate": 3.677289621155984e-05, + "loss": 2.1315, + "step": 9142500 + }, + { + "epoch": 26.47, + "learning_rate": 3.677217256391256e-05, + "loss": 2.1425, + "step": 9143000 + }, + { + "epoch": 26.47, + "learning_rate": 3.677144891626528e-05, + "loss": 2.1512, + "step": 9143500 + }, + { + "epoch": 26.47, + "learning_rate": 3.6770725268618005e-05, + "loss": 2.125, + "step": 9144000 + }, + { + "epoch": 26.47, + "learning_rate": 3.6770001620970734e-05, + "loss": 2.1214, + "step": 9144500 + }, + { + "epoch": 26.47, + "learning_rate": 3.6769277973323457e-05, + "loss": 2.1471, + "step": 9145000 + }, + { + "epoch": 26.47, + "learning_rate": 3.676855432567618e-05, + "loss": 2.1413, + "step": 9145500 + }, + { + "epoch": 26.47, + "learning_rate": 3.67678306780289e-05, + "loss": 2.1458, + "step": 9146000 + }, + { + "epoch": 26.48, + "learning_rate": 3.676710703038162e-05, + "loss": 2.1184, + "step": 9146500 + }, + { + "epoch": 26.48, + "learning_rate": 3.6766383382734345e-05, + "loss": 2.1495, + "step": 9147000 + }, + { + "epoch": 26.48, + "learning_rate": 3.6765659735087075e-05, + "loss": 2.141, + "step": 9147500 + }, + { + "epoch": 26.48, + "learning_rate": 3.67649360874398e-05, + "loss": 2.1262, + "step": 9148000 + }, + { + "epoch": 26.48, + "learning_rate": 3.676421243979252e-05, + "loss": 2.1389, + "step": 9148500 + }, + { + "epoch": 26.48, + "learning_rate": 3.676348879214524e-05, + "loss": 2.1343, + "step": 9149000 + }, + { + "epoch": 26.48, + "learning_rate": 3.6762765144497963e-05, + "loss": 2.1418, + "step": 9149500 + }, + { + "epoch": 26.49, + "learning_rate": 3.6762041496850686e-05, + "loss": 2.1259, + "step": 9150000 + }, + { + "epoch": 26.49, + "learning_rate": 3.676131784920341e-05, + "loss": 2.1004, + "step": 9150500 + }, + { + "epoch": 26.49, + "learning_rate": 3.6760595648851423e-05, + "loss": 2.1433, + "step": 9151000 + }, + { + "epoch": 26.49, + "learning_rate": 3.675987200120415e-05, + "loss": 2.1264, + "step": 9151500 + }, + { + "epoch": 26.49, + "learning_rate": 3.6759148353556875e-05, + "loss": 2.1295, + "step": 9152000 + }, + { + "epoch": 26.49, + "learning_rate": 3.67584261532049e-05, + "loss": 2.1401, + "step": 9152500 + }, + { + "epoch": 26.49, + "learning_rate": 3.675770250555762e-05, + "loss": 2.1281, + "step": 9153000 + }, + { + "epoch": 26.5, + "learning_rate": 3.675697885791034e-05, + "loss": 2.1347, + "step": 9153500 + }, + { + "epoch": 26.5, + "learning_rate": 3.6756255210263064e-05, + "loss": 2.1333, + "step": 9154000 + }, + { + "epoch": 26.5, + "learning_rate": 3.6755531562615786e-05, + "loss": 2.1514, + "step": 9154500 + }, + { + "epoch": 26.5, + "learning_rate": 3.675480791496851e-05, + "loss": 2.1318, + "step": 9155000 + }, + { + "epoch": 26.5, + "learning_rate": 3.675408426732123e-05, + "loss": 2.1404, + "step": 9155500 + }, + { + "epoch": 26.5, + "learning_rate": 3.675336061967395e-05, + "loss": 2.1406, + "step": 9156000 + }, + { + "epoch": 26.5, + "learning_rate": 3.6752636972026675e-05, + "loss": 2.1468, + "step": 9156500 + }, + { + "epoch": 26.51, + "learning_rate": 3.67519133243794e-05, + "loss": 2.1264, + "step": 9157000 + }, + { + "epoch": 26.51, + "learning_rate": 3.6751189676732126e-05, + "loss": 2.1622, + "step": 9157500 + }, + { + "epoch": 26.51, + "learning_rate": 3.675046602908485e-05, + "loss": 2.1551, + "step": 9158000 + }, + { + "epoch": 26.51, + "learning_rate": 3.6749743828732864e-05, + "loss": 2.1614, + "step": 9158500 + }, + { + "epoch": 26.51, + "learning_rate": 3.674902018108559e-05, + "loss": 2.1525, + "step": 9159000 + }, + { + "epoch": 26.51, + "learning_rate": 3.6748296533438315e-05, + "loss": 2.1326, + "step": 9159500 + }, + { + "epoch": 26.51, + "learning_rate": 3.674757288579104e-05, + "loss": 2.1278, + "step": 9160000 + }, + { + "epoch": 26.52, + "learning_rate": 3.674684923814376e-05, + "loss": 2.1561, + "step": 9160500 + }, + { + "epoch": 26.52, + "learning_rate": 3.6746127037791775e-05, + "loss": 2.1438, + "step": 9161000 + }, + { + "epoch": 26.52, + "learning_rate": 3.67454033901445e-05, + "loss": 2.1258, + "step": 9161500 + }, + { + "epoch": 26.52, + "learning_rate": 3.6744679742497226e-05, + "loss": 2.1378, + "step": 9162000 + }, + { + "epoch": 26.52, + "learning_rate": 3.674395609484995e-05, + "loss": 2.147, + "step": 9162500 + }, + { + "epoch": 26.52, + "learning_rate": 3.674323244720267e-05, + "loss": 2.131, + "step": 9163000 + }, + { + "epoch": 26.52, + "learning_rate": 3.674250879955539e-05, + "loss": 2.1427, + "step": 9163500 + }, + { + "epoch": 26.53, + "learning_rate": 3.674178659920341e-05, + "loss": 2.1377, + "step": 9164000 + }, + { + "epoch": 26.53, + "learning_rate": 3.674106295155613e-05, + "loss": 2.1482, + "step": 9164500 + }, + { + "epoch": 26.53, + "learning_rate": 3.674033930390885e-05, + "loss": 2.139, + "step": 9165000 + }, + { + "epoch": 26.53, + "learning_rate": 3.6739615656261575e-05, + "loss": 2.1478, + "step": 9165500 + }, + { + "epoch": 26.53, + "learning_rate": 3.67388920086143e-05, + "loss": 2.1425, + "step": 9166000 + }, + { + "epoch": 26.53, + "learning_rate": 3.673816836096703e-05, + "loss": 2.1377, + "step": 9166500 + }, + { + "epoch": 26.53, + "learning_rate": 3.673744471331975e-05, + "loss": 2.1237, + "step": 9167000 + }, + { + "epoch": 26.54, + "learning_rate": 3.673672106567248e-05, + "loss": 2.1322, + "step": 9167500 + }, + { + "epoch": 26.54, + "learning_rate": 3.67359974180252e-05, + "loss": 2.1451, + "step": 9168000 + }, + { + "epoch": 26.54, + "learning_rate": 3.673527377037792e-05, + "loss": 2.1698, + "step": 9168500 + }, + { + "epoch": 26.54, + "learning_rate": 3.6734550122730645e-05, + "loss": 2.1015, + "step": 9169000 + }, + { + "epoch": 26.54, + "learning_rate": 3.673382647508337e-05, + "loss": 2.1358, + "step": 9169500 + }, + { + "epoch": 26.54, + "learning_rate": 3.673310282743609e-05, + "loss": 2.1305, + "step": 9170000 + }, + { + "epoch": 26.54, + "learning_rate": 3.6732380627084105e-05, + "loss": 2.1278, + "step": 9170500 + }, + { + "epoch": 26.55, + "learning_rate": 3.673165697943683e-05, + "loss": 2.1284, + "step": 9171000 + }, + { + "epoch": 26.55, + "learning_rate": 3.673093333178955e-05, + "loss": 2.1518, + "step": 9171500 + }, + { + "epoch": 26.55, + "learning_rate": 3.673021113143757e-05, + "loss": 2.1475, + "step": 9172000 + }, + { + "epoch": 26.55, + "learning_rate": 3.6729487483790294e-05, + "loss": 2.1455, + "step": 9172500 + }, + { + "epoch": 26.55, + "learning_rate": 3.6728763836143016e-05, + "loss": 2.1337, + "step": 9173000 + }, + { + "epoch": 26.55, + "learning_rate": 3.6728040188495745e-05, + "loss": 2.1241, + "step": 9173500 + }, + { + "epoch": 26.55, + "learning_rate": 3.672731654084847e-05, + "loss": 2.1295, + "step": 9174000 + }, + { + "epoch": 26.56, + "learning_rate": 3.672659289320119e-05, + "loss": 2.1186, + "step": 9174500 + }, + { + "epoch": 26.56, + "learning_rate": 3.672586924555391e-05, + "loss": 2.1272, + "step": 9175000 + }, + { + "epoch": 26.56, + "learning_rate": 3.672514704520193e-05, + "loss": 2.1527, + "step": 9175500 + }, + { + "epoch": 26.56, + "learning_rate": 3.672442339755465e-05, + "loss": 2.1433, + "step": 9176000 + }, + { + "epoch": 26.56, + "learning_rate": 3.672369974990738e-05, + "loss": 2.126, + "step": 9176500 + }, + { + "epoch": 26.56, + "learning_rate": 3.67229761022601e-05, + "loss": 2.1539, + "step": 9177000 + }, + { + "epoch": 26.57, + "learning_rate": 3.672225245461282e-05, + "loss": 2.1526, + "step": 9177500 + }, + { + "epoch": 26.57, + "learning_rate": 3.6721528806965545e-05, + "loss": 2.1414, + "step": 9178000 + }, + { + "epoch": 26.57, + "learning_rate": 3.672080660661356e-05, + "loss": 2.1362, + "step": 9178500 + }, + { + "epoch": 26.57, + "learning_rate": 3.6720084406261576e-05, + "loss": 2.1084, + "step": 9179000 + }, + { + "epoch": 26.57, + "learning_rate": 3.67193607586143e-05, + "loss": 2.1167, + "step": 9179500 + }, + { + "epoch": 26.57, + "learning_rate": 3.671863711096703e-05, + "loss": 2.1127, + "step": 9180000 + }, + { + "epoch": 26.57, + "learning_rate": 3.671791346331975e-05, + "loss": 2.1119, + "step": 9180500 + }, + { + "epoch": 26.58, + "learning_rate": 3.671718981567247e-05, + "loss": 2.1605, + "step": 9181000 + }, + { + "epoch": 26.58, + "learning_rate": 3.67164661680252e-05, + "loss": 2.1275, + "step": 9181500 + }, + { + "epoch": 26.58, + "learning_rate": 3.6715743967673216e-05, + "loss": 2.1273, + "step": 9182000 + }, + { + "epoch": 26.58, + "learning_rate": 3.671502032002594e-05, + "loss": 2.1093, + "step": 9182500 + }, + { + "epoch": 26.58, + "learning_rate": 3.671429667237866e-05, + "loss": 2.1511, + "step": 9183000 + }, + { + "epoch": 26.58, + "learning_rate": 3.671357302473138e-05, + "loss": 2.1362, + "step": 9183500 + }, + { + "epoch": 26.58, + "learning_rate": 3.6712849377084105e-05, + "loss": 2.1469, + "step": 9184000 + }, + { + "epoch": 26.59, + "learning_rate": 3.671212572943683e-05, + "loss": 2.1289, + "step": 9184500 + }, + { + "epoch": 26.59, + "learning_rate": 3.671140208178955e-05, + "loss": 2.1461, + "step": 9185000 + }, + { + "epoch": 26.59, + "learning_rate": 3.671067843414228e-05, + "loss": 2.1371, + "step": 9185500 + }, + { + "epoch": 26.59, + "learning_rate": 3.6709956233790294e-05, + "loss": 2.128, + "step": 9186000 + }, + { + "epoch": 26.59, + "learning_rate": 3.670923258614302e-05, + "loss": 2.154, + "step": 9186500 + }, + { + "epoch": 26.59, + "learning_rate": 3.670850893849574e-05, + "loss": 2.1334, + "step": 9187000 + }, + { + "epoch": 26.59, + "learning_rate": 3.670778529084846e-05, + "loss": 2.1331, + "step": 9187500 + }, + { + "epoch": 26.6, + "learning_rate": 3.670706309049648e-05, + "loss": 2.1125, + "step": 9188000 + }, + { + "epoch": 26.6, + "learning_rate": 3.670634233743979e-05, + "loss": 2.1372, + "step": 9188500 + }, + { + "epoch": 26.6, + "learning_rate": 3.670561868979252e-05, + "loss": 2.1458, + "step": 9189000 + }, + { + "epoch": 26.6, + "learning_rate": 3.6704895042145244e-05, + "loss": 2.141, + "step": 9189500 + }, + { + "epoch": 26.6, + "learning_rate": 3.6704171394497966e-05, + "loss": 2.1386, + "step": 9190000 + }, + { + "epoch": 26.6, + "learning_rate": 3.670344774685069e-05, + "loss": 2.1422, + "step": 9190500 + }, + { + "epoch": 26.6, + "learning_rate": 3.670272409920341e-05, + "loss": 2.1495, + "step": 9191000 + }, + { + "epoch": 26.61, + "learning_rate": 3.670200045155613e-05, + "loss": 2.1428, + "step": 9191500 + }, + { + "epoch": 26.61, + "learning_rate": 3.6701276803908855e-05, + "loss": 2.1358, + "step": 9192000 + }, + { + "epoch": 26.61, + "learning_rate": 3.670055315626158e-05, + "loss": 2.1554, + "step": 9192500 + }, + { + "epoch": 26.61, + "learning_rate": 3.6699829508614306e-05, + "loss": 2.1434, + "step": 9193000 + }, + { + "epoch": 26.61, + "learning_rate": 3.669910586096703e-05, + "loss": 2.1222, + "step": 9193500 + }, + { + "epoch": 26.61, + "learning_rate": 3.669838221331975e-05, + "loss": 2.1239, + "step": 9194000 + }, + { + "epoch": 26.61, + "learning_rate": 3.669765856567247e-05, + "loss": 2.115, + "step": 9194500 + }, + { + "epoch": 26.62, + "learning_rate": 3.669693636532049e-05, + "loss": 2.1451, + "step": 9195000 + }, + { + "epoch": 26.62, + "learning_rate": 3.669621271767321e-05, + "loss": 2.125, + "step": 9195500 + }, + { + "epoch": 26.62, + "learning_rate": 3.6695491964616526e-05, + "loss": 2.1508, + "step": 9196000 + }, + { + "epoch": 26.62, + "learning_rate": 3.669476831696925e-05, + "loss": 2.1543, + "step": 9196500 + }, + { + "epoch": 26.62, + "learning_rate": 3.669404466932198e-05, + "loss": 2.1166, + "step": 9197000 + }, + { + "epoch": 26.62, + "learning_rate": 3.66933210216747e-05, + "loss": 2.1467, + "step": 9197500 + }, + { + "epoch": 26.62, + "learning_rate": 3.669259737402742e-05, + "loss": 2.1466, + "step": 9198000 + }, + { + "epoch": 26.63, + "learning_rate": 3.6691873726380144e-05, + "loss": 2.1286, + "step": 9198500 + }, + { + "epoch": 26.63, + "learning_rate": 3.6691150078732866e-05, + "loss": 2.1323, + "step": 9199000 + }, + { + "epoch": 26.63, + "learning_rate": 3.669042643108559e-05, + "loss": 2.1461, + "step": 9199500 + }, + { + "epoch": 26.63, + "learning_rate": 3.668970278343831e-05, + "loss": 2.1402, + "step": 9200000 + }, + { + "epoch": 26.63, + "learning_rate": 3.668897913579103e-05, + "loss": 2.1385, + "step": 9200500 + }, + { + "epoch": 26.63, + "learning_rate": 3.6688255488143755e-05, + "loss": 2.1449, + "step": 9201000 + }, + { + "epoch": 26.63, + "learning_rate": 3.668753184049648e-05, + "loss": 2.1374, + "step": 9201500 + }, + { + "epoch": 26.64, + "learning_rate": 3.6686808192849206e-05, + "loss": 2.1375, + "step": 9202000 + }, + { + "epoch": 26.64, + "learning_rate": 3.668608454520193e-05, + "loss": 2.1437, + "step": 9202500 + }, + { + "epoch": 26.64, + "learning_rate": 3.6685362344849944e-05, + "loss": 2.1153, + "step": 9203000 + }, + { + "epoch": 26.64, + "learning_rate": 3.6684638697202667e-05, + "loss": 2.1451, + "step": 9203500 + }, + { + "epoch": 26.64, + "learning_rate": 3.6683915049555396e-05, + "loss": 2.1506, + "step": 9204000 + }, + { + "epoch": 26.64, + "learning_rate": 3.668319140190812e-05, + "loss": 2.1622, + "step": 9204500 + }, + { + "epoch": 26.64, + "learning_rate": 3.668246775426084e-05, + "loss": 2.1542, + "step": 9205000 + }, + { + "epoch": 26.65, + "learning_rate": 3.668174410661356e-05, + "loss": 2.1223, + "step": 9205500 + }, + { + "epoch": 26.65, + "learning_rate": 3.6681020458966284e-05, + "loss": 2.1261, + "step": 9206000 + }, + { + "epoch": 26.65, + "learning_rate": 3.668029681131901e-05, + "loss": 2.1192, + "step": 9206500 + }, + { + "epoch": 26.65, + "learning_rate": 3.667957316367173e-05, + "loss": 2.1366, + "step": 9207000 + }, + { + "epoch": 26.65, + "learning_rate": 3.667885096331975e-05, + "loss": 2.1092, + "step": 9207500 + }, + { + "epoch": 26.65, + "learning_rate": 3.6678127315672473e-05, + "loss": 2.1258, + "step": 9208000 + }, + { + "epoch": 26.65, + "learning_rate": 3.667740511532049e-05, + "loss": 2.1426, + "step": 9208500 + }, + { + "epoch": 26.66, + "learning_rate": 3.667668146767321e-05, + "loss": 2.1382, + "step": 9209000 + }, + { + "epoch": 26.66, + "learning_rate": 3.6675957820025934e-05, + "loss": 2.1398, + "step": 9209500 + }, + { + "epoch": 26.66, + "learning_rate": 3.6675234172378656e-05, + "loss": 2.137, + "step": 9210000 + }, + { + "epoch": 26.66, + "learning_rate": 3.667451197202668e-05, + "loss": 2.1328, + "step": 9210500 + }, + { + "epoch": 26.66, + "learning_rate": 3.6673789771674694e-05, + "loss": 2.1133, + "step": 9211000 + }, + { + "epoch": 26.66, + "learning_rate": 3.667306612402742e-05, + "loss": 2.1502, + "step": 9211500 + }, + { + "epoch": 26.66, + "learning_rate": 3.6672342476380145e-05, + "loss": 2.1196, + "step": 9212000 + }, + { + "epoch": 26.67, + "learning_rate": 3.667161882873287e-05, + "loss": 2.1227, + "step": 9212500 + }, + { + "epoch": 26.67, + "learning_rate": 3.667089518108559e-05, + "loss": 2.1302, + "step": 9213000 + }, + { + "epoch": 26.67, + "learning_rate": 3.667017153343831e-05, + "loss": 2.1368, + "step": 9213500 + }, + { + "epoch": 26.67, + "learning_rate": 3.6669447885791034e-05, + "loss": 2.1432, + "step": 9214000 + }, + { + "epoch": 26.67, + "learning_rate": 3.6668724238143756e-05, + "loss": 2.1367, + "step": 9214500 + }, + { + "epoch": 26.67, + "learning_rate": 3.6668000590496485e-05, + "loss": 2.1125, + "step": 9215000 + }, + { + "epoch": 26.68, + "learning_rate": 3.666727694284921e-05, + "loss": 2.1507, + "step": 9215500 + }, + { + "epoch": 26.68, + "learning_rate": 3.666655474249722e-05, + "loss": 2.1429, + "step": 9216000 + }, + { + "epoch": 26.68, + "learning_rate": 3.6665831094849945e-05, + "loss": 2.1362, + "step": 9216500 + }, + { + "epoch": 26.68, + "learning_rate": 3.666510744720267e-05, + "loss": 2.1175, + "step": 9217000 + }, + { + "epoch": 26.68, + "learning_rate": 3.666438379955539e-05, + "loss": 2.1568, + "step": 9217500 + }, + { + "epoch": 26.68, + "learning_rate": 3.666366015190811e-05, + "loss": 2.1417, + "step": 9218000 + }, + { + "epoch": 26.68, + "learning_rate": 3.6662936504260834e-05, + "loss": 2.1523, + "step": 9218500 + }, + { + "epoch": 26.69, + "learning_rate": 3.666221285661356e-05, + "loss": 2.1162, + "step": 9219000 + }, + { + "epoch": 26.69, + "learning_rate": 3.6661489208966285e-05, + "loss": 2.1588, + "step": 9219500 + }, + { + "epoch": 26.69, + "learning_rate": 3.666076556131901e-05, + "loss": 2.1314, + "step": 9220000 + }, + { + "epoch": 26.69, + "learning_rate": 3.6660041913671737e-05, + "loss": 2.1179, + "step": 9220500 + }, + { + "epoch": 26.69, + "learning_rate": 3.665931826602446e-05, + "loss": 2.1386, + "step": 9221000 + }, + { + "epoch": 26.69, + "learning_rate": 3.665859461837718e-05, + "loss": 2.1212, + "step": 9221500 + }, + { + "epoch": 26.69, + "learning_rate": 3.66578709707299e-05, + "loss": 2.1295, + "step": 9222000 + }, + { + "epoch": 26.7, + "learning_rate": 3.665714877037792e-05, + "loss": 2.1387, + "step": 9222500 + }, + { + "epoch": 26.7, + "learning_rate": 3.665642512273064e-05, + "loss": 2.1558, + "step": 9223000 + }, + { + "epoch": 26.7, + "learning_rate": 3.665570147508336e-05, + "loss": 2.1516, + "step": 9223500 + }, + { + "epoch": 26.7, + "learning_rate": 3.6654977827436085e-05, + "loss": 2.1343, + "step": 9224000 + }, + { + "epoch": 26.7, + "learning_rate": 3.66542570743794e-05, + "loss": 2.1452, + "step": 9224500 + }, + { + "epoch": 26.7, + "learning_rate": 3.665353342673212e-05, + "loss": 2.1307, + "step": 9225000 + }, + { + "epoch": 26.7, + "learning_rate": 3.6652809779084846e-05, + "loss": 2.1329, + "step": 9225500 + }, + { + "epoch": 26.71, + "learning_rate": 3.665208613143757e-05, + "loss": 2.1144, + "step": 9226000 + }, + { + "epoch": 26.71, + "learning_rate": 3.6651365378380883e-05, + "loss": 2.1593, + "step": 9226500 + }, + { + "epoch": 26.71, + "learning_rate": 3.665064173073361e-05, + "loss": 2.1296, + "step": 9227000 + }, + { + "epoch": 26.71, + "learning_rate": 3.6649918083086335e-05, + "loss": 2.153, + "step": 9227500 + }, + { + "epoch": 26.71, + "learning_rate": 3.664919443543906e-05, + "loss": 2.145, + "step": 9228000 + }, + { + "epoch": 26.71, + "learning_rate": 3.664847078779178e-05, + "loss": 2.1672, + "step": 9228500 + }, + { + "epoch": 26.71, + "learning_rate": 3.66477471401445e-05, + "loss": 2.1151, + "step": 9229000 + }, + { + "epoch": 26.72, + "learning_rate": 3.6647023492497224e-05, + "loss": 2.1376, + "step": 9229500 + }, + { + "epoch": 26.72, + "learning_rate": 3.6646299844849946e-05, + "loss": 2.1237, + "step": 9230000 + }, + { + "epoch": 26.72, + "learning_rate": 3.664557619720267e-05, + "loss": 2.1373, + "step": 9230500 + }, + { + "epoch": 26.72, + "learning_rate": 3.664485254955539e-05, + "loss": 2.1437, + "step": 9231000 + }, + { + "epoch": 26.72, + "learning_rate": 3.664412890190811e-05, + "loss": 2.1523, + "step": 9231500 + }, + { + "epoch": 26.72, + "learning_rate": 3.6643405254260835e-05, + "loss": 2.149, + "step": 9232000 + }, + { + "epoch": 26.72, + "learning_rate": 3.664268160661356e-05, + "loss": 2.1558, + "step": 9232500 + }, + { + "epoch": 26.73, + "learning_rate": 3.6641957958966286e-05, + "loss": 2.1288, + "step": 9233000 + }, + { + "epoch": 26.73, + "learning_rate": 3.664123431131901e-05, + "loss": 2.165, + "step": 9233500 + }, + { + "epoch": 26.73, + "learning_rate": 3.664051066367174e-05, + "loss": 2.1393, + "step": 9234000 + }, + { + "epoch": 26.73, + "learning_rate": 3.663978701602446e-05, + "loss": 2.1356, + "step": 9234500 + }, + { + "epoch": 26.73, + "learning_rate": 3.6639064815672475e-05, + "loss": 2.1422, + "step": 9235000 + }, + { + "epoch": 26.73, + "learning_rate": 3.66383411680252e-05, + "loss": 2.1455, + "step": 9235500 + }, + { + "epoch": 26.73, + "learning_rate": 3.663761896767321e-05, + "loss": 2.1487, + "step": 9236000 + }, + { + "epoch": 26.74, + "learning_rate": 3.6636895320025935e-05, + "loss": 2.1423, + "step": 9236500 + }, + { + "epoch": 26.74, + "learning_rate": 3.6636171672378664e-05, + "loss": 2.122, + "step": 9237000 + }, + { + "epoch": 26.74, + "learning_rate": 3.6635448024731386e-05, + "loss": 2.156, + "step": 9237500 + }, + { + "epoch": 26.74, + "learning_rate": 3.663472437708411e-05, + "loss": 2.1476, + "step": 9238000 + }, + { + "epoch": 26.74, + "learning_rate": 3.663400072943683e-05, + "loss": 2.1299, + "step": 9238500 + }, + { + "epoch": 26.74, + "learning_rate": 3.6633278529084846e-05, + "loss": 2.1625, + "step": 9239000 + }, + { + "epoch": 26.74, + "learning_rate": 3.663255488143757e-05, + "loss": 2.1428, + "step": 9239500 + }, + { + "epoch": 26.75, + "learning_rate": 3.663183123379029e-05, + "loss": 2.1355, + "step": 9240000 + }, + { + "epoch": 26.75, + "learning_rate": 3.663110758614301e-05, + "loss": 2.1319, + "step": 9240500 + }, + { + "epoch": 26.75, + "learning_rate": 3.6630385385791035e-05, + "loss": 2.1423, + "step": 9241000 + }, + { + "epoch": 26.75, + "learning_rate": 3.6629661738143764e-05, + "loss": 2.1486, + "step": 9241500 + }, + { + "epoch": 26.75, + "learning_rate": 3.662893809049649e-05, + "loss": 2.1383, + "step": 9242000 + }, + { + "epoch": 26.75, + "learning_rate": 3.662821444284921e-05, + "loss": 2.1648, + "step": 9242500 + }, + { + "epoch": 26.75, + "learning_rate": 3.662749079520193e-05, + "loss": 2.136, + "step": 9243000 + }, + { + "epoch": 26.76, + "learning_rate": 3.662676714755465e-05, + "loss": 2.1374, + "step": 9243500 + }, + { + "epoch": 26.76, + "learning_rate": 3.6626043499907376e-05, + "loss": 2.1315, + "step": 9244000 + }, + { + "epoch": 26.76, + "learning_rate": 3.66253198522601e-05, + "loss": 2.1656, + "step": 9244500 + }, + { + "epoch": 26.76, + "learning_rate": 3.662459620461282e-05, + "loss": 2.1479, + "step": 9245000 + }, + { + "epoch": 26.76, + "learning_rate": 3.6623874004260836e-05, + "loss": 2.1446, + "step": 9245500 + }, + { + "epoch": 26.76, + "learning_rate": 3.6623150356613565e-05, + "loss": 2.135, + "step": 9246000 + }, + { + "epoch": 26.76, + "learning_rate": 3.662242815626158e-05, + "loss": 2.1551, + "step": 9246500 + }, + { + "epoch": 26.77, + "learning_rate": 3.66217045086143e-05, + "loss": 2.1248, + "step": 9247000 + }, + { + "epoch": 26.77, + "learning_rate": 3.6620980860967025e-05, + "loss": 2.1632, + "step": 9247500 + }, + { + "epoch": 26.77, + "learning_rate": 3.662025866061504e-05, + "loss": 2.1292, + "step": 9248000 + }, + { + "epoch": 26.77, + "learning_rate": 3.661953501296776e-05, + "loss": 2.1409, + "step": 9248500 + }, + { + "epoch": 26.77, + "learning_rate": 3.661881136532049e-05, + "loss": 2.1437, + "step": 9249000 + }, + { + "epoch": 26.77, + "learning_rate": 3.6618087717673214e-05, + "loss": 2.1611, + "step": 9249500 + }, + { + "epoch": 26.77, + "learning_rate": 3.6617364070025936e-05, + "loss": 2.1592, + "step": 9250000 + }, + { + "epoch": 26.78, + "learning_rate": 3.6616640422378665e-05, + "loss": 2.1548, + "step": 9250500 + }, + { + "epoch": 26.78, + "learning_rate": 3.661591677473139e-05, + "loss": 2.1295, + "step": 9251000 + }, + { + "epoch": 26.78, + "learning_rate": 3.661519312708411e-05, + "loss": 2.1375, + "step": 9251500 + }, + { + "epoch": 26.78, + "learning_rate": 3.661446947943683e-05, + "loss": 2.1253, + "step": 9252000 + }, + { + "epoch": 26.78, + "learning_rate": 3.661374727908485e-05, + "loss": 2.133, + "step": 9252500 + }, + { + "epoch": 26.78, + "learning_rate": 3.661302363143757e-05, + "loss": 2.1567, + "step": 9253000 + }, + { + "epoch": 26.79, + "learning_rate": 3.661229998379029e-05, + "loss": 2.1385, + "step": 9253500 + }, + { + "epoch": 26.79, + "learning_rate": 3.6611576336143014e-05, + "loss": 2.1184, + "step": 9254000 + }, + { + "epoch": 26.79, + "learning_rate": 3.6610852688495736e-05, + "loss": 2.1171, + "step": 9254500 + }, + { + "epoch": 26.79, + "learning_rate": 3.661013048814376e-05, + "loss": 2.1176, + "step": 9255000 + }, + { + "epoch": 26.79, + "learning_rate": 3.660940684049648e-05, + "loss": 2.1424, + "step": 9255500 + }, + { + "epoch": 26.79, + "learning_rate": 3.66086831928492e-05, + "loss": 2.1311, + "step": 9256000 + }, + { + "epoch": 26.79, + "learning_rate": 3.660795954520193e-05, + "loss": 2.1362, + "step": 9256500 + }, + { + "epoch": 26.8, + "learning_rate": 3.6607235897554654e-05, + "loss": 2.1427, + "step": 9257000 + }, + { + "epoch": 26.8, + "learning_rate": 3.6606512249907376e-05, + "loss": 2.1485, + "step": 9257500 + }, + { + "epoch": 26.8, + "learning_rate": 3.66057886022601e-05, + "loss": 2.1493, + "step": 9258000 + }, + { + "epoch": 26.8, + "learning_rate": 3.660506495461282e-05, + "loss": 2.1304, + "step": 9258500 + }, + { + "epoch": 26.8, + "learning_rate": 3.6604342754260836e-05, + "loss": 2.11, + "step": 9259000 + }, + { + "epoch": 26.8, + "learning_rate": 3.6603619106613565e-05, + "loss": 2.1388, + "step": 9259500 + }, + { + "epoch": 26.8, + "learning_rate": 3.660289545896629e-05, + "loss": 2.1235, + "step": 9260000 + }, + { + "epoch": 26.81, + "learning_rate": 3.66021732586143e-05, + "loss": 2.1608, + "step": 9260500 + }, + { + "epoch": 26.81, + "learning_rate": 3.6601449610967025e-05, + "loss": 2.1398, + "step": 9261000 + }, + { + "epoch": 26.81, + "learning_rate": 3.660072741061504e-05, + "loss": 2.144, + "step": 9261500 + }, + { + "epoch": 26.81, + "learning_rate": 3.660000376296776e-05, + "loss": 2.1471, + "step": 9262000 + }, + { + "epoch": 26.81, + "learning_rate": 3.659928011532049e-05, + "loss": 2.1408, + "step": 9262500 + }, + { + "epoch": 26.81, + "learning_rate": 3.6598556467673214e-05, + "loss": 2.1159, + "step": 9263000 + }, + { + "epoch": 26.81, + "learning_rate": 3.659783282002594e-05, + "loss": 2.1462, + "step": 9263500 + }, + { + "epoch": 26.82, + "learning_rate": 3.6597109172378666e-05, + "loss": 2.1333, + "step": 9264000 + }, + { + "epoch": 26.82, + "learning_rate": 3.659638552473139e-05, + "loss": 2.1169, + "step": 9264500 + }, + { + "epoch": 26.82, + "learning_rate": 3.659566187708411e-05, + "loss": 2.1264, + "step": 9265000 + }, + { + "epoch": 26.82, + "learning_rate": 3.659493822943683e-05, + "loss": 2.1359, + "step": 9265500 + }, + { + "epoch": 26.82, + "learning_rate": 3.6594214581789555e-05, + "loss": 2.1231, + "step": 9266000 + }, + { + "epoch": 26.82, + "learning_rate": 3.659349093414228e-05, + "loss": 2.1545, + "step": 9266500 + }, + { + "epoch": 26.82, + "learning_rate": 3.6592767286495e-05, + "loss": 2.1399, + "step": 9267000 + }, + { + "epoch": 26.83, + "learning_rate": 3.6592045086143015e-05, + "loss": 2.1413, + "step": 9267500 + }, + { + "epoch": 26.83, + "learning_rate": 3.6591321438495744e-05, + "loss": 2.133, + "step": 9268000 + }, + { + "epoch": 26.83, + "learning_rate": 3.659059923814376e-05, + "loss": 2.1533, + "step": 9268500 + }, + { + "epoch": 26.83, + "learning_rate": 3.6589877037791775e-05, + "loss": 2.1318, + "step": 9269000 + }, + { + "epoch": 26.83, + "learning_rate": 3.65891533901445e-05, + "loss": 2.1441, + "step": 9269500 + }, + { + "epoch": 26.83, + "learning_rate": 3.658842974249722e-05, + "loss": 2.1386, + "step": 9270000 + }, + { + "epoch": 26.83, + "learning_rate": 3.658770609484994e-05, + "loss": 2.1183, + "step": 9270500 + }, + { + "epoch": 26.84, + "learning_rate": 3.6586982447202664e-05, + "loss": 2.1395, + "step": 9271000 + }, + { + "epoch": 26.84, + "learning_rate": 3.658625879955539e-05, + "loss": 2.1544, + "step": 9271500 + }, + { + "epoch": 26.84, + "learning_rate": 3.6585535151908115e-05, + "loss": 2.1648, + "step": 9272000 + }, + { + "epoch": 26.84, + "learning_rate": 3.6584811504260844e-05, + "loss": 2.1506, + "step": 9272500 + }, + { + "epoch": 26.84, + "learning_rate": 3.6584087856613566e-05, + "loss": 2.1505, + "step": 9273000 + }, + { + "epoch": 26.84, + "learning_rate": 3.658336420896629e-05, + "loss": 2.1548, + "step": 9273500 + }, + { + "epoch": 26.84, + "learning_rate": 3.658264056131901e-05, + "loss": 2.1496, + "step": 9274000 + }, + { + "epoch": 26.85, + "learning_rate": 3.658191691367173e-05, + "loss": 2.1281, + "step": 9274500 + }, + { + "epoch": 26.85, + "learning_rate": 3.6581193266024455e-05, + "loss": 2.1403, + "step": 9275000 + }, + { + "epoch": 26.85, + "learning_rate": 3.658046961837718e-05, + "loss": 2.1404, + "step": 9275500 + }, + { + "epoch": 26.85, + "learning_rate": 3.65797459707299e-05, + "loss": 2.1401, + "step": 9276000 + }, + { + "epoch": 26.85, + "learning_rate": 3.657902232308262e-05, + "loss": 2.1477, + "step": 9276500 + }, + { + "epoch": 26.85, + "learning_rate": 3.6578298675435344e-05, + "loss": 2.1195, + "step": 9277000 + }, + { + "epoch": 26.85, + "learning_rate": 3.6577575027788066e-05, + "loss": 2.1554, + "step": 9277500 + }, + { + "epoch": 26.86, + "learning_rate": 3.6576851380140795e-05, + "loss": 2.1367, + "step": 9278000 + }, + { + "epoch": 26.86, + "learning_rate": 3.6576130627084104e-05, + "loss": 2.1406, + "step": 9278500 + }, + { + "epoch": 26.86, + "learning_rate": 3.657540697943683e-05, + "loss": 2.1552, + "step": 9279000 + }, + { + "epoch": 26.86, + "learning_rate": 3.6574683331789555e-05, + "loss": 2.1598, + "step": 9279500 + }, + { + "epoch": 26.86, + "learning_rate": 3.657395968414228e-05, + "loss": 2.1591, + "step": 9280000 + }, + { + "epoch": 26.86, + "learning_rate": 3.6573236036495e-05, + "loss": 2.1188, + "step": 9280500 + }, + { + "epoch": 26.86, + "learning_rate": 3.657251238884772e-05, + "loss": 2.1295, + "step": 9281000 + }, + { + "epoch": 26.87, + "learning_rate": 3.6571788741200444e-05, + "loss": 2.1022, + "step": 9281500 + }, + { + "epoch": 26.87, + "learning_rate": 3.6571065093553167e-05, + "loss": 2.1486, + "step": 9282000 + }, + { + "epoch": 26.87, + "learning_rate": 3.6570341445905896e-05, + "loss": 2.1545, + "step": 9282500 + }, + { + "epoch": 26.87, + "learning_rate": 3.656961779825862e-05, + "loss": 2.1382, + "step": 9283000 + }, + { + "epoch": 26.87, + "learning_rate": 3.656889415061134e-05, + "loss": 2.1414, + "step": 9283500 + }, + { + "epoch": 26.87, + "learning_rate": 3.6568171950259356e-05, + "loss": 2.1492, + "step": 9284000 + }, + { + "epoch": 26.87, + "learning_rate": 3.656744974990737e-05, + "loss": 2.1552, + "step": 9284500 + }, + { + "epoch": 26.88, + "learning_rate": 3.6566726102260093e-05, + "loss": 2.1419, + "step": 9285000 + }, + { + "epoch": 26.88, + "learning_rate": 3.6566002454612816e-05, + "loss": 2.1349, + "step": 9285500 + }, + { + "epoch": 26.88, + "learning_rate": 3.656528025426084e-05, + "loss": 2.1363, + "step": 9286000 + }, + { + "epoch": 26.88, + "learning_rate": 3.656455660661357e-05, + "loss": 2.1527, + "step": 9286500 + }, + { + "epoch": 26.88, + "learning_rate": 3.656383295896629e-05, + "loss": 2.1319, + "step": 9287000 + }, + { + "epoch": 26.88, + "learning_rate": 3.6563110758614305e-05, + "loss": 2.1648, + "step": 9287500 + }, + { + "epoch": 26.88, + "learning_rate": 3.656238711096703e-05, + "loss": 2.1269, + "step": 9288000 + }, + { + "epoch": 26.89, + "learning_rate": 3.656166346331975e-05, + "loss": 2.1514, + "step": 9288500 + }, + { + "epoch": 26.89, + "learning_rate": 3.656093981567247e-05, + "loss": 2.1763, + "step": 9289000 + }, + { + "epoch": 26.89, + "learning_rate": 3.6560216168025194e-05, + "loss": 2.1329, + "step": 9289500 + }, + { + "epoch": 26.89, + "learning_rate": 3.6559492520377916e-05, + "loss": 2.1321, + "step": 9290000 + }, + { + "epoch": 26.89, + "learning_rate": 3.6558768872730645e-05, + "loss": 2.1622, + "step": 9290500 + }, + { + "epoch": 26.89, + "learning_rate": 3.655804522508337e-05, + "loss": 2.1351, + "step": 9291000 + }, + { + "epoch": 26.9, + "learning_rate": 3.655732157743609e-05, + "loss": 2.1534, + "step": 9291500 + }, + { + "epoch": 26.9, + "learning_rate": 3.655659792978881e-05, + "loss": 2.1489, + "step": 9292000 + }, + { + "epoch": 26.9, + "learning_rate": 3.6555874282141534e-05, + "loss": 2.1286, + "step": 9292500 + }, + { + "epoch": 26.9, + "learning_rate": 3.6555150634494256e-05, + "loss": 2.1301, + "step": 9293000 + }, + { + "epoch": 26.9, + "learning_rate": 3.6554426986846985e-05, + "loss": 2.1108, + "step": 9293500 + }, + { + "epoch": 26.9, + "learning_rate": 3.655370333919971e-05, + "loss": 2.1345, + "step": 9294000 + }, + { + "epoch": 26.9, + "learning_rate": 3.655298113884772e-05, + "loss": 2.1089, + "step": 9294500 + }, + { + "epoch": 26.91, + "learning_rate": 3.6552257491200445e-05, + "loss": 2.157, + "step": 9295000 + }, + { + "epoch": 26.91, + "learning_rate": 3.655153384355317e-05, + "loss": 2.1542, + "step": 9295500 + }, + { + "epoch": 26.91, + "learning_rate": 3.655081164320119e-05, + "loss": 2.1379, + "step": 9296000 + }, + { + "epoch": 26.91, + "learning_rate": 3.655008799555391e-05, + "loss": 2.153, + "step": 9296500 + }, + { + "epoch": 26.91, + "learning_rate": 3.6549364347906634e-05, + "loss": 2.1381, + "step": 9297000 + }, + { + "epoch": 26.91, + "learning_rate": 3.6548640700259356e-05, + "loss": 2.1364, + "step": 9297500 + }, + { + "epoch": 26.91, + "learning_rate": 3.654791705261208e-05, + "loss": 2.1477, + "step": 9298000 + }, + { + "epoch": 26.92, + "learning_rate": 3.65471934049648e-05, + "loss": 2.1424, + "step": 9298500 + }, + { + "epoch": 26.92, + "learning_rate": 3.654646975731752e-05, + "loss": 2.1482, + "step": 9299000 + }, + { + "epoch": 26.92, + "learning_rate": 3.6545746109670245e-05, + "loss": 2.1598, + "step": 9299500 + }, + { + "epoch": 26.92, + "learning_rate": 3.654502246202297e-05, + "loss": 2.1278, + "step": 9300000 + }, + { + "epoch": 26.92, + "learning_rate": 3.654430026167099e-05, + "loss": 2.1282, + "step": 9300500 + }, + { + "epoch": 26.92, + "learning_rate": 3.654357661402372e-05, + "loss": 2.1273, + "step": 9301000 + }, + { + "epoch": 26.92, + "learning_rate": 3.6542854413671735e-05, + "loss": 2.1472, + "step": 9301500 + }, + { + "epoch": 26.93, + "learning_rate": 3.654213076602446e-05, + "loss": 2.157, + "step": 9302000 + }, + { + "epoch": 26.93, + "learning_rate": 3.654140856567247e-05, + "loss": 2.1444, + "step": 9302500 + }, + { + "epoch": 26.93, + "learning_rate": 3.6540684918025195e-05, + "loss": 2.1562, + "step": 9303000 + }, + { + "epoch": 26.93, + "learning_rate": 3.6539961270377924e-05, + "loss": 2.1473, + "step": 9303500 + }, + { + "epoch": 26.93, + "learning_rate": 3.6539237622730646e-05, + "loss": 2.1421, + "step": 9304000 + }, + { + "epoch": 26.93, + "learning_rate": 3.6538516869673955e-05, + "loss": 2.16, + "step": 9304500 + }, + { + "epoch": 26.93, + "learning_rate": 3.653779322202668e-05, + "loss": 2.1664, + "step": 9305000 + }, + { + "epoch": 26.94, + "learning_rate": 3.65370695743794e-05, + "loss": 2.1723, + "step": 9305500 + }, + { + "epoch": 26.94, + "learning_rate": 3.653634592673212e-05, + "loss": 2.1505, + "step": 9306000 + }, + { + "epoch": 26.94, + "learning_rate": 3.6535622279084844e-05, + "loss": 2.124, + "step": 9306500 + }, + { + "epoch": 26.94, + "learning_rate": 3.653489863143757e-05, + "loss": 2.1374, + "step": 9307000 + }, + { + "epoch": 26.94, + "learning_rate": 3.6534174983790295e-05, + "loss": 2.1466, + "step": 9307500 + }, + { + "epoch": 26.94, + "learning_rate": 3.653345133614302e-05, + "loss": 2.1059, + "step": 9308000 + }, + { + "epoch": 26.94, + "learning_rate": 3.6532727688495746e-05, + "loss": 2.1284, + "step": 9308500 + }, + { + "epoch": 26.95, + "learning_rate": 3.653200404084847e-05, + "loss": 2.1206, + "step": 9309000 + }, + { + "epoch": 26.95, + "learning_rate": 3.653128039320119e-05, + "loss": 2.1431, + "step": 9309500 + }, + { + "epoch": 26.95, + "learning_rate": 3.653055674555391e-05, + "loss": 2.1342, + "step": 9310000 + }, + { + "epoch": 26.95, + "learning_rate": 3.6529833097906635e-05, + "loss": 2.1258, + "step": 9310500 + }, + { + "epoch": 26.95, + "learning_rate": 3.652910945025936e-05, + "loss": 2.1348, + "step": 9311000 + }, + { + "epoch": 26.95, + "learning_rate": 3.652838580261208e-05, + "loss": 2.1432, + "step": 9311500 + }, + { + "epoch": 26.95, + "learning_rate": 3.65276621549648e-05, + "loss": 2.1449, + "step": 9312000 + }, + { + "epoch": 26.96, + "learning_rate": 3.6526939954612824e-05, + "loss": 2.1341, + "step": 9312500 + }, + { + "epoch": 26.96, + "learning_rate": 3.6526216306965546e-05, + "loss": 2.1419, + "step": 9313000 + }, + { + "epoch": 26.96, + "learning_rate": 3.652549265931827e-05, + "loss": 2.1376, + "step": 9313500 + }, + { + "epoch": 26.96, + "learning_rate": 3.6524770458966284e-05, + "loss": 2.1499, + "step": 9314000 + }, + { + "epoch": 26.96, + "learning_rate": 3.6524046811319006e-05, + "loss": 2.1365, + "step": 9314500 + }, + { + "epoch": 26.96, + "learning_rate": 3.652332316367173e-05, + "loss": 2.1465, + "step": 9315000 + }, + { + "epoch": 26.96, + "learning_rate": 3.6522600963319744e-05, + "loss": 2.1642, + "step": 9315500 + }, + { + "epoch": 26.97, + "learning_rate": 3.652187731567247e-05, + "loss": 2.1571, + "step": 9316000 + }, + { + "epoch": 26.97, + "learning_rate": 3.6521153668025195e-05, + "loss": 2.1414, + "step": 9316500 + }, + { + "epoch": 26.97, + "learning_rate": 3.6520430020377924e-05, + "loss": 2.1382, + "step": 9317000 + }, + { + "epoch": 26.97, + "learning_rate": 3.6519706372730647e-05, + "loss": 2.1122, + "step": 9317500 + }, + { + "epoch": 26.97, + "learning_rate": 3.651898272508337e-05, + "loss": 2.1298, + "step": 9318000 + }, + { + "epoch": 26.97, + "learning_rate": 3.651825907743609e-05, + "loss": 2.152, + "step": 9318500 + }, + { + "epoch": 26.97, + "learning_rate": 3.651753542978881e-05, + "loss": 2.1294, + "step": 9319000 + }, + { + "epoch": 26.98, + "learning_rate": 3.6516811782141535e-05, + "loss": 2.1448, + "step": 9319500 + }, + { + "epoch": 26.98, + "learning_rate": 3.651608813449426e-05, + "loss": 2.1297, + "step": 9320000 + }, + { + "epoch": 26.98, + "learning_rate": 3.651536448684698e-05, + "loss": 2.1291, + "step": 9320500 + }, + { + "epoch": 26.98, + "learning_rate": 3.65146408391997e-05, + "loss": 2.1351, + "step": 9321000 + }, + { + "epoch": 26.98, + "learning_rate": 3.6513917191552424e-05, + "loss": 2.1276, + "step": 9321500 + }, + { + "epoch": 26.98, + "learning_rate": 3.651319354390515e-05, + "loss": 2.1426, + "step": 9322000 + }, + { + "epoch": 26.98, + "learning_rate": 3.651247134355317e-05, + "loss": 2.1344, + "step": 9322500 + }, + { + "epoch": 26.99, + "learning_rate": 3.6511749143201185e-05, + "loss": 2.1516, + "step": 9323000 + }, + { + "epoch": 26.99, + "learning_rate": 3.6511025495553914e-05, + "loss": 2.1658, + "step": 9323500 + }, + { + "epoch": 26.99, + "learning_rate": 3.6510301847906636e-05, + "loss": 2.1379, + "step": 9324000 + }, + { + "epoch": 26.99, + "learning_rate": 3.650957820025936e-05, + "loss": 2.1527, + "step": 9324500 + }, + { + "epoch": 26.99, + "learning_rate": 3.650885455261208e-05, + "loss": 2.11, + "step": 9325000 + }, + { + "epoch": 26.99, + "learning_rate": 3.65081309049648e-05, + "loss": 2.1295, + "step": 9325500 + }, + { + "epoch": 26.99, + "learning_rate": 3.6507407257317525e-05, + "loss": 2.1344, + "step": 9326000 + }, + { + "epoch": 27.0, + "learning_rate": 3.650668360967025e-05, + "loss": 2.1454, + "step": 9326500 + }, + { + "epoch": 27.0, + "learning_rate": 3.6505959962022976e-05, + "loss": 2.1264, + "step": 9327000 + }, + { + "epoch": 27.0, + "learning_rate": 3.650523776167099e-05, + "loss": 2.1357, + "step": 9327500 + }, + { + "epoch": 27.0, + "eval_accuracy": 0.6654050335667157, + "eval_accuracy_mlm": 0.6298091509434769, + "eval_accuracy_nsp": 0.8562923650162929, + "eval_loss": 2.195469617843628, + "eval_runtime": 332.0987, + "eval_samples_per_second": 1314.025, + "eval_steps_per_second": 54.752, + "step": 9327744 + }, + { + "epoch": 27.0, + "learning_rate": 3.6504514114023714e-05, + "loss": 2.1241, + "step": 9328000 + }, + { + "epoch": 27.0, + "learning_rate": 3.6503790466376436e-05, + "loss": 2.1168, + "step": 9328500 + }, + { + "epoch": 27.0, + "learning_rate": 3.650306681872916e-05, + "loss": 2.1252, + "step": 9329000 + }, + { + "epoch": 27.01, + "learning_rate": 3.6502344618377174e-05, + "loss": 2.1047, + "step": 9329500 + }, + { + "epoch": 27.01, + "learning_rate": 3.6501622418025196e-05, + "loss": 2.1268, + "step": 9330000 + }, + { + "epoch": 27.01, + "learning_rate": 3.650089877037792e-05, + "loss": 2.1355, + "step": 9330500 + }, + { + "epoch": 27.01, + "learning_rate": 3.650017512273065e-05, + "loss": 2.112, + "step": 9331000 + }, + { + "epoch": 27.01, + "learning_rate": 3.649945147508337e-05, + "loss": 2.1301, + "step": 9331500 + }, + { + "epoch": 27.01, + "learning_rate": 3.649872782743609e-05, + "loss": 2.1215, + "step": 9332000 + }, + { + "epoch": 27.01, + "learning_rate": 3.6498004179788814e-05, + "loss": 2.1114, + "step": 9332500 + }, + { + "epoch": 27.02, + "learning_rate": 3.6497280532141536e-05, + "loss": 2.127, + "step": 9333000 + }, + { + "epoch": 27.02, + "learning_rate": 3.649655688449426e-05, + "loss": 2.107, + "step": 9333500 + }, + { + "epoch": 27.02, + "learning_rate": 3.649583323684698e-05, + "loss": 2.1007, + "step": 9334000 + }, + { + "epoch": 27.02, + "learning_rate": 3.64951095891997e-05, + "loss": 2.1159, + "step": 9334500 + }, + { + "epoch": 27.02, + "learning_rate": 3.6494385941552425e-05, + "loss": 2.1118, + "step": 9335000 + }, + { + "epoch": 27.02, + "learning_rate": 3.6493662293905154e-05, + "loss": 2.1256, + "step": 9335500 + }, + { + "epoch": 27.02, + "learning_rate": 3.6492938646257876e-05, + "loss": 2.1073, + "step": 9336000 + }, + { + "epoch": 27.03, + "learning_rate": 3.649221644590589e-05, + "loss": 2.0944, + "step": 9336500 + }, + { + "epoch": 27.03, + "learning_rate": 3.6491492798258614e-05, + "loss": 2.0876, + "step": 9337000 + }, + { + "epoch": 27.03, + "learning_rate": 3.6490769150611336e-05, + "loss": 2.1152, + "step": 9337500 + }, + { + "epoch": 27.03, + "learning_rate": 3.649004695025935e-05, + "loss": 2.1064, + "step": 9338000 + }, + { + "epoch": 27.03, + "learning_rate": 3.648932330261208e-05, + "loss": 2.1165, + "step": 9338500 + }, + { + "epoch": 27.03, + "learning_rate": 3.64885996549648e-05, + "loss": 2.1109, + "step": 9339000 + }, + { + "epoch": 27.03, + "learning_rate": 3.6487876007317526e-05, + "loss": 2.1144, + "step": 9339500 + }, + { + "epoch": 27.04, + "learning_rate": 3.6487152359670255e-05, + "loss": 2.1415, + "step": 9340000 + }, + { + "epoch": 27.04, + "learning_rate": 3.648642871202298e-05, + "loss": 2.1132, + "step": 9340500 + }, + { + "epoch": 27.04, + "learning_rate": 3.64857050643757e-05, + "loss": 2.1361, + "step": 9341000 + }, + { + "epoch": 27.04, + "learning_rate": 3.648498141672842e-05, + "loss": 2.1143, + "step": 9341500 + }, + { + "epoch": 27.04, + "learning_rate": 3.648425921637644e-05, + "loss": 2.1237, + "step": 9342000 + }, + { + "epoch": 27.04, + "learning_rate": 3.648353556872916e-05, + "loss": 2.1159, + "step": 9342500 + }, + { + "epoch": 27.04, + "learning_rate": 3.648281192108188e-05, + "loss": 2.1212, + "step": 9343000 + }, + { + "epoch": 27.05, + "learning_rate": 3.6482089720729904e-05, + "loss": 2.1147, + "step": 9343500 + }, + { + "epoch": 27.05, + "learning_rate": 3.6481366073082626e-05, + "loss": 2.0983, + "step": 9344000 + }, + { + "epoch": 27.05, + "learning_rate": 3.648064242543535e-05, + "loss": 2.1101, + "step": 9344500 + }, + { + "epoch": 27.05, + "learning_rate": 3.647991877778807e-05, + "loss": 2.1243, + "step": 9345000 + }, + { + "epoch": 27.05, + "learning_rate": 3.647919513014079e-05, + "loss": 2.12, + "step": 9345500 + }, + { + "epoch": 27.05, + "learning_rate": 3.64784758243794e-05, + "loss": 2.1144, + "step": 9346000 + }, + { + "epoch": 27.05, + "learning_rate": 3.647775217673213e-05, + "loss": 2.1069, + "step": 9346500 + }, + { + "epoch": 27.06, + "learning_rate": 3.647702852908485e-05, + "loss": 2.1268, + "step": 9347000 + }, + { + "epoch": 27.06, + "learning_rate": 3.6476304881437575e-05, + "loss": 2.141, + "step": 9347500 + }, + { + "epoch": 27.06, + "learning_rate": 3.64755812337903e-05, + "loss": 2.1165, + "step": 9348000 + }, + { + "epoch": 27.06, + "learning_rate": 3.647485758614302e-05, + "loss": 2.1145, + "step": 9348500 + }, + { + "epoch": 27.06, + "learning_rate": 3.647413393849574e-05, + "loss": 2.1054, + "step": 9349000 + }, + { + "epoch": 27.06, + "learning_rate": 3.6473410290848464e-05, + "loss": 2.1317, + "step": 9349500 + }, + { + "epoch": 27.06, + "learning_rate": 3.6472686643201186e-05, + "loss": 2.1535, + "step": 9350000 + }, + { + "epoch": 27.07, + "learning_rate": 3.647196299555391e-05, + "loss": 2.1028, + "step": 9350500 + }, + { + "epoch": 27.07, + "learning_rate": 3.647123934790663e-05, + "loss": 2.1305, + "step": 9351000 + }, + { + "epoch": 27.07, + "learning_rate": 3.647051570025935e-05, + "loss": 2.1478, + "step": 9351500 + }, + { + "epoch": 27.07, + "learning_rate": 3.6469792052612075e-05, + "loss": 2.1249, + "step": 9352000 + }, + { + "epoch": 27.07, + "learning_rate": 3.6469068404964804e-05, + "loss": 2.1451, + "step": 9352500 + }, + { + "epoch": 27.07, + "learning_rate": 3.6468344757317526e-05, + "loss": 2.1325, + "step": 9353000 + }, + { + "epoch": 27.07, + "learning_rate": 3.6467621109670255e-05, + "loss": 2.1186, + "step": 9353500 + }, + { + "epoch": 27.08, + "learning_rate": 3.646689890931827e-05, + "loss": 2.1638, + "step": 9354000 + }, + { + "epoch": 27.08, + "learning_rate": 3.646617526167099e-05, + "loss": 2.1217, + "step": 9354500 + }, + { + "epoch": 27.08, + "learning_rate": 3.6465451614023715e-05, + "loss": 2.1206, + "step": 9355000 + }, + { + "epoch": 27.08, + "learning_rate": 3.646472796637644e-05, + "loss": 2.1012, + "step": 9355500 + }, + { + "epoch": 27.08, + "learning_rate": 3.646400576602445e-05, + "loss": 2.1042, + "step": 9356000 + }, + { + "epoch": 27.08, + "learning_rate": 3.6463283565672475e-05, + "loss": 2.1169, + "step": 9356500 + }, + { + "epoch": 27.08, + "learning_rate": 3.64625599180252e-05, + "loss": 2.1303, + "step": 9357000 + }, + { + "epoch": 27.09, + "learning_rate": 3.646183627037792e-05, + "loss": 2.0985, + "step": 9357500 + }, + { + "epoch": 27.09, + "learning_rate": 3.646111262273064e-05, + "loss": 2.1195, + "step": 9358000 + }, + { + "epoch": 27.09, + "learning_rate": 3.6460388975083364e-05, + "loss": 2.1092, + "step": 9358500 + }, + { + "epoch": 27.09, + "learning_rate": 3.645966532743609e-05, + "loss": 2.1248, + "step": 9359000 + }, + { + "epoch": 27.09, + "learning_rate": 3.645894167978881e-05, + "loss": 2.1193, + "step": 9359500 + }, + { + "epoch": 27.09, + "learning_rate": 3.645821803214153e-05, + "loss": 2.1186, + "step": 9360000 + }, + { + "epoch": 27.09, + "learning_rate": 3.645749438449425e-05, + "loss": 2.1365, + "step": 9360500 + }, + { + "epoch": 27.1, + "learning_rate": 3.645677073684698e-05, + "loss": 2.1207, + "step": 9361000 + }, + { + "epoch": 27.1, + "learning_rate": 3.6456048536495005e-05, + "loss": 2.1427, + "step": 9361500 + }, + { + "epoch": 27.1, + "learning_rate": 3.645532488884773e-05, + "loss": 2.1196, + "step": 9362000 + }, + { + "epoch": 27.1, + "learning_rate": 3.645460124120045e-05, + "loss": 2.1274, + "step": 9362500 + }, + { + "epoch": 27.1, + "learning_rate": 3.645387759355317e-05, + "loss": 2.0993, + "step": 9363000 + }, + { + "epoch": 27.1, + "learning_rate": 3.6453153945905894e-05, + "loss": 2.1174, + "step": 9363500 + }, + { + "epoch": 27.1, + "learning_rate": 3.6452430298258616e-05, + "loss": 2.1102, + "step": 9364000 + }, + { + "epoch": 27.11, + "learning_rate": 3.645170665061134e-05, + "loss": 2.1362, + "step": 9364500 + }, + { + "epoch": 27.11, + "learning_rate": 3.6450984450259354e-05, + "loss": 2.1262, + "step": 9365000 + }, + { + "epoch": 27.11, + "learning_rate": 3.6450262249907376e-05, + "loss": 2.1112, + "step": 9365500 + }, + { + "epoch": 27.11, + "learning_rate": 3.64495386022601e-05, + "loss": 2.1292, + "step": 9366000 + }, + { + "epoch": 27.11, + "learning_rate": 3.644881495461282e-05, + "loss": 2.0959, + "step": 9366500 + }, + { + "epoch": 27.11, + "learning_rate": 3.644809130696554e-05, + "loss": 2.1159, + "step": 9367000 + }, + { + "epoch": 27.12, + "learning_rate": 3.6447367659318265e-05, + "loss": 2.126, + "step": 9367500 + }, + { + "epoch": 27.12, + "learning_rate": 3.644664401167099e-05, + "loss": 2.1283, + "step": 9368000 + }, + { + "epoch": 27.12, + "learning_rate": 3.6445920364023716e-05, + "loss": 2.1089, + "step": 9368500 + }, + { + "epoch": 27.12, + "learning_rate": 3.644519671637644e-05, + "loss": 2.1168, + "step": 9369000 + }, + { + "epoch": 27.12, + "learning_rate": 3.644447306872916e-05, + "loss": 2.1102, + "step": 9369500 + }, + { + "epoch": 27.12, + "learning_rate": 3.644375086837718e-05, + "loss": 2.1243, + "step": 9370000 + }, + { + "epoch": 27.12, + "learning_rate": 3.6443027220729905e-05, + "loss": 2.1364, + "step": 9370500 + }, + { + "epoch": 27.13, + "learning_rate": 3.644230502037792e-05, + "loss": 2.1184, + "step": 9371000 + }, + { + "epoch": 27.13, + "learning_rate": 3.644158137273064e-05, + "loss": 2.133, + "step": 9371500 + }, + { + "epoch": 27.13, + "learning_rate": 3.6440857725083365e-05, + "loss": 2.1167, + "step": 9372000 + }, + { + "epoch": 27.13, + "learning_rate": 3.644013407743609e-05, + "loss": 2.1125, + "step": 9372500 + }, + { + "epoch": 27.13, + "learning_rate": 3.643941042978881e-05, + "loss": 2.1, + "step": 9373000 + }, + { + "epoch": 27.13, + "learning_rate": 3.643868678214153e-05, + "loss": 2.1185, + "step": 9373500 + }, + { + "epoch": 27.13, + "learning_rate": 3.6437963134494254e-05, + "loss": 2.1121, + "step": 9374000 + }, + { + "epoch": 27.14, + "learning_rate": 3.643723948684698e-05, + "loss": 2.112, + "step": 9374500 + }, + { + "epoch": 27.14, + "learning_rate": 3.6436515839199705e-05, + "loss": 2.1383, + "step": 9375000 + }, + { + "epoch": 27.14, + "learning_rate": 3.643579219155243e-05, + "loss": 2.1072, + "step": 9375500 + }, + { + "epoch": 27.14, + "learning_rate": 3.643506854390516e-05, + "loss": 2.1162, + "step": 9376000 + }, + { + "epoch": 27.14, + "learning_rate": 3.643434489625788e-05, + "loss": 2.1288, + "step": 9376500 + }, + { + "epoch": 27.14, + "learning_rate": 3.64336212486106e-05, + "loss": 2.1308, + "step": 9377000 + }, + { + "epoch": 27.14, + "learning_rate": 3.643289760096332e-05, + "loss": 2.1348, + "step": 9377500 + }, + { + "epoch": 27.15, + "learning_rate": 3.643217540061134e-05, + "loss": 2.107, + "step": 9378000 + }, + { + "epoch": 27.15, + "learning_rate": 3.643145175296406e-05, + "loss": 2.1144, + "step": 9378500 + }, + { + "epoch": 27.15, + "learning_rate": 3.6430729552612083e-05, + "loss": 2.1105, + "step": 9379000 + }, + { + "epoch": 27.15, + "learning_rate": 3.6430005904964806e-05, + "loss": 2.1332, + "step": 9379500 + }, + { + "epoch": 27.15, + "learning_rate": 3.642928225731753e-05, + "loss": 2.111, + "step": 9380000 + }, + { + "epoch": 27.15, + "learning_rate": 3.642855860967025e-05, + "loss": 2.1396, + "step": 9380500 + }, + { + "epoch": 27.15, + "learning_rate": 3.642783496202297e-05, + "loss": 2.1539, + "step": 9381000 + }, + { + "epoch": 27.16, + "learning_rate": 3.6427111314375695e-05, + "loss": 2.1049, + "step": 9381500 + }, + { + "epoch": 27.16, + "learning_rate": 3.642638766672842e-05, + "loss": 2.1525, + "step": 9382000 + }, + { + "epoch": 27.16, + "learning_rate": 3.642566401908114e-05, + "loss": 2.116, + "step": 9382500 + }, + { + "epoch": 27.16, + "learning_rate": 3.6424941818729155e-05, + "loss": 2.1464, + "step": 9383000 + }, + { + "epoch": 27.16, + "learning_rate": 3.6424218171081884e-05, + "loss": 2.1183, + "step": 9383500 + }, + { + "epoch": 27.16, + "learning_rate": 3.6423495970729906e-05, + "loss": 2.1141, + "step": 9384000 + }, + { + "epoch": 27.16, + "learning_rate": 3.642277232308263e-05, + "loss": 2.1176, + "step": 9384500 + }, + { + "epoch": 27.17, + "learning_rate": 3.642204867543535e-05, + "loss": 2.1113, + "step": 9385000 + }, + { + "epoch": 27.17, + "learning_rate": 3.642132502778807e-05, + "loss": 2.1593, + "step": 9385500 + }, + { + "epoch": 27.17, + "learning_rate": 3.6420601380140795e-05, + "loss": 2.134, + "step": 9386000 + }, + { + "epoch": 27.17, + "learning_rate": 3.641987773249352e-05, + "loss": 2.0956, + "step": 9386500 + }, + { + "epoch": 27.17, + "learning_rate": 3.641915553214153e-05, + "loss": 2.1253, + "step": 9387000 + }, + { + "epoch": 27.17, + "learning_rate": 3.641843188449426e-05, + "loss": 2.1252, + "step": 9387500 + }, + { + "epoch": 27.17, + "learning_rate": 3.6417708236846984e-05, + "loss": 2.1253, + "step": 9388000 + }, + { + "epoch": 27.18, + "learning_rate": 3.6416986036495e-05, + "loss": 2.1338, + "step": 9388500 + }, + { + "epoch": 27.18, + "learning_rate": 3.6416263836143015e-05, + "loss": 2.1018, + "step": 9389000 + }, + { + "epoch": 27.18, + "learning_rate": 3.641554018849574e-05, + "loss": 2.1206, + "step": 9389500 + }, + { + "epoch": 27.18, + "learning_rate": 3.641481654084846e-05, + "loss": 2.1274, + "step": 9390000 + }, + { + "epoch": 27.18, + "learning_rate": 3.641409289320118e-05, + "loss": 2.1407, + "step": 9390500 + }, + { + "epoch": 27.18, + "learning_rate": 3.641336924555391e-05, + "loss": 2.1033, + "step": 9391000 + }, + { + "epoch": 27.18, + "learning_rate": 3.641264559790663e-05, + "loss": 2.1043, + "step": 9391500 + }, + { + "epoch": 27.19, + "learning_rate": 3.641192195025936e-05, + "loss": 2.1227, + "step": 9392000 + }, + { + "epoch": 27.19, + "learning_rate": 3.6411198302612084e-05, + "loss": 2.1133, + "step": 9392500 + }, + { + "epoch": 27.19, + "learning_rate": 3.6410474654964806e-05, + "loss": 2.1042, + "step": 9393000 + }, + { + "epoch": 27.19, + "learning_rate": 3.640975100731753e-05, + "loss": 2.1152, + "step": 9393500 + }, + { + "epoch": 27.19, + "learning_rate": 3.640902735967025e-05, + "loss": 2.1534, + "step": 9394000 + }, + { + "epoch": 27.19, + "learning_rate": 3.640830371202297e-05, + "loss": 2.1233, + "step": 9394500 + }, + { + "epoch": 27.19, + "learning_rate": 3.640758151167099e-05, + "loss": 2.1142, + "step": 9395000 + }, + { + "epoch": 27.2, + "learning_rate": 3.640685786402371e-05, + "loss": 2.137, + "step": 9395500 + }, + { + "epoch": 27.2, + "learning_rate": 3.640613421637643e-05, + "loss": 2.1133, + "step": 9396000 + }, + { + "epoch": 27.2, + "learning_rate": 3.640541056872916e-05, + "loss": 2.1185, + "step": 9396500 + }, + { + "epoch": 27.2, + "learning_rate": 3.6404686921081884e-05, + "loss": 2.1367, + "step": 9397000 + }, + { + "epoch": 27.2, + "learning_rate": 3.640396327343461e-05, + "loss": 2.1468, + "step": 9397500 + }, + { + "epoch": 27.2, + "learning_rate": 3.6403239625787336e-05, + "loss": 2.1351, + "step": 9398000 + }, + { + "epoch": 27.2, + "learning_rate": 3.640251597814006e-05, + "loss": 2.1358, + "step": 9398500 + }, + { + "epoch": 27.21, + "learning_rate": 3.640179233049278e-05, + "loss": 2.1508, + "step": 9399000 + }, + { + "epoch": 27.21, + "learning_rate": 3.64010686828455e-05, + "loss": 2.112, + "step": 9399500 + }, + { + "epoch": 27.21, + "learning_rate": 3.6400345035198225e-05, + "loss": 2.1166, + "step": 9400000 + }, + { + "epoch": 27.21, + "learning_rate": 3.639962138755095e-05, + "loss": 2.1465, + "step": 9400500 + }, + { + "epoch": 27.21, + "learning_rate": 3.639889773990367e-05, + "loss": 2.1014, + "step": 9401000 + }, + { + "epoch": 27.21, + "learning_rate": 3.639817409225639e-05, + "loss": 2.1333, + "step": 9401500 + }, + { + "epoch": 27.21, + "learning_rate": 3.6397450444609114e-05, + "loss": 2.1416, + "step": 9402000 + }, + { + "epoch": 27.22, + "learning_rate": 3.6396728244257136e-05, + "loss": 2.1028, + "step": 9402500 + }, + { + "epoch": 27.22, + "learning_rate": 3.639600459660986e-05, + "loss": 2.136, + "step": 9403000 + }, + { + "epoch": 27.22, + "learning_rate": 3.639528094896258e-05, + "loss": 2.1353, + "step": 9403500 + }, + { + "epoch": 27.22, + "learning_rate": 3.63945573013153e-05, + "loss": 2.1107, + "step": 9404000 + }, + { + "epoch": 27.22, + "learning_rate": 3.6393833653668025e-05, + "loss": 2.0979, + "step": 9404500 + }, + { + "epoch": 27.22, + "learning_rate": 3.639311145331604e-05, + "loss": 2.1314, + "step": 9405000 + }, + { + "epoch": 27.23, + "learning_rate": 3.639238780566877e-05, + "loss": 2.1138, + "step": 9405500 + }, + { + "epoch": 27.23, + "learning_rate": 3.6391665605316785e-05, + "loss": 2.1193, + "step": 9406000 + }, + { + "epoch": 27.23, + "learning_rate": 3.6390941957669514e-05, + "loss": 2.1203, + "step": 9406500 + }, + { + "epoch": 27.23, + "learning_rate": 3.6390218310022236e-05, + "loss": 2.1295, + "step": 9407000 + }, + { + "epoch": 27.23, + "learning_rate": 3.638949466237496e-05, + "loss": 2.1479, + "step": 9407500 + }, + { + "epoch": 27.23, + "learning_rate": 3.638877101472768e-05, + "loss": 2.1429, + "step": 9408000 + }, + { + "epoch": 27.23, + "learning_rate": 3.6388048814375696e-05, + "loss": 2.1206, + "step": 9408500 + }, + { + "epoch": 27.24, + "learning_rate": 3.638732516672842e-05, + "loss": 2.1061, + "step": 9409000 + }, + { + "epoch": 27.24, + "learning_rate": 3.638660151908114e-05, + "loss": 2.1364, + "step": 9409500 + }, + { + "epoch": 27.24, + "learning_rate": 3.638587787143386e-05, + "loss": 2.1165, + "step": 9410000 + }, + { + "epoch": 27.24, + "learning_rate": 3.6385154223786585e-05, + "loss": 2.1244, + "step": 9410500 + }, + { + "epoch": 27.24, + "learning_rate": 3.638443202343461e-05, + "loss": 2.1306, + "step": 9411000 + }, + { + "epoch": 27.24, + "learning_rate": 3.638370982308262e-05, + "loss": 2.1197, + "step": 9411500 + }, + { + "epoch": 27.24, + "learning_rate": 3.6382986175435345e-05, + "loss": 2.1139, + "step": 9412000 + }, + { + "epoch": 27.25, + "learning_rate": 3.638226252778807e-05, + "loss": 2.1179, + "step": 9412500 + }, + { + "epoch": 27.25, + "learning_rate": 3.638153888014079e-05, + "loss": 2.1289, + "step": 9413000 + }, + { + "epoch": 27.25, + "learning_rate": 3.638081523249352e-05, + "loss": 2.1565, + "step": 9413500 + }, + { + "epoch": 27.25, + "learning_rate": 3.638009303214154e-05, + "loss": 2.117, + "step": 9414000 + }, + { + "epoch": 27.25, + "learning_rate": 3.637936938449426e-05, + "loss": 2.1167, + "step": 9414500 + }, + { + "epoch": 27.25, + "learning_rate": 3.6378645736846986e-05, + "loss": 2.1219, + "step": 9415000 + }, + { + "epoch": 27.25, + "learning_rate": 3.637792208919971e-05, + "loss": 2.1162, + "step": 9415500 + }, + { + "epoch": 27.26, + "learning_rate": 3.637719844155243e-05, + "loss": 2.1259, + "step": 9416000 + }, + { + "epoch": 27.26, + "learning_rate": 3.637647479390515e-05, + "loss": 2.1318, + "step": 9416500 + }, + { + "epoch": 27.26, + "learning_rate": 3.6375751146257874e-05, + "loss": 2.1223, + "step": 9417000 + }, + { + "epoch": 27.26, + "learning_rate": 3.63750274986106e-05, + "loss": 2.1176, + "step": 9417500 + }, + { + "epoch": 27.26, + "learning_rate": 3.637430385096332e-05, + "loss": 2.1136, + "step": 9418000 + }, + { + "epoch": 27.26, + "learning_rate": 3.637358020331604e-05, + "loss": 2.1365, + "step": 9418500 + }, + { + "epoch": 27.26, + "learning_rate": 3.6372856555668763e-05, + "loss": 2.1179, + "step": 9419000 + }, + { + "epoch": 27.27, + "learning_rate": 3.6372132908021486e-05, + "loss": 2.1056, + "step": 9419500 + }, + { + "epoch": 27.27, + "learning_rate": 3.6371409260374215e-05, + "loss": 2.1146, + "step": 9420000 + }, + { + "epoch": 27.27, + "learning_rate": 3.637068561272694e-05, + "loss": 2.1341, + "step": 9420500 + }, + { + "epoch": 27.27, + "learning_rate": 3.636996341237496e-05, + "loss": 2.1421, + "step": 9421000 + }, + { + "epoch": 27.27, + "learning_rate": 3.636923976472768e-05, + "loss": 2.1355, + "step": 9421500 + }, + { + "epoch": 27.27, + "learning_rate": 3.6368516117080404e-05, + "loss": 2.1241, + "step": 9422000 + }, + { + "epoch": 27.27, + "learning_rate": 3.6367792469433126e-05, + "loss": 2.1196, + "step": 9422500 + }, + { + "epoch": 27.28, + "learning_rate": 3.636706882178585e-05, + "loss": 2.1396, + "step": 9423000 + }, + { + "epoch": 27.28, + "learning_rate": 3.6366346621433864e-05, + "loss": 2.1212, + "step": 9423500 + }, + { + "epoch": 27.28, + "learning_rate": 3.636562297378659e-05, + "loss": 2.1376, + "step": 9424000 + }, + { + "epoch": 27.28, + "learning_rate": 3.6364899326139315e-05, + "loss": 2.124, + "step": 9424500 + }, + { + "epoch": 27.28, + "learning_rate": 3.636417567849204e-05, + "loss": 2.1347, + "step": 9425000 + }, + { + "epoch": 27.28, + "learning_rate": 3.636345203084476e-05, + "loss": 2.1274, + "step": 9425500 + }, + { + "epoch": 27.28, + "learning_rate": 3.636272838319748e-05, + "loss": 2.1162, + "step": 9426000 + }, + { + "epoch": 27.29, + "learning_rate": 3.6362004735550204e-05, + "loss": 2.1105, + "step": 9426500 + }, + { + "epoch": 27.29, + "learning_rate": 3.636128253519822e-05, + "loss": 2.1147, + "step": 9427000 + }, + { + "epoch": 27.29, + "learning_rate": 3.636056033484624e-05, + "loss": 2.1417, + "step": 9427500 + }, + { + "epoch": 27.29, + "learning_rate": 3.6359836687198964e-05, + "loss": 2.1242, + "step": 9428000 + }, + { + "epoch": 27.29, + "learning_rate": 3.635911303955169e-05, + "loss": 2.1308, + "step": 9428500 + }, + { + "epoch": 27.29, + "learning_rate": 3.6358389391904415e-05, + "loss": 2.1121, + "step": 9429000 + }, + { + "epoch": 27.29, + "learning_rate": 3.635766574425714e-05, + "loss": 2.141, + "step": 9429500 + }, + { + "epoch": 27.3, + "learning_rate": 3.635694209660986e-05, + "loss": 2.1219, + "step": 9430000 + }, + { + "epoch": 27.3, + "learning_rate": 3.635621844896258e-05, + "loss": 2.1323, + "step": 9430500 + }, + { + "epoch": 27.3, + "learning_rate": 3.6355494801315304e-05, + "loss": 2.1395, + "step": 9431000 + }, + { + "epoch": 27.3, + "learning_rate": 3.6354771153668026e-05, + "loss": 2.1123, + "step": 9431500 + }, + { + "epoch": 27.3, + "learning_rate": 3.635404750602075e-05, + "loss": 2.1056, + "step": 9432000 + }, + { + "epoch": 27.3, + "learning_rate": 3.635332385837347e-05, + "loss": 2.1416, + "step": 9432500 + }, + { + "epoch": 27.3, + "learning_rate": 3.635260165802149e-05, + "loss": 2.1166, + "step": 9433000 + }, + { + "epoch": 27.31, + "learning_rate": 3.635187945766951e-05, + "loss": 2.1069, + "step": 9433500 + }, + { + "epoch": 27.31, + "learning_rate": 3.635115581002223e-05, + "loss": 2.1161, + "step": 9434000 + }, + { + "epoch": 27.31, + "learning_rate": 3.635043216237495e-05, + "loss": 2.1283, + "step": 9434500 + }, + { + "epoch": 27.31, + "learning_rate": 3.6349708514727675e-05, + "loss": 2.1177, + "step": 9435000 + }, + { + "epoch": 27.31, + "learning_rate": 3.6348984867080404e-05, + "loss": 2.1167, + "step": 9435500 + }, + { + "epoch": 27.31, + "learning_rate": 3.634826121943313e-05, + "loss": 2.1189, + "step": 9436000 + }, + { + "epoch": 27.31, + "learning_rate": 3.634753757178585e-05, + "loss": 2.135, + "step": 9436500 + }, + { + "epoch": 27.32, + "learning_rate": 3.634681392413857e-05, + "loss": 2.1005, + "step": 9437000 + }, + { + "epoch": 27.32, + "learning_rate": 3.6346090276491293e-05, + "loss": 2.144, + "step": 9437500 + }, + { + "epoch": 27.32, + "learning_rate": 3.6345366628844016e-05, + "loss": 2.1205, + "step": 9438000 + }, + { + "epoch": 27.32, + "learning_rate": 3.634464587578733e-05, + "loss": 2.1247, + "step": 9438500 + }, + { + "epoch": 27.32, + "learning_rate": 3.6343922228140054e-05, + "loss": 2.1106, + "step": 9439000 + }, + { + "epoch": 27.32, + "learning_rate": 3.6343198580492776e-05, + "loss": 2.1286, + "step": 9439500 + }, + { + "epoch": 27.32, + "learning_rate": 3.63424749328455e-05, + "loss": 2.131, + "step": 9440000 + }, + { + "epoch": 27.33, + "learning_rate": 3.634175128519822e-05, + "loss": 2.1254, + "step": 9440500 + }, + { + "epoch": 27.33, + "learning_rate": 3.634102763755094e-05, + "loss": 2.1117, + "step": 9441000 + }, + { + "epoch": 27.33, + "learning_rate": 3.6340303989903665e-05, + "loss": 2.1368, + "step": 9441500 + }, + { + "epoch": 27.33, + "learning_rate": 3.6339580342256394e-05, + "loss": 2.1311, + "step": 9442000 + }, + { + "epoch": 27.33, + "learning_rate": 3.633885814190441e-05, + "loss": 2.1257, + "step": 9442500 + }, + { + "epoch": 27.33, + "learning_rate": 3.633813449425714e-05, + "loss": 2.1567, + "step": 9443000 + }, + { + "epoch": 27.34, + "learning_rate": 3.633741084660986e-05, + "loss": 2.1161, + "step": 9443500 + }, + { + "epoch": 27.34, + "learning_rate": 3.633668719896258e-05, + "loss": 2.1364, + "step": 9444000 + }, + { + "epoch": 27.34, + "learning_rate": 3.6335963551315305e-05, + "loss": 2.1235, + "step": 9444500 + }, + { + "epoch": 27.34, + "learning_rate": 3.633523990366803e-05, + "loss": 2.1249, + "step": 9445000 + }, + { + "epoch": 27.34, + "learning_rate": 3.633451625602075e-05, + "loss": 2.1353, + "step": 9445500 + }, + { + "epoch": 27.34, + "learning_rate": 3.6333794055668765e-05, + "loss": 2.1088, + "step": 9446000 + }, + { + "epoch": 27.34, + "learning_rate": 3.6333070408021494e-05, + "loss": 2.1205, + "step": 9446500 + }, + { + "epoch": 27.35, + "learning_rate": 3.6332346760374216e-05, + "loss": 2.1349, + "step": 9447000 + }, + { + "epoch": 27.35, + "learning_rate": 3.633162311272694e-05, + "loss": 2.1224, + "step": 9447500 + }, + { + "epoch": 27.35, + "learning_rate": 3.633089946507966e-05, + "loss": 2.1554, + "step": 9448000 + }, + { + "epoch": 27.35, + "learning_rate": 3.633017581743238e-05, + "loss": 2.1129, + "step": 9448500 + }, + { + "epoch": 27.35, + "learning_rate": 3.6329452169785105e-05, + "loss": 2.1289, + "step": 9449000 + }, + { + "epoch": 27.35, + "learning_rate": 3.632872852213783e-05, + "loss": 2.1087, + "step": 9449500 + }, + { + "epoch": 27.35, + "learning_rate": 3.6328004874490556e-05, + "loss": 2.1153, + "step": 9450000 + }, + { + "epoch": 27.36, + "learning_rate": 3.632728122684328e-05, + "loss": 2.1417, + "step": 9450500 + }, + { + "epoch": 27.36, + "learning_rate": 3.6326557579196e-05, + "loss": 2.1387, + "step": 9451000 + }, + { + "epoch": 27.36, + "learning_rate": 3.632583393154872e-05, + "loss": 2.1071, + "step": 9451500 + }, + { + "epoch": 27.36, + "learning_rate": 3.632511317849204e-05, + "loss": 2.1391, + "step": 9452000 + }, + { + "epoch": 27.36, + "learning_rate": 3.632438953084476e-05, + "loss": 2.1154, + "step": 9452500 + }, + { + "epoch": 27.36, + "learning_rate": 3.632366588319748e-05, + "loss": 2.0988, + "step": 9453000 + }, + { + "epoch": 27.36, + "learning_rate": 3.6322942235550205e-05, + "loss": 2.1395, + "step": 9453500 + }, + { + "epoch": 27.37, + "learning_rate": 3.632222003519822e-05, + "loss": 2.1016, + "step": 9454000 + }, + { + "epoch": 27.37, + "learning_rate": 3.632149638755094e-05, + "loss": 2.1325, + "step": 9454500 + }, + { + "epoch": 27.37, + "learning_rate": 3.632077273990367e-05, + "loss": 2.134, + "step": 9455000 + }, + { + "epoch": 27.37, + "learning_rate": 3.632005053955169e-05, + "loss": 2.1332, + "step": 9455500 + }, + { + "epoch": 27.37, + "learning_rate": 3.631932689190441e-05, + "loss": 2.1176, + "step": 9456000 + }, + { + "epoch": 27.37, + "learning_rate": 3.631860324425713e-05, + "loss": 2.1223, + "step": 9456500 + }, + { + "epoch": 27.37, + "learning_rate": 3.6317879596609855e-05, + "loss": 2.144, + "step": 9457000 + }, + { + "epoch": 27.38, + "learning_rate": 3.631715594896258e-05, + "loss": 2.1235, + "step": 9457500 + }, + { + "epoch": 27.38, + "learning_rate": 3.6316432301315306e-05, + "loss": 2.1245, + "step": 9458000 + }, + { + "epoch": 27.38, + "learning_rate": 3.631570865366803e-05, + "loss": 2.104, + "step": 9458500 + }, + { + "epoch": 27.38, + "learning_rate": 3.631498500602075e-05, + "loss": 2.118, + "step": 9459000 + }, + { + "epoch": 27.38, + "learning_rate": 3.631426135837347e-05, + "loss": 2.149, + "step": 9459500 + }, + { + "epoch": 27.38, + "learning_rate": 3.6313537710726195e-05, + "loss": 2.1454, + "step": 9460000 + }, + { + "epoch": 27.38, + "learning_rate": 3.6312814063078924e-05, + "loss": 2.1441, + "step": 9460500 + }, + { + "epoch": 27.39, + "learning_rate": 3.6312090415431646e-05, + "loss": 2.1428, + "step": 9461000 + }, + { + "epoch": 27.39, + "learning_rate": 3.631136676778437e-05, + "loss": 2.1154, + "step": 9461500 + }, + { + "epoch": 27.39, + "learning_rate": 3.631064312013709e-05, + "loss": 2.1374, + "step": 9462000 + }, + { + "epoch": 27.39, + "learning_rate": 3.6309920919785106e-05, + "loss": 2.1253, + "step": 9462500 + }, + { + "epoch": 27.39, + "learning_rate": 3.630919727213783e-05, + "loss": 2.1299, + "step": 9463000 + }, + { + "epoch": 27.39, + "learning_rate": 3.630847362449055e-05, + "loss": 2.1074, + "step": 9463500 + }, + { + "epoch": 27.39, + "learning_rate": 3.630774997684327e-05, + "loss": 2.1154, + "step": 9464000 + }, + { + "epoch": 27.4, + "learning_rate": 3.6307026329195995e-05, + "loss": 2.1189, + "step": 9464500 + }, + { + "epoch": 27.4, + "learning_rate": 3.6306302681548724e-05, + "loss": 2.1306, + "step": 9465000 + }, + { + "epoch": 27.4, + "learning_rate": 3.6305580481196746e-05, + "loss": 2.1135, + "step": 9465500 + }, + { + "epoch": 27.4, + "learning_rate": 3.630485828084476e-05, + "loss": 2.1419, + "step": 9466000 + }, + { + "epoch": 27.4, + "learning_rate": 3.6304134633197484e-05, + "loss": 2.1205, + "step": 9466500 + }, + { + "epoch": 27.4, + "learning_rate": 3.6303410985550206e-05, + "loss": 2.123, + "step": 9467000 + }, + { + "epoch": 27.4, + "learning_rate": 3.630268733790293e-05, + "loss": 2.1353, + "step": 9467500 + }, + { + "epoch": 27.41, + "learning_rate": 3.630196369025565e-05, + "loss": 2.1059, + "step": 9468000 + }, + { + "epoch": 27.41, + "learning_rate": 3.630124004260837e-05, + "loss": 2.1037, + "step": 9468500 + }, + { + "epoch": 27.41, + "learning_rate": 3.6300516394961095e-05, + "loss": 2.1309, + "step": 9469000 + }, + { + "epoch": 27.41, + "learning_rate": 3.629979564190441e-05, + "loss": 2.1089, + "step": 9469500 + }, + { + "epoch": 27.41, + "learning_rate": 3.629907199425713e-05, + "loss": 2.1391, + "step": 9470000 + }, + { + "epoch": 27.41, + "learning_rate": 3.6298348346609855e-05, + "loss": 2.1275, + "step": 9470500 + }, + { + "epoch": 27.41, + "learning_rate": 3.629762469896258e-05, + "loss": 2.1259, + "step": 9471000 + }, + { + "epoch": 27.42, + "learning_rate": 3.62969010513153e-05, + "loss": 2.1009, + "step": 9471500 + }, + { + "epoch": 27.42, + "learning_rate": 3.629617885096332e-05, + "loss": 2.1243, + "step": 9472000 + }, + { + "epoch": 27.42, + "learning_rate": 3.6295455203316044e-05, + "loss": 2.1183, + "step": 9472500 + }, + { + "epoch": 27.42, + "learning_rate": 3.629473155566877e-05, + "loss": 2.1456, + "step": 9473000 + }, + { + "epoch": 27.42, + "learning_rate": 3.6294007908021496e-05, + "loss": 2.1257, + "step": 9473500 + }, + { + "epoch": 27.42, + "learning_rate": 3.629328426037422e-05, + "loss": 2.1111, + "step": 9474000 + }, + { + "epoch": 27.42, + "learning_rate": 3.629256061272694e-05, + "loss": 2.1249, + "step": 9474500 + }, + { + "epoch": 27.43, + "learning_rate": 3.629183696507966e-05, + "loss": 2.1227, + "step": 9475000 + }, + { + "epoch": 27.43, + "learning_rate": 3.629111476472768e-05, + "loss": 2.1397, + "step": 9475500 + }, + { + "epoch": 27.43, + "learning_rate": 3.62903911170804e-05, + "loss": 2.1201, + "step": 9476000 + }, + { + "epoch": 27.43, + "learning_rate": 3.628966746943312e-05, + "loss": 2.1195, + "step": 9476500 + }, + { + "epoch": 27.43, + "learning_rate": 3.628894382178585e-05, + "loss": 2.1175, + "step": 9477000 + }, + { + "epoch": 27.43, + "learning_rate": 3.6288220174138574e-05, + "loss": 2.1034, + "step": 9477500 + }, + { + "epoch": 27.43, + "learning_rate": 3.6287496526491296e-05, + "loss": 2.1252, + "step": 9478000 + }, + { + "epoch": 27.44, + "learning_rate": 3.628677287884402e-05, + "loss": 2.1167, + "step": 9478500 + }, + { + "epoch": 27.44, + "learning_rate": 3.628604923119674e-05, + "loss": 2.1445, + "step": 9479000 + }, + { + "epoch": 27.44, + "learning_rate": 3.628532558354946e-05, + "loss": 2.1118, + "step": 9479500 + }, + { + "epoch": 27.44, + "learning_rate": 3.628460193590219e-05, + "loss": 2.1121, + "step": 9480000 + }, + { + "epoch": 27.44, + "learning_rate": 3.6283878288254914e-05, + "loss": 2.1105, + "step": 9480500 + }, + { + "epoch": 27.44, + "learning_rate": 3.6283154640607636e-05, + "loss": 2.114, + "step": 9481000 + }, + { + "epoch": 27.45, + "learning_rate": 3.628243099296036e-05, + "loss": 2.1317, + "step": 9481500 + }, + { + "epoch": 27.45, + "learning_rate": 3.628170734531308e-05, + "loss": 2.1198, + "step": 9482000 + }, + { + "epoch": 27.45, + "learning_rate": 3.6280985144961096e-05, + "loss": 2.1091, + "step": 9482500 + }, + { + "epoch": 27.45, + "learning_rate": 3.6280261497313825e-05, + "loss": 2.1437, + "step": 9483000 + }, + { + "epoch": 27.45, + "learning_rate": 3.627953929696184e-05, + "loss": 2.1233, + "step": 9483500 + }, + { + "epoch": 27.45, + "learning_rate": 3.627881564931456e-05, + "loss": 2.1414, + "step": 9484000 + }, + { + "epoch": 27.45, + "learning_rate": 3.6278092001667285e-05, + "loss": 2.1299, + "step": 9484500 + }, + { + "epoch": 27.46, + "learning_rate": 3.62773698013153e-05, + "loss": 2.1404, + "step": 9485000 + }, + { + "epoch": 27.46, + "learning_rate": 3.627664615366802e-05, + "loss": 2.1494, + "step": 9485500 + }, + { + "epoch": 27.46, + "learning_rate": 3.627592250602075e-05, + "loss": 2.1407, + "step": 9486000 + }, + { + "epoch": 27.46, + "learning_rate": 3.6275198858373474e-05, + "loss": 2.1245, + "step": 9486500 + }, + { + "epoch": 27.46, + "learning_rate": 3.6274475210726196e-05, + "loss": 2.1194, + "step": 9487000 + }, + { + "epoch": 27.46, + "learning_rate": 3.627375156307892e-05, + "loss": 2.1281, + "step": 9487500 + }, + { + "epoch": 27.46, + "learning_rate": 3.627302791543165e-05, + "loss": 2.132, + "step": 9488000 + }, + { + "epoch": 27.47, + "learning_rate": 3.627230426778437e-05, + "loss": 2.1419, + "step": 9488500 + }, + { + "epoch": 27.47, + "learning_rate": 3.627158062013709e-05, + "loss": 2.1498, + "step": 9489000 + }, + { + "epoch": 27.47, + "learning_rate": 3.6270856972489814e-05, + "loss": 2.1208, + "step": 9489500 + }, + { + "epoch": 27.47, + "learning_rate": 3.627013477213783e-05, + "loss": 2.1102, + "step": 9490000 + }, + { + "epoch": 27.47, + "learning_rate": 3.626941257178585e-05, + "loss": 2.14, + "step": 9490500 + }, + { + "epoch": 27.47, + "learning_rate": 3.6268688924138574e-05, + "loss": 2.1083, + "step": 9491000 + }, + { + "epoch": 27.47, + "learning_rate": 3.6267965276491297e-05, + "loss": 2.1337, + "step": 9491500 + }, + { + "epoch": 27.48, + "learning_rate": 3.626724162884402e-05, + "loss": 2.1272, + "step": 9492000 + }, + { + "epoch": 27.48, + "learning_rate": 3.626651798119674e-05, + "loss": 2.097, + "step": 9492500 + }, + { + "epoch": 27.48, + "learning_rate": 3.626579433354946e-05, + "loss": 2.128, + "step": 9493000 + }, + { + "epoch": 27.48, + "learning_rate": 3.6265070685902186e-05, + "loss": 2.1427, + "step": 9493500 + }, + { + "epoch": 27.48, + "learning_rate": 3.626434703825491e-05, + "loss": 2.1121, + "step": 9494000 + }, + { + "epoch": 27.48, + "learning_rate": 3.626362339060763e-05, + "loss": 2.1649, + "step": 9494500 + }, + { + "epoch": 27.48, + "learning_rate": 3.626290119025565e-05, + "loss": 2.082, + "step": 9495000 + }, + { + "epoch": 27.49, + "learning_rate": 3.6262177542608375e-05, + "loss": 2.1075, + "step": 9495500 + }, + { + "epoch": 27.49, + "learning_rate": 3.6261453894961104e-05, + "loss": 2.1376, + "step": 9496000 + }, + { + "epoch": 27.49, + "learning_rate": 3.6260730247313826e-05, + "loss": 2.1288, + "step": 9496500 + }, + { + "epoch": 27.49, + "learning_rate": 3.626000659966655e-05, + "loss": 2.156, + "step": 9497000 + }, + { + "epoch": 27.49, + "learning_rate": 3.625928295201927e-05, + "loss": 2.1263, + "step": 9497500 + }, + { + "epoch": 27.49, + "learning_rate": 3.625855930437199e-05, + "loss": 2.1282, + "step": 9498000 + }, + { + "epoch": 27.49, + "learning_rate": 3.625783710402001e-05, + "loss": 2.1351, + "step": 9498500 + }, + { + "epoch": 27.5, + "learning_rate": 3.625711345637273e-05, + "loss": 2.1434, + "step": 9499000 + }, + { + "epoch": 27.5, + "learning_rate": 3.625638980872545e-05, + "loss": 2.1384, + "step": 9499500 + }, + { + "epoch": 27.5, + "learning_rate": 3.6255666161078175e-05, + "loss": 2.1443, + "step": 9500000 + }, + { + "epoch": 27.5, + "learning_rate": 3.6254942513430904e-05, + "loss": 2.1507, + "step": 9500500 + }, + { + "epoch": 27.5, + "learning_rate": 3.6254218865783626e-05, + "loss": 2.1403, + "step": 9501000 + }, + { + "epoch": 27.5, + "learning_rate": 3.625349666543164e-05, + "loss": 2.1203, + "step": 9501500 + }, + { + "epoch": 27.5, + "learning_rate": 3.625277446507966e-05, + "loss": 2.1091, + "step": 9502000 + }, + { + "epoch": 27.51, + "learning_rate": 3.625205081743238e-05, + "loss": 2.1346, + "step": 9502500 + }, + { + "epoch": 27.51, + "learning_rate": 3.625132716978511e-05, + "loss": 2.135, + "step": 9503000 + }, + { + "epoch": 27.51, + "learning_rate": 3.625060352213783e-05, + "loss": 2.108, + "step": 9503500 + }, + { + "epoch": 27.51, + "learning_rate": 3.624987987449055e-05, + "loss": 2.1312, + "step": 9504000 + }, + { + "epoch": 27.51, + "learning_rate": 3.6249156226843275e-05, + "loss": 2.1144, + "step": 9504500 + }, + { + "epoch": 27.51, + "learning_rate": 3.6248432579196004e-05, + "loss": 2.1127, + "step": 9505000 + }, + { + "epoch": 27.51, + "learning_rate": 3.6247708931548726e-05, + "loss": 2.1425, + "step": 9505500 + }, + { + "epoch": 27.52, + "learning_rate": 3.624698673119674e-05, + "loss": 2.1135, + "step": 9506000 + }, + { + "epoch": 27.52, + "learning_rate": 3.6246263083549464e-05, + "loss": 2.1232, + "step": 9506500 + }, + { + "epoch": 27.52, + "learning_rate": 3.6245539435902186e-05, + "loss": 2.112, + "step": 9507000 + }, + { + "epoch": 27.52, + "learning_rate": 3.624481578825491e-05, + "loss": 2.1397, + "step": 9507500 + }, + { + "epoch": 27.52, + "learning_rate": 3.624409214060763e-05, + "loss": 2.1157, + "step": 9508000 + }, + { + "epoch": 27.52, + "learning_rate": 3.624336849296035e-05, + "loss": 2.133, + "step": 9508500 + }, + { + "epoch": 27.52, + "learning_rate": 3.6242644845313075e-05, + "loss": 2.1287, + "step": 9509000 + }, + { + "epoch": 27.53, + "learning_rate": 3.6241921197665804e-05, + "loss": 2.1079, + "step": 9509500 + }, + { + "epoch": 27.53, + "learning_rate": 3.6241197550018526e-05, + "loss": 2.1054, + "step": 9510000 + }, + { + "epoch": 27.53, + "learning_rate": 3.6240473902371256e-05, + "loss": 2.1095, + "step": 9510500 + }, + { + "epoch": 27.53, + "learning_rate": 3.623975025472398e-05, + "loss": 2.1208, + "step": 9511000 + }, + { + "epoch": 27.53, + "learning_rate": 3.62390266070767e-05, + "loss": 2.1257, + "step": 9511500 + }, + { + "epoch": 27.53, + "learning_rate": 3.623830295942942e-05, + "loss": 2.1262, + "step": 9512000 + }, + { + "epoch": 27.53, + "learning_rate": 3.6237579311782144e-05, + "loss": 2.1256, + "step": 9512500 + }, + { + "epoch": 27.54, + "learning_rate": 3.623685711143016e-05, + "loss": 2.122, + "step": 9513000 + }, + { + "epoch": 27.54, + "learning_rate": 3.623613346378288e-05, + "loss": 2.124, + "step": 9513500 + }, + { + "epoch": 27.54, + "learning_rate": 3.6235409816135604e-05, + "loss": 2.1347, + "step": 9514000 + }, + { + "epoch": 27.54, + "learning_rate": 3.623468761578363e-05, + "loss": 2.1202, + "step": 9514500 + }, + { + "epoch": 27.54, + "learning_rate": 3.623396396813635e-05, + "loss": 2.1505, + "step": 9515000 + }, + { + "epoch": 27.54, + "learning_rate": 3.623324032048907e-05, + "loss": 2.1059, + "step": 9515500 + }, + { + "epoch": 27.54, + "learning_rate": 3.6232516672841793e-05, + "loss": 2.1142, + "step": 9516000 + }, + { + "epoch": 27.55, + "learning_rate": 3.6231793025194516e-05, + "loss": 2.1231, + "step": 9516500 + }, + { + "epoch": 27.55, + "learning_rate": 3.623106937754724e-05, + "loss": 2.1407, + "step": 9517000 + }, + { + "epoch": 27.55, + "learning_rate": 3.6230348624490554e-05, + "loss": 2.1399, + "step": 9517500 + }, + { + "epoch": 27.55, + "learning_rate": 3.622962497684328e-05, + "loss": 2.1157, + "step": 9518000 + }, + { + "epoch": 27.55, + "learning_rate": 3.6228901329196005e-05, + "loss": 2.1261, + "step": 9518500 + }, + { + "epoch": 27.55, + "learning_rate": 3.622817768154873e-05, + "loss": 2.1342, + "step": 9519000 + }, + { + "epoch": 27.56, + "learning_rate": 3.622745403390145e-05, + "loss": 2.1429, + "step": 9519500 + }, + { + "epoch": 27.56, + "learning_rate": 3.622673038625417e-05, + "loss": 2.1262, + "step": 9520000 + }, + { + "epoch": 27.56, + "learning_rate": 3.6226006738606894e-05, + "loss": 2.108, + "step": 9520500 + }, + { + "epoch": 27.56, + "learning_rate": 3.6225283090959616e-05, + "loss": 2.1221, + "step": 9521000 + }, + { + "epoch": 27.56, + "learning_rate": 3.622455944331234e-05, + "loss": 2.1313, + "step": 9521500 + }, + { + "epoch": 27.56, + "learning_rate": 3.622383579566506e-05, + "loss": 2.1431, + "step": 9522000 + }, + { + "epoch": 27.56, + "learning_rate": 3.622311214801778e-05, + "loss": 2.146, + "step": 9522500 + }, + { + "epoch": 27.57, + "learning_rate": 3.6222389947665805e-05, + "loss": 2.1275, + "step": 9523000 + }, + { + "epoch": 27.57, + "learning_rate": 3.622166630001853e-05, + "loss": 2.1547, + "step": 9523500 + }, + { + "epoch": 27.57, + "learning_rate": 3.622094265237125e-05, + "loss": 2.1156, + "step": 9524000 + }, + { + "epoch": 27.57, + "learning_rate": 3.6220220452019265e-05, + "loss": 2.1408, + "step": 9524500 + }, + { + "epoch": 27.57, + "learning_rate": 3.6219496804371994e-05, + "loss": 2.1132, + "step": 9525000 + }, + { + "epoch": 27.57, + "learning_rate": 3.6218773156724716e-05, + "loss": 2.1366, + "step": 9525500 + }, + { + "epoch": 27.57, + "learning_rate": 3.621804950907744e-05, + "loss": 2.1272, + "step": 9526000 + }, + { + "epoch": 27.58, + "learning_rate": 3.621732586143016e-05, + "loss": 2.1233, + "step": 9526500 + }, + { + "epoch": 27.58, + "learning_rate": 3.621660221378288e-05, + "loss": 2.134, + "step": 9527000 + }, + { + "epoch": 27.58, + "learning_rate": 3.6215878566135605e-05, + "loss": 2.1163, + "step": 9527500 + }, + { + "epoch": 27.58, + "learning_rate": 3.6215154918488334e-05, + "loss": 2.1026, + "step": 9528000 + }, + { + "epoch": 27.58, + "learning_rate": 3.6214431270841057e-05, + "loss": 2.1383, + "step": 9528500 + }, + { + "epoch": 27.58, + "learning_rate": 3.621370907048907e-05, + "loss": 2.1563, + "step": 9529000 + }, + { + "epoch": 27.58, + "learning_rate": 3.621298687013709e-05, + "loss": 2.118, + "step": 9529500 + }, + { + "epoch": 27.59, + "learning_rate": 3.621226322248981e-05, + "loss": 2.1579, + "step": 9530000 + }, + { + "epoch": 27.59, + "learning_rate": 3.621153957484253e-05, + "loss": 2.1466, + "step": 9530500 + }, + { + "epoch": 27.59, + "learning_rate": 3.6210815927195254e-05, + "loss": 2.1263, + "step": 9531000 + }, + { + "epoch": 27.59, + "learning_rate": 3.621009227954798e-05, + "loss": 2.1511, + "step": 9531500 + }, + { + "epoch": 27.59, + "learning_rate": 3.6209368631900706e-05, + "loss": 2.1201, + "step": 9532000 + }, + { + "epoch": 27.59, + "learning_rate": 3.6208644984253435e-05, + "loss": 2.0989, + "step": 9532500 + }, + { + "epoch": 27.59, + "learning_rate": 3.620792133660616e-05, + "loss": 2.1107, + "step": 9533000 + }, + { + "epoch": 27.6, + "learning_rate": 3.620719768895888e-05, + "loss": 2.1143, + "step": 9533500 + }, + { + "epoch": 27.6, + "learning_rate": 3.6206475488606895e-05, + "loss": 2.1139, + "step": 9534000 + }, + { + "epoch": 27.6, + "learning_rate": 3.620575328825491e-05, + "loss": 2.1432, + "step": 9534500 + }, + { + "epoch": 27.6, + "learning_rate": 3.620502964060763e-05, + "loss": 2.1453, + "step": 9535000 + }, + { + "epoch": 27.6, + "learning_rate": 3.6204307440255655e-05, + "loss": 2.148, + "step": 9535500 + }, + { + "epoch": 27.6, + "learning_rate": 3.620358379260838e-05, + "loss": 2.1096, + "step": 9536000 + }, + { + "epoch": 27.6, + "learning_rate": 3.62028601449611e-05, + "loss": 2.1463, + "step": 9536500 + }, + { + "epoch": 27.61, + "learning_rate": 3.620213649731382e-05, + "loss": 2.1185, + "step": 9537000 + }, + { + "epoch": 27.61, + "learning_rate": 3.6201412849666544e-05, + "loss": 2.1344, + "step": 9537500 + }, + { + "epoch": 27.61, + "learning_rate": 3.6200689202019266e-05, + "loss": 2.1196, + "step": 9538000 + }, + { + "epoch": 27.61, + "learning_rate": 3.619996555437199e-05, + "loss": 2.1241, + "step": 9538500 + }, + { + "epoch": 27.61, + "learning_rate": 3.619924190672471e-05, + "loss": 2.1032, + "step": 9539000 + }, + { + "epoch": 27.61, + "learning_rate": 3.619851970637273e-05, + "loss": 2.1291, + "step": 9539500 + }, + { + "epoch": 27.61, + "learning_rate": 3.619779605872546e-05, + "loss": 2.1353, + "step": 9540000 + }, + { + "epoch": 27.62, + "learning_rate": 3.619707385837348e-05, + "loss": 2.1274, + "step": 9540500 + }, + { + "epoch": 27.62, + "learning_rate": 3.61963502107262e-05, + "loss": 2.1299, + "step": 9541000 + }, + { + "epoch": 27.62, + "learning_rate": 3.619562656307892e-05, + "loss": 2.1234, + "step": 9541500 + }, + { + "epoch": 27.62, + "learning_rate": 3.6194902915431644e-05, + "loss": 2.1596, + "step": 9542000 + }, + { + "epoch": 27.62, + "learning_rate": 3.6194179267784366e-05, + "loss": 2.1227, + "step": 9542500 + }, + { + "epoch": 27.62, + "learning_rate": 3.619345706743238e-05, + "loss": 2.1253, + "step": 9543000 + }, + { + "epoch": 27.62, + "learning_rate": 3.619273341978511e-05, + "loss": 2.1426, + "step": 9543500 + }, + { + "epoch": 27.63, + "learning_rate": 3.619200977213783e-05, + "loss": 2.1342, + "step": 9544000 + }, + { + "epoch": 27.63, + "learning_rate": 3.6191286124490555e-05, + "loss": 2.1213, + "step": 9544500 + }, + { + "epoch": 27.63, + "learning_rate": 3.619056392413857e-05, + "loss": 2.1249, + "step": 9545000 + }, + { + "epoch": 27.63, + "learning_rate": 3.618984027649129e-05, + "loss": 2.1537, + "step": 9545500 + }, + { + "epoch": 27.63, + "learning_rate": 3.6189116628844015e-05, + "loss": 2.1371, + "step": 9546000 + }, + { + "epoch": 27.63, + "learning_rate": 3.618839442849203e-05, + "loss": 2.1244, + "step": 9546500 + }, + { + "epoch": 27.63, + "learning_rate": 3.618767078084476e-05, + "loss": 2.1411, + "step": 9547000 + }, + { + "epoch": 27.64, + "learning_rate": 3.618694713319748e-05, + "loss": 2.1214, + "step": 9547500 + }, + { + "epoch": 27.64, + "learning_rate": 3.6186224932845504e-05, + "loss": 2.1341, + "step": 9548000 + }, + { + "epoch": 27.64, + "learning_rate": 3.6185501285198227e-05, + "loss": 2.137, + "step": 9548500 + }, + { + "epoch": 27.64, + "learning_rate": 3.618477763755095e-05, + "loss": 2.157, + "step": 9549000 + }, + { + "epoch": 27.64, + "learning_rate": 3.618405398990367e-05, + "loss": 2.1509, + "step": 9549500 + }, + { + "epoch": 27.64, + "learning_rate": 3.618333034225639e-05, + "loss": 2.1422, + "step": 9550000 + }, + { + "epoch": 27.64, + "learning_rate": 3.6182606694609116e-05, + "loss": 2.1156, + "step": 9550500 + }, + { + "epoch": 27.65, + "learning_rate": 3.618188304696184e-05, + "loss": 2.1268, + "step": 9551000 + }, + { + "epoch": 27.65, + "learning_rate": 3.618116084660986e-05, + "loss": 2.1137, + "step": 9551500 + }, + { + "epoch": 27.65, + "learning_rate": 3.618043719896258e-05, + "loss": 2.1199, + "step": 9552000 + }, + { + "epoch": 27.65, + "learning_rate": 3.6179713551315305e-05, + "loss": 2.1365, + "step": 9552500 + }, + { + "epoch": 27.65, + "learning_rate": 3.617898990366803e-05, + "loss": 2.1416, + "step": 9553000 + }, + { + "epoch": 27.65, + "learning_rate": 3.617826625602075e-05, + "loss": 2.136, + "step": 9553500 + }, + { + "epoch": 27.65, + "learning_rate": 3.6177544055668765e-05, + "loss": 2.1532, + "step": 9554000 + }, + { + "epoch": 27.66, + "learning_rate": 3.617682040802149e-05, + "loss": 2.129, + "step": 9554500 + }, + { + "epoch": 27.66, + "learning_rate": 3.617609676037421e-05, + "loss": 2.1294, + "step": 9555000 + }, + { + "epoch": 27.66, + "learning_rate": 3.617537311272694e-05, + "loss": 2.1228, + "step": 9555500 + }, + { + "epoch": 27.66, + "learning_rate": 3.617464946507966e-05, + "loss": 2.1441, + "step": 9556000 + }, + { + "epoch": 27.66, + "learning_rate": 3.617392581743238e-05, + "loss": 2.1113, + "step": 9556500 + }, + { + "epoch": 27.66, + "learning_rate": 3.617320216978511e-05, + "loss": 2.1351, + "step": 9557000 + }, + { + "epoch": 27.67, + "learning_rate": 3.6172478522137834e-05, + "loss": 2.1173, + "step": 9557500 + }, + { + "epoch": 27.67, + "learning_rate": 3.6171754874490556e-05, + "loss": 2.1271, + "step": 9558000 + }, + { + "epoch": 27.67, + "learning_rate": 3.617103122684328e-05, + "loss": 2.1384, + "step": 9558500 + }, + { + "epoch": 27.67, + "learning_rate": 3.6170307579196e-05, + "loss": 2.1536, + "step": 9559000 + }, + { + "epoch": 27.67, + "learning_rate": 3.616958393154872e-05, + "loss": 2.1392, + "step": 9559500 + }, + { + "epoch": 27.67, + "learning_rate": 3.6168860283901445e-05, + "loss": 2.1494, + "step": 9560000 + }, + { + "epoch": 27.67, + "learning_rate": 3.616813663625417e-05, + "loss": 2.1201, + "step": 9560500 + }, + { + "epoch": 27.68, + "learning_rate": 3.616741443590218e-05, + "loss": 2.1564, + "step": 9561000 + }, + { + "epoch": 27.68, + "learning_rate": 3.616669078825491e-05, + "loss": 2.1187, + "step": 9561500 + }, + { + "epoch": 27.68, + "learning_rate": 3.6165967140607634e-05, + "loss": 2.1649, + "step": 9562000 + }, + { + "epoch": 27.68, + "learning_rate": 3.616524349296036e-05, + "loss": 2.1708, + "step": 9562500 + }, + { + "epoch": 27.68, + "learning_rate": 3.6164519845313085e-05, + "loss": 2.1444, + "step": 9563000 + }, + { + "epoch": 27.68, + "learning_rate": 3.616379619766581e-05, + "loss": 2.1387, + "step": 9563500 + }, + { + "epoch": 27.68, + "learning_rate": 3.616307255001853e-05, + "loss": 2.1207, + "step": 9564000 + }, + { + "epoch": 27.69, + "learning_rate": 3.616234890237125e-05, + "loss": 2.1487, + "step": 9564500 + }, + { + "epoch": 27.69, + "learning_rate": 3.6161625254723974e-05, + "loss": 2.1186, + "step": 9565000 + }, + { + "epoch": 27.69, + "learning_rate": 3.6160901607076696e-05, + "loss": 2.1254, + "step": 9565500 + }, + { + "epoch": 27.69, + "learning_rate": 3.616017940672471e-05, + "loss": 2.1133, + "step": 9566000 + }, + { + "epoch": 27.69, + "learning_rate": 3.6159455759077434e-05, + "loss": 2.1251, + "step": 9566500 + }, + { + "epoch": 27.69, + "learning_rate": 3.615873211143016e-05, + "loss": 2.1488, + "step": 9567000 + }, + { + "epoch": 27.69, + "learning_rate": 3.6158008463782885e-05, + "loss": 2.1059, + "step": 9567500 + }, + { + "epoch": 27.7, + "learning_rate": 3.615728481613561e-05, + "loss": 2.1446, + "step": 9568000 + }, + { + "epoch": 27.7, + "learning_rate": 3.615656116848833e-05, + "loss": 2.1164, + "step": 9568500 + }, + { + "epoch": 27.7, + "learning_rate": 3.615584041543164e-05, + "loss": 2.1339, + "step": 9569000 + }, + { + "epoch": 27.7, + "learning_rate": 3.615511676778436e-05, + "loss": 2.1331, + "step": 9569500 + }, + { + "epoch": 27.7, + "learning_rate": 3.615439312013709e-05, + "loss": 2.1381, + "step": 9570000 + }, + { + "epoch": 27.7, + "learning_rate": 3.615366947248981e-05, + "loss": 2.1504, + "step": 9570500 + }, + { + "epoch": 27.7, + "learning_rate": 3.615294582484254e-05, + "loss": 2.1334, + "step": 9571000 + }, + { + "epoch": 27.71, + "learning_rate": 3.6152222177195263e-05, + "loss": 2.1107, + "step": 9571500 + }, + { + "epoch": 27.71, + "learning_rate": 3.6151498529547986e-05, + "loss": 2.1297, + "step": 9572000 + }, + { + "epoch": 27.71, + "learning_rate": 3.6150776329196e-05, + "loss": 2.1263, + "step": 9572500 + }, + { + "epoch": 27.71, + "learning_rate": 3.6150052681548724e-05, + "loss": 2.1453, + "step": 9573000 + }, + { + "epoch": 27.71, + "learning_rate": 3.6149329033901446e-05, + "loss": 2.1341, + "step": 9573500 + }, + { + "epoch": 27.71, + "learning_rate": 3.614860538625417e-05, + "loss": 2.1402, + "step": 9574000 + }, + { + "epoch": 27.71, + "learning_rate": 3.614788318590219e-05, + "loss": 2.1449, + "step": 9574500 + }, + { + "epoch": 27.72, + "learning_rate": 3.614715953825491e-05, + "loss": 2.1377, + "step": 9575000 + }, + { + "epoch": 27.72, + "learning_rate": 3.614643733790293e-05, + "loss": 2.113, + "step": 9575500 + }, + { + "epoch": 27.72, + "learning_rate": 3.614571369025565e-05, + "loss": 2.1473, + "step": 9576000 + }, + { + "epoch": 27.72, + "learning_rate": 3.614499004260837e-05, + "loss": 2.1225, + "step": 9576500 + }, + { + "epoch": 27.72, + "learning_rate": 3.6144266394961095e-05, + "loss": 2.1249, + "step": 9577000 + }, + { + "epoch": 27.72, + "learning_rate": 3.6143542747313824e-05, + "loss": 2.1205, + "step": 9577500 + }, + { + "epoch": 27.72, + "learning_rate": 3.6142819099666546e-05, + "loss": 2.1276, + "step": 9578000 + }, + { + "epoch": 27.73, + "learning_rate": 3.614209545201927e-05, + "loss": 2.1068, + "step": 9578500 + }, + { + "epoch": 27.73, + "learning_rate": 3.614137180437199e-05, + "loss": 2.1187, + "step": 9579000 + }, + { + "epoch": 27.73, + "learning_rate": 3.614064815672471e-05, + "loss": 2.1292, + "step": 9579500 + }, + { + "epoch": 27.73, + "learning_rate": 3.613992450907744e-05, + "loss": 2.1332, + "step": 9580000 + }, + { + "epoch": 27.73, + "learning_rate": 3.6139200861430164e-05, + "loss": 2.1407, + "step": 9580500 + }, + { + "epoch": 27.73, + "learning_rate": 3.6138477213782886e-05, + "loss": 2.1521, + "step": 9581000 + }, + { + "epoch": 27.73, + "learning_rate": 3.61377550134309e-05, + "loss": 2.1114, + "step": 9581500 + }, + { + "epoch": 27.74, + "learning_rate": 3.6137031365783624e-05, + "loss": 2.1432, + "step": 9582000 + }, + { + "epoch": 27.74, + "learning_rate": 3.6136307718136346e-05, + "loss": 2.1442, + "step": 9582500 + }, + { + "epoch": 27.74, + "learning_rate": 3.613558407048907e-05, + "loss": 2.1274, + "step": 9583000 + }, + { + "epoch": 27.74, + "learning_rate": 3.613486187013709e-05, + "loss": 2.1041, + "step": 9583500 + }, + { + "epoch": 27.74, + "learning_rate": 3.613413822248981e-05, + "loss": 2.1089, + "step": 9584000 + }, + { + "epoch": 27.74, + "learning_rate": 3.6133414574842535e-05, + "loss": 2.1186, + "step": 9584500 + }, + { + "epoch": 27.74, + "learning_rate": 3.6132690927195264e-05, + "loss": 2.1394, + "step": 9585000 + }, + { + "epoch": 27.75, + "learning_rate": 3.6131967279547987e-05, + "loss": 2.1353, + "step": 9585500 + }, + { + "epoch": 27.75, + "learning_rate": 3.613124363190071e-05, + "loss": 2.1187, + "step": 9586000 + }, + { + "epoch": 27.75, + "learning_rate": 3.613051998425343e-05, + "loss": 2.1251, + "step": 9586500 + }, + { + "epoch": 27.75, + "learning_rate": 3.612979633660615e-05, + "loss": 2.1643, + "step": 9587000 + }, + { + "epoch": 27.75, + "learning_rate": 3.6129072688958875e-05, + "loss": 2.1161, + "step": 9587500 + }, + { + "epoch": 27.75, + "learning_rate": 3.61283490413116e-05, + "loss": 2.1215, + "step": 9588000 + }, + { + "epoch": 27.75, + "learning_rate": 3.612762539366432e-05, + "loss": 2.1219, + "step": 9588500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612690174601704e-05, + "loss": 2.1098, + "step": 9589000 + }, + { + "epoch": 27.76, + "learning_rate": 3.6126178098369764e-05, + "loss": 2.1078, + "step": 9589500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612545445072249e-05, + "loss": 2.1452, + "step": 9590000 + }, + { + "epoch": 27.76, + "learning_rate": 3.612473225037051e-05, + "loss": 2.1479, + "step": 9590500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612400860272323e-05, + "loss": 2.1195, + "step": 9591000 + }, + { + "epoch": 27.76, + "learning_rate": 3.612328640237125e-05, + "loss": 2.1112, + "step": 9591500 + }, + { + "epoch": 27.76, + "learning_rate": 3.612256275472397e-05, + "loss": 2.1198, + "step": 9592000 + }, + { + "epoch": 27.77, + "learning_rate": 3.61218391070767e-05, + "loss": 2.1215, + "step": 9592500 + }, + { + "epoch": 27.77, + "learning_rate": 3.612111545942942e-05, + "loss": 2.1268, + "step": 9593000 + }, + { + "epoch": 27.77, + "learning_rate": 3.612039181178214e-05, + "loss": 2.1409, + "step": 9593500 + }, + { + "epoch": 27.77, + "learning_rate": 3.6119669611430165e-05, + "loss": 2.1392, + "step": 9594000 + }, + { + "epoch": 27.77, + "learning_rate": 3.611894596378289e-05, + "loss": 2.1109, + "step": 9594500 + }, + { + "epoch": 27.77, + "learning_rate": 3.611822231613561e-05, + "loss": 2.1669, + "step": 9595000 + }, + { + "epoch": 27.78, + "learning_rate": 3.611749866848833e-05, + "loss": 2.1258, + "step": 9595500 + }, + { + "epoch": 27.78, + "learning_rate": 3.6116775020841054e-05, + "loss": 2.15, + "step": 9596000 + }, + { + "epoch": 27.78, + "learning_rate": 3.6116051373193776e-05, + "loss": 2.1218, + "step": 9596500 + }, + { + "epoch": 27.78, + "learning_rate": 3.61153277255465e-05, + "loss": 2.1138, + "step": 9597000 + }, + { + "epoch": 27.78, + "learning_rate": 3.611460407789922e-05, + "loss": 2.1274, + "step": 9597500 + }, + { + "epoch": 27.78, + "learning_rate": 3.611388043025194e-05, + "loss": 2.0926, + "step": 9598000 + }, + { + "epoch": 27.78, + "learning_rate": 3.6113156782604665e-05, + "loss": 2.1139, + "step": 9598500 + }, + { + "epoch": 27.79, + "learning_rate": 3.611243458225269e-05, + "loss": 2.1334, + "step": 9599000 + }, + { + "epoch": 27.79, + "learning_rate": 3.6111710934605416e-05, + "loss": 2.1543, + "step": 9599500 + }, + { + "epoch": 27.79, + "learning_rate": 3.6110990181548725e-05, + "loss": 2.107, + "step": 9600000 + }, + { + "epoch": 27.79, + "learning_rate": 3.611026653390145e-05, + "loss": 2.1197, + "step": 9600500 + }, + { + "epoch": 27.79, + "learning_rate": 3.610954288625417e-05, + "loss": 2.123, + "step": 9601000 + }, + { + "epoch": 27.79, + "learning_rate": 3.610881923860689e-05, + "loss": 2.1228, + "step": 9601500 + }, + { + "epoch": 27.79, + "learning_rate": 3.610809559095962e-05, + "loss": 2.1338, + "step": 9602000 + }, + { + "epoch": 27.8, + "learning_rate": 3.610737194331234e-05, + "loss": 2.1162, + "step": 9602500 + }, + { + "epoch": 27.8, + "learning_rate": 3.6106648295665065e-05, + "loss": 2.1359, + "step": 9603000 + }, + { + "epoch": 27.8, + "learning_rate": 3.610592464801779e-05, + "loss": 2.1264, + "step": 9603500 + }, + { + "epoch": 27.8, + "learning_rate": 3.610520100037051e-05, + "loss": 2.1134, + "step": 9604000 + }, + { + "epoch": 27.8, + "learning_rate": 3.610447735272323e-05, + "loss": 2.1314, + "step": 9604500 + }, + { + "epoch": 27.8, + "learning_rate": 3.610375515237125e-05, + "loss": 2.1377, + "step": 9605000 + }, + { + "epoch": 27.8, + "learning_rate": 3.610303150472397e-05, + "loss": 2.1348, + "step": 9605500 + }, + { + "epoch": 27.81, + "learning_rate": 3.610230785707669e-05, + "loss": 2.1335, + "step": 9606000 + }, + { + "epoch": 27.81, + "learning_rate": 3.610158420942942e-05, + "loss": 2.1015, + "step": 9606500 + }, + { + "epoch": 27.81, + "learning_rate": 3.610086056178214e-05, + "loss": 2.1356, + "step": 9607000 + }, + { + "epoch": 27.81, + "learning_rate": 3.610013691413487e-05, + "loss": 2.1391, + "step": 9607500 + }, + { + "epoch": 27.81, + "learning_rate": 3.6099413266487594e-05, + "loss": 2.1297, + "step": 9608000 + }, + { + "epoch": 27.81, + "learning_rate": 3.609868961884032e-05, + "loss": 2.1328, + "step": 9608500 + }, + { + "epoch": 27.81, + "learning_rate": 3.609796597119304e-05, + "loss": 2.1067, + "step": 9609000 + }, + { + "epoch": 27.82, + "learning_rate": 3.6097243770841055e-05, + "loss": 2.15, + "step": 9609500 + }, + { + "epoch": 27.82, + "learning_rate": 3.609652157048907e-05, + "loss": 2.1284, + "step": 9610000 + }, + { + "epoch": 27.82, + "learning_rate": 3.609579792284179e-05, + "loss": 2.1375, + "step": 9610500 + }, + { + "epoch": 27.82, + "learning_rate": 3.609507427519452e-05, + "loss": 2.1493, + "step": 9611000 + }, + { + "epoch": 27.82, + "learning_rate": 3.6094350627547244e-05, + "loss": 2.1137, + "step": 9611500 + }, + { + "epoch": 27.82, + "learning_rate": 3.609362842719526e-05, + "loss": 2.1391, + "step": 9612000 + }, + { + "epoch": 27.82, + "learning_rate": 3.609290477954798e-05, + "loss": 2.1063, + "step": 9612500 + }, + { + "epoch": 27.83, + "learning_rate": 3.6092181131900704e-05, + "loss": 2.1398, + "step": 9613000 + }, + { + "epoch": 27.83, + "learning_rate": 3.6091457484253426e-05, + "loss": 2.1307, + "step": 9613500 + }, + { + "epoch": 27.83, + "learning_rate": 3.609073383660615e-05, + "loss": 2.1197, + "step": 9614000 + }, + { + "epoch": 27.83, + "learning_rate": 3.609001018895887e-05, + "loss": 2.1372, + "step": 9614500 + }, + { + "epoch": 27.83, + "learning_rate": 3.60892865413116e-05, + "loss": 2.1388, + "step": 9615000 + }, + { + "epoch": 27.83, + "learning_rate": 3.608856289366432e-05, + "loss": 2.1187, + "step": 9615500 + }, + { + "epoch": 27.83, + "learning_rate": 3.6087839246017044e-05, + "loss": 2.1265, + "step": 9616000 + }, + { + "epoch": 27.84, + "learning_rate": 3.608711559836977e-05, + "loss": 2.1271, + "step": 9616500 + }, + { + "epoch": 27.84, + "learning_rate": 3.6086391950722495e-05, + "loss": 2.1419, + "step": 9617000 + }, + { + "epoch": 27.84, + "learning_rate": 3.608566830307522e-05, + "loss": 2.1247, + "step": 9617500 + }, + { + "epoch": 27.84, + "learning_rate": 3.608494610272323e-05, + "loss": 2.1075, + "step": 9618000 + }, + { + "epoch": 27.84, + "learning_rate": 3.6084222455075955e-05, + "loss": 2.1124, + "step": 9618500 + }, + { + "epoch": 27.84, + "learning_rate": 3.608349880742868e-05, + "loss": 2.1225, + "step": 9619000 + }, + { + "epoch": 27.84, + "learning_rate": 3.60827751597814e-05, + "loss": 2.1242, + "step": 9619500 + }, + { + "epoch": 27.85, + "learning_rate": 3.608205151213412e-05, + "loss": 2.1294, + "step": 9620000 + }, + { + "epoch": 27.85, + "learning_rate": 3.6081327864486844e-05, + "loss": 2.13, + "step": 9620500 + }, + { + "epoch": 27.85, + "learning_rate": 3.608060421683957e-05, + "loss": 2.1577, + "step": 9621000 + }, + { + "epoch": 27.85, + "learning_rate": 3.6079880569192295e-05, + "loss": 2.1386, + "step": 9621500 + }, + { + "epoch": 27.85, + "learning_rate": 3.6079156921545024e-05, + "loss": 2.1183, + "step": 9622000 + }, + { + "epoch": 27.85, + "learning_rate": 3.6078433273897746e-05, + "loss": 2.1544, + "step": 9622500 + }, + { + "epoch": 27.85, + "learning_rate": 3.607770962625047e-05, + "loss": 2.1098, + "step": 9623000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607698597860319e-05, + "loss": 2.1281, + "step": 9623500 + }, + { + "epoch": 27.86, + "learning_rate": 3.6076263778251206e-05, + "loss": 2.1417, + "step": 9624000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607554013060393e-05, + "loss": 2.1485, + "step": 9624500 + }, + { + "epoch": 27.86, + "learning_rate": 3.607481648295665e-05, + "loss": 2.1342, + "step": 9625000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607409283530937e-05, + "loss": 2.1379, + "step": 9625500 + }, + { + "epoch": 27.86, + "learning_rate": 3.6073369187662095e-05, + "loss": 2.1298, + "step": 9626000 + }, + { + "epoch": 27.86, + "learning_rate": 3.607264698731012e-05, + "loss": 2.1281, + "step": 9626500 + }, + { + "epoch": 27.87, + "learning_rate": 3.607192478695813e-05, + "loss": 2.1611, + "step": 9627000 + }, + { + "epoch": 27.87, + "learning_rate": 3.6071201139310855e-05, + "loss": 2.1368, + "step": 9627500 + }, + { + "epoch": 27.87, + "learning_rate": 3.607047749166358e-05, + "loss": 2.1243, + "step": 9628000 + }, + { + "epoch": 27.87, + "learning_rate": 3.60697538440163e-05, + "loss": 2.1295, + "step": 9628500 + }, + { + "epoch": 27.87, + "learning_rate": 3.606903019636902e-05, + "loss": 2.1619, + "step": 9629000 + }, + { + "epoch": 27.87, + "learning_rate": 3.606830654872175e-05, + "loss": 2.1307, + "step": 9629500 + }, + { + "epoch": 27.87, + "learning_rate": 3.6067582901074473e-05, + "loss": 2.1454, + "step": 9630000 + }, + { + "epoch": 27.88, + "learning_rate": 3.6066860700722496e-05, + "loss": 2.1367, + "step": 9630500 + }, + { + "epoch": 27.88, + "learning_rate": 3.606613705307522e-05, + "loss": 2.1338, + "step": 9631000 + }, + { + "epoch": 27.88, + "learning_rate": 3.606541340542794e-05, + "loss": 2.1179, + "step": 9631500 + }, + { + "epoch": 27.88, + "learning_rate": 3.606468975778066e-05, + "loss": 2.1327, + "step": 9632000 + }, + { + "epoch": 27.88, + "learning_rate": 3.6063966110133385e-05, + "loss": 2.1236, + "step": 9632500 + }, + { + "epoch": 27.88, + "learning_rate": 3.606324246248611e-05, + "loss": 2.122, + "step": 9633000 + }, + { + "epoch": 27.89, + "learning_rate": 3.606251881483883e-05, + "loss": 2.1371, + "step": 9633500 + }, + { + "epoch": 27.89, + "learning_rate": 3.606179516719155e-05, + "loss": 2.1099, + "step": 9634000 + }, + { + "epoch": 27.89, + "learning_rate": 3.6061071519544274e-05, + "loss": 2.1343, + "step": 9634500 + }, + { + "epoch": 27.89, + "learning_rate": 3.6060347871896996e-05, + "loss": 2.134, + "step": 9635000 + }, + { + "epoch": 27.89, + "learning_rate": 3.6059624224249725e-05, + "loss": 2.1288, + "step": 9635500 + }, + { + "epoch": 27.89, + "learning_rate": 3.605890057660245e-05, + "loss": 2.1352, + "step": 9636000 + }, + { + "epoch": 27.89, + "learning_rate": 3.6058176928955176e-05, + "loss": 2.107, + "step": 9636500 + }, + { + "epoch": 27.9, + "learning_rate": 3.605745472860319e-05, + "loss": 2.1166, + "step": 9637000 + }, + { + "epoch": 27.9, + "learning_rate": 3.605673252825121e-05, + "loss": 2.1099, + "step": 9637500 + }, + { + "epoch": 27.9, + "learning_rate": 3.605600888060393e-05, + "loss": 2.1298, + "step": 9638000 + }, + { + "epoch": 27.9, + "learning_rate": 3.605528523295665e-05, + "loss": 2.1625, + "step": 9638500 + }, + { + "epoch": 27.9, + "learning_rate": 3.6054561585309374e-05, + "loss": 2.1411, + "step": 9639000 + }, + { + "epoch": 27.9, + "learning_rate": 3.6053837937662096e-05, + "loss": 2.1446, + "step": 9639500 + }, + { + "epoch": 27.9, + "learning_rate": 3.6053114290014825e-05, + "loss": 2.1408, + "step": 9640000 + }, + { + "epoch": 27.91, + "learning_rate": 3.605239208966284e-05, + "loss": 2.0942, + "step": 9640500 + }, + { + "epoch": 27.91, + "learning_rate": 3.605166844201556e-05, + "loss": 2.1201, + "step": 9641000 + }, + { + "epoch": 27.91, + "learning_rate": 3.6050944794368285e-05, + "loss": 2.1447, + "step": 9641500 + }, + { + "epoch": 27.91, + "learning_rate": 3.605022114672101e-05, + "loss": 2.107, + "step": 9642000 + }, + { + "epoch": 27.91, + "learning_rate": 3.604949894636902e-05, + "loss": 2.1312, + "step": 9642500 + }, + { + "epoch": 27.91, + "learning_rate": 3.604877529872175e-05, + "loss": 2.1211, + "step": 9643000 + }, + { + "epoch": 27.91, + "learning_rate": 3.6048051651074474e-05, + "loss": 2.1299, + "step": 9643500 + }, + { + "epoch": 27.92, + "learning_rate": 3.604732945072249e-05, + "loss": 2.1183, + "step": 9644000 + }, + { + "epoch": 27.92, + "learning_rate": 3.604660580307522e-05, + "loss": 2.1179, + "step": 9644500 + }, + { + "epoch": 27.92, + "learning_rate": 3.604588215542794e-05, + "loss": 2.1398, + "step": 9645000 + }, + { + "epoch": 27.92, + "learning_rate": 3.604515850778066e-05, + "loss": 2.1322, + "step": 9645500 + }, + { + "epoch": 27.92, + "learning_rate": 3.6044434860133386e-05, + "loss": 2.1187, + "step": 9646000 + }, + { + "epoch": 27.92, + "learning_rate": 3.60437126597814e-05, + "loss": 2.1373, + "step": 9646500 + }, + { + "epoch": 27.92, + "learning_rate": 3.604298901213412e-05, + "loss": 2.1187, + "step": 9647000 + }, + { + "epoch": 27.93, + "learning_rate": 3.604226536448685e-05, + "loss": 2.1388, + "step": 9647500 + }, + { + "epoch": 27.93, + "learning_rate": 3.6041541716839575e-05, + "loss": 2.1416, + "step": 9648000 + }, + { + "epoch": 27.93, + "learning_rate": 3.604081951648759e-05, + "loss": 2.1082, + "step": 9648500 + }, + { + "epoch": 27.93, + "learning_rate": 3.6040097316135606e-05, + "loss": 2.1092, + "step": 9649000 + }, + { + "epoch": 27.93, + "learning_rate": 3.603937366848833e-05, + "loss": 2.1464, + "step": 9649500 + }, + { + "epoch": 27.93, + "learning_rate": 3.603865002084105e-05, + "loss": 2.1291, + "step": 9650000 + }, + { + "epoch": 27.93, + "learning_rate": 3.603792637319377e-05, + "loss": 2.1387, + "step": 9650500 + }, + { + "epoch": 27.94, + "learning_rate": 3.60372027255465e-05, + "loss": 2.1787, + "step": 9651000 + }, + { + "epoch": 27.94, + "learning_rate": 3.6036479077899224e-05, + "loss": 2.1305, + "step": 9651500 + }, + { + "epoch": 27.94, + "learning_rate": 3.603575543025195e-05, + "loss": 2.1446, + "step": 9652000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603503322989997e-05, + "loss": 2.1378, + "step": 9652500 + }, + { + "epoch": 27.94, + "learning_rate": 3.603430958225269e-05, + "loss": 2.1284, + "step": 9653000 + }, + { + "epoch": 27.94, + "learning_rate": 3.603358593460541e-05, + "loss": 2.123, + "step": 9653500 + }, + { + "epoch": 27.94, + "learning_rate": 3.6032862286958135e-05, + "loss": 2.1125, + "step": 9654000 + }, + { + "epoch": 27.95, + "learning_rate": 3.603213863931086e-05, + "loss": 2.1343, + "step": 9654500 + }, + { + "epoch": 27.95, + "learning_rate": 3.603141499166358e-05, + "loss": 2.1392, + "step": 9655000 + }, + { + "epoch": 27.95, + "learning_rate": 3.60306913440163e-05, + "loss": 2.1062, + "step": 9655500 + }, + { + "epoch": 27.95, + "learning_rate": 3.6029967696369024e-05, + "loss": 2.1388, + "step": 9656000 + }, + { + "epoch": 27.95, + "learning_rate": 3.602924404872175e-05, + "loss": 2.1242, + "step": 9656500 + }, + { + "epoch": 27.95, + "learning_rate": 3.6028520401074475e-05, + "loss": 2.1361, + "step": 9657000 + }, + { + "epoch": 27.95, + "learning_rate": 3.60277967534272e-05, + "loss": 2.1455, + "step": 9657500 + }, + { + "epoch": 27.96, + "learning_rate": 3.602707455307521e-05, + "loss": 2.1104, + "step": 9658000 + }, + { + "epoch": 27.96, + "learning_rate": 3.602635235272323e-05, + "loss": 2.1188, + "step": 9658500 + }, + { + "epoch": 27.96, + "learning_rate": 3.602562870507595e-05, + "loss": 2.1517, + "step": 9659000 + }, + { + "epoch": 27.96, + "learning_rate": 3.602490505742868e-05, + "loss": 2.1288, + "step": 9659500 + }, + { + "epoch": 27.96, + "learning_rate": 3.60241814097814e-05, + "loss": 2.1118, + "step": 9660000 + }, + { + "epoch": 27.96, + "learning_rate": 3.6023457762134124e-05, + "loss": 2.1258, + "step": 9660500 + }, + { + "epoch": 27.96, + "learning_rate": 3.602273411448685e-05, + "loss": 2.1268, + "step": 9661000 + }, + { + "epoch": 27.97, + "learning_rate": 3.6022010466839575e-05, + "loss": 2.1035, + "step": 9661500 + }, + { + "epoch": 27.97, + "learning_rate": 3.60212868191923e-05, + "loss": 2.1396, + "step": 9662000 + }, + { + "epoch": 27.97, + "learning_rate": 3.602056461884031e-05, + "loss": 2.1087, + "step": 9662500 + }, + { + "epoch": 27.97, + "learning_rate": 3.6019840971193035e-05, + "loss": 2.1088, + "step": 9663000 + }, + { + "epoch": 27.97, + "learning_rate": 3.601911732354576e-05, + "loss": 2.1218, + "step": 9663500 + }, + { + "epoch": 27.97, + "learning_rate": 3.601839367589848e-05, + "loss": 2.1318, + "step": 9664000 + }, + { + "epoch": 27.97, + "learning_rate": 3.60176700282512e-05, + "loss": 2.1261, + "step": 9664500 + }, + { + "epoch": 27.98, + "learning_rate": 3.6016946380603924e-05, + "loss": 2.1323, + "step": 9665000 + }, + { + "epoch": 27.98, + "learning_rate": 3.601622273295665e-05, + "loss": 2.1172, + "step": 9665500 + }, + { + "epoch": 27.98, + "learning_rate": 3.6015499085309376e-05, + "loss": 2.1254, + "step": 9666000 + }, + { + "epoch": 27.98, + "learning_rate": 3.60147754376621e-05, + "loss": 2.1237, + "step": 9666500 + }, + { + "epoch": 27.98, + "learning_rate": 3.601405179001483e-05, + "loss": 2.1486, + "step": 9667000 + }, + { + "epoch": 27.98, + "learning_rate": 3.601332814236755e-05, + "loss": 2.1296, + "step": 9667500 + }, + { + "epoch": 27.98, + "learning_rate": 3.601260449472027e-05, + "loss": 2.1071, + "step": 9668000 + }, + { + "epoch": 27.99, + "learning_rate": 3.601188229436829e-05, + "loss": 2.1483, + "step": 9668500 + }, + { + "epoch": 27.99, + "learning_rate": 3.60111600940163e-05, + "loss": 2.1151, + "step": 9669000 + }, + { + "epoch": 27.99, + "learning_rate": 3.601043644636903e-05, + "loss": 2.1529, + "step": 9669500 + }, + { + "epoch": 27.99, + "learning_rate": 3.6009712798721754e-05, + "loss": 2.1361, + "step": 9670000 + }, + { + "epoch": 27.99, + "learning_rate": 3.6008989151074476e-05, + "loss": 2.1446, + "step": 9670500 + }, + { + "epoch": 27.99, + "learning_rate": 3.60082655034272e-05, + "loss": 2.1146, + "step": 9671000 + }, + { + "epoch": 28.0, + "learning_rate": 3.600754185577992e-05, + "loss": 2.1497, + "step": 9671500 + }, + { + "epoch": 28.0, + "learning_rate": 3.600681820813264e-05, + "loss": 2.1167, + "step": 9672000 + }, + { + "epoch": 28.0, + "learning_rate": 3.600609600778066e-05, + "loss": 2.1237, + "step": 9672500 + }, + { + "epoch": 28.0, + "learning_rate": 3.600537236013338e-05, + "loss": 2.1563, + "step": 9673000 + }, + { + "epoch": 28.0, + "eval_accuracy": 0.6667694438971397, + "eval_accuracy_mlm": 0.6312277095277679, + "eval_accuracy_nsp": 0.8573396030120123, + "eval_loss": 2.182663917541504, + "eval_runtime": 331.438, + "eval_samples_per_second": 1316.645, + "eval_steps_per_second": 54.861, + "step": 9673216 + }, + { + "epoch": 28.0, + "learning_rate": 3.60046487124861e-05, + "loss": 2.1061, + "step": 9673500 + }, + { + "epoch": 28.0, + "learning_rate": 3.600392506483883e-05, + "loss": 2.1082, + "step": 9674000 + }, + { + "epoch": 28.0, + "learning_rate": 3.6003201417191554e-05, + "loss": 2.1198, + "step": 9674500 + }, + { + "epoch": 28.01, + "learning_rate": 3.600247776954428e-05, + "loss": 2.1286, + "step": 9675000 + }, + { + "epoch": 28.01, + "learning_rate": 3.6001754121897005e-05, + "loss": 2.1053, + "step": 9675500 + }, + { + "epoch": 28.01, + "learning_rate": 3.600103047424973e-05, + "loss": 2.1165, + "step": 9676000 + }, + { + "epoch": 28.01, + "learning_rate": 3.600030827389774e-05, + "loss": 2.0749, + "step": 9676500 + }, + { + "epoch": 28.01, + "learning_rate": 3.5999584626250465e-05, + "loss": 2.107, + "step": 9677000 + }, + { + "epoch": 28.01, + "learning_rate": 3.599886097860319e-05, + "loss": 2.1012, + "step": 9677500 + }, + { + "epoch": 28.01, + "learning_rate": 3.599813733095591e-05, + "loss": 2.074, + "step": 9678000 + }, + { + "epoch": 28.02, + "learning_rate": 3.599741513060393e-05, + "loss": 2.0736, + "step": 9678500 + }, + { + "epoch": 28.02, + "learning_rate": 3.5996691482956654e-05, + "loss": 2.1095, + "step": 9679000 + }, + { + "epoch": 28.02, + "learning_rate": 3.5995967835309376e-05, + "loss": 2.1076, + "step": 9679500 + }, + { + "epoch": 28.02, + "learning_rate": 3.59952441876621e-05, + "loss": 2.0927, + "step": 9680000 + }, + { + "epoch": 28.02, + "learning_rate": 3.599452054001482e-05, + "loss": 2.0745, + "step": 9680500 + }, + { + "epoch": 28.02, + "learning_rate": 3.599379689236754e-05, + "loss": 2.1202, + "step": 9681000 + }, + { + "epoch": 28.02, + "learning_rate": 3.599307324472027e-05, + "loss": 2.1223, + "step": 9681500 + }, + { + "epoch": 28.03, + "learning_rate": 3.599235104436829e-05, + "loss": 2.0964, + "step": 9682000 + }, + { + "epoch": 28.03, + "learning_rate": 3.599162739672101e-05, + "loss": 2.099, + "step": 9682500 + }, + { + "epoch": 28.03, + "learning_rate": 3.599090374907373e-05, + "loss": 2.1141, + "step": 9683000 + }, + { + "epoch": 28.03, + "learning_rate": 3.5990180101426454e-05, + "loss": 2.1131, + "step": 9683500 + }, + { + "epoch": 28.03, + "learning_rate": 3.598945645377918e-05, + "loss": 2.1317, + "step": 9684000 + }, + { + "epoch": 28.03, + "learning_rate": 3.59887342534272e-05, + "loss": 2.0862, + "step": 9684500 + }, + { + "epoch": 28.03, + "learning_rate": 3.598801060577992e-05, + "loss": 2.1147, + "step": 9685000 + }, + { + "epoch": 28.04, + "learning_rate": 3.598728695813264e-05, + "loss": 2.0986, + "step": 9685500 + }, + { + "epoch": 28.04, + "learning_rate": 3.5986563310485366e-05, + "loss": 2.1117, + "step": 9686000 + }, + { + "epoch": 28.04, + "learning_rate": 3.598583966283809e-05, + "loss": 2.1287, + "step": 9686500 + }, + { + "epoch": 28.04, + "learning_rate": 3.59851174624861e-05, + "loss": 2.1126, + "step": 9687000 + }, + { + "epoch": 28.04, + "learning_rate": 3.598439381483883e-05, + "loss": 2.097, + "step": 9687500 + }, + { + "epoch": 28.04, + "learning_rate": 3.598367161448685e-05, + "loss": 2.1014, + "step": 9688000 + }, + { + "epoch": 28.04, + "learning_rate": 3.598294796683957e-05, + "loss": 2.1132, + "step": 9688500 + }, + { + "epoch": 28.05, + "learning_rate": 3.598222431919229e-05, + "loss": 2.0837, + "step": 9689000 + }, + { + "epoch": 28.05, + "learning_rate": 3.598150067154502e-05, + "loss": 2.1264, + "step": 9689500 + }, + { + "epoch": 28.05, + "learning_rate": 3.5980777023897744e-05, + "loss": 2.1075, + "step": 9690000 + }, + { + "epoch": 28.05, + "learning_rate": 3.5980053376250466e-05, + "loss": 2.1201, + "step": 9690500 + }, + { + "epoch": 28.05, + "learning_rate": 3.597932972860319e-05, + "loss": 2.1236, + "step": 9691000 + }, + { + "epoch": 28.05, + "learning_rate": 3.597860608095591e-05, + "loss": 2.1086, + "step": 9691500 + }, + { + "epoch": 28.05, + "learning_rate": 3.597788243330863e-05, + "loss": 2.1238, + "step": 9692000 + }, + { + "epoch": 28.06, + "learning_rate": 3.597716168025195e-05, + "loss": 2.1132, + "step": 9692500 + }, + { + "epoch": 28.06, + "learning_rate": 3.597643803260467e-05, + "loss": 2.1133, + "step": 9693000 + }, + { + "epoch": 28.06, + "learning_rate": 3.597571438495739e-05, + "loss": 2.1301, + "step": 9693500 + }, + { + "epoch": 28.06, + "learning_rate": 3.597499218460541e-05, + "loss": 2.1253, + "step": 9694000 + }, + { + "epoch": 28.06, + "learning_rate": 3.597426853695813e-05, + "loss": 2.1069, + "step": 9694500 + }, + { + "epoch": 28.06, + "learning_rate": 3.597354488931086e-05, + "loss": 2.1051, + "step": 9695000 + }, + { + "epoch": 28.06, + "learning_rate": 3.597282124166358e-05, + "loss": 2.1015, + "step": 9695500 + }, + { + "epoch": 28.07, + "learning_rate": 3.5972097594016304e-05, + "loss": 2.1002, + "step": 9696000 + }, + { + "epoch": 28.07, + "learning_rate": 3.5971373946369026e-05, + "loss": 2.1073, + "step": 9696500 + }, + { + "epoch": 28.07, + "learning_rate": 3.5970650298721755e-05, + "loss": 2.113, + "step": 9697000 + }, + { + "epoch": 28.07, + "learning_rate": 3.596992665107448e-05, + "loss": 2.1286, + "step": 9697500 + }, + { + "epoch": 28.07, + "learning_rate": 3.59692030034272e-05, + "loss": 2.0978, + "step": 9698000 + }, + { + "epoch": 28.07, + "learning_rate": 3.596847935577992e-05, + "loss": 2.1021, + "step": 9698500 + }, + { + "epoch": 28.07, + "learning_rate": 3.5967755708132644e-05, + "loss": 2.1171, + "step": 9699000 + }, + { + "epoch": 28.08, + "learning_rate": 3.5967032060485366e-05, + "loss": 2.1204, + "step": 9699500 + }, + { + "epoch": 28.08, + "learning_rate": 3.596630841283809e-05, + "loss": 2.1066, + "step": 9700000 + }, + { + "epoch": 28.08, + "learning_rate": 3.596558476519081e-05, + "loss": 2.1341, + "step": 9700500 + }, + { + "epoch": 28.08, + "learning_rate": 3.596486111754353e-05, + "loss": 2.0861, + "step": 9701000 + }, + { + "epoch": 28.08, + "learning_rate": 3.5964137469896255e-05, + "loss": 2.1171, + "step": 9701500 + }, + { + "epoch": 28.08, + "learning_rate": 3.5963413822248984e-05, + "loss": 2.1173, + "step": 9702000 + }, + { + "epoch": 28.08, + "learning_rate": 3.5962690174601707e-05, + "loss": 2.1187, + "step": 9702500 + }, + { + "epoch": 28.09, + "learning_rate": 3.596196652695443e-05, + "loss": 2.1059, + "step": 9703000 + }, + { + "epoch": 28.09, + "learning_rate": 3.596124287930715e-05, + "loss": 2.1111, + "step": 9703500 + }, + { + "epoch": 28.09, + "learning_rate": 3.596051923165988e-05, + "loss": 2.0957, + "step": 9704000 + }, + { + "epoch": 28.09, + "learning_rate": 3.59597955840126e-05, + "loss": 2.0935, + "step": 9704500 + }, + { + "epoch": 28.09, + "learning_rate": 3.595907338366062e-05, + "loss": 2.1162, + "step": 9705000 + }, + { + "epoch": 28.09, + "learning_rate": 3.595834973601334e-05, + "loss": 2.0782, + "step": 9705500 + }, + { + "epoch": 28.09, + "learning_rate": 3.595762608836606e-05, + "loss": 2.1191, + "step": 9706000 + }, + { + "epoch": 28.1, + "learning_rate": 3.5956902440718784e-05, + "loss": 2.111, + "step": 9706500 + }, + { + "epoch": 28.1, + "learning_rate": 3.595617879307151e-05, + "loss": 2.1414, + "step": 9707000 + }, + { + "epoch": 28.1, + "learning_rate": 3.5955455145424236e-05, + "loss": 2.1114, + "step": 9707500 + }, + { + "epoch": 28.1, + "learning_rate": 3.595473294507225e-05, + "loss": 2.1124, + "step": 9708000 + }, + { + "epoch": 28.1, + "learning_rate": 3.5954009297424974e-05, + "loss": 2.122, + "step": 9708500 + }, + { + "epoch": 28.1, + "learning_rate": 3.5953285649777696e-05, + "loss": 2.1112, + "step": 9709000 + }, + { + "epoch": 28.11, + "learning_rate": 3.595256200213042e-05, + "loss": 2.1096, + "step": 9709500 + }, + { + "epoch": 28.11, + "learning_rate": 3.5951839801778434e-05, + "loss": 2.0982, + "step": 9710000 + }, + { + "epoch": 28.11, + "learning_rate": 3.595111615413116e-05, + "loss": 2.1295, + "step": 9710500 + }, + { + "epoch": 28.11, + "learning_rate": 3.5950392506483885e-05, + "loss": 2.1374, + "step": 9711000 + }, + { + "epoch": 28.11, + "learning_rate": 3.594967030613191e-05, + "loss": 2.1336, + "step": 9711500 + }, + { + "epoch": 28.11, + "learning_rate": 3.594894665848463e-05, + "loss": 2.0976, + "step": 9712000 + }, + { + "epoch": 28.11, + "learning_rate": 3.594822301083735e-05, + "loss": 2.1118, + "step": 9712500 + }, + { + "epoch": 28.12, + "learning_rate": 3.5947499363190074e-05, + "loss": 2.1132, + "step": 9713000 + }, + { + "epoch": 28.12, + "learning_rate": 3.5946775715542796e-05, + "loss": 2.1001, + "step": 9713500 + }, + { + "epoch": 28.12, + "learning_rate": 3.594605351519081e-05, + "loss": 2.1274, + "step": 9714000 + }, + { + "epoch": 28.12, + "learning_rate": 3.5945329867543534e-05, + "loss": 2.1312, + "step": 9714500 + }, + { + "epoch": 28.12, + "learning_rate": 3.594460621989626e-05, + "loss": 2.0991, + "step": 9715000 + }, + { + "epoch": 28.12, + "learning_rate": 3.5943882572248985e-05, + "loss": 2.1227, + "step": 9715500 + }, + { + "epoch": 28.12, + "learning_rate": 3.594315892460171e-05, + "loss": 2.1156, + "step": 9716000 + }, + { + "epoch": 28.13, + "learning_rate": 3.594243527695443e-05, + "loss": 2.1033, + "step": 9716500 + }, + { + "epoch": 28.13, + "learning_rate": 3.594171162930715e-05, + "loss": 2.1374, + "step": 9717000 + }, + { + "epoch": 28.13, + "learning_rate": 3.5940987981659874e-05, + "loss": 2.1077, + "step": 9717500 + }, + { + "epoch": 28.13, + "learning_rate": 3.594026578130789e-05, + "loss": 2.1252, + "step": 9718000 + }, + { + "epoch": 28.13, + "learning_rate": 3.593954213366061e-05, + "loss": 2.116, + "step": 9718500 + }, + { + "epoch": 28.13, + "learning_rate": 3.593881848601334e-05, + "loss": 2.1305, + "step": 9719000 + }, + { + "epoch": 28.13, + "learning_rate": 3.593809483836606e-05, + "loss": 2.1143, + "step": 9719500 + }, + { + "epoch": 28.14, + "learning_rate": 3.5937371190718785e-05, + "loss": 2.1179, + "step": 9720000 + }, + { + "epoch": 28.14, + "learning_rate": 3.593664899036681e-05, + "loss": 2.1189, + "step": 9720500 + }, + { + "epoch": 28.14, + "learning_rate": 3.593592534271953e-05, + "loss": 2.1159, + "step": 9721000 + }, + { + "epoch": 28.14, + "learning_rate": 3.593520169507225e-05, + "loss": 2.106, + "step": 9721500 + }, + { + "epoch": 28.14, + "learning_rate": 3.5934478047424974e-05, + "loss": 2.1107, + "step": 9722000 + }, + { + "epoch": 28.14, + "learning_rate": 3.5933754399777697e-05, + "loss": 2.1076, + "step": 9722500 + }, + { + "epoch": 28.14, + "learning_rate": 3.593303219942571e-05, + "loss": 2.0999, + "step": 9723000 + }, + { + "epoch": 28.15, + "learning_rate": 3.5932308551778434e-05, + "loss": 2.1012, + "step": 9723500 + }, + { + "epoch": 28.15, + "learning_rate": 3.593158490413116e-05, + "loss": 2.1255, + "step": 9724000 + }, + { + "epoch": 28.15, + "learning_rate": 3.5930861256483886e-05, + "loss": 2.1432, + "step": 9724500 + }, + { + "epoch": 28.15, + "learning_rate": 3.593013760883661e-05, + "loss": 2.1175, + "step": 9725000 + }, + { + "epoch": 28.15, + "learning_rate": 3.592941396118933e-05, + "loss": 2.0825, + "step": 9725500 + }, + { + "epoch": 28.15, + "learning_rate": 3.592869031354205e-05, + "loss": 2.1055, + "step": 9726000 + }, + { + "epoch": 28.15, + "learning_rate": 3.592796666589478e-05, + "loss": 2.0972, + "step": 9726500 + }, + { + "epoch": 28.16, + "learning_rate": 3.59272444655428e-05, + "loss": 2.1253, + "step": 9727000 + }, + { + "epoch": 28.16, + "learning_rate": 3.592652226519081e-05, + "loss": 2.1051, + "step": 9727500 + }, + { + "epoch": 28.16, + "learning_rate": 3.5925798617543535e-05, + "loss": 2.113, + "step": 9728000 + }, + { + "epoch": 28.16, + "learning_rate": 3.5925074969896264e-05, + "loss": 2.1197, + "step": 9728500 + }, + { + "epoch": 28.16, + "learning_rate": 3.5924351322248986e-05, + "loss": 2.1027, + "step": 9729000 + }, + { + "epoch": 28.16, + "learning_rate": 3.592362767460171e-05, + "loss": 2.1086, + "step": 9729500 + }, + { + "epoch": 28.16, + "learning_rate": 3.592290402695443e-05, + "loss": 2.1291, + "step": 9730000 + }, + { + "epoch": 28.17, + "learning_rate": 3.592218037930715e-05, + "loss": 2.1228, + "step": 9730500 + }, + { + "epoch": 28.17, + "learning_rate": 3.592145817895517e-05, + "loss": 2.0911, + "step": 9731000 + }, + { + "epoch": 28.17, + "learning_rate": 3.592073453130789e-05, + "loss": 2.0891, + "step": 9731500 + }, + { + "epoch": 28.17, + "learning_rate": 3.592001088366061e-05, + "loss": 2.1078, + "step": 9732000 + }, + { + "epoch": 28.17, + "learning_rate": 3.5919287236013335e-05, + "loss": 2.1269, + "step": 9732500 + }, + { + "epoch": 28.17, + "learning_rate": 3.5918563588366064e-05, + "loss": 2.113, + "step": 9733000 + }, + { + "epoch": 28.17, + "learning_rate": 3.5917839940718786e-05, + "loss": 2.1318, + "step": 9733500 + }, + { + "epoch": 28.18, + "learning_rate": 3.5917116293071515e-05, + "loss": 2.1302, + "step": 9734000 + }, + { + "epoch": 28.18, + "learning_rate": 3.591639264542424e-05, + "loss": 2.1092, + "step": 9734500 + }, + { + "epoch": 28.18, + "learning_rate": 3.591566899777696e-05, + "loss": 2.1128, + "step": 9735000 + }, + { + "epoch": 28.18, + "learning_rate": 3.591494535012968e-05, + "loss": 2.1336, + "step": 9735500 + }, + { + "epoch": 28.18, + "learning_rate": 3.5914221702482404e-05, + "loss": 2.129, + "step": 9736000 + }, + { + "epoch": 28.18, + "learning_rate": 3.591349950213042e-05, + "loss": 2.1164, + "step": 9736500 + }, + { + "epoch": 28.18, + "learning_rate": 3.591277585448314e-05, + "loss": 2.1208, + "step": 9737000 + }, + { + "epoch": 28.19, + "learning_rate": 3.5912052206835864e-05, + "loss": 2.1181, + "step": 9737500 + }, + { + "epoch": 28.19, + "learning_rate": 3.5911330006483886e-05, + "loss": 2.1108, + "step": 9738000 + }, + { + "epoch": 28.19, + "learning_rate": 3.591060635883661e-05, + "loss": 2.1379, + "step": 9738500 + }, + { + "epoch": 28.19, + "learning_rate": 3.5909884158484624e-05, + "loss": 2.1188, + "step": 9739000 + }, + { + "epoch": 28.19, + "learning_rate": 3.5909160510837346e-05, + "loss": 2.1071, + "step": 9739500 + }, + { + "epoch": 28.19, + "learning_rate": 3.590843686319007e-05, + "loss": 2.1368, + "step": 9740000 + }, + { + "epoch": 28.19, + "learning_rate": 3.590771321554279e-05, + "loss": 2.1228, + "step": 9740500 + }, + { + "epoch": 28.2, + "learning_rate": 3.590698956789551e-05, + "loss": 2.0931, + "step": 9741000 + }, + { + "epoch": 28.2, + "learning_rate": 3.590626736754354e-05, + "loss": 2.1131, + "step": 9741500 + }, + { + "epoch": 28.2, + "learning_rate": 3.5905543719896264e-05, + "loss": 2.1081, + "step": 9742000 + }, + { + "epoch": 28.2, + "learning_rate": 3.590482007224899e-05, + "loss": 2.1264, + "step": 9742500 + }, + { + "epoch": 28.2, + "learning_rate": 3.590409642460171e-05, + "loss": 2.1113, + "step": 9743000 + }, + { + "epoch": 28.2, + "learning_rate": 3.590337277695443e-05, + "loss": 2.1298, + "step": 9743500 + }, + { + "epoch": 28.2, + "learning_rate": 3.590264912930715e-05, + "loss": 2.0918, + "step": 9744000 + }, + { + "epoch": 28.21, + "learning_rate": 3.5901925481659876e-05, + "loss": 2.1193, + "step": 9744500 + }, + { + "epoch": 28.21, + "learning_rate": 3.590120328130789e-05, + "loss": 2.1092, + "step": 9745000 + }, + { + "epoch": 28.21, + "learning_rate": 3.5900481080955914e-05, + "loss": 2.1235, + "step": 9745500 + }, + { + "epoch": 28.21, + "learning_rate": 3.5899757433308636e-05, + "loss": 2.133, + "step": 9746000 + }, + { + "epoch": 28.21, + "learning_rate": 3.589903378566136e-05, + "loss": 2.1233, + "step": 9746500 + }, + { + "epoch": 28.21, + "learning_rate": 3.589831013801408e-05, + "loss": 2.1275, + "step": 9747000 + }, + { + "epoch": 28.22, + "learning_rate": 3.58975864903668e-05, + "loss": 2.1009, + "step": 9747500 + }, + { + "epoch": 28.22, + "learning_rate": 3.5896862842719525e-05, + "loss": 2.0962, + "step": 9748000 + }, + { + "epoch": 28.22, + "learning_rate": 3.589613919507225e-05, + "loss": 2.1337, + "step": 9748500 + }, + { + "epoch": 28.22, + "learning_rate": 3.5895415547424976e-05, + "loss": 2.1135, + "step": 9749000 + }, + { + "epoch": 28.22, + "learning_rate": 3.58946918997777e-05, + "loss": 2.1099, + "step": 9749500 + }, + { + "epoch": 28.22, + "learning_rate": 3.589396825213042e-05, + "loss": 2.1018, + "step": 9750000 + }, + { + "epoch": 28.22, + "learning_rate": 3.589324460448314e-05, + "loss": 2.1461, + "step": 9750500 + }, + { + "epoch": 28.23, + "learning_rate": 3.5892520956835865e-05, + "loss": 2.123, + "step": 9751000 + }, + { + "epoch": 28.23, + "learning_rate": 3.5891797309188594e-05, + "loss": 2.1269, + "step": 9751500 + }, + { + "epoch": 28.23, + "learning_rate": 3.5891073661541316e-05, + "loss": 2.1197, + "step": 9752000 + }, + { + "epoch": 28.23, + "learning_rate": 3.589035001389404e-05, + "loss": 2.1178, + "step": 9752500 + }, + { + "epoch": 28.23, + "learning_rate": 3.588962636624676e-05, + "loss": 2.0853, + "step": 9753000 + }, + { + "epoch": 28.23, + "learning_rate": 3.588890271859948e-05, + "loss": 2.0897, + "step": 9753500 + }, + { + "epoch": 28.23, + "learning_rate": 3.5888179070952205e-05, + "loss": 2.1092, + "step": 9754000 + }, + { + "epoch": 28.24, + "learning_rate": 3.588745542330493e-05, + "loss": 2.1079, + "step": 9754500 + }, + { + "epoch": 28.24, + "learning_rate": 3.588673177565765e-05, + "loss": 2.12, + "step": 9755000 + }, + { + "epoch": 28.24, + "learning_rate": 3.5886009575305665e-05, + "loss": 2.1188, + "step": 9755500 + }, + { + "epoch": 28.24, + "learning_rate": 3.5885285927658394e-05, + "loss": 2.1172, + "step": 9756000 + }, + { + "epoch": 28.24, + "learning_rate": 3.5884562280011116e-05, + "loss": 2.0877, + "step": 9756500 + }, + { + "epoch": 28.24, + "learning_rate": 3.5883838632363845e-05, + "loss": 2.1093, + "step": 9757000 + }, + { + "epoch": 28.24, + "learning_rate": 3.588311498471657e-05, + "loss": 2.0726, + "step": 9757500 + }, + { + "epoch": 28.25, + "learning_rate": 3.588239278436458e-05, + "loss": 2.1199, + "step": 9758000 + }, + { + "epoch": 28.25, + "learning_rate": 3.58816705840126e-05, + "loss": 2.1303, + "step": 9758500 + }, + { + "epoch": 28.25, + "learning_rate": 3.588094693636532e-05, + "loss": 2.1251, + "step": 9759000 + }, + { + "epoch": 28.25, + "learning_rate": 3.588022328871804e-05, + "loss": 2.1037, + "step": 9759500 + }, + { + "epoch": 28.25, + "learning_rate": 3.5879501088366065e-05, + "loss": 2.1147, + "step": 9760000 + }, + { + "epoch": 28.25, + "learning_rate": 3.587877744071879e-05, + "loss": 2.1048, + "step": 9760500 + }, + { + "epoch": 28.25, + "learning_rate": 3.587805379307151e-05, + "loss": 2.1051, + "step": 9761000 + }, + { + "epoch": 28.26, + "learning_rate": 3.587733014542423e-05, + "loss": 2.1207, + "step": 9761500 + }, + { + "epoch": 28.26, + "learning_rate": 3.5876606497776954e-05, + "loss": 2.108, + "step": 9762000 + }, + { + "epoch": 28.26, + "learning_rate": 3.587588285012968e-05, + "loss": 2.1296, + "step": 9762500 + }, + { + "epoch": 28.26, + "learning_rate": 3.58751592024824e-05, + "loss": 2.1368, + "step": 9763000 + }, + { + "epoch": 28.26, + "learning_rate": 3.587443555483513e-05, + "loss": 2.1373, + "step": 9763500 + }, + { + "epoch": 28.26, + "learning_rate": 3.587371190718785e-05, + "loss": 2.1152, + "step": 9764000 + }, + { + "epoch": 28.26, + "learning_rate": 3.5872991154131166e-05, + "loss": 2.0997, + "step": 9764500 + }, + { + "epoch": 28.27, + "learning_rate": 3.587226750648389e-05, + "loss": 2.1188, + "step": 9765000 + }, + { + "epoch": 28.27, + "learning_rate": 3.587154385883661e-05, + "loss": 2.121, + "step": 9765500 + }, + { + "epoch": 28.27, + "learning_rate": 3.587082021118933e-05, + "loss": 2.1049, + "step": 9766000 + }, + { + "epoch": 28.27, + "learning_rate": 3.587009801083735e-05, + "loss": 2.1265, + "step": 9766500 + }, + { + "epoch": 28.27, + "learning_rate": 3.586937436319007e-05, + "loss": 2.1336, + "step": 9767000 + }, + { + "epoch": 28.27, + "learning_rate": 3.586865071554279e-05, + "loss": 2.118, + "step": 9767500 + }, + { + "epoch": 28.27, + "learning_rate": 3.586792706789552e-05, + "loss": 2.0857, + "step": 9768000 + }, + { + "epoch": 28.28, + "learning_rate": 3.5867203420248244e-05, + "loss": 2.1091, + "step": 9768500 + }, + { + "epoch": 28.28, + "learning_rate": 3.5866479772600966e-05, + "loss": 2.1117, + "step": 9769000 + }, + { + "epoch": 28.28, + "learning_rate": 3.586575612495369e-05, + "loss": 2.1078, + "step": 9769500 + }, + { + "epoch": 28.28, + "learning_rate": 3.586503247730641e-05, + "loss": 2.1325, + "step": 9770000 + }, + { + "epoch": 28.28, + "learning_rate": 3.5864310276954426e-05, + "loss": 2.0993, + "step": 9770500 + }, + { + "epoch": 28.28, + "learning_rate": 3.586358662930715e-05, + "loss": 2.1342, + "step": 9771000 + }, + { + "epoch": 28.28, + "learning_rate": 3.586286298165988e-05, + "loss": 2.1069, + "step": 9771500 + }, + { + "epoch": 28.29, + "learning_rate": 3.58621393340126e-05, + "loss": 2.1118, + "step": 9772000 + }, + { + "epoch": 28.29, + "learning_rate": 3.586141568636532e-05, + "loss": 2.1175, + "step": 9772500 + }, + { + "epoch": 28.29, + "learning_rate": 3.5860692038718044e-05, + "loss": 2.098, + "step": 9773000 + }, + { + "epoch": 28.29, + "learning_rate": 3.585996839107077e-05, + "loss": 2.1196, + "step": 9773500 + }, + { + "epoch": 28.29, + "learning_rate": 3.5859244743423495e-05, + "loss": 2.1388, + "step": 9774000 + }, + { + "epoch": 28.29, + "learning_rate": 3.585852109577622e-05, + "loss": 2.1055, + "step": 9774500 + }, + { + "epoch": 28.29, + "learning_rate": 3.585779889542423e-05, + "loss": 2.1197, + "step": 9775000 + }, + { + "epoch": 28.3, + "learning_rate": 3.585707669507225e-05, + "loss": 2.1116, + "step": 9775500 + }, + { + "epoch": 28.3, + "learning_rate": 3.585635304742497e-05, + "loss": 2.1101, + "step": 9776000 + }, + { + "epoch": 28.3, + "learning_rate": 3.585562939977769e-05, + "loss": 2.1035, + "step": 9776500 + }, + { + "epoch": 28.3, + "learning_rate": 3.585490575213042e-05, + "loss": 2.1427, + "step": 9777000 + }, + { + "epoch": 28.3, + "learning_rate": 3.585418355177844e-05, + "loss": 2.1214, + "step": 9777500 + }, + { + "epoch": 28.3, + "learning_rate": 3.585345990413116e-05, + "loss": 2.1162, + "step": 9778000 + }, + { + "epoch": 28.3, + "learning_rate": 3.585273625648388e-05, + "loss": 2.114, + "step": 9778500 + }, + { + "epoch": 28.31, + "learning_rate": 3.5852014056131904e-05, + "loss": 2.0958, + "step": 9779000 + }, + { + "epoch": 28.31, + "learning_rate": 3.5851290408484627e-05, + "loss": 2.1082, + "step": 9779500 + }, + { + "epoch": 28.31, + "learning_rate": 3.585056676083735e-05, + "loss": 2.1168, + "step": 9780000 + }, + { + "epoch": 28.31, + "learning_rate": 3.584984456048537e-05, + "loss": 2.1255, + "step": 9780500 + }, + { + "epoch": 28.31, + "learning_rate": 3.584912091283809e-05, + "loss": 2.1245, + "step": 9781000 + }, + { + "epoch": 28.31, + "learning_rate": 3.5848397265190816e-05, + "loss": 2.1295, + "step": 9781500 + }, + { + "epoch": 28.31, + "learning_rate": 3.584767361754354e-05, + "loss": 2.1004, + "step": 9782000 + }, + { + "epoch": 28.32, + "learning_rate": 3.584694996989626e-05, + "loss": 2.1135, + "step": 9782500 + }, + { + "epoch": 28.32, + "learning_rate": 3.584622632224898e-05, + "loss": 2.1314, + "step": 9783000 + }, + { + "epoch": 28.32, + "learning_rate": 3.5845502674601705e-05, + "loss": 2.1205, + "step": 9783500 + }, + { + "epoch": 28.32, + "learning_rate": 3.584477902695443e-05, + "loss": 2.1112, + "step": 9784000 + }, + { + "epoch": 28.32, + "learning_rate": 3.584405682660244e-05, + "loss": 2.1346, + "step": 9784500 + }, + { + "epoch": 28.32, + "learning_rate": 3.584333317895517e-05, + "loss": 2.1227, + "step": 9785000 + }, + { + "epoch": 28.33, + "learning_rate": 3.5842609531307894e-05, + "loss": 2.1237, + "step": 9785500 + }, + { + "epoch": 28.33, + "learning_rate": 3.5841885883660616e-05, + "loss": 2.1023, + "step": 9786000 + }, + { + "epoch": 28.33, + "learning_rate": 3.5841162236013345e-05, + "loss": 2.1134, + "step": 9786500 + }, + { + "epoch": 28.33, + "learning_rate": 3.584043858836607e-05, + "loss": 2.1003, + "step": 9787000 + }, + { + "epoch": 28.33, + "learning_rate": 3.583971494071879e-05, + "loss": 2.0957, + "step": 9787500 + }, + { + "epoch": 28.33, + "learning_rate": 3.583899129307151e-05, + "loss": 2.1128, + "step": 9788000 + }, + { + "epoch": 28.33, + "learning_rate": 3.583826909271953e-05, + "loss": 2.1329, + "step": 9788500 + }, + { + "epoch": 28.34, + "learning_rate": 3.583754544507225e-05, + "loss": 2.1094, + "step": 9789000 + }, + { + "epoch": 28.34, + "learning_rate": 3.583682179742497e-05, + "loss": 2.1169, + "step": 9789500 + }, + { + "epoch": 28.34, + "learning_rate": 3.5836098149777694e-05, + "loss": 2.135, + "step": 9790000 + }, + { + "epoch": 28.34, + "learning_rate": 3.583537450213042e-05, + "loss": 2.1215, + "step": 9790500 + }, + { + "epoch": 28.34, + "learning_rate": 3.5834650854483145e-05, + "loss": 2.0952, + "step": 9791000 + }, + { + "epoch": 28.34, + "learning_rate": 3.583392720683587e-05, + "loss": 2.1154, + "step": 9791500 + }, + { + "epoch": 28.34, + "learning_rate": 3.583320355918859e-05, + "loss": 2.1077, + "step": 9792000 + }, + { + "epoch": 28.35, + "learning_rate": 3.583247991154131e-05, + "loss": 2.0838, + "step": 9792500 + }, + { + "epoch": 28.35, + "learning_rate": 3.5831756263894034e-05, + "loss": 2.1259, + "step": 9793000 + }, + { + "epoch": 28.35, + "learning_rate": 3.583103261624676e-05, + "loss": 2.1493, + "step": 9793500 + }, + { + "epoch": 28.35, + "learning_rate": 3.5830308968599485e-05, + "loss": 2.1401, + "step": 9794000 + }, + { + "epoch": 28.35, + "learning_rate": 3.582958532095221e-05, + "loss": 2.0956, + "step": 9794500 + }, + { + "epoch": 28.35, + "learning_rate": 3.582886167330493e-05, + "loss": 2.1065, + "step": 9795000 + }, + { + "epoch": 28.35, + "learning_rate": 3.582813802565765e-05, + "loss": 2.1162, + "step": 9795500 + }, + { + "epoch": 28.36, + "learning_rate": 3.5827414378010374e-05, + "loss": 2.1199, + "step": 9796000 + }, + { + "epoch": 28.36, + "learning_rate": 3.5826690730363096e-05, + "loss": 2.1204, + "step": 9796500 + }, + { + "epoch": 28.36, + "learning_rate": 3.5825967082715825e-05, + "loss": 2.1336, + "step": 9797000 + }, + { + "epoch": 28.36, + "learning_rate": 3.582524343506855e-05, + "loss": 2.0934, + "step": 9797500 + }, + { + "epoch": 28.36, + "learning_rate": 3.582452123471656e-05, + "loss": 2.1321, + "step": 9798000 + }, + { + "epoch": 28.36, + "learning_rate": 3.5823797587069285e-05, + "loss": 2.1188, + "step": 9798500 + }, + { + "epoch": 28.36, + "learning_rate": 3.582307393942201e-05, + "loss": 2.1139, + "step": 9799000 + }, + { + "epoch": 28.37, + "learning_rate": 3.582235029177473e-05, + "loss": 2.1445, + "step": 9799500 + }, + { + "epoch": 28.37, + "learning_rate": 3.5821628091422745e-05, + "loss": 2.1099, + "step": 9800000 + }, + { + "epoch": 28.37, + "learning_rate": 3.5820904443775474e-05, + "loss": 2.1213, + "step": 9800500 + }, + { + "epoch": 28.37, + "learning_rate": 3.58201807961282e-05, + "loss": 2.1135, + "step": 9801000 + }, + { + "epoch": 28.37, + "learning_rate": 3.581945859577622e-05, + "loss": 2.1255, + "step": 9801500 + }, + { + "epoch": 28.37, + "learning_rate": 3.581873494812894e-05, + "loss": 2.1185, + "step": 9802000 + }, + { + "epoch": 28.37, + "learning_rate": 3.5818011300481663e-05, + "loss": 2.1169, + "step": 9802500 + }, + { + "epoch": 28.38, + "learning_rate": 3.5817287652834386e-05, + "loss": 2.1245, + "step": 9803000 + }, + { + "epoch": 28.38, + "learning_rate": 3.581656400518711e-05, + "loss": 2.113, + "step": 9803500 + }, + { + "epoch": 28.38, + "learning_rate": 3.581584035753983e-05, + "loss": 2.1206, + "step": 9804000 + }, + { + "epoch": 28.38, + "learning_rate": 3.581511670989255e-05, + "loss": 2.1102, + "step": 9804500 + }, + { + "epoch": 28.38, + "learning_rate": 3.5814393062245275e-05, + "loss": 2.1223, + "step": 9805000 + }, + { + "epoch": 28.38, + "learning_rate": 3.5813669414598e-05, + "loss": 2.1247, + "step": 9805500 + }, + { + "epoch": 28.38, + "learning_rate": 3.5812945766950726e-05, + "loss": 2.1378, + "step": 9806000 + }, + { + "epoch": 28.39, + "learning_rate": 3.581222211930345e-05, + "loss": 2.1345, + "step": 9806500 + }, + { + "epoch": 28.39, + "learning_rate": 3.581149847165617e-05, + "loss": 2.1241, + "step": 9807000 + }, + { + "epoch": 28.39, + "learning_rate": 3.581077482400889e-05, + "loss": 2.1054, + "step": 9807500 + }, + { + "epoch": 28.39, + "learning_rate": 3.581005262365691e-05, + "loss": 2.1356, + "step": 9808000 + }, + { + "epoch": 28.39, + "learning_rate": 3.580932897600964e-05, + "loss": 2.1182, + "step": 9808500 + }, + { + "epoch": 28.39, + "learning_rate": 3.580860677565765e-05, + "loss": 2.1131, + "step": 9809000 + }, + { + "epoch": 28.39, + "learning_rate": 3.5807883128010375e-05, + "loss": 2.1035, + "step": 9809500 + }, + { + "epoch": 28.4, + "learning_rate": 3.58071594803631e-05, + "loss": 2.1201, + "step": 9810000 + }, + { + "epoch": 28.4, + "learning_rate": 3.5806435832715826e-05, + "loss": 2.1187, + "step": 9810500 + }, + { + "epoch": 28.4, + "learning_rate": 3.580571218506855e-05, + "loss": 2.131, + "step": 9811000 + }, + { + "epoch": 28.4, + "learning_rate": 3.580498853742127e-05, + "loss": 2.126, + "step": 9811500 + }, + { + "epoch": 28.4, + "learning_rate": 3.580426488977399e-05, + "loss": 2.1069, + "step": 9812000 + }, + { + "epoch": 28.4, + "learning_rate": 3.5803541242126715e-05, + "loss": 2.1087, + "step": 9812500 + }, + { + "epoch": 28.4, + "learning_rate": 3.580281904177473e-05, + "loss": 2.1309, + "step": 9813000 + }, + { + "epoch": 28.41, + "learning_rate": 3.580209539412745e-05, + "loss": 2.1127, + "step": 9813500 + }, + { + "epoch": 28.41, + "learning_rate": 3.5801371746480175e-05, + "loss": 2.1318, + "step": 9814000 + }, + { + "epoch": 28.41, + "learning_rate": 3.58006480988329e-05, + "loss": 2.1047, + "step": 9814500 + }, + { + "epoch": 28.41, + "learning_rate": 3.579992734577621e-05, + "loss": 2.1024, + "step": 9815000 + }, + { + "epoch": 28.41, + "learning_rate": 3.5799203698128935e-05, + "loss": 2.1306, + "step": 9815500 + }, + { + "epoch": 28.41, + "learning_rate": 3.5798480050481664e-05, + "loss": 2.1236, + "step": 9816000 + }, + { + "epoch": 28.41, + "learning_rate": 3.5797756402834386e-05, + "loss": 2.1389, + "step": 9816500 + }, + { + "epoch": 28.42, + "learning_rate": 3.579703275518711e-05, + "loss": 2.1108, + "step": 9817000 + }, + { + "epoch": 28.42, + "learning_rate": 3.5796310554835124e-05, + "loss": 2.1106, + "step": 9817500 + }, + { + "epoch": 28.42, + "learning_rate": 3.579558690718785e-05, + "loss": 2.1048, + "step": 9818000 + }, + { + "epoch": 28.42, + "learning_rate": 3.5794863259540576e-05, + "loss": 2.1417, + "step": 9818500 + }, + { + "epoch": 28.42, + "learning_rate": 3.57941396118933e-05, + "loss": 2.1071, + "step": 9819000 + }, + { + "epoch": 28.42, + "learning_rate": 3.579341741154131e-05, + "loss": 2.1094, + "step": 9819500 + }, + { + "epoch": 28.42, + "learning_rate": 3.5792693763894036e-05, + "loss": 2.1025, + "step": 9820000 + }, + { + "epoch": 28.43, + "learning_rate": 3.579197011624676e-05, + "loss": 2.118, + "step": 9820500 + }, + { + "epoch": 28.43, + "learning_rate": 3.579124791589477e-05, + "loss": 2.1428, + "step": 9821000 + }, + { + "epoch": 28.43, + "learning_rate": 3.57905242682475e-05, + "loss": 2.1052, + "step": 9821500 + }, + { + "epoch": 28.43, + "learning_rate": 3.5789800620600225e-05, + "loss": 2.1111, + "step": 9822000 + }, + { + "epoch": 28.43, + "learning_rate": 3.578907697295295e-05, + "loss": 2.1208, + "step": 9822500 + }, + { + "epoch": 28.43, + "learning_rate": 3.578835332530567e-05, + "loss": 2.1222, + "step": 9823000 + }, + { + "epoch": 28.44, + "learning_rate": 3.57876296776584e-05, + "loss": 2.1196, + "step": 9823500 + }, + { + "epoch": 28.44, + "learning_rate": 3.578690603001112e-05, + "loss": 2.1002, + "step": 9824000 + }, + { + "epoch": 28.44, + "learning_rate": 3.578618238236384e-05, + "loss": 2.1224, + "step": 9824500 + }, + { + "epoch": 28.44, + "learning_rate": 3.5785458734716565e-05, + "loss": 2.1259, + "step": 9825000 + }, + { + "epoch": 28.44, + "learning_rate": 3.578473508706929e-05, + "loss": 2.1108, + "step": 9825500 + }, + { + "epoch": 28.44, + "learning_rate": 3.578401143942201e-05, + "loss": 2.1137, + "step": 9826000 + }, + { + "epoch": 28.44, + "learning_rate": 3.578328779177473e-05, + "loss": 2.1241, + "step": 9826500 + }, + { + "epoch": 28.45, + "learning_rate": 3.5782564144127454e-05, + "loss": 2.0793, + "step": 9827000 + }, + { + "epoch": 28.45, + "learning_rate": 3.5781840496480176e-05, + "loss": 2.1182, + "step": 9827500 + }, + { + "epoch": 28.45, + "learning_rate": 3.5781116848832905e-05, + "loss": 2.0881, + "step": 9828000 + }, + { + "epoch": 28.45, + "learning_rate": 3.5780396095776214e-05, + "loss": 2.1403, + "step": 9828500 + }, + { + "epoch": 28.45, + "learning_rate": 3.5779672448128936e-05, + "loss": 2.0986, + "step": 9829000 + }, + { + "epoch": 28.45, + "learning_rate": 3.577894880048166e-05, + "loss": 2.1291, + "step": 9829500 + }, + { + "epoch": 28.45, + "learning_rate": 3.577822515283438e-05, + "loss": 2.0975, + "step": 9830000 + }, + { + "epoch": 28.46, + "learning_rate": 3.57775015051871e-05, + "loss": 2.1168, + "step": 9830500 + }, + { + "epoch": 28.46, + "learning_rate": 3.577677785753983e-05, + "loss": 2.1261, + "step": 9831000 + }, + { + "epoch": 28.46, + "learning_rate": 3.5776054209892554e-05, + "loss": 2.1234, + "step": 9831500 + }, + { + "epoch": 28.46, + "learning_rate": 3.5775330562245276e-05, + "loss": 2.1235, + "step": 9832000 + }, + { + "epoch": 28.46, + "learning_rate": 3.5774606914598005e-05, + "loss": 2.1073, + "step": 9832500 + }, + { + "epoch": 28.46, + "learning_rate": 3.577388326695073e-05, + "loss": 2.1105, + "step": 9833000 + }, + { + "epoch": 28.46, + "learning_rate": 3.577315961930345e-05, + "loss": 2.1124, + "step": 9833500 + }, + { + "epoch": 28.47, + "learning_rate": 3.577243597165617e-05, + "loss": 2.1292, + "step": 9834000 + }, + { + "epoch": 28.47, + "learning_rate": 3.5771712324008894e-05, + "loss": 2.1294, + "step": 9834500 + }, + { + "epoch": 28.47, + "learning_rate": 3.5770988676361616e-05, + "loss": 2.1268, + "step": 9835000 + }, + { + "epoch": 28.47, + "learning_rate": 3.577026502871434e-05, + "loss": 2.1317, + "step": 9835500 + }, + { + "epoch": 28.47, + "learning_rate": 3.576954138106706e-05, + "loss": 2.105, + "step": 9836000 + }, + { + "epoch": 28.47, + "learning_rate": 3.576881773341978e-05, + "loss": 2.1292, + "step": 9836500 + }, + { + "epoch": 28.47, + "learning_rate": 3.5768095533067805e-05, + "loss": 2.1137, + "step": 9837000 + }, + { + "epoch": 28.48, + "learning_rate": 3.576737333271582e-05, + "loss": 2.1355, + "step": 9837500 + }, + { + "epoch": 28.48, + "learning_rate": 3.576664968506854e-05, + "loss": 2.1267, + "step": 9838000 + }, + { + "epoch": 28.48, + "learning_rate": 3.576592603742127e-05, + "loss": 2.1257, + "step": 9838500 + }, + { + "epoch": 28.48, + "learning_rate": 3.5765202389773994e-05, + "loss": 2.1247, + "step": 9839000 + }, + { + "epoch": 28.48, + "learning_rate": 3.576448018942201e-05, + "loss": 2.1351, + "step": 9839500 + }, + { + "epoch": 28.48, + "learning_rate": 3.576375798907003e-05, + "loss": 2.1, + "step": 9840000 + }, + { + "epoch": 28.48, + "learning_rate": 3.5763034341422755e-05, + "loss": 2.093, + "step": 9840500 + }, + { + "epoch": 28.49, + "learning_rate": 3.576231069377548e-05, + "loss": 2.1247, + "step": 9841000 + }, + { + "epoch": 28.49, + "learning_rate": 3.57615870461282e-05, + "loss": 2.112, + "step": 9841500 + }, + { + "epoch": 28.49, + "learning_rate": 3.576086339848092e-05, + "loss": 2.1111, + "step": 9842000 + }, + { + "epoch": 28.49, + "learning_rate": 3.5760139750833644e-05, + "loss": 2.1213, + "step": 9842500 + }, + { + "epoch": 28.49, + "learning_rate": 3.575941755048166e-05, + "loss": 2.1113, + "step": 9843000 + }, + { + "epoch": 28.49, + "learning_rate": 3.575869535012968e-05, + "loss": 2.1223, + "step": 9843500 + }, + { + "epoch": 28.49, + "learning_rate": 3.5757971702482404e-05, + "loss": 2.1162, + "step": 9844000 + }, + { + "epoch": 28.5, + "learning_rate": 3.5757248054835126e-05, + "loss": 2.1232, + "step": 9844500 + }, + { + "epoch": 28.5, + "learning_rate": 3.575652440718785e-05, + "loss": 2.1208, + "step": 9845000 + }, + { + "epoch": 28.5, + "learning_rate": 3.5755802206835864e-05, + "loss": 2.0977, + "step": 9845500 + }, + { + "epoch": 28.5, + "learning_rate": 3.575507855918859e-05, + "loss": 2.1307, + "step": 9846000 + }, + { + "epoch": 28.5, + "learning_rate": 3.5754354911541315e-05, + "loss": 2.1225, + "step": 9846500 + }, + { + "epoch": 28.5, + "learning_rate": 3.575363126389404e-05, + "loss": 2.1274, + "step": 9847000 + }, + { + "epoch": 28.5, + "learning_rate": 3.575290906354205e-05, + "loss": 2.1438, + "step": 9847500 + }, + { + "epoch": 28.51, + "learning_rate": 3.575218541589478e-05, + "loss": 2.1223, + "step": 9848000 + }, + { + "epoch": 28.51, + "learning_rate": 3.5751461768247504e-05, + "loss": 2.1202, + "step": 9848500 + }, + { + "epoch": 28.51, + "learning_rate": 3.5750738120600226e-05, + "loss": 2.1022, + "step": 9849000 + }, + { + "epoch": 28.51, + "learning_rate": 3.575001447295295e-05, + "loss": 2.104, + "step": 9849500 + }, + { + "epoch": 28.51, + "learning_rate": 3.574929082530567e-05, + "loss": 2.1203, + "step": 9850000 + }, + { + "epoch": 28.51, + "learning_rate": 3.574856717765839e-05, + "loss": 2.1342, + "step": 9850500 + }, + { + "epoch": 28.51, + "learning_rate": 3.5747843530011115e-05, + "loss": 2.1152, + "step": 9851000 + }, + { + "epoch": 28.52, + "learning_rate": 3.574711988236384e-05, + "loss": 2.12, + "step": 9851500 + }, + { + "epoch": 28.52, + "learning_rate": 3.574639623471656e-05, + "loss": 2.1258, + "step": 9852000 + }, + { + "epoch": 28.52, + "learning_rate": 3.574567258706928e-05, + "loss": 2.1044, + "step": 9852500 + }, + { + "epoch": 28.52, + "learning_rate": 3.5744948939422004e-05, + "loss": 2.137, + "step": 9853000 + }, + { + "epoch": 28.52, + "learning_rate": 3.574422529177473e-05, + "loss": 2.1273, + "step": 9853500 + }, + { + "epoch": 28.52, + "learning_rate": 3.5743501644127455e-05, + "loss": 2.1254, + "step": 9854000 + }, + { + "epoch": 28.52, + "learning_rate": 3.574277944377548e-05, + "loss": 2.1192, + "step": 9854500 + }, + { + "epoch": 28.53, + "learning_rate": 3.57420557961282e-05, + "loss": 2.1, + "step": 9855000 + }, + { + "epoch": 28.53, + "learning_rate": 3.574133214848092e-05, + "loss": 2.1237, + "step": 9855500 + }, + { + "epoch": 28.53, + "learning_rate": 3.5740608500833644e-05, + "loss": 2.1088, + "step": 9856000 + }, + { + "epoch": 28.53, + "learning_rate": 3.573988919507225e-05, + "loss": 2.1301, + "step": 9856500 + }, + { + "epoch": 28.53, + "learning_rate": 3.5739165547424976e-05, + "loss": 2.1057, + "step": 9857000 + }, + { + "epoch": 28.53, + "learning_rate": 3.57384418997777e-05, + "loss": 2.1023, + "step": 9857500 + }, + { + "epoch": 28.53, + "learning_rate": 3.573771825213042e-05, + "loss": 2.1206, + "step": 9858000 + }, + { + "epoch": 28.54, + "learning_rate": 3.5736996051778436e-05, + "loss": 2.1206, + "step": 9858500 + }, + { + "epoch": 28.54, + "learning_rate": 3.573627385142646e-05, + "loss": 2.1222, + "step": 9859000 + }, + { + "epoch": 28.54, + "learning_rate": 3.573555020377918e-05, + "loss": 2.1742, + "step": 9859500 + }, + { + "epoch": 28.54, + "learning_rate": 3.57348265561319e-05, + "loss": 2.127, + "step": 9860000 + }, + { + "epoch": 28.54, + "learning_rate": 3.5734102908484625e-05, + "loss": 2.1315, + "step": 9860500 + }, + { + "epoch": 28.54, + "learning_rate": 3.573337926083735e-05, + "loss": 2.1113, + "step": 9861000 + }, + { + "epoch": 28.55, + "learning_rate": 3.5732655613190076e-05, + "loss": 2.1281, + "step": 9861500 + }, + { + "epoch": 28.55, + "learning_rate": 3.57319319655428e-05, + "loss": 2.1248, + "step": 9862000 + }, + { + "epoch": 28.55, + "learning_rate": 3.573120831789552e-05, + "loss": 2.1267, + "step": 9862500 + }, + { + "epoch": 28.55, + "learning_rate": 3.573048467024824e-05, + "loss": 2.1038, + "step": 9863000 + }, + { + "epoch": 28.55, + "learning_rate": 3.572976246989626e-05, + "loss": 2.1268, + "step": 9863500 + }, + { + "epoch": 28.55, + "learning_rate": 3.572903882224898e-05, + "loss": 2.1396, + "step": 9864000 + }, + { + "epoch": 28.55, + "learning_rate": 3.572831517460171e-05, + "loss": 2.112, + "step": 9864500 + }, + { + "epoch": 28.56, + "learning_rate": 3.572759152695443e-05, + "loss": 2.1244, + "step": 9865000 + }, + { + "epoch": 28.56, + "learning_rate": 3.5726867879307154e-05, + "loss": 2.1293, + "step": 9865500 + }, + { + "epoch": 28.56, + "learning_rate": 3.572614567895517e-05, + "loss": 2.1077, + "step": 9866000 + }, + { + "epoch": 28.56, + "learning_rate": 3.572542203130789e-05, + "loss": 2.099, + "step": 9866500 + }, + { + "epoch": 28.56, + "learning_rate": 3.572469983095591e-05, + "loss": 2.1153, + "step": 9867000 + }, + { + "epoch": 28.56, + "learning_rate": 3.5723976183308636e-05, + "loss": 2.1222, + "step": 9867500 + }, + { + "epoch": 28.56, + "learning_rate": 3.572325253566136e-05, + "loss": 2.1436, + "step": 9868000 + }, + { + "epoch": 28.57, + "learning_rate": 3.572252888801408e-05, + "loss": 2.118, + "step": 9868500 + }, + { + "epoch": 28.57, + "learning_rate": 3.572180524036681e-05, + "loss": 2.1236, + "step": 9869000 + }, + { + "epoch": 28.57, + "learning_rate": 3.5721083040014825e-05, + "loss": 2.1346, + "step": 9869500 + }, + { + "epoch": 28.57, + "learning_rate": 3.572035939236755e-05, + "loss": 2.1186, + "step": 9870000 + }, + { + "epoch": 28.57, + "learning_rate": 3.571963574472027e-05, + "loss": 2.1223, + "step": 9870500 + }, + { + "epoch": 28.57, + "learning_rate": 3.571891209707299e-05, + "loss": 2.109, + "step": 9871000 + }, + { + "epoch": 28.57, + "learning_rate": 3.5718188449425714e-05, + "loss": 2.133, + "step": 9871500 + }, + { + "epoch": 28.58, + "learning_rate": 3.5717464801778436e-05, + "loss": 2.1274, + "step": 9872000 + }, + { + "epoch": 28.58, + "learning_rate": 3.571674115413116e-05, + "loss": 2.1256, + "step": 9872500 + }, + { + "epoch": 28.58, + "learning_rate": 3.571601750648389e-05, + "loss": 2.1406, + "step": 9873000 + }, + { + "epoch": 28.58, + "learning_rate": 3.571529385883661e-05, + "loss": 2.1038, + "step": 9873500 + }, + { + "epoch": 28.58, + "learning_rate": 3.5714571658484625e-05, + "loss": 2.1238, + "step": 9874000 + }, + { + "epoch": 28.58, + "learning_rate": 3.571384801083735e-05, + "loss": 2.111, + "step": 9874500 + }, + { + "epoch": 28.58, + "learning_rate": 3.571312436319007e-05, + "loss": 2.1085, + "step": 9875000 + }, + { + "epoch": 28.59, + "learning_rate": 3.5712402162838085e-05, + "loss": 2.1222, + "step": 9875500 + }, + { + "epoch": 28.59, + "learning_rate": 3.571167996248611e-05, + "loss": 2.1288, + "step": 9876000 + }, + { + "epoch": 28.59, + "learning_rate": 3.571095631483883e-05, + "loss": 2.1315, + "step": 9876500 + }, + { + "epoch": 28.59, + "learning_rate": 3.571023266719156e-05, + "loss": 2.1071, + "step": 9877000 + }, + { + "epoch": 28.59, + "learning_rate": 3.570950901954428e-05, + "loss": 2.1149, + "step": 9877500 + }, + { + "epoch": 28.59, + "learning_rate": 3.5708785371897003e-05, + "loss": 2.1025, + "step": 9878000 + }, + { + "epoch": 28.59, + "learning_rate": 3.5708061724249726e-05, + "loss": 2.1029, + "step": 9878500 + }, + { + "epoch": 28.6, + "learning_rate": 3.570733807660245e-05, + "loss": 2.1142, + "step": 9879000 + }, + { + "epoch": 28.6, + "learning_rate": 3.570661442895517e-05, + "loss": 2.1267, + "step": 9879500 + }, + { + "epoch": 28.6, + "learning_rate": 3.570589078130789e-05, + "loss": 2.1202, + "step": 9880000 + }, + { + "epoch": 28.6, + "learning_rate": 3.5705167133660615e-05, + "loss": 2.1123, + "step": 9880500 + }, + { + "epoch": 28.6, + "learning_rate": 3.570444348601334e-05, + "loss": 2.1053, + "step": 9881000 + }, + { + "epoch": 28.6, + "learning_rate": 3.570371983836606e-05, + "loss": 2.1314, + "step": 9881500 + }, + { + "epoch": 28.6, + "learning_rate": 3.570299619071879e-05, + "loss": 2.1404, + "step": 9882000 + }, + { + "epoch": 28.61, + "learning_rate": 3.570227254307151e-05, + "loss": 2.1321, + "step": 9882500 + }, + { + "epoch": 28.61, + "learning_rate": 3.570154889542423e-05, + "loss": 2.1232, + "step": 9883000 + }, + { + "epoch": 28.61, + "learning_rate": 3.5700825247776955e-05, + "loss": 2.1228, + "step": 9883500 + }, + { + "epoch": 28.61, + "learning_rate": 3.5700101600129684e-05, + "loss": 2.1087, + "step": 9884000 + }, + { + "epoch": 28.61, + "learning_rate": 3.5699377952482406e-05, + "loss": 2.0966, + "step": 9884500 + }, + { + "epoch": 28.61, + "learning_rate": 3.569865430483513e-05, + "loss": 2.1453, + "step": 9885000 + }, + { + "epoch": 28.61, + "learning_rate": 3.569793065718785e-05, + "loss": 2.1107, + "step": 9885500 + }, + { + "epoch": 28.62, + "learning_rate": 3.5697208456835866e-05, + "loss": 2.1266, + "step": 9886000 + }, + { + "epoch": 28.62, + "learning_rate": 3.569648480918859e-05, + "loss": 2.1393, + "step": 9886500 + }, + { + "epoch": 28.62, + "learning_rate": 3.569576116154131e-05, + "loss": 2.1222, + "step": 9887000 + }, + { + "epoch": 28.62, + "learning_rate": 3.569503896118933e-05, + "loss": 2.1214, + "step": 9887500 + }, + { + "epoch": 28.62, + "learning_rate": 3.5694315313542055e-05, + "loss": 2.116, + "step": 9888000 + }, + { + "epoch": 28.62, + "learning_rate": 3.569359166589478e-05, + "loss": 2.1235, + "step": 9888500 + }, + { + "epoch": 28.62, + "learning_rate": 3.56928680182475e-05, + "loss": 2.1251, + "step": 9889000 + }, + { + "epoch": 28.63, + "learning_rate": 3.569214437060022e-05, + "loss": 2.1259, + "step": 9889500 + }, + { + "epoch": 28.63, + "learning_rate": 3.5691420722952944e-05, + "loss": 2.1277, + "step": 9890000 + }, + { + "epoch": 28.63, + "learning_rate": 3.5690697075305666e-05, + "loss": 2.1083, + "step": 9890500 + }, + { + "epoch": 28.63, + "learning_rate": 3.568997487495369e-05, + "loss": 2.089, + "step": 9891000 + }, + { + "epoch": 28.63, + "learning_rate": 3.568925122730641e-05, + "loss": 2.12, + "step": 9891500 + }, + { + "epoch": 28.63, + "learning_rate": 3.568852757965914e-05, + "loss": 2.1131, + "step": 9892000 + }, + { + "epoch": 28.63, + "learning_rate": 3.568780393201186e-05, + "loss": 2.1152, + "step": 9892500 + }, + { + "epoch": 28.64, + "learning_rate": 3.5687080284364584e-05, + "loss": 2.096, + "step": 9893000 + }, + { + "epoch": 28.64, + "learning_rate": 3.5686356636717307e-05, + "loss": 2.1328, + "step": 9893500 + }, + { + "epoch": 28.64, + "learning_rate": 3.568563298907003e-05, + "loss": 2.0819, + "step": 9894000 + }, + { + "epoch": 28.64, + "learning_rate": 3.568490934142275e-05, + "loss": 2.119, + "step": 9894500 + }, + { + "epoch": 28.64, + "learning_rate": 3.5684187141070767e-05, + "loss": 2.1089, + "step": 9895000 + }, + { + "epoch": 28.64, + "learning_rate": 3.568346349342349e-05, + "loss": 2.1064, + "step": 9895500 + }, + { + "epoch": 28.64, + "learning_rate": 3.568273984577621e-05, + "loss": 2.1283, + "step": 9896000 + }, + { + "epoch": 28.65, + "learning_rate": 3.568201619812894e-05, + "loss": 2.1276, + "step": 9896500 + }, + { + "epoch": 28.65, + "learning_rate": 3.568129255048166e-05, + "loss": 2.1333, + "step": 9897000 + }, + { + "epoch": 28.65, + "learning_rate": 3.5680568902834384e-05, + "loss": 2.1138, + "step": 9897500 + }, + { + "epoch": 28.65, + "learning_rate": 3.567984525518711e-05, + "loss": 2.1138, + "step": 9898000 + }, + { + "epoch": 28.65, + "learning_rate": 3.5679121607539836e-05, + "loss": 2.1133, + "step": 9898500 + }, + { + "epoch": 28.65, + "learning_rate": 3.567839795989256e-05, + "loss": 2.1412, + "step": 9899000 + }, + { + "epoch": 28.65, + "learning_rate": 3.5677675759540574e-05, + "loss": 2.1368, + "step": 9899500 + }, + { + "epoch": 28.66, + "learning_rate": 3.567695355918859e-05, + "loss": 2.1011, + "step": 9900000 + }, + { + "epoch": 28.66, + "learning_rate": 3.567622991154131e-05, + "loss": 2.1404, + "step": 9900500 + }, + { + "epoch": 28.66, + "learning_rate": 3.567550626389404e-05, + "loss": 2.1189, + "step": 9901000 + }, + { + "epoch": 28.66, + "learning_rate": 3.567478261624676e-05, + "loss": 2.1181, + "step": 9901500 + }, + { + "epoch": 28.66, + "learning_rate": 3.5674058968599485e-05, + "loss": 2.1057, + "step": 9902000 + }, + { + "epoch": 28.66, + "learning_rate": 3.567333532095221e-05, + "loss": 2.1118, + "step": 9902500 + }, + { + "epoch": 28.67, + "learning_rate": 3.567261167330493e-05, + "loss": 2.1242, + "step": 9903000 + }, + { + "epoch": 28.67, + "learning_rate": 3.567188802565765e-05, + "loss": 2.0965, + "step": 9903500 + }, + { + "epoch": 28.67, + "learning_rate": 3.5671164378010374e-05, + "loss": 2.116, + "step": 9904000 + }, + { + "epoch": 28.67, + "learning_rate": 3.567044217765839e-05, + "loss": 2.1028, + "step": 9904500 + }, + { + "epoch": 28.67, + "learning_rate": 3.566971997730641e-05, + "loss": 2.1047, + "step": 9905000 + }, + { + "epoch": 28.67, + "learning_rate": 3.5668996329659134e-05, + "loss": 2.1127, + "step": 9905500 + }, + { + "epoch": 28.67, + "learning_rate": 3.566827412930715e-05, + "loss": 2.1076, + "step": 9906000 + }, + { + "epoch": 28.68, + "learning_rate": 3.566755048165988e-05, + "loss": 2.1361, + "step": 9906500 + }, + { + "epoch": 28.68, + "learning_rate": 3.56668268340126e-05, + "loss": 2.1333, + "step": 9907000 + }, + { + "epoch": 28.68, + "learning_rate": 3.5666104633660616e-05, + "loss": 2.1383, + "step": 9907500 + }, + { + "epoch": 28.68, + "learning_rate": 3.566538098601334e-05, + "loss": 2.1064, + "step": 9908000 + }, + { + "epoch": 28.68, + "learning_rate": 3.566465733836607e-05, + "loss": 2.1328, + "step": 9908500 + }, + { + "epoch": 28.68, + "learning_rate": 3.566393369071879e-05, + "loss": 2.0844, + "step": 9909000 + }, + { + "epoch": 28.68, + "learning_rate": 3.566321004307151e-05, + "loss": 2.1031, + "step": 9909500 + }, + { + "epoch": 28.69, + "learning_rate": 3.566248784271953e-05, + "loss": 2.1222, + "step": 9910000 + }, + { + "epoch": 28.69, + "learning_rate": 3.566176419507225e-05, + "loss": 2.1328, + "step": 9910500 + }, + { + "epoch": 28.69, + "learning_rate": 3.5661041994720265e-05, + "loss": 2.1261, + "step": 9911000 + }, + { + "epoch": 28.69, + "learning_rate": 3.566031834707299e-05, + "loss": 2.1293, + "step": 9911500 + }, + { + "epoch": 28.69, + "learning_rate": 3.5659594699425716e-05, + "loss": 2.1336, + "step": 9912000 + }, + { + "epoch": 28.69, + "learning_rate": 3.565887105177844e-05, + "loss": 2.1166, + "step": 9912500 + }, + { + "epoch": 28.69, + "learning_rate": 3.565814740413116e-05, + "loss": 2.1113, + "step": 9913000 + }, + { + "epoch": 28.7, + "learning_rate": 3.565742375648388e-05, + "loss": 2.106, + "step": 9913500 + }, + { + "epoch": 28.7, + "learning_rate": 3.565670010883661e-05, + "loss": 2.1276, + "step": 9914000 + }, + { + "epoch": 28.7, + "learning_rate": 3.5655976461189334e-05, + "loss": 2.1252, + "step": 9914500 + }, + { + "epoch": 28.7, + "learning_rate": 3.565525281354206e-05, + "loss": 2.1281, + "step": 9915000 + }, + { + "epoch": 28.7, + "learning_rate": 3.565452916589478e-05, + "loss": 2.1347, + "step": 9915500 + }, + { + "epoch": 28.7, + "learning_rate": 3.56538055182475e-05, + "loss": 2.1117, + "step": 9916000 + }, + { + "epoch": 28.7, + "learning_rate": 3.565308187060022e-05, + "loss": 2.153, + "step": 9916500 + }, + { + "epoch": 28.71, + "learning_rate": 3.5652358222952946e-05, + "loss": 2.1481, + "step": 9917000 + }, + { + "epoch": 28.71, + "learning_rate": 3.565163457530567e-05, + "loss": 2.1251, + "step": 9917500 + }, + { + "epoch": 28.71, + "learning_rate": 3.565091237495369e-05, + "loss": 2.1152, + "step": 9918000 + }, + { + "epoch": 28.71, + "learning_rate": 3.565018872730641e-05, + "loss": 2.1146, + "step": 9918500 + }, + { + "epoch": 28.71, + "learning_rate": 3.5649465079659135e-05, + "loss": 2.1237, + "step": 9919000 + }, + { + "epoch": 28.71, + "learning_rate": 3.564874143201186e-05, + "loss": 2.1236, + "step": 9919500 + }, + { + "epoch": 28.71, + "learning_rate": 3.564801778436458e-05, + "loss": 2.1139, + "step": 9920000 + }, + { + "epoch": 28.72, + "learning_rate": 3.5647295584012595e-05, + "loss": 2.123, + "step": 9920500 + }, + { + "epoch": 28.72, + "learning_rate": 3.564657338366062e-05, + "loss": 2.1333, + "step": 9921000 + }, + { + "epoch": 28.72, + "learning_rate": 3.564584973601334e-05, + "loss": 2.1068, + "step": 9921500 + }, + { + "epoch": 28.72, + "learning_rate": 3.564512608836607e-05, + "loss": 2.1423, + "step": 9922000 + }, + { + "epoch": 28.72, + "learning_rate": 3.564440244071879e-05, + "loss": 2.1118, + "step": 9922500 + }, + { + "epoch": 28.72, + "learning_rate": 3.564367879307151e-05, + "loss": 2.1491, + "step": 9923000 + }, + { + "epoch": 28.72, + "learning_rate": 3.5642955145424235e-05, + "loss": 2.1252, + "step": 9923500 + }, + { + "epoch": 28.73, + "learning_rate": 3.564223149777696e-05, + "loss": 2.1035, + "step": 9924000 + }, + { + "epoch": 28.73, + "learning_rate": 3.564150785012968e-05, + "loss": 2.1048, + "step": 9924500 + }, + { + "epoch": 28.73, + "learning_rate": 3.56407842024824e-05, + "loss": 2.1005, + "step": 9925000 + }, + { + "epoch": 28.73, + "learning_rate": 3.5640060554835124e-05, + "loss": 2.1277, + "step": 9925500 + }, + { + "epoch": 28.73, + "learning_rate": 3.5639336907187846e-05, + "loss": 2.1255, + "step": 9926000 + }, + { + "epoch": 28.73, + "learning_rate": 3.563861325954057e-05, + "loss": 2.125, + "step": 9926500 + }, + { + "epoch": 28.73, + "learning_rate": 3.563788961189329e-05, + "loss": 2.1176, + "step": 9927000 + }, + { + "epoch": 28.74, + "learning_rate": 3.563716596424602e-05, + "loss": 2.1107, + "step": 9927500 + }, + { + "epoch": 28.74, + "learning_rate": 3.563644231659874e-05, + "loss": 2.1106, + "step": 9928000 + }, + { + "epoch": 28.74, + "learning_rate": 3.5635720116246764e-05, + "loss": 2.1156, + "step": 9928500 + }, + { + "epoch": 28.74, + "learning_rate": 3.5634996468599486e-05, + "loss": 2.1397, + "step": 9929000 + }, + { + "epoch": 28.74, + "learning_rate": 3.563427282095221e-05, + "loss": 2.1293, + "step": 9929500 + }, + { + "epoch": 28.74, + "learning_rate": 3.563354917330493e-05, + "loss": 2.1244, + "step": 9930000 + }, + { + "epoch": 28.74, + "learning_rate": 3.5632826972952946e-05, + "loss": 2.1279, + "step": 9930500 + }, + { + "epoch": 28.75, + "learning_rate": 3.563210477260097e-05, + "loss": 2.109, + "step": 9931000 + }, + { + "epoch": 28.75, + "learning_rate": 3.563138112495369e-05, + "loss": 2.1182, + "step": 9931500 + }, + { + "epoch": 28.75, + "learning_rate": 3.563065747730641e-05, + "loss": 2.1414, + "step": 9932000 + }, + { + "epoch": 28.75, + "learning_rate": 3.5629933829659135e-05, + "loss": 2.1029, + "step": 9932500 + }, + { + "epoch": 28.75, + "learning_rate": 3.562921018201186e-05, + "loss": 2.1087, + "step": 9933000 + }, + { + "epoch": 28.75, + "learning_rate": 3.562848653436458e-05, + "loss": 2.1121, + "step": 9933500 + }, + { + "epoch": 28.75, + "learning_rate": 3.56277628867173e-05, + "loss": 2.1282, + "step": 9934000 + }, + { + "epoch": 28.76, + "learning_rate": 3.5627039239070024e-05, + "loss": 2.1274, + "step": 9934500 + }, + { + "epoch": 28.76, + "learning_rate": 3.5626315591422747e-05, + "loss": 2.1175, + "step": 9935000 + }, + { + "epoch": 28.76, + "learning_rate": 3.562559194377547e-05, + "loss": 2.1191, + "step": 9935500 + }, + { + "epoch": 28.76, + "learning_rate": 3.56248682961282e-05, + "loss": 2.0979, + "step": 9936000 + }, + { + "epoch": 28.76, + "learning_rate": 3.562414609577622e-05, + "loss": 2.1195, + "step": 9936500 + }, + { + "epoch": 28.76, + "learning_rate": 3.562342244812894e-05, + "loss": 2.1039, + "step": 9937000 + }, + { + "epoch": 28.76, + "learning_rate": 3.5622698800481665e-05, + "loss": 2.1042, + "step": 9937500 + }, + { + "epoch": 28.77, + "learning_rate": 3.562197515283439e-05, + "loss": 2.1364, + "step": 9938000 + }, + { + "epoch": 28.77, + "learning_rate": 3.562125150518711e-05, + "loss": 2.1089, + "step": 9938500 + }, + { + "epoch": 28.77, + "learning_rate": 3.562052785753983e-05, + "loss": 2.1136, + "step": 9939000 + }, + { + "epoch": 28.77, + "learning_rate": 3.5619804209892554e-05, + "loss": 2.1247, + "step": 9939500 + }, + { + "epoch": 28.77, + "learning_rate": 3.5619080562245276e-05, + "loss": 2.1139, + "step": 9940000 + }, + { + "epoch": 28.77, + "learning_rate": 3.5618356914598e-05, + "loss": 2.1322, + "step": 9940500 + }, + { + "epoch": 28.78, + "learning_rate": 3.561763326695072e-05, + "loss": 2.116, + "step": 9941000 + }, + { + "epoch": 28.78, + "learning_rate": 3.561690961930344e-05, + "loss": 2.1115, + "step": 9941500 + }, + { + "epoch": 28.78, + "learning_rate": 3.561618597165617e-05, + "loss": 2.1234, + "step": 9942000 + }, + { + "epoch": 28.78, + "learning_rate": 3.5615462324008894e-05, + "loss": 2.1311, + "step": 9942500 + }, + { + "epoch": 28.78, + "learning_rate": 3.561474012365691e-05, + "loss": 2.1029, + "step": 9943000 + }, + { + "epoch": 28.78, + "learning_rate": 3.561401792330493e-05, + "loss": 2.1162, + "step": 9943500 + }, + { + "epoch": 28.78, + "learning_rate": 3.5613294275657654e-05, + "loss": 2.1395, + "step": 9944000 + }, + { + "epoch": 28.79, + "learning_rate": 3.5612570628010376e-05, + "loss": 2.1428, + "step": 9944500 + }, + { + "epoch": 28.79, + "learning_rate": 3.56118469803631e-05, + "loss": 2.1258, + "step": 9945000 + }, + { + "epoch": 28.79, + "learning_rate": 3.561112333271582e-05, + "loss": 2.1098, + "step": 9945500 + }, + { + "epoch": 28.79, + "learning_rate": 3.561039968506854e-05, + "loss": 2.1309, + "step": 9946000 + }, + { + "epoch": 28.79, + "learning_rate": 3.560967603742127e-05, + "loss": 2.1278, + "step": 9946500 + }, + { + "epoch": 28.79, + "learning_rate": 3.560895383706929e-05, + "loss": 2.116, + "step": 9947000 + }, + { + "epoch": 28.79, + "learning_rate": 3.560823018942201e-05, + "loss": 2.1039, + "step": 9947500 + }, + { + "epoch": 28.8, + "learning_rate": 3.5607507989070025e-05, + "loss": 2.1296, + "step": 9948000 + }, + { + "epoch": 28.8, + "learning_rate": 3.560678434142275e-05, + "loss": 2.1257, + "step": 9948500 + }, + { + "epoch": 28.8, + "learning_rate": 3.560606069377547e-05, + "loss": 2.1199, + "step": 9949000 + }, + { + "epoch": 28.8, + "learning_rate": 3.56053370461282e-05, + "loss": 2.1199, + "step": 9949500 + }, + { + "epoch": 28.8, + "learning_rate": 3.560461339848092e-05, + "loss": 2.1225, + "step": 9950000 + }, + { + "epoch": 28.8, + "learning_rate": 3.560388975083364e-05, + "loss": 2.1296, + "step": 9950500 + }, + { + "epoch": 28.8, + "learning_rate": 3.5603167550481665e-05, + "loss": 2.1256, + "step": 9951000 + }, + { + "epoch": 28.81, + "learning_rate": 3.560244535012968e-05, + "loss": 2.1484, + "step": 9951500 + }, + { + "epoch": 28.81, + "learning_rate": 3.56017217024824e-05, + "loss": 2.1169, + "step": 9952000 + }, + { + "epoch": 28.81, + "learning_rate": 3.5600998054835125e-05, + "loss": 2.1363, + "step": 9952500 + }, + { + "epoch": 28.81, + "learning_rate": 3.560027440718785e-05, + "loss": 2.1287, + "step": 9953000 + }, + { + "epoch": 28.81, + "learning_rate": 3.559955075954057e-05, + "loss": 2.1115, + "step": 9953500 + }, + { + "epoch": 28.81, + "learning_rate": 3.55988271118933e-05, + "loss": 2.1599, + "step": 9954000 + }, + { + "epoch": 28.81, + "learning_rate": 3.559810346424602e-05, + "loss": 2.1142, + "step": 9954500 + }, + { + "epoch": 28.82, + "learning_rate": 3.5597379816598743e-05, + "loss": 2.1238, + "step": 9955000 + }, + { + "epoch": 28.82, + "learning_rate": 3.5596656168951466e-05, + "loss": 2.1118, + "step": 9955500 + }, + { + "epoch": 28.82, + "learning_rate": 3.559593252130419e-05, + "loss": 2.1253, + "step": 9956000 + }, + { + "epoch": 28.82, + "learning_rate": 3.5595210320952203e-05, + "loss": 2.1195, + "step": 9956500 + }, + { + "epoch": 28.82, + "learning_rate": 3.5594486673304926e-05, + "loss": 2.1301, + "step": 9957000 + }, + { + "epoch": 28.82, + "learning_rate": 3.559376302565765e-05, + "loss": 2.1329, + "step": 9957500 + }, + { + "epoch": 28.82, + "learning_rate": 3.559303937801037e-05, + "loss": 2.1453, + "step": 9958000 + }, + { + "epoch": 28.83, + "learning_rate": 3.55923157303631e-05, + "loss": 2.1479, + "step": 9958500 + }, + { + "epoch": 28.83, + "learning_rate": 3.559159208271582e-05, + "loss": 2.1278, + "step": 9959000 + }, + { + "epoch": 28.83, + "learning_rate": 3.559086843506855e-05, + "loss": 2.119, + "step": 9959500 + }, + { + "epoch": 28.83, + "learning_rate": 3.559014478742127e-05, + "loss": 2.1224, + "step": 9960000 + }, + { + "epoch": 28.83, + "learning_rate": 3.5589421139773995e-05, + "loss": 2.137, + "step": 9960500 + }, + { + "epoch": 28.83, + "learning_rate": 3.558869893942201e-05, + "loss": 2.1381, + "step": 9961000 + }, + { + "epoch": 28.83, + "learning_rate": 3.558797529177473e-05, + "loss": 2.1382, + "step": 9961500 + }, + { + "epoch": 28.84, + "learning_rate": 3.5587251644127455e-05, + "loss": 2.1392, + "step": 9962000 + }, + { + "epoch": 28.84, + "learning_rate": 3.558652799648018e-05, + "loss": 2.1117, + "step": 9962500 + }, + { + "epoch": 28.84, + "learning_rate": 3.55858043488329e-05, + "loss": 2.1267, + "step": 9963000 + }, + { + "epoch": 28.84, + "learning_rate": 3.558508070118562e-05, + "loss": 2.0989, + "step": 9963500 + }, + { + "epoch": 28.84, + "learning_rate": 3.5584358500833644e-05, + "loss": 2.1455, + "step": 9964000 + }, + { + "epoch": 28.84, + "learning_rate": 3.5583634853186366e-05, + "loss": 2.1252, + "step": 9964500 + }, + { + "epoch": 28.84, + "learning_rate": 3.558291265283438e-05, + "loss": 2.1478, + "step": 9965000 + }, + { + "epoch": 28.85, + "learning_rate": 3.5582189005187104e-05, + "loss": 2.1452, + "step": 9965500 + }, + { + "epoch": 28.85, + "learning_rate": 3.558146535753983e-05, + "loss": 2.1156, + "step": 9966000 + }, + { + "epoch": 28.85, + "learning_rate": 3.5580741709892555e-05, + "loss": 2.1415, + "step": 9966500 + }, + { + "epoch": 28.85, + "learning_rate": 3.558001806224528e-05, + "loss": 2.1377, + "step": 9967000 + }, + { + "epoch": 28.85, + "learning_rate": 3.5579294414598e-05, + "loss": 2.1316, + "step": 9967500 + }, + { + "epoch": 28.85, + "learning_rate": 3.557857076695072e-05, + "loss": 2.1107, + "step": 9968000 + }, + { + "epoch": 28.85, + "learning_rate": 3.557784711930345e-05, + "loss": 2.1122, + "step": 9968500 + }, + { + "epoch": 28.86, + "learning_rate": 3.557712347165617e-05, + "loss": 2.1062, + "step": 9969000 + }, + { + "epoch": 28.86, + "learning_rate": 3.5576399824008895e-05, + "loss": 2.1113, + "step": 9969500 + }, + { + "epoch": 28.86, + "learning_rate": 3.557567762365691e-05, + "loss": 2.1405, + "step": 9970000 + }, + { + "epoch": 28.86, + "learning_rate": 3.557495397600963e-05, + "loss": 2.1206, + "step": 9970500 + }, + { + "epoch": 28.86, + "learning_rate": 3.557423177565765e-05, + "loss": 2.1295, + "step": 9971000 + }, + { + "epoch": 28.86, + "learning_rate": 3.557350812801038e-05, + "loss": 2.1024, + "step": 9971500 + }, + { + "epoch": 28.86, + "learning_rate": 3.55727844803631e-05, + "loss": 2.1213, + "step": 9972000 + }, + { + "epoch": 28.87, + "learning_rate": 3.557206083271582e-05, + "loss": 2.1113, + "step": 9972500 + }, + { + "epoch": 28.87, + "learning_rate": 3.5571337185068544e-05, + "loss": 2.1388, + "step": 9973000 + }, + { + "epoch": 28.87, + "learning_rate": 3.5570613537421273e-05, + "loss": 2.1143, + "step": 9973500 + }, + { + "epoch": 28.87, + "learning_rate": 3.5569889889773996e-05, + "loss": 2.1291, + "step": 9974000 + }, + { + "epoch": 28.87, + "learning_rate": 3.556916624212672e-05, + "loss": 2.119, + "step": 9974500 + }, + { + "epoch": 28.87, + "learning_rate": 3.556844259447944e-05, + "loss": 2.1275, + "step": 9975000 + }, + { + "epoch": 28.87, + "learning_rate": 3.556771894683216e-05, + "loss": 2.1332, + "step": 9975500 + }, + { + "epoch": 28.88, + "learning_rate": 3.5566995299184885e-05, + "loss": 2.1147, + "step": 9976000 + }, + { + "epoch": 28.88, + "learning_rate": 3.556627165153761e-05, + "loss": 2.149, + "step": 9976500 + }, + { + "epoch": 28.88, + "learning_rate": 3.556554945118563e-05, + "loss": 2.1211, + "step": 9977000 + }, + { + "epoch": 28.88, + "learning_rate": 3.556482580353835e-05, + "loss": 2.1241, + "step": 9977500 + }, + { + "epoch": 28.88, + "learning_rate": 3.5564102155891074e-05, + "loss": 2.1157, + "step": 9978000 + }, + { + "epoch": 28.88, + "learning_rate": 3.5563378508243796e-05, + "loss": 2.1271, + "step": 9978500 + }, + { + "epoch": 28.89, + "learning_rate": 3.556265630789181e-05, + "loss": 2.144, + "step": 9979000 + }, + { + "epoch": 28.89, + "learning_rate": 3.556193555483513e-05, + "loss": 2.1351, + "step": 9979500 + }, + { + "epoch": 28.89, + "learning_rate": 3.556121190718785e-05, + "loss": 2.1219, + "step": 9980000 + }, + { + "epoch": 28.89, + "learning_rate": 3.5560489706835865e-05, + "loss": 2.1085, + "step": 9980500 + }, + { + "epoch": 28.89, + "learning_rate": 3.5559766059188594e-05, + "loss": 2.1248, + "step": 9981000 + }, + { + "epoch": 28.89, + "learning_rate": 3.5559042411541316e-05, + "loss": 2.1212, + "step": 9981500 + }, + { + "epoch": 28.89, + "learning_rate": 3.555831876389404e-05, + "loss": 2.1092, + "step": 9982000 + }, + { + "epoch": 28.9, + "learning_rate": 3.555759511624676e-05, + "loss": 2.1262, + "step": 9982500 + }, + { + "epoch": 28.9, + "learning_rate": 3.555687146859948e-05, + "loss": 2.1341, + "step": 9983000 + }, + { + "epoch": 28.9, + "learning_rate": 3.5556147820952205e-05, + "loss": 2.1081, + "step": 9983500 + }, + { + "epoch": 28.9, + "learning_rate": 3.555542417330493e-05, + "loss": 2.1099, + "step": 9984000 + }, + { + "epoch": 28.9, + "learning_rate": 3.555470052565765e-05, + "loss": 2.1126, + "step": 9984500 + }, + { + "epoch": 28.9, + "learning_rate": 3.555397832530567e-05, + "loss": 2.1272, + "step": 9985000 + }, + { + "epoch": 28.9, + "learning_rate": 3.5553254677658394e-05, + "loss": 2.1062, + "step": 9985500 + }, + { + "epoch": 28.91, + "learning_rate": 3.5552531030011116e-05, + "loss": 2.118, + "step": 9986000 + }, + { + "epoch": 28.91, + "learning_rate": 3.555180738236384e-05, + "loss": 2.1222, + "step": 9986500 + }, + { + "epoch": 28.91, + "learning_rate": 3.555108373471656e-05, + "loss": 2.1528, + "step": 9987000 + }, + { + "epoch": 28.91, + "learning_rate": 3.555036008706928e-05, + "loss": 2.1166, + "step": 9987500 + }, + { + "epoch": 28.91, + "learning_rate": 3.5549636439422005e-05, + "loss": 2.0957, + "step": 9988000 + }, + { + "epoch": 28.91, + "learning_rate": 3.5548912791774734e-05, + "loss": 2.1213, + "step": 9988500 + }, + { + "epoch": 28.91, + "learning_rate": 3.554819059142275e-05, + "loss": 2.1021, + "step": 9989000 + }, + { + "epoch": 28.92, + "learning_rate": 3.554746694377548e-05, + "loss": 2.1138, + "step": 9989500 + }, + { + "epoch": 28.92, + "learning_rate": 3.55467432961282e-05, + "loss": 2.1155, + "step": 9990000 + }, + { + "epoch": 28.92, + "learning_rate": 3.554601964848092e-05, + "loss": 2.1273, + "step": 9990500 + }, + { + "epoch": 28.92, + "learning_rate": 3.5545296000833645e-05, + "loss": 2.1018, + "step": 9991000 + }, + { + "epoch": 28.92, + "learning_rate": 3.554457235318637e-05, + "loss": 2.1229, + "step": 9991500 + }, + { + "epoch": 28.92, + "learning_rate": 3.554384870553909e-05, + "loss": 2.1173, + "step": 9992000 + }, + { + "epoch": 28.92, + "learning_rate": 3.554312505789181e-05, + "loss": 2.1118, + "step": 9992500 + }, + { + "epoch": 28.93, + "learning_rate": 3.5542401410244534e-05, + "loss": 2.1104, + "step": 9993000 + }, + { + "epoch": 28.93, + "learning_rate": 3.554167920989255e-05, + "loss": 2.1203, + "step": 9993500 + }, + { + "epoch": 28.93, + "learning_rate": 3.554095700954057e-05, + "loss": 2.1198, + "step": 9994000 + }, + { + "epoch": 28.93, + "learning_rate": 3.5540233361893295e-05, + "loss": 2.12, + "step": 9994500 + }, + { + "epoch": 28.93, + "learning_rate": 3.5539512608836603e-05, + "loss": 2.1148, + "step": 9995000 + }, + { + "epoch": 28.93, + "learning_rate": 3.5538788961189326e-05, + "loss": 2.1509, + "step": 9995500 + }, + { + "epoch": 28.93, + "learning_rate": 3.5538065313542055e-05, + "loss": 2.1294, + "step": 9996000 + }, + { + "epoch": 28.94, + "learning_rate": 3.553734166589478e-05, + "loss": 2.1334, + "step": 9996500 + }, + { + "epoch": 28.94, + "learning_rate": 3.5536618018247506e-05, + "loss": 2.1393, + "step": 9997000 + }, + { + "epoch": 28.94, + "learning_rate": 3.553589437060023e-05, + "loss": 2.1287, + "step": 9997500 + }, + { + "epoch": 28.94, + "learning_rate": 3.553517072295295e-05, + "loss": 2.1171, + "step": 9998000 + }, + { + "epoch": 28.94, + "learning_rate": 3.553444707530567e-05, + "loss": 2.1068, + "step": 9998500 + }, + { + "epoch": 28.94, + "learning_rate": 3.5533723427658395e-05, + "loss": 2.1243, + "step": 9999000 + }, + { + "epoch": 28.94, + "learning_rate": 3.553299978001112e-05, + "loss": 2.1243, + "step": 9999500 + }, + { + "epoch": 28.95, + "learning_rate": 3.553227613236384e-05, + "loss": 2.1165, + "step": 10000000 + }, + { + "epoch": 28.95, + "learning_rate": 3.553155248471656e-05, + "loss": 2.1262, + "step": 10000500 + }, + { + "epoch": 28.95, + "learning_rate": 3.5530828837069284e-05, + "loss": 2.1372, + "step": 10001000 + }, + { + "epoch": 28.95, + "learning_rate": 3.5530105189422006e-05, + "loss": 2.113, + "step": 10001500 + }, + { + "epoch": 28.95, + "learning_rate": 3.552938154177473e-05, + "loss": 2.1161, + "step": 10002000 + }, + { + "epoch": 28.95, + "learning_rate": 3.552865789412746e-05, + "loss": 2.117, + "step": 10002500 + }, + { + "epoch": 28.95, + "learning_rate": 3.552793424648018e-05, + "loss": 2.1401, + "step": 10003000 + }, + { + "epoch": 28.96, + "learning_rate": 3.55272120461282e-05, + "loss": 2.1165, + "step": 10003500 + }, + { + "epoch": 28.96, + "learning_rate": 3.5526488398480924e-05, + "loss": 2.1269, + "step": 10004000 + }, + { + "epoch": 28.96, + "learning_rate": 3.5525764750833646e-05, + "loss": 2.1217, + "step": 10004500 + }, + { + "epoch": 28.96, + "learning_rate": 3.552504110318637e-05, + "loss": 2.1015, + "step": 10005000 + }, + { + "epoch": 28.96, + "learning_rate": 3.5524318902834384e-05, + "loss": 2.1069, + "step": 10005500 + }, + { + "epoch": 28.96, + "learning_rate": 3.5523595255187106e-05, + "loss": 2.1272, + "step": 10006000 + }, + { + "epoch": 28.96, + "learning_rate": 3.552287160753983e-05, + "loss": 2.1312, + "step": 10006500 + }, + { + "epoch": 28.97, + "learning_rate": 3.552214795989256e-05, + "loss": 2.123, + "step": 10007000 + }, + { + "epoch": 28.97, + "learning_rate": 3.552142431224528e-05, + "loss": 2.1673, + "step": 10007500 + }, + { + "epoch": 28.97, + "learning_rate": 3.5520702111893295e-05, + "loss": 2.1334, + "step": 10008000 + }, + { + "epoch": 28.97, + "learning_rate": 3.551997846424602e-05, + "loss": 2.1039, + "step": 10008500 + }, + { + "epoch": 28.97, + "learning_rate": 3.551925481659874e-05, + "loss": 2.134, + "step": 10009000 + }, + { + "epoch": 28.97, + "learning_rate": 3.551853116895146e-05, + "loss": 2.1271, + "step": 10009500 + }, + { + "epoch": 28.97, + "learning_rate": 3.5517807521304184e-05, + "loss": 2.1175, + "step": 10010000 + }, + { + "epoch": 28.98, + "learning_rate": 3.5517085320952207e-05, + "loss": 2.123, + "step": 10010500 + }, + { + "epoch": 28.98, + "learning_rate": 3.551636167330493e-05, + "loss": 2.1119, + "step": 10011000 + }, + { + "epoch": 28.98, + "learning_rate": 3.551563802565766e-05, + "loss": 2.1212, + "step": 10011500 + }, + { + "epoch": 28.98, + "learning_rate": 3.551491437801038e-05, + "loss": 2.1231, + "step": 10012000 + }, + { + "epoch": 28.98, + "learning_rate": 3.55141907303631e-05, + "loss": 2.1192, + "step": 10012500 + }, + { + "epoch": 28.98, + "learning_rate": 3.5513467082715825e-05, + "loss": 2.1198, + "step": 10013000 + }, + { + "epoch": 28.98, + "learning_rate": 3.551274343506855e-05, + "loss": 2.104, + "step": 10013500 + }, + { + "epoch": 28.99, + "learning_rate": 3.551201978742127e-05, + "loss": 2.1169, + "step": 10014000 + }, + { + "epoch": 28.99, + "learning_rate": 3.551129613977399e-05, + "loss": 2.1155, + "step": 10014500 + }, + { + "epoch": 28.99, + "learning_rate": 3.551057393942201e-05, + "loss": 2.1271, + "step": 10015000 + }, + { + "epoch": 28.99, + "learning_rate": 3.550985173907003e-05, + "loss": 2.1298, + "step": 10015500 + }, + { + "epoch": 28.99, + "learning_rate": 3.5509129538718045e-05, + "loss": 2.1075, + "step": 10016000 + }, + { + "epoch": 28.99, + "learning_rate": 3.550840589107077e-05, + "loss": 2.1213, + "step": 10016500 + }, + { + "epoch": 29.0, + "learning_rate": 3.550768369071878e-05, + "loss": 2.1133, + "step": 10017000 + }, + { + "epoch": 29.0, + "learning_rate": 3.5506960043071505e-05, + "loss": 2.1327, + "step": 10017500 + }, + { + "epoch": 29.0, + "learning_rate": 3.5506236395424234e-05, + "loss": 2.1346, + "step": 10018000 + }, + { + "epoch": 29.0, + "learning_rate": 3.5505512747776956e-05, + "loss": 2.1271, + "step": 10018500 + }, + { + "epoch": 29.0, + "eval_accuracy": 0.6662792394368734, + "eval_accuracy_mlm": 0.6307335576560936, + "eval_accuracy_nsp": 0.856817129788765, + "eval_loss": 2.1900088787078857, + "eval_runtime": 331.3704, + "eval_samples_per_second": 1316.913, + "eval_steps_per_second": 54.872, + "step": 10018688 + }, + { + "epoch": 29.0, + "learning_rate": 3.5504789100129685e-05, + "loss": 2.0982, + "step": 10019000 + }, + { + "epoch": 29.0, + "learning_rate": 3.550406545248241e-05, + "loss": 2.1009, + "step": 10019500 + }, + { + "epoch": 29.0, + "learning_rate": 3.550334180483513e-05, + "loss": 2.1048, + "step": 10020000 + }, + { + "epoch": 29.01, + "learning_rate": 3.550261815718785e-05, + "loss": 2.1176, + "step": 10020500 + }, + { + "epoch": 29.01, + "learning_rate": 3.5501894509540574e-05, + "loss": 2.0853, + "step": 10021000 + }, + { + "epoch": 29.01, + "learning_rate": 3.5501170861893296e-05, + "loss": 2.0882, + "step": 10021500 + }, + { + "epoch": 29.01, + "learning_rate": 3.550044866154131e-05, + "loss": 2.0952, + "step": 10022000 + }, + { + "epoch": 29.01, + "learning_rate": 3.5499726461189334e-05, + "loss": 2.1082, + "step": 10022500 + }, + { + "epoch": 29.01, + "learning_rate": 3.5499002813542056e-05, + "loss": 2.0918, + "step": 10023000 + }, + { + "epoch": 29.01, + "learning_rate": 3.549827916589478e-05, + "loss": 2.0879, + "step": 10023500 + }, + { + "epoch": 29.02, + "learning_rate": 3.54975555182475e-05, + "loss": 2.1145, + "step": 10024000 + }, + { + "epoch": 29.02, + "learning_rate": 3.5496833317895516e-05, + "loss": 2.1052, + "step": 10024500 + }, + { + "epoch": 29.02, + "learning_rate": 3.549610967024824e-05, + "loss": 2.1257, + "step": 10025000 + }, + { + "epoch": 29.02, + "learning_rate": 3.549538602260096e-05, + "loss": 2.092, + "step": 10025500 + }, + { + "epoch": 29.02, + "learning_rate": 3.549466237495368e-05, + "loss": 2.1166, + "step": 10026000 + }, + { + "epoch": 29.02, + "learning_rate": 3.549393872730641e-05, + "loss": 2.0745, + "step": 10026500 + }, + { + "epoch": 29.02, + "learning_rate": 3.5493216526954434e-05, + "loss": 2.1014, + "step": 10027000 + }, + { + "epoch": 29.03, + "learning_rate": 3.5492492879307157e-05, + "loss": 2.0886, + "step": 10027500 + }, + { + "epoch": 29.03, + "learning_rate": 3.549176923165988e-05, + "loss": 2.0981, + "step": 10028000 + }, + { + "epoch": 29.03, + "learning_rate": 3.54910455840126e-05, + "loss": 2.082, + "step": 10028500 + }, + { + "epoch": 29.03, + "learning_rate": 3.549032193636532e-05, + "loss": 2.1012, + "step": 10029000 + }, + { + "epoch": 29.03, + "learning_rate": 3.5489598288718045e-05, + "loss": 2.0996, + "step": 10029500 + }, + { + "epoch": 29.03, + "learning_rate": 3.548887464107077e-05, + "loss": 2.0904, + "step": 10030000 + }, + { + "epoch": 29.03, + "learning_rate": 3.548815099342349e-05, + "loss": 2.101, + "step": 10030500 + }, + { + "epoch": 29.04, + "learning_rate": 3.5487428793071506e-05, + "loss": 2.1211, + "step": 10031000 + }, + { + "epoch": 29.04, + "learning_rate": 3.5486705145424235e-05, + "loss": 2.1169, + "step": 10031500 + }, + { + "epoch": 29.04, + "learning_rate": 3.548598439236754e-05, + "loss": 2.0889, + "step": 10032000 + }, + { + "epoch": 29.04, + "learning_rate": 3.5485260744720266e-05, + "loss": 2.1029, + "step": 10032500 + }, + { + "epoch": 29.04, + "learning_rate": 3.548453709707299e-05, + "loss": 2.098, + "step": 10033000 + }, + { + "epoch": 29.04, + "learning_rate": 3.548381344942571e-05, + "loss": 2.0799, + "step": 10033500 + }, + { + "epoch": 29.04, + "learning_rate": 3.548308980177844e-05, + "loss": 2.0904, + "step": 10034000 + }, + { + "epoch": 29.05, + "learning_rate": 3.548236760142646e-05, + "loss": 2.0925, + "step": 10034500 + }, + { + "epoch": 29.05, + "learning_rate": 3.5481643953779184e-05, + "loss": 2.1226, + "step": 10035000 + }, + { + "epoch": 29.05, + "learning_rate": 3.5480920306131906e-05, + "loss": 2.1151, + "step": 10035500 + }, + { + "epoch": 29.05, + "learning_rate": 3.548019665848463e-05, + "loss": 2.0998, + "step": 10036000 + }, + { + "epoch": 29.05, + "learning_rate": 3.547947301083735e-05, + "loss": 2.12, + "step": 10036500 + }, + { + "epoch": 29.05, + "learning_rate": 3.547874936319007e-05, + "loss": 2.1126, + "step": 10037000 + }, + { + "epoch": 29.05, + "learning_rate": 3.5478025715542795e-05, + "loss": 2.084, + "step": 10037500 + }, + { + "epoch": 29.06, + "learning_rate": 3.547730206789552e-05, + "loss": 2.1149, + "step": 10038000 + }, + { + "epoch": 29.06, + "learning_rate": 3.547657842024824e-05, + "loss": 2.104, + "step": 10038500 + }, + { + "epoch": 29.06, + "learning_rate": 3.547585477260096e-05, + "loss": 2.104, + "step": 10039000 + }, + { + "epoch": 29.06, + "learning_rate": 3.5475131124953684e-05, + "loss": 2.0888, + "step": 10039500 + }, + { + "epoch": 29.06, + "learning_rate": 3.547440747730641e-05, + "loss": 2.1084, + "step": 10040000 + }, + { + "epoch": 29.06, + "learning_rate": 3.5473683829659135e-05, + "loss": 2.0827, + "step": 10040500 + }, + { + "epoch": 29.06, + "learning_rate": 3.547296162930715e-05, + "loss": 2.1294, + "step": 10041000 + }, + { + "epoch": 29.07, + "learning_rate": 3.547223798165988e-05, + "loss": 2.1124, + "step": 10041500 + }, + { + "epoch": 29.07, + "learning_rate": 3.54715143340126e-05, + "loss": 2.0882, + "step": 10042000 + }, + { + "epoch": 29.07, + "learning_rate": 3.5470790686365324e-05, + "loss": 2.0722, + "step": 10042500 + }, + { + "epoch": 29.07, + "learning_rate": 3.547006848601334e-05, + "loss": 2.1021, + "step": 10043000 + }, + { + "epoch": 29.07, + "learning_rate": 3.546934483836606e-05, + "loss": 2.0901, + "step": 10043500 + }, + { + "epoch": 29.07, + "learning_rate": 3.5468621190718784e-05, + "loss": 2.1086, + "step": 10044000 + }, + { + "epoch": 29.07, + "learning_rate": 3.546789754307151e-05, + "loss": 2.1175, + "step": 10044500 + }, + { + "epoch": 29.08, + "learning_rate": 3.546717534271953e-05, + "loss": 2.108, + "step": 10045000 + }, + { + "epoch": 29.08, + "learning_rate": 3.546645169507225e-05, + "loss": 2.0909, + "step": 10045500 + }, + { + "epoch": 29.08, + "learning_rate": 3.5465729494720266e-05, + "loss": 2.0755, + "step": 10046000 + }, + { + "epoch": 29.08, + "learning_rate": 3.546500584707299e-05, + "loss": 2.0929, + "step": 10046500 + }, + { + "epoch": 29.08, + "learning_rate": 3.546428219942571e-05, + "loss": 2.1164, + "step": 10047000 + }, + { + "epoch": 29.08, + "learning_rate": 3.546355855177843e-05, + "loss": 2.1038, + "step": 10047500 + }, + { + "epoch": 29.08, + "learning_rate": 3.546283490413116e-05, + "loss": 2.0862, + "step": 10048000 + }, + { + "epoch": 29.09, + "learning_rate": 3.5462111256483884e-05, + "loss": 2.1005, + "step": 10048500 + }, + { + "epoch": 29.09, + "learning_rate": 3.546138760883661e-05, + "loss": 2.0959, + "step": 10049000 + }, + { + "epoch": 29.09, + "learning_rate": 3.5460663961189336e-05, + "loss": 2.1033, + "step": 10049500 + }, + { + "epoch": 29.09, + "learning_rate": 3.545994031354206e-05, + "loss": 2.1339, + "step": 10050000 + }, + { + "epoch": 29.09, + "learning_rate": 3.545921666589478e-05, + "loss": 2.0974, + "step": 10050500 + }, + { + "epoch": 29.09, + "learning_rate": 3.54584930182475e-05, + "loss": 2.1015, + "step": 10051000 + }, + { + "epoch": 29.09, + "learning_rate": 3.5457769370600225e-05, + "loss": 2.1039, + "step": 10051500 + }, + { + "epoch": 29.1, + "learning_rate": 3.545704572295295e-05, + "loss": 2.0994, + "step": 10052000 + }, + { + "epoch": 29.1, + "learning_rate": 3.545632352260096e-05, + "loss": 2.0851, + "step": 10052500 + }, + { + "epoch": 29.1, + "learning_rate": 3.545560276954428e-05, + "loss": 2.0928, + "step": 10053000 + }, + { + "epoch": 29.1, + "learning_rate": 3.5454879121897e-05, + "loss": 2.094, + "step": 10053500 + }, + { + "epoch": 29.1, + "learning_rate": 3.5454156921545016e-05, + "loss": 2.1112, + "step": 10054000 + }, + { + "epoch": 29.1, + "learning_rate": 3.545343327389774e-05, + "loss": 2.0995, + "step": 10054500 + }, + { + "epoch": 29.11, + "learning_rate": 3.545270962625046e-05, + "loss": 2.0966, + "step": 10055000 + }, + { + "epoch": 29.11, + "learning_rate": 3.545198597860319e-05, + "loss": 2.0848, + "step": 10055500 + }, + { + "epoch": 29.11, + "learning_rate": 3.545126233095591e-05, + "loss": 2.0994, + "step": 10056000 + }, + { + "epoch": 29.11, + "learning_rate": 3.545053868330864e-05, + "loss": 2.1201, + "step": 10056500 + }, + { + "epoch": 29.11, + "learning_rate": 3.544981503566136e-05, + "loss": 2.1009, + "step": 10057000 + }, + { + "epoch": 29.11, + "learning_rate": 3.544909283530938e-05, + "loss": 2.0938, + "step": 10057500 + }, + { + "epoch": 29.11, + "learning_rate": 3.54483691876621e-05, + "loss": 2.1009, + "step": 10058000 + }, + { + "epoch": 29.12, + "learning_rate": 3.544764554001482e-05, + "loss": 2.1127, + "step": 10058500 + }, + { + "epoch": 29.12, + "learning_rate": 3.5446921892367545e-05, + "loss": 2.1195, + "step": 10059000 + }, + { + "epoch": 29.12, + "learning_rate": 3.544619824472027e-05, + "loss": 2.1151, + "step": 10059500 + }, + { + "epoch": 29.12, + "learning_rate": 3.544547459707299e-05, + "loss": 2.1096, + "step": 10060000 + }, + { + "epoch": 29.12, + "learning_rate": 3.544475094942571e-05, + "loss": 2.0873, + "step": 10060500 + }, + { + "epoch": 29.12, + "learning_rate": 3.5444028749073734e-05, + "loss": 2.1079, + "step": 10061000 + }, + { + "epoch": 29.12, + "learning_rate": 3.5443305101426456e-05, + "loss": 2.0924, + "step": 10061500 + }, + { + "epoch": 29.13, + "learning_rate": 3.544258145377918e-05, + "loss": 2.1137, + "step": 10062000 + }, + { + "epoch": 29.13, + "learning_rate": 3.54418578061319e-05, + "loss": 2.0996, + "step": 10062500 + }, + { + "epoch": 29.13, + "learning_rate": 3.544113415848462e-05, + "loss": 2.1074, + "step": 10063000 + }, + { + "epoch": 29.13, + "learning_rate": 3.5440410510837345e-05, + "loss": 2.0977, + "step": 10063500 + }, + { + "epoch": 29.13, + "learning_rate": 3.5439686863190074e-05, + "loss": 2.097, + "step": 10064000 + }, + { + "epoch": 29.13, + "learning_rate": 3.5438963215542796e-05, + "loss": 2.1042, + "step": 10064500 + }, + { + "epoch": 29.13, + "learning_rate": 3.543823956789552e-05, + "loss": 2.0965, + "step": 10065000 + }, + { + "epoch": 29.14, + "learning_rate": 3.543751592024824e-05, + "loss": 2.1078, + "step": 10065500 + }, + { + "epoch": 29.14, + "learning_rate": 3.543679227260096e-05, + "loss": 2.1071, + "step": 10066000 + }, + { + "epoch": 29.14, + "learning_rate": 3.543606862495369e-05, + "loss": 2.093, + "step": 10066500 + }, + { + "epoch": 29.14, + "learning_rate": 3.5435344977306414e-05, + "loss": 2.1206, + "step": 10067000 + }, + { + "epoch": 29.14, + "learning_rate": 3.5434621329659137e-05, + "loss": 2.129, + "step": 10067500 + }, + { + "epoch": 29.14, + "learning_rate": 3.543389912930715e-05, + "loss": 2.1102, + "step": 10068000 + }, + { + "epoch": 29.14, + "learning_rate": 3.5433175481659874e-05, + "loss": 2.0843, + "step": 10068500 + }, + { + "epoch": 29.15, + "learning_rate": 3.54324518340126e-05, + "loss": 2.1039, + "step": 10069000 + }, + { + "epoch": 29.15, + "learning_rate": 3.543172818636532e-05, + "loss": 2.1112, + "step": 10069500 + }, + { + "epoch": 29.15, + "learning_rate": 3.543100453871804e-05, + "loss": 2.1044, + "step": 10070000 + }, + { + "epoch": 29.15, + "learning_rate": 3.543028089107076e-05, + "loss": 2.0929, + "step": 10070500 + }, + { + "epoch": 29.15, + "learning_rate": 3.5429558690718786e-05, + "loss": 2.1076, + "step": 10071000 + }, + { + "epoch": 29.15, + "learning_rate": 3.5428835043071515e-05, + "loss": 2.1004, + "step": 10071500 + }, + { + "epoch": 29.15, + "learning_rate": 3.542811139542424e-05, + "loss": 2.1186, + "step": 10072000 + }, + { + "epoch": 29.16, + "learning_rate": 3.542738919507225e-05, + "loss": 2.1283, + "step": 10072500 + }, + { + "epoch": 29.16, + "learning_rate": 3.5426665547424975e-05, + "loss": 2.087, + "step": 10073000 + }, + { + "epoch": 29.16, + "learning_rate": 3.54259418997777e-05, + "loss": 2.0945, + "step": 10073500 + }, + { + "epoch": 29.16, + "learning_rate": 3.542521825213042e-05, + "loss": 2.0937, + "step": 10074000 + }, + { + "epoch": 29.16, + "learning_rate": 3.542449460448314e-05, + "loss": 2.1217, + "step": 10074500 + }, + { + "epoch": 29.16, + "learning_rate": 3.5423770956835864e-05, + "loss": 2.1126, + "step": 10075000 + }, + { + "epoch": 29.16, + "learning_rate": 3.542304730918859e-05, + "loss": 2.1206, + "step": 10075500 + }, + { + "epoch": 29.17, + "learning_rate": 3.542232510883661e-05, + "loss": 2.1067, + "step": 10076000 + }, + { + "epoch": 29.17, + "learning_rate": 3.542160146118933e-05, + "loss": 2.1164, + "step": 10076500 + }, + { + "epoch": 29.17, + "learning_rate": 3.542087781354205e-05, + "loss": 2.0922, + "step": 10077000 + }, + { + "epoch": 29.17, + "learning_rate": 3.5420154165894775e-05, + "loss": 2.0974, + "step": 10077500 + }, + { + "epoch": 29.17, + "learning_rate": 3.54194305182475e-05, + "loss": 2.0901, + "step": 10078000 + }, + { + "epoch": 29.17, + "learning_rate": 3.5418706870600226e-05, + "loss": 2.1171, + "step": 10078500 + }, + { + "epoch": 29.17, + "learning_rate": 3.541798322295295e-05, + "loss": 2.1219, + "step": 10079000 + }, + { + "epoch": 29.18, + "learning_rate": 3.541725957530567e-05, + "loss": 2.1036, + "step": 10079500 + }, + { + "epoch": 29.18, + "learning_rate": 3.541653592765839e-05, + "loss": 2.0928, + "step": 10080000 + }, + { + "epoch": 29.18, + "learning_rate": 3.5415812280011115e-05, + "loss": 2.0988, + "step": 10080500 + }, + { + "epoch": 29.18, + "learning_rate": 3.5415088632363844e-05, + "loss": 2.108, + "step": 10081000 + }, + { + "epoch": 29.18, + "learning_rate": 3.5414364984716566e-05, + "loss": 2.0931, + "step": 10081500 + }, + { + "epoch": 29.18, + "learning_rate": 3.541364133706929e-05, + "loss": 2.0936, + "step": 10082000 + }, + { + "epoch": 29.18, + "learning_rate": 3.5412919136717304e-05, + "loss": 2.1037, + "step": 10082500 + }, + { + "epoch": 29.19, + "learning_rate": 3.5412195489070026e-05, + "loss": 2.1157, + "step": 10083000 + }, + { + "epoch": 29.19, + "learning_rate": 3.541147184142275e-05, + "loss": 2.1112, + "step": 10083500 + }, + { + "epoch": 29.19, + "learning_rate": 3.541074819377547e-05, + "loss": 2.1344, + "step": 10084000 + }, + { + "epoch": 29.19, + "learning_rate": 3.541002599342349e-05, + "loss": 2.1111, + "step": 10084500 + }, + { + "epoch": 29.19, + "learning_rate": 3.540930379307151e-05, + "loss": 2.0956, + "step": 10085000 + }, + { + "epoch": 29.19, + "learning_rate": 3.540858014542423e-05, + "loss": 2.1105, + "step": 10085500 + }, + { + "epoch": 29.19, + "learning_rate": 3.540785649777695e-05, + "loss": 2.0927, + "step": 10086000 + }, + { + "epoch": 29.2, + "learning_rate": 3.540713285012968e-05, + "loss": 2.0944, + "step": 10086500 + }, + { + "epoch": 29.2, + "learning_rate": 3.5406409202482404e-05, + "loss": 2.1158, + "step": 10087000 + }, + { + "epoch": 29.2, + "learning_rate": 3.540568555483513e-05, + "loss": 2.1223, + "step": 10087500 + }, + { + "epoch": 29.2, + "learning_rate": 3.540496190718785e-05, + "loss": 2.1121, + "step": 10088000 + }, + { + "epoch": 29.2, + "learning_rate": 3.5404239706835864e-05, + "loss": 2.0998, + "step": 10088500 + }, + { + "epoch": 29.2, + "learning_rate": 3.540351750648389e-05, + "loss": 2.1016, + "step": 10089000 + }, + { + "epoch": 29.2, + "learning_rate": 3.540279385883661e-05, + "loss": 2.1194, + "step": 10089500 + }, + { + "epoch": 29.21, + "learning_rate": 3.540207021118933e-05, + "loss": 2.1171, + "step": 10090000 + }, + { + "epoch": 29.21, + "learning_rate": 3.5401346563542053e-05, + "loss": 2.1074, + "step": 10090500 + }, + { + "epoch": 29.21, + "learning_rate": 3.5400622915894776e-05, + "loss": 2.1131, + "step": 10091000 + }, + { + "epoch": 29.21, + "learning_rate": 3.53998992682475e-05, + "loss": 2.1268, + "step": 10091500 + }, + { + "epoch": 29.21, + "learning_rate": 3.539917562060022e-05, + "loss": 2.098, + "step": 10092000 + }, + { + "epoch": 29.21, + "learning_rate": 3.539845197295294e-05, + "loss": 2.1144, + "step": 10092500 + }, + { + "epoch": 29.22, + "learning_rate": 3.5397729772600965e-05, + "loss": 2.1159, + "step": 10093000 + }, + { + "epoch": 29.22, + "learning_rate": 3.539700757224898e-05, + "loss": 2.1003, + "step": 10093500 + }, + { + "epoch": 29.22, + "learning_rate": 3.539628392460171e-05, + "loss": 2.1299, + "step": 10094000 + }, + { + "epoch": 29.22, + "learning_rate": 3.539556027695443e-05, + "loss": 2.0894, + "step": 10094500 + }, + { + "epoch": 29.22, + "learning_rate": 3.5394836629307154e-05, + "loss": 2.1106, + "step": 10095000 + }, + { + "epoch": 29.22, + "learning_rate": 3.5394112981659876e-05, + "loss": 2.11, + "step": 10095500 + }, + { + "epoch": 29.22, + "learning_rate": 3.53933893340126e-05, + "loss": 2.0997, + "step": 10096000 + }, + { + "epoch": 29.23, + "learning_rate": 3.539266568636532e-05, + "loss": 2.1033, + "step": 10096500 + }, + { + "epoch": 29.23, + "learning_rate": 3.539194203871804e-05, + "loss": 2.1105, + "step": 10097000 + }, + { + "epoch": 29.23, + "learning_rate": 3.539121839107077e-05, + "loss": 2.1027, + "step": 10097500 + }, + { + "epoch": 29.23, + "learning_rate": 3.5390494743423494e-05, + "loss": 2.1187, + "step": 10098000 + }, + { + "epoch": 29.23, + "learning_rate": 3.5389771095776216e-05, + "loss": 2.1438, + "step": 10098500 + }, + { + "epoch": 29.23, + "learning_rate": 3.538904744812894e-05, + "loss": 2.131, + "step": 10099000 + }, + { + "epoch": 29.23, + "learning_rate": 3.538832380048166e-05, + "loss": 2.1234, + "step": 10099500 + }, + { + "epoch": 29.24, + "learning_rate": 3.538760015283438e-05, + "loss": 2.1057, + "step": 10100000 + }, + { + "epoch": 29.24, + "learning_rate": 3.53868779524824e-05, + "loss": 2.1114, + "step": 10100500 + }, + { + "epoch": 29.24, + "learning_rate": 3.538615430483513e-05, + "loss": 2.1141, + "step": 10101000 + }, + { + "epoch": 29.24, + "learning_rate": 3.538543065718785e-05, + "loss": 2.116, + "step": 10101500 + }, + { + "epoch": 29.24, + "learning_rate": 3.538470700954057e-05, + "loss": 2.1367, + "step": 10102000 + }, + { + "epoch": 29.24, + "learning_rate": 3.5383983361893294e-05, + "loss": 2.0767, + "step": 10102500 + }, + { + "epoch": 29.24, + "learning_rate": 3.538325971424602e-05, + "loss": 2.1188, + "step": 10103000 + }, + { + "epoch": 29.25, + "learning_rate": 3.538253751389404e-05, + "loss": 2.1192, + "step": 10103500 + }, + { + "epoch": 29.25, + "learning_rate": 3.538181386624676e-05, + "loss": 2.1221, + "step": 10104000 + }, + { + "epoch": 29.25, + "learning_rate": 3.538109021859948e-05, + "loss": 2.1275, + "step": 10104500 + }, + { + "epoch": 29.25, + "learning_rate": 3.5380366570952205e-05, + "loss": 2.1072, + "step": 10105000 + }, + { + "epoch": 29.25, + "learning_rate": 3.537964292330493e-05, + "loss": 2.1255, + "step": 10105500 + }, + { + "epoch": 29.25, + "learning_rate": 3.537891927565765e-05, + "loss": 2.1018, + "step": 10106000 + }, + { + "epoch": 29.25, + "learning_rate": 3.537819562801037e-05, + "loss": 2.1032, + "step": 10106500 + }, + { + "epoch": 29.26, + "learning_rate": 3.5377471980363094e-05, + "loss": 2.1093, + "step": 10107000 + }, + { + "epoch": 29.26, + "learning_rate": 3.537674833271582e-05, + "loss": 2.1085, + "step": 10107500 + }, + { + "epoch": 29.26, + "learning_rate": 3.5376024685068546e-05, + "loss": 2.1094, + "step": 10108000 + }, + { + "epoch": 29.26, + "learning_rate": 3.5375301037421275e-05, + "loss": 2.0979, + "step": 10108500 + }, + { + "epoch": 29.26, + "learning_rate": 3.5374577389774e-05, + "loss": 2.0836, + "step": 10109000 + }, + { + "epoch": 29.26, + "learning_rate": 3.537385374212672e-05, + "loss": 2.0949, + "step": 10109500 + }, + { + "epoch": 29.26, + "learning_rate": 3.537313009447944e-05, + "loss": 2.1178, + "step": 10110000 + }, + { + "epoch": 29.27, + "learning_rate": 3.5372406446832164e-05, + "loss": 2.0883, + "step": 10110500 + }, + { + "epoch": 29.27, + "learning_rate": 3.5371682799184886e-05, + "loss": 2.1264, + "step": 10111000 + }, + { + "epoch": 29.27, + "learning_rate": 3.53709605988329e-05, + "loss": 2.09, + "step": 10111500 + }, + { + "epoch": 29.27, + "learning_rate": 3.5370236951185624e-05, + "loss": 2.0995, + "step": 10112000 + }, + { + "epoch": 29.27, + "learning_rate": 3.5369513303538346e-05, + "loss": 2.1285, + "step": 10112500 + }, + { + "epoch": 29.27, + "learning_rate": 3.536879255048166e-05, + "loss": 2.1151, + "step": 10113000 + }, + { + "epoch": 29.27, + "learning_rate": 3.5368068902834384e-05, + "loss": 2.0835, + "step": 10113500 + }, + { + "epoch": 29.28, + "learning_rate": 3.5367345255187106e-05, + "loss": 2.1137, + "step": 10114000 + }, + { + "epoch": 29.28, + "learning_rate": 3.536662160753983e-05, + "loss": 2.1143, + "step": 10114500 + }, + { + "epoch": 29.28, + "learning_rate": 3.536589795989255e-05, + "loss": 2.116, + "step": 10115000 + }, + { + "epoch": 29.28, + "learning_rate": 3.536517575954057e-05, + "loss": 2.1189, + "step": 10115500 + }, + { + "epoch": 29.28, + "learning_rate": 3.5364453559188595e-05, + "loss": 2.1276, + "step": 10116000 + }, + { + "epoch": 29.28, + "learning_rate": 3.536372991154132e-05, + "loss": 2.0934, + "step": 10116500 + }, + { + "epoch": 29.28, + "learning_rate": 3.536300626389404e-05, + "loss": 2.1313, + "step": 10117000 + }, + { + "epoch": 29.29, + "learning_rate": 3.536228261624676e-05, + "loss": 2.0721, + "step": 10117500 + }, + { + "epoch": 29.29, + "learning_rate": 3.5361558968599484e-05, + "loss": 2.0895, + "step": 10118000 + }, + { + "epoch": 29.29, + "learning_rate": 3.5360835320952206e-05, + "loss": 2.1166, + "step": 10118500 + }, + { + "epoch": 29.29, + "learning_rate": 3.536011167330493e-05, + "loss": 2.0963, + "step": 10119000 + }, + { + "epoch": 29.29, + "learning_rate": 3.535938802565765e-05, + "loss": 2.122, + "step": 10119500 + }, + { + "epoch": 29.29, + "learning_rate": 3.535866437801037e-05, + "loss": 2.1048, + "step": 10120000 + }, + { + "epoch": 29.29, + "learning_rate": 3.5357940730363095e-05, + "loss": 2.1212, + "step": 10120500 + }, + { + "epoch": 29.3, + "learning_rate": 3.5357217082715824e-05, + "loss": 2.0951, + "step": 10121000 + }, + { + "epoch": 29.3, + "learning_rate": 3.5356493435068546e-05, + "loss": 2.0906, + "step": 10121500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535576978742127e-05, + "loss": 2.1043, + "step": 10122000 + }, + { + "epoch": 29.3, + "learning_rate": 3.5355047587069284e-05, + "loss": 2.0728, + "step": 10122500 + }, + { + "epoch": 29.3, + "learning_rate": 3.53543253867173e-05, + "loss": 2.0867, + "step": 10123000 + }, + { + "epoch": 29.3, + "learning_rate": 3.535360173907003e-05, + "loss": 2.0928, + "step": 10123500 + }, + { + "epoch": 29.3, + "learning_rate": 3.535287809142275e-05, + "loss": 2.1054, + "step": 10124000 + }, + { + "epoch": 29.31, + "learning_rate": 3.535215444377547e-05, + "loss": 2.1018, + "step": 10124500 + }, + { + "epoch": 29.31, + "learning_rate": 3.535143369071879e-05, + "loss": 2.1008, + "step": 10125000 + }, + { + "epoch": 29.31, + "learning_rate": 3.535071004307151e-05, + "loss": 2.105, + "step": 10125500 + }, + { + "epoch": 29.31, + "learning_rate": 3.534998639542423e-05, + "loss": 2.1305, + "step": 10126000 + }, + { + "epoch": 29.31, + "learning_rate": 3.5349262747776956e-05, + "loss": 2.1059, + "step": 10126500 + }, + { + "epoch": 29.31, + "learning_rate": 3.534853910012968e-05, + "loss": 2.1162, + "step": 10127000 + }, + { + "epoch": 29.31, + "learning_rate": 3.53478154524824e-05, + "loss": 2.121, + "step": 10127500 + }, + { + "epoch": 29.32, + "learning_rate": 3.534709180483512e-05, + "loss": 2.0983, + "step": 10128000 + }, + { + "epoch": 29.32, + "learning_rate": 3.534636815718785e-05, + "loss": 2.1087, + "step": 10128500 + }, + { + "epoch": 29.32, + "learning_rate": 3.534564595683587e-05, + "loss": 2.1009, + "step": 10129000 + }, + { + "epoch": 29.32, + "learning_rate": 3.534492230918859e-05, + "loss": 2.1082, + "step": 10129500 + }, + { + "epoch": 29.32, + "learning_rate": 3.534419866154131e-05, + "loss": 2.1222, + "step": 10130000 + }, + { + "epoch": 29.32, + "learning_rate": 3.5343475013894034e-05, + "loss": 2.0979, + "step": 10130500 + }, + { + "epoch": 29.33, + "learning_rate": 3.534275136624676e-05, + "loss": 2.093, + "step": 10131000 + }, + { + "epoch": 29.33, + "learning_rate": 3.534202916589478e-05, + "loss": 2.1366, + "step": 10131500 + }, + { + "epoch": 29.33, + "learning_rate": 3.53413055182475e-05, + "loss": 2.0985, + "step": 10132000 + }, + { + "epoch": 29.33, + "learning_rate": 3.534058187060022e-05, + "loss": 2.1058, + "step": 10132500 + }, + { + "epoch": 29.33, + "learning_rate": 3.533985822295295e-05, + "loss": 2.1194, + "step": 10133000 + }, + { + "epoch": 29.33, + "learning_rate": 3.5339134575305674e-05, + "loss": 2.0976, + "step": 10133500 + }, + { + "epoch": 29.33, + "learning_rate": 3.5338410927658396e-05, + "loss": 2.0871, + "step": 10134000 + }, + { + "epoch": 29.34, + "learning_rate": 3.533768728001112e-05, + "loss": 2.1024, + "step": 10134500 + }, + { + "epoch": 29.34, + "learning_rate": 3.533696363236384e-05, + "loss": 2.1072, + "step": 10135000 + }, + { + "epoch": 29.34, + "learning_rate": 3.533623998471656e-05, + "loss": 2.1013, + "step": 10135500 + }, + { + "epoch": 29.34, + "learning_rate": 3.533551778436458e-05, + "loss": 2.105, + "step": 10136000 + }, + { + "epoch": 29.34, + "learning_rate": 3.53347941367173e-05, + "loss": 2.1342, + "step": 10136500 + }, + { + "epoch": 29.34, + "learning_rate": 3.533407193636532e-05, + "loss": 2.1156, + "step": 10137000 + }, + { + "epoch": 29.34, + "learning_rate": 3.5333348288718045e-05, + "loss": 2.103, + "step": 10137500 + }, + { + "epoch": 29.35, + "learning_rate": 3.533262608836606e-05, + "loss": 2.1184, + "step": 10138000 + }, + { + "epoch": 29.35, + "learning_rate": 3.533190244071878e-05, + "loss": 2.1141, + "step": 10138500 + }, + { + "epoch": 29.35, + "learning_rate": 3.533117879307151e-05, + "loss": 2.1148, + "step": 10139000 + }, + { + "epoch": 29.35, + "learning_rate": 3.5330455145424234e-05, + "loss": 2.1212, + "step": 10139500 + }, + { + "epoch": 29.35, + "learning_rate": 3.5329731497776956e-05, + "loss": 2.1252, + "step": 10140000 + }, + { + "epoch": 29.35, + "learning_rate": 3.532900785012968e-05, + "loss": 2.1205, + "step": 10140500 + }, + { + "epoch": 29.35, + "learning_rate": 3.53282842024824e-05, + "loss": 2.1031, + "step": 10141000 + }, + { + "epoch": 29.36, + "learning_rate": 3.532756055483512e-05, + "loss": 2.109, + "step": 10141500 + }, + { + "epoch": 29.36, + "learning_rate": 3.532683690718785e-05, + "loss": 2.0919, + "step": 10142000 + }, + { + "epoch": 29.36, + "learning_rate": 3.5326113259540574e-05, + "loss": 2.1027, + "step": 10142500 + }, + { + "epoch": 29.36, + "learning_rate": 3.5325389611893297e-05, + "loss": 2.1076, + "step": 10143000 + }, + { + "epoch": 29.36, + "learning_rate": 3.532466596424602e-05, + "loss": 2.1344, + "step": 10143500 + }, + { + "epoch": 29.36, + "learning_rate": 3.5323943763894034e-05, + "loss": 2.1049, + "step": 10144000 + }, + { + "epoch": 29.36, + "learning_rate": 3.5323220116246757e-05, + "loss": 2.0946, + "step": 10144500 + }, + { + "epoch": 29.37, + "learning_rate": 3.532249646859948e-05, + "loss": 2.1283, + "step": 10145000 + }, + { + "epoch": 29.37, + "learning_rate": 3.53217728209522e-05, + "loss": 2.1056, + "step": 10145500 + }, + { + "epoch": 29.37, + "learning_rate": 3.532104917330493e-05, + "loss": 2.0956, + "step": 10146000 + }, + { + "epoch": 29.37, + "learning_rate": 3.532032552565765e-05, + "loss": 2.1071, + "step": 10146500 + }, + { + "epoch": 29.37, + "learning_rate": 3.5319601878010374e-05, + "loss": 2.1017, + "step": 10147000 + }, + { + "epoch": 29.37, + "learning_rate": 3.5318878230363103e-05, + "loss": 2.1078, + "step": 10147500 + }, + { + "epoch": 29.37, + "learning_rate": 3.5318154582715826e-05, + "loss": 2.0792, + "step": 10148000 + }, + { + "epoch": 29.38, + "learning_rate": 3.531743093506855e-05, + "loss": 2.1029, + "step": 10148500 + }, + { + "epoch": 29.38, + "learning_rate": 3.531670728742127e-05, + "loss": 2.1167, + "step": 10149000 + }, + { + "epoch": 29.38, + "learning_rate": 3.5315985087069286e-05, + "loss": 2.1294, + "step": 10149500 + }, + { + "epoch": 29.38, + "learning_rate": 3.531526143942201e-05, + "loss": 2.1129, + "step": 10150000 + }, + { + "epoch": 29.38, + "learning_rate": 3.531453779177473e-05, + "loss": 2.1165, + "step": 10150500 + }, + { + "epoch": 29.38, + "learning_rate": 3.531381414412745e-05, + "loss": 2.1068, + "step": 10151000 + }, + { + "epoch": 29.38, + "learning_rate": 3.5313090496480175e-05, + "loss": 2.1112, + "step": 10151500 + }, + { + "epoch": 29.39, + "learning_rate": 3.5312366848832904e-05, + "loss": 2.1092, + "step": 10152000 + }, + { + "epoch": 29.39, + "learning_rate": 3.5311643201185626e-05, + "loss": 2.1378, + "step": 10152500 + }, + { + "epoch": 29.39, + "learning_rate": 3.5310919553538355e-05, + "loss": 2.1203, + "step": 10153000 + }, + { + "epoch": 29.39, + "learning_rate": 3.531019590589108e-05, + "loss": 2.0985, + "step": 10153500 + }, + { + "epoch": 29.39, + "learning_rate": 3.530947370553909e-05, + "loss": 2.0924, + "step": 10154000 + }, + { + "epoch": 29.39, + "learning_rate": 3.5308750057891815e-05, + "loss": 2.099, + "step": 10154500 + }, + { + "epoch": 29.39, + "learning_rate": 3.530802641024454e-05, + "loss": 2.1033, + "step": 10155000 + }, + { + "epoch": 29.4, + "learning_rate": 3.530730276259726e-05, + "loss": 2.1017, + "step": 10155500 + }, + { + "epoch": 29.4, + "learning_rate": 3.5306580562245275e-05, + "loss": 2.0951, + "step": 10156000 + }, + { + "epoch": 29.4, + "learning_rate": 3.5305856914598004e-05, + "loss": 2.1089, + "step": 10156500 + }, + { + "epoch": 29.4, + "learning_rate": 3.5305133266950726e-05, + "loss": 2.1044, + "step": 10157000 + }, + { + "epoch": 29.4, + "learning_rate": 3.530440961930345e-05, + "loss": 2.103, + "step": 10157500 + }, + { + "epoch": 29.4, + "learning_rate": 3.530368597165617e-05, + "loss": 2.1001, + "step": 10158000 + }, + { + "epoch": 29.4, + "learning_rate": 3.5302963771304186e-05, + "loss": 2.1024, + "step": 10158500 + }, + { + "epoch": 29.41, + "learning_rate": 3.53022430182475e-05, + "loss": 2.1164, + "step": 10159000 + }, + { + "epoch": 29.41, + "learning_rate": 3.5301519370600224e-05, + "loss": 2.1045, + "step": 10159500 + }, + { + "epoch": 29.41, + "learning_rate": 3.5300795722952946e-05, + "loss": 2.1154, + "step": 10160000 + }, + { + "epoch": 29.41, + "learning_rate": 3.530007207530567e-05, + "loss": 2.0998, + "step": 10160500 + }, + { + "epoch": 29.41, + "learning_rate": 3.52993484276584e-05, + "loss": 2.1152, + "step": 10161000 + }, + { + "epoch": 29.41, + "learning_rate": 3.529862478001112e-05, + "loss": 2.0962, + "step": 10161500 + }, + { + "epoch": 29.41, + "learning_rate": 3.529790113236384e-05, + "loss": 2.1202, + "step": 10162000 + }, + { + "epoch": 29.42, + "learning_rate": 3.5297177484716564e-05, + "loss": 2.1352, + "step": 10162500 + }, + { + "epoch": 29.42, + "learning_rate": 3.5296453837069287e-05, + "loss": 2.1282, + "step": 10163000 + }, + { + "epoch": 29.42, + "learning_rate": 3.529573018942201e-05, + "loss": 2.1083, + "step": 10163500 + }, + { + "epoch": 29.42, + "learning_rate": 3.529500654177473e-05, + "loss": 2.1109, + "step": 10164000 + }, + { + "epoch": 29.42, + "learning_rate": 3.529428289412745e-05, + "loss": 2.0879, + "step": 10164500 + }, + { + "epoch": 29.42, + "learning_rate": 3.529355924648018e-05, + "loss": 2.0928, + "step": 10165000 + }, + { + "epoch": 29.42, + "learning_rate": 3.52928370461282e-05, + "loss": 2.126, + "step": 10165500 + }, + { + "epoch": 29.43, + "learning_rate": 3.529211484577621e-05, + "loss": 2.1132, + "step": 10166000 + }, + { + "epoch": 29.43, + "learning_rate": 3.5291391198128936e-05, + "loss": 2.1046, + "step": 10166500 + }, + { + "epoch": 29.43, + "learning_rate": 3.529066755048166e-05, + "loss": 2.1224, + "step": 10167000 + }, + { + "epoch": 29.43, + "learning_rate": 3.528994390283438e-05, + "loss": 2.1248, + "step": 10167500 + }, + { + "epoch": 29.43, + "learning_rate": 3.52892202551871e-05, + "loss": 2.0978, + "step": 10168000 + }, + { + "epoch": 29.43, + "learning_rate": 3.528849805483513e-05, + "loss": 2.11, + "step": 10168500 + }, + { + "epoch": 29.44, + "learning_rate": 3.528777585448315e-05, + "loss": 2.1371, + "step": 10169000 + }, + { + "epoch": 29.44, + "learning_rate": 3.528705220683587e-05, + "loss": 2.1095, + "step": 10169500 + }, + { + "epoch": 29.44, + "learning_rate": 3.528632855918859e-05, + "loss": 2.1371, + "step": 10170000 + }, + { + "epoch": 29.44, + "learning_rate": 3.5285604911541314e-05, + "loss": 2.1178, + "step": 10170500 + }, + { + "epoch": 29.44, + "learning_rate": 3.5284881263894036e-05, + "loss": 2.1129, + "step": 10171000 + }, + { + "epoch": 29.44, + "learning_rate": 3.528415761624676e-05, + "loss": 2.1197, + "step": 10171500 + }, + { + "epoch": 29.44, + "learning_rate": 3.528343396859948e-05, + "loss": 2.1004, + "step": 10172000 + }, + { + "epoch": 29.45, + "learning_rate": 3.528271466283809e-05, + "loss": 2.106, + "step": 10172500 + }, + { + "epoch": 29.45, + "learning_rate": 3.528199101519081e-05, + "loss": 2.1093, + "step": 10173000 + }, + { + "epoch": 29.45, + "learning_rate": 3.5281267367543534e-05, + "loss": 2.1193, + "step": 10173500 + }, + { + "epoch": 29.45, + "learning_rate": 3.5280543719896256e-05, + "loss": 2.0901, + "step": 10174000 + }, + { + "epoch": 29.45, + "learning_rate": 3.527982007224898e-05, + "loss": 2.1182, + "step": 10174500 + }, + { + "epoch": 29.45, + "learning_rate": 3.527909642460171e-05, + "loss": 2.1215, + "step": 10175000 + }, + { + "epoch": 29.45, + "learning_rate": 3.527837277695443e-05, + "loss": 2.0913, + "step": 10175500 + }, + { + "epoch": 29.46, + "learning_rate": 3.527764912930715e-05, + "loss": 2.1154, + "step": 10176000 + }, + { + "epoch": 29.46, + "learning_rate": 3.527692548165988e-05, + "loss": 2.0852, + "step": 10176500 + }, + { + "epoch": 29.46, + "learning_rate": 3.52762018340126e-05, + "loss": 2.0995, + "step": 10177000 + }, + { + "epoch": 29.46, + "learning_rate": 3.5275478186365325e-05, + "loss": 2.1336, + "step": 10177500 + }, + { + "epoch": 29.46, + "learning_rate": 3.527475453871805e-05, + "loss": 2.1288, + "step": 10178000 + }, + { + "epoch": 29.46, + "learning_rate": 3.527403089107077e-05, + "loss": 2.1193, + "step": 10178500 + }, + { + "epoch": 29.46, + "learning_rate": 3.527330724342349e-05, + "loss": 2.1213, + "step": 10179000 + }, + { + "epoch": 29.47, + "learning_rate": 3.5272583595776214e-05, + "loss": 2.1178, + "step": 10179500 + }, + { + "epoch": 29.47, + "learning_rate": 3.5271859948128936e-05, + "loss": 2.1031, + "step": 10180000 + }, + { + "epoch": 29.47, + "learning_rate": 3.527113630048166e-05, + "loss": 2.1057, + "step": 10180500 + }, + { + "epoch": 29.47, + "learning_rate": 3.527041265283438e-05, + "loss": 2.0922, + "step": 10181000 + }, + { + "epoch": 29.47, + "learning_rate": 3.526968900518711e-05, + "loss": 2.0927, + "step": 10181500 + }, + { + "epoch": 29.47, + "learning_rate": 3.5268966804835125e-05, + "loss": 2.1036, + "step": 10182000 + }, + { + "epoch": 29.47, + "learning_rate": 3.526824315718785e-05, + "loss": 2.1107, + "step": 10182500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526751950954057e-05, + "loss": 2.1316, + "step": 10183000 + }, + { + "epoch": 29.48, + "learning_rate": 3.52667958618933e-05, + "loss": 2.1473, + "step": 10183500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526607221424602e-05, + "loss": 2.1143, + "step": 10184000 + }, + { + "epoch": 29.48, + "learning_rate": 3.526535001389404e-05, + "loss": 2.1082, + "step": 10184500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526462636624676e-05, + "loss": 2.1121, + "step": 10185000 + }, + { + "epoch": 29.48, + "learning_rate": 3.526390271859948e-05, + "loss": 2.0948, + "step": 10185500 + }, + { + "epoch": 29.48, + "learning_rate": 3.526317907095221e-05, + "loss": 2.1285, + "step": 10186000 + }, + { + "epoch": 29.49, + "learning_rate": 3.526245542330493e-05, + "loss": 2.1175, + "step": 10186500 + }, + { + "epoch": 29.49, + "learning_rate": 3.526173322295295e-05, + "loss": 2.1139, + "step": 10187000 + }, + { + "epoch": 29.49, + "learning_rate": 3.526100957530567e-05, + "loss": 2.0972, + "step": 10187500 + }, + { + "epoch": 29.49, + "learning_rate": 3.526028592765839e-05, + "loss": 2.1153, + "step": 10188000 + }, + { + "epoch": 29.49, + "learning_rate": 3.5259562280011115e-05, + "loss": 2.0871, + "step": 10188500 + }, + { + "epoch": 29.49, + "learning_rate": 3.525883863236384e-05, + "loss": 2.104, + "step": 10189000 + }, + { + "epoch": 29.49, + "learning_rate": 3.525811498471656e-05, + "loss": 2.1058, + "step": 10189500 + }, + { + "epoch": 29.5, + "learning_rate": 3.525739133706928e-05, + "loss": 2.1092, + "step": 10190000 + }, + { + "epoch": 29.5, + "learning_rate": 3.5256669136717304e-05, + "loss": 2.1488, + "step": 10190500 + }, + { + "epoch": 29.5, + "learning_rate": 3.525594693636532e-05, + "loss": 2.1031, + "step": 10191000 + }, + { + "epoch": 29.5, + "learning_rate": 3.525522473601334e-05, + "loss": 2.1264, + "step": 10191500 + }, + { + "epoch": 29.5, + "learning_rate": 3.5254501088366064e-05, + "loss": 2.1004, + "step": 10192000 + }, + { + "epoch": 29.5, + "learning_rate": 3.5253777440718786e-05, + "loss": 2.1073, + "step": 10192500 + }, + { + "epoch": 29.5, + "learning_rate": 3.525305379307151e-05, + "loss": 2.1287, + "step": 10193000 + }, + { + "epoch": 29.51, + "learning_rate": 3.525233014542423e-05, + "loss": 2.1277, + "step": 10193500 + }, + { + "epoch": 29.51, + "learning_rate": 3.525160649777696e-05, + "loss": 2.1333, + "step": 10194000 + }, + { + "epoch": 29.51, + "learning_rate": 3.525088285012968e-05, + "loss": 2.0879, + "step": 10194500 + }, + { + "epoch": 29.51, + "learning_rate": 3.52501606497777e-05, + "loss": 2.1002, + "step": 10195000 + }, + { + "epoch": 29.51, + "learning_rate": 3.524943700213042e-05, + "loss": 2.1175, + "step": 10195500 + }, + { + "epoch": 29.51, + "learning_rate": 3.524871335448314e-05, + "loss": 2.1135, + "step": 10196000 + }, + { + "epoch": 29.51, + "learning_rate": 3.5247989706835864e-05, + "loss": 2.1047, + "step": 10196500 + }, + { + "epoch": 29.52, + "learning_rate": 3.5247266059188586e-05, + "loss": 2.1317, + "step": 10197000 + }, + { + "epoch": 29.52, + "learning_rate": 3.524654241154131e-05, + "loss": 2.1054, + "step": 10197500 + }, + { + "epoch": 29.52, + "learning_rate": 3.524581876389403e-05, + "loss": 2.1159, + "step": 10198000 + }, + { + "epoch": 29.52, + "learning_rate": 3.524509511624676e-05, + "loss": 2.107, + "step": 10198500 + }, + { + "epoch": 29.52, + "learning_rate": 3.524437146859948e-05, + "loss": 2.1379, + "step": 10199000 + }, + { + "epoch": 29.52, + "learning_rate": 3.524364782095221e-05, + "loss": 2.1186, + "step": 10199500 + }, + { + "epoch": 29.52, + "learning_rate": 3.524292417330493e-05, + "loss": 2.1054, + "step": 10200000 + }, + { + "epoch": 29.53, + "learning_rate": 3.5242200525657655e-05, + "loss": 2.1386, + "step": 10200500 + }, + { + "epoch": 29.53, + "learning_rate": 3.524147687801038e-05, + "loss": 2.0921, + "step": 10201000 + }, + { + "epoch": 29.53, + "learning_rate": 3.524075467765839e-05, + "loss": 2.1101, + "step": 10201500 + }, + { + "epoch": 29.53, + "learning_rate": 3.5240031030011115e-05, + "loss": 2.1308, + "step": 10202000 + }, + { + "epoch": 29.53, + "learning_rate": 3.523930882965914e-05, + "loss": 2.1321, + "step": 10202500 + }, + { + "epoch": 29.53, + "learning_rate": 3.523858518201186e-05, + "loss": 2.102, + "step": 10203000 + }, + { + "epoch": 29.53, + "learning_rate": 3.523786153436458e-05, + "loss": 2.1208, + "step": 10203500 + }, + { + "epoch": 29.54, + "learning_rate": 3.5237137886717304e-05, + "loss": 2.1341, + "step": 10204000 + }, + { + "epoch": 29.54, + "learning_rate": 3.523641423907003e-05, + "loss": 2.1154, + "step": 10204500 + }, + { + "epoch": 29.54, + "learning_rate": 3.523569059142275e-05, + "loss": 2.1375, + "step": 10205000 + }, + { + "epoch": 29.54, + "learning_rate": 3.5234968391070765e-05, + "loss": 2.1052, + "step": 10205500 + }, + { + "epoch": 29.54, + "learning_rate": 3.5234244743423494e-05, + "loss": 2.1009, + "step": 10206000 + }, + { + "epoch": 29.54, + "learning_rate": 3.5233521095776216e-05, + "loss": 2.0865, + "step": 10206500 + }, + { + "epoch": 29.55, + "learning_rate": 3.523279744812894e-05, + "loss": 2.1158, + "step": 10207000 + }, + { + "epoch": 29.55, + "learning_rate": 3.523207380048166e-05, + "loss": 2.1036, + "step": 10207500 + }, + { + "epoch": 29.55, + "learning_rate": 3.523135015283439e-05, + "loss": 2.0814, + "step": 10208000 + }, + { + "epoch": 29.55, + "learning_rate": 3.523062650518711e-05, + "loss": 2.1023, + "step": 10208500 + }, + { + "epoch": 29.55, + "learning_rate": 3.5229902857539834e-05, + "loss": 2.1272, + "step": 10209000 + }, + { + "epoch": 29.55, + "learning_rate": 3.5229179209892556e-05, + "loss": 2.1229, + "step": 10209500 + }, + { + "epoch": 29.55, + "learning_rate": 3.522845556224528e-05, + "loss": 2.1319, + "step": 10210000 + }, + { + "epoch": 29.56, + "learning_rate": 3.5227731914598e-05, + "loss": 2.0992, + "step": 10210500 + }, + { + "epoch": 29.56, + "learning_rate": 3.5227009714246016e-05, + "loss": 2.1075, + "step": 10211000 + }, + { + "epoch": 29.56, + "learning_rate": 3.522628606659874e-05, + "loss": 2.1068, + "step": 10211500 + }, + { + "epoch": 29.56, + "learning_rate": 3.522556241895146e-05, + "loss": 2.092, + "step": 10212000 + }, + { + "epoch": 29.56, + "learning_rate": 3.522483877130419e-05, + "loss": 2.1199, + "step": 10212500 + }, + { + "epoch": 29.56, + "learning_rate": 3.522411512365691e-05, + "loss": 2.1159, + "step": 10213000 + }, + { + "epoch": 29.56, + "learning_rate": 3.522339147600964e-05, + "loss": 2.1518, + "step": 10213500 + }, + { + "epoch": 29.57, + "learning_rate": 3.5222669275657656e-05, + "loss": 2.1365, + "step": 10214000 + }, + { + "epoch": 29.57, + "learning_rate": 3.522194562801038e-05, + "loss": 2.1085, + "step": 10214500 + }, + { + "epoch": 29.57, + "learning_rate": 3.5221223427658394e-05, + "loss": 2.0976, + "step": 10215000 + }, + { + "epoch": 29.57, + "learning_rate": 3.5220499780011116e-05, + "loss": 2.1006, + "step": 10215500 + }, + { + "epoch": 29.57, + "learning_rate": 3.521977613236384e-05, + "loss": 2.1127, + "step": 10216000 + }, + { + "epoch": 29.57, + "learning_rate": 3.521905248471656e-05, + "loss": 2.1273, + "step": 10216500 + }, + { + "epoch": 29.57, + "learning_rate": 3.521833028436458e-05, + "loss": 2.1, + "step": 10217000 + }, + { + "epoch": 29.58, + "learning_rate": 3.5217606636717305e-05, + "loss": 2.1045, + "step": 10217500 + }, + { + "epoch": 29.58, + "learning_rate": 3.521688298907003e-05, + "loss": 2.1024, + "step": 10218000 + }, + { + "epoch": 29.58, + "learning_rate": 3.521615934142275e-05, + "loss": 2.1136, + "step": 10218500 + }, + { + "epoch": 29.58, + "learning_rate": 3.521543569377547e-05, + "loss": 2.0962, + "step": 10219000 + }, + { + "epoch": 29.58, + "learning_rate": 3.5214712046128194e-05, + "loss": 2.1329, + "step": 10219500 + }, + { + "epoch": 29.58, + "learning_rate": 3.5213988398480916e-05, + "loss": 2.1099, + "step": 10220000 + }, + { + "epoch": 29.58, + "learning_rate": 3.521326475083364e-05, + "loss": 2.0751, + "step": 10220500 + }, + { + "epoch": 29.59, + "learning_rate": 3.521254110318637e-05, + "loss": 2.1279, + "step": 10221000 + }, + { + "epoch": 29.59, + "learning_rate": 3.521181890283439e-05, + "loss": 2.1184, + "step": 10221500 + }, + { + "epoch": 29.59, + "learning_rate": 3.521109525518711e-05, + "loss": 2.108, + "step": 10222000 + }, + { + "epoch": 29.59, + "learning_rate": 3.5210371607539834e-05, + "loss": 2.1202, + "step": 10222500 + }, + { + "epoch": 29.59, + "learning_rate": 3.520964795989256e-05, + "loss": 2.1204, + "step": 10223000 + }, + { + "epoch": 29.59, + "learning_rate": 3.520892431224528e-05, + "loss": 2.116, + "step": 10223500 + }, + { + "epoch": 29.59, + "learning_rate": 3.5208200664598e-05, + "loss": 2.0814, + "step": 10224000 + }, + { + "epoch": 29.6, + "learning_rate": 3.5207477016950723e-05, + "loss": 2.1033, + "step": 10224500 + }, + { + "epoch": 29.6, + "learning_rate": 3.5206753369303446e-05, + "loss": 2.1197, + "step": 10225000 + }, + { + "epoch": 29.6, + "learning_rate": 3.520602972165617e-05, + "loss": 2.1186, + "step": 10225500 + }, + { + "epoch": 29.6, + "learning_rate": 3.520530607400889e-05, + "loss": 2.1058, + "step": 10226000 + }, + { + "epoch": 29.6, + "learning_rate": 3.520458242636161e-05, + "loss": 2.1148, + "step": 10226500 + }, + { + "epoch": 29.6, + "learning_rate": 3.5203860226009635e-05, + "loss": 2.1362, + "step": 10227000 + }, + { + "epoch": 29.6, + "learning_rate": 3.520313802565765e-05, + "loss": 2.1019, + "step": 10227500 + }, + { + "epoch": 29.61, + "learning_rate": 3.520241437801037e-05, + "loss": 2.1289, + "step": 10228000 + }, + { + "epoch": 29.61, + "learning_rate": 3.52016907303631e-05, + "loss": 2.0993, + "step": 10228500 + }, + { + "epoch": 29.61, + "learning_rate": 3.520096853001112e-05, + "loss": 2.1159, + "step": 10229000 + }, + { + "epoch": 29.61, + "learning_rate": 3.520024488236384e-05, + "loss": 2.1293, + "step": 10229500 + }, + { + "epoch": 29.61, + "learning_rate": 3.519952123471656e-05, + "loss": 2.1072, + "step": 10230000 + }, + { + "epoch": 29.61, + "learning_rate": 3.519879758706929e-05, + "loss": 2.1008, + "step": 10230500 + }, + { + "epoch": 29.61, + "learning_rate": 3.519807393942201e-05, + "loss": 2.1046, + "step": 10231000 + }, + { + "epoch": 29.62, + "learning_rate": 3.5197350291774735e-05, + "loss": 2.0959, + "step": 10231500 + }, + { + "epoch": 29.62, + "learning_rate": 3.519662664412746e-05, + "loss": 2.132, + "step": 10232000 + }, + { + "epoch": 29.62, + "learning_rate": 3.519590299648018e-05, + "loss": 2.1051, + "step": 10232500 + }, + { + "epoch": 29.62, + "learning_rate": 3.51951793488329e-05, + "loss": 2.1322, + "step": 10233000 + }, + { + "epoch": 29.62, + "learning_rate": 3.5194455701185624e-05, + "loss": 2.0929, + "step": 10233500 + }, + { + "epoch": 29.62, + "learning_rate": 3.5193732053538346e-05, + "loss": 2.1258, + "step": 10234000 + }, + { + "epoch": 29.62, + "learning_rate": 3.519300840589107e-05, + "loss": 2.1223, + "step": 10234500 + }, + { + "epoch": 29.63, + "learning_rate": 3.519228475824379e-05, + "loss": 2.1199, + "step": 10235000 + }, + { + "epoch": 29.63, + "learning_rate": 3.519156111059652e-05, + "loss": 2.1067, + "step": 10235500 + }, + { + "epoch": 29.63, + "learning_rate": 3.519083746294924e-05, + "loss": 2.108, + "step": 10236000 + }, + { + "epoch": 29.63, + "learning_rate": 3.5190113815301964e-05, + "loss": 2.1081, + "step": 10236500 + }, + { + "epoch": 29.63, + "learning_rate": 3.518939016765469e-05, + "loss": 2.1255, + "step": 10237000 + }, + { + "epoch": 29.63, + "learning_rate": 3.5188666520007415e-05, + "loss": 2.1109, + "step": 10237500 + }, + { + "epoch": 29.63, + "learning_rate": 3.518794287236014e-05, + "loss": 2.1141, + "step": 10238000 + }, + { + "epoch": 29.64, + "learning_rate": 3.518721922471286e-05, + "loss": 2.105, + "step": 10238500 + }, + { + "epoch": 29.64, + "learning_rate": 3.5186497024360875e-05, + "loss": 2.106, + "step": 10239000 + }, + { + "epoch": 29.64, + "learning_rate": 3.51857733767136e-05, + "loss": 2.1004, + "step": 10239500 + }, + { + "epoch": 29.64, + "learning_rate": 3.518505117636161e-05, + "loss": 2.098, + "step": 10240000 + }, + { + "epoch": 29.64, + "learning_rate": 3.5184328976009635e-05, + "loss": 2.1135, + "step": 10240500 + }, + { + "epoch": 29.64, + "learning_rate": 3.518360532836236e-05, + "loss": 2.1278, + "step": 10241000 + }, + { + "epoch": 29.64, + "learning_rate": 3.518288312801037e-05, + "loss": 2.1082, + "step": 10241500 + }, + { + "epoch": 29.65, + "learning_rate": 3.518216092765839e-05, + "loss": 2.1071, + "step": 10242000 + }, + { + "epoch": 29.65, + "learning_rate": 3.518143728001112e-05, + "loss": 2.1356, + "step": 10242500 + }, + { + "epoch": 29.65, + "learning_rate": 3.518071363236384e-05, + "loss": 2.1168, + "step": 10243000 + }, + { + "epoch": 29.65, + "learning_rate": 3.517998998471657e-05, + "loss": 2.1038, + "step": 10243500 + }, + { + "epoch": 29.65, + "learning_rate": 3.517926633706929e-05, + "loss": 2.1229, + "step": 10244000 + }, + { + "epoch": 29.65, + "learning_rate": 3.5178542689422014e-05, + "loss": 2.1182, + "step": 10244500 + }, + { + "epoch": 29.66, + "learning_rate": 3.5177819041774736e-05, + "loss": 2.1204, + "step": 10245000 + }, + { + "epoch": 29.66, + "learning_rate": 3.517709539412746e-05, + "loss": 2.13, + "step": 10245500 + }, + { + "epoch": 29.66, + "learning_rate": 3.517637174648018e-05, + "loss": 2.126, + "step": 10246000 + }, + { + "epoch": 29.66, + "learning_rate": 3.51756480988329e-05, + "loss": 2.1271, + "step": 10246500 + }, + { + "epoch": 29.66, + "learning_rate": 3.5174924451185625e-05, + "loss": 2.1293, + "step": 10247000 + }, + { + "epoch": 29.66, + "learning_rate": 3.517420080353835e-05, + "loss": 2.1183, + "step": 10247500 + }, + { + "epoch": 29.66, + "learning_rate": 3.517347715589107e-05, + "loss": 2.0908, + "step": 10248000 + }, + { + "epoch": 29.67, + "learning_rate": 3.517275350824379e-05, + "loss": 2.1158, + "step": 10248500 + }, + { + "epoch": 29.67, + "learning_rate": 3.5172031307891814e-05, + "loss": 2.123, + "step": 10249000 + }, + { + "epoch": 29.67, + "learning_rate": 3.5171307660244536e-05, + "loss": 2.1139, + "step": 10249500 + }, + { + "epoch": 29.67, + "learning_rate": 3.517058401259726e-05, + "loss": 2.1062, + "step": 10250000 + }, + { + "epoch": 29.67, + "learning_rate": 3.516986036494999e-05, + "loss": 2.1003, + "step": 10250500 + }, + { + "epoch": 29.67, + "learning_rate": 3.516913671730271e-05, + "loss": 2.1267, + "step": 10251000 + }, + { + "epoch": 29.67, + "learning_rate": 3.5168414516950725e-05, + "loss": 2.1204, + "step": 10251500 + }, + { + "epoch": 29.68, + "learning_rate": 3.516769086930345e-05, + "loss": 2.0959, + "step": 10252000 + }, + { + "epoch": 29.68, + "learning_rate": 3.516696866895147e-05, + "loss": 2.096, + "step": 10252500 + }, + { + "epoch": 29.68, + "learning_rate": 3.516624502130419e-05, + "loss": 2.1229, + "step": 10253000 + }, + { + "epoch": 29.68, + "learning_rate": 3.5165521373656914e-05, + "loss": 2.1272, + "step": 10253500 + }, + { + "epoch": 29.68, + "learning_rate": 3.5164797726009636e-05, + "loss": 2.0996, + "step": 10254000 + }, + { + "epoch": 29.68, + "learning_rate": 3.516407407836236e-05, + "loss": 2.1232, + "step": 10254500 + }, + { + "epoch": 29.68, + "learning_rate": 3.5163351878010374e-05, + "loss": 2.1184, + "step": 10255000 + }, + { + "epoch": 29.69, + "learning_rate": 3.5162628230363096e-05, + "loss": 2.1159, + "step": 10255500 + }, + { + "epoch": 29.69, + "learning_rate": 3.516190458271582e-05, + "loss": 2.1139, + "step": 10256000 + }, + { + "epoch": 29.69, + "learning_rate": 3.516118093506854e-05, + "loss": 2.1479, + "step": 10256500 + }, + { + "epoch": 29.69, + "learning_rate": 3.516045728742127e-05, + "loss": 2.0913, + "step": 10257000 + }, + { + "epoch": 29.69, + "learning_rate": 3.515973363977399e-05, + "loss": 2.0999, + "step": 10257500 + }, + { + "epoch": 29.69, + "learning_rate": 3.515900999212672e-05, + "loss": 2.108, + "step": 10258000 + }, + { + "epoch": 29.69, + "learning_rate": 3.515828634447944e-05, + "loss": 2.1207, + "step": 10258500 + }, + { + "epoch": 29.7, + "learning_rate": 3.5157562696832165e-05, + "loss": 2.1045, + "step": 10259000 + }, + { + "epoch": 29.7, + "learning_rate": 3.515683904918489e-05, + "loss": 2.0791, + "step": 10259500 + }, + { + "epoch": 29.7, + "learning_rate": 3.515611540153761e-05, + "loss": 2.0841, + "step": 10260000 + }, + { + "epoch": 29.7, + "learning_rate": 3.515539175389033e-05, + "loss": 2.0996, + "step": 10260500 + }, + { + "epoch": 29.7, + "learning_rate": 3.515466955353835e-05, + "loss": 2.1166, + "step": 10261000 + }, + { + "epoch": 29.7, + "learning_rate": 3.515394590589107e-05, + "loss": 2.1033, + "step": 10261500 + }, + { + "epoch": 29.7, + "learning_rate": 3.515322225824379e-05, + "loss": 2.1085, + "step": 10262000 + }, + { + "epoch": 29.71, + "learning_rate": 3.515249861059652e-05, + "loss": 2.0997, + "step": 10262500 + }, + { + "epoch": 29.71, + "learning_rate": 3.5151774962949243e-05, + "loss": 2.0946, + "step": 10263000 + }, + { + "epoch": 29.71, + "learning_rate": 3.515105276259726e-05, + "loss": 2.1382, + "step": 10263500 + }, + { + "epoch": 29.71, + "learning_rate": 3.515032911494998e-05, + "loss": 2.1323, + "step": 10264000 + }, + { + "epoch": 29.71, + "learning_rate": 3.5149605467302703e-05, + "loss": 2.1098, + "step": 10264500 + }, + { + "epoch": 29.71, + "learning_rate": 3.5148881819655426e-05, + "loss": 2.0981, + "step": 10265000 + }, + { + "epoch": 29.71, + "learning_rate": 3.5148158172008155e-05, + "loss": 2.1097, + "step": 10265500 + }, + { + "epoch": 29.72, + "learning_rate": 3.514743452436088e-05, + "loss": 2.1366, + "step": 10266000 + }, + { + "epoch": 29.72, + "learning_rate": 3.51467108767136e-05, + "loss": 2.0885, + "step": 10266500 + }, + { + "epoch": 29.72, + "learning_rate": 3.514598722906632e-05, + "loss": 2.1093, + "step": 10267000 + }, + { + "epoch": 29.72, + "learning_rate": 3.5145265028714344e-05, + "loss": 2.1117, + "step": 10267500 + }, + { + "epoch": 29.72, + "learning_rate": 3.514454282836236e-05, + "loss": 2.1112, + "step": 10268000 + }, + { + "epoch": 29.72, + "learning_rate": 3.514381918071508e-05, + "loss": 2.1242, + "step": 10268500 + }, + { + "epoch": 29.72, + "learning_rate": 3.5143095533067804e-05, + "loss": 2.1179, + "step": 10269000 + }, + { + "epoch": 29.73, + "learning_rate": 3.5142371885420526e-05, + "loss": 2.0993, + "step": 10269500 + }, + { + "epoch": 29.73, + "learning_rate": 3.514164968506855e-05, + "loss": 2.0976, + "step": 10270000 + }, + { + "epoch": 29.73, + "learning_rate": 3.514092603742127e-05, + "loss": 2.1208, + "step": 10270500 + }, + { + "epoch": 29.73, + "learning_rate": 3.514020238977399e-05, + "loss": 2.118, + "step": 10271000 + }, + { + "epoch": 29.73, + "learning_rate": 3.5139478742126715e-05, + "loss": 2.116, + "step": 10271500 + }, + { + "epoch": 29.73, + "learning_rate": 3.513875509447944e-05, + "loss": 2.1124, + "step": 10272000 + }, + { + "epoch": 29.73, + "learning_rate": 3.513803144683216e-05, + "loss": 2.0891, + "step": 10272500 + }, + { + "epoch": 29.74, + "learning_rate": 3.513730779918489e-05, + "loss": 2.1068, + "step": 10273000 + }, + { + "epoch": 29.74, + "learning_rate": 3.513658415153761e-05, + "loss": 2.1024, + "step": 10273500 + }, + { + "epoch": 29.74, + "learning_rate": 3.513586050389033e-05, + "loss": 2.1281, + "step": 10274000 + }, + { + "epoch": 29.74, + "learning_rate": 3.513513830353835e-05, + "loss": 2.1109, + "step": 10274500 + }, + { + "epoch": 29.74, + "learning_rate": 3.513441465589107e-05, + "loss": 2.0959, + "step": 10275000 + }, + { + "epoch": 29.74, + "learning_rate": 3.51336910082438e-05, + "loss": 2.1353, + "step": 10275500 + }, + { + "epoch": 29.74, + "learning_rate": 3.5132968807891815e-05, + "loss": 2.141, + "step": 10276000 + }, + { + "epoch": 29.75, + "learning_rate": 3.513224516024454e-05, + "loss": 2.1089, + "step": 10276500 + }, + { + "epoch": 29.75, + "learning_rate": 3.513152151259726e-05, + "loss": 2.0886, + "step": 10277000 + }, + { + "epoch": 29.75, + "learning_rate": 3.513079786494998e-05, + "loss": 2.1127, + "step": 10277500 + }, + { + "epoch": 29.75, + "learning_rate": 3.5130074217302704e-05, + "loss": 2.1377, + "step": 10278000 + }, + { + "epoch": 29.75, + "learning_rate": 3.5129350569655427e-05, + "loss": 2.1022, + "step": 10278500 + }, + { + "epoch": 29.75, + "learning_rate": 3.512862692200815e-05, + "loss": 2.1141, + "step": 10279000 + }, + { + "epoch": 29.75, + "learning_rate": 3.512790327436087e-05, + "loss": 2.1152, + "step": 10279500 + }, + { + "epoch": 29.76, + "learning_rate": 3.51271796267136e-05, + "loss": 2.1037, + "step": 10280000 + }, + { + "epoch": 29.76, + "learning_rate": 3.512645742636162e-05, + "loss": 2.0929, + "step": 10280500 + }, + { + "epoch": 29.76, + "learning_rate": 3.5125733778714345e-05, + "loss": 2.109, + "step": 10281000 + }, + { + "epoch": 29.76, + "learning_rate": 3.512501013106707e-05, + "loss": 2.1228, + "step": 10281500 + }, + { + "epoch": 29.76, + "learning_rate": 3.512428648341979e-05, + "loss": 2.1201, + "step": 10282000 + }, + { + "epoch": 29.76, + "learning_rate": 3.512356283577251e-05, + "loss": 2.1225, + "step": 10282500 + }, + { + "epoch": 29.77, + "learning_rate": 3.512284208271582e-05, + "loss": 2.1054, + "step": 10283000 + }, + { + "epoch": 29.77, + "learning_rate": 3.512211843506855e-05, + "loss": 2.1151, + "step": 10283500 + }, + { + "epoch": 29.77, + "learning_rate": 3.512139478742127e-05, + "loss": 2.1429, + "step": 10284000 + }, + { + "epoch": 29.77, + "learning_rate": 3.512067258706929e-05, + "loss": 2.1193, + "step": 10284500 + }, + { + "epoch": 29.77, + "learning_rate": 3.511994893942201e-05, + "loss": 2.0916, + "step": 10285000 + }, + { + "epoch": 29.77, + "learning_rate": 3.511922529177473e-05, + "loss": 2.11, + "step": 10285500 + }, + { + "epoch": 29.77, + "learning_rate": 3.5118501644127454e-05, + "loss": 2.1206, + "step": 10286000 + }, + { + "epoch": 29.78, + "learning_rate": 3.5117777996480176e-05, + "loss": 2.1103, + "step": 10286500 + }, + { + "epoch": 29.78, + "learning_rate": 3.51170543488329e-05, + "loss": 2.1012, + "step": 10287000 + }, + { + "epoch": 29.78, + "learning_rate": 3.511633070118562e-05, + "loss": 2.0877, + "step": 10287500 + }, + { + "epoch": 29.78, + "learning_rate": 3.511560705353835e-05, + "loss": 2.0958, + "step": 10288000 + }, + { + "epoch": 29.78, + "learning_rate": 3.511488485318637e-05, + "loss": 2.0903, + "step": 10288500 + }, + { + "epoch": 29.78, + "learning_rate": 3.5114161205539094e-05, + "loss": 2.1312, + "step": 10289000 + }, + { + "epoch": 29.78, + "learning_rate": 3.5113437557891816e-05, + "loss": 2.1131, + "step": 10289500 + }, + { + "epoch": 29.79, + "learning_rate": 3.511271391024454e-05, + "loss": 2.127, + "step": 10290000 + }, + { + "epoch": 29.79, + "learning_rate": 3.511199026259726e-05, + "loss": 2.0806, + "step": 10290500 + }, + { + "epoch": 29.79, + "learning_rate": 3.511126661494998e-05, + "loss": 2.1315, + "step": 10291000 + }, + { + "epoch": 29.79, + "learning_rate": 3.5110542967302705e-05, + "loss": 2.1237, + "step": 10291500 + }, + { + "epoch": 29.79, + "learning_rate": 3.510981931965543e-05, + "loss": 2.1159, + "step": 10292000 + }, + { + "epoch": 29.79, + "learning_rate": 3.510909567200815e-05, + "loss": 2.0942, + "step": 10292500 + }, + { + "epoch": 29.79, + "learning_rate": 3.510837202436087e-05, + "loss": 2.119, + "step": 10293000 + }, + { + "epoch": 29.8, + "learning_rate": 3.51076483767136e-05, + "loss": 2.1045, + "step": 10293500 + }, + { + "epoch": 29.8, + "learning_rate": 3.5106926176361616e-05, + "loss": 2.1205, + "step": 10294000 + }, + { + "epoch": 29.8, + "learning_rate": 3.510620252871434e-05, + "loss": 2.0996, + "step": 10294500 + }, + { + "epoch": 29.8, + "learning_rate": 3.510547888106706e-05, + "loss": 2.121, + "step": 10295000 + }, + { + "epoch": 29.8, + "learning_rate": 3.510475523341979e-05, + "loss": 2.0856, + "step": 10295500 + }, + { + "epoch": 29.8, + "learning_rate": 3.510403158577251e-05, + "loss": 2.1039, + "step": 10296000 + }, + { + "epoch": 29.8, + "learning_rate": 3.510330938542053e-05, + "loss": 2.1253, + "step": 10296500 + }, + { + "epoch": 29.81, + "learning_rate": 3.510258573777325e-05, + "loss": 2.1182, + "step": 10297000 + }, + { + "epoch": 29.81, + "learning_rate": 3.510186209012597e-05, + "loss": 2.1173, + "step": 10297500 + }, + { + "epoch": 29.81, + "learning_rate": 3.51011384424787e-05, + "loss": 2.1053, + "step": 10298000 + }, + { + "epoch": 29.81, + "learning_rate": 3.510041479483142e-05, + "loss": 2.124, + "step": 10298500 + }, + { + "epoch": 29.81, + "learning_rate": 3.5099691147184146e-05, + "loss": 2.0907, + "step": 10299000 + }, + { + "epoch": 29.81, + "learning_rate": 3.509896749953687e-05, + "loss": 2.1057, + "step": 10299500 + }, + { + "epoch": 29.81, + "learning_rate": 3.509824385188959e-05, + "loss": 2.1031, + "step": 10300000 + }, + { + "epoch": 29.82, + "learning_rate": 3.509752020424231e-05, + "loss": 2.1287, + "step": 10300500 + }, + { + "epoch": 29.82, + "learning_rate": 3.5096796556595034e-05, + "loss": 2.093, + "step": 10301000 + }, + { + "epoch": 29.82, + "learning_rate": 3.509607435624305e-05, + "loss": 2.0915, + "step": 10301500 + }, + { + "epoch": 29.82, + "learning_rate": 3.509535070859577e-05, + "loss": 2.1291, + "step": 10302000 + }, + { + "epoch": 29.82, + "learning_rate": 3.50946270609485e-05, + "loss": 2.0945, + "step": 10302500 + }, + { + "epoch": 29.82, + "learning_rate": 3.5093903413301224e-05, + "loss": 2.1464, + "step": 10303000 + }, + { + "epoch": 29.82, + "learning_rate": 3.509317976565395e-05, + "loss": 2.0889, + "step": 10303500 + }, + { + "epoch": 29.83, + "learning_rate": 3.5092456118006675e-05, + "loss": 2.1135, + "step": 10304000 + }, + { + "epoch": 29.83, + "learning_rate": 3.509173391765469e-05, + "loss": 2.0937, + "step": 10304500 + }, + { + "epoch": 29.83, + "learning_rate": 3.509101027000741e-05, + "loss": 2.0971, + "step": 10305000 + }, + { + "epoch": 29.83, + "learning_rate": 3.5090286622360135e-05, + "loss": 2.0857, + "step": 10305500 + }, + { + "epoch": 29.83, + "learning_rate": 3.508956297471286e-05, + "loss": 2.1109, + "step": 10306000 + }, + { + "epoch": 29.83, + "learning_rate": 3.508883932706558e-05, + "loss": 2.0777, + "step": 10306500 + }, + { + "epoch": 29.83, + "learning_rate": 3.50881156794183e-05, + "loss": 2.1212, + "step": 10307000 + }, + { + "epoch": 29.84, + "learning_rate": 3.5087392031771024e-05, + "loss": 2.1044, + "step": 10307500 + }, + { + "epoch": 29.84, + "learning_rate": 3.5086669831419046e-05, + "loss": 2.1279, + "step": 10308000 + }, + { + "epoch": 29.84, + "learning_rate": 3.508594618377177e-05, + "loss": 2.114, + "step": 10308500 + }, + { + "epoch": 29.84, + "learning_rate": 3.508522253612449e-05, + "loss": 2.1412, + "step": 10309000 + }, + { + "epoch": 29.84, + "learning_rate": 3.508449888847721e-05, + "loss": 2.1124, + "step": 10309500 + }, + { + "epoch": 29.84, + "learning_rate": 3.508377668812523e-05, + "loss": 2.1197, + "step": 10310000 + }, + { + "epoch": 29.84, + "learning_rate": 3.508305304047796e-05, + "loss": 2.1028, + "step": 10310500 + }, + { + "epoch": 29.85, + "learning_rate": 3.508233084012598e-05, + "loss": 2.1179, + "step": 10311000 + }, + { + "epoch": 29.85, + "learning_rate": 3.50816071924787e-05, + "loss": 2.1142, + "step": 10311500 + }, + { + "epoch": 29.85, + "learning_rate": 3.5080883544831424e-05, + "loss": 2.1107, + "step": 10312000 + }, + { + "epoch": 29.85, + "learning_rate": 3.5080159897184146e-05, + "loss": 2.0965, + "step": 10312500 + }, + { + "epoch": 29.85, + "learning_rate": 3.507943624953687e-05, + "loss": 2.1237, + "step": 10313000 + }, + { + "epoch": 29.85, + "learning_rate": 3.507871260188959e-05, + "loss": 2.1081, + "step": 10313500 + }, + { + "epoch": 29.85, + "learning_rate": 3.507798895424231e-05, + "loss": 2.0986, + "step": 10314000 + }, + { + "epoch": 29.86, + "learning_rate": 3.5077265306595035e-05, + "loss": 2.1146, + "step": 10314500 + }, + { + "epoch": 29.86, + "learning_rate": 3.507654310624305e-05, + "loss": 2.0997, + "step": 10315000 + }, + { + "epoch": 29.86, + "learning_rate": 3.507582090589107e-05, + "loss": 2.0968, + "step": 10315500 + }, + { + "epoch": 29.86, + "learning_rate": 3.5075097258243795e-05, + "loss": 2.1059, + "step": 10316000 + }, + { + "epoch": 29.86, + "learning_rate": 3.507437361059652e-05, + "loss": 2.1141, + "step": 10316500 + }, + { + "epoch": 29.86, + "learning_rate": 3.507364996294924e-05, + "loss": 2.13, + "step": 10317000 + }, + { + "epoch": 29.86, + "learning_rate": 3.507292631530196e-05, + "loss": 2.126, + "step": 10317500 + }, + { + "epoch": 29.87, + "learning_rate": 3.507220266765469e-05, + "loss": 2.1293, + "step": 10318000 + }, + { + "epoch": 29.87, + "learning_rate": 3.507147902000741e-05, + "loss": 2.1024, + "step": 10318500 + }, + { + "epoch": 29.87, + "learning_rate": 3.5070755372360136e-05, + "loss": 2.0927, + "step": 10319000 + }, + { + "epoch": 29.87, + "learning_rate": 3.507003317200815e-05, + "loss": 2.1222, + "step": 10319500 + }, + { + "epoch": 29.87, + "learning_rate": 3.506930952436088e-05, + "loss": 2.1126, + "step": 10320000 + }, + { + "epoch": 29.87, + "learning_rate": 3.50685858767136e-05, + "loss": 2.124, + "step": 10320500 + }, + { + "epoch": 29.88, + "learning_rate": 3.5067862229066325e-05, + "loss": 2.1235, + "step": 10321000 + }, + { + "epoch": 29.88, + "learning_rate": 3.506713858141905e-05, + "loss": 2.0973, + "step": 10321500 + }, + { + "epoch": 29.88, + "learning_rate": 3.506641493377177e-05, + "loss": 2.1164, + "step": 10322000 + }, + { + "epoch": 29.88, + "learning_rate": 3.5065692733419785e-05, + "loss": 2.1189, + "step": 10322500 + }, + { + "epoch": 29.88, + "learning_rate": 3.50649705330678e-05, + "loss": 2.095, + "step": 10323000 + }, + { + "epoch": 29.88, + "learning_rate": 3.506424688542053e-05, + "loss": 2.1269, + "step": 10323500 + }, + { + "epoch": 29.88, + "learning_rate": 3.506352323777325e-05, + "loss": 2.107, + "step": 10324000 + }, + { + "epoch": 29.89, + "learning_rate": 3.5062799590125974e-05, + "loss": 2.0958, + "step": 10324500 + }, + { + "epoch": 29.89, + "learning_rate": 3.5062075942478696e-05, + "loss": 2.1201, + "step": 10325000 + }, + { + "epoch": 29.89, + "learning_rate": 3.5061352294831425e-05, + "loss": 2.1084, + "step": 10325500 + }, + { + "epoch": 29.89, + "learning_rate": 3.506062864718415e-05, + "loss": 2.1035, + "step": 10326000 + }, + { + "epoch": 29.89, + "learning_rate": 3.505990499953687e-05, + "loss": 2.1009, + "step": 10326500 + }, + { + "epoch": 29.89, + "learning_rate": 3.505918135188959e-05, + "loss": 2.0851, + "step": 10327000 + }, + { + "epoch": 29.89, + "learning_rate": 3.5058457704242314e-05, + "loss": 2.0958, + "step": 10327500 + }, + { + "epoch": 29.9, + "learning_rate": 3.5057734056595036e-05, + "loss": 2.0774, + "step": 10328000 + }, + { + "epoch": 29.9, + "learning_rate": 3.505701040894776e-05, + "loss": 2.1328, + "step": 10328500 + }, + { + "epoch": 29.9, + "learning_rate": 3.505628820859578e-05, + "loss": 2.1031, + "step": 10329000 + }, + { + "epoch": 29.9, + "learning_rate": 3.5055566008243796e-05, + "loss": 2.1249, + "step": 10329500 + }, + { + "epoch": 29.9, + "learning_rate": 3.505484236059652e-05, + "loss": 2.0982, + "step": 10330000 + }, + { + "epoch": 29.9, + "learning_rate": 3.505411871294924e-05, + "loss": 2.0988, + "step": 10330500 + }, + { + "epoch": 29.9, + "learning_rate": 3.505339506530196e-05, + "loss": 2.1075, + "step": 10331000 + }, + { + "epoch": 29.91, + "learning_rate": 3.5052671417654685e-05, + "loss": 2.1253, + "step": 10331500 + }, + { + "epoch": 29.91, + "learning_rate": 3.505194777000741e-05, + "loss": 2.1128, + "step": 10332000 + }, + { + "epoch": 29.91, + "learning_rate": 3.505122412236013e-05, + "loss": 2.1027, + "step": 10332500 + }, + { + "epoch": 29.91, + "learning_rate": 3.505050047471286e-05, + "loss": 2.1191, + "step": 10333000 + }, + { + "epoch": 29.91, + "learning_rate": 3.504977682706558e-05, + "loss": 2.0969, + "step": 10333500 + }, + { + "epoch": 29.91, + "learning_rate": 3.50490531794183e-05, + "loss": 2.11, + "step": 10334000 + }, + { + "epoch": 29.91, + "learning_rate": 3.504832953177103e-05, + "loss": 2.1086, + "step": 10334500 + }, + { + "epoch": 29.92, + "learning_rate": 3.5047605884123754e-05, + "loss": 2.0911, + "step": 10335000 + }, + { + "epoch": 29.92, + "learning_rate": 3.5046882236476477e-05, + "loss": 2.1363, + "step": 10335500 + }, + { + "epoch": 29.92, + "learning_rate": 3.50461585888292e-05, + "loss": 2.1077, + "step": 10336000 + }, + { + "epoch": 29.92, + "learning_rate": 3.504543783577251e-05, + "loss": 2.1242, + "step": 10336500 + }, + { + "epoch": 29.92, + "learning_rate": 3.504471418812523e-05, + "loss": 2.1258, + "step": 10337000 + }, + { + "epoch": 29.92, + "learning_rate": 3.504399054047796e-05, + "loss": 2.1157, + "step": 10337500 + }, + { + "epoch": 29.92, + "learning_rate": 3.504326689283068e-05, + "loss": 2.1037, + "step": 10338000 + }, + { + "epoch": 29.93, + "learning_rate": 3.50425432451834e-05, + "loss": 2.1157, + "step": 10338500 + }, + { + "epoch": 29.93, + "learning_rate": 3.5041819597536126e-05, + "loss": 2.1323, + "step": 10339000 + }, + { + "epoch": 29.93, + "learning_rate": 3.504109594988885e-05, + "loss": 2.1242, + "step": 10339500 + }, + { + "epoch": 29.93, + "learning_rate": 3.504037230224158e-05, + "loss": 2.1248, + "step": 10340000 + }, + { + "epoch": 29.93, + "learning_rate": 3.50396486545943e-05, + "loss": 2.1205, + "step": 10340500 + }, + { + "epoch": 29.93, + "learning_rate": 3.503892500694702e-05, + "loss": 2.1261, + "step": 10341000 + }, + { + "epoch": 29.93, + "learning_rate": 3.503820425389033e-05, + "loss": 2.1112, + "step": 10341500 + }, + { + "epoch": 29.94, + "learning_rate": 3.503748060624306e-05, + "loss": 2.1174, + "step": 10342000 + }, + { + "epoch": 29.94, + "learning_rate": 3.503675695859578e-05, + "loss": 2.1267, + "step": 10342500 + }, + { + "epoch": 29.94, + "learning_rate": 3.5036033310948504e-05, + "loss": 2.1154, + "step": 10343000 + }, + { + "epoch": 29.94, + "learning_rate": 3.5035309663301226e-05, + "loss": 2.1113, + "step": 10343500 + }, + { + "epoch": 29.94, + "learning_rate": 3.503458601565395e-05, + "loss": 2.0955, + "step": 10344000 + }, + { + "epoch": 29.94, + "learning_rate": 3.503386236800667e-05, + "loss": 2.1247, + "step": 10344500 + }, + { + "epoch": 29.94, + "learning_rate": 3.5033140167654686e-05, + "loss": 2.0924, + "step": 10345000 + }, + { + "epoch": 29.95, + "learning_rate": 3.503241796730271e-05, + "loss": 2.1218, + "step": 10345500 + }, + { + "epoch": 29.95, + "learning_rate": 3.503169431965543e-05, + "loss": 2.1285, + "step": 10346000 + }, + { + "epoch": 29.95, + "learning_rate": 3.503097067200815e-05, + "loss": 2.1402, + "step": 10346500 + }, + { + "epoch": 29.95, + "learning_rate": 3.5030247024360875e-05, + "loss": 2.1073, + "step": 10347000 + }, + { + "epoch": 29.95, + "learning_rate": 3.50295233767136e-05, + "loss": 2.1025, + "step": 10347500 + }, + { + "epoch": 29.95, + "learning_rate": 3.5028799729066326e-05, + "loss": 2.0933, + "step": 10348000 + }, + { + "epoch": 29.95, + "learning_rate": 3.502807752871434e-05, + "loss": 2.1174, + "step": 10348500 + }, + { + "epoch": 29.96, + "learning_rate": 3.5027353881067064e-05, + "loss": 2.1067, + "step": 10349000 + }, + { + "epoch": 29.96, + "learning_rate": 3.5026630233419786e-05, + "loss": 2.1269, + "step": 10349500 + }, + { + "epoch": 29.96, + "learning_rate": 3.502590658577251e-05, + "loss": 2.1089, + "step": 10350000 + }, + { + "epoch": 29.96, + "learning_rate": 3.502518293812523e-05, + "loss": 2.1373, + "step": 10350500 + }, + { + "epoch": 29.96, + "learning_rate": 3.502445929047796e-05, + "loss": 2.1092, + "step": 10351000 + }, + { + "epoch": 29.96, + "learning_rate": 3.502373564283068e-05, + "loss": 2.1041, + "step": 10351500 + }, + { + "epoch": 29.96, + "learning_rate": 3.5023011995183404e-05, + "loss": 2.1282, + "step": 10352000 + }, + { + "epoch": 29.97, + "learning_rate": 3.5022288347536126e-05, + "loss": 2.1508, + "step": 10352500 + }, + { + "epoch": 29.97, + "learning_rate": 3.502156469988885e-05, + "loss": 2.1082, + "step": 10353000 + }, + { + "epoch": 29.97, + "learning_rate": 3.502084105224157e-05, + "loss": 2.1203, + "step": 10353500 + }, + { + "epoch": 29.97, + "learning_rate": 3.502011740459429e-05, + "loss": 2.125, + "step": 10354000 + }, + { + "epoch": 29.97, + "learning_rate": 3.5019393756947015e-05, + "loss": 2.1222, + "step": 10354500 + }, + { + "epoch": 29.97, + "learning_rate": 3.5018670109299744e-05, + "loss": 2.1277, + "step": 10355000 + }, + { + "epoch": 29.97, + "learning_rate": 3.5017946461652467e-05, + "loss": 2.1167, + "step": 10355500 + }, + { + "epoch": 29.98, + "learning_rate": 3.501722281400519e-05, + "loss": 2.1318, + "step": 10356000 + }, + { + "epoch": 29.98, + "learning_rate": 3.50165035082438e-05, + "loss": 2.1085, + "step": 10356500 + }, + { + "epoch": 29.98, + "learning_rate": 3.501577986059652e-05, + "loss": 2.1203, + "step": 10357000 + }, + { + "epoch": 29.98, + "learning_rate": 3.501505621294924e-05, + "loss": 2.1128, + "step": 10357500 + }, + { + "epoch": 29.98, + "learning_rate": 3.5014332565301964e-05, + "loss": 2.1036, + "step": 10358000 + }, + { + "epoch": 29.98, + "learning_rate": 3.501360891765469e-05, + "loss": 2.1303, + "step": 10358500 + }, + { + "epoch": 29.99, + "learning_rate": 3.501288527000741e-05, + "loss": 2.1141, + "step": 10359000 + }, + { + "epoch": 29.99, + "learning_rate": 3.501216162236013e-05, + "loss": 2.1066, + "step": 10359500 + }, + { + "epoch": 29.99, + "learning_rate": 3.501143797471286e-05, + "loss": 2.1166, + "step": 10360000 + }, + { + "epoch": 29.99, + "learning_rate": 3.501071432706558e-05, + "loss": 2.0972, + "step": 10360500 + }, + { + "epoch": 29.99, + "learning_rate": 3.50099921267136e-05, + "loss": 2.1028, + "step": 10361000 + }, + { + "epoch": 29.99, + "learning_rate": 3.500926847906632e-05, + "loss": 2.1232, + "step": 10361500 + }, + { + "epoch": 29.99, + "learning_rate": 3.500854483141904e-05, + "loss": 2.1076, + "step": 10362000 + }, + { + "epoch": 30.0, + "learning_rate": 3.5007821183771765e-05, + "loss": 2.1259, + "step": 10362500 + }, + { + "epoch": 30.0, + "learning_rate": 3.5007097536124494e-05, + "loss": 2.1214, + "step": 10363000 + }, + { + "epoch": 30.0, + "learning_rate": 3.5006373888477216e-05, + "loss": 2.1293, + "step": 10363500 + }, + { + "epoch": 30.0, + "learning_rate": 3.500565024082994e-05, + "loss": 2.0928, + "step": 10364000 + }, + { + "epoch": 30.0, + "eval_accuracy": 0.6665779107038252, + "eval_accuracy_mlm": 0.6310435936946435, + "eval_accuracy_nsp": 0.8573464776596866, + "eval_loss": 2.1807310581207275, + "eval_runtime": 331.5713, + "eval_samples_per_second": 1316.115, + "eval_steps_per_second": 54.839, + "step": 10364160 } ], "max_steps": 34547200, "num_train_epochs": 100, - "total_flos": 9.466493397162928e+18, + "total_flos": 1.4199760659998734e+19, "trial_name": null, "trial_params": null }