{ "best_metric": 52.1498, "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-14400", "epoch": 516.5562913907285, "eval_steps": 3600, "global_step": 46800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011037527593818985, "learning_rate": 5e-09, "loss": 2.1638, "step": 1 }, { "epoch": 0.22075055187637968, "learning_rate": 1e-07, "loss": 1.9727, "step": 20 }, { "epoch": 0.44150110375275936, "learning_rate": 2e-07, "loss": 2.0028, "step": 40 }, { "epoch": 0.6622516556291391, "learning_rate": 3e-07, "loss": 2.0448, "step": 60 }, { "epoch": 0.8830022075055187, "learning_rate": 4e-07, "loss": 1.9696, "step": 80 }, { "epoch": 1.1037527593818985, "learning_rate": 5e-07, "loss": 1.9375, "step": 100 }, { "epoch": 1.3245033112582782, "learning_rate": 6e-07, "loss": 1.9075, "step": 120 }, { "epoch": 1.5452538631346577, "learning_rate": 7e-07, "loss": 1.9866, "step": 140 }, { "epoch": 1.7660044150110377, "learning_rate": 8e-07, "loss": 1.882, "step": 160 }, { "epoch": 1.9867549668874172, "learning_rate": 9e-07, "loss": 1.9916, "step": 180 }, { "epoch": 2.207505518763797, "learning_rate": 1e-06, "loss": 1.9132, "step": 200 }, { "epoch": 2.4282560706401766, "learning_rate": 9.996864111498258e-07, "loss": 1.9858, "step": 220 }, { "epoch": 2.6490066225165565, "learning_rate": 9.993728222996515e-07, "loss": 1.9373, "step": 240 }, { "epoch": 2.869757174392936, "learning_rate": 9.990592334494773e-07, "loss": 1.9146, "step": 260 }, { "epoch": 3.0905077262693155, "learning_rate": 9.987456445993032e-07, "loss": 1.8665, "step": 280 }, { "epoch": 3.3112582781456954, "learning_rate": 9.98432055749129e-07, "loss": 1.9419, "step": 300 }, { "epoch": 3.5320088300220753, "learning_rate": 9.981184668989545e-07, "loss": 1.8332, "step": 320 }, { "epoch": 3.752759381898455, "learning_rate": 9.978048780487803e-07, "loss": 1.8534, "step": 340 }, { "epoch": 3.9735099337748343, "learning_rate": 9.974912891986062e-07, "loss": 1.9421, "step": 360 }, { "epoch": 4.194260485651214, "learning_rate": 9.97177700348432e-07, "loss": 1.9374, "step": 380 }, { "epoch": 4.415011037527594, "learning_rate": 9.96864111498258e-07, "loss": 1.8876, "step": 400 }, { "epoch": 4.635761589403973, "learning_rate": 9.965505226480835e-07, "loss": 1.8593, "step": 420 }, { "epoch": 4.856512141280353, "learning_rate": 9.962369337979094e-07, "loss": 1.8243, "step": 440 }, { "epoch": 5.077262693156733, "learning_rate": 9.95923344947735e-07, "loss": 1.8791, "step": 460 }, { "epoch": 5.298013245033113, "learning_rate": 9.95609756097561e-07, "loss": 1.8878, "step": 480 }, { "epoch": 5.518763796909492, "learning_rate": 9.952961672473868e-07, "loss": 1.8985, "step": 500 }, { "epoch": 5.739514348785872, "learning_rate": 9.949825783972126e-07, "loss": 1.84, "step": 520 }, { "epoch": 5.960264900662252, "learning_rate": 9.946689895470383e-07, "loss": 1.8468, "step": 540 }, { "epoch": 6.181015452538631, "learning_rate": 9.94355400696864e-07, "loss": 1.8469, "step": 560 }, { "epoch": 6.401766004415011, "learning_rate": 9.940418118466898e-07, "loss": 1.8528, "step": 580 }, { "epoch": 6.622516556291391, "learning_rate": 9.937282229965156e-07, "loss": 1.8269, "step": 600 }, { "epoch": 6.843267108167771, "learning_rate": 9.934146341463415e-07, "loss": 1.8324, "step": 620 }, { "epoch": 7.06401766004415, "learning_rate": 9.931010452961673e-07, "loss": 1.808, "step": 640 }, { "epoch": 7.28476821192053, "learning_rate": 9.92787456445993e-07, "loss": 1.9003, "step": 660 }, { "epoch": 7.50551876379691, "learning_rate": 9.924738675958186e-07, "loss": 1.908, "step": 680 }, { "epoch": 7.72626931567329, "learning_rate": 9.921602787456445e-07, "loss": 1.8165, "step": 700 }, { "epoch": 7.947019867549669, "learning_rate": 9.918466898954704e-07, "loss": 1.7924, "step": 720 }, { "epoch": 8.167770419426049, "learning_rate": 9.915331010452962e-07, "loss": 1.8275, "step": 740 }, { "epoch": 8.388520971302428, "learning_rate": 9.912195121951219e-07, "loss": 1.8033, "step": 760 }, { "epoch": 8.609271523178808, "learning_rate": 9.909059233449477e-07, "loss": 1.8283, "step": 780 }, { "epoch": 8.830022075055188, "learning_rate": 9.905923344947734e-07, "loss": 1.8361, "step": 800 }, { "epoch": 9.050772626931566, "learning_rate": 9.902787456445992e-07, "loss": 1.8264, "step": 820 }, { "epoch": 9.271523178807946, "learning_rate": 9.89965156794425e-07, "loss": 1.8157, "step": 840 }, { "epoch": 9.492273730684326, "learning_rate": 9.89651567944251e-07, "loss": 1.8038, "step": 860 }, { "epoch": 9.713024282560706, "learning_rate": 9.893379790940768e-07, "loss": 1.8495, "step": 880 }, { "epoch": 9.933774834437086, "learning_rate": 9.890243902439024e-07, "loss": 1.8379, "step": 900 }, { "epoch": 10.154525386313466, "learning_rate": 9.88710801393728e-07, "loss": 1.807, "step": 920 }, { "epoch": 10.375275938189846, "learning_rate": 9.88397212543554e-07, "loss": 1.7354, "step": 940 }, { "epoch": 10.596026490066226, "learning_rate": 9.880836236933798e-07, "loss": 1.7909, "step": 960 }, { "epoch": 10.816777041942604, "learning_rate": 9.877700348432054e-07, "loss": 1.8504, "step": 980 }, { "epoch": 11.037527593818984, "learning_rate": 9.874564459930313e-07, "loss": 1.7565, "step": 1000 }, { "epoch": 11.258278145695364, "learning_rate": 9.871428571428572e-07, "loss": 1.8464, "step": 1020 }, { "epoch": 11.479028697571744, "learning_rate": 9.868292682926828e-07, "loss": 1.7463, "step": 1040 }, { "epoch": 11.699779249448124, "learning_rate": 9.865156794425087e-07, "loss": 1.8286, "step": 1060 }, { "epoch": 11.920529801324504, "learning_rate": 9.862020905923345e-07, "loss": 1.7821, "step": 1080 }, { "epoch": 12.141280353200884, "learning_rate": 9.858885017421604e-07, "loss": 1.8069, "step": 1100 }, { "epoch": 12.362030905077262, "learning_rate": 9.85574912891986e-07, "loss": 1.7943, "step": 1120 }, { "epoch": 12.582781456953642, "learning_rate": 9.852613240418117e-07, "loss": 1.7535, "step": 1140 }, { "epoch": 12.803532008830022, "learning_rate": 9.849477351916375e-07, "loss": 1.7881, "step": 1160 }, { "epoch": 13.024282560706402, "learning_rate": 9.846341463414634e-07, "loss": 1.815, "step": 1180 }, { "epoch": 13.245033112582782, "learning_rate": 9.843205574912892e-07, "loss": 1.8258, "step": 1200 }, { "epoch": 13.465783664459162, "learning_rate": 9.840069686411149e-07, "loss": 1.7228, "step": 1220 }, { "epoch": 13.686534216335541, "learning_rate": 9.836933797909407e-07, "loss": 1.7866, "step": 1240 }, { "epoch": 13.90728476821192, "learning_rate": 9.833797909407664e-07, "loss": 1.7871, "step": 1260 }, { "epoch": 14.1280353200883, "learning_rate": 9.830662020905923e-07, "loss": 1.7238, "step": 1280 }, { "epoch": 14.34878587196468, "learning_rate": 9.827526132404181e-07, "loss": 1.805, "step": 1300 }, { "epoch": 14.56953642384106, "learning_rate": 9.82439024390244e-07, "loss": 1.7909, "step": 1320 }, { "epoch": 14.79028697571744, "learning_rate": 9.821254355400698e-07, "loss": 1.7861, "step": 1340 }, { "epoch": 15.01103752759382, "learning_rate": 9.818118466898953e-07, "loss": 1.7651, "step": 1360 }, { "epoch": 15.2317880794702, "learning_rate": 9.814982578397211e-07, "loss": 1.7666, "step": 1380 }, { "epoch": 15.45253863134658, "learning_rate": 9.81184668989547e-07, "loss": 1.7159, "step": 1400 }, { "epoch": 15.673289183222957, "learning_rate": 9.808710801393728e-07, "loss": 1.7701, "step": 1420 }, { "epoch": 15.894039735099337, "learning_rate": 9.805574912891987e-07, "loss": 1.7977, "step": 1440 }, { "epoch": 16.11479028697572, "learning_rate": 9.802439024390243e-07, "loss": 1.7805, "step": 1460 }, { "epoch": 16.335540838852097, "learning_rate": 9.799303135888502e-07, "loss": 1.7711, "step": 1480 }, { "epoch": 16.556291390728475, "learning_rate": 9.796167247386758e-07, "loss": 1.6767, "step": 1500 }, { "epoch": 16.777041942604857, "learning_rate": 9.793031358885017e-07, "loss": 1.722, "step": 1520 }, { "epoch": 16.997792494481235, "learning_rate": 9.789895470383276e-07, "loss": 1.783, "step": 1540 }, { "epoch": 17.218543046357617, "learning_rate": 9.786759581881534e-07, "loss": 1.7233, "step": 1560 }, { "epoch": 17.439293598233995, "learning_rate": 9.78362369337979e-07, "loss": 1.7209, "step": 1580 }, { "epoch": 17.660044150110377, "learning_rate": 9.780487804878047e-07, "loss": 1.6968, "step": 1600 }, { "epoch": 17.880794701986755, "learning_rate": 9.777351916376306e-07, "loss": 1.6818, "step": 1620 }, { "epoch": 18.101545253863133, "learning_rate": 9.774216027874564e-07, "loss": 1.7819, "step": 1640 }, { "epoch": 18.322295805739515, "learning_rate": 9.771080139372823e-07, "loss": 1.7445, "step": 1660 }, { "epoch": 18.543046357615893, "learning_rate": 9.76794425087108e-07, "loss": 1.6888, "step": 1680 }, { "epoch": 18.763796909492275, "learning_rate": 9.764808362369338e-07, "loss": 1.7562, "step": 1700 }, { "epoch": 18.984547461368653, "learning_rate": 9.761672473867594e-07, "loss": 1.7368, "step": 1720 }, { "epoch": 19.205298013245034, "learning_rate": 9.758536585365853e-07, "loss": 1.6816, "step": 1740 }, { "epoch": 19.426048565121413, "learning_rate": 9.755400696864111e-07, "loss": 1.7307, "step": 1760 }, { "epoch": 19.64679911699779, "learning_rate": 9.75226480836237e-07, "loss": 1.7313, "step": 1780 }, { "epoch": 19.867549668874172, "learning_rate": 9.749128919860627e-07, "loss": 1.7407, "step": 1800 }, { "epoch": 20.08830022075055, "learning_rate": 9.745993031358883e-07, "loss": 1.6885, "step": 1820 }, { "epoch": 20.309050772626932, "learning_rate": 9.742857142857142e-07, "loss": 1.6741, "step": 1840 }, { "epoch": 20.52980132450331, "learning_rate": 9.7397212543554e-07, "loss": 1.6958, "step": 1860 }, { "epoch": 20.750551876379692, "learning_rate": 9.736585365853659e-07, "loss": 1.7036, "step": 1880 }, { "epoch": 20.97130242825607, "learning_rate": 9.733449477351917e-07, "loss": 1.7489, "step": 1900 }, { "epoch": 21.192052980132452, "learning_rate": 9.730313588850174e-07, "loss": 1.7719, "step": 1920 }, { "epoch": 21.41280353200883, "learning_rate": 9.72717770034843e-07, "loss": 1.7401, "step": 1940 }, { "epoch": 21.63355408388521, "learning_rate": 9.724041811846689e-07, "loss": 1.7247, "step": 1960 }, { "epoch": 21.85430463576159, "learning_rate": 9.720905923344947e-07, "loss": 1.7175, "step": 1980 }, { "epoch": 22.075055187637968, "learning_rate": 9.717770034843206e-07, "loss": 1.7288, "step": 2000 }, { "epoch": 22.29580573951435, "learning_rate": 9.714634146341462e-07, "loss": 1.7068, "step": 2020 }, { "epoch": 22.516556291390728, "learning_rate": 9.71149825783972e-07, "loss": 1.6812, "step": 2040 }, { "epoch": 22.73730684326711, "learning_rate": 9.708362369337977e-07, "loss": 1.6772, "step": 2060 }, { "epoch": 22.958057395143488, "learning_rate": 9.705226480836236e-07, "loss": 1.7103, "step": 2080 }, { "epoch": 23.178807947019866, "learning_rate": 9.702090592334495e-07, "loss": 1.6984, "step": 2100 }, { "epoch": 23.399558498896248, "learning_rate": 9.698954703832753e-07, "loss": 1.7281, "step": 2120 }, { "epoch": 23.620309050772626, "learning_rate": 9.695818815331012e-07, "loss": 1.7123, "step": 2140 }, { "epoch": 23.841059602649008, "learning_rate": 9.692682926829266e-07, "loss": 1.7174, "step": 2160 }, { "epoch": 24.061810154525386, "learning_rate": 9.689547038327525e-07, "loss": 1.7281, "step": 2180 }, { "epoch": 24.282560706401767, "learning_rate": 9.686411149825783e-07, "loss": 1.7189, "step": 2200 }, { "epoch": 24.503311258278146, "learning_rate": 9.683275261324042e-07, "loss": 1.6272, "step": 2220 }, { "epoch": 24.724061810154524, "learning_rate": 9.6801393728223e-07, "loss": 1.6588, "step": 2240 }, { "epoch": 24.944812362030905, "learning_rate": 9.677003484320557e-07, "loss": 1.6627, "step": 2260 }, { "epoch": 25.165562913907284, "learning_rate": 9.673867595818815e-07, "loss": 1.6504, "step": 2280 }, { "epoch": 25.386313465783665, "learning_rate": 9.670731707317072e-07, "loss": 1.6847, "step": 2300 }, { "epoch": 25.607064017660043, "learning_rate": 9.66759581881533e-07, "loss": 1.6793, "step": 2320 }, { "epoch": 25.827814569536425, "learning_rate": 9.66445993031359e-07, "loss": 1.7009, "step": 2340 }, { "epoch": 26.048565121412803, "learning_rate": 9.661324041811848e-07, "loss": 1.6581, "step": 2360 }, { "epoch": 26.26931567328918, "learning_rate": 9.658188153310104e-07, "loss": 1.6648, "step": 2380 }, { "epoch": 26.490066225165563, "learning_rate": 9.65505226480836e-07, "loss": 1.6303, "step": 2400 }, { "epoch": 26.71081677704194, "learning_rate": 9.65191637630662e-07, "loss": 1.7222, "step": 2420 }, { "epoch": 26.931567328918323, "learning_rate": 9.648780487804878e-07, "loss": 1.7164, "step": 2440 }, { "epoch": 27.1523178807947, "learning_rate": 9.645644599303136e-07, "loss": 1.6894, "step": 2460 }, { "epoch": 27.373068432671083, "learning_rate": 9.642508710801393e-07, "loss": 1.6904, "step": 2480 }, { "epoch": 27.59381898454746, "learning_rate": 9.639372822299651e-07, "loss": 1.6133, "step": 2500 }, { "epoch": 27.814569536423843, "learning_rate": 9.636236933797908e-07, "loss": 1.6445, "step": 2520 }, { "epoch": 28.03532008830022, "learning_rate": 9.633101045296166e-07, "loss": 1.6421, "step": 2540 }, { "epoch": 28.2560706401766, "learning_rate": 9.629965156794425e-07, "loss": 1.672, "step": 2560 }, { "epoch": 28.47682119205298, "learning_rate": 9.626829268292684e-07, "loss": 1.6592, "step": 2580 }, { "epoch": 28.69757174392936, "learning_rate": 9.62369337979094e-07, "loss": 1.649, "step": 2600 }, { "epoch": 28.91832229580574, "learning_rate": 9.620557491289199e-07, "loss": 1.6667, "step": 2620 }, { "epoch": 29.13907284768212, "learning_rate": 9.617421602787455e-07, "loss": 1.6172, "step": 2640 }, { "epoch": 29.3598233995585, "learning_rate": 9.614285714285714e-07, "loss": 1.6696, "step": 2660 }, { "epoch": 29.58057395143488, "learning_rate": 9.611149825783972e-07, "loss": 1.7091, "step": 2680 }, { "epoch": 29.801324503311257, "learning_rate": 9.60801393728223e-07, "loss": 1.6111, "step": 2700 }, { "epoch": 30.02207505518764, "learning_rate": 9.604878048780487e-07, "loss": 1.6274, "step": 2720 }, { "epoch": 30.242825607064017, "learning_rate": 9.601742160278746e-07, "loss": 1.6186, "step": 2740 }, { "epoch": 30.4635761589404, "learning_rate": 9.598606271777002e-07, "loss": 1.6336, "step": 2760 }, { "epoch": 30.684326710816777, "learning_rate": 9.59547038327526e-07, "loss": 1.6414, "step": 2780 }, { "epoch": 30.90507726269316, "learning_rate": 9.59233449477352e-07, "loss": 1.6628, "step": 2800 }, { "epoch": 31.125827814569536, "learning_rate": 9.589198606271776e-07, "loss": 1.6092, "step": 2820 }, { "epoch": 31.346578366445915, "learning_rate": 9.586062717770034e-07, "loss": 1.669, "step": 2840 }, { "epoch": 31.567328918322296, "learning_rate": 9.58292682926829e-07, "loss": 1.6597, "step": 2860 }, { "epoch": 31.788079470198674, "learning_rate": 9.57979094076655e-07, "loss": 1.5658, "step": 2880 }, { "epoch": 32.00883002207505, "learning_rate": 9.576655052264808e-07, "loss": 1.6662, "step": 2900 }, { "epoch": 32.22958057395144, "learning_rate": 9.573519163763067e-07, "loss": 1.6241, "step": 2920 }, { "epoch": 32.450331125827816, "learning_rate": 9.570383275261325e-07, "loss": 1.6181, "step": 2940 }, { "epoch": 32.671081677704194, "learning_rate": 9.567247386759582e-07, "loss": 1.6561, "step": 2960 }, { "epoch": 32.89183222958057, "learning_rate": 9.564111498257838e-07, "loss": 1.6176, "step": 2980 }, { "epoch": 33.11258278145695, "learning_rate": 9.560975609756097e-07, "loss": 1.686, "step": 3000 }, { "epoch": 33.333333333333336, "learning_rate": 9.557839721254355e-07, "loss": 1.5938, "step": 3020 }, { "epoch": 33.554083885209714, "learning_rate": 9.554703832752614e-07, "loss": 1.5952, "step": 3040 }, { "epoch": 33.77483443708609, "learning_rate": 9.55156794425087e-07, "loss": 1.5772, "step": 3060 }, { "epoch": 33.99558498896247, "learning_rate": 9.548432055749129e-07, "loss": 1.5889, "step": 3080 }, { "epoch": 34.216335540838855, "learning_rate": 9.545296167247385e-07, "loss": 1.5953, "step": 3100 }, { "epoch": 34.437086092715234, "learning_rate": 9.542160278745644e-07, "loss": 1.6379, "step": 3120 }, { "epoch": 34.65783664459161, "learning_rate": 9.539024390243903e-07, "loss": 1.6252, "step": 3140 }, { "epoch": 34.87858719646799, "learning_rate": 9.53588850174216e-07, "loss": 1.628, "step": 3160 }, { "epoch": 35.09933774834437, "learning_rate": 9.532752613240419e-07, "loss": 1.6547, "step": 3180 }, { "epoch": 35.32008830022075, "learning_rate": 9.529616724738675e-07, "loss": 1.6308, "step": 3200 }, { "epoch": 35.54083885209713, "learning_rate": 9.526480836236935e-07, "loss": 1.599, "step": 3220 }, { "epoch": 35.76158940397351, "learning_rate": 9.523344947735191e-07, "loss": 1.6198, "step": 3240 }, { "epoch": 35.98233995584989, "learning_rate": 9.520209059233449e-07, "loss": 1.6248, "step": 3260 }, { "epoch": 36.203090507726266, "learning_rate": 9.517073170731706e-07, "loss": 1.576, "step": 3280 }, { "epoch": 36.42384105960265, "learning_rate": 9.513937282229965e-07, "loss": 1.6648, "step": 3300 }, { "epoch": 36.64459161147903, "learning_rate": 9.510801393728223e-07, "loss": 1.6035, "step": 3320 }, { "epoch": 36.86534216335541, "learning_rate": 9.50766550522648e-07, "loss": 1.6051, "step": 3340 }, { "epoch": 37.086092715231786, "learning_rate": 9.504529616724738e-07, "loss": 1.6052, "step": 3360 }, { "epoch": 37.30684326710817, "learning_rate": 9.501393728222996e-07, "loss": 1.6081, "step": 3380 }, { "epoch": 37.52759381898455, "learning_rate": 9.498257839721255e-07, "loss": 1.6066, "step": 3400 }, { "epoch": 37.74834437086093, "learning_rate": 9.495121951219511e-07, "loss": 1.5763, "step": 3420 }, { "epoch": 37.969094922737305, "learning_rate": 9.49198606271777e-07, "loss": 1.6039, "step": 3440 }, { "epoch": 38.18984547461368, "learning_rate": 9.488850174216028e-07, "loss": 1.6203, "step": 3460 }, { "epoch": 38.41059602649007, "learning_rate": 9.485714285714285e-07, "loss": 1.619, "step": 3480 }, { "epoch": 38.63134657836645, "learning_rate": 9.482578397212543e-07, "loss": 1.6055, "step": 3500 }, { "epoch": 38.852097130242825, "learning_rate": 9.479442508710801e-07, "loss": 1.6037, "step": 3520 }, { "epoch": 39.0728476821192, "learning_rate": 9.476306620209059e-07, "loss": 1.6259, "step": 3540 }, { "epoch": 39.29359823399559, "learning_rate": 9.473170731707316e-07, "loss": 1.5917, "step": 3560 }, { "epoch": 39.51434878587197, "learning_rate": 9.470034843205574e-07, "loss": 1.6039, "step": 3580 }, { "epoch": 39.735099337748345, "learning_rate": 9.466898954703833e-07, "loss": 1.5364, "step": 3600 }, { "epoch": 39.735099337748345, "eval_bleu": 41.1359, "eval_gen_len": 9.5667, "eval_loss": 2.0473527908325195, "eval_runtime": 3.9497, "eval_samples_per_second": 7.596, "eval_steps_per_second": 1.519, "step": 3600 }, { "epoch": 39.95584988962472, "learning_rate": 9.46376306620209e-07, "loss": 1.6224, "step": 3620 }, { "epoch": 40.1766004415011, "learning_rate": 9.460627177700348e-07, "loss": 1.5808, "step": 3640 }, { "epoch": 40.397350993377486, "learning_rate": 9.457491289198605e-07, "loss": 1.6187, "step": 3660 }, { "epoch": 40.618101545253865, "learning_rate": 9.454355400696864e-07, "loss": 1.6131, "step": 3680 }, { "epoch": 40.83885209713024, "learning_rate": 9.451219512195122e-07, "loss": 1.603, "step": 3700 }, { "epoch": 41.05960264900662, "learning_rate": 9.448083623693379e-07, "loss": 1.6253, "step": 3720 }, { "epoch": 41.280353200883, "learning_rate": 9.444947735191638e-07, "loss": 1.6125, "step": 3740 }, { "epoch": 41.501103752759384, "learning_rate": 9.441811846689895e-07, "loss": 1.5777, "step": 3760 }, { "epoch": 41.72185430463576, "learning_rate": 9.438675958188153e-07, "loss": 1.5642, "step": 3780 }, { "epoch": 41.94260485651214, "learning_rate": 9.43554006968641e-07, "loss": 1.5773, "step": 3800 }, { "epoch": 42.16335540838852, "learning_rate": 9.432404181184669e-07, "loss": 1.5218, "step": 3820 }, { "epoch": 42.384105960264904, "learning_rate": 9.429268292682926e-07, "loss": 1.5751, "step": 3840 }, { "epoch": 42.60485651214128, "learning_rate": 9.426132404181184e-07, "loss": 1.5686, "step": 3860 }, { "epoch": 42.82560706401766, "learning_rate": 9.422996515679442e-07, "loss": 1.5917, "step": 3880 }, { "epoch": 43.04635761589404, "learning_rate": 9.4198606271777e-07, "loss": 1.5427, "step": 3900 }, { "epoch": 43.26710816777042, "learning_rate": 9.416724738675958e-07, "loss": 1.6199, "step": 3920 }, { "epoch": 43.4878587196468, "learning_rate": 9.413588850174215e-07, "loss": 1.5946, "step": 3940 }, { "epoch": 43.70860927152318, "learning_rate": 9.410452961672474e-07, "loss": 1.6212, "step": 3960 }, { "epoch": 43.92935982339956, "learning_rate": 9.407317073170731e-07, "loss": 1.5569, "step": 3980 }, { "epoch": 44.150110375275936, "learning_rate": 9.404181184668989e-07, "loss": 1.521, "step": 4000 }, { "epoch": 44.370860927152314, "learning_rate": 9.401045296167247e-07, "loss": 1.56, "step": 4020 }, { "epoch": 44.5916114790287, "learning_rate": 9.397909407665504e-07, "loss": 1.6059, "step": 4040 }, { "epoch": 44.81236203090508, "learning_rate": 9.394773519163763e-07, "loss": 1.5684, "step": 4060 }, { "epoch": 45.033112582781456, "learning_rate": 9.39163763066202e-07, "loss": 1.539, "step": 4080 }, { "epoch": 45.253863134657834, "learning_rate": 9.388501742160278e-07, "loss": 1.5336, "step": 4100 }, { "epoch": 45.47461368653422, "learning_rate": 9.385365853658536e-07, "loss": 1.5521, "step": 4120 }, { "epoch": 45.6953642384106, "learning_rate": 9.382229965156794e-07, "loss": 1.5281, "step": 4140 }, { "epoch": 45.916114790286976, "learning_rate": 9.379094076655052e-07, "loss": 1.6375, "step": 4160 }, { "epoch": 46.136865342163354, "learning_rate": 9.375958188153309e-07, "loss": 1.5615, "step": 4180 }, { "epoch": 46.35761589403973, "learning_rate": 9.372822299651568e-07, "loss": 1.5181, "step": 4200 }, { "epoch": 46.57836644591612, "learning_rate": 9.369686411149824e-07, "loss": 1.6009, "step": 4220 }, { "epoch": 46.799116997792495, "learning_rate": 9.366550522648083e-07, "loss": 1.5895, "step": 4240 }, { "epoch": 47.019867549668874, "learning_rate": 9.363414634146342e-07, "loss": 1.5159, "step": 4260 }, { "epoch": 47.24061810154525, "learning_rate": 9.360278745644599e-07, "loss": 1.5179, "step": 4280 }, { "epoch": 47.46136865342164, "learning_rate": 9.357142857142857e-07, "loss": 1.5585, "step": 4300 }, { "epoch": 47.682119205298015, "learning_rate": 9.354006968641114e-07, "loss": 1.6147, "step": 4320 }, { "epoch": 47.90286975717439, "learning_rate": 9.350871080139373e-07, "loss": 1.5307, "step": 4340 }, { "epoch": 48.12362030905077, "learning_rate": 9.34773519163763e-07, "loss": 1.5673, "step": 4360 }, { "epoch": 48.34437086092715, "learning_rate": 9.344599303135888e-07, "loss": 1.5442, "step": 4380 }, { "epoch": 48.565121412803535, "learning_rate": 9.341463414634146e-07, "loss": 1.5177, "step": 4400 }, { "epoch": 48.78587196467991, "learning_rate": 9.338327526132404e-07, "loss": 1.5909, "step": 4420 }, { "epoch": 49.00662251655629, "learning_rate": 9.335191637630661e-07, "loss": 1.5565, "step": 4440 }, { "epoch": 49.22737306843267, "learning_rate": 9.332055749128919e-07, "loss": 1.4945, "step": 4460 }, { "epoch": 49.44812362030905, "learning_rate": 9.328919860627177e-07, "loss": 1.5253, "step": 4480 }, { "epoch": 49.66887417218543, "learning_rate": 9.325783972125436e-07, "loss": 1.563, "step": 4500 }, { "epoch": 49.88962472406181, "learning_rate": 9.322648083623693e-07, "loss": 1.6016, "step": 4520 }, { "epoch": 50.11037527593819, "learning_rate": 9.319512195121951e-07, "loss": 1.6032, "step": 4540 }, { "epoch": 50.33112582781457, "learning_rate": 9.316376306620209e-07, "loss": 1.5038, "step": 4560 }, { "epoch": 50.55187637969095, "learning_rate": 9.313240418118467e-07, "loss": 1.545, "step": 4580 }, { "epoch": 50.77262693156733, "learning_rate": 9.310104529616724e-07, "loss": 1.5676, "step": 4600 }, { "epoch": 50.99337748344371, "learning_rate": 9.306968641114981e-07, "loss": 1.5443, "step": 4620 }, { "epoch": 51.21412803532009, "learning_rate": 9.303832752613241e-07, "loss": 1.5608, "step": 4640 }, { "epoch": 51.434878587196465, "learning_rate": 9.300696864111497e-07, "loss": 1.551, "step": 4660 }, { "epoch": 51.65562913907285, "learning_rate": 9.297560975609756e-07, "loss": 1.5556, "step": 4680 }, { "epoch": 51.87637969094923, "learning_rate": 9.294425087108013e-07, "loss": 1.5517, "step": 4700 }, { "epoch": 52.09713024282561, "learning_rate": 9.291289198606272e-07, "loss": 1.5384, "step": 4720 }, { "epoch": 52.317880794701985, "learning_rate": 9.288153310104528e-07, "loss": 1.4869, "step": 4740 }, { "epoch": 52.53863134657836, "learning_rate": 9.285017421602787e-07, "loss": 1.5221, "step": 4760 }, { "epoch": 52.75938189845475, "learning_rate": 9.281881533101046e-07, "loss": 1.5883, "step": 4780 }, { "epoch": 52.980132450331126, "learning_rate": 9.278745644599303e-07, "loss": 1.5276, "step": 4800 }, { "epoch": 53.200883002207505, "learning_rate": 9.275609756097561e-07, "loss": 1.4969, "step": 4820 }, { "epoch": 53.42163355408388, "learning_rate": 9.272473867595818e-07, "loss": 1.5043, "step": 4840 }, { "epoch": 53.64238410596027, "learning_rate": 9.269337979094077e-07, "loss": 1.5205, "step": 4860 }, { "epoch": 53.863134657836646, "learning_rate": 9.266202090592334e-07, "loss": 1.4685, "step": 4880 }, { "epoch": 54.083885209713024, "learning_rate": 9.263066202090592e-07, "loss": 1.5099, "step": 4900 }, { "epoch": 54.3046357615894, "learning_rate": 9.25993031358885e-07, "loss": 1.531, "step": 4920 }, { "epoch": 54.52538631346578, "learning_rate": 9.256794425087108e-07, "loss": 1.5637, "step": 4940 }, { "epoch": 54.746136865342166, "learning_rate": 9.253658536585365e-07, "loss": 1.5795, "step": 4960 }, { "epoch": 54.966887417218544, "learning_rate": 9.250522648083623e-07, "loss": 1.473, "step": 4980 }, { "epoch": 55.18763796909492, "learning_rate": 9.247386759581881e-07, "loss": 1.4921, "step": 5000 }, { "epoch": 55.4083885209713, "learning_rate": 9.244250871080139e-07, "loss": 1.5147, "step": 5020 }, { "epoch": 55.629139072847686, "learning_rate": 9.241114982578397e-07, "loss": 1.5259, "step": 5040 }, { "epoch": 55.849889624724064, "learning_rate": 9.237979094076655e-07, "loss": 1.5481, "step": 5060 }, { "epoch": 56.07064017660044, "learning_rate": 9.234843205574913e-07, "loss": 1.5238, "step": 5080 }, { "epoch": 56.29139072847682, "learning_rate": 9.23170731707317e-07, "loss": 1.5497, "step": 5100 }, { "epoch": 56.5121412803532, "learning_rate": 9.228571428571428e-07, "loss": 1.5003, "step": 5120 }, { "epoch": 56.73289183222958, "learning_rate": 9.225435540069686e-07, "loss": 1.511, "step": 5140 }, { "epoch": 56.95364238410596, "learning_rate": 9.222299651567944e-07, "loss": 1.5426, "step": 5160 }, { "epoch": 57.17439293598234, "learning_rate": 9.219163763066201e-07, "loss": 1.4937, "step": 5180 }, { "epoch": 57.39514348785872, "learning_rate": 9.216027874564459e-07, "loss": 1.5059, "step": 5200 }, { "epoch": 57.615894039735096, "learning_rate": 9.212891986062717e-07, "loss": 1.548, "step": 5220 }, { "epoch": 57.83664459161148, "learning_rate": 9.209756097560976e-07, "loss": 1.4691, "step": 5240 }, { "epoch": 58.05739514348786, "learning_rate": 9.206620209059232e-07, "loss": 1.5459, "step": 5260 }, { "epoch": 58.27814569536424, "learning_rate": 9.203484320557491e-07, "loss": 1.5569, "step": 5280 }, { "epoch": 58.498896247240616, "learning_rate": 9.200348432055748e-07, "loss": 1.4634, "step": 5300 }, { "epoch": 58.719646799117, "learning_rate": 9.197212543554007e-07, "loss": 1.4567, "step": 5320 }, { "epoch": 58.94039735099338, "learning_rate": 9.194076655052265e-07, "loss": 1.5349, "step": 5340 }, { "epoch": 59.16114790286976, "learning_rate": 9.190940766550522e-07, "loss": 1.4706, "step": 5360 }, { "epoch": 59.381898454746135, "learning_rate": 9.187804878048781e-07, "loss": 1.4932, "step": 5380 }, { "epoch": 59.602649006622514, "learning_rate": 9.184668989547037e-07, "loss": 1.5204, "step": 5400 }, { "epoch": 59.8233995584989, "learning_rate": 9.181533101045296e-07, "loss": 1.5267, "step": 5420 }, { "epoch": 60.04415011037528, "learning_rate": 9.178397212543552e-07, "loss": 1.5275, "step": 5440 }, { "epoch": 60.264900662251655, "learning_rate": 9.175261324041812e-07, "loss": 1.5116, "step": 5460 }, { "epoch": 60.48565121412803, "learning_rate": 9.172125435540069e-07, "loss": 1.4686, "step": 5480 }, { "epoch": 60.70640176600442, "learning_rate": 9.168989547038327e-07, "loss": 1.4902, "step": 5500 }, { "epoch": 60.9271523178808, "learning_rate": 9.165853658536585e-07, "loss": 1.4856, "step": 5520 }, { "epoch": 61.147902869757175, "learning_rate": 9.162717770034843e-07, "loss": 1.4982, "step": 5540 }, { "epoch": 61.36865342163355, "learning_rate": 9.1595818815331e-07, "loss": 1.4829, "step": 5560 }, { "epoch": 61.58940397350993, "learning_rate": 9.156445993031358e-07, "loss": 1.5049, "step": 5580 }, { "epoch": 61.81015452538632, "learning_rate": 9.153310104529617e-07, "loss": 1.5125, "step": 5600 }, { "epoch": 62.030905077262695, "learning_rate": 9.150174216027874e-07, "loss": 1.4746, "step": 5620 }, { "epoch": 62.25165562913907, "learning_rate": 9.147038327526132e-07, "loss": 1.5128, "step": 5640 }, { "epoch": 62.47240618101545, "learning_rate": 9.14390243902439e-07, "loss": 1.4799, "step": 5660 }, { "epoch": 62.69315673289183, "learning_rate": 9.140766550522648e-07, "loss": 1.4395, "step": 5680 }, { "epoch": 62.913907284768214, "learning_rate": 9.137630662020905e-07, "loss": 1.5277, "step": 5700 }, { "epoch": 63.13465783664459, "learning_rate": 9.134494773519163e-07, "loss": 1.4806, "step": 5720 }, { "epoch": 63.35540838852097, "learning_rate": 9.131358885017421e-07, "loss": 1.5123, "step": 5740 }, { "epoch": 63.57615894039735, "learning_rate": 9.12822299651568e-07, "loss": 1.5502, "step": 5760 }, { "epoch": 63.796909492273734, "learning_rate": 9.125087108013936e-07, "loss": 1.4732, "step": 5780 }, { "epoch": 64.0176600441501, "learning_rate": 9.121951219512195e-07, "loss": 1.4694, "step": 5800 }, { "epoch": 64.23841059602648, "learning_rate": 9.118815331010452e-07, "loss": 1.4394, "step": 5820 }, { "epoch": 64.45916114790288, "learning_rate": 9.11567944250871e-07, "loss": 1.4776, "step": 5840 }, { "epoch": 64.67991169977925, "learning_rate": 9.112543554006967e-07, "loss": 1.5363, "step": 5860 }, { "epoch": 64.90066225165563, "learning_rate": 9.109407665505226e-07, "loss": 1.4584, "step": 5880 }, { "epoch": 65.12141280353201, "learning_rate": 9.106271777003485e-07, "loss": 1.4956, "step": 5900 }, { "epoch": 65.34216335540839, "learning_rate": 9.103135888501741e-07, "loss": 1.4672, "step": 5920 }, { "epoch": 65.56291390728477, "learning_rate": 9.1e-07, "loss": 1.4765, "step": 5940 }, { "epoch": 65.78366445916114, "learning_rate": 9.096864111498257e-07, "loss": 1.4821, "step": 5960 }, { "epoch": 66.00441501103752, "learning_rate": 9.093728222996516e-07, "loss": 1.4561, "step": 5980 }, { "epoch": 66.2251655629139, "learning_rate": 9.090592334494772e-07, "loss": 1.514, "step": 6000 }, { "epoch": 66.4459161147903, "learning_rate": 9.08745644599303e-07, "loss": 1.495, "step": 6020 }, { "epoch": 66.66666666666667, "learning_rate": 9.084320557491289e-07, "loss": 1.4413, "step": 6040 }, { "epoch": 66.88741721854305, "learning_rate": 9.081184668989546e-07, "loss": 1.4489, "step": 6060 }, { "epoch": 67.10816777041943, "learning_rate": 9.078048780487804e-07, "loss": 1.4897, "step": 6080 }, { "epoch": 67.3289183222958, "learning_rate": 9.074912891986062e-07, "loss": 1.4299, "step": 6100 }, { "epoch": 67.54966887417218, "learning_rate": 9.071777003484321e-07, "loss": 1.4706, "step": 6120 }, { "epoch": 67.77041942604856, "learning_rate": 9.068641114982577e-07, "loss": 1.5075, "step": 6140 }, { "epoch": 67.99116997792494, "learning_rate": 9.065505226480836e-07, "loss": 1.5227, "step": 6160 }, { "epoch": 68.21192052980132, "learning_rate": 9.062369337979094e-07, "loss": 1.456, "step": 6180 }, { "epoch": 68.43267108167771, "learning_rate": 9.059233449477352e-07, "loss": 1.4677, "step": 6200 }, { "epoch": 68.65342163355409, "learning_rate": 9.056097560975609e-07, "loss": 1.4993, "step": 6220 }, { "epoch": 68.87417218543047, "learning_rate": 9.052961672473867e-07, "loss": 1.4612, "step": 6240 }, { "epoch": 69.09492273730685, "learning_rate": 9.049825783972125e-07, "loss": 1.4549, "step": 6260 }, { "epoch": 69.31567328918322, "learning_rate": 9.046689895470383e-07, "loss": 1.5033, "step": 6280 }, { "epoch": 69.5364238410596, "learning_rate": 9.04355400696864e-07, "loss": 1.4558, "step": 6300 }, { "epoch": 69.75717439293598, "learning_rate": 9.040418118466899e-07, "loss": 1.4884, "step": 6320 }, { "epoch": 69.97792494481236, "learning_rate": 9.037282229965156e-07, "loss": 1.4332, "step": 6340 }, { "epoch": 70.19867549668874, "learning_rate": 9.034146341463414e-07, "loss": 1.455, "step": 6360 }, { "epoch": 70.41942604856513, "learning_rate": 9.031010452961671e-07, "loss": 1.4155, "step": 6380 }, { "epoch": 70.6401766004415, "learning_rate": 9.02787456445993e-07, "loss": 1.5361, "step": 6400 }, { "epoch": 70.86092715231788, "learning_rate": 9.024738675958189e-07, "loss": 1.4696, "step": 6420 }, { "epoch": 71.08167770419426, "learning_rate": 9.021602787456445e-07, "loss": 1.4936, "step": 6440 }, { "epoch": 71.30242825607064, "learning_rate": 9.018466898954704e-07, "loss": 1.4689, "step": 6460 }, { "epoch": 71.52317880794702, "learning_rate": 9.015331010452961e-07, "loss": 1.4862, "step": 6480 }, { "epoch": 71.7439293598234, "learning_rate": 9.012195121951219e-07, "loss": 1.463, "step": 6500 }, { "epoch": 71.96467991169978, "learning_rate": 9.009059233449477e-07, "loss": 1.4663, "step": 6520 }, { "epoch": 72.18543046357615, "learning_rate": 9.005923344947735e-07, "loss": 1.4918, "step": 6540 }, { "epoch": 72.40618101545253, "learning_rate": 9.002787456445993e-07, "loss": 1.474, "step": 6560 }, { "epoch": 72.62693156732892, "learning_rate": 8.99965156794425e-07, "loss": 1.4693, "step": 6580 }, { "epoch": 72.8476821192053, "learning_rate": 8.996515679442507e-07, "loss": 1.438, "step": 6600 }, { "epoch": 73.06843267108168, "learning_rate": 8.993379790940766e-07, "loss": 1.4405, "step": 6620 }, { "epoch": 73.28918322295806, "learning_rate": 8.990243902439025e-07, "loss": 1.4501, "step": 6640 }, { "epoch": 73.50993377483444, "learning_rate": 8.987108013937282e-07, "loss": 1.4801, "step": 6660 }, { "epoch": 73.73068432671081, "learning_rate": 8.98397212543554e-07, "loss": 1.4407, "step": 6680 }, { "epoch": 73.9514348785872, "learning_rate": 8.980836236933798e-07, "loss": 1.4393, "step": 6700 }, { "epoch": 74.17218543046357, "learning_rate": 8.977700348432056e-07, "loss": 1.3913, "step": 6720 }, { "epoch": 74.39293598233995, "learning_rate": 8.974564459930313e-07, "loss": 1.4949, "step": 6740 }, { "epoch": 74.61368653421634, "learning_rate": 8.971428571428571e-07, "loss": 1.4363, "step": 6760 }, { "epoch": 74.83443708609272, "learning_rate": 8.968292682926829e-07, "loss": 1.4655, "step": 6780 }, { "epoch": 75.0551876379691, "learning_rate": 8.965156794425087e-07, "loss": 1.4991, "step": 6800 }, { "epoch": 75.27593818984548, "learning_rate": 8.962020905923344e-07, "loss": 1.4516, "step": 6820 }, { "epoch": 75.49668874172185, "learning_rate": 8.958885017421603e-07, "loss": 1.5084, "step": 6840 }, { "epoch": 75.71743929359823, "learning_rate": 8.95574912891986e-07, "loss": 1.4542, "step": 6860 }, { "epoch": 75.93818984547461, "learning_rate": 8.952613240418118e-07, "loss": 1.475, "step": 6880 }, { "epoch": 76.15894039735099, "learning_rate": 8.949477351916375e-07, "loss": 1.4287, "step": 6900 }, { "epoch": 76.37969094922737, "learning_rate": 8.946341463414634e-07, "loss": 1.453, "step": 6920 }, { "epoch": 76.60044150110376, "learning_rate": 8.943205574912893e-07, "loss": 1.4372, "step": 6940 }, { "epoch": 76.82119205298014, "learning_rate": 8.940069686411149e-07, "loss": 1.4665, "step": 6960 }, { "epoch": 77.04194260485652, "learning_rate": 8.936933797909408e-07, "loss": 1.4841, "step": 6980 }, { "epoch": 77.2626931567329, "learning_rate": 8.933797909407665e-07, "loss": 1.4491, "step": 7000 }, { "epoch": 77.48344370860927, "learning_rate": 8.930662020905923e-07, "loss": 1.4382, "step": 7020 }, { "epoch": 77.70419426048565, "learning_rate": 8.92752613240418e-07, "loss": 1.4206, "step": 7040 }, { "epoch": 77.92494481236203, "learning_rate": 8.924390243902439e-07, "loss": 1.4521, "step": 7060 }, { "epoch": 78.1456953642384, "learning_rate": 8.921254355400697e-07, "loss": 1.4688, "step": 7080 }, { "epoch": 78.36644591611478, "learning_rate": 8.918118466898954e-07, "loss": 1.4304, "step": 7100 }, { "epoch": 78.58719646799118, "learning_rate": 8.914982578397212e-07, "loss": 1.4165, "step": 7120 }, { "epoch": 78.80794701986756, "learning_rate": 8.91184668989547e-07, "loss": 1.4728, "step": 7140 }, { "epoch": 79.02869757174393, "learning_rate": 8.908710801393728e-07, "loss": 1.4959, "step": 7160 }, { "epoch": 79.24944812362031, "learning_rate": 8.905574912891986e-07, "loss": 1.4211, "step": 7180 }, { "epoch": 79.47019867549669, "learning_rate": 8.902439024390244e-07, "loss": 1.4794, "step": 7200 }, { "epoch": 79.47019867549669, "eval_bleu": 44.4125, "eval_gen_len": 9.0667, "eval_loss": 2.0255990028381348, "eval_runtime": 3.4282, "eval_samples_per_second": 8.751, "eval_steps_per_second": 1.75, "step": 7200 }, { "epoch": 79.69094922737307, "learning_rate": 8.899303135888502e-07, "loss": 1.4294, "step": 7220 }, { "epoch": 79.91169977924945, "learning_rate": 8.896167247386759e-07, "loss": 1.4691, "step": 7240 }, { "epoch": 80.13245033112582, "learning_rate": 8.893031358885017e-07, "loss": 1.4619, "step": 7260 }, { "epoch": 80.3532008830022, "learning_rate": 8.889895470383275e-07, "loss": 1.4952, "step": 7280 }, { "epoch": 80.57395143487858, "learning_rate": 8.886759581881533e-07, "loss": 1.4284, "step": 7300 }, { "epoch": 80.79470198675497, "learning_rate": 8.88362369337979e-07, "loss": 1.3992, "step": 7320 }, { "epoch": 81.01545253863135, "learning_rate": 8.880487804878048e-07, "loss": 1.4512, "step": 7340 }, { "epoch": 81.23620309050773, "learning_rate": 8.877351916376307e-07, "loss": 1.4298, "step": 7360 }, { "epoch": 81.45695364238411, "learning_rate": 8.874216027874564e-07, "loss": 1.4442, "step": 7380 }, { "epoch": 81.67770419426049, "learning_rate": 8.871080139372822e-07, "loss": 1.4112, "step": 7400 }, { "epoch": 81.89845474613686, "learning_rate": 8.867944250871079e-07, "loss": 1.4365, "step": 7420 }, { "epoch": 82.11920529801324, "learning_rate": 8.864808362369338e-07, "loss": 1.5076, "step": 7440 }, { "epoch": 82.33995584988962, "learning_rate": 8.861672473867594e-07, "loss": 1.4004, "step": 7460 }, { "epoch": 82.560706401766, "learning_rate": 8.858536585365853e-07, "loss": 1.4641, "step": 7480 }, { "epoch": 82.78145695364239, "learning_rate": 8.855400696864112e-07, "loss": 1.4321, "step": 7500 }, { "epoch": 83.00220750551877, "learning_rate": 8.85226480836237e-07, "loss": 1.4592, "step": 7520 }, { "epoch": 83.22295805739515, "learning_rate": 8.849128919860627e-07, "loss": 1.4101, "step": 7540 }, { "epoch": 83.44370860927152, "learning_rate": 8.845993031358884e-07, "loss": 1.4455, "step": 7560 }, { "epoch": 83.6644591611479, "learning_rate": 8.842857142857143e-07, "loss": 1.4175, "step": 7580 }, { "epoch": 83.88520971302428, "learning_rate": 8.8397212543554e-07, "loss": 1.4389, "step": 7600 }, { "epoch": 84.10596026490066, "learning_rate": 8.836585365853658e-07, "loss": 1.4963, "step": 7620 }, { "epoch": 84.32671081677704, "learning_rate": 8.833449477351916e-07, "loss": 1.4683, "step": 7640 }, { "epoch": 84.54746136865342, "learning_rate": 8.830313588850174e-07, "loss": 1.4528, "step": 7660 }, { "epoch": 84.76821192052981, "learning_rate": 8.827177700348431e-07, "loss": 1.4128, "step": 7680 }, { "epoch": 84.98896247240619, "learning_rate": 8.824041811846689e-07, "loss": 1.4012, "step": 7700 }, { "epoch": 85.20971302428256, "learning_rate": 8.820905923344947e-07, "loss": 1.4448, "step": 7720 }, { "epoch": 85.43046357615894, "learning_rate": 8.817770034843205e-07, "loss": 1.3746, "step": 7740 }, { "epoch": 85.65121412803532, "learning_rate": 8.814634146341464e-07, "loss": 1.4086, "step": 7760 }, { "epoch": 85.8719646799117, "learning_rate": 8.811498257839721e-07, "loss": 1.4774, "step": 7780 }, { "epoch": 86.09271523178808, "learning_rate": 8.808362369337979e-07, "loss": 1.4534, "step": 7800 }, { "epoch": 86.31346578366445, "learning_rate": 8.805226480836237e-07, "loss": 1.3844, "step": 7820 }, { "epoch": 86.53421633554083, "learning_rate": 8.802090592334494e-07, "loss": 1.4751, "step": 7840 }, { "epoch": 86.75496688741723, "learning_rate": 8.798954703832752e-07, "loss": 1.4002, "step": 7860 }, { "epoch": 86.9757174392936, "learning_rate": 8.79581881533101e-07, "loss": 1.4659, "step": 7880 }, { "epoch": 87.19646799116998, "learning_rate": 8.792682926829268e-07, "loss": 1.4322, "step": 7900 }, { "epoch": 87.41721854304636, "learning_rate": 8.789547038327526e-07, "loss": 1.4278, "step": 7920 }, { "epoch": 87.63796909492274, "learning_rate": 8.786411149825783e-07, "loss": 1.453, "step": 7940 }, { "epoch": 87.85871964679912, "learning_rate": 8.783275261324042e-07, "loss": 1.4371, "step": 7960 }, { "epoch": 88.0794701986755, "learning_rate": 8.780139372822298e-07, "loss": 1.4775, "step": 7980 }, { "epoch": 88.30022075055187, "learning_rate": 8.777003484320557e-07, "loss": 1.4058, "step": 8000 }, { "epoch": 88.52097130242825, "learning_rate": 8.773867595818815e-07, "loss": 1.4119, "step": 8020 }, { "epoch": 88.74172185430463, "learning_rate": 8.770731707317073e-07, "loss": 1.4316, "step": 8040 }, { "epoch": 88.96247240618102, "learning_rate": 8.767595818815331e-07, "loss": 1.4198, "step": 8060 }, { "epoch": 89.1832229580574, "learning_rate": 8.764459930313588e-07, "loss": 1.4434, "step": 8080 }, { "epoch": 89.40397350993378, "learning_rate": 8.761324041811848e-07, "loss": 1.4309, "step": 8100 }, { "epoch": 89.62472406181016, "learning_rate": 8.758188153310104e-07, "loss": 1.3757, "step": 8120 }, { "epoch": 89.84547461368653, "learning_rate": 8.755052264808362e-07, "loss": 1.479, "step": 8140 }, { "epoch": 90.06622516556291, "learning_rate": 8.751916376306619e-07, "loss": 1.3926, "step": 8160 }, { "epoch": 90.28697571743929, "learning_rate": 8.748780487804878e-07, "loss": 1.4629, "step": 8180 }, { "epoch": 90.50772626931567, "learning_rate": 8.745644599303135e-07, "loss": 1.3793, "step": 8200 }, { "epoch": 90.72847682119205, "learning_rate": 8.742508710801393e-07, "loss": 1.4113, "step": 8220 }, { "epoch": 90.94922737306844, "learning_rate": 8.739372822299651e-07, "loss": 1.4187, "step": 8240 }, { "epoch": 91.16997792494482, "learning_rate": 8.73623693379791e-07, "loss": 1.4466, "step": 8260 }, { "epoch": 91.3907284768212, "learning_rate": 8.733101045296167e-07, "loss": 1.4219, "step": 8280 }, { "epoch": 91.61147902869757, "learning_rate": 8.729965156794424e-07, "loss": 1.4783, "step": 8300 }, { "epoch": 91.83222958057395, "learning_rate": 8.726829268292683e-07, "loss": 1.4111, "step": 8320 }, { "epoch": 92.05298013245033, "learning_rate": 8.723693379790941e-07, "loss": 1.427, "step": 8340 }, { "epoch": 92.27373068432671, "learning_rate": 8.720557491289198e-07, "loss": 1.414, "step": 8360 }, { "epoch": 92.49448123620309, "learning_rate": 8.717421602787456e-07, "loss": 1.4097, "step": 8380 }, { "epoch": 92.71523178807946, "learning_rate": 8.714285714285715e-07, "loss": 1.4818, "step": 8400 }, { "epoch": 92.93598233995586, "learning_rate": 8.711149825783971e-07, "loss": 1.4127, "step": 8420 }, { "epoch": 93.15673289183223, "learning_rate": 8.708013937282229e-07, "loss": 1.4264, "step": 8440 }, { "epoch": 93.37748344370861, "learning_rate": 8.704878048780487e-07, "loss": 1.4663, "step": 8460 }, { "epoch": 93.59823399558499, "learning_rate": 8.701742160278746e-07, "loss": 1.4151, "step": 8480 }, { "epoch": 93.81898454746137, "learning_rate": 8.698606271777002e-07, "loss": 1.4362, "step": 8500 }, { "epoch": 94.03973509933775, "learning_rate": 8.695470383275261e-07, "loss": 1.3755, "step": 8520 }, { "epoch": 94.26048565121413, "learning_rate": 8.69233449477352e-07, "loss": 1.3927, "step": 8540 }, { "epoch": 94.4812362030905, "learning_rate": 8.689198606271777e-07, "loss": 1.4137, "step": 8560 }, { "epoch": 94.70198675496688, "learning_rate": 8.686062717770035e-07, "loss": 1.4284, "step": 8580 }, { "epoch": 94.92273730684327, "learning_rate": 8.682926829268292e-07, "loss": 1.4287, "step": 8600 }, { "epoch": 95.14348785871965, "learning_rate": 8.679790940766551e-07, "loss": 1.4243, "step": 8620 }, { "epoch": 95.36423841059603, "learning_rate": 8.676655052264807e-07, "loss": 1.3588, "step": 8640 }, { "epoch": 95.58498896247241, "learning_rate": 8.673519163763066e-07, "loss": 1.3846, "step": 8660 }, { "epoch": 95.80573951434879, "learning_rate": 8.670383275261325e-07, "loss": 1.4513, "step": 8680 }, { "epoch": 96.02649006622516, "learning_rate": 8.667247386759582e-07, "loss": 1.4022, "step": 8700 }, { "epoch": 96.24724061810154, "learning_rate": 8.664111498257838e-07, "loss": 1.3969, "step": 8720 }, { "epoch": 96.46799116997792, "learning_rate": 8.660975609756097e-07, "loss": 1.38, "step": 8740 }, { "epoch": 96.6887417218543, "learning_rate": 8.657839721254355e-07, "loss": 1.4701, "step": 8760 }, { "epoch": 96.90949227373068, "learning_rate": 8.654703832752613e-07, "loss": 1.3637, "step": 8780 }, { "epoch": 97.13024282560707, "learning_rate": 8.65156794425087e-07, "loss": 1.3748, "step": 8800 }, { "epoch": 97.35099337748345, "learning_rate": 8.648432055749129e-07, "loss": 1.461, "step": 8820 }, { "epoch": 97.57174392935983, "learning_rate": 8.645296167247387e-07, "loss": 1.4615, "step": 8840 }, { "epoch": 97.7924944812362, "learning_rate": 8.642160278745644e-07, "loss": 1.3775, "step": 8860 }, { "epoch": 98.01324503311258, "learning_rate": 8.639024390243902e-07, "loss": 1.4125, "step": 8880 }, { "epoch": 98.23399558498896, "learning_rate": 8.63588850174216e-07, "loss": 1.3846, "step": 8900 }, { "epoch": 98.45474613686534, "learning_rate": 8.632752613240419e-07, "loss": 1.3948, "step": 8920 }, { "epoch": 98.67549668874172, "learning_rate": 8.629616724738675e-07, "loss": 1.348, "step": 8940 }, { "epoch": 98.8962472406181, "learning_rate": 8.626480836236934e-07, "loss": 1.4504, "step": 8960 }, { "epoch": 99.11699779249449, "learning_rate": 8.623344947735191e-07, "loss": 1.3899, "step": 8980 }, { "epoch": 99.33774834437087, "learning_rate": 8.62020905923345e-07, "loss": 1.4558, "step": 9000 }, { "epoch": 99.55849889624724, "learning_rate": 8.617073170731706e-07, "loss": 1.3781, "step": 9020 }, { "epoch": 99.77924944812362, "learning_rate": 8.613937282229965e-07, "loss": 1.3847, "step": 9040 }, { "epoch": 100.0, "learning_rate": 8.610801393728222e-07, "loss": 1.4043, "step": 9060 }, { "epoch": 100.22075055187638, "learning_rate": 8.60766550522648e-07, "loss": 1.4069, "step": 9080 }, { "epoch": 100.44150110375276, "learning_rate": 8.604529616724739e-07, "loss": 1.3562, "step": 9100 }, { "epoch": 100.66225165562913, "learning_rate": 8.601393728222996e-07, "loss": 1.3854, "step": 9120 }, { "epoch": 100.88300220750551, "learning_rate": 8.598257839721255e-07, "loss": 1.3842, "step": 9140 }, { "epoch": 101.1037527593819, "learning_rate": 8.595121951219512e-07, "loss": 1.3954, "step": 9160 }, { "epoch": 101.32450331125828, "learning_rate": 8.59198606271777e-07, "loss": 1.3966, "step": 9180 }, { "epoch": 101.54525386313466, "learning_rate": 8.588850174216027e-07, "loss": 1.4175, "step": 9200 }, { "epoch": 101.76600441501104, "learning_rate": 8.585714285714286e-07, "loss": 1.3944, "step": 9220 }, { "epoch": 101.98675496688742, "learning_rate": 8.582578397212543e-07, "loss": 1.343, "step": 9240 }, { "epoch": 102.2075055187638, "learning_rate": 8.579442508710801e-07, "loss": 1.3974, "step": 9260 }, { "epoch": 102.42825607064017, "learning_rate": 8.576306620209059e-07, "loss": 1.4235, "step": 9280 }, { "epoch": 102.64900662251655, "learning_rate": 8.573170731707317e-07, "loss": 1.379, "step": 9300 }, { "epoch": 102.86975717439293, "learning_rate": 8.570034843205574e-07, "loss": 1.3593, "step": 9320 }, { "epoch": 103.09050772626932, "learning_rate": 8.566898954703832e-07, "loss": 1.4164, "step": 9340 }, { "epoch": 103.3112582781457, "learning_rate": 8.563763066202091e-07, "loss": 1.3811, "step": 9360 }, { "epoch": 103.53200883002208, "learning_rate": 8.560627177700348e-07, "loss": 1.3853, "step": 9380 }, { "epoch": 103.75275938189846, "learning_rate": 8.557491289198606e-07, "loss": 1.3948, "step": 9400 }, { "epoch": 103.97350993377484, "learning_rate": 8.554355400696864e-07, "loss": 1.4206, "step": 9420 }, { "epoch": 104.19426048565121, "learning_rate": 8.551219512195122e-07, "loss": 1.3967, "step": 9440 }, { "epoch": 104.41501103752759, "learning_rate": 8.548083623693379e-07, "loss": 1.3597, "step": 9460 }, { "epoch": 104.63576158940397, "learning_rate": 8.544947735191637e-07, "loss": 1.4107, "step": 9480 }, { "epoch": 104.85651214128035, "learning_rate": 8.541811846689896e-07, "loss": 1.3626, "step": 9500 }, { "epoch": 105.07726269315673, "learning_rate": 8.538675958188153e-07, "loss": 1.4051, "step": 9520 }, { "epoch": 105.29801324503312, "learning_rate": 8.53554006968641e-07, "loss": 1.4063, "step": 9540 }, { "epoch": 105.5187637969095, "learning_rate": 8.532404181184669e-07, "loss": 1.4275, "step": 9560 }, { "epoch": 105.73951434878587, "learning_rate": 8.529268292682926e-07, "loss": 1.3462, "step": 9580 }, { "epoch": 105.96026490066225, "learning_rate": 8.526132404181184e-07, "loss": 1.3702, "step": 9600 }, { "epoch": 106.18101545253863, "learning_rate": 8.522996515679441e-07, "loss": 1.3683, "step": 9620 }, { "epoch": 106.40176600441501, "learning_rate": 8.5198606271777e-07, "loss": 1.4073, "step": 9640 }, { "epoch": 106.62251655629139, "learning_rate": 8.516724738675959e-07, "loss": 1.3627, "step": 9660 }, { "epoch": 106.84326710816777, "learning_rate": 8.513588850174215e-07, "loss": 1.4, "step": 9680 }, { "epoch": 107.06401766004414, "learning_rate": 8.510452961672474e-07, "loss": 1.3322, "step": 9700 }, { "epoch": 107.28476821192054, "learning_rate": 8.507317073170731e-07, "loss": 1.3776, "step": 9720 }, { "epoch": 107.50551876379691, "learning_rate": 8.50418118466899e-07, "loss": 1.3837, "step": 9740 }, { "epoch": 107.72626931567329, "learning_rate": 8.501045296167246e-07, "loss": 1.3633, "step": 9760 }, { "epoch": 107.94701986754967, "learning_rate": 8.497909407665505e-07, "loss": 1.4582, "step": 9780 }, { "epoch": 108.16777041942605, "learning_rate": 8.494773519163763e-07, "loss": 1.4204, "step": 9800 }, { "epoch": 108.38852097130243, "learning_rate": 8.49163763066202e-07, "loss": 1.3606, "step": 9820 }, { "epoch": 108.6092715231788, "learning_rate": 8.488501742160278e-07, "loss": 1.41, "step": 9840 }, { "epoch": 108.83002207505518, "learning_rate": 8.485365853658536e-07, "loss": 1.321, "step": 9860 }, { "epoch": 109.05077262693156, "learning_rate": 8.482229965156795e-07, "loss": 1.3471, "step": 9880 }, { "epoch": 109.27152317880795, "learning_rate": 8.479094076655051e-07, "loss": 1.3809, "step": 9900 }, { "epoch": 109.49227373068433, "learning_rate": 8.47595818815331e-07, "loss": 1.3795, "step": 9920 }, { "epoch": 109.71302428256071, "learning_rate": 8.472822299651568e-07, "loss": 1.3751, "step": 9940 }, { "epoch": 109.93377483443709, "learning_rate": 8.469686411149826e-07, "loss": 1.3513, "step": 9960 }, { "epoch": 110.15452538631347, "learning_rate": 8.466550522648083e-07, "loss": 1.3526, "step": 9980 }, { "epoch": 110.37527593818984, "learning_rate": 8.463414634146341e-07, "loss": 1.3819, "step": 10000 }, { "epoch": 110.59602649006622, "learning_rate": 8.460278745644599e-07, "loss": 1.3961, "step": 10020 }, { "epoch": 110.8167770419426, "learning_rate": 8.457142857142856e-07, "loss": 1.3934, "step": 10040 }, { "epoch": 111.03752759381898, "learning_rate": 8.454006968641114e-07, "loss": 1.3603, "step": 10060 }, { "epoch": 111.25827814569537, "learning_rate": 8.450871080139372e-07, "loss": 1.3933, "step": 10080 }, { "epoch": 111.47902869757175, "learning_rate": 8.44773519163763e-07, "loss": 1.381, "step": 10100 }, { "epoch": 111.69977924944813, "learning_rate": 8.444599303135888e-07, "loss": 1.4153, "step": 10120 }, { "epoch": 111.9205298013245, "learning_rate": 8.441463414634147e-07, "loss": 1.361, "step": 10140 }, { "epoch": 112.14128035320088, "learning_rate": 8.438327526132404e-07, "loss": 1.3341, "step": 10160 }, { "epoch": 112.36203090507726, "learning_rate": 8.435191637630662e-07, "loss": 1.3696, "step": 10180 }, { "epoch": 112.58278145695364, "learning_rate": 8.432055749128919e-07, "loss": 1.3899, "step": 10200 }, { "epoch": 112.80353200883002, "learning_rate": 8.428919860627178e-07, "loss": 1.3883, "step": 10220 }, { "epoch": 113.0242825607064, "learning_rate": 8.425783972125435e-07, "loss": 1.3401, "step": 10240 }, { "epoch": 113.24503311258277, "learning_rate": 8.422648083623693e-07, "loss": 1.3599, "step": 10260 }, { "epoch": 113.46578366445917, "learning_rate": 8.419512195121951e-07, "loss": 1.3688, "step": 10280 }, { "epoch": 113.68653421633555, "learning_rate": 8.416376306620209e-07, "loss": 1.3976, "step": 10300 }, { "epoch": 113.90728476821192, "learning_rate": 8.413240418118465e-07, "loss": 1.3909, "step": 10320 }, { "epoch": 114.1280353200883, "learning_rate": 8.410104529616724e-07, "loss": 1.4132, "step": 10340 }, { "epoch": 114.34878587196468, "learning_rate": 8.406968641114982e-07, "loss": 1.3741, "step": 10360 }, { "epoch": 114.56953642384106, "learning_rate": 8.40383275261324e-07, "loss": 1.3339, "step": 10380 }, { "epoch": 114.79028697571744, "learning_rate": 8.400696864111498e-07, "loss": 1.3869, "step": 10400 }, { "epoch": 115.01103752759381, "learning_rate": 8.397560975609756e-07, "loss": 1.3644, "step": 10420 }, { "epoch": 115.23178807947019, "learning_rate": 8.394425087108014e-07, "loss": 1.3584, "step": 10440 }, { "epoch": 115.45253863134658, "learning_rate": 8.391289198606271e-07, "loss": 1.3698, "step": 10460 }, { "epoch": 115.67328918322296, "learning_rate": 8.388153310104529e-07, "loss": 1.39, "step": 10480 }, { "epoch": 115.89403973509934, "learning_rate": 8.385017421602787e-07, "loss": 1.3651, "step": 10500 }, { "epoch": 116.11479028697572, "learning_rate": 8.381881533101045e-07, "loss": 1.339, "step": 10520 }, { "epoch": 116.3355408388521, "learning_rate": 8.378745644599303e-07, "loss": 1.3469, "step": 10540 }, { "epoch": 116.55629139072848, "learning_rate": 8.375609756097561e-07, "loss": 1.4008, "step": 10560 }, { "epoch": 116.77704194260485, "learning_rate": 8.372473867595818e-07, "loss": 1.3757, "step": 10580 }, { "epoch": 116.99779249448123, "learning_rate": 8.369337979094076e-07, "loss": 1.3792, "step": 10600 }, { "epoch": 117.21854304635761, "learning_rate": 8.366202090592334e-07, "loss": 1.3422, "step": 10620 }, { "epoch": 117.439293598234, "learning_rate": 8.363066202090592e-07, "loss": 1.3876, "step": 10640 }, { "epoch": 117.66004415011038, "learning_rate": 8.359930313588849e-07, "loss": 1.3519, "step": 10660 }, { "epoch": 117.88079470198676, "learning_rate": 8.356794425087108e-07, "loss": 1.3268, "step": 10680 }, { "epoch": 118.10154525386314, "learning_rate": 8.353658536585366e-07, "loss": 1.4245, "step": 10700 }, { "epoch": 118.32229580573951, "learning_rate": 8.350522648083623e-07, "loss": 1.3755, "step": 10720 }, { "epoch": 118.54304635761589, "learning_rate": 8.347386759581881e-07, "loss": 1.3318, "step": 10740 }, { "epoch": 118.76379690949227, "learning_rate": 8.344250871080139e-07, "loss": 1.3395, "step": 10760 }, { "epoch": 118.98454746136865, "learning_rate": 8.341114982578397e-07, "loss": 1.407, "step": 10780 }, { "epoch": 119.20529801324503, "learning_rate": 8.337979094076654e-07, "loss": 1.3621, "step": 10800 }, { "epoch": 119.20529801324503, "eval_bleu": 49.121, "eval_gen_len": 8.8, "eval_loss": 2.0037317276000977, "eval_runtime": 3.2416, "eval_samples_per_second": 9.255, "eval_steps_per_second": 1.851, "step": 10800 }, { "epoch": 119.42604856512142, "learning_rate": 8.334843205574913e-07, "loss": 1.3468, "step": 10820 }, { "epoch": 119.6467991169978, "learning_rate": 8.331707317073171e-07, "loss": 1.3168, "step": 10840 }, { "epoch": 119.86754966887418, "learning_rate": 8.328571428571428e-07, "loss": 1.3797, "step": 10860 }, { "epoch": 120.08830022075055, "learning_rate": 8.325435540069686e-07, "loss": 1.3921, "step": 10880 }, { "epoch": 120.30905077262693, "learning_rate": 8.322299651567943e-07, "loss": 1.3714, "step": 10900 }, { "epoch": 120.52980132450331, "learning_rate": 8.319163763066202e-07, "loss": 1.3932, "step": 10920 }, { "epoch": 120.75055187637969, "learning_rate": 8.316027874564459e-07, "loss": 1.361, "step": 10940 }, { "epoch": 120.97130242825607, "learning_rate": 8.312891986062718e-07, "loss": 1.3852, "step": 10960 }, { "epoch": 121.19205298013244, "learning_rate": 8.309756097560976e-07, "loss": 1.3572, "step": 10980 }, { "epoch": 121.41280353200882, "learning_rate": 8.306620209059233e-07, "loss": 1.368, "step": 11000 }, { "epoch": 121.63355408388522, "learning_rate": 8.303484320557491e-07, "loss": 1.3301, "step": 11020 }, { "epoch": 121.8543046357616, "learning_rate": 8.300348432055749e-07, "loss": 1.3618, "step": 11040 }, { "epoch": 122.07505518763797, "learning_rate": 8.297212543554007e-07, "loss": 1.3663, "step": 11060 }, { "epoch": 122.29580573951435, "learning_rate": 8.294076655052264e-07, "loss": 1.3595, "step": 11080 }, { "epoch": 122.51655629139073, "learning_rate": 8.290940766550522e-07, "loss": 1.3469, "step": 11100 }, { "epoch": 122.7373068432671, "learning_rate": 8.287804878048781e-07, "loss": 1.3225, "step": 11120 }, { "epoch": 122.95805739514348, "learning_rate": 8.284668989547038e-07, "loss": 1.3757, "step": 11140 }, { "epoch": 123.17880794701986, "learning_rate": 8.281533101045296e-07, "loss": 1.3782, "step": 11160 }, { "epoch": 123.39955849889624, "learning_rate": 8.278397212543553e-07, "loss": 1.3417, "step": 11180 }, { "epoch": 123.62030905077263, "learning_rate": 8.275261324041812e-07, "loss": 1.353, "step": 11200 }, { "epoch": 123.84105960264901, "learning_rate": 8.272125435540068e-07, "loss": 1.3455, "step": 11220 }, { "epoch": 124.06181015452539, "learning_rate": 8.268989547038327e-07, "loss": 1.3426, "step": 11240 }, { "epoch": 124.28256070640177, "learning_rate": 8.265853658536586e-07, "loss": 1.3728, "step": 11260 }, { "epoch": 124.50331125827815, "learning_rate": 8.262717770034843e-07, "loss": 1.3162, "step": 11280 }, { "epoch": 124.72406181015452, "learning_rate": 8.259581881533101e-07, "loss": 1.3784, "step": 11300 }, { "epoch": 124.9448123620309, "learning_rate": 8.256445993031358e-07, "loss": 1.3341, "step": 11320 }, { "epoch": 125.16556291390728, "learning_rate": 8.253310104529617e-07, "loss": 1.3837, "step": 11340 }, { "epoch": 125.38631346578366, "learning_rate": 8.250174216027874e-07, "loss": 1.3848, "step": 11360 }, { "epoch": 125.60706401766005, "learning_rate": 8.247038327526132e-07, "loss": 1.3667, "step": 11380 }, { "epoch": 125.82781456953643, "learning_rate": 8.24390243902439e-07, "loss": 1.3061, "step": 11400 }, { "epoch": 126.0485651214128, "learning_rate": 8.240766550522648e-07, "loss": 1.3405, "step": 11420 }, { "epoch": 126.26931567328919, "learning_rate": 8.237630662020905e-07, "loss": 1.3561, "step": 11440 }, { "epoch": 126.49006622516556, "learning_rate": 8.234494773519163e-07, "loss": 1.3472, "step": 11460 }, { "epoch": 126.71081677704194, "learning_rate": 8.23135888501742e-07, "loss": 1.3836, "step": 11480 }, { "epoch": 126.93156732891832, "learning_rate": 8.228222996515679e-07, "loss": 1.4228, "step": 11500 }, { "epoch": 127.1523178807947, "learning_rate": 8.225087108013937e-07, "loss": 1.3512, "step": 11520 }, { "epoch": 127.37306843267108, "learning_rate": 8.221951219512195e-07, "loss": 1.3561, "step": 11540 }, { "epoch": 127.59381898454747, "learning_rate": 8.218815331010453e-07, "loss": 1.3186, "step": 11560 }, { "epoch": 127.81456953642385, "learning_rate": 8.215679442508711e-07, "loss": 1.3349, "step": 11580 }, { "epoch": 128.0353200883002, "learning_rate": 8.212543554006968e-07, "loss": 1.3944, "step": 11600 }, { "epoch": 128.2560706401766, "learning_rate": 8.209407665505226e-07, "loss": 1.3425, "step": 11620 }, { "epoch": 128.47682119205297, "learning_rate": 8.206271777003484e-07, "loss": 1.3553, "step": 11640 }, { "epoch": 128.69757174392936, "learning_rate": 8.203135888501741e-07, "loss": 1.339, "step": 11660 }, { "epoch": 128.91832229580575, "learning_rate": 8.2e-07, "loss": 1.303, "step": 11680 }, { "epoch": 129.13907284768212, "learning_rate": 8.196864111498257e-07, "loss": 1.3808, "step": 11700 }, { "epoch": 129.3598233995585, "learning_rate": 8.193728222996516e-07, "loss": 1.3813, "step": 11720 }, { "epoch": 129.58057395143487, "learning_rate": 8.190592334494772e-07, "loss": 1.321, "step": 11740 }, { "epoch": 129.80132450331126, "learning_rate": 8.187456445993031e-07, "loss": 1.3458, "step": 11760 }, { "epoch": 130.02207505518763, "learning_rate": 8.184320557491288e-07, "loss": 1.3039, "step": 11780 }, { "epoch": 130.24282560706402, "learning_rate": 8.181184668989547e-07, "loss": 1.3619, "step": 11800 }, { "epoch": 130.46357615894038, "learning_rate": 8.178048780487805e-07, "loss": 1.3726, "step": 11820 }, { "epoch": 130.68432671081678, "learning_rate": 8.174912891986062e-07, "loss": 1.3511, "step": 11840 }, { "epoch": 130.90507726269317, "learning_rate": 8.171777003484321e-07, "loss": 1.3184, "step": 11860 }, { "epoch": 131.12582781456953, "learning_rate": 8.168641114982578e-07, "loss": 1.3295, "step": 11880 }, { "epoch": 131.34657836644593, "learning_rate": 8.165505226480836e-07, "loss": 1.3437, "step": 11900 }, { "epoch": 131.5673289183223, "learning_rate": 8.162369337979093e-07, "loss": 1.3191, "step": 11920 }, { "epoch": 131.78807947019868, "learning_rate": 8.159233449477352e-07, "loss": 1.3698, "step": 11940 }, { "epoch": 132.00883002207505, "learning_rate": 8.156097560975609e-07, "loss": 1.3018, "step": 11960 }, { "epoch": 132.22958057395144, "learning_rate": 8.152961672473867e-07, "loss": 1.3678, "step": 11980 }, { "epoch": 132.4503311258278, "learning_rate": 8.149825783972125e-07, "loss": 1.3626, "step": 12000 }, { "epoch": 132.6710816777042, "learning_rate": 8.146689895470383e-07, "loss": 1.3456, "step": 12020 }, { "epoch": 132.8918322295806, "learning_rate": 8.14355400696864e-07, "loss": 1.3649, "step": 12040 }, { "epoch": 133.11258278145695, "learning_rate": 8.140418118466898e-07, "loss": 1.3615, "step": 12060 }, { "epoch": 133.33333333333334, "learning_rate": 8.137282229965157e-07, "loss": 1.3447, "step": 12080 }, { "epoch": 133.5540838852097, "learning_rate": 8.134146341463414e-07, "loss": 1.3378, "step": 12100 }, { "epoch": 133.7748344370861, "learning_rate": 8.131010452961672e-07, "loss": 1.2747, "step": 12120 }, { "epoch": 133.99558498896246, "learning_rate": 8.12787456445993e-07, "loss": 1.3212, "step": 12140 }, { "epoch": 134.21633554083886, "learning_rate": 8.124738675958189e-07, "loss": 1.274, "step": 12160 }, { "epoch": 134.43708609271522, "learning_rate": 8.121602787456445e-07, "loss": 1.3424, "step": 12180 }, { "epoch": 134.6578366445916, "learning_rate": 8.118466898954703e-07, "loss": 1.3688, "step": 12200 }, { "epoch": 134.878587196468, "learning_rate": 8.115331010452961e-07, "loss": 1.2944, "step": 12220 }, { "epoch": 135.09933774834437, "learning_rate": 8.11219512195122e-07, "loss": 1.4009, "step": 12240 }, { "epoch": 135.32008830022076, "learning_rate": 8.109059233449476e-07, "loss": 1.3217, "step": 12260 }, { "epoch": 135.54083885209712, "learning_rate": 8.105923344947735e-07, "loss": 1.3039, "step": 12280 }, { "epoch": 135.76158940397352, "learning_rate": 8.102787456445994e-07, "loss": 1.3223, "step": 12300 }, { "epoch": 135.98233995584988, "learning_rate": 8.099651567944251e-07, "loss": 1.3649, "step": 12320 }, { "epoch": 136.20309050772627, "learning_rate": 8.096515679442508e-07, "loss": 1.3254, "step": 12340 }, { "epoch": 136.42384105960264, "learning_rate": 8.093379790940766e-07, "loss": 1.3474, "step": 12360 }, { "epoch": 136.64459161147903, "learning_rate": 8.090243902439025e-07, "loss": 1.3528, "step": 12380 }, { "epoch": 136.86534216335542, "learning_rate": 8.087108013937281e-07, "loss": 1.3654, "step": 12400 }, { "epoch": 137.08609271523179, "learning_rate": 8.08397212543554e-07, "loss": 1.3761, "step": 12420 }, { "epoch": 137.30684326710818, "learning_rate": 8.080836236933798e-07, "loss": 1.3623, "step": 12440 }, { "epoch": 137.52759381898454, "learning_rate": 8.077700348432056e-07, "loss": 1.322, "step": 12460 }, { "epoch": 137.74834437086093, "learning_rate": 8.074564459930312e-07, "loss": 1.3221, "step": 12480 }, { "epoch": 137.9690949227373, "learning_rate": 8.071428571428571e-07, "loss": 1.3333, "step": 12500 }, { "epoch": 138.1898454746137, "learning_rate": 8.068292682926829e-07, "loss": 1.3036, "step": 12520 }, { "epoch": 138.41059602649005, "learning_rate": 8.065156794425087e-07, "loss": 1.3236, "step": 12540 }, { "epoch": 138.63134657836645, "learning_rate": 8.062020905923344e-07, "loss": 1.2922, "step": 12560 }, { "epoch": 138.85209713024284, "learning_rate": 8.058885017421603e-07, "loss": 1.3773, "step": 12580 }, { "epoch": 139.0728476821192, "learning_rate": 8.055749128919861e-07, "loss": 1.3378, "step": 12600 }, { "epoch": 139.2935982339956, "learning_rate": 8.052613240418117e-07, "loss": 1.3817, "step": 12620 }, { "epoch": 139.51434878587196, "learning_rate": 8.049477351916376e-07, "loss": 1.3296, "step": 12640 }, { "epoch": 139.73509933774835, "learning_rate": 8.046341463414634e-07, "loss": 1.311, "step": 12660 }, { "epoch": 139.95584988962472, "learning_rate": 8.043205574912892e-07, "loss": 1.3402, "step": 12680 }, { "epoch": 140.1766004415011, "learning_rate": 8.040069686411149e-07, "loss": 1.3487, "step": 12700 }, { "epoch": 140.39735099337747, "learning_rate": 8.036933797909408e-07, "loss": 1.3242, "step": 12720 }, { "epoch": 140.61810154525386, "learning_rate": 8.033797909407665e-07, "loss": 1.3486, "step": 12740 }, { "epoch": 140.83885209713026, "learning_rate": 8.030662020905923e-07, "loss": 1.3231, "step": 12760 }, { "epoch": 141.05960264900662, "learning_rate": 8.02752613240418e-07, "loss": 1.3466, "step": 12780 }, { "epoch": 141.280353200883, "learning_rate": 8.024390243902439e-07, "loss": 1.3044, "step": 12800 }, { "epoch": 141.50110375275938, "learning_rate": 8.021254355400696e-07, "loss": 1.3127, "step": 12820 }, { "epoch": 141.72185430463577, "learning_rate": 8.018118466898954e-07, "loss": 1.3426, "step": 12840 }, { "epoch": 141.94260485651213, "learning_rate": 8.014982578397213e-07, "loss": 1.3484, "step": 12860 }, { "epoch": 142.16335540838853, "learning_rate": 8.011846689895469e-07, "loss": 1.3492, "step": 12880 }, { "epoch": 142.3841059602649, "learning_rate": 8.008710801393729e-07, "loss": 1.3381, "step": 12900 }, { "epoch": 142.60485651214128, "learning_rate": 8.005574912891985e-07, "loss": 1.3326, "step": 12920 }, { "epoch": 142.82560706401765, "learning_rate": 8.002439024390244e-07, "loss": 1.3415, "step": 12940 }, { "epoch": 143.04635761589404, "learning_rate": 7.999303135888501e-07, "loss": 1.3218, "step": 12960 }, { "epoch": 143.26710816777043, "learning_rate": 7.99616724738676e-07, "loss": 1.3619, "step": 12980 }, { "epoch": 143.4878587196468, "learning_rate": 7.993031358885017e-07, "loss": 1.2859, "step": 13000 }, { "epoch": 143.7086092715232, "learning_rate": 7.989895470383275e-07, "loss": 1.3311, "step": 13020 }, { "epoch": 143.92935982339955, "learning_rate": 7.986759581881533e-07, "loss": 1.3699, "step": 13040 }, { "epoch": 144.15011037527594, "learning_rate": 7.98362369337979e-07, "loss": 1.3038, "step": 13060 }, { "epoch": 144.3708609271523, "learning_rate": 7.980487804878048e-07, "loss": 1.3118, "step": 13080 }, { "epoch": 144.5916114790287, "learning_rate": 7.977351916376306e-07, "loss": 1.3181, "step": 13100 }, { "epoch": 144.81236203090506, "learning_rate": 7.974216027874565e-07, "loss": 1.3102, "step": 13120 }, { "epoch": 145.03311258278146, "learning_rate": 7.971080139372822e-07, "loss": 1.3476, "step": 13140 }, { "epoch": 145.25386313465785, "learning_rate": 7.96794425087108e-07, "loss": 1.3301, "step": 13160 }, { "epoch": 145.4746136865342, "learning_rate": 7.964808362369338e-07, "loss": 1.3198, "step": 13180 }, { "epoch": 145.6953642384106, "learning_rate": 7.961672473867596e-07, "loss": 1.3133, "step": 13200 }, { "epoch": 145.91611479028697, "learning_rate": 7.958536585365853e-07, "loss": 1.3621, "step": 13220 }, { "epoch": 146.13686534216336, "learning_rate": 7.955400696864111e-07, "loss": 1.2893, "step": 13240 }, { "epoch": 146.35761589403972, "learning_rate": 7.952264808362369e-07, "loss": 1.3464, "step": 13260 }, { "epoch": 146.57836644591612, "learning_rate": 7.949128919860627e-07, "loss": 1.3016, "step": 13280 }, { "epoch": 146.79911699779248, "learning_rate": 7.945993031358884e-07, "loss": 1.3302, "step": 13300 }, { "epoch": 147.01986754966887, "learning_rate": 7.942857142857143e-07, "loss": 1.3465, "step": 13320 }, { "epoch": 147.24061810154527, "learning_rate": 7.9397212543554e-07, "loss": 1.3497, "step": 13340 }, { "epoch": 147.46136865342163, "learning_rate": 7.936585365853658e-07, "loss": 1.335, "step": 13360 }, { "epoch": 147.68211920529802, "learning_rate": 7.933449477351915e-07, "loss": 1.312, "step": 13380 }, { "epoch": 147.9028697571744, "learning_rate": 7.930313588850174e-07, "loss": 1.3215, "step": 13400 }, { "epoch": 148.12362030905078, "learning_rate": 7.927177700348433e-07, "loss": 1.3434, "step": 13420 }, { "epoch": 148.34437086092714, "learning_rate": 7.924041811846689e-07, "loss": 1.3217, "step": 13440 }, { "epoch": 148.56512141280353, "learning_rate": 7.920905923344948e-07, "loss": 1.3013, "step": 13460 }, { "epoch": 148.7858719646799, "learning_rate": 7.917770034843205e-07, "loss": 1.2845, "step": 13480 }, { "epoch": 149.0066225165563, "learning_rate": 7.914634146341463e-07, "loss": 1.3128, "step": 13500 }, { "epoch": 149.22737306843268, "learning_rate": 7.91149825783972e-07, "loss": 1.3168, "step": 13520 }, { "epoch": 149.44812362030905, "learning_rate": 7.908362369337979e-07, "loss": 1.2723, "step": 13540 }, { "epoch": 149.66887417218544, "learning_rate": 7.905226480836238e-07, "loss": 1.3252, "step": 13560 }, { "epoch": 149.8896247240618, "learning_rate": 7.902090592334494e-07, "loss": 1.3171, "step": 13580 }, { "epoch": 150.1103752759382, "learning_rate": 7.898954703832752e-07, "loss": 1.2999, "step": 13600 }, { "epoch": 150.33112582781456, "learning_rate": 7.89581881533101e-07, "loss": 1.3204, "step": 13620 }, { "epoch": 150.55187637969095, "learning_rate": 7.892682926829268e-07, "loss": 1.3109, "step": 13640 }, { "epoch": 150.77262693156732, "learning_rate": 7.889547038327525e-07, "loss": 1.2844, "step": 13660 }, { "epoch": 150.9933774834437, "learning_rate": 7.886411149825784e-07, "loss": 1.3289, "step": 13680 }, { "epoch": 151.2141280353201, "learning_rate": 7.883275261324042e-07, "loss": 1.2994, "step": 13700 }, { "epoch": 151.43487858719647, "learning_rate": 7.8801393728223e-07, "loss": 1.3577, "step": 13720 }, { "epoch": 151.65562913907286, "learning_rate": 7.877003484320557e-07, "loss": 1.3082, "step": 13740 }, { "epoch": 151.87637969094922, "learning_rate": 7.873867595818815e-07, "loss": 1.327, "step": 13760 }, { "epoch": 152.0971302428256, "learning_rate": 7.870731707317073e-07, "loss": 1.2884, "step": 13780 }, { "epoch": 152.31788079470198, "learning_rate": 7.86759581881533e-07, "loss": 1.3056, "step": 13800 }, { "epoch": 152.53863134657837, "learning_rate": 7.864459930313588e-07, "loss": 1.3033, "step": 13820 }, { "epoch": 152.75938189845473, "learning_rate": 7.861324041811847e-07, "loss": 1.3124, "step": 13840 }, { "epoch": 152.98013245033113, "learning_rate": 7.858188153310104e-07, "loss": 1.3514, "step": 13860 }, { "epoch": 153.20088300220752, "learning_rate": 7.855052264808362e-07, "loss": 1.3502, "step": 13880 }, { "epoch": 153.42163355408388, "learning_rate": 7.851916376306619e-07, "loss": 1.3594, "step": 13900 }, { "epoch": 153.64238410596028, "learning_rate": 7.848780487804878e-07, "loss": 1.3061, "step": 13920 }, { "epoch": 153.86313465783664, "learning_rate": 7.845644599303136e-07, "loss": 1.2751, "step": 13940 }, { "epoch": 154.08388520971303, "learning_rate": 7.842508710801393e-07, "loss": 1.3334, "step": 13960 }, { "epoch": 154.3046357615894, "learning_rate": 7.839372822299652e-07, "loss": 1.337, "step": 13980 }, { "epoch": 154.5253863134658, "learning_rate": 7.836236933797909e-07, "loss": 1.3272, "step": 14000 }, { "epoch": 154.74613686534215, "learning_rate": 7.833101045296167e-07, "loss": 1.3093, "step": 14020 }, { "epoch": 154.96688741721854, "learning_rate": 7.829965156794425e-07, "loss": 1.2967, "step": 14040 }, { "epoch": 155.18763796909494, "learning_rate": 7.826829268292683e-07, "loss": 1.3127, "step": 14060 }, { "epoch": 155.4083885209713, "learning_rate": 7.82369337979094e-07, "loss": 1.3198, "step": 14080 }, { "epoch": 155.6291390728477, "learning_rate": 7.820557491289198e-07, "loss": 1.2706, "step": 14100 }, { "epoch": 155.84988962472406, "learning_rate": 7.817421602787456e-07, "loss": 1.3346, "step": 14120 }, { "epoch": 156.07064017660045, "learning_rate": 7.814285714285714e-07, "loss": 1.3161, "step": 14140 }, { "epoch": 156.2913907284768, "learning_rate": 7.811149825783972e-07, "loss": 1.3083, "step": 14160 }, { "epoch": 156.5121412803532, "learning_rate": 7.80801393728223e-07, "loss": 1.3371, "step": 14180 }, { "epoch": 156.73289183222957, "learning_rate": 7.804878048780488e-07, "loss": 1.3168, "step": 14200 }, { "epoch": 156.95364238410596, "learning_rate": 7.801742160278745e-07, "loss": 1.3037, "step": 14220 }, { "epoch": 157.17439293598235, "learning_rate": 7.798606271777003e-07, "loss": 1.2958, "step": 14240 }, { "epoch": 157.39514348785872, "learning_rate": 7.795470383275261e-07, "loss": 1.3027, "step": 14260 }, { "epoch": 157.6158940397351, "learning_rate": 7.792334494773519e-07, "loss": 1.2728, "step": 14280 }, { "epoch": 157.83664459161147, "learning_rate": 7.789198606271777e-07, "loss": 1.3138, "step": 14300 }, { "epoch": 158.05739514348787, "learning_rate": 7.786062717770035e-07, "loss": 1.3205, "step": 14320 }, { "epoch": 158.27814569536423, "learning_rate": 7.782926829268292e-07, "loss": 1.308, "step": 14340 }, { "epoch": 158.49889624724062, "learning_rate": 7.77979094076655e-07, "loss": 1.2679, "step": 14360 }, { "epoch": 158.719646799117, "learning_rate": 7.776655052264809e-07, "loss": 1.3194, "step": 14380 }, { "epoch": 158.94039735099338, "learning_rate": 7.773519163763066e-07, "loss": 1.3278, "step": 14400 }, { "epoch": 158.94039735099338, "eval_bleu": 52.1498, "eval_gen_len": 8.5, "eval_loss": 1.9856631755828857, "eval_runtime": 3.1482, "eval_samples_per_second": 9.529, "eval_steps_per_second": 1.906, "step": 14400 }, { "epoch": 159.16114790286974, "learning_rate": 7.770383275261323e-07, "loss": 1.2825, "step": 14420 }, { "epoch": 159.38189845474614, "learning_rate": 7.767247386759582e-07, "loss": 1.3146, "step": 14440 }, { "epoch": 159.60264900662253, "learning_rate": 7.76411149825784e-07, "loss": 1.3051, "step": 14460 }, { "epoch": 159.8233995584989, "learning_rate": 7.760975609756097e-07, "loss": 1.2975, "step": 14480 }, { "epoch": 160.04415011037528, "learning_rate": 7.757839721254355e-07, "loss": 1.3279, "step": 14500 }, { "epoch": 160.26490066225165, "learning_rate": 7.754703832752613e-07, "loss": 1.3128, "step": 14520 }, { "epoch": 160.48565121412804, "learning_rate": 7.751567944250871e-07, "loss": 1.2796, "step": 14540 }, { "epoch": 160.7064017660044, "learning_rate": 7.748432055749128e-07, "loss": 1.313, "step": 14560 }, { "epoch": 160.9271523178808, "learning_rate": 7.745296167247387e-07, "loss": 1.2979, "step": 14580 }, { "epoch": 161.14790286975716, "learning_rate": 7.742160278745645e-07, "loss": 1.3405, "step": 14600 }, { "epoch": 161.36865342163355, "learning_rate": 7.739024390243903e-07, "loss": 1.2926, "step": 14620 }, { "epoch": 161.58940397350995, "learning_rate": 7.735888501742159e-07, "loss": 1.3202, "step": 14640 }, { "epoch": 161.8101545253863, "learning_rate": 7.732752613240418e-07, "loss": 1.3303, "step": 14660 }, { "epoch": 162.0309050772627, "learning_rate": 7.729616724738675e-07, "loss": 1.271, "step": 14680 }, { "epoch": 162.25165562913907, "learning_rate": 7.726480836236933e-07, "loss": 1.2651, "step": 14700 }, { "epoch": 162.47240618101546, "learning_rate": 7.723344947735191e-07, "loss": 1.2809, "step": 14720 }, { "epoch": 162.69315673289182, "learning_rate": 7.72020905923345e-07, "loss": 1.3978, "step": 14740 }, { "epoch": 162.91390728476821, "learning_rate": 7.717073170731707e-07, "loss": 1.2972, "step": 14760 }, { "epoch": 163.13465783664458, "learning_rate": 7.713937282229964e-07, "loss": 1.3079, "step": 14780 }, { "epoch": 163.35540838852097, "learning_rate": 7.710801393728223e-07, "loss": 1.2769, "step": 14800 }, { "epoch": 163.57615894039736, "learning_rate": 7.707665505226481e-07, "loss": 1.3133, "step": 14820 }, { "epoch": 163.79690949227373, "learning_rate": 7.704529616724738e-07, "loss": 1.2618, "step": 14840 }, { "epoch": 164.01766004415012, "learning_rate": 7.701393728222996e-07, "loss": 1.2768, "step": 14860 }, { "epoch": 164.23841059602648, "learning_rate": 7.698257839721255e-07, "loss": 1.2407, "step": 14880 }, { "epoch": 164.45916114790288, "learning_rate": 7.695121951219512e-07, "loss": 1.2921, "step": 14900 }, { "epoch": 164.67991169977924, "learning_rate": 7.691986062717769e-07, "loss": 1.3135, "step": 14920 }, { "epoch": 164.90066225165563, "learning_rate": 7.688850174216027e-07, "loss": 1.2811, "step": 14940 }, { "epoch": 165.121412803532, "learning_rate": 7.685714285714287e-07, "loss": 1.2964, "step": 14960 }, { "epoch": 165.3421633554084, "learning_rate": 7.682578397212542e-07, "loss": 1.2674, "step": 14980 }, { "epoch": 165.56291390728478, "learning_rate": 7.679442508710801e-07, "loss": 1.3111, "step": 15000 }, { "epoch": 165.78366445916114, "learning_rate": 7.67630662020906e-07, "loss": 1.2993, "step": 15020 }, { "epoch": 166.00441501103754, "learning_rate": 7.673170731707317e-07, "loss": 1.268, "step": 15040 }, { "epoch": 166.2251655629139, "learning_rate": 7.670034843205574e-07, "loss": 1.3067, "step": 15060 }, { "epoch": 166.4459161147903, "learning_rate": 7.666898954703832e-07, "loss": 1.3156, "step": 15080 }, { "epoch": 166.66666666666666, "learning_rate": 7.663763066202091e-07, "loss": 1.3127, "step": 15100 }, { "epoch": 166.88741721854305, "learning_rate": 7.660627177700348e-07, "loss": 1.2802, "step": 15120 }, { "epoch": 167.1081677704194, "learning_rate": 7.657491289198606e-07, "loss": 1.252, "step": 15140 }, { "epoch": 167.3289183222958, "learning_rate": 7.654355400696864e-07, "loss": 1.3094, "step": 15160 }, { "epoch": 167.5496688741722, "learning_rate": 7.651219512195122e-07, "loss": 1.2655, "step": 15180 }, { "epoch": 167.77041942604856, "learning_rate": 7.648083623693378e-07, "loss": 1.315, "step": 15200 }, { "epoch": 167.99116997792495, "learning_rate": 7.644947735191637e-07, "loss": 1.2746, "step": 15220 }, { "epoch": 168.21192052980132, "learning_rate": 7.641811846689895e-07, "loss": 1.279, "step": 15240 }, { "epoch": 168.4326710816777, "learning_rate": 7.638675958188153e-07, "loss": 1.288, "step": 15260 }, { "epoch": 168.65342163355407, "learning_rate": 7.63554006968641e-07, "loss": 1.3345, "step": 15280 }, { "epoch": 168.87417218543047, "learning_rate": 7.632404181184669e-07, "loss": 1.2753, "step": 15300 }, { "epoch": 169.09492273730683, "learning_rate": 7.629268292682927e-07, "loss": 1.3172, "step": 15320 }, { "epoch": 169.31567328918322, "learning_rate": 7.626132404181184e-07, "loss": 1.26, "step": 15340 }, { "epoch": 169.53642384105962, "learning_rate": 7.622996515679443e-07, "loss": 1.2803, "step": 15360 }, { "epoch": 169.75717439293598, "learning_rate": 7.6198606271777e-07, "loss": 1.2895, "step": 15380 }, { "epoch": 169.97792494481237, "learning_rate": 7.616724738675958e-07, "loss": 1.3389, "step": 15400 }, { "epoch": 170.19867549668874, "learning_rate": 7.613588850174215e-07, "loss": 1.2815, "step": 15420 }, { "epoch": 170.41942604856513, "learning_rate": 7.610452961672474e-07, "loss": 1.2479, "step": 15440 }, { "epoch": 170.6401766004415, "learning_rate": 7.607317073170731e-07, "loss": 1.2462, "step": 15460 }, { "epoch": 170.86092715231788, "learning_rate": 7.604181184668989e-07, "loss": 1.2922, "step": 15480 }, { "epoch": 171.08167770419425, "learning_rate": 7.601045296167247e-07, "loss": 1.3001, "step": 15500 }, { "epoch": 171.30242825607064, "learning_rate": 7.597909407665505e-07, "loss": 1.2975, "step": 15520 }, { "epoch": 171.52317880794703, "learning_rate": 7.594773519163762e-07, "loss": 1.2716, "step": 15540 }, { "epoch": 171.7439293598234, "learning_rate": 7.591637630662021e-07, "loss": 1.2841, "step": 15560 }, { "epoch": 171.9646799116998, "learning_rate": 7.588501742160279e-07, "loss": 1.2861, "step": 15580 }, { "epoch": 172.18543046357615, "learning_rate": 7.585365853658536e-07, "loss": 1.2708, "step": 15600 }, { "epoch": 172.40618101545255, "learning_rate": 7.582229965156794e-07, "loss": 1.2853, "step": 15620 }, { "epoch": 172.6269315673289, "learning_rate": 7.579094076655052e-07, "loss": 1.2959, "step": 15640 }, { "epoch": 172.8476821192053, "learning_rate": 7.57595818815331e-07, "loss": 1.2862, "step": 15660 }, { "epoch": 173.06843267108167, "learning_rate": 7.572822299651567e-07, "loss": 1.287, "step": 15680 }, { "epoch": 173.28918322295806, "learning_rate": 7.569686411149826e-07, "loss": 1.2946, "step": 15700 }, { "epoch": 173.50993377483445, "learning_rate": 7.566550522648083e-07, "loss": 1.2862, "step": 15720 }, { "epoch": 173.73068432671081, "learning_rate": 7.563414634146341e-07, "loss": 1.2715, "step": 15740 }, { "epoch": 173.9514348785872, "learning_rate": 7.560278745644598e-07, "loss": 1.3114, "step": 15760 }, { "epoch": 174.17218543046357, "learning_rate": 7.557142857142858e-07, "loss": 1.3279, "step": 15780 }, { "epoch": 174.39293598233996, "learning_rate": 7.554006968641114e-07, "loss": 1.2848, "step": 15800 }, { "epoch": 174.61368653421633, "learning_rate": 7.550871080139372e-07, "loss": 1.3029, "step": 15820 }, { "epoch": 174.83443708609272, "learning_rate": 7.547735191637631e-07, "loss": 1.2794, "step": 15840 }, { "epoch": 175.05518763796908, "learning_rate": 7.544599303135888e-07, "loss": 1.3058, "step": 15860 }, { "epoch": 175.27593818984548, "learning_rate": 7.541463414634146e-07, "loss": 1.2509, "step": 15880 }, { "epoch": 175.49668874172184, "learning_rate": 7.538327526132404e-07, "loss": 1.2677, "step": 15900 }, { "epoch": 175.71743929359823, "learning_rate": 7.535191637630663e-07, "loss": 1.2854, "step": 15920 }, { "epoch": 175.93818984547462, "learning_rate": 7.532055749128919e-07, "loss": 1.2456, "step": 15940 }, { "epoch": 176.158940397351, "learning_rate": 7.528919860627177e-07, "loss": 1.2687, "step": 15960 }, { "epoch": 176.37969094922738, "learning_rate": 7.525783972125435e-07, "loss": 1.2569, "step": 15980 }, { "epoch": 176.60044150110375, "learning_rate": 7.522648083623694e-07, "loss": 1.2962, "step": 16000 }, { "epoch": 176.82119205298014, "learning_rate": 7.519512195121951e-07, "loss": 1.2896, "step": 16020 }, { "epoch": 177.0419426048565, "learning_rate": 7.516376306620209e-07, "loss": 1.2979, "step": 16040 }, { "epoch": 177.2626931567329, "learning_rate": 7.513240418118468e-07, "loss": 1.2901, "step": 16060 }, { "epoch": 177.48344370860926, "learning_rate": 7.510104529616724e-07, "loss": 1.2471, "step": 16080 }, { "epoch": 177.70419426048565, "learning_rate": 7.506968641114981e-07, "loss": 1.2514, "step": 16100 }, { "epoch": 177.92494481236204, "learning_rate": 7.50383275261324e-07, "loss": 1.2979, "step": 16120 }, { "epoch": 178.1456953642384, "learning_rate": 7.500696864111499e-07, "loss": 1.275, "step": 16140 }, { "epoch": 178.3664459161148, "learning_rate": 7.497560975609755e-07, "loss": 1.2554, "step": 16160 }, { "epoch": 178.58719646799116, "learning_rate": 7.494425087108014e-07, "loss": 1.2885, "step": 16180 }, { "epoch": 178.80794701986756, "learning_rate": 7.491289198606272e-07, "loss": 1.2416, "step": 16200 }, { "epoch": 179.02869757174392, "learning_rate": 7.48815331010453e-07, "loss": 1.2668, "step": 16220 }, { "epoch": 179.2494481236203, "learning_rate": 7.485017421602786e-07, "loss": 1.2995, "step": 16240 }, { "epoch": 179.47019867549668, "learning_rate": 7.481881533101045e-07, "loss": 1.2525, "step": 16260 }, { "epoch": 179.69094922737307, "learning_rate": 7.478745644599303e-07, "loss": 1.2723, "step": 16280 }, { "epoch": 179.91169977924946, "learning_rate": 7.475609756097561e-07, "loss": 1.3271, "step": 16300 }, { "epoch": 180.13245033112582, "learning_rate": 7.472473867595818e-07, "loss": 1.2475, "step": 16320 }, { "epoch": 180.35320088300222, "learning_rate": 7.469337979094077e-07, "loss": 1.2938, "step": 16340 }, { "epoch": 180.57395143487858, "learning_rate": 7.466202090592336e-07, "loss": 1.28, "step": 16360 }, { "epoch": 180.79470198675497, "learning_rate": 7.463066202090591e-07, "loss": 1.2954, "step": 16380 }, { "epoch": 181.01545253863134, "learning_rate": 7.45993031358885e-07, "loss": 1.2661, "step": 16400 }, { "epoch": 181.23620309050773, "learning_rate": 7.456794425087108e-07, "loss": 1.3225, "step": 16420 }, { "epoch": 181.4569536423841, "learning_rate": 7.453658536585366e-07, "loss": 1.2522, "step": 16440 }, { "epoch": 181.67770419426049, "learning_rate": 7.450522648083623e-07, "loss": 1.312, "step": 16460 }, { "epoch": 181.89845474613688, "learning_rate": 7.447386759581882e-07, "loss": 1.2597, "step": 16480 }, { "epoch": 182.11920529801324, "learning_rate": 7.444250871080139e-07, "loss": 1.3262, "step": 16500 }, { "epoch": 182.33995584988963, "learning_rate": 7.441114982578397e-07, "loss": 1.3519, "step": 16520 }, { "epoch": 182.560706401766, "learning_rate": 7.437979094076654e-07, "loss": 1.2241, "step": 16540 }, { "epoch": 182.7814569536424, "learning_rate": 7.434843205574913e-07, "loss": 1.3083, "step": 16560 }, { "epoch": 183.00220750551875, "learning_rate": 7.43170731707317e-07, "loss": 1.2853, "step": 16580 }, { "epoch": 183.22295805739515, "learning_rate": 7.428571428571427e-07, "loss": 1.3211, "step": 16600 }, { "epoch": 183.4437086092715, "learning_rate": 7.425435540069687e-07, "loss": 1.2444, "step": 16620 }, { "epoch": 183.6644591611479, "learning_rate": 7.422299651567944e-07, "loss": 1.2557, "step": 16640 }, { "epoch": 183.8852097130243, "learning_rate": 7.419163763066202e-07, "loss": 1.249, "step": 16660 }, { "epoch": 184.10596026490066, "learning_rate": 7.416027874564459e-07, "loss": 1.2379, "step": 16680 }, { "epoch": 184.32671081677705, "learning_rate": 7.412891986062718e-07, "loss": 1.2756, "step": 16700 }, { "epoch": 184.54746136865342, "learning_rate": 7.409756097560975e-07, "loss": 1.2827, "step": 16720 }, { "epoch": 184.7682119205298, "learning_rate": 7.406620209059234e-07, "loss": 1.2726, "step": 16740 }, { "epoch": 184.98896247240617, "learning_rate": 7.403484320557491e-07, "loss": 1.298, "step": 16760 }, { "epoch": 185.20971302428256, "learning_rate": 7.400348432055749e-07, "loss": 1.2739, "step": 16780 }, { "epoch": 185.43046357615893, "learning_rate": 7.397212543554006e-07, "loss": 1.2657, "step": 16800 }, { "epoch": 185.65121412803532, "learning_rate": 7.394076655052264e-07, "loss": 1.2605, "step": 16820 }, { "epoch": 185.8719646799117, "learning_rate": 7.390940766550522e-07, "loss": 1.2664, "step": 16840 }, { "epoch": 186.09271523178808, "learning_rate": 7.38780487804878e-07, "loss": 1.278, "step": 16860 }, { "epoch": 186.31346578366447, "learning_rate": 7.384668989547038e-07, "loss": 1.276, "step": 16880 }, { "epoch": 186.53421633554083, "learning_rate": 7.381533101045296e-07, "loss": 1.2683, "step": 16900 }, { "epoch": 186.75496688741723, "learning_rate": 7.378397212543554e-07, "loss": 1.265, "step": 16920 }, { "epoch": 186.9757174392936, "learning_rate": 7.375261324041811e-07, "loss": 1.3122, "step": 16940 }, { "epoch": 187.19646799116998, "learning_rate": 7.37212543554007e-07, "loss": 1.2669, "step": 16960 }, { "epoch": 187.41721854304635, "learning_rate": 7.368989547038327e-07, "loss": 1.2789, "step": 16980 }, { "epoch": 187.63796909492274, "learning_rate": 7.365853658536585e-07, "loss": 1.2457, "step": 17000 }, { "epoch": 187.85871964679913, "learning_rate": 7.362717770034843e-07, "loss": 1.3173, "step": 17020 }, { "epoch": 188.0794701986755, "learning_rate": 7.359581881533101e-07, "loss": 1.2793, "step": 17040 }, { "epoch": 188.3002207505519, "learning_rate": 7.356445993031358e-07, "loss": 1.2538, "step": 17060 }, { "epoch": 188.52097130242825, "learning_rate": 7.353310104529616e-07, "loss": 1.2564, "step": 17080 }, { "epoch": 188.74172185430464, "learning_rate": 7.350174216027874e-07, "loss": 1.2794, "step": 17100 }, { "epoch": 188.962472406181, "learning_rate": 7.347038327526132e-07, "loss": 1.2664, "step": 17120 }, { "epoch": 189.1832229580574, "learning_rate": 7.343902439024389e-07, "loss": 1.2489, "step": 17140 }, { "epoch": 189.40397350993376, "learning_rate": 7.340766550522648e-07, "loss": 1.29, "step": 17160 }, { "epoch": 189.62472406181016, "learning_rate": 7.337630662020907e-07, "loss": 1.2308, "step": 17180 }, { "epoch": 189.84547461368655, "learning_rate": 7.334494773519163e-07, "loss": 1.2809, "step": 17200 }, { "epoch": 190.0662251655629, "learning_rate": 7.331358885017421e-07, "loss": 1.2614, "step": 17220 }, { "epoch": 190.2869757174393, "learning_rate": 7.328222996515679e-07, "loss": 1.3223, "step": 17240 }, { "epoch": 190.50772626931567, "learning_rate": 7.325087108013937e-07, "loss": 1.3043, "step": 17260 }, { "epoch": 190.72847682119206, "learning_rate": 7.321951219512194e-07, "loss": 1.2612, "step": 17280 }, { "epoch": 190.94922737306842, "learning_rate": 7.318815331010453e-07, "loss": 1.2209, "step": 17300 }, { "epoch": 191.16997792494482, "learning_rate": 7.315679442508711e-07, "loss": 1.2478, "step": 17320 }, { "epoch": 191.39072847682118, "learning_rate": 7.312543554006968e-07, "loss": 1.3277, "step": 17340 }, { "epoch": 191.61147902869757, "learning_rate": 7.309407665505225e-07, "loss": 1.2715, "step": 17360 }, { "epoch": 191.83222958057394, "learning_rate": 7.306271777003485e-07, "loss": 1.2433, "step": 17380 }, { "epoch": 192.05298013245033, "learning_rate": 7.303135888501742e-07, "loss": 1.2975, "step": 17400 }, { "epoch": 192.27373068432672, "learning_rate": 7.299999999999998e-07, "loss": 1.2325, "step": 17420 }, { "epoch": 192.4944812362031, "learning_rate": 7.296864111498258e-07, "loss": 1.2685, "step": 17440 }, { "epoch": 192.71523178807948, "learning_rate": 7.293728222996516e-07, "loss": 1.285, "step": 17460 }, { "epoch": 192.93598233995584, "learning_rate": 7.290592334494773e-07, "loss": 1.2741, "step": 17480 }, { "epoch": 193.15673289183223, "learning_rate": 7.28745644599303e-07, "loss": 1.275, "step": 17500 }, { "epoch": 193.3774834437086, "learning_rate": 7.284320557491289e-07, "loss": 1.2641, "step": 17520 }, { "epoch": 193.598233995585, "learning_rate": 7.281184668989547e-07, "loss": 1.3078, "step": 17540 }, { "epoch": 193.81898454746135, "learning_rate": 7.278048780487804e-07, "loss": 1.2516, "step": 17560 }, { "epoch": 194.03973509933775, "learning_rate": 7.274912891986062e-07, "loss": 1.2708, "step": 17580 }, { "epoch": 194.26048565121414, "learning_rate": 7.271777003484321e-07, "loss": 1.265, "step": 17600 }, { "epoch": 194.4812362030905, "learning_rate": 7.268641114982578e-07, "loss": 1.2597, "step": 17620 }, { "epoch": 194.7019867549669, "learning_rate": 7.265505226480835e-07, "loss": 1.2685, "step": 17640 }, { "epoch": 194.92273730684326, "learning_rate": 7.262369337979094e-07, "loss": 1.2869, "step": 17660 }, { "epoch": 195.14348785871965, "learning_rate": 7.259233449477352e-07, "loss": 1.2597, "step": 17680 }, { "epoch": 195.36423841059602, "learning_rate": 7.25609756097561e-07, "loss": 1.2579, "step": 17700 }, { "epoch": 195.5849889624724, "learning_rate": 7.252961672473867e-07, "loss": 1.2641, "step": 17720 }, { "epoch": 195.80573951434877, "learning_rate": 7.249825783972126e-07, "loss": 1.2782, "step": 17740 }, { "epoch": 196.02649006622516, "learning_rate": 7.246689895470382e-07, "loss": 1.2705, "step": 17760 }, { "epoch": 196.24724061810156, "learning_rate": 7.24355400696864e-07, "loss": 1.2878, "step": 17780 }, { "epoch": 196.46799116997792, "learning_rate": 7.240418118466899e-07, "loss": 1.3158, "step": 17800 }, { "epoch": 196.6887417218543, "learning_rate": 7.237282229965157e-07, "loss": 1.2371, "step": 17820 }, { "epoch": 196.90949227373068, "learning_rate": 7.234146341463414e-07, "loss": 1.2553, "step": 17840 }, { "epoch": 197.13024282560707, "learning_rate": 7.231010452961672e-07, "loss": 1.2847, "step": 17860 }, { "epoch": 197.35099337748343, "learning_rate": 7.22787456445993e-07, "loss": 1.2516, "step": 17880 }, { "epoch": 197.57174392935983, "learning_rate": 7.224738675958188e-07, "loss": 1.2802, "step": 17900 }, { "epoch": 197.7924944812362, "learning_rate": 7.221602787456446e-07, "loss": 1.2839, "step": 17920 }, { "epoch": 198.01324503311258, "learning_rate": 7.218466898954704e-07, "loss": 1.2259, "step": 17940 }, { "epoch": 198.23399558498897, "learning_rate": 7.215331010452961e-07, "loss": 1.2203, "step": 17960 }, { "epoch": 198.45474613686534, "learning_rate": 7.212195121951219e-07, "loss": 1.2837, "step": 17980 }, { "epoch": 198.67549668874173, "learning_rate": 7.209059233449475e-07, "loss": 1.2906, "step": 18000 }, { "epoch": 198.67549668874173, "eval_bleu": 48.3044, "eval_gen_len": 8.5333, "eval_loss": 2.004776954650879, "eval_runtime": 3.1873, "eval_samples_per_second": 9.412, "eval_steps_per_second": 1.882, "step": 18000 }, { "epoch": 198.8962472406181, "learning_rate": 7.205923344947735e-07, "loss": 1.2383, "step": 18020 }, { "epoch": 199.1169977924945, "learning_rate": 7.202787456445993e-07, "loss": 1.2724, "step": 18040 }, { "epoch": 199.33774834437085, "learning_rate": 7.199651567944251e-07, "loss": 1.2588, "step": 18060 }, { "epoch": 199.55849889624724, "learning_rate": 7.196515679442509e-07, "loss": 1.2528, "step": 18080 }, { "epoch": 199.7792494481236, "learning_rate": 7.193379790940766e-07, "loss": 1.247, "step": 18100 }, { "epoch": 200.0, "learning_rate": 7.190243902439024e-07, "loss": 1.2697, "step": 18120 }, { "epoch": 200.2207505518764, "learning_rate": 7.187108013937282e-07, "loss": 1.2868, "step": 18140 }, { "epoch": 200.44150110375276, "learning_rate": 7.18397212543554e-07, "loss": 1.291, "step": 18160 }, { "epoch": 200.66225165562915, "learning_rate": 7.180836236933797e-07, "loss": 1.2906, "step": 18180 }, { "epoch": 200.8830022075055, "learning_rate": 7.177700348432056e-07, "loss": 1.2543, "step": 18200 }, { "epoch": 201.1037527593819, "learning_rate": 7.174564459930313e-07, "loss": 1.2414, "step": 18220 }, { "epoch": 201.32450331125827, "learning_rate": 7.171428571428571e-07, "loss": 1.2393, "step": 18240 }, { "epoch": 201.54525386313466, "learning_rate": 7.168292682926829e-07, "loss": 1.2781, "step": 18260 }, { "epoch": 201.76600441501103, "learning_rate": 7.165156794425087e-07, "loss": 1.2625, "step": 18280 }, { "epoch": 201.98675496688742, "learning_rate": 7.162020905923345e-07, "loss": 1.2435, "step": 18300 }, { "epoch": 202.2075055187638, "learning_rate": 7.158885017421602e-07, "loss": 1.2254, "step": 18320 }, { "epoch": 202.42825607064017, "learning_rate": 7.15574912891986e-07, "loss": 1.2802, "step": 18340 }, { "epoch": 202.64900662251657, "learning_rate": 7.152613240418119e-07, "loss": 1.259, "step": 18360 }, { "epoch": 202.86975717439293, "learning_rate": 7.149477351916376e-07, "loss": 1.2928, "step": 18380 }, { "epoch": 203.09050772626932, "learning_rate": 7.146341463414633e-07, "loss": 1.2767, "step": 18400 }, { "epoch": 203.3112582781457, "learning_rate": 7.143205574912892e-07, "loss": 1.2271, "step": 18420 }, { "epoch": 203.53200883002208, "learning_rate": 7.140069686411149e-07, "loss": 1.241, "step": 18440 }, { "epoch": 203.75275938189844, "learning_rate": 7.136933797909407e-07, "loss": 1.261, "step": 18460 }, { "epoch": 203.97350993377484, "learning_rate": 7.133797909407665e-07, "loss": 1.2636, "step": 18480 }, { "epoch": 204.19426048565123, "learning_rate": 7.130662020905924e-07, "loss": 1.2872, "step": 18500 }, { "epoch": 204.4150110375276, "learning_rate": 7.12752613240418e-07, "loss": 1.2455, "step": 18520 }, { "epoch": 204.63576158940398, "learning_rate": 7.124390243902438e-07, "loss": 1.2424, "step": 18540 }, { "epoch": 204.85651214128035, "learning_rate": 7.121254355400697e-07, "loss": 1.2389, "step": 18560 }, { "epoch": 205.07726269315674, "learning_rate": 7.118118466898955e-07, "loss": 1.2512, "step": 18580 }, { "epoch": 205.2980132450331, "learning_rate": 7.114982578397212e-07, "loss": 1.2628, "step": 18600 }, { "epoch": 205.5187637969095, "learning_rate": 7.11184668989547e-07, "loss": 1.2864, "step": 18620 }, { "epoch": 205.73951434878586, "learning_rate": 7.108710801393729e-07, "loss": 1.2508, "step": 18640 }, { "epoch": 205.96026490066225, "learning_rate": 7.105574912891985e-07, "loss": 1.2848, "step": 18660 }, { "epoch": 206.18101545253865, "learning_rate": 7.102439024390243e-07, "loss": 1.2312, "step": 18680 }, { "epoch": 206.401766004415, "learning_rate": 7.099303135888501e-07, "loss": 1.262, "step": 18700 }, { "epoch": 206.6225165562914, "learning_rate": 7.09616724738676e-07, "loss": 1.2394, "step": 18720 }, { "epoch": 206.84326710816777, "learning_rate": 7.093031358885016e-07, "loss": 1.2877, "step": 18740 }, { "epoch": 207.06401766004416, "learning_rate": 7.089895470383275e-07, "loss": 1.3101, "step": 18760 }, { "epoch": 207.28476821192052, "learning_rate": 7.086759581881534e-07, "loss": 1.2197, "step": 18780 }, { "epoch": 207.5055187637969, "learning_rate": 7.083623693379791e-07, "loss": 1.275, "step": 18800 }, { "epoch": 207.72626931567328, "learning_rate": 7.080487804878046e-07, "loss": 1.2956, "step": 18820 }, { "epoch": 207.94701986754967, "learning_rate": 7.077351916376306e-07, "loss": 1.2694, "step": 18840 }, { "epoch": 208.16777041942606, "learning_rate": 7.074216027874565e-07, "loss": 1.2505, "step": 18860 }, { "epoch": 208.38852097130243, "learning_rate": 7.071080139372822e-07, "loss": 1.2509, "step": 18880 }, { "epoch": 208.60927152317882, "learning_rate": 7.06794425087108e-07, "loss": 1.2856, "step": 18900 }, { "epoch": 208.83002207505518, "learning_rate": 7.064808362369337e-07, "loss": 1.2689, "step": 18920 }, { "epoch": 209.05077262693158, "learning_rate": 7.061672473867596e-07, "loss": 1.2656, "step": 18940 }, { "epoch": 209.27152317880794, "learning_rate": 7.058536585365852e-07, "loss": 1.2616, "step": 18960 }, { "epoch": 209.49227373068433, "learning_rate": 7.055400696864111e-07, "loss": 1.2555, "step": 18980 }, { "epoch": 209.7130242825607, "learning_rate": 7.052264808362369e-07, "loss": 1.2557, "step": 19000 }, { "epoch": 209.9337748344371, "learning_rate": 7.049128919860627e-07, "loss": 1.2544, "step": 19020 }, { "epoch": 210.15452538631345, "learning_rate": 7.045993031358884e-07, "loss": 1.2361, "step": 19040 }, { "epoch": 210.37527593818984, "learning_rate": 7.042857142857143e-07, "loss": 1.2912, "step": 19060 }, { "epoch": 210.59602649006624, "learning_rate": 7.039721254355401e-07, "loss": 1.2768, "step": 19080 }, { "epoch": 210.8167770419426, "learning_rate": 7.036585365853658e-07, "loss": 1.2375, "step": 19100 }, { "epoch": 211.037527593819, "learning_rate": 7.033449477351916e-07, "loss": 1.2215, "step": 19120 }, { "epoch": 211.25827814569536, "learning_rate": 7.030313588850174e-07, "loss": 1.2571, "step": 19140 }, { "epoch": 211.47902869757175, "learning_rate": 7.027177700348431e-07, "loss": 1.2415, "step": 19160 }, { "epoch": 211.6997792494481, "learning_rate": 7.024041811846689e-07, "loss": 1.2412, "step": 19180 }, { "epoch": 211.9205298013245, "learning_rate": 7.020905923344948e-07, "loss": 1.2865, "step": 19200 }, { "epoch": 212.14128035320087, "learning_rate": 7.017770034843205e-07, "loss": 1.2568, "step": 19220 }, { "epoch": 212.36203090507726, "learning_rate": 7.014634146341463e-07, "loss": 1.234, "step": 19240 }, { "epoch": 212.58278145695365, "learning_rate": 7.01149825783972e-07, "loss": 1.2683, "step": 19260 }, { "epoch": 212.80353200883002, "learning_rate": 7.008362369337979e-07, "loss": 1.2617, "step": 19280 }, { "epoch": 213.0242825607064, "learning_rate": 7.005226480836236e-07, "loss": 1.2274, "step": 19300 }, { "epoch": 213.24503311258277, "learning_rate": 7.002090592334495e-07, "loss": 1.2525, "step": 19320 }, { "epoch": 213.46578366445917, "learning_rate": 6.998954703832753e-07, "loss": 1.2286, "step": 19340 }, { "epoch": 213.68653421633553, "learning_rate": 6.99581881533101e-07, "loss": 1.2269, "step": 19360 }, { "epoch": 213.90728476821192, "learning_rate": 6.992682926829268e-07, "loss": 1.2655, "step": 19380 }, { "epoch": 214.1280353200883, "learning_rate": 6.989547038327524e-07, "loss": 1.2273, "step": 19400 }, { "epoch": 214.34878587196468, "learning_rate": 6.986411149825784e-07, "loss": 1.2376, "step": 19420 }, { "epoch": 214.56953642384107, "learning_rate": 6.983275261324041e-07, "loss": 1.2571, "step": 19440 }, { "epoch": 214.79028697571744, "learning_rate": 6.9801393728223e-07, "loss": 1.2735, "step": 19460 }, { "epoch": 215.01103752759383, "learning_rate": 6.977003484320557e-07, "loss": 1.2574, "step": 19480 }, { "epoch": 215.2317880794702, "learning_rate": 6.973867595818815e-07, "loss": 1.2462, "step": 19500 }, { "epoch": 215.45253863134658, "learning_rate": 6.970731707317072e-07, "loss": 1.2383, "step": 19520 }, { "epoch": 215.67328918322295, "learning_rate": 6.967595818815331e-07, "loss": 1.2504, "step": 19540 }, { "epoch": 215.89403973509934, "learning_rate": 6.964459930313588e-07, "loss": 1.2697, "step": 19560 }, { "epoch": 216.1147902869757, "learning_rate": 6.961324041811846e-07, "loss": 1.2213, "step": 19580 }, { "epoch": 216.3355408388521, "learning_rate": 6.958188153310105e-07, "loss": 1.2401, "step": 19600 }, { "epoch": 216.5562913907285, "learning_rate": 6.955052264808362e-07, "loss": 1.247, "step": 19620 }, { "epoch": 216.77704194260485, "learning_rate": 6.95191637630662e-07, "loss": 1.2869, "step": 19640 }, { "epoch": 216.99779249448125, "learning_rate": 6.948780487804877e-07, "loss": 1.3116, "step": 19660 }, { "epoch": 217.2185430463576, "learning_rate": 6.945644599303137e-07, "loss": 1.2763, "step": 19680 }, { "epoch": 217.439293598234, "learning_rate": 6.942508710801393e-07, "loss": 1.2473, "step": 19700 }, { "epoch": 217.66004415011037, "learning_rate": 6.939372822299651e-07, "loss": 1.2393, "step": 19720 }, { "epoch": 217.88079470198676, "learning_rate": 6.936236933797909e-07, "loss": 1.2357, "step": 19740 }, { "epoch": 218.10154525386312, "learning_rate": 6.933101045296168e-07, "loss": 1.2495, "step": 19760 }, { "epoch": 218.32229580573951, "learning_rate": 6.929965156794424e-07, "loss": 1.2524, "step": 19780 }, { "epoch": 218.5430463576159, "learning_rate": 6.926829268292682e-07, "loss": 1.2583, "step": 19800 }, { "epoch": 218.76379690949227, "learning_rate": 6.923693379790941e-07, "loss": 1.2818, "step": 19820 }, { "epoch": 218.98454746136866, "learning_rate": 6.920557491289198e-07, "loss": 1.2079, "step": 19840 }, { "epoch": 219.20529801324503, "learning_rate": 6.917421602787455e-07, "loss": 1.2712, "step": 19860 }, { "epoch": 219.42604856512142, "learning_rate": 6.914285714285714e-07, "loss": 1.2168, "step": 19880 }, { "epoch": 219.64679911699778, "learning_rate": 6.911149825783973e-07, "loss": 1.2321, "step": 19900 }, { "epoch": 219.86754966887418, "learning_rate": 6.908013937282229e-07, "loss": 1.2472, "step": 19920 }, { "epoch": 220.08830022075054, "learning_rate": 6.904878048780488e-07, "loss": 1.2392, "step": 19940 }, { "epoch": 220.30905077262693, "learning_rate": 6.901742160278746e-07, "loss": 1.2442, "step": 19960 }, { "epoch": 220.52980132450332, "learning_rate": 6.898606271777004e-07, "loss": 1.2349, "step": 19980 }, { "epoch": 220.7505518763797, "learning_rate": 6.89547038327526e-07, "loss": 1.2566, "step": 20000 }, { "epoch": 220.97130242825608, "learning_rate": 6.892334494773519e-07, "loss": 1.2482, "step": 20020 }, { "epoch": 221.19205298013244, "learning_rate": 6.889198606271777e-07, "loss": 1.2711, "step": 20040 }, { "epoch": 221.41280353200884, "learning_rate": 6.886062717770034e-07, "loss": 1.23, "step": 20060 }, { "epoch": 221.6335540838852, "learning_rate": 6.882926829268293e-07, "loss": 1.2464, "step": 20080 }, { "epoch": 221.8543046357616, "learning_rate": 6.879790940766551e-07, "loss": 1.2735, "step": 20100 }, { "epoch": 222.07505518763796, "learning_rate": 6.876655052264808e-07, "loss": 1.2507, "step": 20120 }, { "epoch": 222.29580573951435, "learning_rate": 6.873519163763065e-07, "loss": 1.2515, "step": 20140 }, { "epoch": 222.51655629139074, "learning_rate": 6.870383275261324e-07, "loss": 1.2369, "step": 20160 }, { "epoch": 222.7373068432671, "learning_rate": 6.867247386759582e-07, "loss": 1.2528, "step": 20180 }, { "epoch": 222.9580573951435, "learning_rate": 6.86411149825784e-07, "loss": 1.2579, "step": 20200 }, { "epoch": 223.17880794701986, "learning_rate": 6.860975609756097e-07, "loss": 1.2472, "step": 20220 }, { "epoch": 223.39955849889625, "learning_rate": 6.857839721254356e-07, "loss": 1.2572, "step": 20240 }, { "epoch": 223.62030905077262, "learning_rate": 6.854703832752613e-07, "loss": 1.2051, "step": 20260 }, { "epoch": 223.841059602649, "learning_rate": 6.851567944250871e-07, "loss": 1.2518, "step": 20280 }, { "epoch": 224.06181015452538, "learning_rate": 6.848432055749128e-07, "loss": 1.3164, "step": 20300 }, { "epoch": 224.28256070640177, "learning_rate": 6.845296167247387e-07, "loss": 1.2392, "step": 20320 }, { "epoch": 224.50331125827816, "learning_rate": 6.842160278745644e-07, "loss": 1.2513, "step": 20340 }, { "epoch": 224.72406181015452, "learning_rate": 6.839024390243902e-07, "loss": 1.2702, "step": 20360 }, { "epoch": 224.94481236203092, "learning_rate": 6.83588850174216e-07, "loss": 1.2733, "step": 20380 }, { "epoch": 225.16556291390728, "learning_rate": 6.832752613240418e-07, "loss": 1.2207, "step": 20400 }, { "epoch": 225.38631346578367, "learning_rate": 6.829616724738676e-07, "loss": 1.2608, "step": 20420 }, { "epoch": 225.60706401766004, "learning_rate": 6.826480836236933e-07, "loss": 1.2361, "step": 20440 }, { "epoch": 225.82781456953643, "learning_rate": 6.823344947735192e-07, "loss": 1.2218, "step": 20460 }, { "epoch": 226.0485651214128, "learning_rate": 6.820209059233449e-07, "loss": 1.2405, "step": 20480 }, { "epoch": 226.26931567328919, "learning_rate": 6.817073170731707e-07, "loss": 1.2829, "step": 20500 }, { "epoch": 226.49006622516555, "learning_rate": 6.813937282229965e-07, "loss": 1.2286, "step": 20520 }, { "epoch": 226.71081677704194, "learning_rate": 6.810801393728223e-07, "loss": 1.2402, "step": 20540 }, { "epoch": 226.93156732891833, "learning_rate": 6.80766550522648e-07, "loss": 1.2534, "step": 20560 }, { "epoch": 227.1523178807947, "learning_rate": 6.804529616724738e-07, "loss": 1.2742, "step": 20580 }, { "epoch": 227.3730684326711, "learning_rate": 6.801393728222996e-07, "loss": 1.2341, "step": 20600 }, { "epoch": 227.59381898454745, "learning_rate": 6.798257839721254e-07, "loss": 1.2615, "step": 20620 }, { "epoch": 227.81456953642385, "learning_rate": 6.795121951219512e-07, "loss": 1.239, "step": 20640 }, { "epoch": 228.0353200883002, "learning_rate": 6.791986062717771e-07, "loss": 1.2177, "step": 20660 }, { "epoch": 228.2560706401766, "learning_rate": 6.788850174216028e-07, "loss": 1.1791, "step": 20680 }, { "epoch": 228.47682119205297, "learning_rate": 6.785714285714285e-07, "loss": 1.2365, "step": 20700 }, { "epoch": 228.69757174392936, "learning_rate": 6.782578397212544e-07, "loss": 1.3025, "step": 20720 }, { "epoch": 228.91832229580575, "learning_rate": 6.779442508710801e-07, "loss": 1.2399, "step": 20740 }, { "epoch": 229.13907284768212, "learning_rate": 6.776306620209059e-07, "loss": 1.2693, "step": 20760 }, { "epoch": 229.3598233995585, "learning_rate": 6.773170731707317e-07, "loss": 1.2371, "step": 20780 }, { "epoch": 229.58057395143487, "learning_rate": 6.770034843205575e-07, "loss": 1.2414, "step": 20800 }, { "epoch": 229.80132450331126, "learning_rate": 6.766898954703832e-07, "loss": 1.2504, "step": 20820 }, { "epoch": 230.02207505518763, "learning_rate": 6.76376306620209e-07, "loss": 1.251, "step": 20840 }, { "epoch": 230.24282560706402, "learning_rate": 6.760627177700348e-07, "loss": 1.278, "step": 20860 }, { "epoch": 230.46357615894038, "learning_rate": 6.757491289198606e-07, "loss": 1.2421, "step": 20880 }, { "epoch": 230.68432671081678, "learning_rate": 6.754355400696864e-07, "loss": 1.2473, "step": 20900 }, { "epoch": 230.90507726269317, "learning_rate": 6.751219512195122e-07, "loss": 1.2127, "step": 20920 }, { "epoch": 231.12582781456953, "learning_rate": 6.74808362369338e-07, "loss": 1.2465, "step": 20940 }, { "epoch": 231.34657836644593, "learning_rate": 6.744947735191637e-07, "loss": 1.238, "step": 20960 }, { "epoch": 231.5673289183223, "learning_rate": 6.741811846689895e-07, "loss": 1.2301, "step": 20980 }, { "epoch": 231.78807947019868, "learning_rate": 6.738675958188153e-07, "loss": 1.2398, "step": 21000 }, { "epoch": 232.00883002207505, "learning_rate": 6.735540069686411e-07, "loss": 1.2685, "step": 21020 }, { "epoch": 232.22958057395144, "learning_rate": 6.732404181184668e-07, "loss": 1.2084, "step": 21040 }, { "epoch": 232.4503311258278, "learning_rate": 6.729268292682927e-07, "loss": 1.2448, "step": 21060 }, { "epoch": 232.6710816777042, "learning_rate": 6.726132404181185e-07, "loss": 1.2279, "step": 21080 }, { "epoch": 232.8918322295806, "learning_rate": 6.722996515679442e-07, "loss": 1.2432, "step": 21100 }, { "epoch": 233.11258278145695, "learning_rate": 6.719860627177699e-07, "loss": 1.2139, "step": 21120 }, { "epoch": 233.33333333333334, "learning_rate": 6.716724738675958e-07, "loss": 1.2181, "step": 21140 }, { "epoch": 233.5540838852097, "learning_rate": 6.713588850174216e-07, "loss": 1.2451, "step": 21160 }, { "epoch": 233.7748344370861, "learning_rate": 6.710452961672473e-07, "loss": 1.2809, "step": 21180 }, { "epoch": 233.99558498896246, "learning_rate": 6.707317073170731e-07, "loss": 1.2033, "step": 21200 }, { "epoch": 234.21633554083886, "learning_rate": 6.70418118466899e-07, "loss": 1.2253, "step": 21220 }, { "epoch": 234.43708609271522, "learning_rate": 6.701045296167247e-07, "loss": 1.2227, "step": 21240 }, { "epoch": 234.6578366445916, "learning_rate": 6.697909407665504e-07, "loss": 1.2314, "step": 21260 }, { "epoch": 234.878587196468, "learning_rate": 6.694773519163763e-07, "loss": 1.2252, "step": 21280 }, { "epoch": 235.09933774834437, "learning_rate": 6.691637630662021e-07, "loss": 1.2433, "step": 21300 }, { "epoch": 235.32008830022076, "learning_rate": 6.688501742160278e-07, "loss": 1.2805, "step": 21320 }, { "epoch": 235.54083885209712, "learning_rate": 6.685365853658536e-07, "loss": 1.2463, "step": 21340 }, { "epoch": 235.76158940397352, "learning_rate": 6.682229965156795e-07, "loss": 1.2023, "step": 21360 }, { "epoch": 235.98233995584988, "learning_rate": 6.679094076655052e-07, "loss": 1.2361, "step": 21380 }, { "epoch": 236.20309050772627, "learning_rate": 6.675958188153309e-07, "loss": 1.2028, "step": 21400 }, { "epoch": 236.42384105960264, "learning_rate": 6.672822299651567e-07, "loss": 1.2244, "step": 21420 }, { "epoch": 236.64459161147903, "learning_rate": 6.669686411149826e-07, "loss": 1.2769, "step": 21440 }, { "epoch": 236.86534216335542, "learning_rate": 6.666550522648082e-07, "loss": 1.2867, "step": 21460 }, { "epoch": 237.08609271523179, "learning_rate": 6.663414634146342e-07, "loss": 1.2298, "step": 21480 }, { "epoch": 237.30684326710818, "learning_rate": 6.6602787456446e-07, "loss": 1.2307, "step": 21500 }, { "epoch": 237.52759381898454, "learning_rate": 6.657142857142857e-07, "loss": 1.2522, "step": 21520 }, { "epoch": 237.74834437086093, "learning_rate": 6.654006968641114e-07, "loss": 1.2607, "step": 21540 }, { "epoch": 237.9690949227373, "learning_rate": 6.650871080139373e-07, "loss": 1.1926, "step": 21560 }, { "epoch": 238.1898454746137, "learning_rate": 6.647735191637631e-07, "loss": 1.209, "step": 21580 }, { "epoch": 238.41059602649005, "learning_rate": 6.644599303135888e-07, "loss": 1.2272, "step": 21600 }, { "epoch": 238.41059602649005, "eval_bleu": 47.3367, "eval_gen_len": 8.6, "eval_loss": 2.001694440841675, "eval_runtime": 3.1432, "eval_samples_per_second": 9.544, "eval_steps_per_second": 1.909, "step": 21600 }, { "epoch": 238.63134657836645, "learning_rate": 6.641463414634146e-07, "loss": 1.2312, "step": 21620 }, { "epoch": 238.85209713024284, "learning_rate": 6.638327526132404e-07, "loss": 1.295, "step": 21640 }, { "epoch": 239.0728476821192, "learning_rate": 6.635191637630662e-07, "loss": 1.2304, "step": 21660 }, { "epoch": 239.2935982339956, "learning_rate": 6.632055749128919e-07, "loss": 1.2744, "step": 21680 }, { "epoch": 239.51434878587196, "learning_rate": 6.628919860627178e-07, "loss": 1.2519, "step": 21700 }, { "epoch": 239.73509933774835, "learning_rate": 6.625783972125435e-07, "loss": 1.2586, "step": 21720 }, { "epoch": 239.95584988962472, "learning_rate": 6.622648083623693e-07, "loss": 1.2388, "step": 21740 }, { "epoch": 240.1766004415011, "learning_rate": 6.61951219512195e-07, "loss": 1.2829, "step": 21760 }, { "epoch": 240.39735099337747, "learning_rate": 6.616376306620209e-07, "loss": 1.1969, "step": 21780 }, { "epoch": 240.61810154525386, "learning_rate": 6.613240418118467e-07, "loss": 1.2442, "step": 21800 }, { "epoch": 240.83885209713026, "learning_rate": 6.610104529616724e-07, "loss": 1.2221, "step": 21820 }, { "epoch": 241.05960264900662, "learning_rate": 6.606968641114983e-07, "loss": 1.1862, "step": 21840 }, { "epoch": 241.280353200883, "learning_rate": 6.60383275261324e-07, "loss": 1.2341, "step": 21860 }, { "epoch": 241.50110375275938, "learning_rate": 6.600696864111498e-07, "loss": 1.1884, "step": 21880 }, { "epoch": 241.72185430463577, "learning_rate": 6.597560975609756e-07, "loss": 1.2691, "step": 21900 }, { "epoch": 241.94260485651213, "learning_rate": 6.594425087108014e-07, "loss": 1.2316, "step": 21920 }, { "epoch": 242.16335540838853, "learning_rate": 6.591289198606271e-07, "loss": 1.224, "step": 21940 }, { "epoch": 242.3841059602649, "learning_rate": 6.588153310104529e-07, "loss": 1.256, "step": 21960 }, { "epoch": 242.60485651214128, "learning_rate": 6.585017421602787e-07, "loss": 1.2487, "step": 21980 }, { "epoch": 242.82560706401765, "learning_rate": 6.581881533101045e-07, "loss": 1.2238, "step": 22000 }, { "epoch": 243.04635761589404, "learning_rate": 6.578745644599302e-07, "loss": 1.2165, "step": 22020 }, { "epoch": 243.26710816777043, "learning_rate": 6.575609756097561e-07, "loss": 1.2546, "step": 22040 }, { "epoch": 243.4878587196468, "learning_rate": 6.57247386759582e-07, "loss": 1.2281, "step": 22060 }, { "epoch": 243.7086092715232, "learning_rate": 6.569337979094076e-07, "loss": 1.2472, "step": 22080 }, { "epoch": 243.92935982339955, "learning_rate": 6.566202090592335e-07, "loss": 1.2454, "step": 22100 }, { "epoch": 244.15011037527594, "learning_rate": 6.563066202090592e-07, "loss": 1.1844, "step": 22120 }, { "epoch": 244.3708609271523, "learning_rate": 6.55993031358885e-07, "loss": 1.2077, "step": 22140 }, { "epoch": 244.5916114790287, "learning_rate": 6.556794425087107e-07, "loss": 1.266, "step": 22160 }, { "epoch": 244.81236203090506, "learning_rate": 6.553658536585366e-07, "loss": 1.2259, "step": 22180 }, { "epoch": 245.03311258278146, "learning_rate": 6.550522648083623e-07, "loss": 1.3201, "step": 22200 }, { "epoch": 245.25386313465785, "learning_rate": 6.547386759581881e-07, "loss": 1.2751, "step": 22220 }, { "epoch": 245.4746136865342, "learning_rate": 6.544250871080139e-07, "loss": 1.2238, "step": 22240 }, { "epoch": 245.6953642384106, "learning_rate": 6.541114982578398e-07, "loss": 1.256, "step": 22260 }, { "epoch": 245.91611479028697, "learning_rate": 6.537979094076654e-07, "loss": 1.2226, "step": 22280 }, { "epoch": 246.13686534216336, "learning_rate": 6.534843205574913e-07, "loss": 1.2107, "step": 22300 }, { "epoch": 246.35761589403972, "learning_rate": 6.531707317073171e-07, "loss": 1.2401, "step": 22320 }, { "epoch": 246.57836644591612, "learning_rate": 6.528571428571429e-07, "loss": 1.2591, "step": 22340 }, { "epoch": 246.79911699779248, "learning_rate": 6.525435540069686e-07, "loss": 1.2154, "step": 22360 }, { "epoch": 247.01986754966887, "learning_rate": 6.522299651567944e-07, "loss": 1.2455, "step": 22380 }, { "epoch": 247.24061810154527, "learning_rate": 6.519163763066204e-07, "loss": 1.2695, "step": 22400 }, { "epoch": 247.46136865342163, "learning_rate": 6.516027874564459e-07, "loss": 1.2037, "step": 22420 }, { "epoch": 247.68211920529802, "learning_rate": 6.512891986062717e-07, "loss": 1.2058, "step": 22440 }, { "epoch": 247.9028697571744, "learning_rate": 6.509756097560975e-07, "loss": 1.2298, "step": 22460 }, { "epoch": 248.12362030905078, "learning_rate": 6.506620209059234e-07, "loss": 1.2185, "step": 22480 }, { "epoch": 248.34437086092714, "learning_rate": 6.50348432055749e-07, "loss": 1.2271, "step": 22500 }, { "epoch": 248.56512141280353, "learning_rate": 6.500348432055749e-07, "loss": 1.2277, "step": 22520 }, { "epoch": 248.7858719646799, "learning_rate": 6.497212543554008e-07, "loss": 1.2763, "step": 22540 }, { "epoch": 249.0066225165563, "learning_rate": 6.494076655052265e-07, "loss": 1.2158, "step": 22560 }, { "epoch": 249.22737306843268, "learning_rate": 6.490940766550521e-07, "loss": 1.205, "step": 22580 }, { "epoch": 249.44812362030905, "learning_rate": 6.48780487804878e-07, "loss": 1.2283, "step": 22600 }, { "epoch": 249.66887417218544, "learning_rate": 6.484668989547039e-07, "loss": 1.2114, "step": 22620 }, { "epoch": 249.8896247240618, "learning_rate": 6.481533101045295e-07, "loss": 1.2301, "step": 22640 }, { "epoch": 250.1103752759382, "learning_rate": 6.478397212543554e-07, "loss": 1.2607, "step": 22660 }, { "epoch": 250.33112582781456, "learning_rate": 6.475261324041812e-07, "loss": 1.2013, "step": 22680 }, { "epoch": 250.55187637969095, "learning_rate": 6.47212543554007e-07, "loss": 1.2215, "step": 22700 }, { "epoch": 250.77262693156732, "learning_rate": 6.468989547038326e-07, "loss": 1.2495, "step": 22720 }, { "epoch": 250.9933774834437, "learning_rate": 6.465853658536585e-07, "loss": 1.2359, "step": 22740 }, { "epoch": 251.2141280353201, "learning_rate": 6.462717770034843e-07, "loss": 1.2297, "step": 22760 }, { "epoch": 251.43487858719647, "learning_rate": 6.459581881533101e-07, "loss": 1.2304, "step": 22780 }, { "epoch": 251.65562913907286, "learning_rate": 6.456445993031358e-07, "loss": 1.2113, "step": 22800 }, { "epoch": 251.87637969094922, "learning_rate": 6.453310104529617e-07, "loss": 1.2395, "step": 22820 }, { "epoch": 252.0971302428256, "learning_rate": 6.450174216027875e-07, "loss": 1.2217, "step": 22840 }, { "epoch": 252.31788079470198, "learning_rate": 6.447038327526132e-07, "loss": 1.2416, "step": 22860 }, { "epoch": 252.53863134657837, "learning_rate": 6.443902439024391e-07, "loss": 1.2418, "step": 22880 }, { "epoch": 252.75938189845473, "learning_rate": 6.440766550522648e-07, "loss": 1.2268, "step": 22900 }, { "epoch": 252.98013245033113, "learning_rate": 6.437630662020906e-07, "loss": 1.2491, "step": 22920 }, { "epoch": 253.20088300220752, "learning_rate": 6.434494773519163e-07, "loss": 1.2296, "step": 22940 }, { "epoch": 253.42163355408388, "learning_rate": 6.431358885017422e-07, "loss": 1.2027, "step": 22960 }, { "epoch": 253.64238410596028, "learning_rate": 6.428222996515679e-07, "loss": 1.2225, "step": 22980 }, { "epoch": 253.86313465783664, "learning_rate": 6.425087108013937e-07, "loss": 1.2255, "step": 23000 }, { "epoch": 254.08388520971303, "learning_rate": 6.421951219512194e-07, "loss": 1.1998, "step": 23020 }, { "epoch": 254.3046357615894, "learning_rate": 6.418815331010453e-07, "loss": 1.1963, "step": 23040 }, { "epoch": 254.5253863134658, "learning_rate": 6.41567944250871e-07, "loss": 1.2718, "step": 23060 }, { "epoch": 254.74613686534215, "learning_rate": 6.412543554006968e-07, "loss": 1.2729, "step": 23080 }, { "epoch": 254.96688741721854, "learning_rate": 6.409407665505227e-07, "loss": 1.2323, "step": 23100 }, { "epoch": 255.18763796909494, "learning_rate": 6.406271777003484e-07, "loss": 1.1998, "step": 23120 }, { "epoch": 255.4083885209713, "learning_rate": 6.403135888501742e-07, "loss": 1.2034, "step": 23140 }, { "epoch": 255.6291390728477, "learning_rate": 6.399999999999999e-07, "loss": 1.255, "step": 23160 }, { "epoch": 255.84988962472406, "learning_rate": 6.396864111498258e-07, "loss": 1.2207, "step": 23180 }, { "epoch": 256.0706401766004, "learning_rate": 6.393728222996515e-07, "loss": 1.202, "step": 23200 }, { "epoch": 256.2913907284768, "learning_rate": 6.390592334494773e-07, "loss": 1.2386, "step": 23220 }, { "epoch": 256.5121412803532, "learning_rate": 6.387456445993031e-07, "loss": 1.2212, "step": 23240 }, { "epoch": 256.7328918322296, "learning_rate": 6.384320557491289e-07, "loss": 1.2496, "step": 23260 }, { "epoch": 256.95364238410593, "learning_rate": 6.381184668989546e-07, "loss": 1.2011, "step": 23280 }, { "epoch": 257.1743929359823, "learning_rate": 6.378048780487805e-07, "loss": 1.2317, "step": 23300 }, { "epoch": 257.3951434878587, "learning_rate": 6.374912891986062e-07, "loss": 1.2106, "step": 23320 }, { "epoch": 257.6158940397351, "learning_rate": 6.37177700348432e-07, "loss": 1.2412, "step": 23340 }, { "epoch": 257.8366445916115, "learning_rate": 6.368641114982579e-07, "loss": 1.2499, "step": 23360 }, { "epoch": 258.05739514348784, "learning_rate": 6.365505226480836e-07, "loss": 1.2144, "step": 23380 }, { "epoch": 258.27814569536423, "learning_rate": 6.362369337979094e-07, "loss": 1.2171, "step": 23400 }, { "epoch": 258.4988962472406, "learning_rate": 6.359233449477351e-07, "loss": 1.2023, "step": 23420 }, { "epoch": 258.719646799117, "learning_rate": 6.35609756097561e-07, "loss": 1.2195, "step": 23440 }, { "epoch": 258.94039735099335, "learning_rate": 6.352961672473866e-07, "loss": 1.2402, "step": 23460 }, { "epoch": 259.16114790286974, "learning_rate": 6.349825783972125e-07, "loss": 1.2099, "step": 23480 }, { "epoch": 259.38189845474614, "learning_rate": 6.346689895470383e-07, "loss": 1.2101, "step": 23500 }, { "epoch": 259.6026490066225, "learning_rate": 6.343554006968641e-07, "loss": 1.2063, "step": 23520 }, { "epoch": 259.8233995584989, "learning_rate": 6.340418118466898e-07, "loss": 1.2622, "step": 23540 }, { "epoch": 260.04415011037526, "learning_rate": 6.337282229965156e-07, "loss": 1.2606, "step": 23560 }, { "epoch": 260.26490066225165, "learning_rate": 6.334146341463415e-07, "loss": 1.2232, "step": 23580 }, { "epoch": 260.48565121412804, "learning_rate": 6.331010452961672e-07, "loss": 1.247, "step": 23600 }, { "epoch": 260.70640176600443, "learning_rate": 6.327874564459929e-07, "loss": 1.1941, "step": 23620 }, { "epoch": 260.92715231788077, "learning_rate": 6.324738675958188e-07, "loss": 1.1894, "step": 23640 }, { "epoch": 261.14790286975716, "learning_rate": 6.321602787456447e-07, "loss": 1.2888, "step": 23660 }, { "epoch": 261.36865342163355, "learning_rate": 6.318466898954703e-07, "loss": 1.1849, "step": 23680 }, { "epoch": 261.58940397350995, "learning_rate": 6.31533101045296e-07, "loss": 1.1739, "step": 23700 }, { "epoch": 261.81015452538634, "learning_rate": 6.31219512195122e-07, "loss": 1.2167, "step": 23720 }, { "epoch": 262.0309050772627, "learning_rate": 6.309059233449478e-07, "loss": 1.2133, "step": 23740 }, { "epoch": 262.25165562913907, "learning_rate": 6.305923344947734e-07, "loss": 1.2285, "step": 23760 }, { "epoch": 262.47240618101546, "learning_rate": 6.302787456445993e-07, "loss": 1.2533, "step": 23780 }, { "epoch": 262.69315673289185, "learning_rate": 6.29965156794425e-07, "loss": 1.1534, "step": 23800 }, { "epoch": 262.9139072847682, "learning_rate": 6.296515679442508e-07, "loss": 1.2009, "step": 23820 }, { "epoch": 263.1346578366446, "learning_rate": 6.293379790940765e-07, "loss": 1.1822, "step": 23840 }, { "epoch": 263.35540838852097, "learning_rate": 6.290243902439025e-07, "loss": 1.2322, "step": 23860 }, { "epoch": 263.57615894039736, "learning_rate": 6.287108013937282e-07, "loss": 1.2095, "step": 23880 }, { "epoch": 263.79690949227376, "learning_rate": 6.283972125435539e-07, "loss": 1.2768, "step": 23900 }, { "epoch": 264.0176600441501, "learning_rate": 6.280836236933798e-07, "loss": 1.1598, "step": 23920 }, { "epoch": 264.2384105960265, "learning_rate": 6.277700348432056e-07, "loss": 1.2398, "step": 23940 }, { "epoch": 264.4591611479029, "learning_rate": 6.274564459930314e-07, "loss": 1.2455, "step": 23960 }, { "epoch": 264.67991169977927, "learning_rate": 6.27142857142857e-07, "loss": 1.24, "step": 23980 }, { "epoch": 264.9006622516556, "learning_rate": 6.26829268292683e-07, "loss": 1.2203, "step": 24000 }, { "epoch": 265.121412803532, "learning_rate": 6.265156794425087e-07, "loss": 1.2501, "step": 24020 }, { "epoch": 265.3421633554084, "learning_rate": 6.262020905923344e-07, "loss": 1.2449, "step": 24040 }, { "epoch": 265.5629139072848, "learning_rate": 6.258885017421602e-07, "loss": 1.1949, "step": 24060 }, { "epoch": 265.7836644591612, "learning_rate": 6.255749128919861e-07, "loss": 1.2228, "step": 24080 }, { "epoch": 266.0044150110375, "learning_rate": 6.252613240418118e-07, "loss": 1.2237, "step": 24100 }, { "epoch": 266.2251655629139, "learning_rate": 6.249477351916376e-07, "loss": 1.2144, "step": 24120 }, { "epoch": 266.4459161147903, "learning_rate": 6.246341463414634e-07, "loss": 1.2376, "step": 24140 }, { "epoch": 266.6666666666667, "learning_rate": 6.243205574912892e-07, "loss": 1.2158, "step": 24160 }, { "epoch": 266.887417218543, "learning_rate": 6.24006968641115e-07, "loss": 1.2253, "step": 24180 }, { "epoch": 267.1081677704194, "learning_rate": 6.236933797909407e-07, "loss": 1.229, "step": 24200 }, { "epoch": 267.3289183222958, "learning_rate": 6.233797909407666e-07, "loss": 1.209, "step": 24220 }, { "epoch": 267.5496688741722, "learning_rate": 6.230662020905923e-07, "loss": 1.2141, "step": 24240 }, { "epoch": 267.7704194260486, "learning_rate": 6.227526132404181e-07, "loss": 1.1775, "step": 24260 }, { "epoch": 267.9911699779249, "learning_rate": 6.224390243902439e-07, "loss": 1.212, "step": 24280 }, { "epoch": 268.2119205298013, "learning_rate": 6.221254355400697e-07, "loss": 1.1793, "step": 24300 }, { "epoch": 268.4326710816777, "learning_rate": 6.218118466898954e-07, "loss": 1.2344, "step": 24320 }, { "epoch": 268.6534216335541, "learning_rate": 6.214982578397212e-07, "loss": 1.225, "step": 24340 }, { "epoch": 268.87417218543044, "learning_rate": 6.21184668989547e-07, "loss": 1.2174, "step": 24360 }, { "epoch": 269.09492273730683, "learning_rate": 6.208710801393728e-07, "loss": 1.2349, "step": 24380 }, { "epoch": 269.3156732891832, "learning_rate": 6.205574912891986e-07, "loss": 1.2278, "step": 24400 }, { "epoch": 269.5364238410596, "learning_rate": 6.202439024390244e-07, "loss": 1.2107, "step": 24420 }, { "epoch": 269.757174392936, "learning_rate": 6.199303135888501e-07, "loss": 1.2683, "step": 24440 }, { "epoch": 269.97792494481234, "learning_rate": 6.196167247386759e-07, "loss": 1.1847, "step": 24460 }, { "epoch": 270.19867549668874, "learning_rate": 6.193031358885017e-07, "loss": 1.2206, "step": 24480 }, { "epoch": 270.41942604856513, "learning_rate": 6.189895470383275e-07, "loss": 1.201, "step": 24500 }, { "epoch": 270.6401766004415, "learning_rate": 6.186759581881533e-07, "loss": 1.245, "step": 24520 }, { "epoch": 270.86092715231786, "learning_rate": 6.183623693379791e-07, "loss": 1.1934, "step": 24540 }, { "epoch": 271.08167770419425, "learning_rate": 6.180487804878049e-07, "loss": 1.2011, "step": 24560 }, { "epoch": 271.30242825607064, "learning_rate": 6.177351916376306e-07, "loss": 1.2161, "step": 24580 }, { "epoch": 271.52317880794703, "learning_rate": 6.174216027874564e-07, "loss": 1.2299, "step": 24600 }, { "epoch": 271.7439293598234, "learning_rate": 6.171080139372821e-07, "loss": 1.1873, "step": 24620 }, { "epoch": 271.96467991169976, "learning_rate": 6.16794425087108e-07, "loss": 1.2246, "step": 24640 }, { "epoch": 272.18543046357615, "learning_rate": 6.164808362369337e-07, "loss": 1.251, "step": 24660 }, { "epoch": 272.40618101545255, "learning_rate": 6.161672473867596e-07, "loss": 1.2046, "step": 24680 }, { "epoch": 272.62693156732894, "learning_rate": 6.158536585365853e-07, "loss": 1.2391, "step": 24700 }, { "epoch": 272.8476821192053, "learning_rate": 6.155400696864111e-07, "loss": 1.2197, "step": 24720 }, { "epoch": 273.06843267108167, "learning_rate": 6.152264808362369e-07, "loss": 1.1918, "step": 24740 }, { "epoch": 273.28918322295806, "learning_rate": 6.149128919860627e-07, "loss": 1.2409, "step": 24760 }, { "epoch": 273.50993377483445, "learning_rate": 6.145993031358885e-07, "loss": 1.2091, "step": 24780 }, { "epoch": 273.73068432671084, "learning_rate": 6.142857142857142e-07, "loss": 1.1942, "step": 24800 }, { "epoch": 273.9514348785872, "learning_rate": 6.139721254355401e-07, "loss": 1.2063, "step": 24820 }, { "epoch": 274.17218543046357, "learning_rate": 6.136585365853659e-07, "loss": 1.215, "step": 24840 }, { "epoch": 274.39293598233996, "learning_rate": 6.133449477351915e-07, "loss": 1.2067, "step": 24860 }, { "epoch": 274.61368653421636, "learning_rate": 6.130313588850173e-07, "loss": 1.2142, "step": 24880 }, { "epoch": 274.8344370860927, "learning_rate": 6.127177700348432e-07, "loss": 1.2379, "step": 24900 }, { "epoch": 275.0551876379691, "learning_rate": 6.124041811846689e-07, "loss": 1.156, "step": 24920 }, { "epoch": 275.2759381898455, "learning_rate": 6.120905923344947e-07, "loss": 1.2272, "step": 24940 }, { "epoch": 275.49668874172187, "learning_rate": 6.117770034843205e-07, "loss": 1.1731, "step": 24960 }, { "epoch": 275.71743929359826, "learning_rate": 6.114634146341464e-07, "loss": 1.2582, "step": 24980 }, { "epoch": 275.9381898454746, "learning_rate": 6.11149825783972e-07, "loss": 1.2158, "step": 25000 }, { "epoch": 276.158940397351, "learning_rate": 6.108362369337978e-07, "loss": 1.2254, "step": 25020 }, { "epoch": 276.3796909492274, "learning_rate": 6.105226480836237e-07, "loss": 1.2137, "step": 25040 }, { "epoch": 276.6004415011038, "learning_rate": 6.102090592334495e-07, "loss": 1.171, "step": 25060 }, { "epoch": 276.8211920529801, "learning_rate": 6.098954703832752e-07, "loss": 1.2115, "step": 25080 }, { "epoch": 277.0419426048565, "learning_rate": 6.09581881533101e-07, "loss": 1.2124, "step": 25100 }, { "epoch": 277.2626931567329, "learning_rate": 6.092682926829269e-07, "loss": 1.2222, "step": 25120 }, { "epoch": 277.4834437086093, "learning_rate": 6.089547038327526e-07, "loss": 1.2309, "step": 25140 }, { "epoch": 277.7041942604857, "learning_rate": 6.086411149825783e-07, "loss": 1.2246, "step": 25160 }, { "epoch": 277.924944812362, "learning_rate": 6.083275261324041e-07, "loss": 1.1875, "step": 25180 }, { "epoch": 278.1456953642384, "learning_rate": 6.080139372822299e-07, "loss": 1.2339, "step": 25200 }, { "epoch": 278.1456953642384, "eval_bleu": 46.5655, "eval_gen_len": 8.5667, "eval_loss": 1.970467448234558, "eval_runtime": 3.2684, "eval_samples_per_second": 9.179, "eval_steps_per_second": 1.836, "step": 25200 }, { "epoch": 278.3664459161148, "learning_rate": 6.077003484320556e-07, "loss": 1.1705, "step": 25220 }, { "epoch": 278.5871964679912, "learning_rate": 6.073867595818815e-07, "loss": 1.2458, "step": 25240 }, { "epoch": 278.8079470198675, "learning_rate": 6.070731707317074e-07, "loss": 1.225, "step": 25260 }, { "epoch": 279.0286975717439, "learning_rate": 6.067595818815331e-07, "loss": 1.2393, "step": 25280 }, { "epoch": 279.2494481236203, "learning_rate": 6.064459930313588e-07, "loss": 1.1811, "step": 25300 }, { "epoch": 279.4701986754967, "learning_rate": 6.061324041811846e-07, "loss": 1.1949, "step": 25320 }, { "epoch": 279.6909492273731, "learning_rate": 6.058188153310105e-07, "loss": 1.2285, "step": 25340 }, { "epoch": 279.91169977924943, "learning_rate": 6.055052264808362e-07, "loss": 1.2115, "step": 25360 }, { "epoch": 280.1324503311258, "learning_rate": 6.05191637630662e-07, "loss": 1.2489, "step": 25380 }, { "epoch": 280.3532008830022, "learning_rate": 6.048780487804878e-07, "loss": 1.2012, "step": 25400 }, { "epoch": 280.5739514348786, "learning_rate": 6.045644599303136e-07, "loss": 1.1805, "step": 25420 }, { "epoch": 280.79470198675494, "learning_rate": 6.042508710801392e-07, "loss": 1.2035, "step": 25440 }, { "epoch": 281.01545253863134, "learning_rate": 6.039372822299651e-07, "loss": 1.2033, "step": 25460 }, { "epoch": 281.23620309050773, "learning_rate": 6.036236933797909e-07, "loss": 1.1734, "step": 25480 }, { "epoch": 281.4569536423841, "learning_rate": 6.033101045296167e-07, "loss": 1.2108, "step": 25500 }, { "epoch": 281.6777041942605, "learning_rate": 6.029965156794424e-07, "loss": 1.1953, "step": 25520 }, { "epoch": 281.89845474613685, "learning_rate": 6.026829268292683e-07, "loss": 1.2069, "step": 25540 }, { "epoch": 282.11920529801324, "learning_rate": 6.023693379790941e-07, "loss": 1.2031, "step": 25560 }, { "epoch": 282.33995584988963, "learning_rate": 6.020557491289198e-07, "loss": 1.2203, "step": 25580 }, { "epoch": 282.560706401766, "learning_rate": 6.017421602787457e-07, "loss": 1.232, "step": 25600 }, { "epoch": 282.78145695364236, "learning_rate": 6.014285714285714e-07, "loss": 1.1678, "step": 25620 }, { "epoch": 283.00220750551875, "learning_rate": 6.011149825783972e-07, "loss": 1.223, "step": 25640 }, { "epoch": 283.22295805739515, "learning_rate": 6.008013937282229e-07, "loss": 1.1983, "step": 25660 }, { "epoch": 283.44370860927154, "learning_rate": 6.004878048780488e-07, "loss": 1.2263, "step": 25680 }, { "epoch": 283.6644591611479, "learning_rate": 6.001742160278745e-07, "loss": 1.1873, "step": 25700 }, { "epoch": 283.88520971302427, "learning_rate": 5.998606271777003e-07, "loss": 1.1983, "step": 25720 }, { "epoch": 284.10596026490066, "learning_rate": 5.995470383275261e-07, "loss": 1.2363, "step": 25740 }, { "epoch": 284.32671081677705, "learning_rate": 5.992334494773519e-07, "loss": 1.2258, "step": 25760 }, { "epoch": 284.54746136865344, "learning_rate": 5.989198606271776e-07, "loss": 1.2215, "step": 25780 }, { "epoch": 284.7682119205298, "learning_rate": 5.986062717770035e-07, "loss": 1.1816, "step": 25800 }, { "epoch": 284.9889624724062, "learning_rate": 5.982926829268293e-07, "loss": 1.2286, "step": 25820 }, { "epoch": 285.20971302428256, "learning_rate": 5.97979094076655e-07, "loss": 1.2205, "step": 25840 }, { "epoch": 285.43046357615896, "learning_rate": 5.976655052264808e-07, "loss": 1.2523, "step": 25860 }, { "epoch": 285.6512141280353, "learning_rate": 5.973519163763066e-07, "loss": 1.195, "step": 25880 }, { "epoch": 285.8719646799117, "learning_rate": 5.970383275261324e-07, "loss": 1.2195, "step": 25900 }, { "epoch": 286.0927152317881, "learning_rate": 5.967247386759581e-07, "loss": 1.211, "step": 25920 }, { "epoch": 286.31346578366447, "learning_rate": 5.96411149825784e-07, "loss": 1.2293, "step": 25940 }, { "epoch": 286.53421633554086, "learning_rate": 5.960975609756097e-07, "loss": 1.2403, "step": 25960 }, { "epoch": 286.7549668874172, "learning_rate": 5.957839721254355e-07, "loss": 1.1658, "step": 25980 }, { "epoch": 286.9757174392936, "learning_rate": 5.954703832752612e-07, "loss": 1.1995, "step": 26000 }, { "epoch": 287.19646799117, "learning_rate": 5.951567944250872e-07, "loss": 1.2052, "step": 26020 }, { "epoch": 287.4172185430464, "learning_rate": 5.948432055749128e-07, "loss": 1.2132, "step": 26040 }, { "epoch": 287.6379690949227, "learning_rate": 5.945296167247386e-07, "loss": 1.2423, "step": 26060 }, { "epoch": 287.8587196467991, "learning_rate": 5.942160278745645e-07, "loss": 1.2359, "step": 26080 }, { "epoch": 288.0794701986755, "learning_rate": 5.939024390243902e-07, "loss": 1.1798, "step": 26100 }, { "epoch": 288.3002207505519, "learning_rate": 5.93588850174216e-07, "loss": 1.1791, "step": 26120 }, { "epoch": 288.5209713024283, "learning_rate": 5.932752613240418e-07, "loss": 1.1888, "step": 26140 }, { "epoch": 288.7417218543046, "learning_rate": 5.929616724738677e-07, "loss": 1.1584, "step": 26160 }, { "epoch": 288.962472406181, "learning_rate": 5.926480836236933e-07, "loss": 1.2086, "step": 26180 }, { "epoch": 289.1832229580574, "learning_rate": 5.923344947735191e-07, "loss": 1.2015, "step": 26200 }, { "epoch": 289.4039735099338, "learning_rate": 5.920209059233449e-07, "loss": 1.1905, "step": 26220 }, { "epoch": 289.6247240618101, "learning_rate": 5.917073170731708e-07, "loss": 1.181, "step": 26240 }, { "epoch": 289.8454746136865, "learning_rate": 5.913937282229964e-07, "loss": 1.1961, "step": 26260 }, { "epoch": 290.0662251655629, "learning_rate": 5.910801393728223e-07, "loss": 1.2372, "step": 26280 }, { "epoch": 290.2869757174393, "learning_rate": 5.907665505226481e-07, "loss": 1.2319, "step": 26300 }, { "epoch": 290.5077262693157, "learning_rate": 5.904529616724739e-07, "loss": 1.1856, "step": 26320 }, { "epoch": 290.72847682119203, "learning_rate": 5.901393728222995e-07, "loss": 1.2184, "step": 26340 }, { "epoch": 290.9492273730684, "learning_rate": 5.898257839721254e-07, "loss": 1.2286, "step": 26360 }, { "epoch": 291.1699779249448, "learning_rate": 5.895121951219513e-07, "loss": 1.2022, "step": 26380 }, { "epoch": 291.3907284768212, "learning_rate": 5.891986062717769e-07, "loss": 1.1817, "step": 26400 }, { "epoch": 291.61147902869754, "learning_rate": 5.888850174216028e-07, "loss": 1.1717, "step": 26420 }, { "epoch": 291.83222958057394, "learning_rate": 5.885714285714286e-07, "loss": 1.2329, "step": 26440 }, { "epoch": 292.05298013245033, "learning_rate": 5.882578397212544e-07, "loss": 1.2172, "step": 26460 }, { "epoch": 292.2737306843267, "learning_rate": 5.8794425087108e-07, "loss": 1.1863, "step": 26480 }, { "epoch": 292.4944812362031, "learning_rate": 5.876306620209059e-07, "loss": 1.1673, "step": 26500 }, { "epoch": 292.71523178807945, "learning_rate": 5.873170731707317e-07, "loss": 1.207, "step": 26520 }, { "epoch": 292.93598233995584, "learning_rate": 5.870034843205575e-07, "loss": 1.2263, "step": 26540 }, { "epoch": 293.15673289183223, "learning_rate": 5.866898954703832e-07, "loss": 1.1734, "step": 26560 }, { "epoch": 293.3774834437086, "learning_rate": 5.863763066202091e-07, "loss": 1.1811, "step": 26580 }, { "epoch": 293.59823399558496, "learning_rate": 5.860627177700349e-07, "loss": 1.1802, "step": 26600 }, { "epoch": 293.81898454746135, "learning_rate": 5.857491289198605e-07, "loss": 1.1793, "step": 26620 }, { "epoch": 294.03973509933775, "learning_rate": 5.854355400696864e-07, "loss": 1.183, "step": 26640 }, { "epoch": 294.26048565121414, "learning_rate": 5.851219512195122e-07, "loss": 1.2072, "step": 26660 }, { "epoch": 294.48123620309053, "learning_rate": 5.84808362369338e-07, "loss": 1.1929, "step": 26680 }, { "epoch": 294.70198675496687, "learning_rate": 5.844947735191637e-07, "loss": 1.1652, "step": 26700 }, { "epoch": 294.92273730684326, "learning_rate": 5.841811846689896e-07, "loss": 1.1923, "step": 26720 }, { "epoch": 295.14348785871965, "learning_rate": 5.838675958188153e-07, "loss": 1.1919, "step": 26740 }, { "epoch": 295.36423841059604, "learning_rate": 5.835540069686411e-07, "loss": 1.1995, "step": 26760 }, { "epoch": 295.5849889624724, "learning_rate": 5.832404181184668e-07, "loss": 1.2217, "step": 26780 }, { "epoch": 295.8057395143488, "learning_rate": 5.829268292682927e-07, "loss": 1.2089, "step": 26800 }, { "epoch": 296.02649006622516, "learning_rate": 5.826132404181184e-07, "loss": 1.2007, "step": 26820 }, { "epoch": 296.24724061810156, "learning_rate": 5.822996515679442e-07, "loss": 1.1839, "step": 26840 }, { "epoch": 296.46799116997795, "learning_rate": 5.8198606271777e-07, "loss": 1.1946, "step": 26860 }, { "epoch": 296.6887417218543, "learning_rate": 5.816724738675958e-07, "loss": 1.2003, "step": 26880 }, { "epoch": 296.9094922737307, "learning_rate": 5.813588850174216e-07, "loss": 1.2044, "step": 26900 }, { "epoch": 297.13024282560707, "learning_rate": 5.810452961672473e-07, "loss": 1.2163, "step": 26920 }, { "epoch": 297.35099337748346, "learning_rate": 5.807317073170733e-07, "loss": 1.1956, "step": 26940 }, { "epoch": 297.5717439293598, "learning_rate": 5.804181184668989e-07, "loss": 1.2227, "step": 26960 }, { "epoch": 297.7924944812362, "learning_rate": 5.801045296167248e-07, "loss": 1.216, "step": 26980 }, { "epoch": 298.0132450331126, "learning_rate": 5.797909407665505e-07, "loss": 1.2001, "step": 27000 }, { "epoch": 298.233995584989, "learning_rate": 5.794773519163763e-07, "loss": 1.1819, "step": 27020 }, { "epoch": 298.45474613686537, "learning_rate": 5.79163763066202e-07, "loss": 1.172, "step": 27040 }, { "epoch": 298.6754966887417, "learning_rate": 5.788501742160278e-07, "loss": 1.1822, "step": 27060 }, { "epoch": 298.8962472406181, "learning_rate": 5.785365853658536e-07, "loss": 1.2188, "step": 27080 }, { "epoch": 299.1169977924945, "learning_rate": 5.782229965156794e-07, "loss": 1.2073, "step": 27100 }, { "epoch": 299.3377483443709, "learning_rate": 5.779094076655052e-07, "loss": 1.2013, "step": 27120 }, { "epoch": 299.5584988962472, "learning_rate": 5.77595818815331e-07, "loss": 1.2272, "step": 27140 }, { "epoch": 299.7792494481236, "learning_rate": 5.772822299651568e-07, "loss": 1.1641, "step": 27160 }, { "epoch": 300.0, "learning_rate": 5.769686411149826e-07, "loss": 1.1675, "step": 27180 }, { "epoch": 300.2207505518764, "learning_rate": 5.766550522648083e-07, "loss": 1.2051, "step": 27200 }, { "epoch": 300.4415011037528, "learning_rate": 5.763414634146341e-07, "loss": 1.1936, "step": 27220 }, { "epoch": 300.6622516556291, "learning_rate": 5.760278745644599e-07, "loss": 1.2252, "step": 27240 }, { "epoch": 300.8830022075055, "learning_rate": 5.757142857142857e-07, "loss": 1.1681, "step": 27260 }, { "epoch": 301.1037527593819, "learning_rate": 5.754006968641115e-07, "loss": 1.2009, "step": 27280 }, { "epoch": 301.3245033112583, "learning_rate": 5.750871080139372e-07, "loss": 1.2106, "step": 27300 }, { "epoch": 301.54525386313463, "learning_rate": 5.74773519163763e-07, "loss": 1.1752, "step": 27320 }, { "epoch": 301.766004415011, "learning_rate": 5.744599303135888e-07, "loss": 1.1914, "step": 27340 }, { "epoch": 301.9867549668874, "learning_rate": 5.741463414634147e-07, "loss": 1.2181, "step": 27360 }, { "epoch": 302.2075055187638, "learning_rate": 5.738327526132403e-07, "loss": 1.1858, "step": 27380 }, { "epoch": 302.4282560706402, "learning_rate": 5.735191637630662e-07, "loss": 1.2108, "step": 27400 }, { "epoch": 302.64900662251654, "learning_rate": 5.732055749128921e-07, "loss": 1.2267, "step": 27420 }, { "epoch": 302.86975717439293, "learning_rate": 5.728919860627177e-07, "loss": 1.1994, "step": 27440 }, { "epoch": 303.0905077262693, "learning_rate": 5.725783972125435e-07, "loss": 1.2005, "step": 27460 }, { "epoch": 303.3112582781457, "learning_rate": 5.722648083623693e-07, "loss": 1.1943, "step": 27480 }, { "epoch": 303.53200883002205, "learning_rate": 5.719512195121951e-07, "loss": 1.1911, "step": 27500 }, { "epoch": 303.75275938189844, "learning_rate": 5.716376306620208e-07, "loss": 1.1845, "step": 27520 }, { "epoch": 303.97350993377484, "learning_rate": 5.713240418118467e-07, "loss": 1.1822, "step": 27540 }, { "epoch": 304.1942604856512, "learning_rate": 5.710104529616724e-07, "loss": 1.2154, "step": 27560 }, { "epoch": 304.4150110375276, "learning_rate": 5.706968641114983e-07, "loss": 1.1526, "step": 27580 }, { "epoch": 304.63576158940396, "learning_rate": 5.703832752613239e-07, "loss": 1.1992, "step": 27600 }, { "epoch": 304.85651214128035, "learning_rate": 5.700696864111499e-07, "loss": 1.1996, "step": 27620 }, { "epoch": 305.07726269315674, "learning_rate": 5.697560975609756e-07, "loss": 1.2453, "step": 27640 }, { "epoch": 305.29801324503313, "learning_rate": 5.694425087108013e-07, "loss": 1.1897, "step": 27660 }, { "epoch": 305.51876379690947, "learning_rate": 5.691289198606271e-07, "loss": 1.1674, "step": 27680 }, { "epoch": 305.73951434878586, "learning_rate": 5.68815331010453e-07, "loss": 1.1978, "step": 27700 }, { "epoch": 305.96026490066225, "learning_rate": 5.685017421602787e-07, "loss": 1.2132, "step": 27720 }, { "epoch": 306.18101545253865, "learning_rate": 5.681881533101045e-07, "loss": 1.1438, "step": 27740 }, { "epoch": 306.40176600441504, "learning_rate": 5.678745644599304e-07, "loss": 1.2112, "step": 27760 }, { "epoch": 306.6225165562914, "learning_rate": 5.67560975609756e-07, "loss": 1.2042, "step": 27780 }, { "epoch": 306.84326710816777, "learning_rate": 5.672473867595819e-07, "loss": 1.1631, "step": 27800 }, { "epoch": 307.06401766004416, "learning_rate": 5.669337979094076e-07, "loss": 1.2175, "step": 27820 }, { "epoch": 307.28476821192055, "learning_rate": 5.666202090592335e-07, "loss": 1.1607, "step": 27840 }, { "epoch": 307.5055187637969, "learning_rate": 5.663066202090592e-07, "loss": 1.1965, "step": 27860 }, { "epoch": 307.7262693156733, "learning_rate": 5.659930313588849e-07, "loss": 1.2026, "step": 27880 }, { "epoch": 307.94701986754967, "learning_rate": 5.656794425087108e-07, "loss": 1.2316, "step": 27900 }, { "epoch": 308.16777041942606, "learning_rate": 5.653658536585366e-07, "loss": 1.163, "step": 27920 }, { "epoch": 308.38852097130246, "learning_rate": 5.650522648083622e-07, "loss": 1.2085, "step": 27940 }, { "epoch": 308.6092715231788, "learning_rate": 5.647386759581881e-07, "loss": 1.2146, "step": 27960 }, { "epoch": 308.8300220750552, "learning_rate": 5.64425087108014e-07, "loss": 1.2172, "step": 27980 }, { "epoch": 309.0507726269316, "learning_rate": 5.641114982578397e-07, "loss": 1.1308, "step": 28000 }, { "epoch": 309.27152317880797, "learning_rate": 5.637979094076655e-07, "loss": 1.1976, "step": 28020 }, { "epoch": 309.4922737306843, "learning_rate": 5.634843205574913e-07, "loss": 1.2266, "step": 28040 }, { "epoch": 309.7130242825607, "learning_rate": 5.631707317073171e-07, "loss": 1.2106, "step": 28060 }, { "epoch": 309.9337748344371, "learning_rate": 5.628571428571428e-07, "loss": 1.1707, "step": 28080 }, { "epoch": 310.1545253863135, "learning_rate": 5.625435540069688e-07, "loss": 1.149, "step": 28100 }, { "epoch": 310.3752759381899, "learning_rate": 5.622299651567944e-07, "loss": 1.1601, "step": 28120 }, { "epoch": 310.5960264900662, "learning_rate": 5.619163763066202e-07, "loss": 1.1933, "step": 28140 }, { "epoch": 310.8167770419426, "learning_rate": 5.61602787456446e-07, "loss": 1.2091, "step": 28160 }, { "epoch": 311.037527593819, "learning_rate": 5.612891986062718e-07, "loss": 1.2123, "step": 28180 }, { "epoch": 311.2582781456954, "learning_rate": 5.609756097560975e-07, "loss": 1.1963, "step": 28200 }, { "epoch": 311.4790286975717, "learning_rate": 5.606620209059233e-07, "loss": 1.1684, "step": 28220 }, { "epoch": 311.6997792494481, "learning_rate": 5.60348432055749e-07, "loss": 1.2162, "step": 28240 }, { "epoch": 311.9205298013245, "learning_rate": 5.600348432055749e-07, "loss": 1.1923, "step": 28260 }, { "epoch": 312.1412803532009, "learning_rate": 5.597212543554007e-07, "loss": 1.2096, "step": 28280 }, { "epoch": 312.3620309050773, "learning_rate": 5.594076655052264e-07, "loss": 1.1683, "step": 28300 }, { "epoch": 312.5827814569536, "learning_rate": 5.590940766550523e-07, "loss": 1.1773, "step": 28320 }, { "epoch": 312.80353200883, "learning_rate": 5.587804878048781e-07, "loss": 1.2004, "step": 28340 }, { "epoch": 313.0242825607064, "learning_rate": 5.584668989547038e-07, "loss": 1.1871, "step": 28360 }, { "epoch": 313.2450331125828, "learning_rate": 5.581533101045296e-07, "loss": 1.1895, "step": 28380 }, { "epoch": 313.46578366445914, "learning_rate": 5.578397212543555e-07, "loss": 1.1981, "step": 28400 }, { "epoch": 313.68653421633553, "learning_rate": 5.575261324041811e-07, "loss": 1.2169, "step": 28420 }, { "epoch": 313.9072847682119, "learning_rate": 5.57212543554007e-07, "loss": 1.1895, "step": 28440 }, { "epoch": 314.1280353200883, "learning_rate": 5.568989547038329e-07, "loss": 1.2034, "step": 28460 }, { "epoch": 314.3487858719647, "learning_rate": 5.565853658536585e-07, "loss": 1.1766, "step": 28480 }, { "epoch": 314.56953642384104, "learning_rate": 5.562717770034842e-07, "loss": 1.204, "step": 28500 }, { "epoch": 314.79028697571744, "learning_rate": 5.5595818815331e-07, "loss": 1.1877, "step": 28520 }, { "epoch": 315.0110375275938, "learning_rate": 5.556445993031359e-07, "loss": 1.1443, "step": 28540 }, { "epoch": 315.2317880794702, "learning_rate": 5.553310104529616e-07, "loss": 1.1559, "step": 28560 }, { "epoch": 315.45253863134656, "learning_rate": 5.550174216027875e-07, "loss": 1.1696, "step": 28580 }, { "epoch": 315.67328918322295, "learning_rate": 5.547038327526132e-07, "loss": 1.1773, "step": 28600 }, { "epoch": 315.89403973509934, "learning_rate": 5.543902439024391e-07, "loss": 1.1586, "step": 28620 }, { "epoch": 316.11479028697573, "learning_rate": 5.540766550522647e-07, "loss": 1.1868, "step": 28640 }, { "epoch": 316.3355408388521, "learning_rate": 5.537630662020906e-07, "loss": 1.1764, "step": 28660 }, { "epoch": 316.55629139072846, "learning_rate": 5.534494773519165e-07, "loss": 1.1469, "step": 28680 }, { "epoch": 316.77704194260485, "learning_rate": 5.531358885017421e-07, "loss": 1.1953, "step": 28700 }, { "epoch": 316.99779249448125, "learning_rate": 5.528222996515679e-07, "loss": 1.1843, "step": 28720 }, { "epoch": 317.21854304635764, "learning_rate": 5.525087108013938e-07, "loss": 1.1964, "step": 28740 }, { "epoch": 317.439293598234, "learning_rate": 5.521951219512194e-07, "loss": 1.203, "step": 28760 }, { "epoch": 317.66004415011037, "learning_rate": 5.518815331010452e-07, "loss": 1.18, "step": 28780 }, { "epoch": 317.88079470198676, "learning_rate": 5.515679442508711e-07, "loss": 1.194, "step": 28800 }, { "epoch": 317.88079470198676, "eval_bleu": 51.4066, "eval_gen_len": 8.5, "eval_loss": 1.9817793369293213, "eval_runtime": 3.2253, "eval_samples_per_second": 9.302, "eval_steps_per_second": 1.86, "step": 28800 }, { "epoch": 318.10154525386315, "learning_rate": 5.512543554006968e-07, "loss": 1.234, "step": 28820 }, { "epoch": 318.3222958057395, "learning_rate": 5.509407665505227e-07, "loss": 1.1729, "step": 28840 }, { "epoch": 318.5430463576159, "learning_rate": 5.506271777003484e-07, "loss": 1.2175, "step": 28860 }, { "epoch": 318.76379690949227, "learning_rate": 5.503135888501742e-07, "loss": 1.2113, "step": 28880 }, { "epoch": 318.98454746136866, "learning_rate": 5.5e-07, "loss": 1.1887, "step": 28900 }, { "epoch": 319.20529801324506, "learning_rate": 5.496864111498259e-07, "loss": 1.1669, "step": 28920 }, { "epoch": 319.4260485651214, "learning_rate": 5.493728222996515e-07, "loss": 1.1865, "step": 28940 }, { "epoch": 319.6467991169978, "learning_rate": 5.490592334494774e-07, "loss": 1.2081, "step": 28960 }, { "epoch": 319.8675496688742, "learning_rate": 5.48745644599303e-07, "loss": 1.19, "step": 28980 }, { "epoch": 320.08830022075057, "learning_rate": 5.484320557491289e-07, "loss": 1.1859, "step": 29000 }, { "epoch": 320.3090507726269, "learning_rate": 5.481184668989546e-07, "loss": 1.1067, "step": 29020 }, { "epoch": 320.5298013245033, "learning_rate": 5.478048780487804e-07, "loss": 1.1809, "step": 29040 }, { "epoch": 320.7505518763797, "learning_rate": 5.474912891986063e-07, "loss": 1.1919, "step": 29060 }, { "epoch": 320.9713024282561, "learning_rate": 5.47177700348432e-07, "loss": 1.2178, "step": 29080 }, { "epoch": 321.1920529801325, "learning_rate": 5.468641114982579e-07, "loss": 1.1705, "step": 29100 }, { "epoch": 321.4128035320088, "learning_rate": 5.465505226480836e-07, "loss": 1.1619, "step": 29120 }, { "epoch": 321.6335540838852, "learning_rate": 5.462369337979094e-07, "loss": 1.1919, "step": 29140 }, { "epoch": 321.8543046357616, "learning_rate": 5.459233449477352e-07, "loss": 1.1679, "step": 29160 }, { "epoch": 322.075055187638, "learning_rate": 5.45609756097561e-07, "loss": 1.1742, "step": 29180 }, { "epoch": 322.2958057395143, "learning_rate": 5.452961672473868e-07, "loss": 1.1394, "step": 29200 }, { "epoch": 322.5165562913907, "learning_rate": 5.449825783972125e-07, "loss": 1.1978, "step": 29220 }, { "epoch": 322.7373068432671, "learning_rate": 5.446689895470383e-07, "loss": 1.1737, "step": 29240 }, { "epoch": 322.9580573951435, "learning_rate": 5.44355400696864e-07, "loss": 1.1663, "step": 29260 }, { "epoch": 323.1788079470199, "learning_rate": 5.440418118466898e-07, "loss": 1.1925, "step": 29280 }, { "epoch": 323.3995584988962, "learning_rate": 5.437282229965156e-07, "loss": 1.1958, "step": 29300 }, { "epoch": 323.6203090507726, "learning_rate": 5.434146341463415e-07, "loss": 1.2169, "step": 29320 }, { "epoch": 323.841059602649, "learning_rate": 5.431010452961672e-07, "loss": 1.161, "step": 29340 }, { "epoch": 324.0618101545254, "learning_rate": 5.427874564459931e-07, "loss": 1.1456, "step": 29360 }, { "epoch": 324.28256070640174, "learning_rate": 5.424738675958188e-07, "loss": 1.1658, "step": 29380 }, { "epoch": 324.50331125827813, "learning_rate": 5.421602787456446e-07, "loss": 1.1708, "step": 29400 }, { "epoch": 324.7240618101545, "learning_rate": 5.418466898954704e-07, "loss": 1.1884, "step": 29420 }, { "epoch": 324.9448123620309, "learning_rate": 5.415331010452962e-07, "loss": 1.195, "step": 29440 }, { "epoch": 325.1655629139073, "learning_rate": 5.412195121951219e-07, "loss": 1.2413, "step": 29460 }, { "epoch": 325.38631346578364, "learning_rate": 5.409059233449478e-07, "loss": 1.178, "step": 29480 }, { "epoch": 325.60706401766004, "learning_rate": 5.405923344947734e-07, "loss": 1.1932, "step": 29500 }, { "epoch": 325.82781456953643, "learning_rate": 5.402787456445993e-07, "loss": 1.1841, "step": 29520 }, { "epoch": 326.0485651214128, "learning_rate": 5.39965156794425e-07, "loss": 1.1766, "step": 29540 }, { "epoch": 326.26931567328916, "learning_rate": 5.396515679442508e-07, "loss": 1.1359, "step": 29560 }, { "epoch": 326.49006622516555, "learning_rate": 5.393379790940767e-07, "loss": 1.1609, "step": 29580 }, { "epoch": 326.71081677704194, "learning_rate": 5.390243902439024e-07, "loss": 1.1551, "step": 29600 }, { "epoch": 326.93156732891833, "learning_rate": 5.387108013937282e-07, "loss": 1.2131, "step": 29620 }, { "epoch": 327.1523178807947, "learning_rate": 5.38397212543554e-07, "loss": 1.2387, "step": 29640 }, { "epoch": 327.37306843267106, "learning_rate": 5.380836236933798e-07, "loss": 1.1454, "step": 29660 }, { "epoch": 327.59381898454745, "learning_rate": 5.377700348432055e-07, "loss": 1.1845, "step": 29680 }, { "epoch": 327.81456953642385, "learning_rate": 5.374564459930314e-07, "loss": 1.1905, "step": 29700 }, { "epoch": 328.03532008830024, "learning_rate": 5.371428571428572e-07, "loss": 1.1777, "step": 29720 }, { "epoch": 328.2560706401766, "learning_rate": 5.368292682926828e-07, "loss": 1.176, "step": 29740 }, { "epoch": 328.47682119205297, "learning_rate": 5.365156794425087e-07, "loss": 1.1587, "step": 29760 }, { "epoch": 328.69757174392936, "learning_rate": 5.362020905923344e-07, "loss": 1.189, "step": 29780 }, { "epoch": 328.91832229580575, "learning_rate": 5.358885017421602e-07, "loss": 1.1826, "step": 29800 }, { "epoch": 329.13907284768214, "learning_rate": 5.35574912891986e-07, "loss": 1.182, "step": 29820 }, { "epoch": 329.3598233995585, "learning_rate": 5.352613240418117e-07, "loss": 1.1905, "step": 29840 }, { "epoch": 329.58057395143487, "learning_rate": 5.349477351916376e-07, "loss": 1.1627, "step": 29860 }, { "epoch": 329.80132450331126, "learning_rate": 5.346341463414634e-07, "loss": 1.1429, "step": 29880 }, { "epoch": 330.02207505518766, "learning_rate": 5.343205574912892e-07, "loss": 1.1788, "step": 29900 }, { "epoch": 330.242825607064, "learning_rate": 5.34006968641115e-07, "loss": 1.139, "step": 29920 }, { "epoch": 330.4635761589404, "learning_rate": 5.336933797909408e-07, "loss": 1.195, "step": 29940 }, { "epoch": 330.6843267108168, "learning_rate": 5.333797909407665e-07, "loss": 1.1667, "step": 29960 }, { "epoch": 330.90507726269317, "learning_rate": 5.330662020905923e-07, "loss": 1.2169, "step": 29980 }, { "epoch": 331.12582781456956, "learning_rate": 5.327526132404182e-07, "loss": 1.2115, "step": 30000 }, { "epoch": 331.3465783664459, "learning_rate": 5.324390243902438e-07, "loss": 1.1799, "step": 30020 }, { "epoch": 331.5673289183223, "learning_rate": 5.321254355400697e-07, "loss": 1.1713, "step": 30040 }, { "epoch": 331.7880794701987, "learning_rate": 5.318118466898954e-07, "loss": 1.1347, "step": 30060 }, { "epoch": 332.0088300220751, "learning_rate": 5.314982578397212e-07, "loss": 1.1641, "step": 30080 }, { "epoch": 332.2295805739514, "learning_rate": 5.311846689895469e-07, "loss": 1.2001, "step": 30100 }, { "epoch": 332.4503311258278, "learning_rate": 5.308710801393728e-07, "loss": 1.1617, "step": 30120 }, { "epoch": 332.6710816777042, "learning_rate": 5.305574912891986e-07, "loss": 1.1591, "step": 30140 }, { "epoch": 332.8918322295806, "learning_rate": 5.302439024390244e-07, "loss": 1.1553, "step": 30160 }, { "epoch": 333.112582781457, "learning_rate": 5.299303135888502e-07, "loss": 1.1895, "step": 30180 }, { "epoch": 333.3333333333333, "learning_rate": 5.296167247386759e-07, "loss": 1.1721, "step": 30200 }, { "epoch": 333.5540838852097, "learning_rate": 5.293031358885018e-07, "loss": 1.1746, "step": 30220 }, { "epoch": 333.7748344370861, "learning_rate": 5.289895470383274e-07, "loss": 1.1505, "step": 30240 }, { "epoch": 333.9955849889625, "learning_rate": 5.286759581881533e-07, "loss": 1.1609, "step": 30260 }, { "epoch": 334.2163355408388, "learning_rate": 5.283623693379791e-07, "loss": 1.1453, "step": 30280 }, { "epoch": 334.4370860927152, "learning_rate": 5.280487804878048e-07, "loss": 1.1834, "step": 30300 }, { "epoch": 334.6578366445916, "learning_rate": 5.277351916376305e-07, "loss": 1.2259, "step": 30320 }, { "epoch": 334.878587196468, "learning_rate": 5.274216027874564e-07, "loss": 1.1974, "step": 30340 }, { "epoch": 335.0993377483444, "learning_rate": 5.271080139372821e-07, "loss": 1.1794, "step": 30360 }, { "epoch": 335.32008830022073, "learning_rate": 5.26794425087108e-07, "loss": 1.1433, "step": 30380 }, { "epoch": 335.5408388520971, "learning_rate": 5.264808362369338e-07, "loss": 1.1405, "step": 30400 }, { "epoch": 335.7615894039735, "learning_rate": 5.261672473867596e-07, "loss": 1.2012, "step": 30420 }, { "epoch": 335.9823399558499, "learning_rate": 5.258536585365854e-07, "loss": 1.1916, "step": 30440 }, { "epoch": 336.20309050772624, "learning_rate": 5.255400696864112e-07, "loss": 1.1924, "step": 30460 }, { "epoch": 336.42384105960264, "learning_rate": 5.252264808362369e-07, "loss": 1.1476, "step": 30480 }, { "epoch": 336.64459161147903, "learning_rate": 5.249128919860627e-07, "loss": 1.158, "step": 30500 }, { "epoch": 336.8653421633554, "learning_rate": 5.245993031358886e-07, "loss": 1.1524, "step": 30520 }, { "epoch": 337.0860927152318, "learning_rate": 5.242857142857142e-07, "loss": 1.1872, "step": 30540 }, { "epoch": 337.30684326710815, "learning_rate": 5.239721254355401e-07, "loss": 1.1946, "step": 30560 }, { "epoch": 337.52759381898454, "learning_rate": 5.236585365853657e-07, "loss": 1.2015, "step": 30580 }, { "epoch": 337.74834437086093, "learning_rate": 5.233449477351916e-07, "loss": 1.1733, "step": 30600 }, { "epoch": 337.9690949227373, "learning_rate": 5.230313588850173e-07, "loss": 1.1593, "step": 30620 }, { "epoch": 338.18984547461366, "learning_rate": 5.227177700348432e-07, "loss": 1.1967, "step": 30640 }, { "epoch": 338.41059602649005, "learning_rate": 5.22404181184669e-07, "loss": 1.1877, "step": 30660 }, { "epoch": 338.63134657836645, "learning_rate": 5.220905923344948e-07, "loss": 1.142, "step": 30680 }, { "epoch": 338.85209713024284, "learning_rate": 5.217770034843206e-07, "loss": 1.1526, "step": 30700 }, { "epoch": 339.07284768211923, "learning_rate": 5.214634146341463e-07, "loss": 1.1642, "step": 30720 }, { "epoch": 339.29359823399557, "learning_rate": 5.211498257839722e-07, "loss": 1.1686, "step": 30740 }, { "epoch": 339.51434878587196, "learning_rate": 5.208362369337978e-07, "loss": 1.1572, "step": 30760 }, { "epoch": 339.73509933774835, "learning_rate": 5.205226480836237e-07, "loss": 1.1538, "step": 30780 }, { "epoch": 339.95584988962474, "learning_rate": 5.202090592334495e-07, "loss": 1.1739, "step": 30800 }, { "epoch": 340.1766004415011, "learning_rate": 5.198954703832752e-07, "loss": 1.2046, "step": 30820 }, { "epoch": 340.3973509933775, "learning_rate": 5.19581881533101e-07, "loss": 1.1681, "step": 30840 }, { "epoch": 340.61810154525386, "learning_rate": 5.192682926829268e-07, "loss": 1.1633, "step": 30860 }, { "epoch": 340.83885209713026, "learning_rate": 5.189547038327525e-07, "loss": 1.1735, "step": 30880 }, { "epoch": 341.05960264900665, "learning_rate": 5.186411149825783e-07, "loss": 1.1636, "step": 30900 }, { "epoch": 341.280353200883, "learning_rate": 5.183275261324041e-07, "loss": 1.1792, "step": 30920 }, { "epoch": 341.5011037527594, "learning_rate": 5.1801393728223e-07, "loss": 1.195, "step": 30940 }, { "epoch": 341.72185430463577, "learning_rate": 5.177003484320558e-07, "loss": 1.1563, "step": 30960 }, { "epoch": 341.94260485651216, "learning_rate": 5.173867595818816e-07, "loss": 1.1721, "step": 30980 }, { "epoch": 342.1633554083885, "learning_rate": 5.170731707317073e-07, "loss": 1.1566, "step": 31000 }, { "epoch": 342.3841059602649, "learning_rate": 5.167595818815331e-07, "loss": 1.1617, "step": 31020 }, { "epoch": 342.6048565121413, "learning_rate": 5.164459930313588e-07, "loss": 1.146, "step": 31040 }, { "epoch": 342.8256070640177, "learning_rate": 5.161324041811846e-07, "loss": 1.1685, "step": 31060 }, { "epoch": 343.04635761589407, "learning_rate": 5.158188153310105e-07, "loss": 1.1861, "step": 31080 }, { "epoch": 343.2671081677704, "learning_rate": 5.155052264808361e-07, "loss": 1.1599, "step": 31100 }, { "epoch": 343.4878587196468, "learning_rate": 5.15191637630662e-07, "loss": 1.1844, "step": 31120 }, { "epoch": 343.7086092715232, "learning_rate": 5.148780487804876e-07, "loss": 1.1672, "step": 31140 }, { "epoch": 343.9293598233996, "learning_rate": 5.145644599303136e-07, "loss": 1.1302, "step": 31160 }, { "epoch": 344.1501103752759, "learning_rate": 5.142508710801393e-07, "loss": 1.2002, "step": 31180 }, { "epoch": 344.3708609271523, "learning_rate": 5.139372822299652e-07, "loss": 1.191, "step": 31200 }, { "epoch": 344.5916114790287, "learning_rate": 5.13623693379791e-07, "loss": 1.1869, "step": 31220 }, { "epoch": 344.8123620309051, "learning_rate": 5.133101045296167e-07, "loss": 1.1655, "step": 31240 }, { "epoch": 345.0331125827815, "learning_rate": 5.129965156794426e-07, "loss": 1.1354, "step": 31260 }, { "epoch": 345.2538631346578, "learning_rate": 5.126829268292682e-07, "loss": 1.1772, "step": 31280 }, { "epoch": 345.4746136865342, "learning_rate": 5.123693379790941e-07, "loss": 1.1738, "step": 31300 }, { "epoch": 345.6953642384106, "learning_rate": 5.120557491289197e-07, "loss": 1.1995, "step": 31320 }, { "epoch": 345.916114790287, "learning_rate": 5.117421602787456e-07, "loss": 1.1945, "step": 31340 }, { "epoch": 346.13686534216333, "learning_rate": 5.114285714285714e-07, "loss": 1.1692, "step": 31360 }, { "epoch": 346.3576158940397, "learning_rate": 5.111149825783972e-07, "loss": 1.2119, "step": 31380 }, { "epoch": 346.5783664459161, "learning_rate": 5.108013937282229e-07, "loss": 1.1656, "step": 31400 }, { "epoch": 346.7991169977925, "learning_rate": 5.104878048780488e-07, "loss": 1.1491, "step": 31420 }, { "epoch": 347.0198675496689, "learning_rate": 5.101742160278745e-07, "loss": 1.1632, "step": 31440 }, { "epoch": 347.24061810154524, "learning_rate": 5.098606271777003e-07, "loss": 1.1851, "step": 31460 }, { "epoch": 347.46136865342163, "learning_rate": 5.09547038327526e-07, "loss": 1.1948, "step": 31480 }, { "epoch": 347.682119205298, "learning_rate": 5.092334494773519e-07, "loss": 1.1337, "step": 31500 }, { "epoch": 347.9028697571744, "learning_rate": 5.089198606271777e-07, "loss": 1.1976, "step": 31520 }, { "epoch": 348.12362030905075, "learning_rate": 5.086062717770035e-07, "loss": 1.2112, "step": 31540 }, { "epoch": 348.34437086092714, "learning_rate": 5.082926829268292e-07, "loss": 1.1144, "step": 31560 }, { "epoch": 348.56512141280353, "learning_rate": 5.07979094076655e-07, "loss": 1.1663, "step": 31580 }, { "epoch": 348.7858719646799, "learning_rate": 5.076655052264809e-07, "loss": 1.1801, "step": 31600 }, { "epoch": 349.0066225165563, "learning_rate": 5.073519163763065e-07, "loss": 1.1764, "step": 31620 }, { "epoch": 349.22737306843266, "learning_rate": 5.070383275261324e-07, "loss": 1.1734, "step": 31640 }, { "epoch": 349.44812362030905, "learning_rate": 5.067247386759581e-07, "loss": 1.1605, "step": 31660 }, { "epoch": 349.66887417218544, "learning_rate": 5.06411149825784e-07, "loss": 1.2098, "step": 31680 }, { "epoch": 349.88962472406183, "learning_rate": 5.060975609756097e-07, "loss": 1.1533, "step": 31700 }, { "epoch": 350.11037527593817, "learning_rate": 5.057839721254354e-07, "loss": 1.1666, "step": 31720 }, { "epoch": 350.33112582781456, "learning_rate": 5.054703832752614e-07, "loss": 1.1604, "step": 31740 }, { "epoch": 350.55187637969095, "learning_rate": 5.051567944250871e-07, "loss": 1.1899, "step": 31760 }, { "epoch": 350.77262693156734, "learning_rate": 5.04843205574913e-07, "loss": 1.1404, "step": 31780 }, { "epoch": 350.9933774834437, "learning_rate": 5.045296167247386e-07, "loss": 1.16, "step": 31800 }, { "epoch": 351.2141280353201, "learning_rate": 5.042160278745645e-07, "loss": 1.1792, "step": 31820 }, { "epoch": 351.43487858719647, "learning_rate": 5.039024390243901e-07, "loss": 1.2048, "step": 31840 }, { "epoch": 351.65562913907286, "learning_rate": 5.03588850174216e-07, "loss": 1.1659, "step": 31860 }, { "epoch": 351.87637969094925, "learning_rate": 5.032752613240418e-07, "loss": 1.1488, "step": 31880 }, { "epoch": 352.0971302428256, "learning_rate": 5.029616724738676e-07, "loss": 1.1646, "step": 31900 }, { "epoch": 352.317880794702, "learning_rate": 5.026480836236933e-07, "loss": 1.1681, "step": 31920 }, { "epoch": 352.53863134657837, "learning_rate": 5.023344947735192e-07, "loss": 1.1924, "step": 31940 }, { "epoch": 352.75938189845476, "learning_rate": 5.020209059233449e-07, "loss": 1.1631, "step": 31960 }, { "epoch": 352.9801324503311, "learning_rate": 5.017073170731707e-07, "loss": 1.1437, "step": 31980 }, { "epoch": 353.2008830022075, "learning_rate": 5.013937282229966e-07, "loss": 1.1479, "step": 32000 }, { "epoch": 353.4216335540839, "learning_rate": 5.010801393728223e-07, "loss": 1.1799, "step": 32020 }, { "epoch": 353.6423841059603, "learning_rate": 5.007665505226481e-07, "loss": 1.1763, "step": 32040 }, { "epoch": 353.86313465783667, "learning_rate": 5.004529616724738e-07, "loss": 1.216, "step": 32060 }, { "epoch": 354.083885209713, "learning_rate": 5.001393728222996e-07, "loss": 1.1665, "step": 32080 }, { "epoch": 354.3046357615894, "learning_rate": 4.998257839721254e-07, "loss": 1.1589, "step": 32100 }, { "epoch": 354.5253863134658, "learning_rate": 4.995121951219512e-07, "loss": 1.1646, "step": 32120 }, { "epoch": 354.7461368653422, "learning_rate": 4.991986062717769e-07, "loss": 1.1749, "step": 32140 }, { "epoch": 354.9668874172185, "learning_rate": 4.988850174216028e-07, "loss": 1.2224, "step": 32160 }, { "epoch": 355.1876379690949, "learning_rate": 4.985714285714285e-07, "loss": 1.1945, "step": 32180 }, { "epoch": 355.4083885209713, "learning_rate": 4.982578397212543e-07, "loss": 1.1612, "step": 32200 }, { "epoch": 355.6291390728477, "learning_rate": 4.979442508710801e-07, "loss": 1.156, "step": 32220 }, { "epoch": 355.8498896247241, "learning_rate": 4.976306620209059e-07, "loss": 1.2042, "step": 32240 }, { "epoch": 356.0706401766004, "learning_rate": 4.973170731707316e-07, "loss": 1.1951, "step": 32260 }, { "epoch": 356.2913907284768, "learning_rate": 4.970034843205575e-07, "loss": 1.1731, "step": 32280 }, { "epoch": 356.5121412803532, "learning_rate": 4.966898954703834e-07, "loss": 1.1627, "step": 32300 }, { "epoch": 356.7328918322296, "learning_rate": 4.96376306620209e-07, "loss": 1.1532, "step": 32320 }, { "epoch": 356.95364238410593, "learning_rate": 4.960627177700349e-07, "loss": 1.2106, "step": 32340 }, { "epoch": 357.1743929359823, "learning_rate": 4.957491289198605e-07, "loss": 1.1187, "step": 32360 }, { "epoch": 357.3951434878587, "learning_rate": 4.954355400696864e-07, "loss": 1.2261, "step": 32380 }, { "epoch": 357.6158940397351, "learning_rate": 4.951219512195121e-07, "loss": 1.1416, "step": 32400 }, { "epoch": 357.6158940397351, "eval_bleu": 45.7022, "eval_gen_len": 8.4333, "eval_loss": 1.9698816537857056, "eval_runtime": 3.174, "eval_samples_per_second": 9.452, "eval_steps_per_second": 1.89, "step": 32400 }, { "epoch": 357.8366445916115, "learning_rate": 4.948083623693379e-07, "loss": 1.1481, "step": 32420 }, { "epoch": 358.05739514348784, "learning_rate": 4.944947735191637e-07, "loss": 1.1564, "step": 32440 }, { "epoch": 358.27814569536423, "learning_rate": 4.941811846689895e-07, "loss": 1.1771, "step": 32460 }, { "epoch": 358.4988962472406, "learning_rate": 4.938675958188153e-07, "loss": 1.1402, "step": 32480 }, { "epoch": 358.719646799117, "learning_rate": 4.935540069686411e-07, "loss": 1.1897, "step": 32500 }, { "epoch": 358.94039735099335, "learning_rate": 4.93240418118467e-07, "loss": 1.2065, "step": 32520 }, { "epoch": 359.16114790286974, "learning_rate": 4.929268292682927e-07, "loss": 1.1553, "step": 32540 }, { "epoch": 359.38189845474614, "learning_rate": 4.926132404181185e-07, "loss": 1.224, "step": 32560 }, { "epoch": 359.6026490066225, "learning_rate": 4.922996515679443e-07, "loss": 1.1704, "step": 32580 }, { "epoch": 359.8233995584989, "learning_rate": 4.9198606271777e-07, "loss": 1.1194, "step": 32600 }, { "epoch": 360.04415011037526, "learning_rate": 4.916724738675958e-07, "loss": 1.1772, "step": 32620 }, { "epoch": 360.26490066225165, "learning_rate": 4.913588850174216e-07, "loss": 1.1531, "step": 32640 }, { "epoch": 360.48565121412804, "learning_rate": 4.910452961672473e-07, "loss": 1.1752, "step": 32660 }, { "epoch": 360.70640176600443, "learning_rate": 4.907317073170732e-07, "loss": 1.1692, "step": 32680 }, { "epoch": 360.92715231788077, "learning_rate": 4.904181184668989e-07, "loss": 1.1234, "step": 32700 }, { "epoch": 361.14790286975716, "learning_rate": 4.901045296167247e-07, "loss": 1.1295, "step": 32720 }, { "epoch": 361.36865342163355, "learning_rate": 4.897909407665505e-07, "loss": 1.1615, "step": 32740 }, { "epoch": 361.58940397350995, "learning_rate": 4.894773519163763e-07, "loss": 1.1748, "step": 32760 }, { "epoch": 361.81015452538634, "learning_rate": 4.89163763066202e-07, "loss": 1.1254, "step": 32780 }, { "epoch": 362.0309050772627, "learning_rate": 4.888501742160279e-07, "loss": 1.165, "step": 32800 }, { "epoch": 362.25165562913907, "learning_rate": 4.885365853658538e-07, "loss": 1.1824, "step": 32820 }, { "epoch": 362.47240618101546, "learning_rate": 4.882229965156794e-07, "loss": 1.1703, "step": 32840 }, { "epoch": 362.69315673289185, "learning_rate": 4.879094076655053e-07, "loss": 1.1826, "step": 32860 }, { "epoch": 362.9139072847682, "learning_rate": 4.875958188153309e-07, "loss": 1.1704, "step": 32880 }, { "epoch": 363.1346578366446, "learning_rate": 4.872822299651568e-07, "loss": 1.1456, "step": 32900 }, { "epoch": 363.35540838852097, "learning_rate": 4.869686411149825e-07, "loss": 1.1702, "step": 32920 }, { "epoch": 363.57615894039736, "learning_rate": 4.866550522648083e-07, "loss": 1.1534, "step": 32940 }, { "epoch": 363.79690949227376, "learning_rate": 4.863414634146341e-07, "loss": 1.155, "step": 32960 }, { "epoch": 364.0176600441501, "learning_rate": 4.860278745644599e-07, "loss": 1.169, "step": 32980 }, { "epoch": 364.2384105960265, "learning_rate": 4.857142857142857e-07, "loss": 1.1725, "step": 33000 }, { "epoch": 364.4591611479029, "learning_rate": 4.854006968641115e-07, "loss": 1.1441, "step": 33020 }, { "epoch": 364.67991169977927, "learning_rate": 4.850871080139373e-07, "loss": 1.1205, "step": 33040 }, { "epoch": 364.9006622516556, "learning_rate": 4.84773519163763e-07, "loss": 1.1632, "step": 33060 }, { "epoch": 365.121412803532, "learning_rate": 4.844599303135889e-07, "loss": 1.1767, "step": 33080 }, { "epoch": 365.3421633554084, "learning_rate": 4.841463414634147e-07, "loss": 1.1543, "step": 33100 }, { "epoch": 365.5629139072848, "learning_rate": 4.838327526132404e-07, "loss": 1.2035, "step": 33120 }, { "epoch": 365.7836644591612, "learning_rate": 4.835191637630662e-07, "loss": 1.1284, "step": 33140 }, { "epoch": 366.0044150110375, "learning_rate": 4.832055749128919e-07, "loss": 1.1924, "step": 33160 }, { "epoch": 366.2251655629139, "learning_rate": 4.828919860627177e-07, "loss": 1.1987, "step": 33180 }, { "epoch": 366.4459161147903, "learning_rate": 4.825783972125435e-07, "loss": 1.1411, "step": 33200 }, { "epoch": 366.6666666666667, "learning_rate": 4.822648083623693e-07, "loss": 1.1467, "step": 33220 }, { "epoch": 366.887417218543, "learning_rate": 4.819512195121951e-07, "loss": 1.1609, "step": 33240 }, { "epoch": 367.1081677704194, "learning_rate": 4.816376306620209e-07, "loss": 1.138, "step": 33260 }, { "epoch": 367.3289183222958, "learning_rate": 4.813240418118467e-07, "loss": 1.1967, "step": 33280 }, { "epoch": 367.5496688741722, "learning_rate": 4.810104529616724e-07, "loss": 1.1784, "step": 33300 }, { "epoch": 367.7704194260486, "learning_rate": 4.806968641114983e-07, "loss": 1.1341, "step": 33320 }, { "epoch": 367.9911699779249, "learning_rate": 4.803832752613239e-07, "loss": 1.163, "step": 33340 }, { "epoch": 368.2119205298013, "learning_rate": 4.800696864111498e-07, "loss": 1.1603, "step": 33360 }, { "epoch": 368.4326710816777, "learning_rate": 4.797560975609757e-07, "loss": 1.1226, "step": 33380 }, { "epoch": 368.6534216335541, "learning_rate": 4.794425087108013e-07, "loss": 1.1896, "step": 33400 }, { "epoch": 368.87417218543044, "learning_rate": 4.791289198606272e-07, "loss": 1.1895, "step": 33420 }, { "epoch": 369.09492273730683, "learning_rate": 4.788153310104529e-07, "loss": 1.1875, "step": 33440 }, { "epoch": 369.3156732891832, "learning_rate": 4.785017421602787e-07, "loss": 1.1469, "step": 33460 }, { "epoch": 369.5364238410596, "learning_rate": 4.781881533101045e-07, "loss": 1.1542, "step": 33480 }, { "epoch": 369.757174392936, "learning_rate": 4.778745644599303e-07, "loss": 1.219, "step": 33500 }, { "epoch": 369.97792494481234, "learning_rate": 4.775609756097561e-07, "loss": 1.1547, "step": 33520 }, { "epoch": 370.19867549668874, "learning_rate": 4.772473867595819e-07, "loss": 1.1719, "step": 33540 }, { "epoch": 370.41942604856513, "learning_rate": 4.769337979094076e-07, "loss": 1.123, "step": 33560 }, { "epoch": 370.6401766004415, "learning_rate": 4.766202090592334e-07, "loss": 1.1784, "step": 33580 }, { "epoch": 370.86092715231786, "learning_rate": 4.763066202090592e-07, "loss": 1.1852, "step": 33600 }, { "epoch": 371.08167770419425, "learning_rate": 4.7599303135888505e-07, "loss": 1.1725, "step": 33620 }, { "epoch": 371.30242825607064, "learning_rate": 4.7567944250871075e-07, "loss": 1.1537, "step": 33640 }, { "epoch": 371.52317880794703, "learning_rate": 4.753658536585366e-07, "loss": 1.1854, "step": 33660 }, { "epoch": 371.7439293598234, "learning_rate": 4.750522648083623e-07, "loss": 1.1336, "step": 33680 }, { "epoch": 371.96467991169976, "learning_rate": 4.7473867595818817e-07, "loss": 1.2089, "step": 33700 }, { "epoch": 372.18543046357615, "learning_rate": 4.7442508710801387e-07, "loss": 1.1628, "step": 33720 }, { "epoch": 372.40618101545255, "learning_rate": 4.741114982578397e-07, "loss": 1.1535, "step": 33740 }, { "epoch": 372.62693156732894, "learning_rate": 4.7379790940766553e-07, "loss": 1.2004, "step": 33760 }, { "epoch": 372.8476821192053, "learning_rate": 4.734843205574913e-07, "loss": 1.1668, "step": 33780 }, { "epoch": 373.06843267108167, "learning_rate": 4.731707317073171e-07, "loss": 1.1721, "step": 33800 }, { "epoch": 373.28918322295806, "learning_rate": 4.728571428571428e-07, "loss": 1.1719, "step": 33820 }, { "epoch": 373.50993377483445, "learning_rate": 4.7254355400696864e-07, "loss": 1.1815, "step": 33840 }, { "epoch": 373.73068432671084, "learning_rate": 4.7222996515679434e-07, "loss": 1.1809, "step": 33860 }, { "epoch": 373.9514348785872, "learning_rate": 4.719163763066202e-07, "loss": 1.1849, "step": 33880 }, { "epoch": 374.17218543046357, "learning_rate": 4.71602787456446e-07, "loss": 1.179, "step": 33900 }, { "epoch": 374.39293598233996, "learning_rate": 4.7128919860627176e-07, "loss": 1.1572, "step": 33920 }, { "epoch": 374.61368653421636, "learning_rate": 4.7097560975609756e-07, "loss": 1.1734, "step": 33940 }, { "epoch": 374.8344370860927, "learning_rate": 4.706620209059233e-07, "loss": 1.1608, "step": 33960 }, { "epoch": 375.0551876379691, "learning_rate": 4.7034843205574917e-07, "loss": 1.1355, "step": 33980 }, { "epoch": 375.2759381898455, "learning_rate": 4.7003484320557487e-07, "loss": 1.1185, "step": 34000 }, { "epoch": 375.49668874172187, "learning_rate": 4.697212543554007e-07, "loss": 1.1525, "step": 34020 }, { "epoch": 375.71743929359826, "learning_rate": 4.694076655052265e-07, "loss": 1.1526, "step": 34040 }, { "epoch": 375.9381898454746, "learning_rate": 4.6909407665505223e-07, "loss": 1.1603, "step": 34060 }, { "epoch": 376.158940397351, "learning_rate": 4.6878048780487804e-07, "loss": 1.1153, "step": 34080 }, { "epoch": 376.3796909492274, "learning_rate": 4.6846689895470384e-07, "loss": 1.1728, "step": 34100 }, { "epoch": 376.6004415011038, "learning_rate": 4.681533101045296e-07, "loss": 1.1574, "step": 34120 }, { "epoch": 376.8211920529801, "learning_rate": 4.6783972125435534e-07, "loss": 1.1542, "step": 34140 }, { "epoch": 377.0419426048565, "learning_rate": 4.6752613240418115e-07, "loss": 1.1492, "step": 34160 }, { "epoch": 377.2626931567329, "learning_rate": 4.67212543554007e-07, "loss": 1.1756, "step": 34180 }, { "epoch": 377.4834437086093, "learning_rate": 4.668989547038327e-07, "loss": 1.1558, "step": 34200 }, { "epoch": 377.7041942604857, "learning_rate": 4.6658536585365856e-07, "loss": 1.1589, "step": 34220 }, { "epoch": 377.924944812362, "learning_rate": 4.6627177700348426e-07, "loss": 1.1709, "step": 34240 }, { "epoch": 378.1456953642384, "learning_rate": 4.659581881533101e-07, "loss": 1.1598, "step": 34260 }, { "epoch": 378.3664459161148, "learning_rate": 4.656445993031358e-07, "loss": 1.165, "step": 34280 }, { "epoch": 378.5871964679912, "learning_rate": 4.653310104529616e-07, "loss": 1.1712, "step": 34300 }, { "epoch": 378.8079470198675, "learning_rate": 4.650174216027875e-07, "loss": 1.1503, "step": 34320 }, { "epoch": 379.0286975717439, "learning_rate": 4.647038327526132e-07, "loss": 1.1694, "step": 34340 }, { "epoch": 379.2494481236203, "learning_rate": 4.6439024390243904e-07, "loss": 1.1221, "step": 34360 }, { "epoch": 379.4701986754967, "learning_rate": 4.6407665505226474e-07, "loss": 1.1639, "step": 34380 }, { "epoch": 379.6909492273731, "learning_rate": 4.637630662020906e-07, "loss": 1.2062, "step": 34400 }, { "epoch": 379.91169977924943, "learning_rate": 4.634494773519163e-07, "loss": 1.1965, "step": 34420 }, { "epoch": 380.1324503311258, "learning_rate": 4.6313588850174215e-07, "loss": 1.1416, "step": 34440 }, { "epoch": 380.3532008830022, "learning_rate": 4.6282229965156796e-07, "loss": 1.1331, "step": 34460 }, { "epoch": 380.5739514348786, "learning_rate": 4.625087108013937e-07, "loss": 1.1529, "step": 34480 }, { "epoch": 380.79470198675494, "learning_rate": 4.621951219512195e-07, "loss": 1.1931, "step": 34500 }, { "epoch": 381.01545253863134, "learning_rate": 4.618815331010452e-07, "loss": 1.1638, "step": 34520 }, { "epoch": 381.23620309050773, "learning_rate": 4.6156794425087107e-07, "loss": 1.1293, "step": 34540 }, { "epoch": 381.4569536423841, "learning_rate": 4.612543554006968e-07, "loss": 1.1728, "step": 34560 }, { "epoch": 381.6777041942605, "learning_rate": 4.6094076655052263e-07, "loss": 1.1847, "step": 34580 }, { "epoch": 381.89845474613685, "learning_rate": 4.6062717770034843e-07, "loss": 1.1777, "step": 34600 }, { "epoch": 382.11920529801324, "learning_rate": 4.603135888501742e-07, "loss": 1.167, "step": 34620 }, { "epoch": 382.33995584988963, "learning_rate": 4.6e-07, "loss": 1.1808, "step": 34640 }, { "epoch": 382.560706401766, "learning_rate": 4.5968641114982574e-07, "loss": 1.116, "step": 34660 }, { "epoch": 382.78145695364236, "learning_rate": 4.593728222996515e-07, "loss": 1.1357, "step": 34680 }, { "epoch": 383.00220750551875, "learning_rate": 4.590592334494774e-07, "loss": 1.1878, "step": 34700 }, { "epoch": 383.22295805739515, "learning_rate": 4.587456445993031e-07, "loss": 1.1284, "step": 34720 }, { "epoch": 383.44370860927154, "learning_rate": 4.584320557491289e-07, "loss": 1.1819, "step": 34740 }, { "epoch": 383.6644591611479, "learning_rate": 4.5811846689895466e-07, "loss": 1.1144, "step": 34760 }, { "epoch": 383.88520971302427, "learning_rate": 4.5780487804878047e-07, "loss": 1.1786, "step": 34780 }, { "epoch": 384.10596026490066, "learning_rate": 4.5749128919860616e-07, "loss": 1.1793, "step": 34800 }, { "epoch": 384.32671081677705, "learning_rate": 4.57177700348432e-07, "loss": 1.1671, "step": 34820 }, { "epoch": 384.54746136865344, "learning_rate": 4.568641114982579e-07, "loss": 1.1962, "step": 34840 }, { "epoch": 384.7682119205298, "learning_rate": 4.565505226480836e-07, "loss": 1.1392, "step": 34860 }, { "epoch": 384.9889624724062, "learning_rate": 4.5623693379790944e-07, "loss": 1.1296, "step": 34880 }, { "epoch": 385.20971302428256, "learning_rate": 4.5592334494773514e-07, "loss": 1.1262, "step": 34900 }, { "epoch": 385.43046357615896, "learning_rate": 4.55609756097561e-07, "loss": 1.1584, "step": 34920 }, { "epoch": 385.6512141280353, "learning_rate": 4.552961672473867e-07, "loss": 1.1438, "step": 34940 }, { "epoch": 385.8719646799117, "learning_rate": 4.5498257839721255e-07, "loss": 1.1533, "step": 34960 }, { "epoch": 386.0927152317881, "learning_rate": 4.5466898954703835e-07, "loss": 1.1437, "step": 34980 }, { "epoch": 386.31346578366447, "learning_rate": 4.5435540069686405e-07, "loss": 1.1541, "step": 35000 }, { "epoch": 386.53421633554086, "learning_rate": 4.540418118466899e-07, "loss": 1.1999, "step": 35020 }, { "epoch": 386.7549668874172, "learning_rate": 4.537282229965156e-07, "loss": 1.1495, "step": 35040 }, { "epoch": 386.9757174392936, "learning_rate": 4.5341463414634147e-07, "loss": 1.1571, "step": 35060 }, { "epoch": 387.19646799117, "learning_rate": 4.5310104529616717e-07, "loss": 1.178, "step": 35080 }, { "epoch": 387.4172185430464, "learning_rate": 4.52787456445993e-07, "loss": 1.1715, "step": 35100 }, { "epoch": 387.6379690949227, "learning_rate": 4.5247386759581883e-07, "loss": 1.1834, "step": 35120 }, { "epoch": 387.8587196467991, "learning_rate": 4.521602787456446e-07, "loss": 1.1213, "step": 35140 }, { "epoch": 388.0794701986755, "learning_rate": 4.518466898954704e-07, "loss": 1.179, "step": 35160 }, { "epoch": 388.3002207505519, "learning_rate": 4.5153310104529614e-07, "loss": 1.1465, "step": 35180 }, { "epoch": 388.5209713024283, "learning_rate": 4.5121951219512194e-07, "loss": 1.1355, "step": 35200 }, { "epoch": 388.7417218543046, "learning_rate": 4.509059233449477e-07, "loss": 1.1686, "step": 35220 }, { "epoch": 388.962472406181, "learning_rate": 4.505923344947735e-07, "loss": 1.1378, "step": 35240 }, { "epoch": 389.1832229580574, "learning_rate": 4.5027874564459925e-07, "loss": 1.1601, "step": 35260 }, { "epoch": 389.4039735099338, "learning_rate": 4.4996515679442506e-07, "loss": 1.1491, "step": 35280 }, { "epoch": 389.6247240618101, "learning_rate": 4.4965156794425086e-07, "loss": 1.1138, "step": 35300 }, { "epoch": 389.8454746136865, "learning_rate": 4.493379790940766e-07, "loss": 1.1747, "step": 35320 }, { "epoch": 390.0662251655629, "learning_rate": 4.490243902439024e-07, "loss": 1.1572, "step": 35340 }, { "epoch": 390.2869757174393, "learning_rate": 4.4871080139372817e-07, "loss": 1.1155, "step": 35360 }, { "epoch": 390.5077262693157, "learning_rate": 4.483972125435539e-07, "loss": 1.1676, "step": 35380 }, { "epoch": 390.72847682119203, "learning_rate": 4.4808362369337983e-07, "loss": 1.1616, "step": 35400 }, { "epoch": 390.9492273730684, "learning_rate": 4.4777003484320553e-07, "loss": 1.1607, "step": 35420 }, { "epoch": 391.1699779249448, "learning_rate": 4.4745644599303134e-07, "loss": 1.1482, "step": 35440 }, { "epoch": 391.3907284768212, "learning_rate": 4.471428571428571e-07, "loss": 1.1485, "step": 35460 }, { "epoch": 391.61147902869754, "learning_rate": 4.468292682926829e-07, "loss": 1.1441, "step": 35480 }, { "epoch": 391.83222958057394, "learning_rate": 4.465156794425086e-07, "loss": 1.2112, "step": 35500 }, { "epoch": 392.05298013245033, "learning_rate": 4.4620209059233445e-07, "loss": 1.1937, "step": 35520 }, { "epoch": 392.2737306843267, "learning_rate": 4.458885017421603e-07, "loss": 1.1341, "step": 35540 }, { "epoch": 392.4944812362031, "learning_rate": 4.45574912891986e-07, "loss": 1.1756, "step": 35560 }, { "epoch": 392.71523178807945, "learning_rate": 4.4526132404181187e-07, "loss": 1.1754, "step": 35580 }, { "epoch": 392.93598233995584, "learning_rate": 4.4494773519163756e-07, "loss": 1.1678, "step": 35600 }, { "epoch": 393.15673289183223, "learning_rate": 4.446341463414634e-07, "loss": 1.1252, "step": 35620 }, { "epoch": 393.3774834437086, "learning_rate": 4.4432055749128923e-07, "loss": 1.1646, "step": 35640 }, { "epoch": 393.59823399558496, "learning_rate": 4.44006968641115e-07, "loss": 1.1361, "step": 35660 }, { "epoch": 393.81898454746135, "learning_rate": 4.436933797909408e-07, "loss": 1.1557, "step": 35680 }, { "epoch": 394.03973509933775, "learning_rate": 4.433797909407665e-07, "loss": 1.1226, "step": 35700 }, { "epoch": 394.26048565121414, "learning_rate": 4.4306620209059234e-07, "loss": 1.1205, "step": 35720 }, { "epoch": 394.48123620309053, "learning_rate": 4.4275261324041804e-07, "loss": 1.1669, "step": 35740 }, { "epoch": 394.70198675496687, "learning_rate": 4.424390243902439e-07, "loss": 1.1618, "step": 35760 }, { "epoch": 394.92273730684326, "learning_rate": 4.421254355400697e-07, "loss": 1.1301, "step": 35780 }, { "epoch": 395.14348785871965, "learning_rate": 4.4181184668989545e-07, "loss": 1.171, "step": 35800 }, { "epoch": 395.36423841059604, "learning_rate": 4.4149825783972126e-07, "loss": 1.1389, "step": 35820 }, { "epoch": 395.5849889624724, "learning_rate": 4.41184668989547e-07, "loss": 1.1758, "step": 35840 }, { "epoch": 395.8057395143488, "learning_rate": 4.408710801393728e-07, "loss": 1.148, "step": 35860 }, { "epoch": 396.02649006622516, "learning_rate": 4.4055749128919857e-07, "loss": 1.1375, "step": 35880 }, { "epoch": 396.24724061810156, "learning_rate": 4.4024390243902437e-07, "loss": 1.1157, "step": 35900 }, { "epoch": 396.46799116997795, "learning_rate": 4.399303135888502e-07, "loss": 1.1181, "step": 35920 }, { "epoch": 396.6887417218543, "learning_rate": 4.3961672473867593e-07, "loss": 1.1487, "step": 35940 }, { "epoch": 396.9094922737307, "learning_rate": 4.393031358885017e-07, "loss": 1.1366, "step": 35960 }, { "epoch": 397.13024282560707, "learning_rate": 4.389895470383275e-07, "loss": 1.1268, "step": 35980 }, { "epoch": 397.35099337748346, "learning_rate": 4.386759581881533e-07, "loss": 1.1437, "step": 36000 }, { "epoch": 397.35099337748346, "eval_bleu": 46.8726, "eval_gen_len": 8.2, "eval_loss": 1.9691740274429321, "eval_runtime": 3.1722, "eval_samples_per_second": 9.457, "eval_steps_per_second": 1.891, "step": 36000 }, { "epoch": 397.5717439293598, "learning_rate": 4.3836236933797904e-07, "loss": 1.1699, "step": 36020 }, { "epoch": 397.7924944812362, "learning_rate": 4.3804878048780485e-07, "loss": 1.1739, "step": 36040 }, { "epoch": 398.0132450331126, "learning_rate": 4.377351916376307e-07, "loss": 1.1427, "step": 36060 }, { "epoch": 398.233995584989, "learning_rate": 4.374216027874564e-07, "loss": 1.1355, "step": 36080 }, { "epoch": 398.45474613686537, "learning_rate": 4.3710801393728226e-07, "loss": 1.1802, "step": 36100 }, { "epoch": 398.6754966887417, "learning_rate": 4.3679442508710796e-07, "loss": 1.171, "step": 36120 }, { "epoch": 398.8962472406181, "learning_rate": 4.3648083623693377e-07, "loss": 1.1463, "step": 36140 }, { "epoch": 399.1169977924945, "learning_rate": 4.361672473867595e-07, "loss": 1.1102, "step": 36160 }, { "epoch": 399.3377483443709, "learning_rate": 4.358536585365853e-07, "loss": 1.1136, "step": 36180 }, { "epoch": 399.5584988962472, "learning_rate": 4.355400696864112e-07, "loss": 1.1909, "step": 36200 }, { "epoch": 399.7792494481236, "learning_rate": 4.352264808362369e-07, "loss": 1.1672, "step": 36220 }, { "epoch": 400.0, "learning_rate": 4.3491289198606274e-07, "loss": 1.0953, "step": 36240 }, { "epoch": 400.2207505518764, "learning_rate": 4.3459930313588844e-07, "loss": 1.1247, "step": 36260 }, { "epoch": 400.4415011037528, "learning_rate": 4.342857142857143e-07, "loss": 1.169, "step": 36280 }, { "epoch": 400.6622516556291, "learning_rate": 4.3397212543554e-07, "loss": 1.1571, "step": 36300 }, { "epoch": 400.8830022075055, "learning_rate": 4.3365853658536585e-07, "loss": 1.2385, "step": 36320 }, { "epoch": 401.1037527593819, "learning_rate": 4.3334494773519166e-07, "loss": 1.1616, "step": 36340 }, { "epoch": 401.3245033112583, "learning_rate": 4.330313588850174e-07, "loss": 1.1949, "step": 36360 }, { "epoch": 401.54525386313463, "learning_rate": 4.327177700348432e-07, "loss": 1.185, "step": 36380 }, { "epoch": 401.766004415011, "learning_rate": 4.324041811846689e-07, "loss": 1.1708, "step": 36400 }, { "epoch": 401.9867549668874, "learning_rate": 4.320905923344948e-07, "loss": 1.1252, "step": 36420 }, { "epoch": 402.2075055187638, "learning_rate": 4.3177700348432047e-07, "loss": 1.1517, "step": 36440 }, { "epoch": 402.4282560706402, "learning_rate": 4.3146341463414633e-07, "loss": 1.1446, "step": 36460 }, { "epoch": 402.64900662251654, "learning_rate": 4.3114982578397213e-07, "loss": 1.1321, "step": 36480 }, { "epoch": 402.86975717439293, "learning_rate": 4.308362369337979e-07, "loss": 1.1393, "step": 36500 }, { "epoch": 403.0905077262693, "learning_rate": 4.305226480836237e-07, "loss": 1.1481, "step": 36520 }, { "epoch": 403.3112582781457, "learning_rate": 4.302090592334495e-07, "loss": 1.1189, "step": 36540 }, { "epoch": 403.53200883002205, "learning_rate": 4.2989547038327525e-07, "loss": 1.1586, "step": 36560 }, { "epoch": 403.75275938189844, "learning_rate": 4.29581881533101e-07, "loss": 1.184, "step": 36580 }, { "epoch": 403.97350993377484, "learning_rate": 4.292682926829268e-07, "loss": 1.1367, "step": 36600 }, { "epoch": 404.1942604856512, "learning_rate": 4.289547038327526e-07, "loss": 1.1961, "step": 36620 }, { "epoch": 404.4150110375276, "learning_rate": 4.2864111498257836e-07, "loss": 1.1672, "step": 36640 }, { "epoch": 404.63576158940396, "learning_rate": 4.2832752613240416e-07, "loss": 1.128, "step": 36660 }, { "epoch": 404.85651214128035, "learning_rate": 4.280139372822299e-07, "loss": 1.1529, "step": 36680 }, { "epoch": 405.07726269315674, "learning_rate": 4.277003484320557e-07, "loss": 1.1188, "step": 36700 }, { "epoch": 405.29801324503313, "learning_rate": 4.273867595818816e-07, "loss": 1.1708, "step": 36720 }, { "epoch": 405.51876379690947, "learning_rate": 4.270731707317073e-07, "loss": 1.1476, "step": 36740 }, { "epoch": 405.73951434878586, "learning_rate": 4.2675958188153314e-07, "loss": 1.1487, "step": 36760 }, { "epoch": 405.96026490066225, "learning_rate": 4.2644599303135883e-07, "loss": 1.1542, "step": 36780 }, { "epoch": 406.18101545253865, "learning_rate": 4.261324041811847e-07, "loss": 1.1383, "step": 36800 }, { "epoch": 406.40176600441504, "learning_rate": 4.258188153310104e-07, "loss": 1.1552, "step": 36820 }, { "epoch": 406.6225165562914, "learning_rate": 4.255052264808362e-07, "loss": 1.1357, "step": 36840 }, { "epoch": 406.84326710816777, "learning_rate": 4.2519163763066205e-07, "loss": 1.1506, "step": 36860 }, { "epoch": 407.06401766004416, "learning_rate": 4.2487804878048775e-07, "loss": 1.1682, "step": 36880 }, { "epoch": 407.28476821192055, "learning_rate": 4.245644599303136e-07, "loss": 1.1692, "step": 36900 }, { "epoch": 407.5055187637969, "learning_rate": 4.242508710801393e-07, "loss": 1.1206, "step": 36920 }, { "epoch": 407.7262693156733, "learning_rate": 4.2393728222996517e-07, "loss": 1.1806, "step": 36940 }, { "epoch": 407.94701986754967, "learning_rate": 4.2362369337979087e-07, "loss": 1.1506, "step": 36960 }, { "epoch": 408.16777041942606, "learning_rate": 4.233101045296167e-07, "loss": 1.1418, "step": 36980 }, { "epoch": 408.38852097130246, "learning_rate": 4.229965156794426e-07, "loss": 1.1389, "step": 37000 }, { "epoch": 408.6092715231788, "learning_rate": 4.226829268292683e-07, "loss": 1.1643, "step": 37020 }, { "epoch": 408.8300220750552, "learning_rate": 4.223693379790941e-07, "loss": 1.1332, "step": 37040 }, { "epoch": 409.0507726269316, "learning_rate": 4.2205574912891984e-07, "loss": 1.1497, "step": 37060 }, { "epoch": 409.27152317880797, "learning_rate": 4.2174216027874564e-07, "loss": 1.1002, "step": 37080 }, { "epoch": 409.4922737306843, "learning_rate": 4.2142857142857134e-07, "loss": 1.1924, "step": 37100 }, { "epoch": 409.7130242825607, "learning_rate": 4.2111498257839725e-07, "loss": 1.1697, "step": 37120 }, { "epoch": 409.9337748344371, "learning_rate": 4.20801393728223e-07, "loss": 1.1467, "step": 37140 }, { "epoch": 410.1545253863135, "learning_rate": 4.2048780487804876e-07, "loss": 1.1527, "step": 37160 }, { "epoch": 410.3752759381899, "learning_rate": 4.2017421602787456e-07, "loss": 1.1282, "step": 37180 }, { "epoch": 410.5960264900662, "learning_rate": 4.198606271777003e-07, "loss": 1.1604, "step": 37200 }, { "epoch": 410.8167770419426, "learning_rate": 4.195470383275261e-07, "loss": 1.1222, "step": 37220 }, { "epoch": 411.037527593819, "learning_rate": 4.192334494773519e-07, "loss": 1.1339, "step": 37240 }, { "epoch": 411.2582781456954, "learning_rate": 4.189198606271777e-07, "loss": 1.1465, "step": 37260 }, { "epoch": 411.4790286975717, "learning_rate": 4.186062717770035e-07, "loss": 1.1561, "step": 37280 }, { "epoch": 411.6997792494481, "learning_rate": 4.1829268292682923e-07, "loss": 1.1597, "step": 37300 }, { "epoch": 411.9205298013245, "learning_rate": 4.1797909407665504e-07, "loss": 1.1627, "step": 37320 }, { "epoch": 412.1412803532009, "learning_rate": 4.176655052264808e-07, "loss": 1.1128, "step": 37340 }, { "epoch": 412.3620309050773, "learning_rate": 4.173519163763066e-07, "loss": 1.1165, "step": 37360 }, { "epoch": 412.5827814569536, "learning_rate": 4.1703832752613234e-07, "loss": 1.1412, "step": 37380 }, { "epoch": 412.80353200883, "learning_rate": 4.1672473867595815e-07, "loss": 1.1862, "step": 37400 }, { "epoch": 413.0242825607064, "learning_rate": 4.16411149825784e-07, "loss": 1.1466, "step": 37420 }, { "epoch": 413.2450331125828, "learning_rate": 4.160975609756097e-07, "loss": 1.1462, "step": 37440 }, { "epoch": 413.46578366445914, "learning_rate": 4.1578397212543556e-07, "loss": 1.1418, "step": 37460 }, { "epoch": 413.68653421633553, "learning_rate": 4.1547038327526126e-07, "loss": 1.1285, "step": 37480 }, { "epoch": 413.9072847682119, "learning_rate": 4.151567944250871e-07, "loss": 1.1471, "step": 37500 }, { "epoch": 414.1280353200883, "learning_rate": 4.148432055749128e-07, "loss": 1.1541, "step": 37520 }, { "epoch": 414.3487858719647, "learning_rate": 4.145296167247386e-07, "loss": 1.1447, "step": 37540 }, { "epoch": 414.56953642384104, "learning_rate": 4.142160278745645e-07, "loss": 1.1477, "step": 37560 }, { "epoch": 414.79028697571744, "learning_rate": 4.139024390243902e-07, "loss": 1.1384, "step": 37580 }, { "epoch": 415.0110375275938, "learning_rate": 4.1358885017421604e-07, "loss": 1.1892, "step": 37600 }, { "epoch": 415.2317880794702, "learning_rate": 4.1327526132404174e-07, "loss": 1.128, "step": 37620 }, { "epoch": 415.45253863134656, "learning_rate": 4.129616724738676e-07, "loss": 1.1521, "step": 37640 }, { "epoch": 415.67328918322295, "learning_rate": 4.126480836236934e-07, "loss": 1.127, "step": 37660 }, { "epoch": 415.89403973509934, "learning_rate": 4.1233449477351915e-07, "loss": 1.1564, "step": 37680 }, { "epoch": 416.11479028697573, "learning_rate": 4.12020905923345e-07, "loss": 1.1432, "step": 37700 }, { "epoch": 416.3355408388521, "learning_rate": 4.117073170731707e-07, "loss": 1.1652, "step": 37720 }, { "epoch": 416.55629139072846, "learning_rate": 4.113937282229965e-07, "loss": 1.1644, "step": 37740 }, { "epoch": 416.77704194260485, "learning_rate": 4.1108013937282227e-07, "loss": 1.1459, "step": 37760 }, { "epoch": 416.99779249448125, "learning_rate": 4.1076655052264807e-07, "loss": 1.148, "step": 37780 }, { "epoch": 417.21854304635764, "learning_rate": 4.104529616724739e-07, "loss": 1.1711, "step": 37800 }, { "epoch": 417.439293598234, "learning_rate": 4.101393728222996e-07, "loss": 1.142, "step": 37820 }, { "epoch": 417.66004415011037, "learning_rate": 4.0982578397212543e-07, "loss": 1.1569, "step": 37840 }, { "epoch": 417.88079470198676, "learning_rate": 4.095121951219512e-07, "loss": 1.1337, "step": 37860 }, { "epoch": 418.10154525386315, "learning_rate": 4.09198606271777e-07, "loss": 1.1481, "step": 37880 }, { "epoch": 418.3222958057395, "learning_rate": 4.0888501742160274e-07, "loss": 1.1402, "step": 37900 }, { "epoch": 418.5430463576159, "learning_rate": 4.0857142857142855e-07, "loss": 1.143, "step": 37920 }, { "epoch": 418.76379690949227, "learning_rate": 4.082578397212544e-07, "loss": 1.1103, "step": 37940 }, { "epoch": 418.98454746136866, "learning_rate": 4.079442508710801e-07, "loss": 1.1221, "step": 37960 }, { "epoch": 419.20529801324506, "learning_rate": 4.0763066202090596e-07, "loss": 1.1496, "step": 37980 }, { "epoch": 419.4260485651214, "learning_rate": 4.0731707317073166e-07, "loss": 1.1512, "step": 38000 }, { "epoch": 419.6467991169978, "learning_rate": 4.0700348432055747e-07, "loss": 1.1456, "step": 38020 }, { "epoch": 419.8675496688742, "learning_rate": 4.066898954703832e-07, "loss": 1.1294, "step": 38040 }, { "epoch": 420.08830022075057, "learning_rate": 4.06376306620209e-07, "loss": 1.1663, "step": 38060 }, { "epoch": 420.3090507726269, "learning_rate": 4.060627177700349e-07, "loss": 1.1684, "step": 38080 }, { "epoch": 420.5298013245033, "learning_rate": 4.057491289198606e-07, "loss": 1.1241, "step": 38100 }, { "epoch": 420.7505518763797, "learning_rate": 4.0543554006968644e-07, "loss": 1.1597, "step": 38120 }, { "epoch": 420.9713024282561, "learning_rate": 4.0512195121951214e-07, "loss": 1.1643, "step": 38140 }, { "epoch": 421.1920529801325, "learning_rate": 4.04808362369338e-07, "loss": 1.1592, "step": 38160 }, { "epoch": 421.4128035320088, "learning_rate": 4.044947735191637e-07, "loss": 1.1483, "step": 38180 }, { "epoch": 421.6335540838852, "learning_rate": 4.0418118466898955e-07, "loss": 1.1539, "step": 38200 }, { "epoch": 421.8543046357616, "learning_rate": 4.0386759581881536e-07, "loss": 1.1445, "step": 38220 }, { "epoch": 422.075055187638, "learning_rate": 4.035540069686411e-07, "loss": 1.1763, "step": 38240 }, { "epoch": 422.2958057395143, "learning_rate": 4.032404181184669e-07, "loss": 1.1422, "step": 38260 }, { "epoch": 422.5165562913907, "learning_rate": 4.029268292682926e-07, "loss": 1.1585, "step": 38280 }, { "epoch": 422.7373068432671, "learning_rate": 4.0261324041811847e-07, "loss": 1.1285, "step": 38300 }, { "epoch": 422.9580573951435, "learning_rate": 4.0229965156794417e-07, "loss": 1.1413, "step": 38320 }, { "epoch": 423.1788079470199, "learning_rate": 4.0198606271777e-07, "loss": 1.1166, "step": 38340 }, { "epoch": 423.3995584988962, "learning_rate": 4.0167247386759583e-07, "loss": 1.1441, "step": 38360 }, { "epoch": 423.6203090507726, "learning_rate": 4.013588850174216e-07, "loss": 1.1839, "step": 38380 }, { "epoch": 423.841059602649, "learning_rate": 4.0104529616724733e-07, "loss": 1.1659, "step": 38400 }, { "epoch": 424.0618101545254, "learning_rate": 4.0073170731707314e-07, "loss": 1.1344, "step": 38420 }, { "epoch": 424.28256070640174, "learning_rate": 4.0041811846689894e-07, "loss": 1.1552, "step": 38440 }, { "epoch": 424.50331125827813, "learning_rate": 4.001045296167247e-07, "loss": 1.134, "step": 38460 }, { "epoch": 424.7240618101545, "learning_rate": 3.997909407665505e-07, "loss": 1.1405, "step": 38480 }, { "epoch": 424.9448123620309, "learning_rate": 3.994773519163763e-07, "loss": 1.1448, "step": 38500 }, { "epoch": 425.1655629139073, "learning_rate": 3.99163763066202e-07, "loss": 1.1329, "step": 38520 }, { "epoch": 425.38631346578364, "learning_rate": 3.9885017421602786e-07, "loss": 1.1272, "step": 38540 }, { "epoch": 425.60706401766004, "learning_rate": 3.985365853658536e-07, "loss": 1.1722, "step": 38560 }, { "epoch": 425.82781456953643, "learning_rate": 3.982229965156794e-07, "loss": 1.1337, "step": 38580 }, { "epoch": 426.0485651214128, "learning_rate": 3.9790940766550517e-07, "loss": 1.1551, "step": 38600 }, { "epoch": 426.26931567328916, "learning_rate": 3.97595818815331e-07, "loss": 1.1655, "step": 38620 }, { "epoch": 426.49006622516555, "learning_rate": 3.9728222996515683e-07, "loss": 1.1145, "step": 38640 }, { "epoch": 426.71081677704194, "learning_rate": 3.9696864111498253e-07, "loss": 1.1747, "step": 38660 }, { "epoch": 426.93156732891833, "learning_rate": 3.966550522648084e-07, "loss": 1.1222, "step": 38680 }, { "epoch": 427.1523178807947, "learning_rate": 3.963414634146341e-07, "loss": 1.1597, "step": 38700 }, { "epoch": 427.37306843267106, "learning_rate": 3.960278745644599e-07, "loss": 1.1525, "step": 38720 }, { "epoch": 427.59381898454745, "learning_rate": 3.9571428571428575e-07, "loss": 1.1365, "step": 38740 }, { "epoch": 427.81456953642385, "learning_rate": 3.9540069686411145e-07, "loss": 1.144, "step": 38760 }, { "epoch": 428.03532008830024, "learning_rate": 3.950871080139373e-07, "loss": 1.117, "step": 38780 }, { "epoch": 428.2560706401766, "learning_rate": 3.94773519163763e-07, "loss": 1.1474, "step": 38800 }, { "epoch": 428.47682119205297, "learning_rate": 3.9445993031358887e-07, "loss": 1.1054, "step": 38820 }, { "epoch": 428.69757174392936, "learning_rate": 3.9414634146341456e-07, "loss": 1.1707, "step": 38840 }, { "epoch": 428.91832229580575, "learning_rate": 3.938327526132404e-07, "loss": 1.1234, "step": 38860 }, { "epoch": 429.13907284768214, "learning_rate": 3.9351916376306623e-07, "loss": 1.1648, "step": 38880 }, { "epoch": 429.3598233995585, "learning_rate": 3.93205574912892e-07, "loss": 1.1243, "step": 38900 }, { "epoch": 429.58057395143487, "learning_rate": 3.928919860627178e-07, "loss": 1.1617, "step": 38920 }, { "epoch": 429.80132450331126, "learning_rate": 3.9257839721254354e-07, "loss": 1.1192, "step": 38940 }, { "epoch": 430.02207505518766, "learning_rate": 3.9226480836236934e-07, "loss": 1.159, "step": 38960 }, { "epoch": 430.242825607064, "learning_rate": 3.9195121951219504e-07, "loss": 1.1673, "step": 38980 }, { "epoch": 430.4635761589404, "learning_rate": 3.916376306620209e-07, "loss": 1.1815, "step": 39000 }, { "epoch": 430.6843267108168, "learning_rate": 3.913240418118467e-07, "loss": 1.1173, "step": 39020 }, { "epoch": 430.90507726269317, "learning_rate": 3.9101045296167245e-07, "loss": 1.1484, "step": 39040 }, { "epoch": 431.12582781456956, "learning_rate": 3.9069686411149826e-07, "loss": 1.1731, "step": 39060 }, { "epoch": 431.3465783664459, "learning_rate": 3.90383275261324e-07, "loss": 1.1409, "step": 39080 }, { "epoch": 431.5673289183223, "learning_rate": 3.9006968641114976e-07, "loss": 1.1594, "step": 39100 }, { "epoch": 431.7880794701987, "learning_rate": 3.8975609756097557e-07, "loss": 1.1579, "step": 39120 }, { "epoch": 432.0088300220751, "learning_rate": 3.8944250871080137e-07, "loss": 1.1521, "step": 39140 }, { "epoch": 432.2295805739514, "learning_rate": 3.891289198606272e-07, "loss": 1.1229, "step": 39160 }, { "epoch": 432.4503311258278, "learning_rate": 3.8881533101045293e-07, "loss": 1.1652, "step": 39180 }, { "epoch": 432.6710816777042, "learning_rate": 3.8850174216027873e-07, "loss": 1.1542, "step": 39200 }, { "epoch": 432.8918322295806, "learning_rate": 3.8818815331010443e-07, "loss": 1.088, "step": 39220 }, { "epoch": 433.112582781457, "learning_rate": 3.878745644599303e-07, "loss": 1.1239, "step": 39240 }, { "epoch": 433.3333333333333, "learning_rate": 3.8756097560975604e-07, "loss": 1.1119, "step": 39260 }, { "epoch": 433.5540838852097, "learning_rate": 3.8724738675958185e-07, "loss": 1.1393, "step": 39280 }, { "epoch": 433.7748344370861, "learning_rate": 3.869337979094077e-07, "loss": 1.1361, "step": 39300 }, { "epoch": 433.9955849889625, "learning_rate": 3.866202090592334e-07, "loss": 1.1372, "step": 39320 }, { "epoch": 434.2163355408388, "learning_rate": 3.8630662020905926e-07, "loss": 1.1554, "step": 39340 }, { "epoch": 434.4370860927152, "learning_rate": 3.8599303135888496e-07, "loss": 1.1433, "step": 39360 }, { "epoch": 434.6578366445916, "learning_rate": 3.856794425087108e-07, "loss": 1.1224, "step": 39380 }, { "epoch": 434.878587196468, "learning_rate": 3.853658536585365e-07, "loss": 1.1458, "step": 39400 }, { "epoch": 435.0993377483444, "learning_rate": 3.850522648083623e-07, "loss": 1.0863, "step": 39420 }, { "epoch": 435.32008830022073, "learning_rate": 3.847386759581882e-07, "loss": 1.1512, "step": 39440 }, { "epoch": 435.5408388520971, "learning_rate": 3.844250871080139e-07, "loss": 1.147, "step": 39460 }, { "epoch": 435.7615894039735, "learning_rate": 3.8411149825783974e-07, "loss": 1.1392, "step": 39480 }, { "epoch": 435.9823399558499, "learning_rate": 3.8379790940766544e-07, "loss": 1.1789, "step": 39500 }, { "epoch": 436.20309050772624, "learning_rate": 3.834843205574913e-07, "loss": 1.1395, "step": 39520 }, { "epoch": 436.42384105960264, "learning_rate": 3.83170731707317e-07, "loss": 1.1263, "step": 39540 }, { "epoch": 436.64459161147903, "learning_rate": 3.828571428571429e-07, "loss": 1.13, "step": 39560 }, { "epoch": 436.8653421633554, "learning_rate": 3.8254355400696866e-07, "loss": 1.1497, "step": 39580 }, { "epoch": 437.0860927152318, "learning_rate": 3.822299651567944e-07, "loss": 1.156, "step": 39600 }, { "epoch": 437.0860927152318, "eval_bleu": 48.7386, "eval_gen_len": 8.4333, "eval_loss": 1.9549425840377808, "eval_runtime": 3.2851, "eval_samples_per_second": 9.132, "eval_steps_per_second": 1.826, "step": 39600 }, { "epoch": 437.30684326710815, "learning_rate": 3.819163763066202e-07, "loss": 1.177, "step": 39620 }, { "epoch": 437.52759381898454, "learning_rate": 3.8160278745644597e-07, "loss": 1.1015, "step": 39640 }, { "epoch": 437.74834437086093, "learning_rate": 3.8128919860627177e-07, "loss": 1.1403, "step": 39660 }, { "epoch": 437.9690949227373, "learning_rate": 3.809756097560976e-07, "loss": 1.144, "step": 39680 }, { "epoch": 438.18984547461366, "learning_rate": 3.8066202090592333e-07, "loss": 1.1744, "step": 39700 }, { "epoch": 438.41059602649005, "learning_rate": 3.8034843205574913e-07, "loss": 1.1481, "step": 39720 }, { "epoch": 438.63134657836645, "learning_rate": 3.800348432055749e-07, "loss": 1.1002, "step": 39740 }, { "epoch": 438.85209713024284, "learning_rate": 3.797212543554007e-07, "loss": 1.1463, "step": 39760 }, { "epoch": 439.07284768211923, "learning_rate": 3.7940766550522644e-07, "loss": 1.1197, "step": 39780 }, { "epoch": 439.29359823399557, "learning_rate": 3.7909407665505225e-07, "loss": 1.1338, "step": 39800 }, { "epoch": 439.51434878587196, "learning_rate": 3.787804878048781e-07, "loss": 1.174, "step": 39820 }, { "epoch": 439.73509933774835, "learning_rate": 3.784668989547038e-07, "loss": 1.1651, "step": 39840 }, { "epoch": 439.95584988962474, "learning_rate": 3.781533101045296e-07, "loss": 1.1388, "step": 39860 }, { "epoch": 440.1766004415011, "learning_rate": 3.7783972125435536e-07, "loss": 1.1651, "step": 39880 }, { "epoch": 440.3973509933775, "learning_rate": 3.7752613240418116e-07, "loss": 1.1577, "step": 39900 }, { "epoch": 440.61810154525386, "learning_rate": 3.772125435540069e-07, "loss": 1.1453, "step": 39920 }, { "epoch": 440.83885209713026, "learning_rate": 3.768989547038327e-07, "loss": 1.1037, "step": 39940 }, { "epoch": 441.05960264900665, "learning_rate": 3.765853658536586e-07, "loss": 1.1393, "step": 39960 }, { "epoch": 441.280353200883, "learning_rate": 3.762717770034843e-07, "loss": 1.1566, "step": 39980 }, { "epoch": 441.5011037527594, "learning_rate": 3.7595818815331014e-07, "loss": 1.0984, "step": 40000 }, { "epoch": 441.72185430463577, "learning_rate": 3.7564459930313583e-07, "loss": 1.1659, "step": 40020 }, { "epoch": 441.94260485651216, "learning_rate": 3.753310104529617e-07, "loss": 1.1299, "step": 40040 }, { "epoch": 442.1633554083885, "learning_rate": 3.750174216027874e-07, "loss": 1.1503, "step": 40060 }, { "epoch": 442.3841059602649, "learning_rate": 3.7470383275261325e-07, "loss": 1.1451, "step": 40080 }, { "epoch": 442.6048565121413, "learning_rate": 3.7439024390243905e-07, "loss": 1.1444, "step": 40100 }, { "epoch": 442.8256070640177, "learning_rate": 3.7407665505226475e-07, "loss": 1.1223, "step": 40120 }, { "epoch": 443.04635761589407, "learning_rate": 3.7376306620209066e-07, "loss": 1.1695, "step": 40140 }, { "epoch": 443.2671081677704, "learning_rate": 3.734494773519163e-07, "loss": 1.1616, "step": 40160 }, { "epoch": 443.4878587196468, "learning_rate": 3.7313588850174217e-07, "loss": 1.1511, "step": 40180 }, { "epoch": 443.7086092715232, "learning_rate": 3.7282229965156787e-07, "loss": 1.1121, "step": 40200 }, { "epoch": 443.9293598233996, "learning_rate": 3.725087108013937e-07, "loss": 1.121, "step": 40220 }, { "epoch": 444.1501103752759, "learning_rate": 3.7219512195121953e-07, "loss": 1.1332, "step": 40240 }, { "epoch": 444.3708609271523, "learning_rate": 3.7188153310104533e-07, "loss": 1.1323, "step": 40260 }, { "epoch": 444.5916114790287, "learning_rate": 3.715679442508711e-07, "loss": 1.1534, "step": 40280 }, { "epoch": 444.8123620309051, "learning_rate": 3.7125435540069684e-07, "loss": 1.1098, "step": 40300 }, { "epoch": 445.0331125827815, "learning_rate": 3.7094076655052264e-07, "loss": 1.1334, "step": 40320 }, { "epoch": 445.2538631346578, "learning_rate": 3.706271777003484e-07, "loss": 1.1431, "step": 40340 }, { "epoch": 445.4746136865342, "learning_rate": 3.703135888501742e-07, "loss": 1.1335, "step": 40360 }, { "epoch": 445.6953642384106, "learning_rate": 3.7e-07, "loss": 1.1261, "step": 40380 }, { "epoch": 445.916114790287, "learning_rate": 3.6968641114982576e-07, "loss": 1.115, "step": 40400 }, { "epoch": 446.13686534216333, "learning_rate": 3.6937282229965156e-07, "loss": 1.1489, "step": 40420 }, { "epoch": 446.3576158940397, "learning_rate": 3.690592334494773e-07, "loss": 1.1683, "step": 40440 }, { "epoch": 446.5783664459161, "learning_rate": 3.687456445993031e-07, "loss": 1.1336, "step": 40460 }, { "epoch": 446.7991169977925, "learning_rate": 3.6843205574912887e-07, "loss": 1.1252, "step": 40480 }, { "epoch": 447.0198675496689, "learning_rate": 3.681184668989547e-07, "loss": 1.1317, "step": 40500 }, { "epoch": 447.24061810154524, "learning_rate": 3.6780487804878053e-07, "loss": 1.1273, "step": 40520 }, { "epoch": 447.46136865342163, "learning_rate": 3.6749128919860623e-07, "loss": 1.1509, "step": 40540 }, { "epoch": 447.682119205298, "learning_rate": 3.6717770034843204e-07, "loss": 1.1495, "step": 40560 }, { "epoch": 447.9028697571744, "learning_rate": 3.668641114982578e-07, "loss": 1.1304, "step": 40580 }, { "epoch": 448.12362030905075, "learning_rate": 3.665505226480836e-07, "loss": 1.1187, "step": 40600 }, { "epoch": 448.34437086092714, "learning_rate": 3.6623693379790935e-07, "loss": 1.1221, "step": 40620 }, { "epoch": 448.56512141280353, "learning_rate": 3.6592334494773515e-07, "loss": 1.1638, "step": 40640 }, { "epoch": 448.7858719646799, "learning_rate": 3.65609756097561e-07, "loss": 1.1179, "step": 40660 }, { "epoch": 449.0066225165563, "learning_rate": 3.652961672473867e-07, "loss": 1.131, "step": 40680 }, { "epoch": 449.22737306843266, "learning_rate": 3.6498257839721256e-07, "loss": 1.1289, "step": 40700 }, { "epoch": 449.44812362030905, "learning_rate": 3.6466898954703826e-07, "loss": 1.1436, "step": 40720 }, { "epoch": 449.66887417218544, "learning_rate": 3.643554006968641e-07, "loss": 1.1178, "step": 40740 }, { "epoch": 449.88962472406183, "learning_rate": 3.640418118466899e-07, "loss": 1.1459, "step": 40760 }, { "epoch": 450.11037527593817, "learning_rate": 3.637282229965157e-07, "loss": 1.1443, "step": 40780 }, { "epoch": 450.33112582781456, "learning_rate": 3.634146341463415e-07, "loss": 1.1279, "step": 40800 }, { "epoch": 450.55187637969095, "learning_rate": 3.631010452961672e-07, "loss": 1.1468, "step": 40820 }, { "epoch": 450.77262693156734, "learning_rate": 3.627874564459931e-07, "loss": 1.1435, "step": 40840 }, { "epoch": 450.9933774834437, "learning_rate": 3.6247386759581874e-07, "loss": 1.189, "step": 40860 }, { "epoch": 451.2141280353201, "learning_rate": 3.621602787456446e-07, "loss": 1.1308, "step": 40880 }, { "epoch": 451.43487858719647, "learning_rate": 3.618466898954704e-07, "loss": 1.1471, "step": 40900 }, { "epoch": 451.65562913907286, "learning_rate": 3.6153310104529615e-07, "loss": 1.1316, "step": 40920 }, { "epoch": 451.87637969094925, "learning_rate": 3.6121951219512196e-07, "loss": 1.1592, "step": 40940 }, { "epoch": 452.0971302428256, "learning_rate": 3.6090592334494766e-07, "loss": 1.1262, "step": 40960 }, { "epoch": 452.317880794702, "learning_rate": 3.605923344947735e-07, "loss": 1.1274, "step": 40980 }, { "epoch": 452.53863134657837, "learning_rate": 3.6027874564459927e-07, "loss": 1.1403, "step": 41000 }, { "epoch": 452.75938189845476, "learning_rate": 3.5996515679442507e-07, "loss": 1.1721, "step": 41020 }, { "epoch": 452.9801324503311, "learning_rate": 3.596515679442509e-07, "loss": 1.1051, "step": 41040 }, { "epoch": 453.2008830022075, "learning_rate": 3.5933797909407663e-07, "loss": 1.1413, "step": 41060 }, { "epoch": 453.4216335540839, "learning_rate": 3.5902439024390243e-07, "loss": 1.1231, "step": 41080 }, { "epoch": 453.6423841059603, "learning_rate": 3.587108013937282e-07, "loss": 1.1572, "step": 41100 }, { "epoch": 453.86313465783667, "learning_rate": 3.58397212543554e-07, "loss": 1.121, "step": 41120 }, { "epoch": 454.083885209713, "learning_rate": 3.5808362369337974e-07, "loss": 1.1234, "step": 41140 }, { "epoch": 454.3046357615894, "learning_rate": 3.5777003484320555e-07, "loss": 1.1248, "step": 41160 }, { "epoch": 454.5253863134658, "learning_rate": 3.574564459930314e-07, "loss": 1.1286, "step": 41180 }, { "epoch": 454.7461368653422, "learning_rate": 3.571428571428571e-07, "loss": 1.1519, "step": 41200 }, { "epoch": 454.9668874172185, "learning_rate": 3.5682926829268296e-07, "loss": 1.1209, "step": 41220 }, { "epoch": 455.1876379690949, "learning_rate": 3.5651567944250866e-07, "loss": 1.1182, "step": 41240 }, { "epoch": 455.4083885209713, "learning_rate": 3.5620209059233447e-07, "loss": 1.1495, "step": 41260 }, { "epoch": 455.6291390728477, "learning_rate": 3.558885017421602e-07, "loss": 1.1124, "step": 41280 }, { "epoch": 455.8498896247241, "learning_rate": 3.55574912891986e-07, "loss": 1.1193, "step": 41300 }, { "epoch": 456.0706401766004, "learning_rate": 3.552613240418119e-07, "loss": 1.1411, "step": 41320 }, { "epoch": 456.2913907284768, "learning_rate": 3.549477351916376e-07, "loss": 1.1061, "step": 41340 }, { "epoch": 456.5121412803532, "learning_rate": 3.5463414634146344e-07, "loss": 1.1603, "step": 41360 }, { "epoch": 456.7328918322296, "learning_rate": 3.5432055749128914e-07, "loss": 1.0956, "step": 41380 }, { "epoch": 456.95364238410593, "learning_rate": 3.54006968641115e-07, "loss": 1.1584, "step": 41400 }, { "epoch": 457.1743929359823, "learning_rate": 3.536933797909407e-07, "loss": 1.1137, "step": 41420 }, { "epoch": 457.3951434878587, "learning_rate": 3.5337979094076655e-07, "loss": 1.1808, "step": 41440 }, { "epoch": 457.6158940397351, "learning_rate": 3.5306620209059236e-07, "loss": 1.1562, "step": 41460 }, { "epoch": 457.8366445916115, "learning_rate": 3.527526132404181e-07, "loss": 1.0854, "step": 41480 }, { "epoch": 458.05739514348784, "learning_rate": 3.524390243902439e-07, "loss": 1.1344, "step": 41500 }, { "epoch": 458.27814569536423, "learning_rate": 3.521254355400696e-07, "loss": 1.154, "step": 41520 }, { "epoch": 458.4988962472406, "learning_rate": 3.518118466898954e-07, "loss": 1.1108, "step": 41540 }, { "epoch": 458.719646799117, "learning_rate": 3.5149825783972117e-07, "loss": 1.1306, "step": 41560 }, { "epoch": 458.94039735099335, "learning_rate": 3.51184668989547e-07, "loss": 1.1684, "step": 41580 }, { "epoch": 459.16114790286974, "learning_rate": 3.5087108013937283e-07, "loss": 1.1056, "step": 41600 }, { "epoch": 459.38189845474614, "learning_rate": 3.505574912891986e-07, "loss": 1.125, "step": 41620 }, { "epoch": 459.6026490066225, "learning_rate": 3.502439024390244e-07, "loss": 1.1251, "step": 41640 }, { "epoch": 459.8233995584989, "learning_rate": 3.499303135888501e-07, "loss": 1.1147, "step": 41660 }, { "epoch": 460.04415011037526, "learning_rate": 3.4961672473867594e-07, "loss": 1.1571, "step": 41680 }, { "epoch": 460.26490066225165, "learning_rate": 3.493031358885017e-07, "loss": 1.1576, "step": 41700 }, { "epoch": 460.48565121412804, "learning_rate": 3.489895470383275e-07, "loss": 1.1273, "step": 41720 }, { "epoch": 460.70640176600443, "learning_rate": 3.486759581881533e-07, "loss": 1.1376, "step": 41740 }, { "epoch": 460.92715231788077, "learning_rate": 3.4836236933797906e-07, "loss": 1.1215, "step": 41760 }, { "epoch": 461.14790286975716, "learning_rate": 3.4804878048780486e-07, "loss": 1.1083, "step": 41780 }, { "epoch": 461.36865342163355, "learning_rate": 3.477351916376306e-07, "loss": 1.1667, "step": 41800 }, { "epoch": 461.58940397350995, "learning_rate": 3.474216027874564e-07, "loss": 1.1311, "step": 41820 }, { "epoch": 461.81015452538634, "learning_rate": 3.471080139372823e-07, "loss": 1.1249, "step": 41840 }, { "epoch": 462.0309050772627, "learning_rate": 3.46794425087108e-07, "loss": 1.103, "step": 41860 }, { "epoch": 462.25165562913907, "learning_rate": 3.4648083623693383e-07, "loss": 1.1352, "step": 41880 }, { "epoch": 462.47240618101546, "learning_rate": 3.4616724738675953e-07, "loss": 1.1752, "step": 41900 }, { "epoch": 462.69315673289185, "learning_rate": 3.458536585365854e-07, "loss": 1.1267, "step": 41920 }, { "epoch": 462.9139072847682, "learning_rate": 3.455400696864111e-07, "loss": 1.0625, "step": 41940 }, { "epoch": 463.1346578366446, "learning_rate": 3.4522648083623695e-07, "loss": 1.1038, "step": 41960 }, { "epoch": 463.35540838852097, "learning_rate": 3.4491289198606275e-07, "loss": 1.1499, "step": 41980 }, { "epoch": 463.57615894039736, "learning_rate": 3.4459930313588845e-07, "loss": 1.1417, "step": 42000 }, { "epoch": 463.79690949227376, "learning_rate": 3.442857142857143e-07, "loss": 1.1291, "step": 42020 }, { "epoch": 464.0176600441501, "learning_rate": 3.4397212543554e-07, "loss": 1.142, "step": 42040 }, { "epoch": 464.2384105960265, "learning_rate": 3.4365853658536587e-07, "loss": 1.0993, "step": 42060 }, { "epoch": 464.4591611479029, "learning_rate": 3.4334494773519157e-07, "loss": 1.1713, "step": 42080 }, { "epoch": 464.67991169977927, "learning_rate": 3.430313588850174e-07, "loss": 1.1404, "step": 42100 }, { "epoch": 464.9006622516556, "learning_rate": 3.427177700348432e-07, "loss": 1.1187, "step": 42120 }, { "epoch": 465.121412803532, "learning_rate": 3.42404181184669e-07, "loss": 1.1665, "step": 42140 }, { "epoch": 465.3421633554084, "learning_rate": 3.420905923344948e-07, "loss": 1.1548, "step": 42160 }, { "epoch": 465.5629139072848, "learning_rate": 3.4177700348432054e-07, "loss": 1.1313, "step": 42180 }, { "epoch": 465.7836644591612, "learning_rate": 3.4146341463414634e-07, "loss": 1.1037, "step": 42200 }, { "epoch": 466.0044150110375, "learning_rate": 3.411498257839721e-07, "loss": 1.1544, "step": 42220 }, { "epoch": 466.2251655629139, "learning_rate": 3.4083623693379785e-07, "loss": 1.1605, "step": 42240 }, { "epoch": 466.4459161147903, "learning_rate": 3.405226480836237e-07, "loss": 1.1331, "step": 42260 }, { "epoch": 466.6666666666667, "learning_rate": 3.4020905923344946e-07, "loss": 1.1138, "step": 42280 }, { "epoch": 466.887417218543, "learning_rate": 3.3989547038327526e-07, "loss": 1.1152, "step": 42300 }, { "epoch": 467.1081677704194, "learning_rate": 3.39581881533101e-07, "loss": 1.1406, "step": 42320 }, { "epoch": 467.3289183222958, "learning_rate": 3.392682926829268e-07, "loss": 1.1333, "step": 42340 }, { "epoch": 467.5496688741722, "learning_rate": 3.389547038327525e-07, "loss": 1.1324, "step": 42360 }, { "epoch": 467.7704194260486, "learning_rate": 3.386411149825784e-07, "loss": 1.1196, "step": 42380 }, { "epoch": 467.9911699779249, "learning_rate": 3.383275261324042e-07, "loss": 1.1067, "step": 42400 }, { "epoch": 468.2119205298013, "learning_rate": 3.3801393728222993e-07, "loss": 1.1315, "step": 42420 }, { "epoch": 468.4326710816777, "learning_rate": 3.377003484320558e-07, "loss": 1.1629, "step": 42440 }, { "epoch": 468.6534216335541, "learning_rate": 3.373867595818815e-07, "loss": 1.1046, "step": 42460 }, { "epoch": 468.87417218543044, "learning_rate": 3.370731707317073e-07, "loss": 1.1401, "step": 42480 }, { "epoch": 469.09492273730683, "learning_rate": 3.3675958188153304e-07, "loss": 1.1215, "step": 42500 }, { "epoch": 469.3156732891832, "learning_rate": 3.364459930313589e-07, "loss": 1.1688, "step": 42520 }, { "epoch": 469.5364238410596, "learning_rate": 3.3613240418118465e-07, "loss": 1.1017, "step": 42540 }, { "epoch": 469.757174392936, "learning_rate": 3.358188153310104e-07, "loss": 1.1356, "step": 42560 }, { "epoch": 469.97792494481234, "learning_rate": 3.3550522648083626e-07, "loss": 1.1307, "step": 42580 }, { "epoch": 470.19867549668874, "learning_rate": 3.3519163763066196e-07, "loss": 1.1366, "step": 42600 }, { "epoch": 470.41942604856513, "learning_rate": 3.3487804878048777e-07, "loss": 1.1425, "step": 42620 }, { "epoch": 470.6401766004415, "learning_rate": 3.345644599303135e-07, "loss": 1.1551, "step": 42640 }, { "epoch": 470.86092715231786, "learning_rate": 3.342508710801394e-07, "loss": 1.1276, "step": 42660 }, { "epoch": 471.08167770419425, "learning_rate": 3.339372822299652e-07, "loss": 1.1544, "step": 42680 }, { "epoch": 471.30242825607064, "learning_rate": 3.336236933797909e-07, "loss": 1.1279, "step": 42700 }, { "epoch": 471.52317880794703, "learning_rate": 3.3331010452961674e-07, "loss": 1.1445, "step": 42720 }, { "epoch": 471.7439293598234, "learning_rate": 3.329965156794425e-07, "loss": 1.1409, "step": 42740 }, { "epoch": 471.96467991169976, "learning_rate": 3.326829268292683e-07, "loss": 1.1386, "step": 42760 }, { "epoch": 472.18543046357615, "learning_rate": 3.323693379790941e-07, "loss": 1.1302, "step": 42780 }, { "epoch": 472.40618101545255, "learning_rate": 3.3205574912891985e-07, "loss": 1.1509, "step": 42800 }, { "epoch": 472.62693156732894, "learning_rate": 3.3174216027874566e-07, "loss": 1.1168, "step": 42820 }, { "epoch": 472.8476821192053, "learning_rate": 3.3142857142857136e-07, "loss": 1.095, "step": 42840 }, { "epoch": 473.06843267108167, "learning_rate": 3.311149825783972e-07, "loss": 1.1584, "step": 42860 }, { "epoch": 473.28918322295806, "learning_rate": 3.3080139372822297e-07, "loss": 1.1508, "step": 42880 }, { "epoch": 473.50993377483445, "learning_rate": 3.3048780487804877e-07, "loss": 1.1147, "step": 42900 }, { "epoch": 473.73068432671084, "learning_rate": 3.301742160278746e-07, "loss": 1.1217, "step": 42920 }, { "epoch": 473.9514348785872, "learning_rate": 3.2986062717770033e-07, "loss": 1.1099, "step": 42940 }, { "epoch": 474.17218543046357, "learning_rate": 3.295470383275262e-07, "loss": 1.1578, "step": 42960 }, { "epoch": 474.39293598233996, "learning_rate": 3.292334494773519e-07, "loss": 1.1245, "step": 42980 }, { "epoch": 474.61368653421636, "learning_rate": 3.289198606271777e-07, "loss": 1.1166, "step": 43000 }, { "epoch": 474.8344370860927, "learning_rate": 3.2860627177700344e-07, "loss": 1.1015, "step": 43020 }, { "epoch": 475.0551876379691, "learning_rate": 3.2829268292682925e-07, "loss": 1.1511, "step": 43040 }, { "epoch": 475.2759381898455, "learning_rate": 3.2797909407665505e-07, "loss": 1.139, "step": 43060 }, { "epoch": 475.49668874172187, "learning_rate": 3.276655052264808e-07, "loss": 1.1502, "step": 43080 }, { "epoch": 475.71743929359826, "learning_rate": 3.2735191637630666e-07, "loss": 1.1129, "step": 43100 }, { "epoch": 475.9381898454746, "learning_rate": 3.2703832752613236e-07, "loss": 1.1405, "step": 43120 }, { "epoch": 476.158940397351, "learning_rate": 3.2672473867595816e-07, "loss": 1.1517, "step": 43140 }, { "epoch": 476.3796909492274, "learning_rate": 3.264111498257839e-07, "loss": 1.1224, "step": 43160 }, { "epoch": 476.6004415011038, "learning_rate": 3.260975609756098e-07, "loss": 1.1472, "step": 43180 }, { "epoch": 476.8211920529801, "learning_rate": 3.257839721254356e-07, "loss": 1.1355, "step": 43200 }, { "epoch": 476.8211920529801, "eval_bleu": 48.3929, "eval_gen_len": 8.5667, "eval_loss": 1.9725987911224365, "eval_runtime": 3.2649, "eval_samples_per_second": 9.189, "eval_steps_per_second": 1.838, "step": 43200 }, { "epoch": 477.0419426048565, "learning_rate": 3.254703832752613e-07, "loss": 1.1215, "step": 43220 }, { "epoch": 477.2626931567329, "learning_rate": 3.2515679442508714e-07, "loss": 1.0938, "step": 43240 }, { "epoch": 477.4834437086093, "learning_rate": 3.248432055749129e-07, "loss": 1.1255, "step": 43260 }, { "epoch": 477.7041942604857, "learning_rate": 3.2452961672473864e-07, "loss": 1.1423, "step": 43280 }, { "epoch": 477.924944812362, "learning_rate": 3.242160278745644e-07, "loss": 1.1531, "step": 43300 }, { "epoch": 478.1456953642384, "learning_rate": 3.2390243902439025e-07, "loss": 1.1217, "step": 43320 }, { "epoch": 478.3664459161148, "learning_rate": 3.2358885017421605e-07, "loss": 1.1308, "step": 43340 }, { "epoch": 478.5871964679912, "learning_rate": 3.2327526132404175e-07, "loss": 1.1318, "step": 43360 }, { "epoch": 478.8079470198675, "learning_rate": 3.229616724738676e-07, "loss": 1.1465, "step": 43380 }, { "epoch": 479.0286975717439, "learning_rate": 3.226480836236934e-07, "loss": 1.1182, "step": 43400 }, { "epoch": 479.2494481236203, "learning_rate": 3.2233449477351917e-07, "loss": 1.106, "step": 43420 }, { "epoch": 479.4701986754967, "learning_rate": 3.2202090592334487e-07, "loss": 1.1445, "step": 43440 }, { "epoch": 479.6909492273731, "learning_rate": 3.217073170731707e-07, "loss": 1.1517, "step": 43460 }, { "epoch": 479.91169977924943, "learning_rate": 3.2139372822299653e-07, "loss": 1.1624, "step": 43480 }, { "epoch": 480.1324503311258, "learning_rate": 3.2108013937282223e-07, "loss": 1.1148, "step": 43500 }, { "epoch": 480.3532008830022, "learning_rate": 3.207665505226481e-07, "loss": 1.1493, "step": 43520 }, { "epoch": 480.5739514348786, "learning_rate": 3.2045296167247384e-07, "loss": 1.1442, "step": 43540 }, { "epoch": 480.79470198675494, "learning_rate": 3.2013937282229964e-07, "loss": 1.1218, "step": 43560 }, { "epoch": 481.01545253863134, "learning_rate": 3.1982578397212534e-07, "loss": 1.1261, "step": 43580 }, { "epoch": 481.23620309050773, "learning_rate": 3.195121951219512e-07, "loss": 1.1063, "step": 43600 }, { "epoch": 481.4569536423841, "learning_rate": 3.1919860627177706e-07, "loss": 1.1305, "step": 43620 }, { "epoch": 481.6777041942605, "learning_rate": 3.1888501742160276e-07, "loss": 1.1276, "step": 43640 }, { "epoch": 481.89845474613685, "learning_rate": 3.1857142857142856e-07, "loss": 1.1197, "step": 43660 }, { "epoch": 482.11920529801324, "learning_rate": 3.182578397212543e-07, "loss": 1.1389, "step": 43680 }, { "epoch": 482.33995584988963, "learning_rate": 3.1794425087108017e-07, "loss": 1.1509, "step": 43700 }, { "epoch": 482.560706401766, "learning_rate": 3.1763066202090587e-07, "loss": 1.1369, "step": 43720 }, { "epoch": 482.78145695364236, "learning_rate": 3.173170731707317e-07, "loss": 1.1159, "step": 43740 }, { "epoch": 483.00220750551875, "learning_rate": 3.1700348432055753e-07, "loss": 1.1181, "step": 43760 }, { "epoch": 483.22295805739515, "learning_rate": 3.1668989547038323e-07, "loss": 1.1249, "step": 43780 }, { "epoch": 483.44370860927154, "learning_rate": 3.1637630662020904e-07, "loss": 1.1342, "step": 43800 }, { "epoch": 483.6644591611479, "learning_rate": 3.160627177700348e-07, "loss": 1.1113, "step": 43820 }, { "epoch": 483.88520971302427, "learning_rate": 3.1574912891986065e-07, "loss": 1.1048, "step": 43840 }, { "epoch": 484.10596026490066, "learning_rate": 3.154355400696865e-07, "loss": 1.1291, "step": 43860 }, { "epoch": 484.32671081677705, "learning_rate": 3.1512195121951215e-07, "loss": 1.1107, "step": 43880 }, { "epoch": 484.54746136865344, "learning_rate": 3.14808362369338e-07, "loss": 1.1534, "step": 43900 }, { "epoch": 484.7682119205298, "learning_rate": 3.1449477351916376e-07, "loss": 1.116, "step": 43920 }, { "epoch": 484.9889624724062, "learning_rate": 3.1418118466898956e-07, "loss": 1.147, "step": 43940 }, { "epoch": 485.20971302428256, "learning_rate": 3.1386759581881526e-07, "loss": 1.1347, "step": 43960 }, { "epoch": 485.43046357615896, "learning_rate": 3.135540069686412e-07, "loss": 1.1253, "step": 43980 }, { "epoch": 485.6512141280353, "learning_rate": 3.1324041811846693e-07, "loss": 1.1149, "step": 44000 }, { "epoch": 485.8719646799117, "learning_rate": 3.129268292682926e-07, "loss": 1.1594, "step": 44020 }, { "epoch": 486.0927152317881, "learning_rate": 3.126132404181185e-07, "loss": 1.1279, "step": 44040 }, { "epoch": 486.31346578366447, "learning_rate": 3.1229965156794424e-07, "loss": 1.1097, "step": 44060 }, { "epoch": 486.53421633554086, "learning_rate": 3.1198606271777004e-07, "loss": 1.0929, "step": 44080 }, { "epoch": 486.7549668874172, "learning_rate": 3.1167247386759574e-07, "loss": 1.1549, "step": 44100 }, { "epoch": 486.9757174392936, "learning_rate": 3.113588850174216e-07, "loss": 1.1118, "step": 44120 }, { "epoch": 487.19646799117, "learning_rate": 3.1104529616724745e-07, "loss": 1.1265, "step": 44140 }, { "epoch": 487.4172185430464, "learning_rate": 3.1073170731707315e-07, "loss": 1.1465, "step": 44160 }, { "epoch": 487.6379690949227, "learning_rate": 3.1041811846689896e-07, "loss": 1.1336, "step": 44180 }, { "epoch": 487.8587196467991, "learning_rate": 3.101045296167247e-07, "loss": 1.1667, "step": 44200 }, { "epoch": 488.0794701986755, "learning_rate": 3.097909407665505e-07, "loss": 1.1588, "step": 44220 }, { "epoch": 488.3002207505519, "learning_rate": 3.094773519163762e-07, "loss": 1.096, "step": 44240 }, { "epoch": 488.5209713024283, "learning_rate": 3.0916376306620207e-07, "loss": 1.1389, "step": 44260 }, { "epoch": 488.7417218543046, "learning_rate": 3.0885017421602793e-07, "loss": 1.1224, "step": 44280 }, { "epoch": 488.962472406181, "learning_rate": 3.0853658536585363e-07, "loss": 1.1364, "step": 44300 }, { "epoch": 489.1832229580574, "learning_rate": 3.0822299651567943e-07, "loss": 1.1456, "step": 44320 }, { "epoch": 489.4039735099338, "learning_rate": 3.079094076655052e-07, "loss": 1.1101, "step": 44340 }, { "epoch": 489.6247240618101, "learning_rate": 3.0759581881533104e-07, "loss": 1.1395, "step": 44360 }, { "epoch": 489.8454746136865, "learning_rate": 3.0728222996515674e-07, "loss": 1.1361, "step": 44380 }, { "epoch": 490.0662251655629, "learning_rate": 3.0696864111498255e-07, "loss": 1.1494, "step": 44400 }, { "epoch": 490.2869757174393, "learning_rate": 3.066550522648084e-07, "loss": 1.1222, "step": 44420 }, { "epoch": 490.5077262693157, "learning_rate": 3.063414634146341e-07, "loss": 1.1303, "step": 44440 }, { "epoch": 490.72847682119203, "learning_rate": 3.060278745644599e-07, "loss": 1.095, "step": 44460 }, { "epoch": 490.9492273730684, "learning_rate": 3.0571428571428566e-07, "loss": 1.1407, "step": 44480 }, { "epoch": 491.1699779249448, "learning_rate": 3.054006968641115e-07, "loss": 1.132, "step": 44500 }, { "epoch": 491.3907284768212, "learning_rate": 3.050871080139372e-07, "loss": 1.104, "step": 44520 }, { "epoch": 491.61147902869754, "learning_rate": 3.04773519163763e-07, "loss": 1.121, "step": 44540 }, { "epoch": 491.83222958057394, "learning_rate": 3.0445993031358883e-07, "loss": 1.1607, "step": 44560 }, { "epoch": 492.05298013245033, "learning_rate": 3.0414634146341463e-07, "loss": 1.0907, "step": 44580 }, { "epoch": 492.2737306843267, "learning_rate": 3.0383275261324044e-07, "loss": 1.116, "step": 44600 }, { "epoch": 492.4944812362031, "learning_rate": 3.0351916376306614e-07, "loss": 1.1573, "step": 44620 }, { "epoch": 492.71523178807945, "learning_rate": 3.03205574912892e-07, "loss": 1.112, "step": 44640 }, { "epoch": 492.93598233995584, "learning_rate": 3.0289198606271775e-07, "loss": 1.1136, "step": 44660 }, { "epoch": 493.15673289183223, "learning_rate": 3.025783972125435e-07, "loss": 1.1287, "step": 44680 }, { "epoch": 493.3774834437086, "learning_rate": 3.0226480836236936e-07, "loss": 1.1367, "step": 44700 }, { "epoch": 493.59823399558496, "learning_rate": 3.019512195121951e-07, "loss": 1.134, "step": 44720 }, { "epoch": 493.81898454746135, "learning_rate": 3.016376306620209e-07, "loss": 1.1078, "step": 44740 }, { "epoch": 494.03973509933775, "learning_rate": 3.013240418118466e-07, "loss": 1.1159, "step": 44760 }, { "epoch": 494.26048565121414, "learning_rate": 3.0101045296167247e-07, "loss": 1.1747, "step": 44780 }, { "epoch": 494.48123620309053, "learning_rate": 3.0069686411149833e-07, "loss": 1.1123, "step": 44800 }, { "epoch": 494.70198675496687, "learning_rate": 3.00383275261324e-07, "loss": 1.1273, "step": 44820 }, { "epoch": 494.92273730684326, "learning_rate": 3.0006968641114983e-07, "loss": 1.1231, "step": 44840 }, { "epoch": 495.14348785871965, "learning_rate": 2.997560975609756e-07, "loss": 1.1174, "step": 44860 }, { "epoch": 495.36423841059604, "learning_rate": 2.994425087108014e-07, "loss": 1.1085, "step": 44880 }, { "epoch": 495.5849889624724, "learning_rate": 2.9912891986062714e-07, "loss": 1.1319, "step": 44900 }, { "epoch": 495.8057395143488, "learning_rate": 2.9881533101045294e-07, "loss": 1.138, "step": 44920 }, { "epoch": 496.02649006622516, "learning_rate": 2.985017421602788e-07, "loss": 1.1056, "step": 44940 }, { "epoch": 496.24724061810156, "learning_rate": 2.981881533101045e-07, "loss": 1.0901, "step": 44960 }, { "epoch": 496.46799116997795, "learning_rate": 2.978745644599303e-07, "loss": 1.11, "step": 44980 }, { "epoch": 496.6887417218543, "learning_rate": 2.9756097560975606e-07, "loss": 1.1398, "step": 45000 }, { "epoch": 496.9094922737307, "learning_rate": 2.972473867595819e-07, "loss": 1.1342, "step": 45020 }, { "epoch": 497.13024282560707, "learning_rate": 2.969337979094076e-07, "loss": 1.1409, "step": 45040 }, { "epoch": 497.35099337748346, "learning_rate": 2.966202090592334e-07, "loss": 1.0931, "step": 45060 }, { "epoch": 497.5717439293598, "learning_rate": 2.963066202090593e-07, "loss": 1.1652, "step": 45080 }, { "epoch": 497.7924944812362, "learning_rate": 2.9599303135888503e-07, "loss": 1.1286, "step": 45100 }, { "epoch": 498.0132450331126, "learning_rate": 2.956794425087108e-07, "loss": 1.1267, "step": 45120 }, { "epoch": 498.233995584989, "learning_rate": 2.9536585365853653e-07, "loss": 1.1027, "step": 45140 }, { "epoch": 498.45474613686537, "learning_rate": 2.950522648083624e-07, "loss": 1.1416, "step": 45160 }, { "epoch": 498.6754966887417, "learning_rate": 2.947386759581881e-07, "loss": 1.0954, "step": 45180 }, { "epoch": 498.8962472406181, "learning_rate": 2.944250871080139e-07, "loss": 1.1428, "step": 45200 }, { "epoch": 499.1169977924945, "learning_rate": 2.9411149825783975e-07, "loss": 1.1246, "step": 45220 }, { "epoch": 499.3377483443709, "learning_rate": 2.937979094076655e-07, "loss": 1.1236, "step": 45240 }, { "epoch": 499.5584988962472, "learning_rate": 2.9348432055749126e-07, "loss": 1.1216, "step": 45260 }, { "epoch": 499.7792494481236, "learning_rate": 2.93170731707317e-07, "loss": 1.1215, "step": 45280 }, { "epoch": 500.0, "learning_rate": 2.9285714285714287e-07, "loss": 1.1316, "step": 45300 }, { "epoch": 500.2207505518764, "learning_rate": 2.925435540069686e-07, "loss": 1.1448, "step": 45320 }, { "epoch": 500.4415011037528, "learning_rate": 2.922299651567944e-07, "loss": 1.1379, "step": 45340 }, { "epoch": 500.6622516556291, "learning_rate": 2.9191637630662023e-07, "loss": 1.1104, "step": 45360 }, { "epoch": 500.8830022075055, "learning_rate": 2.9160278745644593e-07, "loss": 1.1084, "step": 45380 }, { "epoch": 501.1037527593819, "learning_rate": 2.912891986062718e-07, "loss": 1.1076, "step": 45400 }, { "epoch": 501.3245033112583, "learning_rate": 2.909756097560975e-07, "loss": 1.1087, "step": 45420 }, { "epoch": 501.54525386313463, "learning_rate": 2.9066202090592334e-07, "loss": 1.1351, "step": 45440 }, { "epoch": 501.766004415011, "learning_rate": 2.903484320557491e-07, "loss": 1.1006, "step": 45460 }, { "epoch": 501.9867549668874, "learning_rate": 2.900348432055749e-07, "loss": 1.1596, "step": 45480 }, { "epoch": 502.2075055187638, "learning_rate": 2.897212543554007e-07, "loss": 1.162, "step": 45500 }, { "epoch": 502.4282560706402, "learning_rate": 2.8940766550522646e-07, "loss": 1.1339, "step": 45520 }, { "epoch": 502.64900662251654, "learning_rate": 2.890940766550523e-07, "loss": 1.1219, "step": 45540 }, { "epoch": 502.86975717439293, "learning_rate": 2.88780487804878e-07, "loss": 1.1373, "step": 45560 }, { "epoch": 503.0905077262693, "learning_rate": 2.884668989547038e-07, "loss": 1.129, "step": 45580 }, { "epoch": 503.3112582781457, "learning_rate": 2.8815331010452957e-07, "loss": 1.1331, "step": 45600 }, { "epoch": 503.53200883002205, "learning_rate": 2.878397212543554e-07, "loss": 1.1526, "step": 45620 }, { "epoch": 503.75275938189844, "learning_rate": 2.875261324041812e-07, "loss": 1.0721, "step": 45640 }, { "epoch": 503.97350993377484, "learning_rate": 2.8721254355400693e-07, "loss": 1.0899, "step": 45660 }, { "epoch": 504.1942604856512, "learning_rate": 2.868989547038328e-07, "loss": 1.1029, "step": 45680 }, { "epoch": 504.4150110375276, "learning_rate": 2.865853658536585e-07, "loss": 1.1579, "step": 45700 }, { "epoch": 504.63576158940396, "learning_rate": 2.862717770034843e-07, "loss": 1.1225, "step": 45720 }, { "epoch": 504.85651214128035, "learning_rate": 2.8595818815331004e-07, "loss": 1.1311, "step": 45740 }, { "epoch": 505.07726269315674, "learning_rate": 2.856445993031359e-07, "loss": 1.1056, "step": 45760 }, { "epoch": 505.29801324503313, "learning_rate": 2.853310104529617e-07, "loss": 1.1246, "step": 45780 }, { "epoch": 505.51876379690947, "learning_rate": 2.850174216027874e-07, "loss": 1.1082, "step": 45800 }, { "epoch": 505.73951434878586, "learning_rate": 2.8470383275261326e-07, "loss": 1.1291, "step": 45820 }, { "epoch": 505.96026490066225, "learning_rate": 2.8439024390243896e-07, "loss": 1.1112, "step": 45840 }, { "epoch": 506.18101545253865, "learning_rate": 2.8407665505226477e-07, "loss": 1.118, "step": 45860 }, { "epoch": 506.40176600441504, "learning_rate": 2.837630662020906e-07, "loss": 1.1045, "step": 45880 }, { "epoch": 506.6225165562914, "learning_rate": 2.834494773519164e-07, "loss": 1.1366, "step": 45900 }, { "epoch": 506.84326710816777, "learning_rate": 2.831358885017422e-07, "loss": 1.1639, "step": 45920 }, { "epoch": 507.06401766004416, "learning_rate": 2.828222996515679e-07, "loss": 1.1473, "step": 45940 }, { "epoch": 507.28476821192055, "learning_rate": 2.8250871080139374e-07, "loss": 1.1251, "step": 45960 }, { "epoch": 507.5055187637969, "learning_rate": 2.821951219512195e-07, "loss": 1.0948, "step": 45980 }, { "epoch": 507.7262693156733, "learning_rate": 2.818815331010453e-07, "loss": 1.1439, "step": 46000 }, { "epoch": 507.94701986754967, "learning_rate": 2.815679442508711e-07, "loss": 1.148, "step": 46020 }, { "epoch": 508.16777041942606, "learning_rate": 2.8125435540069685e-07, "loss": 1.1106, "step": 46040 }, { "epoch": 508.38852097130246, "learning_rate": 2.8094076655052266e-07, "loss": 1.1169, "step": 46060 }, { "epoch": 508.6092715231788, "learning_rate": 2.806271777003484e-07, "loss": 1.1067, "step": 46080 }, { "epoch": 508.8300220750552, "learning_rate": 2.803135888501742e-07, "loss": 1.1447, "step": 46100 }, { "epoch": 509.0507726269316, "learning_rate": 2.7999999999999997e-07, "loss": 1.121, "step": 46120 }, { "epoch": 509.27152317880797, "learning_rate": 2.7968641114982577e-07, "loss": 1.1101, "step": 46140 }, { "epoch": 509.4922737306843, "learning_rate": 2.793728222996516e-07, "loss": 1.1322, "step": 46160 }, { "epoch": 509.7130242825607, "learning_rate": 2.7905923344947733e-07, "loss": 1.1463, "step": 46180 }, { "epoch": 509.9337748344371, "learning_rate": 2.787456445993032e-07, "loss": 1.1159, "step": 46200 }, { "epoch": 510.1545253863135, "learning_rate": 2.784320557491289e-07, "loss": 1.1214, "step": 46220 }, { "epoch": 510.3752759381899, "learning_rate": 2.781184668989547e-07, "loss": 1.1087, "step": 46240 }, { "epoch": 510.5960264900662, "learning_rate": 2.7780487804878044e-07, "loss": 1.0912, "step": 46260 }, { "epoch": 510.8167770419426, "learning_rate": 2.774912891986063e-07, "loss": 1.1016, "step": 46280 }, { "epoch": 511.037527593819, "learning_rate": 2.7717770034843205e-07, "loss": 1.1232, "step": 46300 }, { "epoch": 511.2582781456954, "learning_rate": 2.768641114982578e-07, "loss": 1.1398, "step": 46320 }, { "epoch": 511.4790286975717, "learning_rate": 2.7655052264808366e-07, "loss": 1.1016, "step": 46340 }, { "epoch": 511.6997792494481, "learning_rate": 2.7623693379790936e-07, "loss": 1.1341, "step": 46360 }, { "epoch": 511.9205298013245, "learning_rate": 2.7592334494773516e-07, "loss": 1.1043, "step": 46380 }, { "epoch": 512.1412803532008, "learning_rate": 2.756097560975609e-07, "loss": 1.1382, "step": 46400 }, { "epoch": 512.3620309050773, "learning_rate": 2.7529616724738683e-07, "loss": 1.1341, "step": 46420 }, { "epoch": 512.5827814569536, "learning_rate": 2.749825783972126e-07, "loss": 1.1054, "step": 46440 }, { "epoch": 512.8035320088301, "learning_rate": 2.746689895470383e-07, "loss": 1.1304, "step": 46460 }, { "epoch": 513.0242825607064, "learning_rate": 2.7435540069686414e-07, "loss": 1.1042, "step": 46480 }, { "epoch": 513.2450331125827, "learning_rate": 2.740418118466899e-07, "loss": 1.1057, "step": 46500 }, { "epoch": 513.4657836644592, "learning_rate": 2.7372822299651564e-07, "loss": 1.1488, "step": 46520 }, { "epoch": 513.6865342163355, "learning_rate": 2.734146341463414e-07, "loss": 1.1476, "step": 46540 }, { "epoch": 513.9072847682119, "learning_rate": 2.7310104529616725e-07, "loss": 1.1214, "step": 46560 }, { "epoch": 514.1280353200883, "learning_rate": 2.7278745644599305e-07, "loss": 1.1173, "step": 46580 }, { "epoch": 514.3487858719647, "learning_rate": 2.7247386759581875e-07, "loss": 1.1241, "step": 46600 }, { "epoch": 514.5695364238411, "learning_rate": 2.721602787456446e-07, "loss": 1.136, "step": 46620 }, { "epoch": 514.7902869757174, "learning_rate": 2.7184668989547036e-07, "loss": 1.1653, "step": 46640 }, { "epoch": 515.0110375275938, "learning_rate": 2.7153310104529617e-07, "loss": 1.0816, "step": 46660 }, { "epoch": 515.2317880794702, "learning_rate": 2.7121951219512187e-07, "loss": 1.0901, "step": 46680 }, { "epoch": 515.4525386313466, "learning_rate": 2.709059233449477e-07, "loss": 1.1228, "step": 46700 }, { "epoch": 515.673289183223, "learning_rate": 2.705923344947736e-07, "loss": 1.1257, "step": 46720 }, { "epoch": 515.8940397350993, "learning_rate": 2.702787456445993e-07, "loss": 1.1466, "step": 46740 }, { "epoch": 516.1147902869757, "learning_rate": 2.699651567944251e-07, "loss": 1.135, "step": 46760 }, { "epoch": 516.3355408388521, "learning_rate": 2.6965156794425084e-07, "loss": 1.1148, "step": 46780 }, { "epoch": 516.5562913907285, "learning_rate": 2.6933797909407664e-07, "loss": 1.1246, "step": 46800 }, { "epoch": 516.5562913907285, "eval_bleu": 47.8897, "eval_gen_len": 8.4667, "eval_loss": 1.970095157623291, "eval_runtime": 3.2787, "eval_samples_per_second": 9.15, "eval_steps_per_second": 1.83, "step": 46800 } ], "logging_steps": 20, "max_steps": 57600, "num_input_tokens_seen": 0, "num_train_epochs": 640, "save_steps": 3600, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.385108081516544e+17, "train_batch_size": 5, "trial_name": null, "trial_params": null }