{ "best_metric": 49.121, "best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-10800", "epoch": 119.20529801324503, "eval_steps": 3600, "global_step": 10800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011037527593818985, "learning_rate": 5e-09, "loss": 2.1638, "step": 1 }, { "epoch": 0.22075055187637968, "learning_rate": 1e-07, "loss": 1.9727, "step": 20 }, { "epoch": 0.44150110375275936, "learning_rate": 2e-07, "loss": 2.0028, "step": 40 }, { "epoch": 0.6622516556291391, "learning_rate": 3e-07, "loss": 2.0448, "step": 60 }, { "epoch": 0.8830022075055187, "learning_rate": 4e-07, "loss": 1.9696, "step": 80 }, { "epoch": 1.1037527593818985, "learning_rate": 5e-07, "loss": 1.9375, "step": 100 }, { "epoch": 1.3245033112582782, "learning_rate": 6e-07, "loss": 1.9075, "step": 120 }, { "epoch": 1.5452538631346577, "learning_rate": 7e-07, "loss": 1.9866, "step": 140 }, { "epoch": 1.7660044150110377, "learning_rate": 8e-07, "loss": 1.882, "step": 160 }, { "epoch": 1.9867549668874172, "learning_rate": 9e-07, "loss": 1.9916, "step": 180 }, { "epoch": 2.207505518763797, "learning_rate": 1e-06, "loss": 1.9132, "step": 200 }, { "epoch": 2.4282560706401766, "learning_rate": 9.996864111498258e-07, "loss": 1.9858, "step": 220 }, { "epoch": 2.6490066225165565, "learning_rate": 9.993728222996515e-07, "loss": 1.9373, "step": 240 }, { "epoch": 2.869757174392936, "learning_rate": 9.990592334494773e-07, "loss": 1.9146, "step": 260 }, { "epoch": 3.0905077262693155, "learning_rate": 9.987456445993032e-07, "loss": 1.8665, "step": 280 }, { "epoch": 3.3112582781456954, "learning_rate": 9.98432055749129e-07, "loss": 1.9419, "step": 300 }, { "epoch": 3.5320088300220753, "learning_rate": 9.981184668989545e-07, "loss": 1.8332, "step": 320 }, { "epoch": 3.752759381898455, "learning_rate": 9.978048780487803e-07, "loss": 1.8534, "step": 340 }, { "epoch": 3.9735099337748343, "learning_rate": 9.974912891986062e-07, "loss": 1.9421, "step": 360 }, { "epoch": 4.194260485651214, "learning_rate": 9.97177700348432e-07, "loss": 1.9374, "step": 380 }, { "epoch": 4.415011037527594, "learning_rate": 9.96864111498258e-07, "loss": 1.8876, "step": 400 }, { "epoch": 4.635761589403973, "learning_rate": 9.965505226480835e-07, "loss": 1.8593, "step": 420 }, { "epoch": 4.856512141280353, "learning_rate": 9.962369337979094e-07, "loss": 1.8243, "step": 440 }, { "epoch": 5.077262693156733, "learning_rate": 9.95923344947735e-07, "loss": 1.8791, "step": 460 }, { "epoch": 5.298013245033113, "learning_rate": 9.95609756097561e-07, "loss": 1.8878, "step": 480 }, { "epoch": 5.518763796909492, "learning_rate": 9.952961672473868e-07, "loss": 1.8985, "step": 500 }, { "epoch": 5.739514348785872, "learning_rate": 9.949825783972126e-07, "loss": 1.84, "step": 520 }, { "epoch": 5.960264900662252, "learning_rate": 9.946689895470383e-07, "loss": 1.8468, "step": 540 }, { "epoch": 6.181015452538631, "learning_rate": 9.94355400696864e-07, "loss": 1.8469, "step": 560 }, { "epoch": 6.401766004415011, "learning_rate": 9.940418118466898e-07, "loss": 1.8528, "step": 580 }, { "epoch": 6.622516556291391, "learning_rate": 9.937282229965156e-07, "loss": 1.8269, "step": 600 }, { "epoch": 6.843267108167771, "learning_rate": 9.934146341463415e-07, "loss": 1.8324, "step": 620 }, { "epoch": 7.06401766004415, "learning_rate": 9.931010452961673e-07, "loss": 1.808, "step": 640 }, { "epoch": 7.28476821192053, "learning_rate": 9.92787456445993e-07, "loss": 1.9003, "step": 660 }, { "epoch": 7.50551876379691, "learning_rate": 9.924738675958186e-07, "loss": 1.908, "step": 680 }, { "epoch": 7.72626931567329, "learning_rate": 9.921602787456445e-07, "loss": 1.8165, "step": 700 }, { "epoch": 7.947019867549669, "learning_rate": 9.918466898954704e-07, "loss": 1.7924, "step": 720 }, { "epoch": 8.167770419426049, "learning_rate": 9.915331010452962e-07, "loss": 1.8275, "step": 740 }, { "epoch": 8.388520971302428, "learning_rate": 9.912195121951219e-07, "loss": 1.8033, "step": 760 }, { "epoch": 8.609271523178808, "learning_rate": 9.909059233449477e-07, "loss": 1.8283, "step": 780 }, { "epoch": 8.830022075055188, "learning_rate": 9.905923344947734e-07, "loss": 1.8361, "step": 800 }, { "epoch": 9.050772626931566, "learning_rate": 9.902787456445992e-07, "loss": 1.8264, "step": 820 }, { "epoch": 9.271523178807946, "learning_rate": 9.89965156794425e-07, "loss": 1.8157, "step": 840 }, { "epoch": 9.492273730684326, "learning_rate": 9.89651567944251e-07, "loss": 1.8038, "step": 860 }, { "epoch": 9.713024282560706, "learning_rate": 9.893379790940768e-07, "loss": 1.8495, "step": 880 }, { "epoch": 9.933774834437086, "learning_rate": 9.890243902439024e-07, "loss": 1.8379, "step": 900 }, { "epoch": 10.154525386313466, "learning_rate": 9.88710801393728e-07, "loss": 1.807, "step": 920 }, { "epoch": 10.375275938189846, "learning_rate": 9.88397212543554e-07, "loss": 1.7354, "step": 940 }, { "epoch": 10.596026490066226, "learning_rate": 9.880836236933798e-07, "loss": 1.7909, "step": 960 }, { "epoch": 10.816777041942604, "learning_rate": 9.877700348432054e-07, "loss": 1.8504, "step": 980 }, { "epoch": 11.037527593818984, "learning_rate": 9.874564459930313e-07, "loss": 1.7565, "step": 1000 }, { "epoch": 11.258278145695364, "learning_rate": 9.871428571428572e-07, "loss": 1.8464, "step": 1020 }, { "epoch": 11.479028697571744, "learning_rate": 9.868292682926828e-07, "loss": 1.7463, "step": 1040 }, { "epoch": 11.699779249448124, "learning_rate": 9.865156794425087e-07, "loss": 1.8286, "step": 1060 }, { "epoch": 11.920529801324504, "learning_rate": 9.862020905923345e-07, "loss": 1.7821, "step": 1080 }, { "epoch": 12.141280353200884, "learning_rate": 9.858885017421604e-07, "loss": 1.8069, "step": 1100 }, { "epoch": 12.362030905077262, "learning_rate": 9.85574912891986e-07, "loss": 1.7943, "step": 1120 }, { "epoch": 12.582781456953642, "learning_rate": 9.852613240418117e-07, "loss": 1.7535, "step": 1140 }, { "epoch": 12.803532008830022, "learning_rate": 9.849477351916375e-07, "loss": 1.7881, "step": 1160 }, { "epoch": 13.024282560706402, "learning_rate": 9.846341463414634e-07, "loss": 1.815, "step": 1180 }, { "epoch": 13.245033112582782, "learning_rate": 9.843205574912892e-07, "loss": 1.8258, "step": 1200 }, { "epoch": 13.465783664459162, "learning_rate": 9.840069686411149e-07, "loss": 1.7228, "step": 1220 }, { "epoch": 13.686534216335541, "learning_rate": 9.836933797909407e-07, "loss": 1.7866, "step": 1240 }, { "epoch": 13.90728476821192, "learning_rate": 9.833797909407664e-07, "loss": 1.7871, "step": 1260 }, { "epoch": 14.1280353200883, "learning_rate": 9.830662020905923e-07, "loss": 1.7238, "step": 1280 }, { "epoch": 14.34878587196468, "learning_rate": 9.827526132404181e-07, "loss": 1.805, "step": 1300 }, { "epoch": 14.56953642384106, "learning_rate": 9.82439024390244e-07, "loss": 1.7909, "step": 1320 }, { "epoch": 14.79028697571744, "learning_rate": 9.821254355400698e-07, "loss": 1.7861, "step": 1340 }, { "epoch": 15.01103752759382, "learning_rate": 9.818118466898953e-07, "loss": 1.7651, "step": 1360 }, { "epoch": 15.2317880794702, "learning_rate": 9.814982578397211e-07, "loss": 1.7666, "step": 1380 }, { "epoch": 15.45253863134658, "learning_rate": 9.81184668989547e-07, "loss": 1.7159, "step": 1400 }, { "epoch": 15.673289183222957, "learning_rate": 9.808710801393728e-07, "loss": 1.7701, "step": 1420 }, { "epoch": 15.894039735099337, "learning_rate": 9.805574912891987e-07, "loss": 1.7977, "step": 1440 }, { "epoch": 16.11479028697572, "learning_rate": 9.802439024390243e-07, "loss": 1.7805, "step": 1460 }, { "epoch": 16.335540838852097, "learning_rate": 9.799303135888502e-07, "loss": 1.7711, "step": 1480 }, { "epoch": 16.556291390728475, "learning_rate": 9.796167247386758e-07, "loss": 1.6767, "step": 1500 }, { "epoch": 16.777041942604857, "learning_rate": 9.793031358885017e-07, "loss": 1.722, "step": 1520 }, { "epoch": 16.997792494481235, "learning_rate": 9.789895470383276e-07, "loss": 1.783, "step": 1540 }, { "epoch": 17.218543046357617, "learning_rate": 9.786759581881534e-07, "loss": 1.7233, "step": 1560 }, { "epoch": 17.439293598233995, "learning_rate": 9.78362369337979e-07, "loss": 1.7209, "step": 1580 }, { "epoch": 17.660044150110377, "learning_rate": 9.780487804878047e-07, "loss": 1.6968, "step": 1600 }, { "epoch": 17.880794701986755, "learning_rate": 9.777351916376306e-07, "loss": 1.6818, "step": 1620 }, { "epoch": 18.101545253863133, "learning_rate": 9.774216027874564e-07, "loss": 1.7819, "step": 1640 }, { "epoch": 18.322295805739515, "learning_rate": 9.771080139372823e-07, "loss": 1.7445, "step": 1660 }, { "epoch": 18.543046357615893, "learning_rate": 9.76794425087108e-07, "loss": 1.6888, "step": 1680 }, { "epoch": 18.763796909492275, "learning_rate": 9.764808362369338e-07, "loss": 1.7562, "step": 1700 }, { "epoch": 18.984547461368653, "learning_rate": 9.761672473867594e-07, "loss": 1.7368, "step": 1720 }, { "epoch": 19.205298013245034, "learning_rate": 9.758536585365853e-07, "loss": 1.6816, "step": 1740 }, { "epoch": 19.426048565121413, "learning_rate": 9.755400696864111e-07, "loss": 1.7307, "step": 1760 }, { "epoch": 19.64679911699779, "learning_rate": 9.75226480836237e-07, "loss": 1.7313, "step": 1780 }, { "epoch": 19.867549668874172, "learning_rate": 9.749128919860627e-07, "loss": 1.7407, "step": 1800 }, { "epoch": 20.08830022075055, "learning_rate": 9.745993031358883e-07, "loss": 1.6885, "step": 1820 }, { "epoch": 20.309050772626932, "learning_rate": 9.742857142857142e-07, "loss": 1.6741, "step": 1840 }, { "epoch": 20.52980132450331, "learning_rate": 9.7397212543554e-07, "loss": 1.6958, "step": 1860 }, { "epoch": 20.750551876379692, "learning_rate": 9.736585365853659e-07, "loss": 1.7036, "step": 1880 }, { "epoch": 20.97130242825607, "learning_rate": 9.733449477351917e-07, "loss": 1.7489, "step": 1900 }, { "epoch": 21.192052980132452, "learning_rate": 9.730313588850174e-07, "loss": 1.7719, "step": 1920 }, { "epoch": 21.41280353200883, "learning_rate": 9.72717770034843e-07, "loss": 1.7401, "step": 1940 }, { "epoch": 21.63355408388521, "learning_rate": 9.724041811846689e-07, "loss": 1.7247, "step": 1960 }, { "epoch": 21.85430463576159, "learning_rate": 9.720905923344947e-07, "loss": 1.7175, "step": 1980 }, { "epoch": 22.075055187637968, "learning_rate": 9.717770034843206e-07, "loss": 1.7288, "step": 2000 }, { "epoch": 22.29580573951435, "learning_rate": 9.714634146341462e-07, "loss": 1.7068, "step": 2020 }, { "epoch": 22.516556291390728, "learning_rate": 9.71149825783972e-07, "loss": 1.6812, "step": 2040 }, { "epoch": 22.73730684326711, "learning_rate": 9.708362369337977e-07, "loss": 1.6772, "step": 2060 }, { "epoch": 22.958057395143488, "learning_rate": 9.705226480836236e-07, "loss": 1.7103, "step": 2080 }, { "epoch": 23.178807947019866, "learning_rate": 9.702090592334495e-07, "loss": 1.6984, "step": 2100 }, { "epoch": 23.399558498896248, "learning_rate": 9.698954703832753e-07, "loss": 1.7281, "step": 2120 }, { "epoch": 23.620309050772626, "learning_rate": 9.695818815331012e-07, "loss": 1.7123, "step": 2140 }, { "epoch": 23.841059602649008, "learning_rate": 9.692682926829266e-07, "loss": 1.7174, "step": 2160 }, { "epoch": 24.061810154525386, "learning_rate": 9.689547038327525e-07, "loss": 1.7281, "step": 2180 }, { "epoch": 24.282560706401767, "learning_rate": 9.686411149825783e-07, "loss": 1.7189, "step": 2200 }, { "epoch": 24.503311258278146, "learning_rate": 9.683275261324042e-07, "loss": 1.6272, "step": 2220 }, { "epoch": 24.724061810154524, "learning_rate": 9.6801393728223e-07, "loss": 1.6588, "step": 2240 }, { "epoch": 24.944812362030905, "learning_rate": 9.677003484320557e-07, "loss": 1.6627, "step": 2260 }, { "epoch": 25.165562913907284, "learning_rate": 9.673867595818815e-07, "loss": 1.6504, "step": 2280 }, { "epoch": 25.386313465783665, "learning_rate": 9.670731707317072e-07, "loss": 1.6847, "step": 2300 }, { "epoch": 25.607064017660043, "learning_rate": 9.66759581881533e-07, "loss": 1.6793, "step": 2320 }, { "epoch": 25.827814569536425, "learning_rate": 9.66445993031359e-07, "loss": 1.7009, "step": 2340 }, { "epoch": 26.048565121412803, "learning_rate": 9.661324041811848e-07, "loss": 1.6581, "step": 2360 }, { "epoch": 26.26931567328918, "learning_rate": 9.658188153310104e-07, "loss": 1.6648, "step": 2380 }, { "epoch": 26.490066225165563, "learning_rate": 9.65505226480836e-07, "loss": 1.6303, "step": 2400 }, { "epoch": 26.71081677704194, "learning_rate": 9.65191637630662e-07, "loss": 1.7222, "step": 2420 }, { "epoch": 26.931567328918323, "learning_rate": 9.648780487804878e-07, "loss": 1.7164, "step": 2440 }, { "epoch": 27.1523178807947, "learning_rate": 9.645644599303136e-07, "loss": 1.6894, "step": 2460 }, { "epoch": 27.373068432671083, "learning_rate": 9.642508710801393e-07, "loss": 1.6904, "step": 2480 }, { "epoch": 27.59381898454746, "learning_rate": 9.639372822299651e-07, "loss": 1.6133, "step": 2500 }, { "epoch": 27.814569536423843, "learning_rate": 9.636236933797908e-07, "loss": 1.6445, "step": 2520 }, { "epoch": 28.03532008830022, "learning_rate": 9.633101045296166e-07, "loss": 1.6421, "step": 2540 }, { "epoch": 28.2560706401766, "learning_rate": 9.629965156794425e-07, "loss": 1.672, "step": 2560 }, { "epoch": 28.47682119205298, "learning_rate": 9.626829268292684e-07, "loss": 1.6592, "step": 2580 }, { "epoch": 28.69757174392936, "learning_rate": 9.62369337979094e-07, "loss": 1.649, "step": 2600 }, { "epoch": 28.91832229580574, "learning_rate": 9.620557491289199e-07, "loss": 1.6667, "step": 2620 }, { "epoch": 29.13907284768212, "learning_rate": 9.617421602787455e-07, "loss": 1.6172, "step": 2640 }, { "epoch": 29.3598233995585, "learning_rate": 9.614285714285714e-07, "loss": 1.6696, "step": 2660 }, { "epoch": 29.58057395143488, "learning_rate": 9.611149825783972e-07, "loss": 1.7091, "step": 2680 }, { "epoch": 29.801324503311257, "learning_rate": 9.60801393728223e-07, "loss": 1.6111, "step": 2700 }, { "epoch": 30.02207505518764, "learning_rate": 9.604878048780487e-07, "loss": 1.6274, "step": 2720 }, { "epoch": 30.242825607064017, "learning_rate": 9.601742160278746e-07, "loss": 1.6186, "step": 2740 }, { "epoch": 30.4635761589404, "learning_rate": 9.598606271777002e-07, "loss": 1.6336, "step": 2760 }, { "epoch": 30.684326710816777, "learning_rate": 9.59547038327526e-07, "loss": 1.6414, "step": 2780 }, { "epoch": 30.90507726269316, "learning_rate": 9.59233449477352e-07, "loss": 1.6628, "step": 2800 }, { "epoch": 31.125827814569536, "learning_rate": 9.589198606271776e-07, "loss": 1.6092, "step": 2820 }, { "epoch": 31.346578366445915, "learning_rate": 9.586062717770034e-07, "loss": 1.669, "step": 2840 }, { "epoch": 31.567328918322296, "learning_rate": 9.58292682926829e-07, "loss": 1.6597, "step": 2860 }, { "epoch": 31.788079470198674, "learning_rate": 9.57979094076655e-07, "loss": 1.5658, "step": 2880 }, { "epoch": 32.00883002207505, "learning_rate": 9.576655052264808e-07, "loss": 1.6662, "step": 2900 }, { "epoch": 32.22958057395144, "learning_rate": 9.573519163763067e-07, "loss": 1.6241, "step": 2920 }, { "epoch": 32.450331125827816, "learning_rate": 9.570383275261325e-07, "loss": 1.6181, "step": 2940 }, { "epoch": 32.671081677704194, "learning_rate": 9.567247386759582e-07, "loss": 1.6561, "step": 2960 }, { "epoch": 32.89183222958057, "learning_rate": 9.564111498257838e-07, "loss": 1.6176, "step": 2980 }, { "epoch": 33.11258278145695, "learning_rate": 9.560975609756097e-07, "loss": 1.686, "step": 3000 }, { "epoch": 33.333333333333336, "learning_rate": 9.557839721254355e-07, "loss": 1.5938, "step": 3020 }, { "epoch": 33.554083885209714, "learning_rate": 9.554703832752614e-07, "loss": 1.5952, "step": 3040 }, { "epoch": 33.77483443708609, "learning_rate": 9.55156794425087e-07, "loss": 1.5772, "step": 3060 }, { "epoch": 33.99558498896247, "learning_rate": 9.548432055749129e-07, "loss": 1.5889, "step": 3080 }, { "epoch": 34.216335540838855, "learning_rate": 9.545296167247385e-07, "loss": 1.5953, "step": 3100 }, { "epoch": 34.437086092715234, "learning_rate": 9.542160278745644e-07, "loss": 1.6379, "step": 3120 }, { "epoch": 34.65783664459161, "learning_rate": 9.539024390243903e-07, "loss": 1.6252, "step": 3140 }, { "epoch": 34.87858719646799, "learning_rate": 9.53588850174216e-07, "loss": 1.628, "step": 3160 }, { "epoch": 35.09933774834437, "learning_rate": 9.532752613240419e-07, "loss": 1.6547, "step": 3180 }, { "epoch": 35.32008830022075, "learning_rate": 9.529616724738675e-07, "loss": 1.6308, "step": 3200 }, { "epoch": 35.54083885209713, "learning_rate": 9.526480836236935e-07, "loss": 1.599, "step": 3220 }, { "epoch": 35.76158940397351, "learning_rate": 9.523344947735191e-07, "loss": 1.6198, "step": 3240 }, { "epoch": 35.98233995584989, "learning_rate": 9.520209059233449e-07, "loss": 1.6248, "step": 3260 }, { "epoch": 36.203090507726266, "learning_rate": 9.517073170731706e-07, "loss": 1.576, "step": 3280 }, { "epoch": 36.42384105960265, "learning_rate": 9.513937282229965e-07, "loss": 1.6648, "step": 3300 }, { "epoch": 36.64459161147903, "learning_rate": 9.510801393728223e-07, "loss": 1.6035, "step": 3320 }, { "epoch": 36.86534216335541, "learning_rate": 9.50766550522648e-07, "loss": 1.6051, "step": 3340 }, { "epoch": 37.086092715231786, "learning_rate": 9.504529616724738e-07, "loss": 1.6052, "step": 3360 }, { "epoch": 37.30684326710817, "learning_rate": 9.501393728222996e-07, "loss": 1.6081, "step": 3380 }, { "epoch": 37.52759381898455, "learning_rate": 9.498257839721255e-07, "loss": 1.6066, "step": 3400 }, { "epoch": 37.74834437086093, "learning_rate": 9.495121951219511e-07, "loss": 1.5763, "step": 3420 }, { "epoch": 37.969094922737305, "learning_rate": 9.49198606271777e-07, "loss": 1.6039, "step": 3440 }, { "epoch": 38.18984547461368, "learning_rate": 9.488850174216028e-07, "loss": 1.6203, "step": 3460 }, { "epoch": 38.41059602649007, "learning_rate": 9.485714285714285e-07, "loss": 1.619, "step": 3480 }, { "epoch": 38.63134657836645, "learning_rate": 9.482578397212543e-07, "loss": 1.6055, "step": 3500 }, { "epoch": 38.852097130242825, "learning_rate": 9.479442508710801e-07, "loss": 1.6037, "step": 3520 }, { "epoch": 39.0728476821192, "learning_rate": 9.476306620209059e-07, "loss": 1.6259, "step": 3540 }, { "epoch": 39.29359823399559, "learning_rate": 9.473170731707316e-07, "loss": 1.5917, "step": 3560 }, { "epoch": 39.51434878587197, "learning_rate": 9.470034843205574e-07, "loss": 1.6039, "step": 3580 }, { "epoch": 39.735099337748345, "learning_rate": 9.466898954703833e-07, "loss": 1.5364, "step": 3600 }, { "epoch": 39.735099337748345, "eval_bleu": 41.1359, "eval_gen_len": 9.5667, "eval_loss": 2.0473527908325195, "eval_runtime": 3.9497, "eval_samples_per_second": 7.596, "eval_steps_per_second": 1.519, "step": 3600 }, { "epoch": 39.95584988962472, "learning_rate": 9.46376306620209e-07, "loss": 1.6224, "step": 3620 }, { "epoch": 40.1766004415011, "learning_rate": 9.460627177700348e-07, "loss": 1.5808, "step": 3640 }, { "epoch": 40.397350993377486, "learning_rate": 9.457491289198605e-07, "loss": 1.6187, "step": 3660 }, { "epoch": 40.618101545253865, "learning_rate": 9.454355400696864e-07, "loss": 1.6131, "step": 3680 }, { "epoch": 40.83885209713024, "learning_rate": 9.451219512195122e-07, "loss": 1.603, "step": 3700 }, { "epoch": 41.05960264900662, "learning_rate": 9.448083623693379e-07, "loss": 1.6253, "step": 3720 }, { "epoch": 41.280353200883, "learning_rate": 9.444947735191638e-07, "loss": 1.6125, "step": 3740 }, { "epoch": 41.501103752759384, "learning_rate": 9.441811846689895e-07, "loss": 1.5777, "step": 3760 }, { "epoch": 41.72185430463576, "learning_rate": 9.438675958188153e-07, "loss": 1.5642, "step": 3780 }, { "epoch": 41.94260485651214, "learning_rate": 9.43554006968641e-07, "loss": 1.5773, "step": 3800 }, { "epoch": 42.16335540838852, "learning_rate": 9.432404181184669e-07, "loss": 1.5218, "step": 3820 }, { "epoch": 42.384105960264904, "learning_rate": 9.429268292682926e-07, "loss": 1.5751, "step": 3840 }, { "epoch": 42.60485651214128, "learning_rate": 9.426132404181184e-07, "loss": 1.5686, "step": 3860 }, { "epoch": 42.82560706401766, "learning_rate": 9.422996515679442e-07, "loss": 1.5917, "step": 3880 }, { "epoch": 43.04635761589404, "learning_rate": 9.4198606271777e-07, "loss": 1.5427, "step": 3900 }, { "epoch": 43.26710816777042, "learning_rate": 9.416724738675958e-07, "loss": 1.6199, "step": 3920 }, { "epoch": 43.4878587196468, "learning_rate": 9.413588850174215e-07, "loss": 1.5946, "step": 3940 }, { "epoch": 43.70860927152318, "learning_rate": 9.410452961672474e-07, "loss": 1.6212, "step": 3960 }, { "epoch": 43.92935982339956, "learning_rate": 9.407317073170731e-07, "loss": 1.5569, "step": 3980 }, { "epoch": 44.150110375275936, "learning_rate": 9.404181184668989e-07, "loss": 1.521, "step": 4000 }, { "epoch": 44.370860927152314, "learning_rate": 9.401045296167247e-07, "loss": 1.56, "step": 4020 }, { "epoch": 44.5916114790287, "learning_rate": 9.397909407665504e-07, "loss": 1.6059, "step": 4040 }, { "epoch": 44.81236203090508, "learning_rate": 9.394773519163763e-07, "loss": 1.5684, "step": 4060 }, { "epoch": 45.033112582781456, "learning_rate": 9.39163763066202e-07, "loss": 1.539, "step": 4080 }, { "epoch": 45.253863134657834, "learning_rate": 9.388501742160278e-07, "loss": 1.5336, "step": 4100 }, { "epoch": 45.47461368653422, "learning_rate": 9.385365853658536e-07, "loss": 1.5521, "step": 4120 }, { "epoch": 45.6953642384106, "learning_rate": 9.382229965156794e-07, "loss": 1.5281, "step": 4140 }, { "epoch": 45.916114790286976, "learning_rate": 9.379094076655052e-07, "loss": 1.6375, "step": 4160 }, { "epoch": 46.136865342163354, "learning_rate": 9.375958188153309e-07, "loss": 1.5615, "step": 4180 }, { "epoch": 46.35761589403973, "learning_rate": 9.372822299651568e-07, "loss": 1.5181, "step": 4200 }, { "epoch": 46.57836644591612, "learning_rate": 9.369686411149824e-07, "loss": 1.6009, "step": 4220 }, { "epoch": 46.799116997792495, "learning_rate": 9.366550522648083e-07, "loss": 1.5895, "step": 4240 }, { "epoch": 47.019867549668874, "learning_rate": 9.363414634146342e-07, "loss": 1.5159, "step": 4260 }, { "epoch": 47.24061810154525, "learning_rate": 9.360278745644599e-07, "loss": 1.5179, "step": 4280 }, { "epoch": 47.46136865342164, "learning_rate": 9.357142857142857e-07, "loss": 1.5585, "step": 4300 }, { "epoch": 47.682119205298015, "learning_rate": 9.354006968641114e-07, "loss": 1.6147, "step": 4320 }, { "epoch": 47.90286975717439, "learning_rate": 9.350871080139373e-07, "loss": 1.5307, "step": 4340 }, { "epoch": 48.12362030905077, "learning_rate": 9.34773519163763e-07, "loss": 1.5673, "step": 4360 }, { "epoch": 48.34437086092715, "learning_rate": 9.344599303135888e-07, "loss": 1.5442, "step": 4380 }, { "epoch": 48.565121412803535, "learning_rate": 9.341463414634146e-07, "loss": 1.5177, "step": 4400 }, { "epoch": 48.78587196467991, "learning_rate": 9.338327526132404e-07, "loss": 1.5909, "step": 4420 }, { "epoch": 49.00662251655629, "learning_rate": 9.335191637630661e-07, "loss": 1.5565, "step": 4440 }, { "epoch": 49.22737306843267, "learning_rate": 9.332055749128919e-07, "loss": 1.4945, "step": 4460 }, { "epoch": 49.44812362030905, "learning_rate": 9.328919860627177e-07, "loss": 1.5253, "step": 4480 }, { "epoch": 49.66887417218543, "learning_rate": 9.325783972125436e-07, "loss": 1.563, "step": 4500 }, { "epoch": 49.88962472406181, "learning_rate": 9.322648083623693e-07, "loss": 1.6016, "step": 4520 }, { "epoch": 50.11037527593819, "learning_rate": 9.319512195121951e-07, "loss": 1.6032, "step": 4540 }, { "epoch": 50.33112582781457, "learning_rate": 9.316376306620209e-07, "loss": 1.5038, "step": 4560 }, { "epoch": 50.55187637969095, "learning_rate": 9.313240418118467e-07, "loss": 1.545, "step": 4580 }, { "epoch": 50.77262693156733, "learning_rate": 9.310104529616724e-07, "loss": 1.5676, "step": 4600 }, { "epoch": 50.99337748344371, "learning_rate": 9.306968641114981e-07, "loss": 1.5443, "step": 4620 }, { "epoch": 51.21412803532009, "learning_rate": 9.303832752613241e-07, "loss": 1.5608, "step": 4640 }, { "epoch": 51.434878587196465, "learning_rate": 9.300696864111497e-07, "loss": 1.551, "step": 4660 }, { "epoch": 51.65562913907285, "learning_rate": 9.297560975609756e-07, "loss": 1.5556, "step": 4680 }, { "epoch": 51.87637969094923, "learning_rate": 9.294425087108013e-07, "loss": 1.5517, "step": 4700 }, { "epoch": 52.09713024282561, "learning_rate": 9.291289198606272e-07, "loss": 1.5384, "step": 4720 }, { "epoch": 52.317880794701985, "learning_rate": 9.288153310104528e-07, "loss": 1.4869, "step": 4740 }, { "epoch": 52.53863134657836, "learning_rate": 9.285017421602787e-07, "loss": 1.5221, "step": 4760 }, { "epoch": 52.75938189845475, "learning_rate": 9.281881533101046e-07, "loss": 1.5883, "step": 4780 }, { "epoch": 52.980132450331126, "learning_rate": 9.278745644599303e-07, "loss": 1.5276, "step": 4800 }, { "epoch": 53.200883002207505, "learning_rate": 9.275609756097561e-07, "loss": 1.4969, "step": 4820 }, { "epoch": 53.42163355408388, "learning_rate": 9.272473867595818e-07, "loss": 1.5043, "step": 4840 }, { "epoch": 53.64238410596027, "learning_rate": 9.269337979094077e-07, "loss": 1.5205, "step": 4860 }, { "epoch": 53.863134657836646, "learning_rate": 9.266202090592334e-07, "loss": 1.4685, "step": 4880 }, { "epoch": 54.083885209713024, "learning_rate": 9.263066202090592e-07, "loss": 1.5099, "step": 4900 }, { "epoch": 54.3046357615894, "learning_rate": 9.25993031358885e-07, "loss": 1.531, "step": 4920 }, { "epoch": 54.52538631346578, "learning_rate": 9.256794425087108e-07, "loss": 1.5637, "step": 4940 }, { "epoch": 54.746136865342166, "learning_rate": 9.253658536585365e-07, "loss": 1.5795, "step": 4960 }, { "epoch": 54.966887417218544, "learning_rate": 9.250522648083623e-07, "loss": 1.473, "step": 4980 }, { "epoch": 55.18763796909492, "learning_rate": 9.247386759581881e-07, "loss": 1.4921, "step": 5000 }, { "epoch": 55.4083885209713, "learning_rate": 9.244250871080139e-07, "loss": 1.5147, "step": 5020 }, { "epoch": 55.629139072847686, "learning_rate": 9.241114982578397e-07, "loss": 1.5259, "step": 5040 }, { "epoch": 55.849889624724064, "learning_rate": 9.237979094076655e-07, "loss": 1.5481, "step": 5060 }, { "epoch": 56.07064017660044, "learning_rate": 9.234843205574913e-07, "loss": 1.5238, "step": 5080 }, { "epoch": 56.29139072847682, "learning_rate": 9.23170731707317e-07, "loss": 1.5497, "step": 5100 }, { "epoch": 56.5121412803532, "learning_rate": 9.228571428571428e-07, "loss": 1.5003, "step": 5120 }, { "epoch": 56.73289183222958, "learning_rate": 9.225435540069686e-07, "loss": 1.511, "step": 5140 }, { "epoch": 56.95364238410596, "learning_rate": 9.222299651567944e-07, "loss": 1.5426, "step": 5160 }, { "epoch": 57.17439293598234, "learning_rate": 9.219163763066201e-07, "loss": 1.4937, "step": 5180 }, { "epoch": 57.39514348785872, "learning_rate": 9.216027874564459e-07, "loss": 1.5059, "step": 5200 }, { "epoch": 57.615894039735096, "learning_rate": 9.212891986062717e-07, "loss": 1.548, "step": 5220 }, { "epoch": 57.83664459161148, "learning_rate": 9.209756097560976e-07, "loss": 1.4691, "step": 5240 }, { "epoch": 58.05739514348786, "learning_rate": 9.206620209059232e-07, "loss": 1.5459, "step": 5260 }, { "epoch": 58.27814569536424, "learning_rate": 9.203484320557491e-07, "loss": 1.5569, "step": 5280 }, { "epoch": 58.498896247240616, "learning_rate": 9.200348432055748e-07, "loss": 1.4634, "step": 5300 }, { "epoch": 58.719646799117, "learning_rate": 9.197212543554007e-07, "loss": 1.4567, "step": 5320 }, { "epoch": 58.94039735099338, "learning_rate": 9.194076655052265e-07, "loss": 1.5349, "step": 5340 }, { "epoch": 59.16114790286976, "learning_rate": 9.190940766550522e-07, "loss": 1.4706, "step": 5360 }, { "epoch": 59.381898454746135, "learning_rate": 9.187804878048781e-07, "loss": 1.4932, "step": 5380 }, { "epoch": 59.602649006622514, "learning_rate": 9.184668989547037e-07, "loss": 1.5204, "step": 5400 }, { "epoch": 59.8233995584989, "learning_rate": 9.181533101045296e-07, "loss": 1.5267, "step": 5420 }, { "epoch": 60.04415011037528, "learning_rate": 9.178397212543552e-07, "loss": 1.5275, "step": 5440 }, { "epoch": 60.264900662251655, "learning_rate": 9.175261324041812e-07, "loss": 1.5116, "step": 5460 }, { "epoch": 60.48565121412803, "learning_rate": 9.172125435540069e-07, "loss": 1.4686, "step": 5480 }, { "epoch": 60.70640176600442, "learning_rate": 9.168989547038327e-07, "loss": 1.4902, "step": 5500 }, { "epoch": 60.9271523178808, "learning_rate": 9.165853658536585e-07, "loss": 1.4856, "step": 5520 }, { "epoch": 61.147902869757175, "learning_rate": 9.162717770034843e-07, "loss": 1.4982, "step": 5540 }, { "epoch": 61.36865342163355, "learning_rate": 9.1595818815331e-07, "loss": 1.4829, "step": 5560 }, { "epoch": 61.58940397350993, "learning_rate": 9.156445993031358e-07, "loss": 1.5049, "step": 5580 }, { "epoch": 61.81015452538632, "learning_rate": 9.153310104529617e-07, "loss": 1.5125, "step": 5600 }, { "epoch": 62.030905077262695, "learning_rate": 9.150174216027874e-07, "loss": 1.4746, "step": 5620 }, { "epoch": 62.25165562913907, "learning_rate": 9.147038327526132e-07, "loss": 1.5128, "step": 5640 }, { "epoch": 62.47240618101545, "learning_rate": 9.14390243902439e-07, "loss": 1.4799, "step": 5660 }, { "epoch": 62.69315673289183, "learning_rate": 9.140766550522648e-07, "loss": 1.4395, "step": 5680 }, { "epoch": 62.913907284768214, "learning_rate": 9.137630662020905e-07, "loss": 1.5277, "step": 5700 }, { "epoch": 63.13465783664459, "learning_rate": 9.134494773519163e-07, "loss": 1.4806, "step": 5720 }, { "epoch": 63.35540838852097, "learning_rate": 9.131358885017421e-07, "loss": 1.5123, "step": 5740 }, { "epoch": 63.57615894039735, "learning_rate": 9.12822299651568e-07, "loss": 1.5502, "step": 5760 }, { "epoch": 63.796909492273734, "learning_rate": 9.125087108013936e-07, "loss": 1.4732, "step": 5780 }, { "epoch": 64.0176600441501, "learning_rate": 9.121951219512195e-07, "loss": 1.4694, "step": 5800 }, { "epoch": 64.23841059602648, "learning_rate": 9.118815331010452e-07, "loss": 1.4394, "step": 5820 }, { "epoch": 64.45916114790288, "learning_rate": 9.11567944250871e-07, "loss": 1.4776, "step": 5840 }, { "epoch": 64.67991169977925, "learning_rate": 9.112543554006967e-07, "loss": 1.5363, "step": 5860 }, { "epoch": 64.90066225165563, "learning_rate": 9.109407665505226e-07, "loss": 1.4584, "step": 5880 }, { "epoch": 65.12141280353201, "learning_rate": 9.106271777003485e-07, "loss": 1.4956, "step": 5900 }, { "epoch": 65.34216335540839, "learning_rate": 9.103135888501741e-07, "loss": 1.4672, "step": 5920 }, { "epoch": 65.56291390728477, "learning_rate": 9.1e-07, "loss": 1.4765, "step": 5940 }, { "epoch": 65.78366445916114, "learning_rate": 9.096864111498257e-07, "loss": 1.4821, "step": 5960 }, { "epoch": 66.00441501103752, "learning_rate": 9.093728222996516e-07, "loss": 1.4561, "step": 5980 }, { "epoch": 66.2251655629139, "learning_rate": 9.090592334494772e-07, "loss": 1.514, "step": 6000 }, { "epoch": 66.4459161147903, "learning_rate": 9.08745644599303e-07, "loss": 1.495, "step": 6020 }, { "epoch": 66.66666666666667, "learning_rate": 9.084320557491289e-07, "loss": 1.4413, "step": 6040 }, { "epoch": 66.88741721854305, "learning_rate": 9.081184668989546e-07, "loss": 1.4489, "step": 6060 }, { "epoch": 67.10816777041943, "learning_rate": 9.078048780487804e-07, "loss": 1.4897, "step": 6080 }, { "epoch": 67.3289183222958, "learning_rate": 9.074912891986062e-07, "loss": 1.4299, "step": 6100 }, { "epoch": 67.54966887417218, "learning_rate": 9.071777003484321e-07, "loss": 1.4706, "step": 6120 }, { "epoch": 67.77041942604856, "learning_rate": 9.068641114982577e-07, "loss": 1.5075, "step": 6140 }, { "epoch": 67.99116997792494, "learning_rate": 9.065505226480836e-07, "loss": 1.5227, "step": 6160 }, { "epoch": 68.21192052980132, "learning_rate": 9.062369337979094e-07, "loss": 1.456, "step": 6180 }, { "epoch": 68.43267108167771, "learning_rate": 9.059233449477352e-07, "loss": 1.4677, "step": 6200 }, { "epoch": 68.65342163355409, "learning_rate": 9.056097560975609e-07, "loss": 1.4993, "step": 6220 }, { "epoch": 68.87417218543047, "learning_rate": 9.052961672473867e-07, "loss": 1.4612, "step": 6240 }, { "epoch": 69.09492273730685, "learning_rate": 9.049825783972125e-07, "loss": 1.4549, "step": 6260 }, { "epoch": 69.31567328918322, "learning_rate": 9.046689895470383e-07, "loss": 1.5033, "step": 6280 }, { "epoch": 69.5364238410596, "learning_rate": 9.04355400696864e-07, "loss": 1.4558, "step": 6300 }, { "epoch": 69.75717439293598, "learning_rate": 9.040418118466899e-07, "loss": 1.4884, "step": 6320 }, { "epoch": 69.97792494481236, "learning_rate": 9.037282229965156e-07, "loss": 1.4332, "step": 6340 }, { "epoch": 70.19867549668874, "learning_rate": 9.034146341463414e-07, "loss": 1.455, "step": 6360 }, { "epoch": 70.41942604856513, "learning_rate": 9.031010452961671e-07, "loss": 1.4155, "step": 6380 }, { "epoch": 70.6401766004415, "learning_rate": 9.02787456445993e-07, "loss": 1.5361, "step": 6400 }, { "epoch": 70.86092715231788, "learning_rate": 9.024738675958189e-07, "loss": 1.4696, "step": 6420 }, { "epoch": 71.08167770419426, "learning_rate": 9.021602787456445e-07, "loss": 1.4936, "step": 6440 }, { "epoch": 71.30242825607064, "learning_rate": 9.018466898954704e-07, "loss": 1.4689, "step": 6460 }, { "epoch": 71.52317880794702, "learning_rate": 9.015331010452961e-07, "loss": 1.4862, "step": 6480 }, { "epoch": 71.7439293598234, "learning_rate": 9.012195121951219e-07, "loss": 1.463, "step": 6500 }, { "epoch": 71.96467991169978, "learning_rate": 9.009059233449477e-07, "loss": 1.4663, "step": 6520 }, { "epoch": 72.18543046357615, "learning_rate": 9.005923344947735e-07, "loss": 1.4918, "step": 6540 }, { "epoch": 72.40618101545253, "learning_rate": 9.002787456445993e-07, "loss": 1.474, "step": 6560 }, { "epoch": 72.62693156732892, "learning_rate": 8.99965156794425e-07, "loss": 1.4693, "step": 6580 }, { "epoch": 72.8476821192053, "learning_rate": 8.996515679442507e-07, "loss": 1.438, "step": 6600 }, { "epoch": 73.06843267108168, "learning_rate": 8.993379790940766e-07, "loss": 1.4405, "step": 6620 }, { "epoch": 73.28918322295806, "learning_rate": 8.990243902439025e-07, "loss": 1.4501, "step": 6640 }, { "epoch": 73.50993377483444, "learning_rate": 8.987108013937282e-07, "loss": 1.4801, "step": 6660 }, { "epoch": 73.73068432671081, "learning_rate": 8.98397212543554e-07, "loss": 1.4407, "step": 6680 }, { "epoch": 73.9514348785872, "learning_rate": 8.980836236933798e-07, "loss": 1.4393, "step": 6700 }, { "epoch": 74.17218543046357, "learning_rate": 8.977700348432056e-07, "loss": 1.3913, "step": 6720 }, { "epoch": 74.39293598233995, "learning_rate": 8.974564459930313e-07, "loss": 1.4949, "step": 6740 }, { "epoch": 74.61368653421634, "learning_rate": 8.971428571428571e-07, "loss": 1.4363, "step": 6760 }, { "epoch": 74.83443708609272, "learning_rate": 8.968292682926829e-07, "loss": 1.4655, "step": 6780 }, { "epoch": 75.0551876379691, "learning_rate": 8.965156794425087e-07, "loss": 1.4991, "step": 6800 }, { "epoch": 75.27593818984548, "learning_rate": 8.962020905923344e-07, "loss": 1.4516, "step": 6820 }, { "epoch": 75.49668874172185, "learning_rate": 8.958885017421603e-07, "loss": 1.5084, "step": 6840 }, { "epoch": 75.71743929359823, "learning_rate": 8.95574912891986e-07, "loss": 1.4542, "step": 6860 }, { "epoch": 75.93818984547461, "learning_rate": 8.952613240418118e-07, "loss": 1.475, "step": 6880 }, { "epoch": 76.15894039735099, "learning_rate": 8.949477351916375e-07, "loss": 1.4287, "step": 6900 }, { "epoch": 76.37969094922737, "learning_rate": 8.946341463414634e-07, "loss": 1.453, "step": 6920 }, { "epoch": 76.60044150110376, "learning_rate": 8.943205574912893e-07, "loss": 1.4372, "step": 6940 }, { "epoch": 76.82119205298014, "learning_rate": 8.940069686411149e-07, "loss": 1.4665, "step": 6960 }, { "epoch": 77.04194260485652, "learning_rate": 8.936933797909408e-07, "loss": 1.4841, "step": 6980 }, { "epoch": 77.2626931567329, "learning_rate": 8.933797909407665e-07, "loss": 1.4491, "step": 7000 }, { "epoch": 77.48344370860927, "learning_rate": 8.930662020905923e-07, "loss": 1.4382, "step": 7020 }, { "epoch": 77.70419426048565, "learning_rate": 8.92752613240418e-07, "loss": 1.4206, "step": 7040 }, { "epoch": 77.92494481236203, "learning_rate": 8.924390243902439e-07, "loss": 1.4521, "step": 7060 }, { "epoch": 78.1456953642384, "learning_rate": 8.921254355400697e-07, "loss": 1.4688, "step": 7080 }, { "epoch": 78.36644591611478, "learning_rate": 8.918118466898954e-07, "loss": 1.4304, "step": 7100 }, { "epoch": 78.58719646799118, "learning_rate": 8.914982578397212e-07, "loss": 1.4165, "step": 7120 }, { "epoch": 78.80794701986756, "learning_rate": 8.91184668989547e-07, "loss": 1.4728, "step": 7140 }, { "epoch": 79.02869757174393, "learning_rate": 8.908710801393728e-07, "loss": 1.4959, "step": 7160 }, { "epoch": 79.24944812362031, "learning_rate": 8.905574912891986e-07, "loss": 1.4211, "step": 7180 }, { "epoch": 79.47019867549669, "learning_rate": 8.902439024390244e-07, "loss": 1.4794, "step": 7200 }, { "epoch": 79.47019867549669, "eval_bleu": 44.4125, "eval_gen_len": 9.0667, "eval_loss": 2.0255990028381348, "eval_runtime": 3.4282, "eval_samples_per_second": 8.751, "eval_steps_per_second": 1.75, "step": 7200 }, { "epoch": 79.69094922737307, "learning_rate": 8.899303135888502e-07, "loss": 1.4294, "step": 7220 }, { "epoch": 79.91169977924945, "learning_rate": 8.896167247386759e-07, "loss": 1.4691, "step": 7240 }, { "epoch": 80.13245033112582, "learning_rate": 8.893031358885017e-07, "loss": 1.4619, "step": 7260 }, { "epoch": 80.3532008830022, "learning_rate": 8.889895470383275e-07, "loss": 1.4952, "step": 7280 }, { "epoch": 80.57395143487858, "learning_rate": 8.886759581881533e-07, "loss": 1.4284, "step": 7300 }, { "epoch": 80.79470198675497, "learning_rate": 8.88362369337979e-07, "loss": 1.3992, "step": 7320 }, { "epoch": 81.01545253863135, "learning_rate": 8.880487804878048e-07, "loss": 1.4512, "step": 7340 }, { "epoch": 81.23620309050773, "learning_rate": 8.877351916376307e-07, "loss": 1.4298, "step": 7360 }, { "epoch": 81.45695364238411, "learning_rate": 8.874216027874564e-07, "loss": 1.4442, "step": 7380 }, { "epoch": 81.67770419426049, "learning_rate": 8.871080139372822e-07, "loss": 1.4112, "step": 7400 }, { "epoch": 81.89845474613686, "learning_rate": 8.867944250871079e-07, "loss": 1.4365, "step": 7420 }, { "epoch": 82.11920529801324, "learning_rate": 8.864808362369338e-07, "loss": 1.5076, "step": 7440 }, { "epoch": 82.33995584988962, "learning_rate": 8.861672473867594e-07, "loss": 1.4004, "step": 7460 }, { "epoch": 82.560706401766, "learning_rate": 8.858536585365853e-07, "loss": 1.4641, "step": 7480 }, { "epoch": 82.78145695364239, "learning_rate": 8.855400696864112e-07, "loss": 1.4321, "step": 7500 }, { "epoch": 83.00220750551877, "learning_rate": 8.85226480836237e-07, "loss": 1.4592, "step": 7520 }, { "epoch": 83.22295805739515, "learning_rate": 8.849128919860627e-07, "loss": 1.4101, "step": 7540 }, { "epoch": 83.44370860927152, "learning_rate": 8.845993031358884e-07, "loss": 1.4455, "step": 7560 }, { "epoch": 83.6644591611479, "learning_rate": 8.842857142857143e-07, "loss": 1.4175, "step": 7580 }, { "epoch": 83.88520971302428, "learning_rate": 8.8397212543554e-07, "loss": 1.4389, "step": 7600 }, { "epoch": 84.10596026490066, "learning_rate": 8.836585365853658e-07, "loss": 1.4963, "step": 7620 }, { "epoch": 84.32671081677704, "learning_rate": 8.833449477351916e-07, "loss": 1.4683, "step": 7640 }, { "epoch": 84.54746136865342, "learning_rate": 8.830313588850174e-07, "loss": 1.4528, "step": 7660 }, { "epoch": 84.76821192052981, "learning_rate": 8.827177700348431e-07, "loss": 1.4128, "step": 7680 }, { "epoch": 84.98896247240619, "learning_rate": 8.824041811846689e-07, "loss": 1.4012, "step": 7700 }, { "epoch": 85.20971302428256, "learning_rate": 8.820905923344947e-07, "loss": 1.4448, "step": 7720 }, { "epoch": 85.43046357615894, "learning_rate": 8.817770034843205e-07, "loss": 1.3746, "step": 7740 }, { "epoch": 85.65121412803532, "learning_rate": 8.814634146341464e-07, "loss": 1.4086, "step": 7760 }, { "epoch": 85.8719646799117, "learning_rate": 8.811498257839721e-07, "loss": 1.4774, "step": 7780 }, { "epoch": 86.09271523178808, "learning_rate": 8.808362369337979e-07, "loss": 1.4534, "step": 7800 }, { "epoch": 86.31346578366445, "learning_rate": 8.805226480836237e-07, "loss": 1.3844, "step": 7820 }, { "epoch": 86.53421633554083, "learning_rate": 8.802090592334494e-07, "loss": 1.4751, "step": 7840 }, { "epoch": 86.75496688741723, "learning_rate": 8.798954703832752e-07, "loss": 1.4002, "step": 7860 }, { "epoch": 86.9757174392936, "learning_rate": 8.79581881533101e-07, "loss": 1.4659, "step": 7880 }, { "epoch": 87.19646799116998, "learning_rate": 8.792682926829268e-07, "loss": 1.4322, "step": 7900 }, { "epoch": 87.41721854304636, "learning_rate": 8.789547038327526e-07, "loss": 1.4278, "step": 7920 }, { "epoch": 87.63796909492274, "learning_rate": 8.786411149825783e-07, "loss": 1.453, "step": 7940 }, { "epoch": 87.85871964679912, "learning_rate": 8.783275261324042e-07, "loss": 1.4371, "step": 7960 }, { "epoch": 88.0794701986755, "learning_rate": 8.780139372822298e-07, "loss": 1.4775, "step": 7980 }, { "epoch": 88.30022075055187, "learning_rate": 8.777003484320557e-07, "loss": 1.4058, "step": 8000 }, { "epoch": 88.52097130242825, "learning_rate": 8.773867595818815e-07, "loss": 1.4119, "step": 8020 }, { "epoch": 88.74172185430463, "learning_rate": 8.770731707317073e-07, "loss": 1.4316, "step": 8040 }, { "epoch": 88.96247240618102, "learning_rate": 8.767595818815331e-07, "loss": 1.4198, "step": 8060 }, { "epoch": 89.1832229580574, "learning_rate": 8.764459930313588e-07, "loss": 1.4434, "step": 8080 }, { "epoch": 89.40397350993378, "learning_rate": 8.761324041811848e-07, "loss": 1.4309, "step": 8100 }, { "epoch": 89.62472406181016, "learning_rate": 8.758188153310104e-07, "loss": 1.3757, "step": 8120 }, { "epoch": 89.84547461368653, "learning_rate": 8.755052264808362e-07, "loss": 1.479, "step": 8140 }, { "epoch": 90.06622516556291, "learning_rate": 8.751916376306619e-07, "loss": 1.3926, "step": 8160 }, { "epoch": 90.28697571743929, "learning_rate": 8.748780487804878e-07, "loss": 1.4629, "step": 8180 }, { "epoch": 90.50772626931567, "learning_rate": 8.745644599303135e-07, "loss": 1.3793, "step": 8200 }, { "epoch": 90.72847682119205, "learning_rate": 8.742508710801393e-07, "loss": 1.4113, "step": 8220 }, { "epoch": 90.94922737306844, "learning_rate": 8.739372822299651e-07, "loss": 1.4187, "step": 8240 }, { "epoch": 91.16997792494482, "learning_rate": 8.73623693379791e-07, "loss": 1.4466, "step": 8260 }, { "epoch": 91.3907284768212, "learning_rate": 8.733101045296167e-07, "loss": 1.4219, "step": 8280 }, { "epoch": 91.61147902869757, "learning_rate": 8.729965156794424e-07, "loss": 1.4783, "step": 8300 }, { "epoch": 91.83222958057395, "learning_rate": 8.726829268292683e-07, "loss": 1.4111, "step": 8320 }, { "epoch": 92.05298013245033, "learning_rate": 8.723693379790941e-07, "loss": 1.427, "step": 8340 }, { "epoch": 92.27373068432671, "learning_rate": 8.720557491289198e-07, "loss": 1.414, "step": 8360 }, { "epoch": 92.49448123620309, "learning_rate": 8.717421602787456e-07, "loss": 1.4097, "step": 8380 }, { "epoch": 92.71523178807946, "learning_rate": 8.714285714285715e-07, "loss": 1.4818, "step": 8400 }, { "epoch": 92.93598233995586, "learning_rate": 8.711149825783971e-07, "loss": 1.4127, "step": 8420 }, { "epoch": 93.15673289183223, "learning_rate": 8.708013937282229e-07, "loss": 1.4264, "step": 8440 }, { "epoch": 93.37748344370861, "learning_rate": 8.704878048780487e-07, "loss": 1.4663, "step": 8460 }, { "epoch": 93.59823399558499, "learning_rate": 8.701742160278746e-07, "loss": 1.4151, "step": 8480 }, { "epoch": 93.81898454746137, "learning_rate": 8.698606271777002e-07, "loss": 1.4362, "step": 8500 }, { "epoch": 94.03973509933775, "learning_rate": 8.695470383275261e-07, "loss": 1.3755, "step": 8520 }, { "epoch": 94.26048565121413, "learning_rate": 8.69233449477352e-07, "loss": 1.3927, "step": 8540 }, { "epoch": 94.4812362030905, "learning_rate": 8.689198606271777e-07, "loss": 1.4137, "step": 8560 }, { "epoch": 94.70198675496688, "learning_rate": 8.686062717770035e-07, "loss": 1.4284, "step": 8580 }, { "epoch": 94.92273730684327, "learning_rate": 8.682926829268292e-07, "loss": 1.4287, "step": 8600 }, { "epoch": 95.14348785871965, "learning_rate": 8.679790940766551e-07, "loss": 1.4243, "step": 8620 }, { "epoch": 95.36423841059603, "learning_rate": 8.676655052264807e-07, "loss": 1.3588, "step": 8640 }, { "epoch": 95.58498896247241, "learning_rate": 8.673519163763066e-07, "loss": 1.3846, "step": 8660 }, { "epoch": 95.80573951434879, "learning_rate": 8.670383275261325e-07, "loss": 1.4513, "step": 8680 }, { "epoch": 96.02649006622516, "learning_rate": 8.667247386759582e-07, "loss": 1.4022, "step": 8700 }, { "epoch": 96.24724061810154, "learning_rate": 8.664111498257838e-07, "loss": 1.3969, "step": 8720 }, { "epoch": 96.46799116997792, "learning_rate": 8.660975609756097e-07, "loss": 1.38, "step": 8740 }, { "epoch": 96.6887417218543, "learning_rate": 8.657839721254355e-07, "loss": 1.4701, "step": 8760 }, { "epoch": 96.90949227373068, "learning_rate": 8.654703832752613e-07, "loss": 1.3637, "step": 8780 }, { "epoch": 97.13024282560707, "learning_rate": 8.65156794425087e-07, "loss": 1.3748, "step": 8800 }, { "epoch": 97.35099337748345, "learning_rate": 8.648432055749129e-07, "loss": 1.461, "step": 8820 }, { "epoch": 97.57174392935983, "learning_rate": 8.645296167247387e-07, "loss": 1.4615, "step": 8840 }, { "epoch": 97.7924944812362, "learning_rate": 8.642160278745644e-07, "loss": 1.3775, "step": 8860 }, { "epoch": 98.01324503311258, "learning_rate": 8.639024390243902e-07, "loss": 1.4125, "step": 8880 }, { "epoch": 98.23399558498896, "learning_rate": 8.63588850174216e-07, "loss": 1.3846, "step": 8900 }, { "epoch": 98.45474613686534, "learning_rate": 8.632752613240419e-07, "loss": 1.3948, "step": 8920 }, { "epoch": 98.67549668874172, "learning_rate": 8.629616724738675e-07, "loss": 1.348, "step": 8940 }, { "epoch": 98.8962472406181, "learning_rate": 8.626480836236934e-07, "loss": 1.4504, "step": 8960 }, { "epoch": 99.11699779249449, "learning_rate": 8.623344947735191e-07, "loss": 1.3899, "step": 8980 }, { "epoch": 99.33774834437087, "learning_rate": 8.62020905923345e-07, "loss": 1.4558, "step": 9000 }, { "epoch": 99.55849889624724, "learning_rate": 8.617073170731706e-07, "loss": 1.3781, "step": 9020 }, { "epoch": 99.77924944812362, "learning_rate": 8.613937282229965e-07, "loss": 1.3847, "step": 9040 }, { "epoch": 100.0, "learning_rate": 8.610801393728222e-07, "loss": 1.4043, "step": 9060 }, { "epoch": 100.22075055187638, "learning_rate": 8.60766550522648e-07, "loss": 1.4069, "step": 9080 }, { "epoch": 100.44150110375276, "learning_rate": 8.604529616724739e-07, "loss": 1.3562, "step": 9100 }, { "epoch": 100.66225165562913, "learning_rate": 8.601393728222996e-07, "loss": 1.3854, "step": 9120 }, { "epoch": 100.88300220750551, "learning_rate": 8.598257839721255e-07, "loss": 1.3842, "step": 9140 }, { "epoch": 101.1037527593819, "learning_rate": 8.595121951219512e-07, "loss": 1.3954, "step": 9160 }, { "epoch": 101.32450331125828, "learning_rate": 8.59198606271777e-07, "loss": 1.3966, "step": 9180 }, { "epoch": 101.54525386313466, "learning_rate": 8.588850174216027e-07, "loss": 1.4175, "step": 9200 }, { "epoch": 101.76600441501104, "learning_rate": 8.585714285714286e-07, "loss": 1.3944, "step": 9220 }, { "epoch": 101.98675496688742, "learning_rate": 8.582578397212543e-07, "loss": 1.343, "step": 9240 }, { "epoch": 102.2075055187638, "learning_rate": 8.579442508710801e-07, "loss": 1.3974, "step": 9260 }, { "epoch": 102.42825607064017, "learning_rate": 8.576306620209059e-07, "loss": 1.4235, "step": 9280 }, { "epoch": 102.64900662251655, "learning_rate": 8.573170731707317e-07, "loss": 1.379, "step": 9300 }, { "epoch": 102.86975717439293, "learning_rate": 8.570034843205574e-07, "loss": 1.3593, "step": 9320 }, { "epoch": 103.09050772626932, "learning_rate": 8.566898954703832e-07, "loss": 1.4164, "step": 9340 }, { "epoch": 103.3112582781457, "learning_rate": 8.563763066202091e-07, "loss": 1.3811, "step": 9360 }, { "epoch": 103.53200883002208, "learning_rate": 8.560627177700348e-07, "loss": 1.3853, "step": 9380 }, { "epoch": 103.75275938189846, "learning_rate": 8.557491289198606e-07, "loss": 1.3948, "step": 9400 }, { "epoch": 103.97350993377484, "learning_rate": 8.554355400696864e-07, "loss": 1.4206, "step": 9420 }, { "epoch": 104.19426048565121, "learning_rate": 8.551219512195122e-07, "loss": 1.3967, "step": 9440 }, { "epoch": 104.41501103752759, "learning_rate": 8.548083623693379e-07, "loss": 1.3597, "step": 9460 }, { "epoch": 104.63576158940397, "learning_rate": 8.544947735191637e-07, "loss": 1.4107, "step": 9480 }, { "epoch": 104.85651214128035, "learning_rate": 8.541811846689896e-07, "loss": 1.3626, "step": 9500 }, { "epoch": 105.07726269315673, "learning_rate": 8.538675958188153e-07, "loss": 1.4051, "step": 9520 }, { "epoch": 105.29801324503312, "learning_rate": 8.53554006968641e-07, "loss": 1.4063, "step": 9540 }, { "epoch": 105.5187637969095, "learning_rate": 8.532404181184669e-07, "loss": 1.4275, "step": 9560 }, { "epoch": 105.73951434878587, "learning_rate": 8.529268292682926e-07, "loss": 1.3462, "step": 9580 }, { "epoch": 105.96026490066225, "learning_rate": 8.526132404181184e-07, "loss": 1.3702, "step": 9600 }, { "epoch": 106.18101545253863, "learning_rate": 8.522996515679441e-07, "loss": 1.3683, "step": 9620 }, { "epoch": 106.40176600441501, "learning_rate": 8.5198606271777e-07, "loss": 1.4073, "step": 9640 }, { "epoch": 106.62251655629139, "learning_rate": 8.516724738675959e-07, "loss": 1.3627, "step": 9660 }, { "epoch": 106.84326710816777, "learning_rate": 8.513588850174215e-07, "loss": 1.4, "step": 9680 }, { "epoch": 107.06401766004414, "learning_rate": 8.510452961672474e-07, "loss": 1.3322, "step": 9700 }, { "epoch": 107.28476821192054, "learning_rate": 8.507317073170731e-07, "loss": 1.3776, "step": 9720 }, { "epoch": 107.50551876379691, "learning_rate": 8.50418118466899e-07, "loss": 1.3837, "step": 9740 }, { "epoch": 107.72626931567329, "learning_rate": 8.501045296167246e-07, "loss": 1.3633, "step": 9760 }, { "epoch": 107.94701986754967, "learning_rate": 8.497909407665505e-07, "loss": 1.4582, "step": 9780 }, { "epoch": 108.16777041942605, "learning_rate": 8.494773519163763e-07, "loss": 1.4204, "step": 9800 }, { "epoch": 108.38852097130243, "learning_rate": 8.49163763066202e-07, "loss": 1.3606, "step": 9820 }, { "epoch": 108.6092715231788, "learning_rate": 8.488501742160278e-07, "loss": 1.41, "step": 9840 }, { "epoch": 108.83002207505518, "learning_rate": 8.485365853658536e-07, "loss": 1.321, "step": 9860 }, { "epoch": 109.05077262693156, "learning_rate": 8.482229965156795e-07, "loss": 1.3471, "step": 9880 }, { "epoch": 109.27152317880795, "learning_rate": 8.479094076655051e-07, "loss": 1.3809, "step": 9900 }, { "epoch": 109.49227373068433, "learning_rate": 8.47595818815331e-07, "loss": 1.3795, "step": 9920 }, { "epoch": 109.71302428256071, "learning_rate": 8.472822299651568e-07, "loss": 1.3751, "step": 9940 }, { "epoch": 109.93377483443709, "learning_rate": 8.469686411149826e-07, "loss": 1.3513, "step": 9960 }, { "epoch": 110.15452538631347, "learning_rate": 8.466550522648083e-07, "loss": 1.3526, "step": 9980 }, { "epoch": 110.37527593818984, "learning_rate": 8.463414634146341e-07, "loss": 1.3819, "step": 10000 }, { "epoch": 110.59602649006622, "learning_rate": 8.460278745644599e-07, "loss": 1.3961, "step": 10020 }, { "epoch": 110.8167770419426, "learning_rate": 8.457142857142856e-07, "loss": 1.3934, "step": 10040 }, { "epoch": 111.03752759381898, "learning_rate": 8.454006968641114e-07, "loss": 1.3603, "step": 10060 }, { "epoch": 111.25827814569537, "learning_rate": 8.450871080139372e-07, "loss": 1.3933, "step": 10080 }, { "epoch": 111.47902869757175, "learning_rate": 8.44773519163763e-07, "loss": 1.381, "step": 10100 }, { "epoch": 111.69977924944813, "learning_rate": 8.444599303135888e-07, "loss": 1.4153, "step": 10120 }, { "epoch": 111.9205298013245, "learning_rate": 8.441463414634147e-07, "loss": 1.361, "step": 10140 }, { "epoch": 112.14128035320088, "learning_rate": 8.438327526132404e-07, "loss": 1.3341, "step": 10160 }, { "epoch": 112.36203090507726, "learning_rate": 8.435191637630662e-07, "loss": 1.3696, "step": 10180 }, { "epoch": 112.58278145695364, "learning_rate": 8.432055749128919e-07, "loss": 1.3899, "step": 10200 }, { "epoch": 112.80353200883002, "learning_rate": 8.428919860627178e-07, "loss": 1.3883, "step": 10220 }, { "epoch": 113.0242825607064, "learning_rate": 8.425783972125435e-07, "loss": 1.3401, "step": 10240 }, { "epoch": 113.24503311258277, "learning_rate": 8.422648083623693e-07, "loss": 1.3599, "step": 10260 }, { "epoch": 113.46578366445917, "learning_rate": 8.419512195121951e-07, "loss": 1.3688, "step": 10280 }, { "epoch": 113.68653421633555, "learning_rate": 8.416376306620209e-07, "loss": 1.3976, "step": 10300 }, { "epoch": 113.90728476821192, "learning_rate": 8.413240418118465e-07, "loss": 1.3909, "step": 10320 }, { "epoch": 114.1280353200883, "learning_rate": 8.410104529616724e-07, "loss": 1.4132, "step": 10340 }, { "epoch": 114.34878587196468, "learning_rate": 8.406968641114982e-07, "loss": 1.3741, "step": 10360 }, { "epoch": 114.56953642384106, "learning_rate": 8.40383275261324e-07, "loss": 1.3339, "step": 10380 }, { "epoch": 114.79028697571744, "learning_rate": 8.400696864111498e-07, "loss": 1.3869, "step": 10400 }, { "epoch": 115.01103752759381, "learning_rate": 8.397560975609756e-07, "loss": 1.3644, "step": 10420 }, { "epoch": 115.23178807947019, "learning_rate": 8.394425087108014e-07, "loss": 1.3584, "step": 10440 }, { "epoch": 115.45253863134658, "learning_rate": 8.391289198606271e-07, "loss": 1.3698, "step": 10460 }, { "epoch": 115.67328918322296, "learning_rate": 8.388153310104529e-07, "loss": 1.39, "step": 10480 }, { "epoch": 115.89403973509934, "learning_rate": 8.385017421602787e-07, "loss": 1.3651, "step": 10500 }, { "epoch": 116.11479028697572, "learning_rate": 8.381881533101045e-07, "loss": 1.339, "step": 10520 }, { "epoch": 116.3355408388521, "learning_rate": 8.378745644599303e-07, "loss": 1.3469, "step": 10540 }, { "epoch": 116.55629139072848, "learning_rate": 8.375609756097561e-07, "loss": 1.4008, "step": 10560 }, { "epoch": 116.77704194260485, "learning_rate": 8.372473867595818e-07, "loss": 1.3757, "step": 10580 }, { "epoch": 116.99779249448123, "learning_rate": 8.369337979094076e-07, "loss": 1.3792, "step": 10600 }, { "epoch": 117.21854304635761, "learning_rate": 8.366202090592334e-07, "loss": 1.3422, "step": 10620 }, { "epoch": 117.439293598234, "learning_rate": 8.363066202090592e-07, "loss": 1.3876, "step": 10640 }, { "epoch": 117.66004415011038, "learning_rate": 8.359930313588849e-07, "loss": 1.3519, "step": 10660 }, { "epoch": 117.88079470198676, "learning_rate": 8.356794425087108e-07, "loss": 1.3268, "step": 10680 }, { "epoch": 118.10154525386314, "learning_rate": 8.353658536585366e-07, "loss": 1.4245, "step": 10700 }, { "epoch": 118.32229580573951, "learning_rate": 8.350522648083623e-07, "loss": 1.3755, "step": 10720 }, { "epoch": 118.54304635761589, "learning_rate": 8.347386759581881e-07, "loss": 1.3318, "step": 10740 }, { "epoch": 118.76379690949227, "learning_rate": 8.344250871080139e-07, "loss": 1.3395, "step": 10760 }, { "epoch": 118.98454746136865, "learning_rate": 8.341114982578397e-07, "loss": 1.407, "step": 10780 }, { "epoch": 119.20529801324503, "learning_rate": 8.337979094076654e-07, "loss": 1.3621, "step": 10800 }, { "epoch": 119.20529801324503, "eval_bleu": 49.121, "eval_gen_len": 8.8, "eval_loss": 2.0037317276000977, "eval_runtime": 3.2416, "eval_samples_per_second": 9.255, "eval_steps_per_second": 1.851, "step": 10800 } ], "logging_steps": 20, "max_steps": 57600, "num_input_tokens_seen": 0, "num_train_epochs": 640, "save_steps": 3600, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 3.194864182891315e+16, "train_batch_size": 5, "trial_name": null, "trial_params": null }