Training in progress, step 10800, checkpoint
{
"best_metric": 49.121,
"best_model_checkpoint": "/kaggle/working/amr-tst-indo/AMRBART-id/fine-tune/../outputs/mbart-en-id-smaller-fted-amr-generation-v2-fted/checkpoint-10800",
"epoch": 119.20529801324503,
"eval_steps": 3600,
"global_step": 10800,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.011037527593818985,
"learning_rate": 5e-09,
"loss": 2.1638,
"step": 1
},
{
"epoch": 0.22075055187637968,
"learning_rate": 1e-07,
"loss": 1.9727,
"step": 20
},
{
"epoch": 0.44150110375275936,
"learning_rate": 2e-07,
"loss": 2.0028,
"step": 40
},
{
"epoch": 0.6622516556291391,
"learning_rate": 3e-07,
"loss": 2.0448,
"step": 60
},
{
"epoch": 0.8830022075055187,
"learning_rate": 4e-07,
"loss": 1.9696,
"step": 80
},
{
"epoch": 1.1037527593818985,
"learning_rate": 5e-07,
"loss": 1.9375,
"step": 100
},
{
"epoch": 1.3245033112582782,
"learning_rate": 6e-07,
"loss": 1.9075,
"step": 120
},
{
"epoch": 1.5452538631346577,
"learning_rate": 7e-07,
"loss": 1.9866,
"step": 140
},
{
"epoch": 1.7660044150110377,
"learning_rate": 8e-07,
"loss": 1.882,
"step": 160
},
{
"epoch": 1.9867549668874172,
"learning_rate": 9e-07,
"loss": 1.9916,
"step": 180
},
{
"epoch": 2.207505518763797,
"learning_rate": 1e-06,
"loss": 1.9132,
"step": 200
},
{
"epoch": 2.4282560706401766,
"learning_rate": 9.996864111498258e-07,
"loss": 1.9858,
"step": 220
},
{
"epoch": 2.6490066225165565,
"learning_rate": 9.993728222996515e-07,
"loss": 1.9373,
"step": 240
},
{
"epoch": 2.869757174392936,
"learning_rate": 9.990592334494773e-07,
"loss": 1.9146,
"step": 260
},
{
"epoch": 3.0905077262693155,
"learning_rate": 9.987456445993032e-07,
"loss": 1.8665,
"step": 280
},
{
"epoch": 3.3112582781456954,
"learning_rate": 9.98432055749129e-07,
"loss": 1.9419,
"step": 300
},
{
"epoch": 3.5320088300220753,
"learning_rate": 9.981184668989545e-07,
"loss": 1.8332,
"step": 320
},
{
"epoch": 3.752759381898455,
"learning_rate": 9.978048780487803e-07,
"loss": 1.8534,
"step": 340
},
{
"epoch": 3.9735099337748343,
"learning_rate": 9.974912891986062e-07,
"loss": 1.9421,
"step": 360
},
{
"epoch": 4.194260485651214,
"learning_rate": 9.97177700348432e-07,
"loss": 1.9374,
"step": 380
},
{
"epoch": 4.415011037527594,
"learning_rate": 9.96864111498258e-07,
"loss": 1.8876,
"step": 400
},
{
"epoch": 4.635761589403973,
"learning_rate": 9.965505226480835e-07,
"loss": 1.8593,
"step": 420
},
{
"epoch": 4.856512141280353,
"learning_rate": 9.962369337979094e-07,
"loss": 1.8243,
"step": 440
},
{
"epoch": 5.077262693156733,
"learning_rate": 9.95923344947735e-07,
"loss": 1.8791,
"step": 460
},
{
"epoch": 5.298013245033113,
"learning_rate": 9.95609756097561e-07,
"loss": 1.8878,
"step": 480
},
{
"epoch": 5.518763796909492,
"learning_rate": 9.952961672473868e-07,
"loss": 1.8985,
"step": 500
},
{
"epoch": 5.739514348785872,
"learning_rate": 9.949825783972126e-07,
"loss": 1.84,
"step": 520
},
{
"epoch": 5.960264900662252,
"learning_rate": 9.946689895470383e-07,
"loss": 1.8468,
"step": 540
},
{
"epoch": 6.181015452538631,
"learning_rate": 9.94355400696864e-07,
"loss": 1.8469,
"step": 560
},
{
"epoch": 6.401766004415011,
"learning_rate": 9.940418118466898e-07,
"loss": 1.8528,
"step": 580
},
{
"epoch": 6.622516556291391,
"learning_rate": 9.937282229965156e-07,
"loss": 1.8269,
"step": 600
},
{
"epoch": 6.843267108167771,
"learning_rate": 9.934146341463415e-07,
"loss": 1.8324,
"step": 620
},
{
"epoch": 7.06401766004415,
"learning_rate": 9.931010452961673e-07,
"loss": 1.808,
"step": 640
},
{
"epoch": 7.28476821192053,
"learning_rate": 9.92787456445993e-07,
"loss": 1.9003,
"step": 660
},
{
"epoch": 7.50551876379691,
"learning_rate": 9.924738675958186e-07,
"loss": 1.908,
"step": 680
},
{
"epoch": 7.72626931567329,
"learning_rate": 9.921602787456445e-07,
"loss": 1.8165,
"step": 700
},
{
"epoch": 7.947019867549669,
"learning_rate": 9.918466898954704e-07,
"loss": 1.7924,
"step": 720
},
{
"epoch": 8.167770419426049,
"learning_rate": 9.915331010452962e-07,
"loss": 1.8275,
"step": 740
},
{
"epoch": 8.388520971302428,
"learning_rate": 9.912195121951219e-07,
"loss": 1.8033,
"step": 760
},
{
"epoch": 8.609271523178808,
"learning_rate": 9.909059233449477e-07,
"loss": 1.8283,
"step": 780
},
{
"epoch": 8.830022075055188,
"learning_rate": 9.905923344947734e-07,
"loss": 1.8361,
"step": 800
},
{
"epoch": 9.050772626931566,
"learning_rate": 9.902787456445992e-07,
"loss": 1.8264,
"step": 820
},
{
"epoch": 9.271523178807946,
"learning_rate": 9.89965156794425e-07,
"loss": 1.8157,
"step": 840
},
{
"epoch": 9.492273730684326,
"learning_rate": 9.89651567944251e-07,
"loss": 1.8038,
"step": 860
},
{
"epoch": 9.713024282560706,
"learning_rate": 9.893379790940768e-07,
"loss": 1.8495,
"step": 880
},
{
"epoch": 9.933774834437086,
"learning_rate": 9.890243902439024e-07,
"loss": 1.8379,
"step": 900
},
{
"epoch": 10.154525386313466,
"learning_rate": 9.88710801393728e-07,
"loss": 1.807,
"step": 920
},
{
"epoch": 10.375275938189846,
"learning_rate": 9.88397212543554e-07,
"loss": 1.7354,
"step": 940
},
{
"epoch": 10.596026490066226,
"learning_rate": 9.880836236933798e-07,
"loss": 1.7909,
"step": 960
},
{
"epoch": 10.816777041942604,
"learning_rate": 9.877700348432054e-07,
"loss": 1.8504,
"step": 980
},
{
"epoch": 11.037527593818984,
"learning_rate": 9.874564459930313e-07,
"loss": 1.7565,
"step": 1000
},
{
"epoch": 11.258278145695364,
"learning_rate": 9.871428571428572e-07,
"loss": 1.8464,
"step": 1020
},
{
"epoch": 11.479028697571744,
"learning_rate": 9.868292682926828e-07,
"loss": 1.7463,
"step": 1040
},
{
"epoch": 11.699779249448124,
"learning_rate": 9.865156794425087e-07,
"loss": 1.8286,
"step": 1060
},
{
"epoch": 11.920529801324504,
"learning_rate": 9.862020905923345e-07,
"loss": 1.7821,
"step": 1080
},
{
"epoch": 12.141280353200884,
"learning_rate": 9.858885017421604e-07,
"loss": 1.8069,
"step": 1100
},
{
"epoch": 12.362030905077262,
"learning_rate": 9.85574912891986e-07,
"loss": 1.7943,
"step": 1120
},
{
"epoch": 12.582781456953642,
"learning_rate": 9.852613240418117e-07,
"loss": 1.7535,
"step": 1140
},
{
"epoch": 12.803532008830022,
"learning_rate": 9.849477351916375e-07,
"loss": 1.7881,
"step": 1160
},
{
"epoch": 13.024282560706402,
"learning_rate": 9.846341463414634e-07,
"loss": 1.815,
"step": 1180
},
{
"epoch": 13.245033112582782,
"learning_rate": 9.843205574912892e-07,
"loss": 1.8258,
"step": 1200
},
{
"epoch": 13.465783664459162,
"learning_rate": 9.840069686411149e-07,
"loss": 1.7228,
"step": 1220
},
{
"epoch": 13.686534216335541,
"learning_rate": 9.836933797909407e-07,
"loss": 1.7866,
"step": 1240
},
{
"epoch": 13.90728476821192,
"learning_rate": 9.833797909407664e-07,
"loss": 1.7871,
"step": 1260
},
{
"epoch": 14.1280353200883,
"learning_rate": 9.830662020905923e-07,
"loss": 1.7238,
"step": 1280
},
{
"epoch": 14.34878587196468,
"learning_rate": 9.827526132404181e-07,
"loss": 1.805,
"step": 1300
},
{
"epoch": 14.56953642384106,
"learning_rate": 9.82439024390244e-07,
"loss": 1.7909,
"step": 1320
},
{
"epoch": 14.79028697571744,
"learning_rate": 9.821254355400698e-07,
"loss": 1.7861,
"step": 1340
},
{
"epoch": 15.01103752759382,
"learning_rate": 9.818118466898953e-07,
"loss": 1.7651,
"step": 1360
},
{
"epoch": 15.2317880794702,
"learning_rate": 9.814982578397211e-07,
"loss": 1.7666,
"step": 1380
},
{
"epoch": 15.45253863134658,
"learning_rate": 9.81184668989547e-07,
"loss": 1.7159,
"step": 1400
},
{
"epoch": 15.673289183222957,
"learning_rate": 9.808710801393728e-07,
"loss": 1.7701,
"step": 1420
},
{
"epoch": 15.894039735099337,
"learning_rate": 9.805574912891987e-07,
"loss": 1.7977,
"step": 1440
},
{
"epoch": 16.11479028697572,
"learning_rate": 9.802439024390243e-07,
"loss": 1.7805,
"step": 1460
},
{
"epoch": 16.335540838852097,
"learning_rate": 9.799303135888502e-07,
"loss": 1.7711,
"step": 1480
},
{
"epoch": 16.556291390728475,
"learning_rate": 9.796167247386758e-07,
"loss": 1.6767,
"step": 1500
},
{
"epoch": 16.777041942604857,
"learning_rate": 9.793031358885017e-07,
"loss": 1.722,
"step": 1520
},
{
"epoch": 16.997792494481235,
"learning_rate": 9.789895470383276e-07,
"loss": 1.783,
"step": 1540
},
{
"epoch": 17.218543046357617,
"learning_rate": 9.786759581881534e-07,
"loss": 1.7233,
"step": 1560
},
{
"epoch": 17.439293598233995,
"learning_rate": 9.78362369337979e-07,
"loss": 1.7209,
"step": 1580
},
{
"epoch": 17.660044150110377,
"learning_rate": 9.780487804878047e-07,
"loss": 1.6968,
"step": 1600
},
{
"epoch": 17.880794701986755,
"learning_rate": 9.777351916376306e-07,
"loss": 1.6818,
"step": 1620
},
{
"epoch": 18.101545253863133,
"learning_rate": 9.774216027874564e-07,
"loss": 1.7819,
"step": 1640
},
{
"epoch": 18.322295805739515,
"learning_rate": 9.771080139372823e-07,
"loss": 1.7445,
"step": 1660
},
{
"epoch": 18.543046357615893,
"learning_rate": 9.76794425087108e-07,
"loss": 1.6888,
"step": 1680
},
{
"epoch": 18.763796909492275,
"learning_rate": 9.764808362369338e-07,
"loss": 1.7562,
"step": 1700
},
{
"epoch": 18.984547461368653,
"learning_rate": 9.761672473867594e-07,
"loss": 1.7368,
"step": 1720
},
{
"epoch": 19.205298013245034,
"learning_rate": 9.758536585365853e-07,
"loss": 1.6816,
"step": 1740
},
{
"epoch": 19.426048565121413,
"learning_rate": 9.755400696864111e-07,
"loss": 1.7307,
"step": 1760
},
{
"epoch": 19.64679911699779,
"learning_rate": 9.75226480836237e-07,
"loss": 1.7313,
"step": 1780
},
{
"epoch": 19.867549668874172,
"learning_rate": 9.749128919860627e-07,
"loss": 1.7407,
"step": 1800
},
{
"epoch": 20.08830022075055,
"learning_rate": 9.745993031358883e-07,
"loss": 1.6885,
"step": 1820
},
{
"epoch": 20.309050772626932,
"learning_rate": 9.742857142857142e-07,
"loss": 1.6741,
"step": 1840
},
{
"epoch": 20.52980132450331,
"learning_rate": 9.7397212543554e-07,
"loss": 1.6958,
"step": 1860
},
{
"epoch": 20.750551876379692,
"learning_rate": 9.736585365853659e-07,
"loss": 1.7036,
"step": 1880
},
{
"epoch": 20.97130242825607,
"learning_rate": 9.733449477351917e-07,
"loss": 1.7489,
"step": 1900
},
{
"epoch": 21.192052980132452,
"learning_rate": 9.730313588850174e-07,
"loss": 1.7719,
"step": 1920
},
{
"epoch": 21.41280353200883,
"learning_rate": 9.72717770034843e-07,
"loss": 1.7401,
"step": 1940
},
{
"epoch": 21.63355408388521,
"learning_rate": 9.724041811846689e-07,
"loss": 1.7247,
"step": 1960
},
{
"epoch": 21.85430463576159,
"learning_rate": 9.720905923344947e-07,
"loss": 1.7175,
"step": 1980
},
{
"epoch": 22.075055187637968,
"learning_rate": 9.717770034843206e-07,
"loss": 1.7288,
"step": 2000
},
{
"epoch": 22.29580573951435,
"learning_rate": 9.714634146341462e-07,
"loss": 1.7068,
"step": 2020
},
{
"epoch": 22.516556291390728,
"learning_rate": 9.71149825783972e-07,
"loss": 1.6812,
"step": 2040
},
{
"epoch": 22.73730684326711,
"learning_rate": 9.708362369337977e-07,
"loss": 1.6772,
"step": 2060
},
{
"epoch": 22.958057395143488,
"learning_rate": 9.705226480836236e-07,
"loss": 1.7103,
"step": 2080
},
{
"epoch": 23.178807947019866,
"learning_rate": 9.702090592334495e-07,
"loss": 1.6984,
"step": 2100
},
{
"epoch": 23.399558498896248,
"learning_rate": 9.698954703832753e-07,
"loss": 1.7281,
"step": 2120
},
{
"epoch": 23.620309050772626,
"learning_rate": 9.695818815331012e-07,
"loss": 1.7123,
"step": 2140
},
{
"epoch": 23.841059602649008,
"learning_rate": 9.692682926829266e-07,
"loss": 1.7174,
"step": 2160
},
{
"epoch": 24.061810154525386,
"learning_rate": 9.689547038327525e-07,
"loss": 1.7281,
"step": 2180
},
{
"epoch": 24.282560706401767,
"learning_rate": 9.686411149825783e-07,
"loss": 1.7189,
"step": 2200
},
{
"epoch": 24.503311258278146,
"learning_rate": 9.683275261324042e-07,
"loss": 1.6272,
"step": 2220
},
{
"epoch": 24.724061810154524,
"learning_rate": 9.6801393728223e-07,
"loss": 1.6588,
"step": 2240
},
{
"epoch": 24.944812362030905,
"learning_rate": 9.677003484320557e-07,
"loss": 1.6627,
"step": 2260
},
{
"epoch": 25.165562913907284,
"learning_rate": 9.673867595818815e-07,
"loss": 1.6504,
"step": 2280
},
{
"epoch": 25.386313465783665,
"learning_rate": 9.670731707317072e-07,
"loss": 1.6847,
"step": 2300
},
{
"epoch": 25.607064017660043,
"learning_rate": 9.66759581881533e-07,
"loss": 1.6793,
"step": 2320
},
{
"epoch": 25.827814569536425,
"learning_rate": 9.66445993031359e-07,
"loss": 1.7009,
"step": 2340
},
{
"epoch": 26.048565121412803,
"learning_rate": 9.661324041811848e-07,
"loss": 1.6581,
"step": 2360
},
{
"epoch": 26.26931567328918,
"learning_rate": 9.658188153310104e-07,
"loss": 1.6648,
"step": 2380
},
{
"epoch": 26.490066225165563,
"learning_rate": 9.65505226480836e-07,
"loss": 1.6303,
"step": 2400
},
{
"epoch": 26.71081677704194,
"learning_rate": 9.65191637630662e-07,
"loss": 1.7222,
"step": 2420
},
{
"epoch": 26.931567328918323,
"learning_rate": 9.648780487804878e-07,
"loss": 1.7164,
"step": 2440
},
{
"epoch": 27.1523178807947,
"learning_rate": 9.645644599303136e-07,
"loss": 1.6894,
"step": 2460
},
{
"epoch": 27.373068432671083,
"learning_rate": 9.642508710801393e-07,
"loss": 1.6904,
"step": 2480
},
{
"epoch": 27.59381898454746,
"learning_rate": 9.639372822299651e-07,
"loss": 1.6133,
"step": 2500
},
{
"epoch": 27.814569536423843,
"learning_rate": 9.636236933797908e-07,
"loss": 1.6445,
"step": 2520
},
{
"epoch": 28.03532008830022,
"learning_rate": 9.633101045296166e-07,
"loss": 1.6421,
"step": 2540
},
{
"epoch": 28.2560706401766,
"learning_rate": 9.629965156794425e-07,
"loss": 1.672,
"step": 2560
},
{
"epoch": 28.47682119205298,
"learning_rate": 9.626829268292684e-07,
"loss": 1.6592,
"step": 2580
},
{
"epoch": 28.69757174392936,
"learning_rate": 9.62369337979094e-07,
"loss": 1.649,
"step": 2600
},
{
"epoch": 28.91832229580574,
"learning_rate": 9.620557491289199e-07,
"loss": 1.6667,
"step": 2620
},
{
"epoch": 29.13907284768212,
"learning_rate": 9.617421602787455e-07,
"loss": 1.6172,
"step": 2640
},
{
"epoch": 29.3598233995585,
"learning_rate": 9.614285714285714e-07,
"loss": 1.6696,
"step": 2660
},
{
"epoch": 29.58057395143488,
"learning_rate": 9.611149825783972e-07,
"loss": 1.7091,
"step": 2680
},
{
"epoch": 29.801324503311257,
"learning_rate": 9.60801393728223e-07,
"loss": 1.6111,
"step": 2700
},
{
"epoch": 30.02207505518764,
"learning_rate": 9.604878048780487e-07,
"loss": 1.6274,
"step": 2720
},
{
"epoch": 30.242825607064017,
"learning_rate": 9.601742160278746e-07,
"loss": 1.6186,
"step": 2740
},
{
"epoch": 30.4635761589404,
"learning_rate": 9.598606271777002e-07,
"loss": 1.6336,
"step": 2760
},
{
"epoch": 30.684326710816777,
"learning_rate": 9.59547038327526e-07,
"loss": 1.6414,
"step": 2780
},
{
"epoch": 30.90507726269316,
"learning_rate": 9.59233449477352e-07,
"loss": 1.6628,
"step": 2800
},
{
"epoch": 31.125827814569536,
"learning_rate": 9.589198606271776e-07,
"loss": 1.6092,
"step": 2820
},
{
"epoch": 31.346578366445915,
"learning_rate": 9.586062717770034e-07,
"loss": 1.669,
"step": 2840
},
{
"epoch": 31.567328918322296,
"learning_rate": 9.58292682926829e-07,
"loss": 1.6597,
"step": 2860
},
{
"epoch": 31.788079470198674,
"learning_rate": 9.57979094076655e-07,
"loss": 1.5658,
"step": 2880
},
{
"epoch": 32.00883002207505,
"learning_rate": 9.576655052264808e-07,
"loss": 1.6662,
"step": 2900
},
{
"epoch": 32.22958057395144,
"learning_rate": 9.573519163763067e-07,
"loss": 1.6241,
"step": 2920
},
{
"epoch": 32.450331125827816,
"learning_rate": 9.570383275261325e-07,
"loss": 1.6181,
"step": 2940
},
{
"epoch": 32.671081677704194,
"learning_rate": 9.567247386759582e-07,
"loss": 1.6561,
"step": 2960
},
{
"epoch": 32.89183222958057,
"learning_rate": 9.564111498257838e-07,
"loss": 1.6176,
"step": 2980
},
{
"epoch": 33.11258278145695,
"learning_rate": 9.560975609756097e-07,
"loss": 1.686,
"step": 3000
},
{
"epoch": 33.333333333333336,
"learning_rate": 9.557839721254355e-07,
"loss": 1.5938,
"step": 3020
},
{
"epoch": 33.554083885209714,
"learning_rate": 9.554703832752614e-07,
"loss": 1.5952,
"step": 3040
},
{
"epoch": 33.77483443708609,
"learning_rate": 9.55156794425087e-07,
"loss": 1.5772,
"step": 3060
},
{
"epoch": 33.99558498896247,
"learning_rate": 9.548432055749129e-07,
"loss": 1.5889,
"step": 3080
},
{
"epoch": 34.216335540838855,
"learning_rate": 9.545296167247385e-07,
"loss": 1.5953,
"step": 3100
},
{
"epoch": 34.437086092715234,
"learning_rate": 9.542160278745644e-07,
"loss": 1.6379,
"step": 3120
},
{
"epoch": 34.65783664459161,
"learning_rate": 9.539024390243903e-07,
"loss": 1.6252,
"step": 3140
},
{
"epoch": 34.87858719646799,
"learning_rate": 9.53588850174216e-07,
"loss": 1.628,
"step": 3160
},
{
"epoch": 35.09933774834437,
"learning_rate": 9.532752613240419e-07,
"loss": 1.6547,
"step": 3180
},
{
"epoch": 35.32008830022075,
"learning_rate": 9.529616724738675e-07,
"loss": 1.6308,
"step": 3200
},
{
"epoch": 35.54083885209713,
"learning_rate": 9.526480836236935e-07,
"loss": 1.599,
"step": 3220
},
{
"epoch": 35.76158940397351,
"learning_rate": 9.523344947735191e-07,
"loss": 1.6198,
"step": 3240
},
{
"epoch": 35.98233995584989,
"learning_rate": 9.520209059233449e-07,
"loss": 1.6248,
"step": 3260
},
{
"epoch": 36.203090507726266,
"learning_rate": 9.517073170731706e-07,
"loss": 1.576,
"step": 3280
},
{
"epoch": 36.42384105960265,
"learning_rate": 9.513937282229965e-07,
"loss": 1.6648,
"step": 3300
},
{
"epoch": 36.64459161147903,
"learning_rate": 9.510801393728223e-07,
"loss": 1.6035,
"step": 3320
},
{
"epoch": 36.86534216335541,
"learning_rate": 9.50766550522648e-07,
"loss": 1.6051,
"step": 3340
},
{
"epoch": 37.086092715231786,
"learning_rate": 9.504529616724738e-07,
"loss": 1.6052,
"step": 3360
},
{
"epoch": 37.30684326710817,
"learning_rate": 9.501393728222996e-07,
"loss": 1.6081,
"step": 3380
},
{
"epoch": 37.52759381898455,
"learning_rate": 9.498257839721255e-07,
"loss": 1.6066,
"step": 3400
},
{
"epoch": 37.74834437086093,
"learning_rate": 9.495121951219511e-07,
"loss": 1.5763,
"step": 3420
},
{
"epoch": 37.969094922737305,
"learning_rate": 9.49198606271777e-07,
"loss": 1.6039,
"step": 3440
},
{
"epoch": 38.18984547461368,
"learning_rate": 9.488850174216028e-07,
"loss": 1.6203,
"step": 3460
},
{
"epoch": 38.41059602649007,
"learning_rate": 9.485714285714285e-07,
"loss": 1.619,
"step": 3480
},
{
"epoch": 38.63134657836645,
"learning_rate": 9.482578397212543e-07,
"loss": 1.6055,
"step": 3500
},
{
"epoch": 38.852097130242825,
"learning_rate": 9.479442508710801e-07,
"loss": 1.6037,
"step": 3520
},
{
"epoch": 39.0728476821192,
"learning_rate": 9.476306620209059e-07,
"loss": 1.6259,
"step": 3540
},
{
"epoch": 39.29359823399559,
"learning_rate": 9.473170731707316e-07,
"loss": 1.5917,
"step": 3560
},
{
"epoch": 39.51434878587197,
"learning_rate": 9.470034843205574e-07,
"loss": 1.6039,
"step": 3580
},
{
"epoch": 39.735099337748345,
"learning_rate": 9.466898954703833e-07,
"loss": 1.5364,
"step": 3600
},
{
"epoch": 39.735099337748345,
"eval_bleu": 41.1359,
"eval_gen_len": 9.5667,
"eval_loss": 2.0473527908325195,
"eval_runtime": 3.9497,
"eval_samples_per_second": 7.596,
"eval_steps_per_second": 1.519,
"step": 3600
},
{
"epoch": 39.95584988962472,
"learning_rate": 9.46376306620209e-07,
"loss": 1.6224,
"step": 3620
},
{
"epoch": 40.1766004415011,
"learning_rate": 9.460627177700348e-07,
"loss": 1.5808,
"step": 3640
},
{
"epoch": 40.397350993377486,
"learning_rate": 9.457491289198605e-07,
"loss": 1.6187,
"step": 3660
},
{
"epoch": 40.618101545253865,
"learning_rate": 9.454355400696864e-07,
"loss": 1.6131,
"step": 3680
},
{
"epoch": 40.83885209713024,
"learning_rate": 9.451219512195122e-07,
"loss": 1.603,
"step": 3700
},
{
"epoch": 41.05960264900662,
"learning_rate": 9.448083623693379e-07,
"loss": 1.6253,
"step": 3720
},
{
"epoch": 41.280353200883,
"learning_rate": 9.444947735191638e-07,
"loss": 1.6125,
"step": 3740
},
{
"epoch": 41.501103752759384,
"learning_rate": 9.441811846689895e-07,
"loss": 1.5777,
"step": 3760
},
{
"epoch": 41.72185430463576,
"learning_rate": 9.438675958188153e-07,
"loss": 1.5642,
"step": 3780
},
{
"epoch": 41.94260485651214,
"learning_rate": 9.43554006968641e-07,
"loss": 1.5773,
"step": 3800
},
{
"epoch": 42.16335540838852,
"learning_rate": 9.432404181184669e-07,
"loss": 1.5218,
"step": 3820
},
{
"epoch": 42.384105960264904,
"learning_rate": 9.429268292682926e-07,
"loss": 1.5751,
"step": 3840
},
{
"epoch": 42.60485651214128,
"learning_rate": 9.426132404181184e-07,
"loss": 1.5686,
"step": 3860
},
{
"epoch": 42.82560706401766,
"learning_rate": 9.422996515679442e-07,
"loss": 1.5917,
"step": 3880
},
{
"epoch": 43.04635761589404,
"learning_rate": 9.4198606271777e-07,
"loss": 1.5427,
"step": 3900
},
{
"epoch": 43.26710816777042,
"learning_rate": 9.416724738675958e-07,
"loss": 1.6199,
"step": 3920
},
{
"epoch": 43.4878587196468,
"learning_rate": 9.413588850174215e-07,
"loss": 1.5946,
"step": 3940
},
{
"epoch": 43.70860927152318,
"learning_rate": 9.410452961672474e-07,
"loss": 1.6212,
"step": 3960
},
{
"epoch": 43.92935982339956,
"learning_rate": 9.407317073170731e-07,
"loss": 1.5569,
"step": 3980
},
{
"epoch": 44.150110375275936,
"learning_rate": 9.404181184668989e-07,
"loss": 1.521,
"step": 4000
},
{
"epoch": 44.370860927152314,
"learning_rate": 9.401045296167247e-07,
"loss": 1.56,
"step": 4020
},
{
"epoch": 44.5916114790287,
"learning_rate": 9.397909407665504e-07,
"loss": 1.6059,
"step": 4040
},
{
"epoch": 44.81236203090508,
"learning_rate": 9.394773519163763e-07,
"loss": 1.5684,
"step": 4060
},
{
"epoch": 45.033112582781456,
"learning_rate": 9.39163763066202e-07,
"loss": 1.539,
"step": 4080
},
{
"epoch": 45.253863134657834,
"learning_rate": 9.388501742160278e-07,
"loss": 1.5336,
"step": 4100
},
{
"epoch": 45.47461368653422,
"learning_rate": 9.385365853658536e-07,
"loss": 1.5521,
"step": 4120
},
{
"epoch": 45.6953642384106,
"learning_rate": 9.382229965156794e-07,
"loss": 1.5281,
"step": 4140
},
{
"epoch": 45.916114790286976,
"learning_rate": 9.379094076655052e-07,
"loss": 1.6375,
"step": 4160
},
{
"epoch": 46.136865342163354,
"learning_rate": 9.375958188153309e-07,
"loss": 1.5615,
"step": 4180
},
{
"epoch": 46.35761589403973,
"learning_rate": 9.372822299651568e-07,
"loss": 1.5181,
"step": 4200
},
{
"epoch": 46.57836644591612,
"learning_rate": 9.369686411149824e-07,
"loss": 1.6009,
"step": 4220
},
{
"epoch": 46.799116997792495,
"learning_rate": 9.366550522648083e-07,
"loss": 1.5895,
"step": 4240
},
{
"epoch": 47.019867549668874,
"learning_rate": 9.363414634146342e-07,
"loss": 1.5159,
"step": 4260
},
{
"epoch": 47.24061810154525,
"learning_rate": 9.360278745644599e-07,
"loss": 1.5179,
"step": 4280
},
{
"epoch": 47.46136865342164,
"learning_rate": 9.357142857142857e-07,
"loss": 1.5585,
"step": 4300
},
{
"epoch": 47.682119205298015,
"learning_rate": 9.354006968641114e-07,
"loss": 1.6147,
"step": 4320
},
{
"epoch": 47.90286975717439,
"learning_rate": 9.350871080139373e-07,
"loss": 1.5307,
"step": 4340
},
{
"epoch": 48.12362030905077,
"learning_rate": 9.34773519163763e-07,
"loss": 1.5673,
"step": 4360
},
{
"epoch": 48.34437086092715,
"learning_rate": 9.344599303135888e-07,
"loss": 1.5442,
"step": 4380
},
{
"epoch": 48.565121412803535,
"learning_rate": 9.341463414634146e-07,
"loss": 1.5177,
"step": 4400
},
{
"epoch": 48.78587196467991,
"learning_rate": 9.338327526132404e-07,
"loss": 1.5909,
"step": 4420
},
{
"epoch": 49.00662251655629,
"learning_rate": 9.335191637630661e-07,
"loss": 1.5565,
"step": 4440
},
{
"epoch": 49.22737306843267,
"learning_rate": 9.332055749128919e-07,
"loss": 1.4945,
"step": 4460
},
{
"epoch": 49.44812362030905,
"learning_rate": 9.328919860627177e-07,
"loss": 1.5253,
"step": 4480
},
{
"epoch": 49.66887417218543,
"learning_rate": 9.325783972125436e-07,
"loss": 1.563,
"step": 4500
},
{
"epoch": 49.88962472406181,
"learning_rate": 9.322648083623693e-07,
"loss": 1.6016,
"step": 4520
},
{
"epoch": 50.11037527593819,
"learning_rate": 9.319512195121951e-07,
"loss": 1.6032,
"step": 4540
},
{
"epoch": 50.33112582781457,
"learning_rate": 9.316376306620209e-07,
"loss": 1.5038,
"step": 4560
},
{
"epoch": 50.55187637969095,
"learning_rate": 9.313240418118467e-07,
"loss": 1.545,
"step": 4580
},
{
"epoch": 50.77262693156733,
"learning_rate": 9.310104529616724e-07,
"loss": 1.5676,
"step": 4600
},
{
"epoch": 50.99337748344371,
"learning_rate": 9.306968641114981e-07,
"loss": 1.5443,
"step": 4620
},
{
"epoch": 51.21412803532009,
"learning_rate": 9.303832752613241e-07,
"loss": 1.5608,
"step": 4640
},
{
"epoch": 51.434878587196465,
"learning_rate": 9.300696864111497e-07,
"loss": 1.551,
"step": 4660
},
{
"epoch": 51.65562913907285,
"learning_rate": 9.297560975609756e-07,
"loss": 1.5556,
"step": 4680
},
{
"epoch": 51.87637969094923,
"learning_rate": 9.294425087108013e-07,
"loss": 1.5517,
"step": 4700
},
{
"epoch": 52.09713024282561,
"learning_rate": 9.291289198606272e-07,
"loss": 1.5384,
"step": 4720
},
{
"epoch": 52.317880794701985,
"learning_rate": 9.288153310104528e-07,
"loss": 1.4869,
"step": 4740
},
{
"epoch": 52.53863134657836,
"learning_rate": 9.285017421602787e-07,
"loss": 1.5221,
"step": 4760
},
{
"epoch": 52.75938189845475,
"learning_rate": 9.281881533101046e-07,
"loss": 1.5883,
"step": 4780
},
{
"epoch": 52.980132450331126,
"learning_rate": 9.278745644599303e-07,
"loss": 1.5276,
"step": 4800
},
{
"epoch": 53.200883002207505,
"learning_rate": 9.275609756097561e-07,
"loss": 1.4969,
"step": 4820
},
{
"epoch": 53.42163355408388,
"learning_rate": 9.272473867595818e-07,
"loss": 1.5043,
"step": 4840
},
{
"epoch": 53.64238410596027,
"learning_rate": 9.269337979094077e-07,
"loss": 1.5205,
"step": 4860
},
{
"epoch": 53.863134657836646,
"learning_rate": 9.266202090592334e-07,
"loss": 1.4685,
"step": 4880
},
{
"epoch": 54.083885209713024,
"learning_rate": 9.263066202090592e-07,
"loss": 1.5099,
"step": 4900
},
{
"epoch": 54.3046357615894,
"learning_rate": 9.25993031358885e-07,
"loss": 1.531,
"step": 4920
},
{
"epoch": 54.52538631346578,
"learning_rate": 9.256794425087108e-07,
"loss": 1.5637,
"step": 4940
},
{
"epoch": 54.746136865342166,
"learning_rate": 9.253658536585365e-07,
"loss": 1.5795,
"step": 4960
},
{
"epoch": 54.966887417218544,
"learning_rate": 9.250522648083623e-07,
"loss": 1.473,
"step": 4980
},
{
"epoch": 55.18763796909492,
"learning_rate": 9.247386759581881e-07,
"loss": 1.4921,
"step": 5000
},
{
"epoch": 55.4083885209713,
"learning_rate": 9.244250871080139e-07,
"loss": 1.5147,
"step": 5020
},
{
"epoch": 55.629139072847686,
"learning_rate": 9.241114982578397e-07,
"loss": 1.5259,
"step": 5040
},
{
"epoch": 55.849889624724064,
"learning_rate": 9.237979094076655e-07,
"loss": 1.5481,
"step": 5060
},
{
"epoch": 56.07064017660044,
"learning_rate": 9.234843205574913e-07,
"loss": 1.5238,
"step": 5080
},
{
"epoch": 56.29139072847682,
"learning_rate": 9.23170731707317e-07,
"loss": 1.5497,
"step": 5100
},
{
"epoch": 56.5121412803532,
"learning_rate": 9.228571428571428e-07,
"loss": 1.5003,
"step": 5120
},
{
"epoch": 56.73289183222958,
"learning_rate": 9.225435540069686e-07,
"loss": 1.511,
"step": 5140
},
{
"epoch": 56.95364238410596,
"learning_rate": 9.222299651567944e-07,
"loss": 1.5426,
"step": 5160
},
{
"epoch": 57.17439293598234,
"learning_rate": 9.219163763066201e-07,
"loss": 1.4937,
"step": 5180
},
{
"epoch": 57.39514348785872,
"learning_rate": 9.216027874564459e-07,
"loss": 1.5059,
"step": 5200
},
{
"epoch": 57.615894039735096,
"learning_rate": 9.212891986062717e-07,
"loss": 1.548,
"step": 5220
},
{
"epoch": 57.83664459161148,
"learning_rate": 9.209756097560976e-07,
"loss": 1.4691,
"step": 5240
},
{
"epoch": 58.05739514348786,
"learning_rate": 9.206620209059232e-07,
"loss": 1.5459,
"step": 5260
},
{
"epoch": 58.27814569536424,
"learning_rate": 9.203484320557491e-07,
"loss": 1.5569,
"step": 5280
},
{
"epoch": 58.498896247240616,
"learning_rate": 9.200348432055748e-07,
"loss": 1.4634,
"step": 5300
},
{
"epoch": 58.719646799117,
"learning_rate": 9.197212543554007e-07,
"loss": 1.4567,
"step": 5320
},
{
"epoch": 58.94039735099338,
"learning_rate": 9.194076655052265e-07,
"loss": 1.5349,
"step": 5340
},
{
"epoch": 59.16114790286976,
"learning_rate": 9.190940766550522e-07,
"loss": 1.4706,
"step": 5360
},
{
"epoch": 59.381898454746135,
"learning_rate": 9.187804878048781e-07,
"loss": 1.4932,
"step": 5380
},
{
"epoch": 59.602649006622514,
"learning_rate": 9.184668989547037e-07,
"loss": 1.5204,
"step": 5400
},
{
"epoch": 59.8233995584989,
"learning_rate": 9.181533101045296e-07,
"loss": 1.5267,
"step": 5420
},
{
"epoch": 60.04415011037528,
"learning_rate": 9.178397212543552e-07,
"loss": 1.5275,
"step": 5440
},
{
"epoch": 60.264900662251655,
"learning_rate": 9.175261324041812e-07,
"loss": 1.5116,
"step": 5460
},
{
"epoch": 60.48565121412803,
"learning_rate": 9.172125435540069e-07,
"loss": 1.4686,
"step": 5480
},
{
"epoch": 60.70640176600442,
"learning_rate": 9.168989547038327e-07,
"loss": 1.4902,
"step": 5500
},
{
"epoch": 60.9271523178808,
"learning_rate": 9.165853658536585e-07,
"loss": 1.4856,
"step": 5520
},
{
"epoch": 61.147902869757175,
"learning_rate": 9.162717770034843e-07,
"loss": 1.4982,
"step": 5540
},
{
"epoch": 61.36865342163355,
"learning_rate": 9.1595818815331e-07,
"loss": 1.4829,
"step": 5560
},
{
"epoch": 61.58940397350993,
"learning_rate": 9.156445993031358e-07,
"loss": 1.5049,
"step": 5580
},
{
"epoch": 61.81015452538632,
"learning_rate": 9.153310104529617e-07,
"loss": 1.5125,
"step": 5600
},
{
"epoch": 62.030905077262695,
"learning_rate": 9.150174216027874e-07,
"loss": 1.4746,
"step": 5620
},
{
"epoch": 62.25165562913907,
"learning_rate": 9.147038327526132e-07,
"loss": 1.5128,
"step": 5640
},
{
"epoch": 62.47240618101545,
"learning_rate": 9.14390243902439e-07,
"loss": 1.4799,
"step": 5660
},
{
"epoch": 62.69315673289183,
"learning_rate": 9.140766550522648e-07,
"loss": 1.4395,
"step": 5680
},
{
"epoch": 62.913907284768214,
"learning_rate": 9.137630662020905e-07,
"loss": 1.5277,
"step": 5700
},
{
"epoch": 63.13465783664459,
"learning_rate": 9.134494773519163e-07,
"loss": 1.4806,
"step": 5720
},
{
"epoch": 63.35540838852097,
"learning_rate": 9.131358885017421e-07,
"loss": 1.5123,
"step": 5740
},
{
"epoch": 63.57615894039735,
"learning_rate": 9.12822299651568e-07,
"loss": 1.5502,
"step": 5760
},
{
"epoch": 63.796909492273734,
"learning_rate": 9.125087108013936e-07,
"loss": 1.4732,
"step": 5780
},
{
"epoch": 64.0176600441501,
"learning_rate": 9.121951219512195e-07,
"loss": 1.4694,
"step": 5800
},
{
"epoch": 64.23841059602648,
"learning_rate": 9.118815331010452e-07,
"loss": 1.4394,
"step": 5820
},
{
"epoch": 64.45916114790288,
"learning_rate": 9.11567944250871e-07,
"loss": 1.4776,
"step": 5840
},
{
"epoch": 64.67991169977925,
"learning_rate": 9.112543554006967e-07,
"loss": 1.5363,
"step": 5860
},
{
"epoch": 64.90066225165563,
"learning_rate": 9.109407665505226e-07,
"loss": 1.4584,
"step": 5880
},
{
"epoch": 65.12141280353201,
"learning_rate": 9.106271777003485e-07,
"loss": 1.4956,
"step": 5900
},
{
"epoch": 65.34216335540839,
"learning_rate": 9.103135888501741e-07,
"loss": 1.4672,
"step": 5920
},
{
"epoch": 65.56291390728477,
"learning_rate": 9.1e-07,
"loss": 1.4765,
"step": 5940
},
{
"epoch": 65.78366445916114,
"learning_rate": 9.096864111498257e-07,
"loss": 1.4821,
"step": 5960
},
{
"epoch": 66.00441501103752,
"learning_rate": 9.093728222996516e-07,
"loss": 1.4561,
"step": 5980
},
{
"epoch": 66.2251655629139,
"learning_rate": 9.090592334494772e-07,
"loss": 1.514,
"step": 6000
},
{
"epoch": 66.4459161147903,
"learning_rate": 9.08745644599303e-07,
"loss": 1.495,
"step": 6020
},
{
"epoch": 66.66666666666667,
"learning_rate": 9.084320557491289e-07,
"loss": 1.4413,
"step": 6040
},
{
"epoch": 66.88741721854305,
"learning_rate": 9.081184668989546e-07,
"loss": 1.4489,
"step": 6060
},
{
"epoch": 67.10816777041943,
"learning_rate": 9.078048780487804e-07,
"loss": 1.4897,
"step": 6080
},
{
"epoch": 67.3289183222958,
"learning_rate": 9.074912891986062e-07,
"loss": 1.4299,
"step": 6100
},
{
"epoch": 67.54966887417218,
"learning_rate": 9.071777003484321e-07,
"loss": 1.4706,
"step": 6120
},
{
"epoch": 67.77041942604856,
"learning_rate": 9.068641114982577e-07,
"loss": 1.5075,
"step": 6140
},
{
"epoch": 67.99116997792494,
"learning_rate": 9.065505226480836e-07,
"loss": 1.5227,
"step": 6160
},
{
"epoch": 68.21192052980132,
"learning_rate": 9.062369337979094e-07,
"loss": 1.456,
"step": 6180
},
{
"epoch": 68.43267108167771,
"learning_rate": 9.059233449477352e-07,
"loss": 1.4677,
"step": 6200
},
{
"epoch": 68.65342163355409,
"learning_rate": 9.056097560975609e-07,
"loss": 1.4993,
"step": 6220
},
{
"epoch": 68.87417218543047,
"learning_rate": 9.052961672473867e-07,
"loss": 1.4612,
"step": 6240
},
{
"epoch": 69.09492273730685,
"learning_rate": 9.049825783972125e-07,
"loss": 1.4549,
"step": 6260
},
{
"epoch": 69.31567328918322,
"learning_rate": 9.046689895470383e-07,
"loss": 1.5033,
"step": 6280
},
{
"epoch": 69.5364238410596,
"learning_rate": 9.04355400696864e-07,
"loss": 1.4558,
"step": 6300
},
{
"epoch": 69.75717439293598,
"learning_rate": 9.040418118466899e-07,
"loss": 1.4884,
"step": 6320
},
{
"epoch": 69.97792494481236,
"learning_rate": 9.037282229965156e-07,
"loss": 1.4332,
"step": 6340
},
{
"epoch": 70.19867549668874,
"learning_rate": 9.034146341463414e-07,
"loss": 1.455,
"step": 6360
},
{
"epoch": 70.41942604856513,
"learning_rate": 9.031010452961671e-07,
"loss": 1.4155,
"step": 6380
},
{
"epoch": 70.6401766004415,
"learning_rate": 9.02787456445993e-07,
"loss": 1.5361,
"step": 6400
},
{
"epoch": 70.86092715231788,
"learning_rate": 9.024738675958189e-07,
"loss": 1.4696,
"step": 6420
},
{
"epoch": 71.08167770419426,
"learning_rate": 9.021602787456445e-07,
"loss": 1.4936,
"step": 6440
},
{
"epoch": 71.30242825607064,
"learning_rate": 9.018466898954704e-07,
"loss": 1.4689,
"step": 6460
},
{
"epoch": 71.52317880794702,
"learning_rate": 9.015331010452961e-07,
"loss": 1.4862,
"step": 6480
},
{
"epoch": 71.7439293598234,
"learning_rate": 9.012195121951219e-07,
"loss": 1.463,
"step": 6500
},
{
"epoch": 71.96467991169978,
"learning_rate": 9.009059233449477e-07,
"loss": 1.4663,
"step": 6520
},
{
"epoch": 72.18543046357615,
"learning_rate": 9.005923344947735e-07,
"loss": 1.4918,
"step": 6540
},
{
"epoch": 72.40618101545253,
"learning_rate": 9.002787456445993e-07,
"loss": 1.474,
"step": 6560
},
{
"epoch": 72.62693156732892,
"learning_rate": 8.99965156794425e-07,
"loss": 1.4693,
"step": 6580
},
{
"epoch": 72.8476821192053,
"learning_rate": 8.996515679442507e-07,
"loss": 1.438,
"step": 6600
},
{
"epoch": 73.06843267108168,
"learning_rate": 8.993379790940766e-07,
"loss": 1.4405,
"step": 6620
},
{
"epoch": 73.28918322295806,
"learning_rate": 8.990243902439025e-07,
"loss": 1.4501,
"step": 6640
},
{
"epoch": 73.50993377483444,
"learning_rate": 8.987108013937282e-07,
"loss": 1.4801,
"step": 6660
},
{
"epoch": 73.73068432671081,
"learning_rate": 8.98397212543554e-07,
"loss": 1.4407,
"step": 6680
},
{
"epoch": 73.9514348785872,
"learning_rate": 8.980836236933798e-07,
"loss": 1.4393,
"step": 6700
},
{
"epoch": 74.17218543046357,
"learning_rate": 8.977700348432056e-07,
"loss": 1.3913,
"step": 6720
},
{
"epoch": 74.39293598233995,
"learning_rate": 8.974564459930313e-07,
"loss": 1.4949,
"step": 6740
},
{
"epoch": 74.61368653421634,
"learning_rate": 8.971428571428571e-07,
"loss": 1.4363,
"step": 6760
},
{
"epoch": 74.83443708609272,
"learning_rate": 8.968292682926829e-07,
"loss": 1.4655,
"step": 6780
},
{
"epoch": 75.0551876379691,
"learning_rate": 8.965156794425087e-07,
"loss": 1.4991,
"step": 6800
},
{
"epoch": 75.27593818984548,
"learning_rate": 8.962020905923344e-07,
"loss": 1.4516,
"step": 6820
},
{
"epoch": 75.49668874172185,
"learning_rate": 8.958885017421603e-07,
"loss": 1.5084,
"step": 6840
},
{
"epoch": 75.71743929359823,
"learning_rate": 8.95574912891986e-07,
"loss": 1.4542,
"step": 6860
},
{
"epoch": 75.93818984547461,
"learning_rate": 8.952613240418118e-07,
"loss": 1.475,
"step": 6880
},
{
"epoch": 76.15894039735099,
"learning_rate": 8.949477351916375e-07,
"loss": 1.4287,
"step": 6900
},
{
"epoch": 76.37969094922737,
"learning_rate": 8.946341463414634e-07,
"loss": 1.453,
"step": 6920
},
{
"epoch": 76.60044150110376,
"learning_rate": 8.943205574912893e-07,
"loss": 1.4372,
"step": 6940
},
{
"epoch": 76.82119205298014,
"learning_rate": 8.940069686411149e-07,
"loss": 1.4665,
"step": 6960
},
{
"epoch": 77.04194260485652,
"learning_rate": 8.936933797909408e-07,
"loss": 1.4841,
"step": 6980
},
{
"epoch": 77.2626931567329,
"learning_rate": 8.933797909407665e-07,
"loss": 1.4491,
"step": 7000
},
{
"epoch": 77.48344370860927,
"learning_rate": 8.930662020905923e-07,
"loss": 1.4382,
"step": 7020
},
{
"epoch": 77.70419426048565,
"learning_rate": 8.92752613240418e-07,
"loss": 1.4206,
"step": 7040
},
{
"epoch": 77.92494481236203,
"learning_rate": 8.924390243902439e-07,
"loss": 1.4521,
"step": 7060
},
{
"epoch": 78.1456953642384,
"learning_rate": 8.921254355400697e-07,
"loss": 1.4688,
"step": 7080
},
{
"epoch": 78.36644591611478,
"learning_rate": 8.918118466898954e-07,
"loss": 1.4304,
"step": 7100
},
{
"epoch": 78.58719646799118,
"learning_rate": 8.914982578397212e-07,
"loss": 1.4165,
"step": 7120
},
{
"epoch": 78.80794701986756,
"learning_rate": 8.91184668989547e-07,
"loss": 1.4728,
"step": 7140
},
{
"epoch": 79.02869757174393,
"learning_rate": 8.908710801393728e-07,
"loss": 1.4959,
"step": 7160
},
{
"epoch": 79.24944812362031,
"learning_rate": 8.905574912891986e-07,
"loss": 1.4211,
"step": 7180
},
{
"epoch": 79.47019867549669,
"learning_rate": 8.902439024390244e-07,
"loss": 1.4794,
"step": 7200
},
{
"epoch": 79.47019867549669,
"eval_bleu": 44.4125,
"eval_gen_len": 9.0667,
"eval_loss": 2.0255990028381348,
"eval_runtime": 3.4282,
"eval_samples_per_second": 8.751,
"eval_steps_per_second": 1.75,
"step": 7200
},
{
"epoch": 79.69094922737307,
"learning_rate": 8.899303135888502e-07,
"loss": 1.4294,
"step": 7220
},
{
"epoch": 79.91169977924945,
"learning_rate": 8.896167247386759e-07,
"loss": 1.4691,
"step": 7240
},
{
"epoch": 80.13245033112582,
"learning_rate": 8.893031358885017e-07,
"loss": 1.4619,
"step": 7260
},
{
"epoch": 80.3532008830022,
"learning_rate": 8.889895470383275e-07,
"loss": 1.4952,
"step": 7280
},
{
"epoch": 80.57395143487858,
"learning_rate": 8.886759581881533e-07,
"loss": 1.4284,
"step": 7300
},
{
"epoch": 80.79470198675497,
"learning_rate": 8.88362369337979e-07,
"loss": 1.3992,
"step": 7320
},
{
"epoch": 81.01545253863135,
"learning_rate": 8.880487804878048e-07,
"loss": 1.4512,
"step": 7340
},
{
"epoch": 81.23620309050773,
"learning_rate": 8.877351916376307e-07,
"loss": 1.4298,
"step": 7360
},
{
"epoch": 81.45695364238411,
"learning_rate": 8.874216027874564e-07,
"loss": 1.4442,
"step": 7380
},
{
"epoch": 81.67770419426049,
"learning_rate": 8.871080139372822e-07,
"loss": 1.4112,
"step": 7400
},
{
"epoch": 81.89845474613686,
"learning_rate": 8.867944250871079e-07,
"loss": 1.4365,
"step": 7420
},
{
"epoch": 82.11920529801324,
"learning_rate": 8.864808362369338e-07,
"loss": 1.5076,
"step": 7440
},
{
"epoch": 82.33995584988962,
"learning_rate": 8.861672473867594e-07,
"loss": 1.4004,
"step": 7460
},
{
"epoch": 82.560706401766,
"learning_rate": 8.858536585365853e-07,
"loss": 1.4641,
"step": 7480
},
{
"epoch": 82.78145695364239,
"learning_rate": 8.855400696864112e-07,
"loss": 1.4321,
"step": 7500
},
{
"epoch": 83.00220750551877,
"learning_rate": 8.85226480836237e-07,
"loss": 1.4592,
"step": 7520
},
{
"epoch": 83.22295805739515,
"learning_rate": 8.849128919860627e-07,
"loss": 1.4101,
"step": 7540
},
{
"epoch": 83.44370860927152,
"learning_rate": 8.845993031358884e-07,
"loss": 1.4455,
"step": 7560
},
{
"epoch": 83.6644591611479,
"learning_rate": 8.842857142857143e-07,
"loss": 1.4175,
"step": 7580
},
{
"epoch": 83.88520971302428,
"learning_rate": 8.8397212543554e-07,
"loss": 1.4389,
"step": 7600
},
{
"epoch": 84.10596026490066,
"learning_rate": 8.836585365853658e-07,
"loss": 1.4963,
"step": 7620
},
{
"epoch": 84.32671081677704,
"learning_rate": 8.833449477351916e-07,
"loss": 1.4683,
"step": 7640
},
{
"epoch": 84.54746136865342,
"learning_rate": 8.830313588850174e-07,
"loss": 1.4528,
"step": 7660
},
{
"epoch": 84.76821192052981,
"learning_rate": 8.827177700348431e-07,
"loss": 1.4128,
"step": 7680
},
{
"epoch": 84.98896247240619,
"learning_rate": 8.824041811846689e-07,
"loss": 1.4012,
"step": 7700
},
{
"epoch": 85.20971302428256,
"learning_rate": 8.820905923344947e-07,
"loss": 1.4448,
"step": 7720
},
{
"epoch": 85.43046357615894,
"learning_rate": 8.817770034843205e-07,
"loss": 1.3746,
"step": 7740
},
{
"epoch": 85.65121412803532,
"learning_rate": 8.814634146341464e-07,
"loss": 1.4086,
"step": 7760
},
{
"epoch": 85.8719646799117,
"learning_rate": 8.811498257839721e-07,
"loss": 1.4774,
"step": 7780
},
{
"epoch": 86.09271523178808,
"learning_rate": 8.808362369337979e-07,
"loss": 1.4534,
"step": 7800
},
{
"epoch": 86.31346578366445,
"learning_rate": 8.805226480836237e-07,
"loss": 1.3844,
"step": 7820
},
{
"epoch": 86.53421633554083,
"learning_rate": 8.802090592334494e-07,
"loss": 1.4751,
"step": 7840
},
{
"epoch": 86.75496688741723,
"learning_rate": 8.798954703832752e-07,
"loss": 1.4002,
"step": 7860
},
{
"epoch": 86.9757174392936,
"learning_rate": 8.79581881533101e-07,
"loss": 1.4659,
"step": 7880
},
{
"epoch": 87.19646799116998,
"learning_rate": 8.792682926829268e-07,
"loss": 1.4322,
"step": 7900
},
{
"epoch": 87.41721854304636,
"learning_rate": 8.789547038327526e-07,
"loss": 1.4278,
"step": 7920
},
{
"epoch": 87.63796909492274,
"learning_rate": 8.786411149825783e-07,
"loss": 1.453,
"step": 7940
},
{
"epoch": 87.85871964679912,
"learning_rate": 8.783275261324042e-07,
"loss": 1.4371,
"step": 7960
},
{
"epoch": 88.0794701986755,
"learning_rate": 8.780139372822298e-07,
"loss": 1.4775,
"step": 7980
},
{
"epoch": 88.30022075055187,
"learning_rate": 8.777003484320557e-07,
"loss": 1.4058,
"step": 8000
},
{
"epoch": 88.52097130242825,
"learning_rate": 8.773867595818815e-07,
"loss": 1.4119,
"step": 8020
},
{
"epoch": 88.74172185430463,
"learning_rate": 8.770731707317073e-07,
"loss": 1.4316,
"step": 8040
},
{
"epoch": 88.96247240618102,
"learning_rate": 8.767595818815331e-07,
"loss": 1.4198,
"step": 8060
},
{
"epoch": 89.1832229580574,
"learning_rate": 8.764459930313588e-07,
"loss": 1.4434,
"step": 8080
},
{
"epoch": 89.40397350993378,
"learning_rate": 8.761324041811848e-07,
"loss": 1.4309,
"step": 8100
},
{
"epoch": 89.62472406181016,
"learning_rate": 8.758188153310104e-07,
"loss": 1.3757,
"step": 8120
},
{
"epoch": 89.84547461368653,
"learning_rate": 8.755052264808362e-07,
"loss": 1.479,
"step": 8140
},
{
"epoch": 90.06622516556291,
"learning_rate": 8.751916376306619e-07,
"loss": 1.3926,
"step": 8160
},
{
"epoch": 90.28697571743929,
"learning_rate": 8.748780487804878e-07,
"loss": 1.4629,
"step": 8180
},
{
"epoch": 90.50772626931567,
"learning_rate": 8.745644599303135e-07,
"loss": 1.3793,
"step": 8200
},
{
"epoch": 90.72847682119205,
"learning_rate": 8.742508710801393e-07,
"loss": 1.4113,
"step": 8220
},
{
"epoch": 90.94922737306844,
"learning_rate": 8.739372822299651e-07,
"loss": 1.4187,
"step": 8240
},
{
"epoch": 91.16997792494482,
"learning_rate": 8.73623693379791e-07,
"loss": 1.4466,
"step": 8260
},
{
"epoch": 91.3907284768212,
"learning_rate": 8.733101045296167e-07,
"loss": 1.4219,
"step": 8280
},
{
"epoch": 91.61147902869757,
"learning_rate": 8.729965156794424e-07,
"loss": 1.4783,
"step": 8300
},
{
"epoch": 91.83222958057395,
"learning_rate": 8.726829268292683e-07,
"loss": 1.4111,
"step": 8320
},
{
"epoch": 92.05298013245033,
"learning_rate": 8.723693379790941e-07,
"loss": 1.427,
"step": 8340
},
{
"epoch": 92.27373068432671,
"learning_rate": 8.720557491289198e-07,
"loss": 1.414,
"step": 8360
},
{
"epoch": 92.49448123620309,
"learning_rate": 8.717421602787456e-07,
"loss": 1.4097,
"step": 8380
},
{
"epoch": 92.71523178807946,
"learning_rate": 8.714285714285715e-07,
"loss": 1.4818,
"step": 8400
},
{
"epoch": 92.93598233995586,
"learning_rate": 8.711149825783971e-07,
"loss": 1.4127,
"step": 8420
},
{
"epoch": 93.15673289183223,
"learning_rate": 8.708013937282229e-07,
"loss": 1.4264,
"step": 8440
},
{
"epoch": 93.37748344370861,
"learning_rate": 8.704878048780487e-07,
"loss": 1.4663,
"step": 8460
},
{
"epoch": 93.59823399558499,
"learning_rate": 8.701742160278746e-07,
"loss": 1.4151,
"step": 8480
},
{
"epoch": 93.81898454746137,
"learning_rate": 8.698606271777002e-07,
"loss": 1.4362,
"step": 8500
},
{
"epoch": 94.03973509933775,
"learning_rate": 8.695470383275261e-07,
"loss": 1.3755,
"step": 8520
},
{
"epoch": 94.26048565121413,
"learning_rate": 8.69233449477352e-07,
"loss": 1.3927,
"step": 8540
},
{
"epoch": 94.4812362030905,
"learning_rate": 8.689198606271777e-07,
"loss": 1.4137,
"step": 8560
},
{
"epoch": 94.70198675496688,
"learning_rate": 8.686062717770035e-07,
"loss": 1.4284,
"step": 8580
},
{
"epoch": 94.92273730684327,
"learning_rate": 8.682926829268292e-07,
"loss": 1.4287,
"step": 8600
},
{
"epoch": 95.14348785871965,
"learning_rate": 8.679790940766551e-07,
"loss": 1.4243,
"step": 8620
},
{
"epoch": 95.36423841059603,
"learning_rate": 8.676655052264807e-07,
"loss": 1.3588,
"step": 8640
},
{
"epoch": 95.58498896247241,
"learning_rate": 8.673519163763066e-07,
"loss": 1.3846,
"step": 8660
},
{
"epoch": 95.80573951434879,
"learning_rate": 8.670383275261325e-07,
"loss": 1.4513,
"step": 8680
},
{
"epoch": 96.02649006622516,
"learning_rate": 8.667247386759582e-07,
"loss": 1.4022,
"step": 8700
},
{
"epoch": 96.24724061810154,
"learning_rate": 8.664111498257838e-07,
"loss": 1.3969,
"step": 8720
},
{
"epoch": 96.46799116997792,
"learning_rate": 8.660975609756097e-07,
"loss": 1.38,
"step": 8740
},
{
"epoch": 96.6887417218543,
"learning_rate": 8.657839721254355e-07,
"loss": 1.4701,
"step": 8760
},
{
"epoch": 96.90949227373068,
"learning_rate": 8.654703832752613e-07,
"loss": 1.3637,
"step": 8780
},
{
"epoch": 97.13024282560707,
"learning_rate": 8.65156794425087e-07,
"loss": 1.3748,
"step": 8800
},
{
"epoch": 97.35099337748345,
"learning_rate": 8.648432055749129e-07,
"loss": 1.461,
"step": 8820
},
{
"epoch": 97.57174392935983,
"learning_rate": 8.645296167247387e-07,
"loss": 1.4615,
"step": 8840
},
{
"epoch": 97.7924944812362,
"learning_rate": 8.642160278745644e-07,
"loss": 1.3775,
"step": 8860
},
{
"epoch": 98.01324503311258,
"learning_rate": 8.639024390243902e-07,
"loss": 1.4125,
"step": 8880
},
{
"epoch": 98.23399558498896,
"learning_rate": 8.63588850174216e-07,
"loss": 1.3846,
"step": 8900
},
{
"epoch": 98.45474613686534,
"learning_rate": 8.632752613240419e-07,
"loss": 1.3948,
"step": 8920
},
{
"epoch": 98.67549668874172,
"learning_rate": 8.629616724738675e-07,
"loss": 1.348,
"step": 8940
},
{
"epoch": 98.8962472406181,
"learning_rate": 8.626480836236934e-07,
"loss": 1.4504,
"step": 8960
},
{
"epoch": 99.11699779249449,
"learning_rate": 8.623344947735191e-07,
"loss": 1.3899,
"step": 8980
},
{
"epoch": 99.33774834437087,
"learning_rate": 8.62020905923345e-07,
"loss": 1.4558,
"step": 9000
},
{
"epoch": 99.55849889624724,
"learning_rate": 8.617073170731706e-07,
"loss": 1.3781,
"step": 9020
},
{
"epoch": 99.77924944812362,
"learning_rate": 8.613937282229965e-07,
"loss": 1.3847,
"step": 9040
},
{
"epoch": 100.0,
"learning_rate": 8.610801393728222e-07,
"loss": 1.4043,
"step": 9060
},
{
"epoch": 100.22075055187638,
"learning_rate": 8.60766550522648e-07,
"loss": 1.4069,
"step": 9080
},
{
"epoch": 100.44150110375276,
"learning_rate": 8.604529616724739e-07,
"loss": 1.3562,
"step": 9100
},
{
"epoch": 100.66225165562913,
"learning_rate": 8.601393728222996e-07,
"loss": 1.3854,
"step": 9120
},
{
"epoch": 100.88300220750551,
"learning_rate": 8.598257839721255e-07,
"loss": 1.3842,
"step": 9140
},
{
"epoch": 101.1037527593819,
"learning_rate": 8.595121951219512e-07,
"loss": 1.3954,
"step": 9160
},
{
"epoch": 101.32450331125828,
"learning_rate": 8.59198606271777e-07,
"loss": 1.3966,
"step": 9180
},
{
"epoch": 101.54525386313466,
"learning_rate": 8.588850174216027e-07,
"loss": 1.4175,
"step": 9200
},
{
"epoch": 101.76600441501104,
"learning_rate": 8.585714285714286e-07,
"loss": 1.3944,
"step": 9220
},
{
"epoch": 101.98675496688742,
"learning_rate": 8.582578397212543e-07,
"loss": 1.343,
"step": 9240
},
{
"epoch": 102.2075055187638,
"learning_rate": 8.579442508710801e-07,
"loss": 1.3974,
"step": 9260
},
{
"epoch": 102.42825607064017,
"learning_rate": 8.576306620209059e-07,
"loss": 1.4235,
"step": 9280
},
{
"epoch": 102.64900662251655,
"learning_rate": 8.573170731707317e-07,
"loss": 1.379,
"step": 9300
},
{
"epoch": 102.86975717439293,
"learning_rate": 8.570034843205574e-07,
"loss": 1.3593,
"step": 9320
},
{
"epoch": 103.09050772626932,
"learning_rate": 8.566898954703832e-07,
"loss": 1.4164,
"step": 9340
},
{
"epoch": 103.3112582781457,
"learning_rate": 8.563763066202091e-07,
"loss": 1.3811,
"step": 9360
},
{
"epoch": 103.53200883002208,
"learning_rate": 8.560627177700348e-07,
"loss": 1.3853,
"step": 9380
},
{
"epoch": 103.75275938189846,
"learning_rate": 8.557491289198606e-07,
"loss": 1.3948,
"step": 9400
},
{
"epoch": 103.97350993377484,
"learning_rate": 8.554355400696864e-07,
"loss": 1.4206,
"step": 9420
},
{
"epoch": 104.19426048565121,
"learning_rate": 8.551219512195122e-07,
"loss": 1.3967,
"step": 9440
},
{
"epoch": 104.41501103752759,
"learning_rate": 8.548083623693379e-07,
"loss": 1.3597,
"step": 9460
},
{
"epoch": 104.63576158940397,
"learning_rate": 8.544947735191637e-07,
"loss": 1.4107,
"step": 9480
},
{
"epoch": 104.85651214128035,
"learning_rate": 8.541811846689896e-07,
"loss": 1.3626,
"step": 9500
},
{
"epoch": 105.07726269315673,
"learning_rate": 8.538675958188153e-07,
"loss": 1.4051,
"step": 9520
},
{
"epoch": 105.29801324503312,
"learning_rate": 8.53554006968641e-07,
"loss": 1.4063,
"step": 9540
},
{
"epoch": 105.5187637969095,
"learning_rate": 8.532404181184669e-07,
"loss": 1.4275,
"step": 9560
},
{
"epoch": 105.73951434878587,
"learning_rate": 8.529268292682926e-07,
"loss": 1.3462,
"step": 9580
},
{
"epoch": 105.96026490066225,
"learning_rate": 8.526132404181184e-07,
"loss": 1.3702,
"step": 9600
},
{
"epoch": 106.18101545253863,
"learning_rate": 8.522996515679441e-07,
"loss": 1.3683,
"step": 9620
},
{
"epoch": 106.40176600441501,
"learning_rate": 8.5198606271777e-07,
"loss": 1.4073,
"step": 9640
},
{
"epoch": 106.62251655629139,
"learning_rate": 8.516724738675959e-07,
"loss": 1.3627,
"step": 9660
},
{
"epoch": 106.84326710816777,
"learning_rate": 8.513588850174215e-07,
"loss": 1.4,
"step": 9680
},
{
"epoch": 107.06401766004414,
"learning_rate": 8.510452961672474e-07,
"loss": 1.3322,
"step": 9700
},
{
"epoch": 107.28476821192054,
"learning_rate": 8.507317073170731e-07,
"loss": 1.3776,
"step": 9720
},
{
"epoch": 107.50551876379691,
"learning_rate": 8.50418118466899e-07,
"loss": 1.3837,
"step": 9740
},
{
"epoch": 107.72626931567329,
"learning_rate": 8.501045296167246e-07,
"loss": 1.3633,
"step": 9760
},
{
"epoch": 107.94701986754967,
"learning_rate": 8.497909407665505e-07,
"loss": 1.4582,
"step": 9780
},
{
"epoch": 108.16777041942605,
"learning_rate": 8.494773519163763e-07,
"loss": 1.4204,
"step": 9800
},
{
"epoch": 108.38852097130243,
"learning_rate": 8.49163763066202e-07,
"loss": 1.3606,
"step": 9820
},
{
"epoch": 108.6092715231788,
"learning_rate": 8.488501742160278e-07,
"loss": 1.41,
"step": 9840
},
{
"epoch": 108.83002207505518,
"learning_rate": 8.485365853658536e-07,
"loss": 1.321,
"step": 9860
},
{
"epoch": 109.05077262693156,
"learning_rate": 8.482229965156795e-07,
"loss": 1.3471,
"step": 9880
},
{
"epoch": 109.27152317880795,
"learning_rate": 8.479094076655051e-07,
"loss": 1.3809,
"step": 9900
},
{
"epoch": 109.49227373068433,
"learning_rate": 8.47595818815331e-07,
"loss": 1.3795,
"step": 9920
},
{
"epoch": 109.71302428256071,
"learning_rate": 8.472822299651568e-07,
"loss": 1.3751,
"step": 9940
},
{
"epoch": 109.93377483443709,
"learning_rate": 8.469686411149826e-07,
"loss": 1.3513,
"step": 9960
},
{
"epoch": 110.15452538631347,
"learning_rate": 8.466550522648083e-07,
"loss": 1.3526,
"step": 9980
},
{
"epoch": 110.37527593818984,
"learning_rate": 8.463414634146341e-07,
"loss": 1.3819,
"step": 10000
},
{
"epoch": 110.59602649006622,
"learning_rate": 8.460278745644599e-07,
"loss": 1.3961,
"step": 10020
},
{
"epoch": 110.8167770419426,
"learning_rate": 8.457142857142856e-07,
"loss": 1.3934,
"step": 10040
},
{
"epoch": 111.03752759381898,
"learning_rate": 8.454006968641114e-07,
"loss": 1.3603,
"step": 10060
},
{
"epoch": 111.25827814569537,
"learning_rate": 8.450871080139372e-07,
"loss": 1.3933,
"step": 10080
},
{
"epoch": 111.47902869757175,
"learning_rate": 8.44773519163763e-07,
"loss": 1.381,
"step": 10100
},
{
"epoch": 111.69977924944813,
"learning_rate": 8.444599303135888e-07,
"loss": 1.4153,
"step": 10120
},
{
"epoch": 111.9205298013245,
"learning_rate": 8.441463414634147e-07,
"loss": 1.361,
"step": 10140
},
{
"epoch": 112.14128035320088,
"learning_rate": 8.438327526132404e-07,
"loss": 1.3341,
"step": 10160
},
{
"epoch": 112.36203090507726,
"learning_rate": 8.435191637630662e-07,
"loss": 1.3696,
"step": 10180
},
{
"epoch": 112.58278145695364,
"learning_rate": 8.432055749128919e-07,
"loss": 1.3899,
"step": 10200
},
{
"epoch": 112.80353200883002,
"learning_rate": 8.428919860627178e-07,
"loss": 1.3883,
"step": 10220
},
{
"epoch": 113.0242825607064,
"learning_rate": 8.425783972125435e-07,
"loss": 1.3401,
"step": 10240
},
{
"epoch": 113.24503311258277,
"learning_rate": 8.422648083623693e-07,
"loss": 1.3599,
"step": 10260
},
{
"epoch": 113.46578366445917,
"learning_rate": 8.419512195121951e-07,
"loss": 1.3688,
"step": 10280
},
{
"epoch": 113.68653421633555,
"learning_rate": 8.416376306620209e-07,
"loss": 1.3976,
"step": 10300
},
{
"epoch": 113.90728476821192,
"learning_rate": 8.413240418118465e-07,
"loss": 1.3909,
"step": 10320
},
{
"epoch": 114.1280353200883,
"learning_rate": 8.410104529616724e-07,
"loss": 1.4132,
"step": 10340
},
{
"epoch": 114.34878587196468,
"learning_rate": 8.406968641114982e-07,
"loss": 1.3741,
"step": 10360
},
{
"epoch": 114.56953642384106,
"learning_rate": 8.40383275261324e-07,
"loss": 1.3339,
"step": 10380
},
{
"epoch": 114.79028697571744,
"learning_rate": 8.400696864111498e-07,
"loss": 1.3869,
"step": 10400
},
{
"epoch": 115.01103752759381,
"learning_rate": 8.397560975609756e-07,
"loss": 1.3644,
"step": 10420
},
{
"epoch": 115.23178807947019,
"learning_rate": 8.394425087108014e-07,
"loss": 1.3584,
"step": 10440
},
{
"epoch": 115.45253863134658,
"learning_rate": 8.391289198606271e-07,
"loss": 1.3698,
"step": 10460
},
{
"epoch": 115.67328918322296,
"learning_rate": 8.388153310104529e-07,
"loss": 1.39,
"step": 10480
},
{
"epoch": 115.89403973509934,
"learning_rate": 8.385017421602787e-07,
"loss": 1.3651,
"step": 10500
},
{
"epoch": 116.11479028697572,
"learning_rate": 8.381881533101045e-07,
"loss": 1.339,
"step": 10520
},
{
"epoch": 116.3355408388521,
"learning_rate": 8.378745644599303e-07,
"loss": 1.3469,
"step": 10540
},
{
"epoch": 116.55629139072848,
"learning_rate": 8.375609756097561e-07,
"loss": 1.4008,
"step": 10560
},
{
"epoch": 116.77704194260485,
"learning_rate": 8.372473867595818e-07,
"loss": 1.3757,
"step": 10580
},
{
"epoch": 116.99779249448123,
"learning_rate": 8.369337979094076e-07,
"loss": 1.3792,
"step": 10600
},
{
"epoch": 117.21854304635761,
"learning_rate": 8.366202090592334e-07,
"loss": 1.3422,
"step": 10620
},
{
"epoch": 117.439293598234,
"learning_rate": 8.363066202090592e-07,
"loss": 1.3876,
"step": 10640
},
{
"epoch": 117.66004415011038,
"learning_rate": 8.359930313588849e-07,
"loss": 1.3519,
"step": 10660
},
{
"epoch": 117.88079470198676,
"learning_rate": 8.356794425087108e-07,
"loss": 1.3268,
"step": 10680
},
{
"epoch": 118.10154525386314,
"learning_rate": 8.353658536585366e-07,
"loss": 1.4245,
"step": 10700
},
{
"epoch": 118.32229580573951,
"learning_rate": 8.350522648083623e-07,
"loss": 1.3755,
"step": 10720
},
{
"epoch": 118.54304635761589,
"learning_rate": 8.347386759581881e-07,
"loss": 1.3318,
"step": 10740
},
{
"epoch": 118.76379690949227,
"learning_rate": 8.344250871080139e-07,
"loss": 1.3395,
"step": 10760
},
{
"epoch": 118.98454746136865,
"learning_rate": 8.341114982578397e-07,
"loss": 1.407,
"step": 10780
},
{
"epoch": 119.20529801324503,
"learning_rate": 8.337979094076654e-07,
"loss": 1.3621,
"step": 10800
},
{
"epoch": 119.20529801324503,
"eval_bleu": 49.121,
"eval_gen_len": 8.8,
"eval_loss": 2.0037317276000977,
"eval_runtime": 3.2416,
"eval_samples_per_second": 9.255,
"eval_steps_per_second": 1.851,
"step": 10800
}
],
"logging_steps": 20,
"max_steps": 57600,
"num_input_tokens_seen": 0,
"num_train_epochs": 640,
"save_steps": 3600,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 3.194864182891315e+16,
"train_batch_size": 5,
"trial_name": null,
"trial_params": null
}
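
For reference, a minimal sketch of how a `trainer_state.json` like the one above can be inspected with the Python standard library only. The checkpoint path in the example is an assumption (adjust it to wherever this file lives locally); the keys used (`best_metric`, `best_model_checkpoint`, `log_history`, `step`, `loss`, `eval_bleu`) are the ones present in the file above.

```python
import json

# Path is hypothetical; point it at your local copy of this trainer_state.json.
with open("checkpoint-10800/trainer_state.json", "r", encoding="utf-8") as f:
    state = json.load(f)

# Training-log entries carry a "loss" field; evaluation entries carry "eval_bleu".
train_log = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
eval_log = [(e["step"], e["eval_bleu"]) for e in state["log_history"] if "eval_bleu" in e]

print(f"best BLEU: {state['best_metric']} at {state['best_model_checkpoint']}")
print(f"latest (step, train loss): {train_log[-1]}")
print(f"(step, eval BLEU) pairs: {eval_log}")
```

Running this against the state above would report the best BLEU of 49.121 at step 10800 and the three evaluation points logged every 3600 steps.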